revlog: deprecate the _nodecache attribute (API)...
marmoute
r43930:6e3e3e54 default
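The commit removes the `_nodecache` instance attribute and replaces it with a deprecation shim: a property that emits a deprecation warning and then delegates to the index's nodemap. A minimal before/after sketch for extension code that still reads the attribute (the `rl` revlog instance and `node` value below are illustrative, not part of the change):

    # Deprecated since 5.3: now goes through the property added in this
    # commit, which calls util.nouideprecwarn() before delegating.
    rev = rl._nodecache[node]

    # Preferred spelling, per the deprecation message:
    rev = rl.index.nodemap[node]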
@@ -1,2947 +1,2952
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVIDX_SIDEDATA,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the name to prevent pyflakes constraints
# We need these name available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False, {}


def ellipsiswriteprocessor(rl, text, sidedata):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def getoffset(q):
    return int(q >> 16)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(b">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack


class revlogoldindex(list):
    @util.propertycache
    def nodemap(self):
        nodemap = revlogutils.NodeMap({nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def clearcaches(self):
        self.__dict__.pop('nodemap', None)

    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)


class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = revlogutils.NodeMap({nullid: nullrev})
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        index = revlogoldindex(index)
        return index, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)


# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
-        self._nodecache = None
        self._nodepos = None
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        if self.hassidedata:
            self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )

        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, self._chunkcache = d
-        self.nodemap = self._nodecache = self.index.nodemap
+        self.nodemap = self.index.nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            args['checkambig'] = self._checkambig
        if mode == b'w':
            args['atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self.index.nodemap

+    @property
+    def _nodecache(self):
+        msg = "revlog._nodecache is deprecated, use revlog.index.nodemap"
+        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
+        return self.index.nodemap
+
    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self.index.clearcaches()

    def rev(self, node):
        try:
            return self.index.nodemap[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

829 def ancestors(self, revs, stoprev=0, inclusive=False):
834 def ancestors(self, revs, stoprev=0, inclusive=False):
830 """Generate the ancestors of 'revs' in reverse revision order.
835 """Generate the ancestors of 'revs' in reverse revision order.
831 Does not generate revs lower than stoprev.
836 Does not generate revs lower than stoprev.
832
837
833 See the documentation for ancestor.lazyancestors for more details."""
838 See the documentation for ancestor.lazyancestors for more details."""
834
839
835 # first, make sure start revisions aren't filtered
840 # first, make sure start revisions aren't filtered
836 revs = list(revs)
841 revs = list(revs)
837 checkrev = self.node
842 checkrev = self.node
838 for r in revs:
843 for r in revs:
839 checkrev(r)
844 checkrev(r)
840 # and we're sure ancestors aren't filtered as well
845 # and we're sure ancestors aren't filtered as well
841
846
842 if rustancestor is not None:
847 if rustancestor is not None:
843 lazyancestors = rustancestor.LazyAncestors
848 lazyancestors = rustancestor.LazyAncestors
844 arg = self.index
849 arg = self.index
845 elif util.safehasattr(parsers, b'rustlazyancestors'):
850 elif util.safehasattr(parsers, b'rustlazyancestors'):
846 lazyancestors = ancestor.rustlazyancestors
851 lazyancestors = ancestor.rustlazyancestors
847 arg = self.index
852 arg = self.index
848 else:
853 else:
849 lazyancestors = ancestor.lazyancestors
854 lazyancestors = ancestor.lazyancestors
850 arg = self._uncheckedparentrevs
855 arg = self._uncheckedparentrevs
851 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
856 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
852
857
853 def descendants(self, revs):
858 def descendants(self, revs):
854 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
859 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
855
860
856 def findcommonmissing(self, common=None, heads=None):
861 def findcommonmissing(self, common=None, heads=None):
857 """Return a tuple of the ancestors of common and the ancestors of heads
862 """Return a tuple of the ancestors of common and the ancestors of heads
858 that are not ancestors of common. In revset terminology, we return the
863 that are not ancestors of common. In revset terminology, we return the
859 tuple:
864 tuple:
860
865
861 ::common, (::heads) - (::common)
866 ::common, (::heads) - (::common)
862
867
863 The list is sorted by revision number, meaning it is
868 The list is sorted by revision number, meaning it is
864 topologically sorted.
869 topologically sorted.
865
870
866 'heads' and 'common' are both lists of node IDs. If heads is
871 'heads' and 'common' are both lists of node IDs. If heads is
867 not supplied, uses all of the revlog's heads. If common is not
872 not supplied, uses all of the revlog's heads. If common is not
868 supplied, uses nullid."""
873 supplied, uses nullid."""
869 if common is None:
874 if common is None:
870 common = [nullid]
875 common = [nullid]
871 if heads is None:
876 if heads is None:
872 heads = self.heads()
877 heads = self.heads()
873
878
874 common = [self.rev(n) for n in common]
879 common = [self.rev(n) for n in common]
875 heads = [self.rev(n) for n in heads]
880 heads = [self.rev(n) for n in heads]
876
881
877 # we want the ancestors, but inclusive
882 # we want the ancestors, but inclusive
878 class lazyset(object):
883 class lazyset(object):
879 def __init__(self, lazyvalues):
884 def __init__(self, lazyvalues):
880 self.addedvalues = set()
885 self.addedvalues = set()
881 self.lazyvalues = lazyvalues
886 self.lazyvalues = lazyvalues
882
887
883 def __contains__(self, value):
888 def __contains__(self, value):
884 return value in self.addedvalues or value in self.lazyvalues
889 return value in self.addedvalues or value in self.lazyvalues
885
890
886 def __iter__(self):
891 def __iter__(self):
887 added = self.addedvalues
892 added = self.addedvalues
888 for r in added:
893 for r in added:
889 yield r
894 yield r
890 for r in self.lazyvalues:
895 for r in self.lazyvalues:
891 if not r in added:
896 if not r in added:
892 yield r
897 yield r
893
898
894 def add(self, value):
899 def add(self, value):
895 self.addedvalues.add(value)
900 self.addedvalues.add(value)
896
901
897 def update(self, values):
902 def update(self, values):
898 self.addedvalues.update(values)
903 self.addedvalues.update(values)
899
904
900 has = lazyset(self.ancestors(common))
905 has = lazyset(self.ancestors(common))
901 has.add(nullrev)
906 has.add(nullrev)
902 has.update(common)
907 has.update(common)
903
908
904 # take all ancestors from heads that aren't in has
909 # take all ancestors from heads that aren't in has
905 missing = set()
910 missing = set()
906 visit = collections.deque(r for r in heads if r not in has)
911 visit = collections.deque(r for r in heads if r not in has)
907 while visit:
912 while visit:
908 r = visit.popleft()
913 r = visit.popleft()
909 if r in missing:
914 if r in missing:
910 continue
915 continue
911 else:
916 else:
912 missing.add(r)
917 missing.add(r)
913 for p in self.parentrevs(r):
918 for p in self.parentrevs(r):
914 if p not in has:
919 if p not in has:
915 visit.append(p)
920 visit.append(p)
916 missing = list(missing)
921 missing = list(missing)
917 missing.sort()
922 missing.sort()
918 return has, [self.node(miss) for miss in missing]
923 return has, [self.node(miss) for miss in missing]
919
924
920 def incrementalmissingrevs(self, common=None):
925 def incrementalmissingrevs(self, common=None):
921 """Return an object that can be used to incrementally compute the
926 """Return an object that can be used to incrementally compute the
922 revision numbers of the ancestors of arbitrary sets that are not
927 revision numbers of the ancestors of arbitrary sets that are not
923 ancestors of common. This is an ancestor.incrementalmissingancestors
928 ancestors of common. This is an ancestor.incrementalmissingancestors
924 object.
929 object.
925
930
926 'common' is a list of revision numbers. If common is not supplied, uses
931 'common' is a list of revision numbers. If common is not supplied, uses
927 nullrev.
932 nullrev.
928 """
933 """
929 if common is None:
934 if common is None:
930 common = [nullrev]
935 common = [nullrev]
931
936
932 if rustancestor is not None:
937 if rustancestor is not None:
933 return rustancestor.MissingAncestors(self.index, common)
938 return rustancestor.MissingAncestors(self.index, common)
934 return ancestor.incrementalmissingancestors(self.parentrevs, common)
939 return ancestor.incrementalmissingancestors(self.parentrevs, common)
935
940
936 def findmissingrevs(self, common=None, heads=None):
941 def findmissingrevs(self, common=None, heads=None):
937 """Return the revision numbers of the ancestors of heads that
942 """Return the revision numbers of the ancestors of heads that
938 are not ancestors of common.
943 are not ancestors of common.
939
944
940 More specifically, return a list of revision numbers corresponding to
945 More specifically, return a list of revision numbers corresponding to
941 nodes N such that every N satisfies the following constraints:
946 nodes N such that every N satisfies the following constraints:
942
947
943 1. N is an ancestor of some node in 'heads'
948 1. N is an ancestor of some node in 'heads'
944 2. N is not an ancestor of any node in 'common'
949 2. N is not an ancestor of any node in 'common'
945
950
946 The list is sorted by revision number, meaning it is
951 The list is sorted by revision number, meaning it is
947 topologically sorted.
952 topologically sorted.
948
953
949 'heads' and 'common' are both lists of revision numbers. If heads is
954 'heads' and 'common' are both lists of revision numbers. If heads is
950 not supplied, uses all of the revlog's heads. If common is not
955 not supplied, uses all of the revlog's heads. If common is not
951 supplied, uses nullid."""
956 supplied, uses nullid."""
952 if common is None:
957 if common is None:
953 common = [nullrev]
958 common = [nullrev]
954 if heads is None:
959 if heads is None:
955 heads = self.headrevs()
960 heads = self.headrevs()
956
961
957 inc = self.incrementalmissingrevs(common=common)
962 inc = self.incrementalmissingrevs(common=common)
958 return inc.missingancestors(heads)
963 return inc.missingancestors(heads)
959
964
960 def findmissing(self, common=None, heads=None):
965 def findmissing(self, common=None, heads=None):
961 """Return the ancestors of heads that are not ancestors of common.
966 """Return the ancestors of heads that are not ancestors of common.
962
967
963 More specifically, return a list of nodes N such that every N
968 More specifically, return a list of nodes N such that every N
964 satisfies the following constraints:
969 satisfies the following constraints:
965
970
966 1. N is an ancestor of some node in 'heads'
971 1. N is an ancestor of some node in 'heads'
967 2. N is not an ancestor of any node in 'common'
972 2. N is not an ancestor of any node in 'common'
968
973
969 The list is sorted by revision number, meaning it is
974 The list is sorted by revision number, meaning it is
970 topologically sorted.
975 topologically sorted.
971
976
972 'heads' and 'common' are both lists of node IDs. If heads is
977 'heads' and 'common' are both lists of node IDs. If heads is
973 not supplied, uses all of the revlog's heads. If common is not
978 not supplied, uses all of the revlog's heads. If common is not
974 supplied, uses nullid."""
979 supplied, uses nullid."""
975 if common is None:
980 if common is None:
976 common = [nullid]
981 common = [nullid]
977 if heads is None:
982 if heads is None:
978 heads = self.heads()
983 heads = self.heads()
979
984
980 common = [self.rev(n) for n in common]
985 common = [self.rev(n) for n in common]
981 heads = [self.rev(n) for n in heads]
986 heads = [self.rev(n) for n in heads]
982
987
983 inc = self.incrementalmissingrevs(common=common)
988 inc = self.incrementalmissingrevs(common=common)
984 return [self.node(r) for r in inc.missingancestors(heads)]
989 return [self.node(r) for r in inc.missingancestors(heads)]
985
990
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is; real heads should not be ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents is a
                # descendant. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

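    # Worked example (hypothetical linear history 0 -> 1 -> 2 -> 3):
    #
    #   nodes, outroots, outheads = rl.nodesbetween(
    #       roots=[rl.node(1)], heads=[rl.node(3)]
    #   )
    #   # nodes    == [rl.node(1), rl.node(2), rl.node(3)]  (1::3, topological)
    #   # outroots == [rl.node(1)]  # the roots actually reachable
    #   # outheads == [rl.node(3)]  # the heads actually reached
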
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

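    # Worked example for the pure-Python fallback above, assuming parents
    # 0 <- 1 <- 2 plus 0 <- 3 (two branches off rev 0). Note the array has
    # count + 1 slots so writes to a nullrev (-1) parent land harmlessly on
    # the extra trailing slot:
    #
    #   start: ishead = [1, 1, 1, 1, _]   (every visited rev may be a head)
    #   rev 1 clears ishead[0]; rev 2 clears ishead[1]; rev 3 clears ishead[0]
    #   result: [2, 3]
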
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

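    # Quick sketch (hypothetical revs): because revision numbers are
    # topologically sorted, a > b can never be an ancestor relation, and the
    # general case reduces to a single reachableroots() query:
    #
    #   rl.isancestorrev(2, 5)   # True iff rev 2 is reachable from rev 5
    #   rl.isancestorrev(5, 5)   # True  (a revision is its own ancestor)
    #   rl.isancestorrev(6, 5)   # False (fast path: 6 > 5)
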
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

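    # Tie-breaking note (illustrative, hypothetical revs): when `ancs` holds
    # several equally good ancestors, min() over the binary node values picks
    # the same winner on every call, so the choice is deterministic:
    #
    #   best = rl.ancestor(rl.node(4), rl.node(5))
    #   # stable even if revs 2 and 3 are both "best" common ancestors
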
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here, as they should always be
        # full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

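    # The accepted identifier forms, as a sketch (values are hypothetical):
    #
    #   rl.lookup(5)               # revision number (negative indexes work)
    #   rl.lookup(b'5')            # str(revision number)
    #   rl.lookup(binnode)         # 20-byte binary node
    #   rl.lookup(b'1f0eab')       # full or unambiguous partial hex prefix
    #   # raises error.LookupError when nothing matches
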
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

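    # Worked example of the window arithmetic above, assuming
    # self._chunkcachesize == 65536 (the revlog requires a power of two):
    #
    #   offset, length = 70000, 1000
    #   realoffset = 70000 & ~65535                              # -> 65536
    #   reallength = ((70000 + 1000 + 65536) & ~65535) - 65536   # -> 65536
    #   # one aligned 64 KiB read covers bytes 65536..131072, and the
    #   # requested 1000 bytes are sliced out at startoffset = 4464
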
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

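    # Sketch of the inline adjustment above: in an inline revlog, index
    # entries and data chunks are interleaved in one file, so rev r's data
    # sits (r + 1) index-entry sizes past its logical offset:
    #
    #   start = int(index[startrev][0] >> 16)     # logical data offset
    #   start += (startrev + 1) * self._io.size   # skip interleaved entries
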
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

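    # Delta-parent semantics, summarized (illustrative): with generaldelta
    # the base stored in the index entry is the delta parent; without it,
    # deltas always chain against the previous revision:
    #
    #   base == rev          -> nullrev (full text stored, no delta parent)
    #   self._generaldelta   -> base
    #   otherwise            -> rev - 1
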
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

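    # Recursion sketch for the pure-Python path above (illustrative): a rev
    # is a snapshot when it is a full text (base == rev or base == nullrev)
    # or an intermediate snapshot, i.e. it deltas against something other
    # than its parents and that base is itself a snapshot:
    #
    #   base == p1 or base == p2  -> plain delta, not a snapshot
    #   otherwise                 -> issnapshot(base)
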
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # The text as stored inside the revlog. Might be the revision or might
        # need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

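    # A hedged sketch of the inline predicate above, assuming the usual
    # 128KiB threshold: the on-disk data size is the offset of the tip chunk
    # plus its length, and crossing the threshold triggers the conversion.
    #
    #   def wouldconvert(rl, maxinline=131072):   # threshold is an assumption
    #       tiprev = len(rl) - 1
    #       datasize = rl.start(tiprev) + rl.length(tiprev)
    #       return rl._inline and datasize >= maxinline
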
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
            flags = flags & ~REVIDX_SIDEDATA
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not "
                  b"support it")
            )
        else:
            flags |= REVIDX_SIDEDATA

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(
            self, text, flags, sidedata=sidedata
        )

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

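    # A hedged sketch of how a caller can reassemble a full chunk from the
    # (header, data) pair returned above: the header is b'' when the
    # compressor embedded its own header (or the data starts with b'\0'),
    # and b'u' marks data stored uncompressed. ``rl`` and ``sometext`` are
    # assumed names for a revlog instance and some payload bytes.
    #
    #   header, data = rl.compress(sometext)
    #   chunk = header + data   # the bytes that end up in the revlog
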
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

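    # A hedged sketch of the header dispatch above: b'x' happens to be the
    # first byte of a default zlib stream, so zlib chunks carry no extra
    # marker, while uncompressed chunks get a one-byte b'u' prefix (or none
    # at all when they begin with b'\0'). ``rl`` is an assumed revlog name.
    #
    #   import zlib
    #   assert zlib.compress(b'hello')[0:1] == b'x'
    #   assert bytes(rl.decompress(b'u' + b'hello')) == b'hello'
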
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
        )
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction, ifh, dfh, entry, deltainfo.data, link, offset
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

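    # For orientation, a hedged reading of the index entry tuple ``e`` built
    # above, field by field in the order it is appended to self.index:
    #
    #   (
    #       offset_type(offset, flags),  # data offset combined with flag bits
    #       deltainfo.deltalen,          # length of the stored (delta) chunk
    #       textlen,                     # length of the full raw text
    #       deltainfo.base,              # revision the delta applies against
    #       link,                        # linkrev into the changelog
    #       p1r, p2r,                    # parent revision numbers
    #       node,                        # 20-byte node id
    #   )
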
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
        self._enforceinlinesize(transaction, ifh)

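    # A hedged sketch of the append-mode workaround described above: after
    # reading on the same handle, explicitly seek to EOF so the next write
    # lands at the end on every platform. ``somepath`` and ``payload`` are
    # hypothetical names.
    #
    #   import os
    #   fh = open(somepath, 'a+b')
    #   fh.seek(0)
    #   header = fh.read(16)        # handle now points mid-file
    #   fh.seek(0, os.SEEK_END)     # re-anchor before appending
    #   fh.write(payload)
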
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if deltabase not in self.nodemap:
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=bool(addrevisioncb),
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

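    # A hedged sketch of the shape each item in ``deltas`` is unpacked into
    # above; every field is bytes except ``flags``:
    #
    #   data = (
    #       node,       # node id of the revision being added
    #       p1, p2,     # parent node ids
    #       linknode,   # changelog node this revision is linked to
    #       deltabase,  # node the delta applies against
    #       delta,      # the binary delta itself
    #       flags,      # revision flags, or 0 to use the defaults
    #   )
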
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

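    # A hedged usage sketch: callers typically inspect the returned pair
    # before truncating. ``rl``, ``minlink`` and ``tr`` are assumed to come
    # from the caller.
    #
    #   striprev, brokenset = rl.getstrippoint(minlink)
    #   if striprev < len(rl):
    #       # revisions in brokenset have linkrevs the strip would break;
    #       # the caller is expected to save and re-add them afterwards
    #       rl.strip(minlink, tr)
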
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

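    # A hedged sketch of how a verifier might interpret the result: non-zero
    # values indicate trailing bytes left behind, e.g. by an interrupted
    # transaction. ``rl`` is an assumed revlog name.
    #
    #   dd, di = rl.checksize()
    #   if dd:
    #       print('data file has %d extra bytes' % dd)
    #   if di:
    #       print('index file has %d extra bytes' % di)
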
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added as if they were new content. This is
           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
           e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. By default, the current default is used.

        If not None, ``sidedatacompanion`` is a callable that accepts two
        arguments:

            (srcrevlog, rev)

        and returns a triplet that controls changes to sidedata content from
        the old revision to the new clone result:

            (dropall, filterout, update)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase controls whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

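    # A hedged usage sketch of the policies above; ``srcrevlog``,
    # ``destrevlog`` and ``tr`` are assumed to come from the caller.
    #
    #   # fast path: keep existing deltas wherever possible
    #   srcrevlog.clone(tr, destrevlog, deltareuse=revlog.DELTAREUSEALWAYS)
    #
    #   # recompute every delta, e.g. after changing the delta algorithm
    #   srcrevlog.clone(tr, destrevlog, deltareuse=revlog.DELTAREUSENEVER)
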
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {})
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall, filterout, update = sidedataactions
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None
                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

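    # A hedged sketch of a ``sidedatacompanion`` callable matching the
    # contract documented in ``clone``: drop one hypothetical sidedata key
    # and add another, leaving everything else untouched.
    #
    #   def samplecompanion(srcrevlog, rev):
    #       dropall = False
    #       filterout = {b'old-key'}          # hypothetical key to drop
    #       update = {b'new-key': b'value'}   # hypothetical key to add
    #       return (dropall, filterout, update)
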
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in the revlog index
            #  2. L2: len(rawtext), stored in the revlog data
            #  3. L3: len(text), stored in the revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect the commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

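            # Concrete illustration of LM (hypothetical data, not from any
            # real revlog): for a rename, rawtext could look like
            #     b'\x01\ncopy: a.txt\ncopyrev: <40 hex digits>\n\x01\n' + text
            # and LM is the length of that b'\x01\n'-delimited header, so
            # len(read()) == len(rawtext) - LM.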
            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state[b'skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

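    # A minimal sketch of how a caller might drive the generator above (not
    # part of revlog.py; ``rl`` and ``ui`` are assumed to exist, and while
    # the state keys mirror what the code above reads, this exact dict is an
    # assumption):
    #
    #     state = {
    #         b'expectedversion': REVLOGV1,
    #         b'erroroncensored': True,
    #         b'skipflags': 0,
    #     }
    #     for problem in rl.verifyintegrity(state):
    #         msg = problem.error or problem.warning
    #         ui.write(msg + b'\n')
    #     # state[b'skipread'] now holds nodes whose content was not checked
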
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
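
    # A minimal sketch of querying the storage stats above (not part of
    # revlog.py; ``rl`` is assumed to be any revlog instance, e.g.
    # ``repo.changelog``). Only the requested keys are present in the
    # returned dict, so callers opt in to the potentially expensive ones:
    #
    #     info = rl.storageinfo(
    #         revisionscount=True,
    #         trackedsize=True,   # sum of rawsize() over all revisions
    #         storedsize=True,    # on-disk bytes, requires stat() calls
    #     )
    #     print(info[b'revisionscount'], info[b'trackedsize'],
    #           info[b'storedsize'])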