##// END OF EJS Templates
revlog: introduce an explicit NodeMap class for pure code...
marmoute -
r43924:a7c0c5b5 default
parent child Browse files
Show More
@@ -1,2961 +1,2963 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import io
19 import io
20 import os
20 import os
21 import struct
21 import struct
22 import zlib
22 import zlib
23
23
24 # import stuff from node for others to import from revlog
24 # import stuff from node for others to import from revlog
25 from .node import (
25 from .node import (
26 bin,
26 bin,
27 hex,
27 hex,
28 nullhex,
28 nullhex,
29 nullid,
29 nullid,
30 nullrev,
30 nullrev,
31 short,
31 short,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .pycompat import getattr
38 from .pycompat import getattr
39 from .revlogutils.constants import (
39 from .revlogutils.constants import (
40 FLAG_GENERALDELTA,
40 FLAG_GENERALDELTA,
41 FLAG_INLINE_DATA,
41 FLAG_INLINE_DATA,
42 REVLOGV0,
42 REVLOGV0,
43 REVLOGV1,
43 REVLOGV1,
44 REVLOGV1_FLAGS,
44 REVLOGV1_FLAGS,
45 REVLOGV2,
45 REVLOGV2,
46 REVLOGV2_FLAGS,
46 REVLOGV2_FLAGS,
47 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FLAGS,
48 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_FORMAT,
49 REVLOG_DEFAULT_VERSION,
49 REVLOG_DEFAULT_VERSION,
50 )
50 )
51 from .revlogutils.flagutil import (
51 from .revlogutils.flagutil import (
52 REVIDX_DEFAULT_FLAGS,
52 REVIDX_DEFAULT_FLAGS,
53 REVIDX_ELLIPSIS,
53 REVIDX_ELLIPSIS,
54 REVIDX_EXTSTORED,
54 REVIDX_EXTSTORED,
55 REVIDX_FLAGS_ORDER,
55 REVIDX_FLAGS_ORDER,
56 REVIDX_ISCENSORED,
56 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 REVIDX_SIDEDATA,
58 REVIDX_SIDEDATA,
59 )
59 )
60 from .thirdparty import attr
60 from .thirdparty import attr
61 from . import (
61 from . import (
62 ancestor,
62 ancestor,
63 dagop,
63 dagop,
64 error,
64 error,
65 mdiff,
65 mdiff,
66 policy,
66 policy,
67 pycompat,
67 pycompat,
68 revlogutils,
68 templatefilters,
69 templatefilters,
69 util,
70 util,
70 )
71 )
71 from .interfaces import (
72 from .interfaces import (
72 repository,
73 repository,
73 util as interfaceutil,
74 util as interfaceutil,
74 )
75 )
75 from .revlogutils import (
76 from .revlogutils import (
76 deltas as deltautil,
77 deltas as deltautil,
77 flagutil,
78 flagutil,
78 sidedata as sidedatautil,
79 sidedata as sidedatautil,
79 )
80 )
80 from .utils import (
81 from .utils import (
81 storageutil,
82 storageutil,
82 stringutil,
83 stringutil,
83 )
84 )
84
85
# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS
104
105
# Prefer the compiled (C / Rust) implementations when available; policy
# falls back to the pure-Python versions otherwise.
parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576  # 1 MiB
115
116
116 # Flag processors for REVIDX_ELLIPSIS.
117 # Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    """Read-side flag processor for REVIDX_ELLIPSIS.

    Ellipsis revisions store their text verbatim, so the text passes
    through unchanged; validation is not required (False) and no side
    data is produced (empty dict).
    """
    return (text, False, {})
119
120
120
121
def ellipsiswriteprocessor(rl, text, sidedata):
    """Write-side flag processor for REVIDX_ELLIPSIS.

    The text is stored as-is; the False return value indicates the raw
    text does not need to validate against the declared hash.
    """
    return (text, False)
123
124
124
125
def ellipsisrawprocessor(rl, text):
    """Raw-side flag processor for REVIDX_ELLIPSIS.

    Always returns False: the raw text of an ellipsis revision must not
    be checked against its hash (the hash describes the full revision,
    which an ellipsis revision does not contain).
    """
    return False
127
128
128
129
# Flag-processor triple for REVIDX_ELLIPSIS, in (read, write, raw) order,
# matching the layout expected by flagutil.insertflagprocessor.
ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
134
135
135
136
def getoffset(q):
    """Return the data-file offset packed into the high bits of *q*.

    Inverse of the high half of ``offset_type``: the offset lives above
    the 16 flag bits.
    """
    shifted = q >> 16
    return int(shifted)
138
139
139
140
def gettype(q):
    """Return the flag bits packed into the low 16 bits of *q*.

    Inverse of the low half of ``offset_type``.
    """
    low16 = q & 0xFFFF
    return int(low16)
142
143
143
144
def offset_type(offset, type):
    """Pack a data-file ``offset`` and flag bits ``type`` into one integer.

    The offset occupies the bits above bit 16 and the flags the low 16
    bits. Raises ValueError when ``type`` contains bits outside
    ``flagutil.REVIDX_KNOWN_FLAGS``.
    """
    if type & ~flagutil.REVIDX_KNOWN_FLAGS:
        raise ValueError(b'unknown revlog index flags')
    packed = (int(offset) << 16) | type
    return int(packed)
148
149
149
150
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    # textlen: presumably the length of the revision fulltext — not covered
    # by the docstring above; TODO confirm against callers.
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
169
170
170
171
@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    """Concrete ``repository.irevisiondelta`` carrier produced by revlogs.

    Plain data holder; field semantics are defined by the
    ``irevisiondelta`` interface it implements.
    """

    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    # linknode is optional: it may be filled in later by the storage layer.
    linknode = attr.ib(default=None)
183
184
184
185
@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    """A single problem found while verifying a revlog.

    Exactly one of ``warning`` or ``error`` is normally set; ``node``
    optionally identifies the affected revision.
    """

    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)
191
192
192
193
# On-disk layout of a version-0 revlog index entry:
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(b">4l20s20s20s")
# Bound methods hoisted to module level; used in tight parse/pack loops.
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
204
205
205
206
class revlogoldindex(list):
    """In-memory index for a version-0 revlog, backed by a plain list.

    Indexing with -1 yields a synthetic entry for the null revision
    rather than the last element, mirroring the behaviour of the
    compiled index objects used for newer formats.
    """

    def __getitem__(self, i):
        # The null revision is never stored; fabricate its entry on demand.
        if i != -1:
            return list.__getitem__(self, i)
        return (0, 0, 0, -1, -1, -1, -1, nullid)
211
212
212
213
class revlogoldio(object):
    """Reader/writer for the original (version 0) revlog index format.

    Entries are converted to and from the revlogv1 in-memory tuple layout
    so the rest of the module can treat both formats uniformly.
    """

    def __init__(self):
        # size in bytes of one on-disk v0 index entry
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        """Parse raw v0 index ``data``.

        Returns a ``(index, nodemap, chunkcache)`` triple; the chunk
        cache is always None for this format (``inline`` is unused — v0
        revlogs never store data inline).
        """
        s = self.size
        index = []
        nodemap = revlogutils.NodeMap({nullid: nullrev})
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                # v0 stores parent nodeids; v1 tuples use parent revision
                # numbers, so map through the nodemap built so far.
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        """Serialize a revlogv1-style ``entry`` tuple into v0 on-disk form.

        ``node`` is a callable mapping a revision number back to its
        nodeid (v0 stores parent nodeids, not revision numbers). Raises
        RevlogError if the entry carries flag bits, which the v0 format
        cannot represent. ``version`` and ``rev`` are unused here.
        """
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)
259
260
260
261
# On-disk layout of a version-1/2 ("ng") revlog index entry:
# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
# (the 20-byte nodeid is zero-padded to 32 bytes: b"20s12x")
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
# The revlog header is a single big-endian 32-bit word overlapping the
# first entry's offset field.
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF
280
281
281
282
class revlogio(object):
    """Reader/writer for version 1/2 revlog index entries."""

    def __init__(self):
        # size in bytes of one on-disk index entry
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        """Parse raw index ``data``; returns (index, nodemap, chunkcache).

        The nodemap may be None if the index object does not expose one.
        """
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        """Serialize ``entry`` to its on-disk form.

        ``node`` is unused for this format (entries store revision
        numbers, not nodeids, for parents).
        """
        p = indexformatng_pack(*entry)
        if rev == 0:
            # The first entry's leading 4 bytes double as the revlog
            # header carrying the version/flags word.
            p = versionformat_pack(version) + p[4:]
        return p
296
297
297
298
298 class revlog(object):
299 class revlog(object):
299 """
300 """
300 the underlying revision storage object
301 the underlying revision storage object
301
302
302 A revlog consists of two parts, an index and the revision data.
303 A revlog consists of two parts, an index and the revision data.
303
304
304 The index is a file with a fixed record size containing
305 The index is a file with a fixed record size containing
305 information on each revision, including its nodeid (hash), the
306 information on each revision, including its nodeid (hash), the
306 nodeids of its parents, the position and offset of its data within
307 nodeids of its parents, the position and offset of its data within
307 the data file, and the revision it's based on. Finally, each entry
308 the data file, and the revision it's based on. Finally, each entry
308 contains a linkrev entry that can serve as a pointer to external
309 contains a linkrev entry that can serve as a pointer to external
309 data.
310 data.
310
311
311 The revision data itself is a linear collection of data chunks.
312 The revision data itself is a linear collection of data chunks.
312 Each chunk represents a revision and is usually represented as a
313 Each chunk represents a revision and is usually represented as a
313 delta against the previous chunk. To bound lookup time, runs of
314 delta against the previous chunk. To bound lookup time, runs of
314 deltas are limited to about 2 times the length of the original
315 deltas are limited to about 2 times the length of the original
315 version data. This makes retrieval of a version proportional to
316 version data. This makes retrieval of a version proportional to
316 its size, or O(1) relative to the number of revisions.
317 its size, or O(1) relative to the number of revisions.
317
318
318 Both pieces of the revlog are written to in an append-only
319 Both pieces of the revlog are written to in an append-only
319 fashion, which means we never need to rewrite a file to insert or
320 fashion, which means we never need to rewrite a file to insert or
320 remove data, and can use some simple techniques to avoid the need
321 remove data, and can use some simple techniques to avoid the need
321 for locking while reading.
322 for locking while reading.
322
323
323 If checkambig, indexfile is opened with checkambig=True at
324 If checkambig, indexfile is opened with checkambig=True at
324 writing, to avoid file stat ambiguity.
325 writing, to avoid file stat ambiguity.
325
326
326 If mmaplargeindex is True, and an mmapindexthreshold is set, the
327 If mmaplargeindex is True, and an mmapindexthreshold is set, the
327 index will be mmapped rather than read if it is larger than the
328 index will be mmapped rather than read if it is larger than the
328 configured threshold.
329 configured threshold.
329
330
330 If censorable is True, the revlog can have censored revisions.
331 If censorable is True, the revlog can have censored revisions.
331
332
332 If `upperboundcomp` is not None, this is the expected maximal gain from
333 If `upperboundcomp` is not None, this is the expected maximal gain from
333 compression for the data content.
334 compression for the data content.
334 """
335 """
335
336
336 _flagserrorclass = error.RevlogError
337 _flagserrorclass = error.RevlogError
337
338
    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        indexfile names the index file; datafile defaults to the index
        name with its last two characters replaced by ".d".
        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        # Defaults below may be overridden from opener options in _loadindex().
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = revlogutils.NodeMap({nullid: nullrev})
        self._nodepos = None
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()
396
397
    def _loadindex(self):
        """Read and parse the index file, configuring this revlog from the
        opener's options and from the version/flags word found on disk.

        Sets (among others) self.version, self._inline, self._generaldelta,
        self.index, self.nodemap and self._chunkcache. Raises RevlogError
        for unknown versions/flags or a corrupted index.
        """
        mmapindexthreshold = None
        opts = self.opener.options

        # Pick the version/flags to use for a brand-new revlog; an existing
        # on-disk header (read below) takes precedence over this.
        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        # Apply tuning knobs supplied through the opener options.
        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        if self.hassidedata:
            self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )

        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            # No index file yet: treat as a fresh revlog.
            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}
551
552
    @util.propertycache
    def _compressor(self):
        # Compressor for this revlog's configured engine, built lazily and
        # cached on first access by propertycache.
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)
556
557
    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            # any write must honor the configured stat-ambiguity guard
            args['checkambig'] = self._checkambig
        if mode == b'w':
            # full rewrites go through an atomic temp file
            args['atomictemp'] = True
        return self.opener(self.indexfile, **args)
565
566
566 def _datafp(self, mode=b'r'):
567 def _datafp(self, mode=b'r'):
567 """file object for the revlog's data file"""
568 """file object for the revlog's data file"""
568 return self.opener(self.datafile, mode=mode)
569 return self.opener(self.datafile, mode=mode)
569
570
570 @contextlib.contextmanager
571 @contextlib.contextmanager
571 def _datareadfp(self, existingfp=None):
572 def _datareadfp(self, existingfp=None):
572 """file object suitable to read data"""
573 """file object suitable to read data"""
573 # Use explicit file handle, if given.
574 # Use explicit file handle, if given.
574 if existingfp is not None:
575 if existingfp is not None:
575 yield existingfp
576 yield existingfp
576
577
577 # Use a file handle being actively used for writes, if available.
578 # Use a file handle being actively used for writes, if available.
578 # There is some danger to doing this because reads will seek the
579 # There is some danger to doing this because reads will seek the
579 # file. However, _writeentry() performs a SEEK_END before all writes,
580 # file. However, _writeentry() performs a SEEK_END before all writes,
580 # so we should be safe.
581 # so we should be safe.
581 elif self._writinghandles:
582 elif self._writinghandles:
582 if self._inline:
583 if self._inline:
583 yield self._writinghandles[0]
584 yield self._writinghandles[0]
584 else:
585 else:
585 yield self._writinghandles[1]
586 yield self._writinghandles[1]
586
587
587 # Otherwise open a new file handle.
588 # Otherwise open a new file handle.
588 else:
589 else:
589 if self._inline:
590 if self._inline:
590 func = self._indexfp
591 func = self._indexfp
591 else:
592 else:
592 func = self._datafp
593 func = self._datafp
593 with func() as fp:
594 with func() as fp:
594 yield fp
595 yield fp
595
596
596 def tiprev(self):
597 def tiprev(self):
597 return len(self.index) - 1
598 return len(self.index) - 1
598
599
599 def tip(self):
600 def tip(self):
600 return self.node(self.tiprev())
601 return self.node(self.tiprev())
601
602
602 def __contains__(self, rev):
603 def __contains__(self, rev):
603 return 0 <= rev < len(self)
604 return 0 <= rev < len(self)
604
605
605 def __len__(self):
606 def __len__(self):
606 return len(self.index)
607 return len(self.index)
607
608
608 def __iter__(self):
609 def __iter__(self):
609 return iter(pycompat.xrange(len(self)))
610 return iter(pycompat.xrange(len(self)))
610
611
611 def revs(self, start=0, stop=None):
612 def revs(self, start=0, stop=None):
612 """iterate over all rev in this revlog (from start to stop)"""
613 """iterate over all rev in this revlog (from start to stop)"""
613 return storageutil.iterrevs(len(self), start=start, stop=stop)
614 return storageutil.iterrevs(len(self), start=start, stop=stop)
614
615
615 @util.propertycache
616 @util.propertycache
616 def nodemap(self):
617 def nodemap(self):
617 if self.index:
618 if self.index:
618 # populate mapping down to the initial node
619 # populate mapping down to the initial node
619 node0 = self.index[0][7] # get around changelog filtering
620 node0 = self.index[0][7] # get around changelog filtering
620 self.rev(node0)
621 self.rev(node0)
621 return self._nodecache
622 return self._nodecache
622
623
623 def hasnode(self, node):
624 def hasnode(self, node):
624 try:
625 try:
625 self.rev(node)
626 self.rev(node)
626 return True
627 return True
627 except KeyError:
628 except KeyError:
628 return False
629 return False
629
630
630 def candelta(self, baserev, rev):
631 def candelta(self, baserev, rev):
631 """whether two revisions (baserev, rev) can be delta-ed or not"""
632 """whether two revisions (baserev, rev) can be delta-ed or not"""
632 # Disable delta if either rev requires a content-changing flag
633 # Disable delta if either rev requires a content-changing flag
633 # processor (ex. LFS). This is because such flag processor can alter
634 # processor (ex. LFS). This is because such flag processor can alter
634 # the rawtext content that the delta will be based on, and two clients
635 # the rawtext content that the delta will be based on, and two clients
635 # could have a same revlog node with different flags (i.e. different
636 # could have a same revlog node with different flags (i.e. different
636 # rawtext contents) and the delta could be incompatible.
637 # rawtext contents) and the delta could be incompatible.
637 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
638 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
638 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
639 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
639 ):
640 ):
640 return False
641 return False
641 return True
642 return True
642
643
643 def clearcaches(self):
644 def clearcaches(self):
644 self._revisioncache = None
645 self._revisioncache = None
645 self._chainbasecache.clear()
646 self._chainbasecache.clear()
646 self._chunkcache = (0, b'')
647 self._chunkcache = (0, b'')
647 self._pcache = {}
648 self._pcache = {}
648
649
649 try:
650 try:
650 # If we are using the native C version, you are in a fun case
651 # If we are using the native C version, you are in a fun case
651 # where self.index, self.nodemap and self._nodecaches is the same
652 # where self.index, self.nodemap and self._nodecaches is the same
652 # object.
653 # object.
653 self._nodecache.clearcaches()
654 self._nodecache.clearcaches()
654 except AttributeError:
655 except AttributeError:
655 self._nodecache = {nullid: nullrev}
656 self._nodecache = revlogutils.NodeMap({nullid: nullrev})
656 self._nodepos = None
657 self._nodepos = None
657
658
658 def rev(self, node):
659 def rev(self, node):
659 try:
660 try:
660 return self._nodecache[node]
661 return self._nodecache[node]
661 except TypeError:
662 except TypeError:
662 raise
663 raise
663 except error.RevlogError:
664 except error.RevlogError:
664 # parsers.c radix tree lookup failed
665 if not isinstance(self._nodecache, revlogutils.NodeMap):
665 if node == wdirid or node in wdirfilenodeids:
666 # parsers.c radix tree lookup failed
666 raise error.WdirUnsupported
667 if node == wdirid or node in wdirfilenodeids:
667 raise error.LookupError(node, self.indexfile, _(b'no node'))
668 raise error.WdirUnsupported
668 except KeyError:
669 raise error.LookupError(node, self.indexfile, _(b'no node'))
669 # pure python cache lookup failed
670 n = self._nodecache
671 i = self.index
672 p = self._nodepos
673 if p is None:
674 p = len(i) - 1
675 else:
670 else:
676 assert p < len(i)
671 # pure python cache lookup failed
677 for r in pycompat.xrange(p, -1, -1):
672 n = self._nodecache
678 v = i[r][7]
673 i = self.index
679 n[v] = r
674 p = self._nodepos
680 if v == node:
675 if p is None:
681 self._nodepos = r - 1
676 p = len(i) - 1
682 return r
677 else:
683 if node == wdirid or node in wdirfilenodeids:
678 assert p < len(i)
684 raise error.WdirUnsupported
679 for r in pycompat.xrange(p, -1, -1):
685 raise error.LookupError(node, self.indexfile, _(b'no node'))
680 v = i[r][7]
681 n[v] = r
682 if v == node:
683 self._nodepos = r - 1
684 return r
685 if node == wdirid or node in wdirfilenodeids:
686 raise error.WdirUnsupported
687 raise error.LookupError(node, self.indexfile, _(b'no node'))
686
688
687 # Accessors for index entries.
689 # Accessors for index entries.
688
690
689 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
691 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
690 # are flags.
692 # are flags.
691 def start(self, rev):
693 def start(self, rev):
692 return int(self.index[rev][0] >> 16)
694 return int(self.index[rev][0] >> 16)
693
695
694 def flags(self, rev):
696 def flags(self, rev):
695 return self.index[rev][0] & 0xFFFF
697 return self.index[rev][0] & 0xFFFF
696
698
697 def length(self, rev):
699 def length(self, rev):
698 return self.index[rev][1]
700 return self.index[rev][1]
699
701
700 def rawsize(self, rev):
702 def rawsize(self, rev):
701 """return the length of the uncompressed text for a given revision"""
703 """return the length of the uncompressed text for a given revision"""
702 l = self.index[rev][2]
704 l = self.index[rev][2]
703 if l >= 0:
705 if l >= 0:
704 return l
706 return l
705
707
706 t = self.rawdata(rev)
708 t = self.rawdata(rev)
707 return len(t)
709 return len(t)
708
710
709 def size(self, rev):
711 def size(self, rev):
710 """length of non-raw text (processed by a "read" flag processor)"""
712 """length of non-raw text (processed by a "read" flag processor)"""
711 # fast path: if no "read" flag processor could change the content,
713 # fast path: if no "read" flag processor could change the content,
712 # size is rawsize. note: ELLIPSIS is known to not change the content.
714 # size is rawsize. note: ELLIPSIS is known to not change the content.
713 flags = self.flags(rev)
715 flags = self.flags(rev)
714 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
716 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
715 return self.rawsize(rev)
717 return self.rawsize(rev)
716
718
717 return len(self.revision(rev, raw=False))
719 return len(self.revision(rev, raw=False))
718
720
719 def chainbase(self, rev):
721 def chainbase(self, rev):
720 base = self._chainbasecache.get(rev)
722 base = self._chainbasecache.get(rev)
721 if base is not None:
723 if base is not None:
722 return base
724 return base
723
725
724 index = self.index
726 index = self.index
725 iterrev = rev
727 iterrev = rev
726 base = index[iterrev][3]
728 base = index[iterrev][3]
727 while base != iterrev:
729 while base != iterrev:
728 iterrev = base
730 iterrev = base
729 base = index[iterrev][3]
731 base = index[iterrev][3]
730
732
731 self._chainbasecache[rev] = base
733 self._chainbasecache[rev] = base
732 return base
734 return base
733
735
734 def linkrev(self, rev):
736 def linkrev(self, rev):
735 return self.index[rev][4]
737 return self.index[rev][4]
736
738
737 def parentrevs(self, rev):
739 def parentrevs(self, rev):
738 try:
740 try:
739 entry = self.index[rev]
741 entry = self.index[rev]
740 except IndexError:
742 except IndexError:
741 if rev == wdirrev:
743 if rev == wdirrev:
742 raise error.WdirUnsupported
744 raise error.WdirUnsupported
743 raise
745 raise
744
746
745 return entry[5], entry[6]
747 return entry[5], entry[6]
746
748
747 # fast parentrevs(rev) where rev isn't filtered
749 # fast parentrevs(rev) where rev isn't filtered
748 _uncheckedparentrevs = parentrevs
750 _uncheckedparentrevs = parentrevs
749
751
750 def node(self, rev):
752 def node(self, rev):
751 try:
753 try:
752 return self.index[rev][7]
754 return self.index[rev][7]
753 except IndexError:
755 except IndexError:
754 if rev == wdirrev:
756 if rev == wdirrev:
755 raise error.WdirUnsupported
757 raise error.WdirUnsupported
756 raise
758 raise
757
759
758 # Derived from index values.
760 # Derived from index values.
759
761
760 def end(self, rev):
762 def end(self, rev):
761 return self.start(rev) + self.length(rev)
763 return self.start(rev) + self.length(rev)
762
764
763 def parents(self, node):
765 def parents(self, node):
764 i = self.index
766 i = self.index
765 d = i[self.rev(node)]
767 d = i[self.rev(node)]
766 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
768 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
767
769
768 def chainlen(self, rev):
770 def chainlen(self, rev):
769 return self._chaininfo(rev)[0]
771 return self._chaininfo(rev)[0]
770
772
771 def _chaininfo(self, rev):
773 def _chaininfo(self, rev):
772 chaininfocache = self._chaininfocache
774 chaininfocache = self._chaininfocache
773 if rev in chaininfocache:
775 if rev in chaininfocache:
774 return chaininfocache[rev]
776 return chaininfocache[rev]
775 index = self.index
777 index = self.index
776 generaldelta = self._generaldelta
778 generaldelta = self._generaldelta
777 iterrev = rev
779 iterrev = rev
778 e = index[iterrev]
780 e = index[iterrev]
779 clen = 0
781 clen = 0
780 compresseddeltalen = 0
782 compresseddeltalen = 0
781 while iterrev != e[3]:
783 while iterrev != e[3]:
782 clen += 1
784 clen += 1
783 compresseddeltalen += e[1]
785 compresseddeltalen += e[1]
784 if generaldelta:
786 if generaldelta:
785 iterrev = e[3]
787 iterrev = e[3]
786 else:
788 else:
787 iterrev -= 1
789 iterrev -= 1
788 if iterrev in chaininfocache:
790 if iterrev in chaininfocache:
789 t = chaininfocache[iterrev]
791 t = chaininfocache[iterrev]
790 clen += t[0]
792 clen += t[0]
791 compresseddeltalen += t[1]
793 compresseddeltalen += t[1]
792 break
794 break
793 e = index[iterrev]
795 e = index[iterrev]
794 else:
796 else:
795 # Add text length of base since decompressing that also takes
797 # Add text length of base since decompressing that also takes
796 # work. For cache hits the length is already included.
798 # work. For cache hits the length is already included.
797 compresseddeltalen += e[1]
799 compresseddeltalen += e[1]
798 r = (clen, compresseddeltalen)
800 r = (clen, compresseddeltalen)
799 chaininfocache[rev] = r
801 chaininfocache[rev] = r
800 return r
802 return r
801
803
802 def _deltachain(self, rev, stoprev=None):
804 def _deltachain(self, rev, stoprev=None):
803 """Obtain the delta chain for a revision.
805 """Obtain the delta chain for a revision.
804
806
805 ``stoprev`` specifies a revision to stop at. If not specified, we
807 ``stoprev`` specifies a revision to stop at. If not specified, we
806 stop at the base of the chain.
808 stop at the base of the chain.
807
809
808 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
810 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
809 revs in ascending order and ``stopped`` is a bool indicating whether
811 revs in ascending order and ``stopped`` is a bool indicating whether
810 ``stoprev`` was hit.
812 ``stoprev`` was hit.
811 """
813 """
812 # Try C implementation.
814 # Try C implementation.
813 try:
815 try:
814 return self.index.deltachain(rev, stoprev, self._generaldelta)
816 return self.index.deltachain(rev, stoprev, self._generaldelta)
815 except AttributeError:
817 except AttributeError:
816 pass
818 pass
817
819
818 chain = []
820 chain = []
819
821
820 # Alias to prevent attribute lookup in tight loop.
822 # Alias to prevent attribute lookup in tight loop.
821 index = self.index
823 index = self.index
822 generaldelta = self._generaldelta
824 generaldelta = self._generaldelta
823
825
824 iterrev = rev
826 iterrev = rev
825 e = index[iterrev]
827 e = index[iterrev]
826 while iterrev != e[3] and iterrev != stoprev:
828 while iterrev != e[3] and iterrev != stoprev:
827 chain.append(iterrev)
829 chain.append(iterrev)
828 if generaldelta:
830 if generaldelta:
829 iterrev = e[3]
831 iterrev = e[3]
830 else:
832 else:
831 iterrev -= 1
833 iterrev -= 1
832 e = index[iterrev]
834 e = index[iterrev]
833
835
834 if iterrev == stoprev:
836 if iterrev == stoprev:
835 stopped = True
837 stopped = True
836 else:
838 else:
837 chain.append(iterrev)
839 chain.append(iterrev)
838 stopped = False
840 stopped = False
839
841
840 chain.reverse()
842 chain.reverse()
841 return chain, stopped
843 return chain, stopped
842
844
843 def ancestors(self, revs, stoprev=0, inclusive=False):
845 def ancestors(self, revs, stoprev=0, inclusive=False):
844 """Generate the ancestors of 'revs' in reverse revision order.
846 """Generate the ancestors of 'revs' in reverse revision order.
845 Does not generate revs lower than stoprev.
847 Does not generate revs lower than stoprev.
846
848
847 See the documentation for ancestor.lazyancestors for more details."""
849 See the documentation for ancestor.lazyancestors for more details."""
848
850
849 # first, make sure start revisions aren't filtered
851 # first, make sure start revisions aren't filtered
850 revs = list(revs)
852 revs = list(revs)
851 checkrev = self.node
853 checkrev = self.node
852 for r in revs:
854 for r in revs:
853 checkrev(r)
855 checkrev(r)
854 # and we're sure ancestors aren't filtered as well
856 # and we're sure ancestors aren't filtered as well
855
857
856 if rustancestor is not None:
858 if rustancestor is not None:
857 lazyancestors = rustancestor.LazyAncestors
859 lazyancestors = rustancestor.LazyAncestors
858 arg = self.index
860 arg = self.index
859 elif util.safehasattr(parsers, b'rustlazyancestors'):
861 elif util.safehasattr(parsers, b'rustlazyancestors'):
860 lazyancestors = ancestor.rustlazyancestors
862 lazyancestors = ancestor.rustlazyancestors
861 arg = self.index
863 arg = self.index
862 else:
864 else:
863 lazyancestors = ancestor.lazyancestors
865 lazyancestors = ancestor.lazyancestors
864 arg = self._uncheckedparentrevs
866 arg = self._uncheckedparentrevs
865 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
867 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
866
868
867 def descendants(self, revs):
869 def descendants(self, revs):
868 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
870 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
869
871
870 def findcommonmissing(self, common=None, heads=None):
872 def findcommonmissing(self, common=None, heads=None):
871 """Return a tuple of the ancestors of common and the ancestors of heads
873 """Return a tuple of the ancestors of common and the ancestors of heads
872 that are not ancestors of common. In revset terminology, we return the
874 that are not ancestors of common. In revset terminology, we return the
873 tuple:
875 tuple:
874
876
875 ::common, (::heads) - (::common)
877 ::common, (::heads) - (::common)
876
878
877 The list is sorted by revision number, meaning it is
879 The list is sorted by revision number, meaning it is
878 topologically sorted.
880 topologically sorted.
879
881
880 'heads' and 'common' are both lists of node IDs. If heads is
882 'heads' and 'common' are both lists of node IDs. If heads is
881 not supplied, uses all of the revlog's heads. If common is not
883 not supplied, uses all of the revlog's heads. If common is not
882 supplied, uses nullid."""
884 supplied, uses nullid."""
883 if common is None:
885 if common is None:
884 common = [nullid]
886 common = [nullid]
885 if heads is None:
887 if heads is None:
886 heads = self.heads()
888 heads = self.heads()
887
889
888 common = [self.rev(n) for n in common]
890 common = [self.rev(n) for n in common]
889 heads = [self.rev(n) for n in heads]
891 heads = [self.rev(n) for n in heads]
890
892
891 # we want the ancestors, but inclusive
893 # we want the ancestors, but inclusive
892 class lazyset(object):
894 class lazyset(object):
893 def __init__(self, lazyvalues):
895 def __init__(self, lazyvalues):
894 self.addedvalues = set()
896 self.addedvalues = set()
895 self.lazyvalues = lazyvalues
897 self.lazyvalues = lazyvalues
896
898
897 def __contains__(self, value):
899 def __contains__(self, value):
898 return value in self.addedvalues or value in self.lazyvalues
900 return value in self.addedvalues or value in self.lazyvalues
899
901
900 def __iter__(self):
902 def __iter__(self):
901 added = self.addedvalues
903 added = self.addedvalues
902 for r in added:
904 for r in added:
903 yield r
905 yield r
904 for r in self.lazyvalues:
906 for r in self.lazyvalues:
905 if not r in added:
907 if not r in added:
906 yield r
908 yield r
907
909
908 def add(self, value):
910 def add(self, value):
909 self.addedvalues.add(value)
911 self.addedvalues.add(value)
910
912
911 def update(self, values):
913 def update(self, values):
912 self.addedvalues.update(values)
914 self.addedvalues.update(values)
913
915
914 has = lazyset(self.ancestors(common))
916 has = lazyset(self.ancestors(common))
915 has.add(nullrev)
917 has.add(nullrev)
916 has.update(common)
918 has.update(common)
917
919
918 # take all ancestors from heads that aren't in has
920 # take all ancestors from heads that aren't in has
919 missing = set()
921 missing = set()
920 visit = collections.deque(r for r in heads if r not in has)
922 visit = collections.deque(r for r in heads if r not in has)
921 while visit:
923 while visit:
922 r = visit.popleft()
924 r = visit.popleft()
923 if r in missing:
925 if r in missing:
924 continue
926 continue
925 else:
927 else:
926 missing.add(r)
928 missing.add(r)
927 for p in self.parentrevs(r):
929 for p in self.parentrevs(r):
928 if p not in has:
930 if p not in has:
929 visit.append(p)
931 visit.append(p)
930 missing = list(missing)
932 missing = list(missing)
931 missing.sort()
933 missing.sort()
932 return has, [self.node(miss) for miss in missing]
934 return has, [self.node(miss) for miss in missing]
933
935
934 def incrementalmissingrevs(self, common=None):
936 def incrementalmissingrevs(self, common=None):
935 """Return an object that can be used to incrementally compute the
937 """Return an object that can be used to incrementally compute the
936 revision numbers of the ancestors of arbitrary sets that are not
938 revision numbers of the ancestors of arbitrary sets that are not
937 ancestors of common. This is an ancestor.incrementalmissingancestors
939 ancestors of common. This is an ancestor.incrementalmissingancestors
938 object.
940 object.
939
941
940 'common' is a list of revision numbers. If common is not supplied, uses
942 'common' is a list of revision numbers. If common is not supplied, uses
941 nullrev.
943 nullrev.
942 """
944 """
943 if common is None:
945 if common is None:
944 common = [nullrev]
946 common = [nullrev]
945
947
946 if rustancestor is not None:
948 if rustancestor is not None:
947 return rustancestor.MissingAncestors(self.index, common)
949 return rustancestor.MissingAncestors(self.index, common)
948 return ancestor.incrementalmissingancestors(self.parentrevs, common)
950 return ancestor.incrementalmissingancestors(self.parentrevs, common)
949
951
950 def findmissingrevs(self, common=None, heads=None):
952 def findmissingrevs(self, common=None, heads=None):
951 """Return the revision numbers of the ancestors of heads that
953 """Return the revision numbers of the ancestors of heads that
952 are not ancestors of common.
954 are not ancestors of common.
953
955
954 More specifically, return a list of revision numbers corresponding to
956 More specifically, return a list of revision numbers corresponding to
955 nodes N such that every N satisfies the following constraints:
957 nodes N such that every N satisfies the following constraints:
956
958
957 1. N is an ancestor of some node in 'heads'
959 1. N is an ancestor of some node in 'heads'
958 2. N is not an ancestor of any node in 'common'
960 2. N is not an ancestor of any node in 'common'
959
961
960 The list is sorted by revision number, meaning it is
962 The list is sorted by revision number, meaning it is
961 topologically sorted.
963 topologically sorted.
962
964
963 'heads' and 'common' are both lists of revision numbers. If heads is
965 'heads' and 'common' are both lists of revision numbers. If heads is
964 not supplied, uses all of the revlog's heads. If common is not
966 not supplied, uses all of the revlog's heads. If common is not
965 supplied, uses nullid."""
967 supplied, uses nullid."""
966 if common is None:
968 if common is None:
967 common = [nullrev]
969 common = [nullrev]
968 if heads is None:
970 if heads is None:
969 heads = self.headrevs()
971 heads = self.headrevs()
970
972
971 inc = self.incrementalmissingrevs(common=common)
973 inc = self.incrementalmissingrevs(common=common)
972 return inc.missingancestors(heads)
974 return inc.missingancestors(heads)
973
975
974 def findmissing(self, common=None, heads=None):
976 def findmissing(self, common=None, heads=None):
975 """Return the ancestors of heads that are not ancestors of common.
977 """Return the ancestors of heads that are not ancestors of common.
976
978
977 More specifically, return a list of nodes N such that every N
979 More specifically, return a list of nodes N such that every N
978 satisfies the following constraints:
980 satisfies the following constraints:
979
981
980 1. N is an ancestor of some node in 'heads'
982 1. N is an ancestor of some node in 'heads'
981 2. N is not an ancestor of any node in 'common'
983 2. N is not an ancestor of any node in 'common'
982
984
983 The list is sorted by revision number, meaning it is
985 The list is sorted by revision number, meaning it is
984 topologically sorted.
986 topologically sorted.
985
987
986 'heads' and 'common' are both lists of node IDs. If heads is
988 'heads' and 'common' are both lists of node IDs. If heads is
987 not supplied, uses all of the revlog's heads. If common is not
989 not supplied, uses all of the revlog's heads. If common is not
988 supplied, uses nullid."""
990 supplied, uses nullid."""
989 if common is None:
991 if common is None:
990 common = [nullid]
992 common = [nullid]
991 if heads is None:
993 if heads is None:
992 heads = self.heads()
994 heads = self.heads()
993
995
994 common = [self.rev(n) for n in common]
996 common = [self.rev(n) for n in common]
995 heads = [self.rev(n) for n in heads]
997 heads = [self.rev(n) for n in heads]
996
998
997 inc = self.incrementalmissingrevs(common=common)
999 inc = self.incrementalmissingrevs(common=common)
998 return [self.node(r) for r in inc.missingancestors(heads)]
1000 return [self.node(r) for r in inc.missingancestors(heads)]
999
1001
1000 def nodesbetween(self, roots=None, heads=None):
1002 def nodesbetween(self, roots=None, heads=None):
1001 """Return a topological path from 'roots' to 'heads'.
1003 """Return a topological path from 'roots' to 'heads'.
1002
1004
1003 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1005 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1004 topologically sorted list of all nodes N that satisfy both of
1006 topologically sorted list of all nodes N that satisfy both of
1005 these constraints:
1007 these constraints:
1006
1008
1007 1. N is a descendant of some node in 'roots'
1009 1. N is a descendant of some node in 'roots'
1008 2. N is an ancestor of some node in 'heads'
1010 2. N is an ancestor of some node in 'heads'
1009
1011
1010 Every node is considered to be both a descendant and an ancestor
1012 Every node is considered to be both a descendant and an ancestor
1011 of itself, so every reachable node in 'roots' and 'heads' will be
1013 of itself, so every reachable node in 'roots' and 'heads' will be
1012 included in 'nodes'.
1014 included in 'nodes'.
1013
1015
1014 'outroots' is the list of reachable nodes in 'roots', i.e., the
1016 'outroots' is the list of reachable nodes in 'roots', i.e., the
1015 subset of 'roots' that is returned in 'nodes'. Likewise,
1017 subset of 'roots' that is returned in 'nodes'. Likewise,
1016 'outheads' is the subset of 'heads' that is also in 'nodes'.
1018 'outheads' is the subset of 'heads' that is also in 'nodes'.
1017
1019
1018 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1020 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1019 unspecified, uses nullid as the only root. If 'heads' is
1021 unspecified, uses nullid as the only root. If 'heads' is
1020 unspecified, uses list of all of the revlog's heads."""
1022 unspecified, uses list of all of the revlog's heads."""
1021 nonodes = ([], [], [])
1023 nonodes = ([], [], [])
1022 if roots is not None:
1024 if roots is not None:
1023 roots = list(roots)
1025 roots = list(roots)
1024 if not roots:
1026 if not roots:
1025 return nonodes
1027 return nonodes
1026 lowestrev = min([self.rev(n) for n in roots])
1028 lowestrev = min([self.rev(n) for n in roots])
1027 else:
1029 else:
1028 roots = [nullid] # Everybody's a descendant of nullid
1030 roots = [nullid] # Everybody's a descendant of nullid
1029 lowestrev = nullrev
1031 lowestrev = nullrev
1030 if (lowestrev == nullrev) and (heads is None):
1032 if (lowestrev == nullrev) and (heads is None):
1031 # We want _all_ the nodes!
1033 # We want _all_ the nodes!
1032 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1034 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1033 if heads is None:
1035 if heads is None:
1034 # All nodes are ancestors, so the latest ancestor is the last
1036 # All nodes are ancestors, so the latest ancestor is the last
1035 # node.
1037 # node.
1036 highestrev = len(self) - 1
1038 highestrev = len(self) - 1
1037 # Set ancestors to None to signal that every node is an ancestor.
1039 # Set ancestors to None to signal that every node is an ancestor.
1038 ancestors = None
1040 ancestors = None
1039 # Set heads to an empty dictionary for later discovery of heads
1041 # Set heads to an empty dictionary for later discovery of heads
1040 heads = {}
1042 heads = {}
1041 else:
1043 else:
1042 heads = list(heads)
1044 heads = list(heads)
1043 if not heads:
1045 if not heads:
1044 return nonodes
1046 return nonodes
1045 ancestors = set()
1047 ancestors = set()
1046 # Turn heads into a dictionary so we can remove 'fake' heads.
1048 # Turn heads into a dictionary so we can remove 'fake' heads.
1047 # Also, later we will be using it to filter out the heads we can't
1049 # Also, later we will be using it to filter out the heads we can't
1048 # find from roots.
1050 # find from roots.
1049 heads = dict.fromkeys(heads, False)
1051 heads = dict.fromkeys(heads, False)
1050 # Start at the top and keep marking parents until we're done.
1052 # Start at the top and keep marking parents until we're done.
1051 nodestotag = set(heads)
1053 nodestotag = set(heads)
1052 # Remember where the top was so we can use it as a limit later.
1054 # Remember where the top was so we can use it as a limit later.
1053 highestrev = max([self.rev(n) for n in nodestotag])
1055 highestrev = max([self.rev(n) for n in nodestotag])
1054 while nodestotag:
1056 while nodestotag:
1055 # grab a node to tag
1057 # grab a node to tag
1056 n = nodestotag.pop()
1058 n = nodestotag.pop()
1057 # Never tag nullid
1059 # Never tag nullid
1058 if n == nullid:
1060 if n == nullid:
1059 continue
1061 continue
1060 # A node's revision number represents its place in a
1062 # A node's revision number represents its place in a
1061 # topologically sorted list of nodes.
1063 # topologically sorted list of nodes.
1062 r = self.rev(n)
1064 r = self.rev(n)
1063 if r >= lowestrev:
1065 if r >= lowestrev:
1064 if n not in ancestors:
1066 if n not in ancestors:
1065 # If we are possibly a descendant of one of the roots
1067 # If we are possibly a descendant of one of the roots
1066 # and we haven't already been marked as an ancestor
1068 # and we haven't already been marked as an ancestor
1067 ancestors.add(n) # Mark as ancestor
1069 ancestors.add(n) # Mark as ancestor
1068 # Add non-nullid parents to list of nodes to tag.
1070 # Add non-nullid parents to list of nodes to tag.
1069 nodestotag.update(
1071 nodestotag.update(
1070 [p for p in self.parents(n) if p != nullid]
1072 [p for p in self.parents(n) if p != nullid]
1071 )
1073 )
1072 elif n in heads: # We've seen it before, is it a fake head?
1074 elif n in heads: # We've seen it before, is it a fake head?
1073 # So it is, real heads should not be the ancestors of
1075 # So it is, real heads should not be the ancestors of
1074 # any other heads.
1076 # any other heads.
1075 heads.pop(n)
1077 heads.pop(n)
1076 if not ancestors:
1078 if not ancestors:
1077 return nonodes
1079 return nonodes
1078 # Now that we have our set of ancestors, we want to remove any
1080 # Now that we have our set of ancestors, we want to remove any
1079 # roots that are not ancestors.
1081 # roots that are not ancestors.
1080
1082
1081 # If one of the roots was nullid, everything is included anyway.
1083 # If one of the roots was nullid, everything is included anyway.
1082 if lowestrev > nullrev:
1084 if lowestrev > nullrev:
1083 # But, since we weren't, let's recompute the lowest rev to not
1085 # But, since we weren't, let's recompute the lowest rev to not
1084 # include roots that aren't ancestors.
1086 # include roots that aren't ancestors.
1085
1087
1086 # Filter out roots that aren't ancestors of heads
1088 # Filter out roots that aren't ancestors of heads
1087 roots = [root for root in roots if root in ancestors]
1089 roots = [root for root in roots if root in ancestors]
1088 # Recompute the lowest revision
1090 # Recompute the lowest revision
1089 if roots:
1091 if roots:
1090 lowestrev = min([self.rev(root) for root in roots])
1092 lowestrev = min([self.rev(root) for root in roots])
1091 else:
1093 else:
1092 # No more roots? Return empty list
1094 # No more roots? Return empty list
1093 return nonodes
1095 return nonodes
1094 else:
1096 else:
1095 # We are descending from nullid, and don't need to care about
1097 # We are descending from nullid, and don't need to care about
1096 # any other roots.
1098 # any other roots.
1097 lowestrev = nullrev
1099 lowestrev = nullrev
1098 roots = [nullid]
1100 roots = [nullid]
1099 # Transform our roots list into a set.
1101 # Transform our roots list into a set.
1100 descendants = set(roots)
1102 descendants = set(roots)
1101 # Also, keep the original roots so we can filter out roots that aren't
1103 # Also, keep the original roots so we can filter out roots that aren't
1102 # 'real' roots (i.e. are descended from other roots).
1104 # 'real' roots (i.e. are descended from other roots).
1103 roots = descendants.copy()
1105 roots = descendants.copy()
1104 # Our topologically sorted list of output nodes.
1106 # Our topologically sorted list of output nodes.
1105 orderedout = []
1107 orderedout = []
1106 # Don't start at nullid since we don't want nullid in our output list,
1108 # Don't start at nullid since we don't want nullid in our output list,
1107 # and if nullid shows up in descendants, empty parents will look like
1109 # and if nullid shows up in descendants, empty parents will look like
1108 # they're descendants.
1110 # they're descendants.
1109 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1111 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1110 n = self.node(r)
1112 n = self.node(r)
1111 isdescendant = False
1113 isdescendant = False
1112 if lowestrev == nullrev: # Everybody is a descendant of nullid
1114 if lowestrev == nullrev: # Everybody is a descendant of nullid
1113 isdescendant = True
1115 isdescendant = True
1114 elif n in descendants:
1116 elif n in descendants:
1115 # n is already a descendant
1117 # n is already a descendant
1116 isdescendant = True
1118 isdescendant = True
1117 # This check only needs to be done here because all the roots
1119 # This check only needs to be done here because all the roots
1118 # will start being marked is descendants before the loop.
1120 # will start being marked is descendants before the loop.
1119 if n in roots:
1121 if n in roots:
1120 # If n was a root, check if it's a 'real' root.
1122 # If n was a root, check if it's a 'real' root.
1121 p = tuple(self.parents(n))
1123 p = tuple(self.parents(n))
1122 # If any of its parents are descendants, it's not a root.
1124 # If any of its parents are descendants, it's not a root.
1123 if (p[0] in descendants) or (p[1] in descendants):
1125 if (p[0] in descendants) or (p[1] in descendants):
1124 roots.remove(n)
1126 roots.remove(n)
1125 else:
1127 else:
1126 p = tuple(self.parents(n))
1128 p = tuple(self.parents(n))
1127 # A node is a descendant if either of its parents are
1129 # A node is a descendant if either of its parents are
1128 # descendants. (We seeded the dependents list with the roots
1130 # descendants. (We seeded the dependents list with the roots
1129 # up there, remember?)
1131 # up there, remember?)
1130 if (p[0] in descendants) or (p[1] in descendants):
1132 if (p[0] in descendants) or (p[1] in descendants):
1131 descendants.add(n)
1133 descendants.add(n)
1132 isdescendant = True
1134 isdescendant = True
1133 if isdescendant and ((ancestors is None) or (n in ancestors)):
1135 if isdescendant and ((ancestors is None) or (n in ancestors)):
1134 # Only include nodes that are both descendants and ancestors.
1136 # Only include nodes that are both descendants and ancestors.
1135 orderedout.append(n)
1137 orderedout.append(n)
1136 if (ancestors is not None) and (n in heads):
1138 if (ancestors is not None) and (n in heads):
1137 # We're trying to figure out which heads are reachable
1139 # We're trying to figure out which heads are reachable
1138 # from roots.
1140 # from roots.
1139 # Mark this head as having been reached
1141 # Mark this head as having been reached
1140 heads[n] = True
1142 heads[n] = True
1141 elif ancestors is None:
1143 elif ancestors is None:
1142 # Otherwise, we're trying to discover the heads.
1144 # Otherwise, we're trying to discover the heads.
1143 # Assume this is a head because if it isn't, the next step
1145 # Assume this is a head because if it isn't, the next step
1144 # will eventually remove it.
1146 # will eventually remove it.
1145 heads[n] = True
1147 heads[n] = True
1146 # But, obviously its parents aren't.
1148 # But, obviously its parents aren't.
1147 for p in self.parents(n):
1149 for p in self.parents(n):
1148 heads.pop(p, None)
1150 heads.pop(p, None)
1149 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1151 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1150 roots = list(roots)
1152 roots = list(roots)
1151 assert orderedout
1153 assert orderedout
1152 assert roots
1154 assert roots
1153 assert heads
1155 assert heads
1154 return (orderedout, roots, heads)
1156 return (orderedout, roots, heads)
1155
1157
1156 def headrevs(self, revs=None):
1158 def headrevs(self, revs=None):
1157 if revs is None:
1159 if revs is None:
1158 try:
1160 try:
1159 return self.index.headrevs()
1161 return self.index.headrevs()
1160 except AttributeError:
1162 except AttributeError:
1161 return self._headrevs()
1163 return self._headrevs()
1162 if rustdagop is not None:
1164 if rustdagop is not None:
1163 return rustdagop.headrevs(self.index, revs)
1165 return rustdagop.headrevs(self.index, revs)
1164 return dagop.headrevs(revs, self._uncheckedparentrevs)
1166 return dagop.headrevs(revs, self._uncheckedparentrevs)
1165
1167
    def computephases(self, roots):
        # Delegate phase computation to the native index: given the phase
        # roots, it returns the maps/sets describing which revisions belong
        # to which phase (exact return shape is defined by the C index).
        return self.index.computephasesmapsets(roots)
1168
1170
1169 def _headrevs(self):
1171 def _headrevs(self):
1170 count = len(self)
1172 count = len(self)
1171 if not count:
1173 if not count:
1172 return [nullrev]
1174 return [nullrev]
1173 # we won't iter over filtered rev so nobody is a head at start
1175 # we won't iter over filtered rev so nobody is a head at start
1174 ishead = [0] * (count + 1)
1176 ishead = [0] * (count + 1)
1175 index = self.index
1177 index = self.index
1176 for r in self:
1178 for r in self:
1177 ishead[r] = 1 # I may be an head
1179 ishead[r] = 1 # I may be an head
1178 e = index[r]
1180 e = index[r]
1179 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1181 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1180 return [r for r, val in enumerate(ishead) if val]
1182 return [r for r, val in enumerate(ishead) if val]
1181
1183
1182 def heads(self, start=None, stop=None):
1184 def heads(self, start=None, stop=None):
1183 """return the list of all nodes that have no children
1185 """return the list of all nodes that have no children
1184
1186
1185 if start is specified, only heads that are descendants of
1187 if start is specified, only heads that are descendants of
1186 start will be returned
1188 start will be returned
1187 if stop is specified, it will consider all the revs from stop
1189 if stop is specified, it will consider all the revs from stop
1188 as if they had no children
1190 as if they had no children
1189 """
1191 """
1190 if start is None and stop is None:
1192 if start is None and stop is None:
1191 if not len(self):
1193 if not len(self):
1192 return [nullid]
1194 return [nullid]
1193 return [self.node(r) for r in self.headrevs()]
1195 return [self.node(r) for r in self.headrevs()]
1194
1196
1195 if start is None:
1197 if start is None:
1196 start = nullrev
1198 start = nullrev
1197 else:
1199 else:
1198 start = self.rev(start)
1200 start = self.rev(start)
1199
1201
1200 stoprevs = set(self.rev(n) for n in stop or [])
1202 stoprevs = set(self.rev(n) for n in stop or [])
1201
1203
1202 revs = dagop.headrevssubset(
1204 revs = dagop.headrevssubset(
1203 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1205 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1204 )
1206 )
1205
1207
1206 return [self.node(rev) for rev in revs]
1208 return [self.node(rev) for rev in revs]
1207
1209
1208 def children(self, node):
1210 def children(self, node):
1209 """find the children of a given node"""
1211 """find the children of a given node"""
1210 c = []
1212 c = []
1211 p = self.rev(node)
1213 p = self.rev(node)
1212 for r in self.revs(start=p + 1):
1214 for r in self.revs(start=p + 1):
1213 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1215 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1214 if prevs:
1216 if prevs:
1215 for pr in prevs:
1217 for pr in prevs:
1216 if pr == p:
1218 if pr == p:
1217 c.append(self.node(r))
1219 c.append(self.node(r))
1218 elif p == nullrev:
1220 elif p == nullrev:
1219 c.append(self.node(r))
1221 c.append(self.node(r))
1220 return c
1222 return c
1221
1223
1222 def commonancestorsheads(self, a, b):
1224 def commonancestorsheads(self, a, b):
1223 """calculate all the heads of the common ancestors of nodes a and b"""
1225 """calculate all the heads of the common ancestors of nodes a and b"""
1224 a, b = self.rev(a), self.rev(b)
1226 a, b = self.rev(a), self.rev(b)
1225 ancs = self._commonancestorsheads(a, b)
1227 ancs = self._commonancestorsheads(a, b)
1226 return pycompat.maplist(self.node, ancs)
1228 return pycompat.maplist(self.node, ancs)
1227
1229
1228 def _commonancestorsheads(self, *revs):
1230 def _commonancestorsheads(self, *revs):
1229 """calculate all the heads of the common ancestors of revs"""
1231 """calculate all the heads of the common ancestors of revs"""
1230 try:
1232 try:
1231 ancs = self.index.commonancestorsheads(*revs)
1233 ancs = self.index.commonancestorsheads(*revs)
1232 except (AttributeError, OverflowError): # C implementation failed
1234 except (AttributeError, OverflowError): # C implementation failed
1233 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1235 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1234 return ancs
1236 return ancs
1235
1237
1236 def isancestor(self, a, b):
1238 def isancestor(self, a, b):
1237 """return True if node a is an ancestor of node b
1239 """return True if node a is an ancestor of node b
1238
1240
1239 A revision is considered an ancestor of itself."""
1241 A revision is considered an ancestor of itself."""
1240 a, b = self.rev(a), self.rev(b)
1242 a, b = self.rev(a), self.rev(b)
1241 return self.isancestorrev(a, b)
1243 return self.isancestorrev(a, b)
1242
1244
1243 def isancestorrev(self, a, b):
1245 def isancestorrev(self, a, b):
1244 """return True if revision a is an ancestor of revision b
1246 """return True if revision a is an ancestor of revision b
1245
1247
1246 A revision is considered an ancestor of itself.
1248 A revision is considered an ancestor of itself.
1247
1249
1248 The implementation of this is trivial but the use of
1250 The implementation of this is trivial but the use of
1249 reachableroots is not."""
1251 reachableroots is not."""
1250 if a == nullrev:
1252 if a == nullrev:
1251 return True
1253 return True
1252 elif a == b:
1254 elif a == b:
1253 return True
1255 return True
1254 elif a > b:
1256 elif a > b:
1255 return False
1257 return False
1256 return bool(self.reachableroots(a, [b], [a], includepath=False))
1258 return bool(self.reachableroots(a, [b], [a], includepath=False))
1257
1259
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            # Fast path: C index implementation.
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            # Pure-python index: use the generic fallback (note the
            # argument order differs: roots before heads).
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )
1270
1272
1271 def ancestor(self, a, b):
1273 def ancestor(self, a, b):
1272 """calculate the "best" common ancestor of nodes a and b"""
1274 """calculate the "best" common ancestor of nodes a and b"""
1273
1275
1274 a, b = self.rev(a), self.rev(b)
1276 a, b = self.rev(a), self.rev(b)
1275 try:
1277 try:
1276 ancs = self.index.ancestors(a, b)
1278 ancs = self.index.ancestors(a, b)
1277 except (AttributeError, OverflowError):
1279 except (AttributeError, OverflowError):
1278 ancs = ancestor.ancestors(self.parentrevs, a, b)
1280 ancs = ancestor.ancestors(self.parentrevs, a, b)
1279 if ancs:
1281 if ancs:
1280 # choose a consistent winner when there's a tie
1282 # choose a consistent winner when there's a tie
1281 return min(map(self.node, ancs))
1283 return min(map(self.node, ancs))
1282 return nullid
1284 return nullid
1283
1285
    def _match(self, id):
        """Try to resolve ``id`` to a binary node without prefix matching.

        ``id`` may be a revision number, a 20-byte binary node, the
        byte-string form of a revision number, or a 40-character hex
        node.  Returns the binary node, or None when nothing matched
        exactly.
        """
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev): must round-trip exactly (rejects b'01', b'+1', ...)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                # negative revs count back from the end
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass
1317
1319
    def _partialmatch(self, id):
        """Resolve a (possibly abbreviated) hex node prefix to a node.

        Returns the binary node on a unique match, None when nothing
        matches, raises AmbiguousPrefixLookupError on multiple matches
        and WdirUnsupported when the prefix can only denote the working
        directory pseudo-node.
        """
        # we don't care wdirfilenodeids as they should be always full hash
        maybewdir = wdirhex.startswith(id)
        try:
            # Fast path: radix-tree lookup in the C index.
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        # Cached result from a previous slow-path resolution?
        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                # linear scan: e[7] is the binary node of each index entry
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass
1370
1372
1371 def lookup(self, id):
1373 def lookup(self, id):
1372 """locate a node based on:
1374 """locate a node based on:
1373 - revision number or str(revision number)
1375 - revision number or str(revision number)
1374 - nodeid or subset of hex nodeid
1376 - nodeid or subset of hex nodeid
1375 """
1377 """
1376 n = self._match(id)
1378 n = self._match(id)
1377 if n is not None:
1379 if n is not None:
1378 return n
1380 return n
1379 n = self._partialmatch(id)
1381 n = self._partialmatch(id)
1380 if n:
1382 if n:
1381 return n
1383 return n
1382
1384
1383 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1385 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1384
1386
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            # True when ``prefix`` unambiguously resolves to a node (or
            # can only be the working directory pseudo-node).
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            # An all-'f' prefix could also denote the wdir pseudo-node.
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            # Grow the prefix until it can no longer be confused with
            # the all-'f' wdir id.
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            # Fast path: ask the native index for the shortest prefix.
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            # wdir itself: return the first prefix that resolves.
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        # Slow path: probe prefixes of increasing length.
        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
1433
1435
1434 def cmp(self, node, text):
1436 def cmp(self, node, text):
1435 """compare text with a given file revision
1437 """compare text with a given file revision
1436
1438
1437 returns True if text is different than what is stored.
1439 returns True if text is different than what is stored.
1438 """
1440 """
1439 p1, p2 = self.parents(node)
1441 p1, p2 = self.parents(node)
1440 return storageutil.hashrevisionsha1(text, p1, p2) != node
1442 return storageutil.hashrevisionsha1(text, p1, p2) != node
1441
1443
1442 def _cachesegment(self, offset, data):
1444 def _cachesegment(self, offset, data):
1443 """Add a segment to the revlog cache.
1445 """Add a segment to the revlog cache.
1444
1446
1445 Accepts an absolute offset and the data that is at that location.
1447 Accepts an absolute offset and the data that is at that location.
1446 """
1448 """
1447 o, d = self._chunkcache
1449 o, d = self._chunkcache
1448 # try to add to existing cache
1450 # try to add to existing cache
1449 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1451 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1450 self._chunkcache = o, d + data
1452 self._chunkcache = o, d + data
1451 else:
1453 else:
1452 self._chunkcache = offset, data
1454 self._chunkcache = offset, data
1453
1455
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        # Round the window down/up to cachesize boundaries.  NOTE(review):
        # the mask trick assumes cachesize is a power of two -- presumably
        # enforced where _chunkcachesize is configured; confirm.
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        # Remember the whole window for future reads.
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            # The caller asked for a sub-window: slice it out, after
            # verifying the read actually covered the requested bytes.
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            # Exact-window read that still came up short.
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d
1513
1515
1514 def _getsegment(self, offset, length, df=None):
1516 def _getsegment(self, offset, length, df=None):
1515 """Obtain a segment of raw data from the revlog.
1517 """Obtain a segment of raw data from the revlog.
1516
1518
1517 Accepts an absolute offset, length of bytes to obtain, and an
1519 Accepts an absolute offset, length of bytes to obtain, and an
1518 optional file handle to the already-opened revlog. If the file
1520 optional file handle to the already-opened revlog. If the file
1519 handle is used, it's original seek position will not be preserved.
1521 handle is used, it's original seek position will not be preserved.
1520
1522
1521 Requests for data may be returned from a cache.
1523 Requests for data may be returned from a cache.
1522
1524
1523 Returns a str or a buffer instance of raw byte data.
1525 Returns a str or a buffer instance of raw byte data.
1524 """
1526 """
1525 o, d = self._chunkcache
1527 o, d = self._chunkcache
1526 l = len(d)
1528 l = len(d)
1527
1529
1528 # is it in the cache?
1530 # is it in the cache?
1529 cachestart = offset - o
1531 cachestart = offset - o
1530 cacheend = cachestart + length
1532 cacheend = cachestart + length
1531 if cachestart >= 0 and cacheend <= l:
1533 if cachestart >= 0 and cacheend <= l:
1532 if cachestart == 0 and cacheend == l:
1534 if cachestart == 0 and cacheend == l:
1533 return d # avoid a copy
1535 return d # avoid a copy
1534 return util.buffer(d, cachestart, cacheend - cachestart)
1536 return util.buffer(d, cachestart, cacheend - cachestart)
1535
1537
1536 return self._readsegment(offset, length, df=df)
1538 return self._readsegment(offset, length, df=df)
1537
1539
    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        # entry[0] stores the data offset in its high bits; shift away
        # the low 16 bits.  entry[1] is the stored (compressed) length.
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            # Inline revlogs interleave index entries with data, so skip
            # over the per-revision index headers preceding each chunk.
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
1571
1573
1572 def _chunk(self, rev, df=None):
1574 def _chunk(self, rev, df=None):
1573 """Obtain a single decompressed chunk for a revision.
1575 """Obtain a single decompressed chunk for a revision.
1574
1576
1575 Accepts an integer revision and an optional already-open file handle
1577 Accepts an integer revision and an optional already-open file handle
1576 to be used for reading. If used, the seek position of the file will not
1578 to be used for reading. If used, the seek position of the file will not
1577 be preserved.
1579 be preserved.
1578
1580
1579 Returns a str holding uncompressed data for the requested revision.
1581 Returns a str holding uncompressed data for the requested revision.
1580 """
1582 """
1581 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1583 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1582
1584
1583 def _chunks(self, revs, df=None, targetsize=None):
1585 def _chunks(self, revs, df=None, targetsize=None):
1584 """Obtain decompressed chunks for the specified revisions.
1586 """Obtain decompressed chunks for the specified revisions.
1585
1587
1586 Accepts an iterable of numeric revisions that are assumed to be in
1588 Accepts an iterable of numeric revisions that are assumed to be in
1587 ascending order. Also accepts an optional already-open file handle
1589 ascending order. Also accepts an optional already-open file handle
1588 to be used for reading. If used, the seek position of the file will
1590 to be used for reading. If used, the seek position of the file will
1589 not be preserved.
1591 not be preserved.
1590
1592
1591 This function is similar to calling ``self._chunk()`` multiple times,
1593 This function is similar to calling ``self._chunk()`` multiple times,
1592 but is faster.
1594 but is faster.
1593
1595
1594 Returns a list with decompressed data for each requested revision.
1596 Returns a list with decompressed data for each requested revision.
1595 """
1597 """
1596 if not revs:
1598 if not revs:
1597 return []
1599 return []
1598 start = self.start
1600 start = self.start
1599 length = self.length
1601 length = self.length
1600 inline = self._inline
1602 inline = self._inline
1601 iosize = self._io.size
1603 iosize = self._io.size
1602 buffer = util.buffer
1604 buffer = util.buffer
1603
1605
1604 l = []
1606 l = []
1605 ladd = l.append
1607 ladd = l.append
1606
1608
1607 if not self._withsparseread:
1609 if not self._withsparseread:
1608 slicedchunks = (revs,)
1610 slicedchunks = (revs,)
1609 else:
1611 else:
1610 slicedchunks = deltautil.slicechunk(
1612 slicedchunks = deltautil.slicechunk(
1611 self, revs, targetsize=targetsize
1613 self, revs, targetsize=targetsize
1612 )
1614 )
1613
1615
1614 for revschunk in slicedchunks:
1616 for revschunk in slicedchunks:
1615 firstrev = revschunk[0]
1617 firstrev = revschunk[0]
1616 # Skip trailing revisions with empty diff
1618 # Skip trailing revisions with empty diff
1617 for lastrev in revschunk[::-1]:
1619 for lastrev in revschunk[::-1]:
1618 if length(lastrev) != 0:
1620 if length(lastrev) != 0:
1619 break
1621 break
1620
1622
1621 try:
1623 try:
1622 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1624 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1623 except OverflowError:
1625 except OverflowError:
1624 # issue4215 - we can't cache a run of chunks greater than
1626 # issue4215 - we can't cache a run of chunks greater than
1625 # 2G on Windows
1627 # 2G on Windows
1626 return [self._chunk(rev, df=df) for rev in revschunk]
1628 return [self._chunk(rev, df=df) for rev in revschunk]
1627
1629
1628 decomp = self.decompress
1630 decomp = self.decompress
1629 for rev in revschunk:
1631 for rev in revschunk:
1630 chunkstart = start(rev)
1632 chunkstart = start(rev)
1631 if inline:
1633 if inline:
1632 chunkstart += (rev + 1) * iosize
1634 chunkstart += (rev + 1) * iosize
1633 chunklength = length(rev)
1635 chunklength = length(rev)
1634 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1636 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1635
1637
1636 return l
1638 return l
1637
1639
1638 def _chunkclear(self):
1640 def _chunkclear(self):
1639 """Clear the raw chunk cache."""
1641 """Clear the raw chunk cache."""
1640 self._chunkcache = (0, b'')
1642 self._chunkcache = (0, b'')
1641
1643
1642 def deltaparent(self, rev):
1644 def deltaparent(self, rev):
1643 """return deltaparent of the given revision"""
1645 """return deltaparent of the given revision"""
1644 base = self.index[rev][3]
1646 base = self.index[rev][3]
1645 if base == rev:
1647 if base == rev:
1646 return nullrev
1648 return nullrev
1647 elif self._generaldelta:
1649 elif self._generaldelta:
1648 return base
1650 return base
1649 else:
1651 else:
1650 return rev - 1
1652 return rev - 1
1651
1653
1652 def issnapshot(self, rev):
1654 def issnapshot(self, rev):
1653 """tells whether rev is a snapshot
1655 """tells whether rev is a snapshot
1654 """
1656 """
1655 if not self._sparserevlog:
1657 if not self._sparserevlog:
1656 return self.deltaparent(rev) == nullrev
1658 return self.deltaparent(rev) == nullrev
1657 elif util.safehasattr(self.index, b'issnapshot'):
1659 elif util.safehasattr(self.index, b'issnapshot'):
1658 # directly assign the method to cache the testing and access
1660 # directly assign the method to cache the testing and access
1659 self.issnapshot = self.index.issnapshot
1661 self.issnapshot = self.index.issnapshot
1660 return self.issnapshot(rev)
1662 return self.issnapshot(rev)
1661 if rev == nullrev:
1663 if rev == nullrev:
1662 return True
1664 return True
1663 entry = self.index[rev]
1665 entry = self.index[rev]
1664 base = entry[3]
1666 base = entry[3]
1665 if base == rev:
1667 if base == rev:
1666 return True
1668 return True
1667 if base == nullrev:
1669 if base == nullrev:
1668 return True
1670 return True
1669 p1 = entry[5]
1671 p1 = entry[5]
1670 p2 = entry[6]
1672 p2 = entry[6]
1671 if base == p1 or base == p2:
1673 if base == p1 or base == p2:
1672 return False
1674 return False
1673 return self.issnapshot(base)
1675 return self.issnapshot(base)
1674
1676
1675 def snapshotdepth(self, rev):
1677 def snapshotdepth(self, rev):
1676 """number of snapshot in the chain before this one"""
1678 """number of snapshot in the chain before this one"""
1677 if not self.issnapshot(rev):
1679 if not self.issnapshot(rev):
1678 raise error.ProgrammingError(b'revision %d not a snapshot')
1680 raise error.ProgrammingError(b'revision %d not a snapshot')
1679 return len(self._deltachain(rev)[0]) - 1
1681 return len(self._deltachain(rev)[0]) - 1
1680
1682
1681 def revdiff(self, rev1, rev2):
1683 def revdiff(self, rev1, rev2):
1682 """return or calculate a delta between two revisions
1684 """return or calculate a delta between two revisions
1683
1685
1684 The delta calculated is in binary form and is intended to be written to
1686 The delta calculated is in binary form and is intended to be written to
1685 revlog data directly. So this function needs raw revision data.
1687 revlog data directly. So this function needs raw revision data.
1686 """
1688 """
1687 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1689 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1688 return bytes(self._chunk(rev2))
1690 return bytes(self._chunk(rev2))
1689
1691
1690 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1692 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1691
1693
1692 def _processflags(self, text, flags, operation, raw=False):
1694 def _processflags(self, text, flags, operation, raw=False):
1693 """deprecated entry point to access flag processors"""
1695 """deprecated entry point to access flag processors"""
1694 msg = b'_processflag(...) use the specialized variant'
1696 msg = b'_processflag(...) use the specialized variant'
1695 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1697 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1696 if raw:
1698 if raw:
1697 return text, flagutil.processflagsraw(self, text, flags)
1699 return text, flagutil.processflagsraw(self, text, flags)
1698 elif operation == b'read':
1700 elif operation == b'read':
1699 return flagutil.processflagsread(self, text, flags)
1701 return flagutil.processflagsread(self, text, flags)
1700 else: # write operation
1702 else: # write operation
1701 return flagutil.processflagswrite(self, text, flags)
1703 return flagutil.processflagswrite(self, text, flags)
1702
1704
1703 def revision(self, nodeorrev, _df=None, raw=False):
1705 def revision(self, nodeorrev, _df=None, raw=False):
1704 """return an uncompressed revision of a given node or revision
1706 """return an uncompressed revision of a given node or revision
1705 number.
1707 number.
1706
1708
1707 _df - an existing file handle to read from. (internal-only)
1709 _df - an existing file handle to read from. (internal-only)
1708 raw - an optional argument specifying if the revision data is to be
1710 raw - an optional argument specifying if the revision data is to be
1709 treated as raw data when applying flag transforms. 'raw' should be set
1711 treated as raw data when applying flag transforms. 'raw' should be set
1710 to True when generating changegroups or in debug commands.
1712 to True when generating changegroups or in debug commands.
1711 """
1713 """
1712 if raw:
1714 if raw:
1713 msg = (
1715 msg = (
1714 b'revlog.revision(..., raw=True) is deprecated, '
1716 b'revlog.revision(..., raw=True) is deprecated, '
1715 b'use revlog.rawdata(...)'
1717 b'use revlog.rawdata(...)'
1716 )
1718 )
1717 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1719 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1718 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1720 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1719
1721
1720 def sidedata(self, nodeorrev, _df=None):
1722 def sidedata(self, nodeorrev, _df=None):
1721 """a map of extra data related to the changeset but not part of the hash
1723 """a map of extra data related to the changeset but not part of the hash
1722
1724
1723 This function currently return a dictionary. However, more advanced
1725 This function currently return a dictionary. However, more advanced
1724 mapping object will likely be used in the future for a more
1726 mapping object will likely be used in the future for a more
1725 efficient/lazy code.
1727 efficient/lazy code.
1726 """
1728 """
1727 return self._revisiondata(nodeorrev, _df)[1]
1729 return self._revisiondata(nodeorrev, _df)[1]
1728
1730
1729 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1731 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1730 # deal with <nodeorrev> argument type
1732 # deal with <nodeorrev> argument type
1731 if isinstance(nodeorrev, int):
1733 if isinstance(nodeorrev, int):
1732 rev = nodeorrev
1734 rev = nodeorrev
1733 node = self.node(rev)
1735 node = self.node(rev)
1734 else:
1736 else:
1735 node = nodeorrev
1737 node = nodeorrev
1736 rev = None
1738 rev = None
1737
1739
1738 # fast path the special `nullid` rev
1740 # fast path the special `nullid` rev
1739 if node == nullid:
1741 if node == nullid:
1740 return b"", {}
1742 return b"", {}
1741
1743
1742 # The text as stored inside the revlog. Might be the revision or might
1744 # The text as stored inside the revlog. Might be the revision or might
1743 # need to be processed to retrieve the revision.
1745 # need to be processed to retrieve the revision.
1744 rawtext = None
1746 rawtext = None
1745
1747
1746 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1748 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1747
1749
1748 if raw and validated:
1750 if raw and validated:
1749 # if we don't want to process the raw text and that raw
1751 # if we don't want to process the raw text and that raw
1750 # text is cached, we can exit early.
1752 # text is cached, we can exit early.
1751 return rawtext, {}
1753 return rawtext, {}
1752 if rev is None:
1754 if rev is None:
1753 rev = self.rev(node)
1755 rev = self.rev(node)
1754 # the revlog's flag for this revision
1756 # the revlog's flag for this revision
1755 # (usually alter its state or content)
1757 # (usually alter its state or content)
1756 flags = self.flags(rev)
1758 flags = self.flags(rev)
1757
1759
1758 if validated and flags == REVIDX_DEFAULT_FLAGS:
1760 if validated and flags == REVIDX_DEFAULT_FLAGS:
1759 # no extra flags set, no flag processor runs, text = rawtext
1761 # no extra flags set, no flag processor runs, text = rawtext
1760 return rawtext, {}
1762 return rawtext, {}
1761
1763
1762 sidedata = {}
1764 sidedata = {}
1763 if raw:
1765 if raw:
1764 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1766 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1765 text = rawtext
1767 text = rawtext
1766 else:
1768 else:
1767 try:
1769 try:
1768 r = flagutil.processflagsread(self, rawtext, flags)
1770 r = flagutil.processflagsread(self, rawtext, flags)
1769 except error.SidedataHashError as exc:
1771 except error.SidedataHashError as exc:
1770 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1772 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1771 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1773 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1772 raise error.RevlogError(msg)
1774 raise error.RevlogError(msg)
1773 text, validatehash, sidedata = r
1775 text, validatehash, sidedata = r
1774 if validatehash:
1776 if validatehash:
1775 self.checkhash(text, node, rev=rev)
1777 self.checkhash(text, node, rev=rev)
1776 if not validated:
1778 if not validated:
1777 self._revisioncache = (node, rev, rawtext)
1779 self._revisioncache = (node, rev, rawtext)
1778
1780
1779 return text, sidedata
1781 return text, sidedata
1780
1782
1781 def _rawtext(self, node, rev, _df=None):
1783 def _rawtext(self, node, rev, _df=None):
1782 """return the possibly unvalidated rawtext for a revision
1784 """return the possibly unvalidated rawtext for a revision
1783
1785
1784 returns (rev, rawtext, validated)
1786 returns (rev, rawtext, validated)
1785 """
1787 """
1786
1788
1787 # revision in the cache (could be useful to apply delta)
1789 # revision in the cache (could be useful to apply delta)
1788 cachedrev = None
1790 cachedrev = None
1789 # An intermediate text to apply deltas to
1791 # An intermediate text to apply deltas to
1790 basetext = None
1792 basetext = None
1791
1793
1792 # Check if we have the entry in cache
1794 # Check if we have the entry in cache
1793 # The cache entry looks like (node, rev, rawtext)
1795 # The cache entry looks like (node, rev, rawtext)
1794 if self._revisioncache:
1796 if self._revisioncache:
1795 if self._revisioncache[0] == node:
1797 if self._revisioncache[0] == node:
1796 return (rev, self._revisioncache[2], True)
1798 return (rev, self._revisioncache[2], True)
1797 cachedrev = self._revisioncache[1]
1799 cachedrev = self._revisioncache[1]
1798
1800
1799 if rev is None:
1801 if rev is None:
1800 rev = self.rev(node)
1802 rev = self.rev(node)
1801
1803
1802 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1804 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1803 if stopped:
1805 if stopped:
1804 basetext = self._revisioncache[2]
1806 basetext = self._revisioncache[2]
1805
1807
1806 # drop cache to save memory, the caller is expected to
1808 # drop cache to save memory, the caller is expected to
1807 # update self._revisioncache after validating the text
1809 # update self._revisioncache after validating the text
1808 self._revisioncache = None
1810 self._revisioncache = None
1809
1811
1810 targetsize = None
1812 targetsize = None
1811 rawsize = self.index[rev][2]
1813 rawsize = self.index[rev][2]
1812 if 0 <= rawsize:
1814 if 0 <= rawsize:
1813 targetsize = 4 * rawsize
1815 targetsize = 4 * rawsize
1814
1816
1815 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1817 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1816 if basetext is None:
1818 if basetext is None:
1817 basetext = bytes(bins[0])
1819 basetext = bytes(bins[0])
1818 bins = bins[1:]
1820 bins = bins[1:]
1819
1821
1820 rawtext = mdiff.patches(basetext, bins)
1822 rawtext = mdiff.patches(basetext, bins)
1821 del basetext # let us have a chance to free memory early
1823 del basetext # let us have a chance to free memory early
1822 return (rev, rawtext, False)
1824 return (rev, rawtext, False)
1823
1825
1824 def rawdata(self, nodeorrev, _df=None):
1826 def rawdata(self, nodeorrev, _df=None):
1825 """return an uncompressed raw data of a given node or revision number.
1827 """return an uncompressed raw data of a given node or revision number.
1826
1828
1827 _df - an existing file handle to read from. (internal-only)
1829 _df - an existing file handle to read from. (internal-only)
1828 """
1830 """
1829 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1831 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1830
1832
1831 def hash(self, text, p1, p2):
1833 def hash(self, text, p1, p2):
1832 """Compute a node hash.
1834 """Compute a node hash.
1833
1835
1834 Available as a function so that subclasses can replace the hash
1836 Available as a function so that subclasses can replace the hash
1835 as needed.
1837 as needed.
1836 """
1838 """
1837 return storageutil.hashrevisionsha1(text, p1, p2)
1839 return storageutil.hashrevisionsha1(text, p1, p2)
1838
1840
1839 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1841 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1840 """Check node hash integrity.
1842 """Check node hash integrity.
1841
1843
1842 Available as a function so that subclasses can extend hash mismatch
1844 Available as a function so that subclasses can extend hash mismatch
1843 behaviors as needed.
1845 behaviors as needed.
1844 """
1846 """
1845 try:
1847 try:
1846 if p1 is None and p2 is None:
1848 if p1 is None and p2 is None:
1847 p1, p2 = self.parents(node)
1849 p1, p2 = self.parents(node)
1848 if node != self.hash(text, p1, p2):
1850 if node != self.hash(text, p1, p2):
1849 # Clear the revision cache on hash failure. The revision cache
1851 # Clear the revision cache on hash failure. The revision cache
1850 # only stores the raw revision and clearing the cache does have
1852 # only stores the raw revision and clearing the cache does have
1851 # the side-effect that we won't have a cache hit when the raw
1853 # the side-effect that we won't have a cache hit when the raw
1852 # revision data is accessed. But this case should be rare and
1854 # revision data is accessed. But this case should be rare and
1853 # it is extra work to teach the cache about the hash
1855 # it is extra work to teach the cache about the hash
1854 # verification state.
1856 # verification state.
1855 if self._revisioncache and self._revisioncache[0] == node:
1857 if self._revisioncache and self._revisioncache[0] == node:
1856 self._revisioncache = None
1858 self._revisioncache = None
1857
1859
1858 revornode = rev
1860 revornode = rev
1859 if revornode is None:
1861 if revornode is None:
1860 revornode = templatefilters.short(hex(node))
1862 revornode = templatefilters.short(hex(node))
1861 raise error.RevlogError(
1863 raise error.RevlogError(
1862 _(b"integrity check failed on %s:%s")
1864 _(b"integrity check failed on %s:%s")
1863 % (self.indexfile, pycompat.bytestr(revornode))
1865 % (self.indexfile, pycompat.bytestr(revornode))
1864 )
1866 )
1865 except error.RevlogError:
1867 except error.RevlogError:
1866 if self._censorable and storageutil.iscensoredtext(text):
1868 if self._censorable and storageutil.iscensoredtext(text):
1867 raise error.CensoredNodeError(self.indexfile, node, text)
1869 raise error.CensoredNodeError(self.indexfile, node, text)
1868 raise
1870 raise
1869
1871
1870 def _enforceinlinesize(self, tr, fp=None):
1872 def _enforceinlinesize(self, tr, fp=None):
1871 """Check if the revlog is too big for inline and convert if so.
1873 """Check if the revlog is too big for inline and convert if so.
1872
1874
1873 This should be called after revisions are added to the revlog. If the
1875 This should be called after revisions are added to the revlog. If the
1874 revlog has grown too large to be an inline revlog, it will convert it
1876 revlog has grown too large to be an inline revlog, it will convert it
1875 to use multiple index and data files.
1877 to use multiple index and data files.
1876 """
1878 """
1877 tiprev = len(self) - 1
1879 tiprev = len(self) - 1
1878 if (
1880 if (
1879 not self._inline
1881 not self._inline
1880 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1882 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1881 ):
1883 ):
1882 return
1884 return
1883
1885
1884 trinfo = tr.find(self.indexfile)
1886 trinfo = tr.find(self.indexfile)
1885 if trinfo is None:
1887 if trinfo is None:
1886 raise error.RevlogError(
1888 raise error.RevlogError(
1887 _(b"%s not found in the transaction") % self.indexfile
1889 _(b"%s not found in the transaction") % self.indexfile
1888 )
1890 )
1889
1891
1890 trindex = trinfo[2]
1892 trindex = trinfo[2]
1891 if trindex is not None:
1893 if trindex is not None:
1892 dataoff = self.start(trindex)
1894 dataoff = self.start(trindex)
1893 else:
1895 else:
1894 # revlog was stripped at start of transaction, use all leftover data
1896 # revlog was stripped at start of transaction, use all leftover data
1895 trindex = len(self) - 1
1897 trindex = len(self) - 1
1896 dataoff = self.end(tiprev)
1898 dataoff = self.end(tiprev)
1897
1899
1898 tr.add(self.datafile, dataoff)
1900 tr.add(self.datafile, dataoff)
1899
1901
1900 if fp:
1902 if fp:
1901 fp.flush()
1903 fp.flush()
1902 fp.close()
1904 fp.close()
1903 # We can't use the cached file handle after close(). So prevent
1905 # We can't use the cached file handle after close(). So prevent
1904 # its usage.
1906 # its usage.
1905 self._writinghandles = None
1907 self._writinghandles = None
1906
1908
1907 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1909 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1908 for r in self:
1910 for r in self:
1909 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1911 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1910
1912
1911 with self._indexfp(b'w') as fp:
1913 with self._indexfp(b'w') as fp:
1912 self.version &= ~FLAG_INLINE_DATA
1914 self.version &= ~FLAG_INLINE_DATA
1913 self._inline = False
1915 self._inline = False
1914 io = self._io
1916 io = self._io
1915 for i in self:
1917 for i in self:
1916 e = io.packentry(self.index[i], self.node, self.version, i)
1918 e = io.packentry(self.index[i], self.node, self.version, i)
1917 fp.write(e)
1919 fp.write(e)
1918
1920
1919 # the temp file replace the real index when we exit the context
1921 # the temp file replace the real index when we exit the context
1920 # manager
1922 # manager
1921
1923
1922 tr.replace(self.indexfile, trindex * self._io.size)
1924 tr.replace(self.indexfile, trindex * self._io.size)
1923 self._chunkclear()
1925 self._chunkclear()
1924
1926
1925 def _nodeduplicatecallback(self, transaction, node):
1927 def _nodeduplicatecallback(self, transaction, node):
1926 """called when trying to add a node already stored.
1928 """called when trying to add a node already stored.
1927 """
1929 """
1928
1930
1929 def addrevision(
1931 def addrevision(
1930 self,
1932 self,
1931 text,
1933 text,
1932 transaction,
1934 transaction,
1933 link,
1935 link,
1934 p1,
1936 p1,
1935 p2,
1937 p2,
1936 cachedelta=None,
1938 cachedelta=None,
1937 node=None,
1939 node=None,
1938 flags=REVIDX_DEFAULT_FLAGS,
1940 flags=REVIDX_DEFAULT_FLAGS,
1939 deltacomputer=None,
1941 deltacomputer=None,
1940 sidedata=None,
1942 sidedata=None,
1941 ):
1943 ):
1942 """add a revision to the log
1944 """add a revision to the log
1943
1945
1944 text - the revision data to add
1946 text - the revision data to add
1945 transaction - the transaction object used for rollback
1947 transaction - the transaction object used for rollback
1946 link - the linkrev data to add
1948 link - the linkrev data to add
1947 p1, p2 - the parent nodeids of the revision
1949 p1, p2 - the parent nodeids of the revision
1948 cachedelta - an optional precomputed delta
1950 cachedelta - an optional precomputed delta
1949 node - nodeid of revision; typically node is not specified, and it is
1951 node - nodeid of revision; typically node is not specified, and it is
1950 computed by default as hash(text, p1, p2), however subclasses might
1952 computed by default as hash(text, p1, p2), however subclasses might
1951 use different hashing method (and override checkhash() in such case)
1953 use different hashing method (and override checkhash() in such case)
1952 flags - the known flags to set on the revision
1954 flags - the known flags to set on the revision
1953 deltacomputer - an optional deltacomputer instance shared between
1955 deltacomputer - an optional deltacomputer instance shared between
1954 multiple calls
1956 multiple calls
1955 """
1957 """
1956 if link == nullrev:
1958 if link == nullrev:
1957 raise error.RevlogError(
1959 raise error.RevlogError(
1958 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1960 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1959 )
1961 )
1960
1962
1961 if sidedata is None:
1963 if sidedata is None:
1962 sidedata = {}
1964 sidedata = {}
1963 flags = flags & ~REVIDX_SIDEDATA
1965 flags = flags & ~REVIDX_SIDEDATA
1964 elif not self.hassidedata:
1966 elif not self.hassidedata:
1965 raise error.ProgrammingError(
1967 raise error.ProgrammingError(
1966 _(b"trying to add sidedata to a revlog who don't support them")
1968 _(b"trying to add sidedata to a revlog who don't support them")
1967 )
1969 )
1968 else:
1970 else:
1969 flags |= REVIDX_SIDEDATA
1971 flags |= REVIDX_SIDEDATA
1970
1972
1971 if flags:
1973 if flags:
1972 node = node or self.hash(text, p1, p2)
1974 node = node or self.hash(text, p1, p2)
1973
1975
1974 rawtext, validatehash = flagutil.processflagswrite(
1976 rawtext, validatehash = flagutil.processflagswrite(
1975 self, text, flags, sidedata=sidedata
1977 self, text, flags, sidedata=sidedata
1976 )
1978 )
1977
1979
1978 # If the flag processor modifies the revision data, ignore any provided
1980 # If the flag processor modifies the revision data, ignore any provided
1979 # cachedelta.
1981 # cachedelta.
1980 if rawtext != text:
1982 if rawtext != text:
1981 cachedelta = None
1983 cachedelta = None
1982
1984
1983 if len(rawtext) > _maxentrysize:
1985 if len(rawtext) > _maxentrysize:
1984 raise error.RevlogError(
1986 raise error.RevlogError(
1985 _(
1987 _(
1986 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
1988 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
1987 )
1989 )
1988 % (self.indexfile, len(rawtext))
1990 % (self.indexfile, len(rawtext))
1989 )
1991 )
1990
1992
1991 node = node or self.hash(rawtext, p1, p2)
1993 node = node or self.hash(rawtext, p1, p2)
1992 if node in self.nodemap:
1994 if node in self.nodemap:
1993 return node
1995 return node
1994
1996
1995 if validatehash:
1997 if validatehash:
1996 self.checkhash(rawtext, node, p1=p1, p2=p2)
1998 self.checkhash(rawtext, node, p1=p1, p2=p2)
1997
1999
1998 return self.addrawrevision(
2000 return self.addrawrevision(
1999 rawtext,
2001 rawtext,
2000 transaction,
2002 transaction,
2001 link,
2003 link,
2002 p1,
2004 p1,
2003 p2,
2005 p2,
2004 node,
2006 node,
2005 flags,
2007 flags,
2006 cachedelta=cachedelta,
2008 cachedelta=cachedelta,
2007 deltacomputer=deltacomputer,
2009 deltacomputer=deltacomputer,
2008 )
2010 )
2009
2011
2010 def addrawrevision(
2012 def addrawrevision(
2011 self,
2013 self,
2012 rawtext,
2014 rawtext,
2013 transaction,
2015 transaction,
2014 link,
2016 link,
2015 p1,
2017 p1,
2016 p2,
2018 p2,
2017 node,
2019 node,
2018 flags,
2020 flags,
2019 cachedelta=None,
2021 cachedelta=None,
2020 deltacomputer=None,
2022 deltacomputer=None,
2021 ):
2023 ):
2022 """add a raw revision with known flags, node and parents
2024 """add a raw revision with known flags, node and parents
2023 useful when reusing a revision not stored in this revlog (ex: received
2025 useful when reusing a revision not stored in this revlog (ex: received
2024 over wire, or read from an external bundle).
2026 over wire, or read from an external bundle).
2025 """
2027 """
2026 dfh = None
2028 dfh = None
2027 if not self._inline:
2029 if not self._inline:
2028 dfh = self._datafp(b"a+")
2030 dfh = self._datafp(b"a+")
2029 ifh = self._indexfp(b"a+")
2031 ifh = self._indexfp(b"a+")
2030 try:
2032 try:
2031 return self._addrevision(
2033 return self._addrevision(
2032 node,
2034 node,
2033 rawtext,
2035 rawtext,
2034 transaction,
2036 transaction,
2035 link,
2037 link,
2036 p1,
2038 p1,
2037 p2,
2039 p2,
2038 flags,
2040 flags,
2039 cachedelta,
2041 cachedelta,
2040 ifh,
2042 ifh,
2041 dfh,
2043 dfh,
2042 deltacomputer=deltacomputer,
2044 deltacomputer=deltacomputer,
2043 )
2045 )
2044 finally:
2046 finally:
2045 if dfh:
2047 if dfh:
2046 dfh.close()
2048 dfh.close()
2047 ifh.close()
2049 ifh.close()
2048
2050
2049 def compress(self, data):
2051 def compress(self, data):
2050 """Generate a possibly-compressed representation of data."""
2052 """Generate a possibly-compressed representation of data."""
2051 if not data:
2053 if not data:
2052 return b'', data
2054 return b'', data
2053
2055
2054 compressed = self._compressor.compress(data)
2056 compressed = self._compressor.compress(data)
2055
2057
2056 if compressed:
2058 if compressed:
2057 # The revlog compressor added the header in the returned data.
2059 # The revlog compressor added the header in the returned data.
2058 return b'', compressed
2060 return b'', compressed
2059
2061
2060 if data[0:1] == b'\0':
2062 if data[0:1] == b'\0':
2061 return b'', data
2063 return b'', data
2062 return b'u', data
2064 return b'u', data
2063
2065
2064 def decompress(self, data):
2066 def decompress(self, data):
2065 """Decompress a revlog chunk.
2067 """Decompress a revlog chunk.
2066
2068
2067 The chunk is expected to begin with a header identifying the
2069 The chunk is expected to begin with a header identifying the
2068 format type so it can be routed to an appropriate decompressor.
2070 format type so it can be routed to an appropriate decompressor.
2069 """
2071 """
2070 if not data:
2072 if not data:
2071 return data
2073 return data
2072
2074
2073 # Revlogs are read much more frequently than they are written and many
2075 # Revlogs are read much more frequently than they are written and many
2074 # chunks only take microseconds to decompress, so performance is
2076 # chunks only take microseconds to decompress, so performance is
2075 # important here.
2077 # important here.
2076 #
2078 #
2077 # We can make a few assumptions about revlogs:
2079 # We can make a few assumptions about revlogs:
2078 #
2080 #
2079 # 1) the majority of chunks will be compressed (as opposed to inline
2081 # 1) the majority of chunks will be compressed (as opposed to inline
2080 # raw data).
2082 # raw data).
2081 # 2) decompressing *any* data will likely by at least 10x slower than
2083 # 2) decompressing *any* data will likely by at least 10x slower than
2082 # returning raw inline data.
2084 # returning raw inline data.
2083 # 3) we want to prioritize common and officially supported compression
2085 # 3) we want to prioritize common and officially supported compression
2084 # engines
2086 # engines
2085 #
2087 #
2086 # It follows that we want to optimize for "decompress compressed data
2088 # It follows that we want to optimize for "decompress compressed data
2087 # when encoded with common and officially supported compression engines"
2089 # when encoded with common and officially supported compression engines"
2088 # case over "raw data" and "data encoded by less common or non-official
2090 # case over "raw data" and "data encoded by less common or non-official
2089 # compression engines." That is why we have the inline lookup first
2091 # compression engines." That is why we have the inline lookup first
2090 # followed by the compengines lookup.
2092 # followed by the compengines lookup.
2091 #
2093 #
2092 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2094 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2093 # compressed chunks. And this matters for changelog and manifest reads.
2095 # compressed chunks. And this matters for changelog and manifest reads.
2094 t = data[0:1]
2096 t = data[0:1]
2095
2097
2096 if t == b'x':
2098 if t == b'x':
2097 try:
2099 try:
2098 return _zlibdecompress(data)
2100 return _zlibdecompress(data)
2099 except zlib.error as e:
2101 except zlib.error as e:
2100 raise error.RevlogError(
2102 raise error.RevlogError(
2101 _(b'revlog decompress error: %s')
2103 _(b'revlog decompress error: %s')
2102 % stringutil.forcebytestr(e)
2104 % stringutil.forcebytestr(e)
2103 )
2105 )
2104 # '\0' is more common than 'u' so it goes first.
2106 # '\0' is more common than 'u' so it goes first.
2105 elif t == b'\0':
2107 elif t == b'\0':
2106 return data
2108 return data
2107 elif t == b'u':
2109 elif t == b'u':
2108 return util.buffer(data, 1)
2110 return util.buffer(data, 1)
2109
2111
2110 try:
2112 try:
2111 compressor = self._decompressors[t]
2113 compressor = self._decompressors[t]
2112 except KeyError:
2114 except KeyError:
2113 try:
2115 try:
2114 engine = util.compengines.forrevlogheader(t)
2116 engine = util.compengines.forrevlogheader(t)
2115 compressor = engine.revlogcompressor(self._compengineopts)
2117 compressor = engine.revlogcompressor(self._compengineopts)
2116 self._decompressors[t] = compressor
2118 self._decompressors[t] = compressor
2117 except KeyError:
2119 except KeyError:
2118 raise error.RevlogError(_(b'unknown compression type %r') % t)
2120 raise error.RevlogError(_(b'unknown compression type %r') % t)
2119
2121
2120 return compressor.decompress(data)
2122 return compressor.decompress(data)
2121
2123
2122 def _addrevision(
2124 def _addrevision(
2123 self,
2125 self,
2124 node,
2126 node,
2125 rawtext,
2127 rawtext,
2126 transaction,
2128 transaction,
2127 link,
2129 link,
2128 p1,
2130 p1,
2129 p2,
2131 p2,
2130 flags,
2132 flags,
2131 cachedelta,
2133 cachedelta,
2132 ifh,
2134 ifh,
2133 dfh,
2135 dfh,
2134 alwayscache=False,
2136 alwayscache=False,
2135 deltacomputer=None,
2137 deltacomputer=None,
2136 ):
2138 ):
2137 """internal function to add revisions to the log
2139 """internal function to add revisions to the log
2138
2140
2139 see addrevision for argument descriptions.
2141 see addrevision for argument descriptions.
2140
2142
2141 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2143 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2142
2144
2143 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2145 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2144 be used.
2146 be used.
2145
2147
2146 invariants:
2148 invariants:
2147 - rawtext is optional (can be None); if not set, cachedelta must be set.
2149 - rawtext is optional (can be None); if not set, cachedelta must be set.
2148 if both are set, they must correspond to each other.
2150 if both are set, they must correspond to each other.
2149 """
2151 """
2150 if node == nullid:
2152 if node == nullid:
2151 raise error.RevlogError(
2153 raise error.RevlogError(
2152 _(b"%s: attempt to add null revision") % self.indexfile
2154 _(b"%s: attempt to add null revision") % self.indexfile
2153 )
2155 )
2154 if node == wdirid or node in wdirfilenodeids:
2156 if node == wdirid or node in wdirfilenodeids:
2155 raise error.RevlogError(
2157 raise error.RevlogError(
2156 _(b"%s: attempt to add wdir revision") % self.indexfile
2158 _(b"%s: attempt to add wdir revision") % self.indexfile
2157 )
2159 )
2158
2160
2159 if self._inline:
2161 if self._inline:
2160 fh = ifh
2162 fh = ifh
2161 else:
2163 else:
2162 fh = dfh
2164 fh = dfh
2163
2165
2164 btext = [rawtext]
2166 btext = [rawtext]
2165
2167
2166 curr = len(self)
2168 curr = len(self)
2167 prev = curr - 1
2169 prev = curr - 1
2168 offset = self.end(prev)
2170 offset = self.end(prev)
2169 p1r, p2r = self.rev(p1), self.rev(p2)
2171 p1r, p2r = self.rev(p1), self.rev(p2)
2170
2172
2171 # full versions are inserted when the needed deltas
2173 # full versions are inserted when the needed deltas
2172 # become comparable to the uncompressed text
2174 # become comparable to the uncompressed text
2173 if rawtext is None:
2175 if rawtext is None:
2174 # need rawtext size, before changed by flag processors, which is
2176 # need rawtext size, before changed by flag processors, which is
2175 # the non-raw size. use revlog explicitly to avoid filelog's extra
2177 # the non-raw size. use revlog explicitly to avoid filelog's extra
2176 # logic that might remove metadata size.
2178 # logic that might remove metadata size.
2177 textlen = mdiff.patchedsize(
2179 textlen = mdiff.patchedsize(
2178 revlog.size(self, cachedelta[0]), cachedelta[1]
2180 revlog.size(self, cachedelta[0]), cachedelta[1]
2179 )
2181 )
2180 else:
2182 else:
2181 textlen = len(rawtext)
2183 textlen = len(rawtext)
2182
2184
2183 if deltacomputer is None:
2185 if deltacomputer is None:
2184 deltacomputer = deltautil.deltacomputer(self)
2186 deltacomputer = deltautil.deltacomputer(self)
2185
2187
2186 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2188 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2187
2189
2188 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2190 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2189
2191
2190 e = (
2192 e = (
2191 offset_type(offset, flags),
2193 offset_type(offset, flags),
2192 deltainfo.deltalen,
2194 deltainfo.deltalen,
2193 textlen,
2195 textlen,
2194 deltainfo.base,
2196 deltainfo.base,
2195 link,
2197 link,
2196 p1r,
2198 p1r,
2197 p2r,
2199 p2r,
2198 node,
2200 node,
2199 )
2201 )
2200 self.index.append(e)
2202 self.index.append(e)
2201 self.nodemap[node] = curr
2203 self.nodemap[node] = curr
2202
2204
2203 # Reset the pure node cache start lookup offset to account for new
2205 # Reset the pure node cache start lookup offset to account for new
2204 # revision.
2206 # revision.
2205 if self._nodepos is not None:
2207 if self._nodepos is not None:
2206 self._nodepos = curr
2208 self._nodepos = curr
2207
2209
2208 entry = self._io.packentry(e, self.node, self.version, curr)
2210 entry = self._io.packentry(e, self.node, self.version, curr)
2209 self._writeentry(
2211 self._writeentry(
2210 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2212 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2211 )
2213 )
2212
2214
2213 rawtext = btext[0]
2215 rawtext = btext[0]
2214
2216
2215 if alwayscache and rawtext is None:
2217 if alwayscache and rawtext is None:
2216 rawtext = deltacomputer.buildtext(revinfo, fh)
2218 rawtext = deltacomputer.buildtext(revinfo, fh)
2217
2219
2218 if type(rawtext) == bytes: # only accept immutable objects
2220 if type(rawtext) == bytes: # only accept immutable objects
2219 self._revisioncache = (node, curr, rawtext)
2221 self._revisioncache = (node, curr, rawtext)
2220 self._chainbasecache[curr] = deltainfo.chainbase
2222 self._chainbasecache[curr] = deltainfo.chainbase
2221 return node
2223 return node
2222
2224
2223 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2225 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2224 # Files opened in a+ mode have inconsistent behavior on various
2226 # Files opened in a+ mode have inconsistent behavior on various
2225 # platforms. Windows requires that a file positioning call be made
2227 # platforms. Windows requires that a file positioning call be made
2226 # when the file handle transitions between reads and writes. See
2228 # when the file handle transitions between reads and writes. See
2227 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2229 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2228 # platforms, Python or the platform itself can be buggy. Some versions
2230 # platforms, Python or the platform itself can be buggy. Some versions
2229 # of Solaris have been observed to not append at the end of the file
2231 # of Solaris have been observed to not append at the end of the file
2230 # if the file was seeked to before the end. See issue4943 for more.
2232 # if the file was seeked to before the end. See issue4943 for more.
2231 #
2233 #
2232 # We work around this issue by inserting a seek() before writing.
2234 # We work around this issue by inserting a seek() before writing.
2233 # Note: This is likely not necessary on Python 3. However, because
2235 # Note: This is likely not necessary on Python 3. However, because
2234 # the file handle is reused for reads and may be seeked there, we need
2236 # the file handle is reused for reads and may be seeked there, we need
2235 # to be careful before changing this.
2237 # to be careful before changing this.
2236 ifh.seek(0, os.SEEK_END)
2238 ifh.seek(0, os.SEEK_END)
2237 if dfh:
2239 if dfh:
2238 dfh.seek(0, os.SEEK_END)
2240 dfh.seek(0, os.SEEK_END)
2239
2241
2240 curr = len(self) - 1
2242 curr = len(self) - 1
2241 if not self._inline:
2243 if not self._inline:
2242 transaction.add(self.datafile, offset)
2244 transaction.add(self.datafile, offset)
2243 transaction.add(self.indexfile, curr * len(entry))
2245 transaction.add(self.indexfile, curr * len(entry))
2244 if data[0]:
2246 if data[0]:
2245 dfh.write(data[0])
2247 dfh.write(data[0])
2246 dfh.write(data[1])
2248 dfh.write(data[1])
2247 ifh.write(entry)
2249 ifh.write(entry)
2248 else:
2250 else:
2249 offset += curr * self._io.size
2251 offset += curr * self._io.size
2250 transaction.add(self.indexfile, offset, curr)
2252 transaction.add(self.indexfile, offset, curr)
2251 ifh.write(entry)
2253 ifh.write(entry)
2252 ifh.write(data[0])
2254 ifh.write(data[0])
2253 ifh.write(data[1])
2255 ifh.write(data[1])
2254 self._enforceinlinesize(transaction, ifh)
2256 self._enforceinlinesize(transaction, ifh)
2255
2257
2256 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2258 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2257 """
2259 """
2258 add a delta group
2260 add a delta group
2259
2261
2260 given a set of deltas, add them to the revision log. the
2262 given a set of deltas, add them to the revision log. the
2261 first delta is against its parent, which should be in our
2263 first delta is against its parent, which should be in our
2262 log, the rest are against the previous delta.
2264 log, the rest are against the previous delta.
2263
2265
2264 If ``addrevisioncb`` is defined, it will be called with arguments of
2266 If ``addrevisioncb`` is defined, it will be called with arguments of
2265 this revlog and the node that was added.
2267 this revlog and the node that was added.
2266 """
2268 """
2267
2269
2268 if self._writinghandles:
2270 if self._writinghandles:
2269 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2271 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2270
2272
2271 nodes = []
2273 nodes = []
2272
2274
2273 r = len(self)
2275 r = len(self)
2274 end = 0
2276 end = 0
2275 if r:
2277 if r:
2276 end = self.end(r - 1)
2278 end = self.end(r - 1)
2277 ifh = self._indexfp(b"a+")
2279 ifh = self._indexfp(b"a+")
2278 isize = r * self._io.size
2280 isize = r * self._io.size
2279 if self._inline:
2281 if self._inline:
2280 transaction.add(self.indexfile, end + isize, r)
2282 transaction.add(self.indexfile, end + isize, r)
2281 dfh = None
2283 dfh = None
2282 else:
2284 else:
2283 transaction.add(self.indexfile, isize, r)
2285 transaction.add(self.indexfile, isize, r)
2284 transaction.add(self.datafile, end)
2286 transaction.add(self.datafile, end)
2285 dfh = self._datafp(b"a+")
2287 dfh = self._datafp(b"a+")
2286
2288
2287 def flush():
2289 def flush():
2288 if dfh:
2290 if dfh:
2289 dfh.flush()
2291 dfh.flush()
2290 ifh.flush()
2292 ifh.flush()
2291
2293
2292 self._writinghandles = (ifh, dfh)
2294 self._writinghandles = (ifh, dfh)
2293
2295
2294 try:
2296 try:
2295 deltacomputer = deltautil.deltacomputer(self)
2297 deltacomputer = deltautil.deltacomputer(self)
2296 # loop through our set of deltas
2298 # loop through our set of deltas
2297 for data in deltas:
2299 for data in deltas:
2298 node, p1, p2, linknode, deltabase, delta, flags = data
2300 node, p1, p2, linknode, deltabase, delta, flags = data
2299 link = linkmapper(linknode)
2301 link = linkmapper(linknode)
2300 flags = flags or REVIDX_DEFAULT_FLAGS
2302 flags = flags or REVIDX_DEFAULT_FLAGS
2301
2303
2302 nodes.append(node)
2304 nodes.append(node)
2303
2305
2304 if node in self.nodemap:
2306 if node in self.nodemap:
2305 self._nodeduplicatecallback(transaction, node)
2307 self._nodeduplicatecallback(transaction, node)
2306 # this can happen if two branches make the same change
2308 # this can happen if two branches make the same change
2307 continue
2309 continue
2308
2310
2309 for p in (p1, p2):
2311 for p in (p1, p2):
2310 if p not in self.nodemap:
2312 if p not in self.nodemap:
2311 raise error.LookupError(
2313 raise error.LookupError(
2312 p, self.indexfile, _(b'unknown parent')
2314 p, self.indexfile, _(b'unknown parent')
2313 )
2315 )
2314
2316
2315 if deltabase not in self.nodemap:
2317 if deltabase not in self.nodemap:
2316 raise error.LookupError(
2318 raise error.LookupError(
2317 deltabase, self.indexfile, _(b'unknown delta base')
2319 deltabase, self.indexfile, _(b'unknown delta base')
2318 )
2320 )
2319
2321
2320 baserev = self.rev(deltabase)
2322 baserev = self.rev(deltabase)
2321
2323
2322 if baserev != nullrev and self.iscensored(baserev):
2324 if baserev != nullrev and self.iscensored(baserev):
2323 # if base is censored, delta must be full replacement in a
2325 # if base is censored, delta must be full replacement in a
2324 # single patch operation
2326 # single patch operation
2325 hlen = struct.calcsize(b">lll")
2327 hlen = struct.calcsize(b">lll")
2326 oldlen = self.rawsize(baserev)
2328 oldlen = self.rawsize(baserev)
2327 newlen = len(delta) - hlen
2329 newlen = len(delta) - hlen
2328 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2330 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2329 raise error.CensoredBaseError(
2331 raise error.CensoredBaseError(
2330 self.indexfile, self.node(baserev)
2332 self.indexfile, self.node(baserev)
2331 )
2333 )
2332
2334
2333 if not flags and self._peek_iscensored(baserev, delta, flush):
2335 if not flags and self._peek_iscensored(baserev, delta, flush):
2334 flags |= REVIDX_ISCENSORED
2336 flags |= REVIDX_ISCENSORED
2335
2337
2336 # We assume consumers of addrevisioncb will want to retrieve
2338 # We assume consumers of addrevisioncb will want to retrieve
2337 # the added revision, which will require a call to
2339 # the added revision, which will require a call to
2338 # revision(). revision() will fast path if there is a cache
2340 # revision(). revision() will fast path if there is a cache
2339 # hit. So, we tell _addrevision() to always cache in this case.
2341 # hit. So, we tell _addrevision() to always cache in this case.
2340 # We're only using addgroup() in the context of changegroup
2342 # We're only using addgroup() in the context of changegroup
2341 # generation so the revision data can always be handled as raw
2343 # generation so the revision data can always be handled as raw
2342 # by the flagprocessor.
2344 # by the flagprocessor.
2343 self._addrevision(
2345 self._addrevision(
2344 node,
2346 node,
2345 None,
2347 None,
2346 transaction,
2348 transaction,
2347 link,
2349 link,
2348 p1,
2350 p1,
2349 p2,
2351 p2,
2350 flags,
2352 flags,
2351 (baserev, delta),
2353 (baserev, delta),
2352 ifh,
2354 ifh,
2353 dfh,
2355 dfh,
2354 alwayscache=bool(addrevisioncb),
2356 alwayscache=bool(addrevisioncb),
2355 deltacomputer=deltacomputer,
2357 deltacomputer=deltacomputer,
2356 )
2358 )
2357
2359
2358 if addrevisioncb:
2360 if addrevisioncb:
2359 addrevisioncb(self, node)
2361 addrevisioncb(self, node)
2360
2362
2361 if not dfh and not self._inline:
2363 if not dfh and not self._inline:
2362 # addrevision switched from inline to conventional
2364 # addrevision switched from inline to conventional
2363 # reopen the index
2365 # reopen the index
2364 ifh.close()
2366 ifh.close()
2365 dfh = self._datafp(b"a+")
2367 dfh = self._datafp(b"a+")
2366 ifh = self._indexfp(b"a+")
2368 ifh = self._indexfp(b"a+")
2367 self._writinghandles = (ifh, dfh)
2369 self._writinghandles = (ifh, dfh)
2368 finally:
2370 finally:
2369 self._writinghandles = None
2371 self._writinghandles = None
2370
2372
2371 if dfh:
2373 if dfh:
2372 dfh.close()
2374 dfh.close()
2373 ifh.close()
2375 ifh.close()
2374
2376
2375 return nodes
2377 return nodes
2376
2378
2377 def iscensored(self, rev):
2379 def iscensored(self, rev):
2378 """Check if a file revision is censored."""
2380 """Check if a file revision is censored."""
2379 if not self._censorable:
2381 if not self._censorable:
2380 return False
2382 return False
2381
2383
2382 return self.flags(rev) & REVIDX_ISCENSORED
2384 return self.flags(rev) & REVIDX_ISCENSORED
2383
2385
2384 def _peek_iscensored(self, baserev, delta, flush):
2386 def _peek_iscensored(self, baserev, delta, flush):
2385 """Quickly check if a delta produces a censored revision."""
2387 """Quickly check if a delta produces a censored revision."""
2386 if not self._censorable:
2388 if not self._censorable:
2387 return False
2389 return False
2388
2390
2389 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2391 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2390
2392
2391 def getstrippoint(self, minlink):
2393 def getstrippoint(self, minlink):
2392 """find the minimum rev that must be stripped to strip the linkrev
2394 """find the minimum rev that must be stripped to strip the linkrev
2393
2395
2394 Returns a tuple containing the minimum rev and a set of all revs that
2396 Returns a tuple containing the minimum rev and a set of all revs that
2395 have linkrevs that will be broken by this strip.
2397 have linkrevs that will be broken by this strip.
2396 """
2398 """
2397 return storageutil.resolvestripinfo(
2399 return storageutil.resolvestripinfo(
2398 minlink,
2400 minlink,
2399 len(self) - 1,
2401 len(self) - 1,
2400 self.headrevs(),
2402 self.headrevs(),
2401 self.linkrev,
2403 self.linkrev,
2402 self.parentrevs,
2404 self.parentrevs,
2403 )
2405 )
2404
2406
2405 def strip(self, minlink, transaction):
2407 def strip(self, minlink, transaction):
2406 """truncate the revlog on the first revision with a linkrev >= minlink
2408 """truncate the revlog on the first revision with a linkrev >= minlink
2407
2409
2408 This function is called when we're stripping revision minlink and
2410 This function is called when we're stripping revision minlink and
2409 its descendants from the repository.
2411 its descendants from the repository.
2410
2412
2411 We have to remove all revisions with linkrev >= minlink, because
2413 We have to remove all revisions with linkrev >= minlink, because
2412 the equivalent changelog revisions will be renumbered after the
2414 the equivalent changelog revisions will be renumbered after the
2413 strip.
2415 strip.
2414
2416
2415 So we truncate the revlog on the first of these revisions, and
2417 So we truncate the revlog on the first of these revisions, and
2416 trust that the caller has saved the revisions that shouldn't be
2418 trust that the caller has saved the revisions that shouldn't be
2417 removed and that it'll re-add them after this truncation.
2419 removed and that it'll re-add them after this truncation.
2418 """
2420 """
2419 if len(self) == 0:
2421 if len(self) == 0:
2420 return
2422 return
2421
2423
2422 rev, _ = self.getstrippoint(minlink)
2424 rev, _ = self.getstrippoint(minlink)
2423 if rev == len(self):
2425 if rev == len(self):
2424 return
2426 return
2425
2427
2426 # first truncate the files on disk
2428 # first truncate the files on disk
2427 end = self.start(rev)
2429 end = self.start(rev)
2428 if not self._inline:
2430 if not self._inline:
2429 transaction.add(self.datafile, end)
2431 transaction.add(self.datafile, end)
2430 end = rev * self._io.size
2432 end = rev * self._io.size
2431 else:
2433 else:
2432 end += rev * self._io.size
2434 end += rev * self._io.size
2433
2435
2434 transaction.add(self.indexfile, end)
2436 transaction.add(self.indexfile, end)
2435
2437
2436 # then reset internal state in memory to forget those revisions
2438 # then reset internal state in memory to forget those revisions
2437 self._revisioncache = None
2439 self._revisioncache = None
2438 self._chaininfocache = {}
2440 self._chaininfocache = {}
2439 self._chunkclear()
2441 self._chunkclear()
2440 for x in pycompat.xrange(rev, len(self)):
2442 for x in pycompat.xrange(rev, len(self)):
2441 del self.nodemap[self.node(x)]
2443 del self.nodemap[self.node(x)]
2442
2444
2443 del self.index[rev:-1]
2445 del self.index[rev:-1]
2444 self._nodepos = None
2446 self._nodepos = None
2445
2447
2446 def checksize(self):
2448 def checksize(self):
2447 """Check size of index and data files
2449 """Check size of index and data files
2448
2450
2449 return a (dd, di) tuple.
2451 return a (dd, di) tuple.
2450 - dd: extra bytes for the "data" file
2452 - dd: extra bytes for the "data" file
2451 - di: extra bytes for the "index" file
2453 - di: extra bytes for the "index" file
2452
2454
2453 A healthy revlog will return (0, 0).
2455 A healthy revlog will return (0, 0).
2454 """
2456 """
2455 expected = 0
2457 expected = 0
2456 if len(self):
2458 if len(self):
2457 expected = max(0, self.end(len(self) - 1))
2459 expected = max(0, self.end(len(self) - 1))
2458
2460
2459 try:
2461 try:
2460 with self._datafp() as f:
2462 with self._datafp() as f:
2461 f.seek(0, io.SEEK_END)
2463 f.seek(0, io.SEEK_END)
2462 actual = f.tell()
2464 actual = f.tell()
2463 dd = actual - expected
2465 dd = actual - expected
2464 except IOError as inst:
2466 except IOError as inst:
2465 if inst.errno != errno.ENOENT:
2467 if inst.errno != errno.ENOENT:
2466 raise
2468 raise
2467 dd = 0
2469 dd = 0
2468
2470
2469 try:
2471 try:
2470 f = self.opener(self.indexfile)
2472 f = self.opener(self.indexfile)
2471 f.seek(0, io.SEEK_END)
2473 f.seek(0, io.SEEK_END)
2472 actual = f.tell()
2474 actual = f.tell()
2473 f.close()
2475 f.close()
2474 s = self._io.size
2476 s = self._io.size
2475 i = max(0, actual // s)
2477 i = max(0, actual // s)
2476 di = actual - (i * s)
2478 di = actual - (i * s)
2477 if self._inline:
2479 if self._inline:
2478 databytes = 0
2480 databytes = 0
2479 for r in self:
2481 for r in self:
2480 databytes += max(0, self.length(r))
2482 databytes += max(0, self.length(r))
2481 dd = 0
2483 dd = 0
2482 di = actual - len(self) * s - databytes
2484 di = actual - len(self) * s - databytes
2483 except IOError as inst:
2485 except IOError as inst:
2484 if inst.errno != errno.ENOENT:
2486 if inst.errno != errno.ENOENT:
2485 raise
2487 raise
2486 di = 0
2488 di = 0
2487
2489
2488 return (dd, di)
2490 return (dd, di)
2489
2491
2490 def files(self):
2492 def files(self):
2491 res = [self.indexfile]
2493 res = [self.indexfile]
2492 if not self._inline:
2494 if not self._inline:
2493 res.append(self.datafile)
2495 res.append(self.datafile)
2494 return res
2496 return res
2495
2497
2496 def emitrevisions(
2498 def emitrevisions(
2497 self,
2499 self,
2498 nodes,
2500 nodes,
2499 nodesorder=None,
2501 nodesorder=None,
2500 revisiondata=False,
2502 revisiondata=False,
2501 assumehaveparentrevisions=False,
2503 assumehaveparentrevisions=False,
2502 deltamode=repository.CG_DELTAMODE_STD,
2504 deltamode=repository.CG_DELTAMODE_STD,
2503 ):
2505 ):
2504 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2506 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2505 raise error.ProgrammingError(
2507 raise error.ProgrammingError(
2506 b'unhandled value for nodesorder: %s' % nodesorder
2508 b'unhandled value for nodesorder: %s' % nodesorder
2507 )
2509 )
2508
2510
2509 if nodesorder is None and not self._generaldelta:
2511 if nodesorder is None and not self._generaldelta:
2510 nodesorder = b'storage'
2512 nodesorder = b'storage'
2511
2513
2512 if (
2514 if (
2513 not self._storedeltachains
2515 not self._storedeltachains
2514 and deltamode != repository.CG_DELTAMODE_PREV
2516 and deltamode != repository.CG_DELTAMODE_PREV
2515 ):
2517 ):
2516 deltamode = repository.CG_DELTAMODE_FULL
2518 deltamode = repository.CG_DELTAMODE_FULL
2517
2519
2518 return storageutil.emitrevisions(
2520 return storageutil.emitrevisions(
2519 self,
2521 self,
2520 nodes,
2522 nodes,
2521 nodesorder,
2523 nodesorder,
2522 revlogrevisiondelta,
2524 revlogrevisiondelta,
2523 deltaparentfn=self.deltaparent,
2525 deltaparentfn=self.deltaparent,
2524 candeltafn=self.candelta,
2526 candeltafn=self.candelta,
2525 rawsizefn=self.rawsize,
2527 rawsizefn=self.rawsize,
2526 revdifffn=self.revdiff,
2528 revdifffn=self.revdiff,
2527 flagsfn=self.flags,
2529 flagsfn=self.flags,
2528 deltamode=deltamode,
2530 deltamode=deltamode,
2529 revisiondata=revisiondata,
2531 revisiondata=revisiondata,
2530 assumehaveparentrevisions=assumehaveparentrevisions,
2532 assumehaveparentrevisions=assumehaveparentrevisions,
2531 )
2533 )
2532
2534
2533 DELTAREUSEALWAYS = b'always'
2535 DELTAREUSEALWAYS = b'always'
2534 DELTAREUSESAMEREVS = b'samerevs'
2536 DELTAREUSESAMEREVS = b'samerevs'
2535 DELTAREUSENEVER = b'never'
2537 DELTAREUSENEVER = b'never'
2536
2538
2537 DELTAREUSEFULLADD = b'fulladd'
2539 DELTAREUSEFULLADD = b'fulladd'
2538
2540
2539 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2541 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2540
2542
2541 def clone(
2543 def clone(
2542 self,
2544 self,
2543 tr,
2545 tr,
2544 destrevlog,
2546 destrevlog,
2545 addrevisioncb=None,
2547 addrevisioncb=None,
2546 deltareuse=DELTAREUSESAMEREVS,
2548 deltareuse=DELTAREUSESAMEREVS,
2547 forcedeltabothparents=None,
2549 forcedeltabothparents=None,
2548 sidedatacompanion=None,
2550 sidedatacompanion=None,
2549 ):
2551 ):
2550 """Copy this revlog to another, possibly with format changes.
2552 """Copy this revlog to another, possibly with format changes.
2551
2553
2552 The destination revlog will contain the same revisions and nodes.
2554 The destination revlog will contain the same revisions and nodes.
2553 However, it may not be bit-for-bit identical due to e.g. delta encoding
2555 However, it may not be bit-for-bit identical due to e.g. delta encoding
2554 differences.
2556 differences.
2555
2557
2556 The ``deltareuse`` argument control how deltas from the existing revlog
2558 The ``deltareuse`` argument control how deltas from the existing revlog
2557 are preserved in the destination revlog. The argument can have the
2559 are preserved in the destination revlog. The argument can have the
2558 following values:
2560 following values:
2559
2561
2560 DELTAREUSEALWAYS
2562 DELTAREUSEALWAYS
2561 Deltas will always be reused (if possible), even if the destination
2563 Deltas will always be reused (if possible), even if the destination
2562 revlog would not select the same revisions for the delta. This is the
2564 revlog would not select the same revisions for the delta. This is the
2563 fastest mode of operation.
2565 fastest mode of operation.
2564 DELTAREUSESAMEREVS
2566 DELTAREUSESAMEREVS
2565 Deltas will be reused if the destination revlog would pick the same
2567 Deltas will be reused if the destination revlog would pick the same
2566 revisions for the delta. This mode strikes a balance between speed
2568 revisions for the delta. This mode strikes a balance between speed
2567 and optimization.
2569 and optimization.
2568 DELTAREUSENEVER
2570 DELTAREUSENEVER
2569 Deltas will never be reused. This is the slowest mode of execution.
2571 Deltas will never be reused. This is the slowest mode of execution.
2570 This mode can be used to recompute deltas (e.g. if the diff/delta
2572 This mode can be used to recompute deltas (e.g. if the diff/delta
2571 algorithm changes).
2573 algorithm changes).
2572 DELTAREUSEFULLADD
2574 DELTAREUSEFULLADD
2573 Revision will be re-added as if their were new content. This is
2575 Revision will be re-added as if their were new content. This is
2574 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2576 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2575 eg: large file detection and handling.
2577 eg: large file detection and handling.
2576
2578
2577 Delta computation can be slow, so the choice of delta reuse policy can
2579 Delta computation can be slow, so the choice of delta reuse policy can
2578 significantly affect run time.
2580 significantly affect run time.
2579
2581
2580 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2582 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2581 two extremes. Deltas will be reused if they are appropriate. But if the
2583 two extremes. Deltas will be reused if they are appropriate. But if the
2582 delta could choose a better revision, it will do so. This means if you
2584 delta could choose a better revision, it will do so. This means if you
2583 are converting a non-generaldelta revlog to a generaldelta revlog,
2585 are converting a non-generaldelta revlog to a generaldelta revlog,
2584 deltas will be recomputed if the delta's parent isn't a parent of the
2586 deltas will be recomputed if the delta's parent isn't a parent of the
2585 revision.
2587 revision.
2586
2588
2587 In addition to the delta policy, the ``forcedeltabothparents``
2589 In addition to the delta policy, the ``forcedeltabothparents``
2588 argument controls whether to force compute deltas against both parents
2590 argument controls whether to force compute deltas against both parents
2589 for merges. By default, the current default is used.
2591 for merges. By default, the current default is used.
2590
2592
2591 If not None, the `sidedatacompanion` is callable that accept two
2593 If not None, the `sidedatacompanion` is callable that accept two
2592 arguments:
2594 arguments:
2593
2595
2594 (srcrevlog, rev)
2596 (srcrevlog, rev)
2595
2597
2596 and return a triplet that control changes to sidedata content from the
2598 and return a triplet that control changes to sidedata content from the
2597 old revision to the new clone result:
2599 old revision to the new clone result:
2598
2600
2599 (dropall, filterout, update)
2601 (dropall, filterout, update)
2600
2602
2601 * if `dropall` is True, all sidedata should be dropped
2603 * if `dropall` is True, all sidedata should be dropped
2602 * `filterout` is a set of sidedata keys that should be dropped
2604 * `filterout` is a set of sidedata keys that should be dropped
2603 * `update` is a mapping of additionnal/new key -> value
2605 * `update` is a mapping of additionnal/new key -> value
2604 """
2606 """
2605 if deltareuse not in self.DELTAREUSEALL:
2607 if deltareuse not in self.DELTAREUSEALL:
2606 raise ValueError(
2608 raise ValueError(
2607 _(b'value for deltareuse invalid: %s') % deltareuse
2609 _(b'value for deltareuse invalid: %s') % deltareuse
2608 )
2610 )
2609
2611
2610 if len(destrevlog):
2612 if len(destrevlog):
2611 raise ValueError(_(b'destination revlog is not empty'))
2613 raise ValueError(_(b'destination revlog is not empty'))
2612
2614
2613 if getattr(self, 'filteredrevs', None):
2615 if getattr(self, 'filteredrevs', None):
2614 raise ValueError(_(b'source revlog has filtered revisions'))
2616 raise ValueError(_(b'source revlog has filtered revisions'))
2615 if getattr(destrevlog, 'filteredrevs', None):
2617 if getattr(destrevlog, 'filteredrevs', None):
2616 raise ValueError(_(b'destination revlog has filtered revisions'))
2618 raise ValueError(_(b'destination revlog has filtered revisions'))
2617
2619
2618 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2620 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2619 # if possible.
2621 # if possible.
2620 oldlazydelta = destrevlog._lazydelta
2622 oldlazydelta = destrevlog._lazydelta
2621 oldlazydeltabase = destrevlog._lazydeltabase
2623 oldlazydeltabase = destrevlog._lazydeltabase
2622 oldamd = destrevlog._deltabothparents
2624 oldamd = destrevlog._deltabothparents
2623
2625
2624 try:
2626 try:
2625 if deltareuse == self.DELTAREUSEALWAYS:
2627 if deltareuse == self.DELTAREUSEALWAYS:
2626 destrevlog._lazydeltabase = True
2628 destrevlog._lazydeltabase = True
2627 destrevlog._lazydelta = True
2629 destrevlog._lazydelta = True
2628 elif deltareuse == self.DELTAREUSESAMEREVS:
2630 elif deltareuse == self.DELTAREUSESAMEREVS:
2629 destrevlog._lazydeltabase = False
2631 destrevlog._lazydeltabase = False
2630 destrevlog._lazydelta = True
2632 destrevlog._lazydelta = True
2631 elif deltareuse == self.DELTAREUSENEVER:
2633 elif deltareuse == self.DELTAREUSENEVER:
2632 destrevlog._lazydeltabase = False
2634 destrevlog._lazydeltabase = False
2633 destrevlog._lazydelta = False
2635 destrevlog._lazydelta = False
2634
2636
2635 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2637 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2636
2638
2637 self._clone(
2639 self._clone(
2638 tr,
2640 tr,
2639 destrevlog,
2641 destrevlog,
2640 addrevisioncb,
2642 addrevisioncb,
2641 deltareuse,
2643 deltareuse,
2642 forcedeltabothparents,
2644 forcedeltabothparents,
2643 sidedatacompanion,
2645 sidedatacompanion,
2644 )
2646 )
2645
2647
2646 finally:
2648 finally:
2647 destrevlog._lazydelta = oldlazydelta
2649 destrevlog._lazydelta = oldlazydelta
2648 destrevlog._lazydeltabase = oldlazydeltabase
2650 destrevlog._lazydeltabase = oldlazydeltabase
2649 destrevlog._deltabothparents = oldamd
2651 destrevlog._deltabothparents = oldamd
2650
2652
2651 def _clone(
2653 def _clone(
2652 self,
2654 self,
2653 tr,
2655 tr,
2654 destrevlog,
2656 destrevlog,
2655 addrevisioncb,
2657 addrevisioncb,
2656 deltareuse,
2658 deltareuse,
2657 forcedeltabothparents,
2659 forcedeltabothparents,
2658 sidedatacompanion,
2660 sidedatacompanion,
2659 ):
2661 ):
2660 """perform the core duty of `revlog.clone` after parameter processing"""
2662 """perform the core duty of `revlog.clone` after parameter processing"""
2661 deltacomputer = deltautil.deltacomputer(destrevlog)
2663 deltacomputer = deltautil.deltacomputer(destrevlog)
2662 index = self.index
2664 index = self.index
2663 for rev in self:
2665 for rev in self:
2664 entry = index[rev]
2666 entry = index[rev]
2665
2667
2666 # Some classes override linkrev to take filtered revs into
2668 # Some classes override linkrev to take filtered revs into
2667 # account. Use raw entry from index.
2669 # account. Use raw entry from index.
2668 flags = entry[0] & 0xFFFF
2670 flags = entry[0] & 0xFFFF
2669 linkrev = entry[4]
2671 linkrev = entry[4]
2670 p1 = index[entry[5]][7]
2672 p1 = index[entry[5]][7]
2671 p2 = index[entry[6]][7]
2673 p2 = index[entry[6]][7]
2672 node = entry[7]
2674 node = entry[7]
2673
2675
2674 sidedataactions = (False, [], {})
2676 sidedataactions = (False, [], {})
2675 if sidedatacompanion is not None:
2677 if sidedatacompanion is not None:
2676 sidedataactions = sidedatacompanion(self, rev)
2678 sidedataactions = sidedatacompanion(self, rev)
2677
2679
2678 # (Possibly) reuse the delta from the revlog if allowed and
2680 # (Possibly) reuse the delta from the revlog if allowed and
2679 # the revlog chunk is a delta.
2681 # the revlog chunk is a delta.
2680 cachedelta = None
2682 cachedelta = None
2681 rawtext = None
2683 rawtext = None
2682 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2684 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2683 dropall, filterout, update = sidedataactions
2685 dropall, filterout, update = sidedataactions
2684 text, sidedata = self._revisiondata(rev)
2686 text, sidedata = self._revisiondata(rev)
2685 if dropall:
2687 if dropall:
2686 sidedata = {}
2688 sidedata = {}
2687 for key in filterout:
2689 for key in filterout:
2688 sidedata.pop(key, None)
2690 sidedata.pop(key, None)
2689 sidedata.update(update)
2691 sidedata.update(update)
2690 if not sidedata:
2692 if not sidedata:
2691 sidedata = None
2693 sidedata = None
2692 destrevlog.addrevision(
2694 destrevlog.addrevision(
2693 text,
2695 text,
2694 tr,
2696 tr,
2695 linkrev,
2697 linkrev,
2696 p1,
2698 p1,
2697 p2,
2699 p2,
2698 cachedelta=cachedelta,
2700 cachedelta=cachedelta,
2699 node=node,
2701 node=node,
2700 flags=flags,
2702 flags=flags,
2701 deltacomputer=deltacomputer,
2703 deltacomputer=deltacomputer,
2702 sidedata=sidedata,
2704 sidedata=sidedata,
2703 )
2705 )
2704 else:
2706 else:
2705 if destrevlog._lazydelta:
2707 if destrevlog._lazydelta:
2706 dp = self.deltaparent(rev)
2708 dp = self.deltaparent(rev)
2707 if dp != nullrev:
2709 if dp != nullrev:
2708 cachedelta = (dp, bytes(self._chunk(rev)))
2710 cachedelta = (dp, bytes(self._chunk(rev)))
2709
2711
2710 if not cachedelta:
2712 if not cachedelta:
2711 rawtext = self.rawdata(rev)
2713 rawtext = self.rawdata(rev)
2712
2714
2713 ifh = destrevlog.opener(
2715 ifh = destrevlog.opener(
2714 destrevlog.indexfile, b'a+', checkambig=False
2716 destrevlog.indexfile, b'a+', checkambig=False
2715 )
2717 )
2716 dfh = None
2718 dfh = None
2717 if not destrevlog._inline:
2719 if not destrevlog._inline:
2718 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2720 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2719 try:
2721 try:
2720 destrevlog._addrevision(
2722 destrevlog._addrevision(
2721 node,
2723 node,
2722 rawtext,
2724 rawtext,
2723 tr,
2725 tr,
2724 linkrev,
2726 linkrev,
2725 p1,
2727 p1,
2726 p2,
2728 p2,
2727 flags,
2729 flags,
2728 cachedelta,
2730 cachedelta,
2729 ifh,
2731 ifh,
2730 dfh,
2732 dfh,
2731 deltacomputer=deltacomputer,
2733 deltacomputer=deltacomputer,
2732 )
2734 )
2733 finally:
2735 finally:
2734 if dfh:
2736 if dfh:
2735 dfh.close()
2737 dfh.close()
2736 ifh.close()
2738 ifh.close()
2737
2739
2738 if addrevisioncb:
2740 if addrevisioncb:
2739 addrevisioncb(self, rev, node)
2741 addrevisioncb(self, rev, node)
2740
2742
2741 def censorrevision(self, tr, censornode, tombstone=b''):
2743 def censorrevision(self, tr, censornode, tombstone=b''):
2742 if (self.version & 0xFFFF) == REVLOGV0:
2744 if (self.version & 0xFFFF) == REVLOGV0:
2743 raise error.RevlogError(
2745 raise error.RevlogError(
2744 _(b'cannot censor with version %d revlogs') % self.version
2746 _(b'cannot censor with version %d revlogs') % self.version
2745 )
2747 )
2746
2748
2747 censorrev = self.rev(censornode)
2749 censorrev = self.rev(censornode)
2748 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2750 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2749
2751
2750 if len(tombstone) > self.rawsize(censorrev):
2752 if len(tombstone) > self.rawsize(censorrev):
2751 raise error.Abort(
2753 raise error.Abort(
2752 _(b'censor tombstone must be no longer than censored data')
2754 _(b'censor tombstone must be no longer than censored data')
2753 )
2755 )
2754
2756
2755 # Rewriting the revlog in place is hard. Our strategy for censoring is
2757 # Rewriting the revlog in place is hard. Our strategy for censoring is
2756 # to create a new revlog, copy all revisions to it, then replace the
2758 # to create a new revlog, copy all revisions to it, then replace the
2757 # revlogs on transaction close.
2759 # revlogs on transaction close.
2758
2760
2759 newindexfile = self.indexfile + b'.tmpcensored'
2761 newindexfile = self.indexfile + b'.tmpcensored'
2760 newdatafile = self.datafile + b'.tmpcensored'
2762 newdatafile = self.datafile + b'.tmpcensored'
2761
2763
2762 # This is a bit dangerous. We could easily have a mismatch of state.
2764 # This is a bit dangerous. We could easily have a mismatch of state.
2763 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2765 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2764 newrl.version = self.version
2766 newrl.version = self.version
2765 newrl._generaldelta = self._generaldelta
2767 newrl._generaldelta = self._generaldelta
2766 newrl._io = self._io
2768 newrl._io = self._io
2767
2769
2768 for rev in self.revs():
2770 for rev in self.revs():
2769 node = self.node(rev)
2771 node = self.node(rev)
2770 p1, p2 = self.parents(node)
2772 p1, p2 = self.parents(node)
2771
2773
2772 if rev == censorrev:
2774 if rev == censorrev:
2773 newrl.addrawrevision(
2775 newrl.addrawrevision(
2774 tombstone,
2776 tombstone,
2775 tr,
2777 tr,
2776 self.linkrev(censorrev),
2778 self.linkrev(censorrev),
2777 p1,
2779 p1,
2778 p2,
2780 p2,
2779 censornode,
2781 censornode,
2780 REVIDX_ISCENSORED,
2782 REVIDX_ISCENSORED,
2781 )
2783 )
2782
2784
2783 if newrl.deltaparent(rev) != nullrev:
2785 if newrl.deltaparent(rev) != nullrev:
2784 raise error.Abort(
2786 raise error.Abort(
2785 _(
2787 _(
2786 b'censored revision stored as delta; '
2788 b'censored revision stored as delta; '
2787 b'cannot censor'
2789 b'cannot censor'
2788 ),
2790 ),
2789 hint=_(
2791 hint=_(
2790 b'censoring of revlogs is not '
2792 b'censoring of revlogs is not '
2791 b'fully implemented; please report '
2793 b'fully implemented; please report '
2792 b'this bug'
2794 b'this bug'
2793 ),
2795 ),
2794 )
2796 )
2795 continue
2797 continue
2796
2798
2797 if self.iscensored(rev):
2799 if self.iscensored(rev):
2798 if self.deltaparent(rev) != nullrev:
2800 if self.deltaparent(rev) != nullrev:
2799 raise error.Abort(
2801 raise error.Abort(
2800 _(
2802 _(
2801 b'cannot censor due to censored '
2803 b'cannot censor due to censored '
2802 b'revision having delta stored'
2804 b'revision having delta stored'
2803 )
2805 )
2804 )
2806 )
2805 rawtext = self._chunk(rev)
2807 rawtext = self._chunk(rev)
2806 else:
2808 else:
2807 rawtext = self.rawdata(rev)
2809 rawtext = self.rawdata(rev)
2808
2810
2809 newrl.addrawrevision(
2811 newrl.addrawrevision(
2810 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2812 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2811 )
2813 )
2812
2814
2813 tr.addbackup(self.indexfile, location=b'store')
2815 tr.addbackup(self.indexfile, location=b'store')
2814 if not self._inline:
2816 if not self._inline:
2815 tr.addbackup(self.datafile, location=b'store')
2817 tr.addbackup(self.datafile, location=b'store')
2816
2818
2817 self.opener.rename(newrl.indexfile, self.indexfile)
2819 self.opener.rename(newrl.indexfile, self.indexfile)
2818 if not self._inline:
2820 if not self._inline:
2819 self.opener.rename(newrl.datafile, self.datafile)
2821 self.opener.rename(newrl.datafile, self.datafile)
2820
2822
2821 self.clearcaches()
2823 self.clearcaches()
2822 self._loadindex()
2824 self._loadindex()
2823
2825
2824 def verifyintegrity(self, state):
2826 def verifyintegrity(self, state):
2825 """Verifies the integrity of the revlog.
2827 """Verifies the integrity of the revlog.
2826
2828
2827 Yields ``revlogproblem`` instances describing problems that are
2829 Yields ``revlogproblem`` instances describing problems that are
2828 found.
2830 found.
2829 """
2831 """
2830 dd, di = self.checksize()
2832 dd, di = self.checksize()
2831 if dd:
2833 if dd:
2832 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2834 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2833 if di:
2835 if di:
2834 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2836 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2835
2837
2836 version = self.version & 0xFFFF
2838 version = self.version & 0xFFFF
2837
2839
2838 # The verifier tells us what version revlog we should be.
2840 # The verifier tells us what version revlog we should be.
2839 if version != state[b'expectedversion']:
2841 if version != state[b'expectedversion']:
2840 yield revlogproblem(
2842 yield revlogproblem(
2841 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2843 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2842 % (self.indexfile, version, state[b'expectedversion'])
2844 % (self.indexfile, version, state[b'expectedversion'])
2843 )
2845 )
2844
2846
2845 state[b'skipread'] = set()
2847 state[b'skipread'] = set()
2846
2848
2847 for rev in self:
2849 for rev in self:
2848 node = self.node(rev)
2850 node = self.node(rev)
2849
2851
2850 # Verify contents. 4 cases to care about:
2852 # Verify contents. 4 cases to care about:
2851 #
2853 #
2852 # common: the most common case
2854 # common: the most common case
2853 # rename: with a rename
2855 # rename: with a rename
2854 # meta: file content starts with b'\1\n', the metadata
2856 # meta: file content starts with b'\1\n', the metadata
2855 # header defined in filelog.py, but without a rename
2857 # header defined in filelog.py, but without a rename
2856 # ext: content stored externally
2858 # ext: content stored externally
2857 #
2859 #
2858 # More formally, their differences are shown below:
2860 # More formally, their differences are shown below:
2859 #
2861 #
2860 # | common | rename | meta | ext
2862 # | common | rename | meta | ext
2861 # -------------------------------------------------------
2863 # -------------------------------------------------------
2862 # flags() | 0 | 0 | 0 | not 0
2864 # flags() | 0 | 0 | 0 | not 0
2863 # renamed() | False | True | False | ?
2865 # renamed() | False | True | False | ?
2864 # rawtext[0:2]=='\1\n'| False | True | True | ?
2866 # rawtext[0:2]=='\1\n'| False | True | True | ?
2865 #
2867 #
2866 # "rawtext" means the raw text stored in revlog data, which
2868 # "rawtext" means the raw text stored in revlog data, which
2867 # could be retrieved by "rawdata(rev)". "text"
2869 # could be retrieved by "rawdata(rev)". "text"
2868 # mentioned below is "revision(rev)".
2870 # mentioned below is "revision(rev)".
2869 #
2871 #
2870 # There are 3 different lengths stored physically:
2872 # There are 3 different lengths stored physically:
2871 # 1. L1: rawsize, stored in revlog index
2873 # 1. L1: rawsize, stored in revlog index
2872 # 2. L2: len(rawtext), stored in revlog data
2874 # 2. L2: len(rawtext), stored in revlog data
2873 # 3. L3: len(text), stored in revlog data if flags==0, or
2875 # 3. L3: len(text), stored in revlog data if flags==0, or
2874 # possibly somewhere else if flags!=0
2876 # possibly somewhere else if flags!=0
2875 #
2877 #
2876 # L1 should be equal to L2. L3 could be different from them.
2878 # L1 should be equal to L2. L3 could be different from them.
2877 # "text" may or may not affect commit hash depending on flag
2879 # "text" may or may not affect commit hash depending on flag
2878 # processors (see flagutil.addflagprocessor).
2880 # processors (see flagutil.addflagprocessor).
2879 #
2881 #
2880 # | common | rename | meta | ext
2882 # | common | rename | meta | ext
2881 # -------------------------------------------------
2883 # -------------------------------------------------
2882 # rawsize() | L1 | L1 | L1 | L1
2884 # rawsize() | L1 | L1 | L1 | L1
2883 # size() | L1 | L2-LM | L1(*) | L1 (?)
2885 # size() | L1 | L2-LM | L1(*) | L1 (?)
2884 # len(rawtext) | L2 | L2 | L2 | L2
2886 # len(rawtext) | L2 | L2 | L2 | L2
2885 # len(text) | L2 | L2 | L2 | L3
2887 # len(text) | L2 | L2 | L2 | L3
2886 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2888 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2887 #
2889 #
2888 # LM: length of metadata, depending on rawtext
2890 # LM: length of metadata, depending on rawtext
2889 # (*): not ideal, see comment in filelog.size
2891 # (*): not ideal, see comment in filelog.size
2890 # (?): could be "- len(meta)" if the resolved content has
2892 # (?): could be "- len(meta)" if the resolved content has
2891 # rename metadata
2893 # rename metadata
2892 #
2894 #
2893 # Checks needed to be done:
2895 # Checks needed to be done:
2894 # 1. length check: L1 == L2, in all cases.
2896 # 1. length check: L1 == L2, in all cases.
2895 # 2. hash check: depending on flag processor, we may need to
2897 # 2. hash check: depending on flag processor, we may need to
2896 # use either "text" (external), or "rawtext" (in revlog).
2898 # use either "text" (external), or "rawtext" (in revlog).
2897
2899
2898 try:
2900 try:
2899 skipflags = state.get(b'skipflags', 0)
2901 skipflags = state.get(b'skipflags', 0)
2900 if skipflags:
2902 if skipflags:
2901 skipflags &= self.flags(rev)
2903 skipflags &= self.flags(rev)
2902
2904
2903 if skipflags:
2905 if skipflags:
2904 state[b'skipread'].add(node)
2906 state[b'skipread'].add(node)
2905 else:
2907 else:
2906 # Side-effect: read content and verify hash.
2908 # Side-effect: read content and verify hash.
2907 self.revision(node)
2909 self.revision(node)
2908
2910
2909 l1 = self.rawsize(rev)
2911 l1 = self.rawsize(rev)
2910 l2 = len(self.rawdata(node))
2912 l2 = len(self.rawdata(node))
2911
2913
2912 if l1 != l2:
2914 if l1 != l2:
2913 yield revlogproblem(
2915 yield revlogproblem(
2914 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2916 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2915 node=node,
2917 node=node,
2916 )
2918 )
2917
2919
2918 except error.CensoredNodeError:
2920 except error.CensoredNodeError:
2919 if state[b'erroroncensored']:
2921 if state[b'erroroncensored']:
2920 yield revlogproblem(
2922 yield revlogproblem(
2921 error=_(b'censored file data'), node=node
2923 error=_(b'censored file data'), node=node
2922 )
2924 )
2923 state[b'skipread'].add(node)
2925 state[b'skipread'].add(node)
2924 except Exception as e:
2926 except Exception as e:
2925 yield revlogproblem(
2927 yield revlogproblem(
2926 error=_(b'unpacking %s: %s')
2928 error=_(b'unpacking %s: %s')
2927 % (short(node), stringutil.forcebytestr(e)),
2929 % (short(node), stringutil.forcebytestr(e)),
2928 node=node,
2930 node=node,
2929 )
2931 )
2930 state[b'skipread'].add(node)
2932 state[b'skipread'].add(node)
2931
2933
2932 def storageinfo(
2934 def storageinfo(
2933 self,
2935 self,
2934 exclusivefiles=False,
2936 exclusivefiles=False,
2935 sharedfiles=False,
2937 sharedfiles=False,
2936 revisionscount=False,
2938 revisionscount=False,
2937 trackedsize=False,
2939 trackedsize=False,
2938 storedsize=False,
2940 storedsize=False,
2939 ):
2941 ):
2940 d = {}
2942 d = {}
2941
2943
2942 if exclusivefiles:
2944 if exclusivefiles:
2943 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2945 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2944 if not self._inline:
2946 if not self._inline:
2945 d[b'exclusivefiles'].append((self.opener, self.datafile))
2947 d[b'exclusivefiles'].append((self.opener, self.datafile))
2946
2948
2947 if sharedfiles:
2949 if sharedfiles:
2948 d[b'sharedfiles'] = []
2950 d[b'sharedfiles'] = []
2949
2951
2950 if revisionscount:
2952 if revisionscount:
2951 d[b'revisionscount'] = len(self)
2953 d[b'revisionscount'] = len(self)
2952
2954
2953 if trackedsize:
2955 if trackedsize:
2954 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2956 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2955
2957
2956 if storedsize:
2958 if storedsize:
2957 d[b'storedsize'] = sum(
2959 d[b'storedsize'] = sum(
2958 self.opener.stat(path).st_size for path in self.files()
2960 self.opener.stat(path).st_size for path in self.files()
2959 )
2961 )
2960
2962
2961 return d
2963 return d
@@ -0,0 +1,14 b''
1 # mercurial.revlogutils -- basic utilities for revlog
2 #
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 from __future__ import absolute_import
9 from .. import error
10
11
12 class NodeMap(dict):
13 def __missing__(self, x):
14 raise error.RevlogError('unknown node: %s' % x)
General Comments 0
You need to be logged in to leave comments. Login now