revlog: add a way to control sidedata changes during revlog.clone...
marmoute
r43403:3de4d13f default
@@ -1,2919 +1,2958 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVIDX_SIDEDATA,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
rustancestor = policy.importrust(r'ancestor')
rustdagop = policy.importrust(r'dagop')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False, {}


def ellipsiswriteprocessor(rl, text, sidedata):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def getoffset(q):
    return int(q >> 16)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)


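# A minimal illustrative sketch of the packed index value handled by the
# three helpers above: 48 bits of file offset stored above 16 bits of flags,
# so packing and unpacking round-trip cleanly.
#
#   >>> packed = offset_type(1024, 0)
#   >>> getoffset(packed)
#   1024
#   >>> gettype(packed)
#   0
#
# offset_type() rejects any flag bit outside flagutil.REVIDX_KNOWN_FLAGS.

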
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(b">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
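# A minimal sketch of the entry size implied by the v0 layout above: four
# big-endian signed 32-bit integers plus three 20-byte nodeids, 76 bytes per
# index entry.
#
#   >>> indexformatv0.size
#   76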


class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)


class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)


# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
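# A minimal sketch of the v1 ("ng") entry size implied by the layout above:
# an 8-byte offset/flags field, six 4-byte integers, and a 20-byte nodeid
# padded with 12 null bytes, 64 bytes per entry.
#
#   >>> indexformatng.size
#   64
#
# As revlogio.packentry() below shows, the first 4 bytes of the entry for
# rev 0 are overwritten with the revlog's version header.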

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        if self.hassidedata:
            self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )

        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )
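        # A minimal sketch of the header decomposition above, assuming the
        # v1 constants (REVLOGV1 == 1, FLAG_INLINE_DATA == 1 << 16):
        #
        #   >>> header = REVLOGV1 | FLAG_INLINE_DATA
        #   >>> header & 0xFFFF == REVLOGV1
        #   True
        #   >>> bool((header & ~0xFFFF) & FLAG_INLINE_DATA)
        #   True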
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != b'r':
            args[r'checkambig'] = self._checkambig
        if mode == b'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}

        try:
            # If we are using the native C version, you are in a fun case
            # where self.index, self.nodemap and self._nodecaches is the same
            # object.
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))

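    # A minimal sketch of the pure-Python lookup path in rev() above,
    # assuming a hypothetical revlog instance `rl` and node `somenode`: the
    # backward scan from _nodepos memoizes every node it passes in
    # _nodecache, so repeated lookups visit each index entry at most once.
    #
    #   >>> r = rl.rev(somenode)       # scans the index and caches nodes
    #   >>> rl.rev(somenode) == r      # answered directly from _nodecache
    #   True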
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

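    # A minimal sketch of the fast-path mask in size() above: XOR-ing
    # REVIDX_ELLIPSIS out of the known flags leaves only the flags whose
    # "read" processor may change the text, so a zero result means the
    # processed size equals rawsize.
    #
    #   >>> mask = flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS
    #   >>> bool(REVIDX_ELLIPSIS & mask)
    #   False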
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

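    # A minimal sketch of the chain walk in _deltachain() above, assuming a
    # hypothetical generaldelta revlog `rl` where rev 5 deltas against rev 3
    # and rev 3 against rev 1, a full snapshot:
    #
    #   >>> rl._deltachain(5)
    #   ([1, 3, 5], False)
    #   >>> rl._deltachain(5, stoprev=3)
    #   ([5], True)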
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, b'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

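    # A minimal sketch of the lazyset used in findcommonmissing() above,
    # assuming a hypothetical standalone instance built from a plain
    # iterator: membership consults the explicitly added values first, then
    # falls back to the lazily produced ancestors.
    #
    #   >>> s = lazyset(iter([2, 4]))
    #   >>> s.add(7)
    #   >>> 7 in s, 4 in s
    #   (True, True)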
931 def incrementalmissingrevs(self, common=None):
931 def incrementalmissingrevs(self, common=None):
932 """Return an object that can be used to incrementally compute the
932 """Return an object that can be used to incrementally compute the
933 revision numbers of the ancestors of arbitrary sets that are not
933 revision numbers of the ancestors of arbitrary sets that are not
934 ancestors of common. This is an ancestor.incrementalmissingancestors
934 ancestors of common. This is an ancestor.incrementalmissingancestors
935 object.
935 object.
936
936
937 'common' is a list of revision numbers. If common is not supplied, uses
937 'common' is a list of revision numbers. If common is not supplied, uses
938 nullrev.
938 nullrev.
939 """
939 """
940 if common is None:
940 if common is None:
941 common = [nullrev]
941 common = [nullrev]
942
942
943 if rustancestor is not None:
943 if rustancestor is not None:
944 return rustancestor.MissingAncestors(self.index, common)
944 return rustancestor.MissingAncestors(self.index, common)
945 return ancestor.incrementalmissingancestors(self.parentrevs, common)
945 return ancestor.incrementalmissingancestors(self.parentrevs, common)
946
946
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

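    # Hypothetical usage (names invented for illustration): during discovery,
    # the changelog's findmissing yields the nodes a peer lacks, e.g.:
    #
    #     cl = repo.changelog
    #     tosend = cl.findmissing(common=commonnodes, heads=cl.heads())
    #
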
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents is a
                # descendant. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

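    # Call sketch (illustrative only, hypothetical names): nodesbetween
    # computes the DAG range roots::heads; with no arguments it returns
    # every node in the revlog:
    #
    #     nodes, outroots, outheads = rl.nodesbetween([basenode], [tipnode])
    #     allnodes, _, _ = rl.nodesbetween()
    #
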
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

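    # The algorithm above in isolation (a minimal sketch with made-up
    # parents, not from this file): mark every rev as a head, then clear the
    # flag of anything that appears as a parent. The extra trailing slot
    # absorbs nullrev (-1) parents:
    #
    #     parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}
    #     ishead = [1, 1, 1, 0]
    #     for r, (p1, p2) in parents.items():
    #         ishead[p1] = ishead[p2] = 0
    #     assert [r for r, v in enumerate(ishead[:3]) if v] == [1, 2]
    #
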
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

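    # Why the `a > b` early return above is sound (explanatory note, not in
    # the original): revision numbers are assigned in topological order, so
    # an ancestor always has a smaller revision number than its descendants
    # (the a == b case is already handled before the comparison).
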
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

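    # Note (added for clarity): when several equally good common-ancestor
    # heads exist, the min() over binary node ids above makes the choice
    # deterministic across runs, even though "best" is otherwise ambiguous.
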
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

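    # Illustrative inputs (hypothetical values): _match resolves, in order,
    # an integer rev, a 20-byte binary node, a rev given as a bytestring
    # (including negative indexing from the end), or a 40-char hex nodeid:
    #
    #     rl._match(0)          # node of rev 0
    #     rl._match(b'-1')      # node of the tip revision
    #     rl._match(b'a' * 40)  # binary node, if that hex id exists
    #
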
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

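    # Usage sketch (assumed caller, not from this file): this method is what
    # ultimately backs short-prefix display such as the `shortest()` template
    # filter, roughly:
    #
    #     prefix = repo.changelog.shortest(ctx.node(), minlength=4)
    #
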
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

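    # Worked example of the windowing math above (illustrative numbers):
    # with a chunk cache size of 65536, a 100-byte read at offset 70000 is
    # widened to the aligned window [65536, 131072):
    #
    #     cachesize = 65536
    #     offset, length = 70000, 100
    #     realoffset = offset & ~(cachesize - 1)            # 65536
    #     reallength = (((offset + length + cachesize)
    #                    & ~(cachesize - 1)) - realoffset)  # 65536
    #
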
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

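    # Note on the inline adjustment above (explanatory, not in the source):
    # an inline revlog interleaves index entries and data chunks in a single
    # file, so the first (rev + 1) index headers, each self._io.size bytes
    # long, shift every data offset forward by that amount.
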
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

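    # Sketch of the fallback logic above (added for clarity): a revision is
    # a snapshot iff its delta is not taken against either of its parents,
    # and every base in its delta chain is itself such a revision; hence the
    # recursion on `base`.
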
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflags(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # The text as stored inside the revlog. Might be the revision or might
        # need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

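    # Note on `targetsize` above (explanatory, the heuristic is as-is in the
    # code): when sparse-read is enabled, _chunks passes it to slicechunk so
    # that no single read pulls in much more than four times the size of the
    # text being reconstructed.
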
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

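    # The scheme behind hashrevisionsha1 (a sketch, assuming the standard
    # SHA-1 node definition): the parents are sorted before hashing, so a
    # node id does not depend on parent order:
    #
    #     sha1(min(p1, p2) + max(p1, p2) + text).digest()
    #
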
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash() in
            that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
            flags = flags & ~REVIDX_SIDEDATA
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )
        else:
            flags |= REVIDX_SIDEDATA

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(
            self, text, flags, sidedata=sidedata
        )

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
        )

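    # Illustrative sketch (not part of the original source): a typical
    # `addrevision` call as a storage consumer might issue it inside a
    # transaction; `rl`, `tr`, `linkrev`, `p1node` and `p2node` are
    # hypothetical stand-ins.
    #
    #   node = rl.addrevision(b'new file content', tr, linkrev, p1node, p2node)
    #   assert node in rl.nodemap  # the revision is now addressable
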
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

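    # Illustrative note (not part of the original source): `compress` returns
    # a (header, data) pair whose concatenation is what `decompress` expects.
    # A rough round-trip sketch, with `rl` a hypothetical revlog instance:
    #
    #   header, packed = rl.compress(b'some chunk')
    #   raw = rl.decompress(header + packed)  # original bytes (or buffer view)
    #
    # The b'u' header marks uncompressed data, a leading b'\0' is passed
    # through as-is, and b'x' is the zlib header byte.
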
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
        )
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction, ifh, dfh, entry, deltainfo.data, link, offset
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if deltabase not in self.nodemap:
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=bool(addrevisioncb),
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

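    # Illustrative note (not part of the original source): each entry consumed
    # from ``deltas`` above is a 7-tuple, unpacked in the loop as:
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags)
    #
    # where ``delta`` is the binary patch against ``deltabase`` and
    # ``linkmapper(linknode)`` resolves the changelog revision to link to.
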
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

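    # Illustrative sketch (not part of the original source): how a caller
    # might combine the two methods above; `rl`, `tr` and `minlink` are
    # hypothetical stand-ins.
    #
    #   striprev, brokenrevs = rl.getstrippoint(minlink)
    #   # ... save the revisions in brokenrevs that must survive ...
    #   rl.strip(minlink, tr)
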
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When None, the revlog's current default is used.

        If not None, `sidedatacompanion` is a callable that accepts two
        arguments:

        (srcrevlog, rev)

        and returns a triplet that controls changes to sidedata content from
        the old revision to the new clone result:

        (dropall, filterout, update)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        """
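        # Illustrative sketch (not part of the original change): a minimal
        # `sidedatacompanion` that drops one sidedata key and injects another
        # for every revision; SOME_OLD_KEY and SOME_NEW_KEY are hypothetical
        # sidedata key constants.
        #
        #   def companion(srcrevlog, rev):
        #       dropall = False
        #       filterout = {SOME_OLD_KEY}              # keys to remove
        #       update = {SOME_NEW_KEY: b'some value'}  # keys to add/replace
        #       return (dropall, filterout, update)
        #
        #   srcrevlog.clone(tr, destrevlog, sidedatacompanion=companion)
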
2587 if deltareuse not in self.DELTAREUSEALL:
2602 if deltareuse not in self.DELTAREUSEALL:
2588 raise ValueError(
2603 raise ValueError(
2589 _(b'value for deltareuse invalid: %s') % deltareuse
2604 _(b'value for deltareuse invalid: %s') % deltareuse
2590 )
2605 )
2591
2606
2592 if len(destrevlog):
2607 if len(destrevlog):
2593 raise ValueError(_(b'destination revlog is not empty'))
2608 raise ValueError(_(b'destination revlog is not empty'))
2594
2609
2595 if getattr(self, 'filteredrevs', None):
2610 if getattr(self, 'filteredrevs', None):
2596 raise ValueError(_(b'source revlog has filtered revisions'))
2611 raise ValueError(_(b'source revlog has filtered revisions'))
2597 if getattr(destrevlog, 'filteredrevs', None):
2612 if getattr(destrevlog, 'filteredrevs', None):
2598 raise ValueError(_(b'destination revlog has filtered revisions'))
2613 raise ValueError(_(b'destination revlog has filtered revisions'))
2599
2614
2600 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2615 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2601 # if possible.
2616 # if possible.
2602 oldlazydelta = destrevlog._lazydelta
2617 oldlazydelta = destrevlog._lazydelta
2603 oldlazydeltabase = destrevlog._lazydeltabase
2618 oldlazydeltabase = destrevlog._lazydeltabase
2604 oldamd = destrevlog._deltabothparents
2619 oldamd = destrevlog._deltabothparents
2605
2620
2606 try:
2621 try:
2607 if deltareuse == self.DELTAREUSEALWAYS:
2622 if deltareuse == self.DELTAREUSEALWAYS:
2608 destrevlog._lazydeltabase = True
2623 destrevlog._lazydeltabase = True
2609 destrevlog._lazydelta = True
2624 destrevlog._lazydelta = True
2610 elif deltareuse == self.DELTAREUSESAMEREVS:
2625 elif deltareuse == self.DELTAREUSESAMEREVS:
2611 destrevlog._lazydeltabase = False
2626 destrevlog._lazydeltabase = False
2612 destrevlog._lazydelta = True
2627 destrevlog._lazydelta = True
2613 elif deltareuse == self.DELTAREUSENEVER:
2628 elif deltareuse == self.DELTAREUSENEVER:
2614 destrevlog._lazydeltabase = False
2629 destrevlog._lazydeltabase = False
2615 destrevlog._lazydelta = False
2630 destrevlog._lazydelta = False
2616
2631
2617 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2632 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2618
2633
2619 self._clone(
2634 self._clone(
2620 tr, destrevlog, addrevisioncb, deltareuse, forcedeltabothparents
2635 tr,
2636 destrevlog,
2637 addrevisioncb,
2638 deltareuse,
2639 forcedeltabothparents,
2640 sidedatacompanion,
2621 )
2641 )
2622
2642
2623 finally:
2643 finally:
2624 destrevlog._lazydelta = oldlazydelta
2644 destrevlog._lazydelta = oldlazydelta
2625 destrevlog._lazydeltabase = oldlazydeltabase
2645 destrevlog._lazydeltabase = oldlazydeltabase
2626 destrevlog._deltabothparents = oldamd
2646 destrevlog._deltabothparents = oldamd
2627
2647
2628 def _clone(
2648 def _clone(
2629 self, tr, destrevlog, addrevisioncb, deltareuse, forcedeltabothparents
2649 self,
2650 tr,
2651 destrevlog,
2652 addrevisioncb,
2653 deltareuse,
2654 forcedeltabothparents,
2655 sidedatacompanion,
2630 ):
2656 ):
2631 """perform the core duty of `revlog.clone` after parameter processing"""
2657 """perform the core duty of `revlog.clone` after parameter processing"""
2632 deltacomputer = deltautil.deltacomputer(destrevlog)
2658 deltacomputer = deltautil.deltacomputer(destrevlog)
2633 index = self.index
2659 index = self.index
2634 for rev in self:
2660 for rev in self:
2635 entry = index[rev]
2661 entry = index[rev]
2636
2662
2637 # Some classes override linkrev to take filtered revs into
2663 # Some classes override linkrev to take filtered revs into
2638 # account. Use raw entry from index.
2664 # account. Use raw entry from index.
2639 flags = entry[0] & 0xFFFF
2665 flags = entry[0] & 0xFFFF
2640 linkrev = entry[4]
2666 linkrev = entry[4]
2641 p1 = index[entry[5]][7]
2667 p1 = index[entry[5]][7]
2642 p2 = index[entry[6]][7]
2668 p2 = index[entry[6]][7]
2643 node = entry[7]
2669 node = entry[7]
2644
2670
2671 sidedataactions = (False, [], {})
2672 if sidedatacompanion is not None:
2673 sidedataactions = sidedatacompanion(self, rev)
2674
2645 # (Possibly) reuse the delta from the revlog if allowed and
2675 # (Possibly) reuse the delta from the revlog if allowed and
2646 # the revlog chunk is a delta.
2676 # the revlog chunk is a delta.
2647 cachedelta = None
2677 cachedelta = None
2648 rawtext = None
2678 rawtext = None
2649 if deltareuse == self.DELTAREUSEFULLADD:
2679 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2650 text = self.revision(rev)
2680 dropall, filterout, update = sidedataactions
2681 text, sidedata = self._revisiondata(rev)
2682 if dropall:
2683 sidedata = {}
2684 for key in filterout:
2685 sidedata.pop(key, None)
2686 sidedata.update(update)
2687 if not sidedata:
2688 sidedata = None
2651 destrevlog.addrevision(
2689 destrevlog.addrevision(
2652 text,
2690 text,
2653 tr,
2691 tr,
2654 linkrev,
2692 linkrev,
2655 p1,
2693 p1,
2656 p2,
2694 p2,
2657 cachedelta=cachedelta,
2695 cachedelta=cachedelta,
2658 node=node,
2696 node=node,
2659 flags=flags,
2697 flags=flags,
2660 deltacomputer=deltacomputer,
2698 deltacomputer=deltacomputer,
2699 sidedata=sidedata,
2661 )
2700 )
2662 else:
2701 else:
2663 if destrevlog._lazydelta:
2702 if destrevlog._lazydelta:
2664 dp = self.deltaparent(rev)
2703 dp = self.deltaparent(rev)
2665 if dp != nullrev:
2704 if dp != nullrev:
2666 cachedelta = (dp, bytes(self._chunk(rev)))
2705 cachedelta = (dp, bytes(self._chunk(rev)))
2667
2706
2668 if not cachedelta:
2707 if not cachedelta:
2669 rawtext = self.rawdata(rev)
2708 rawtext = self.rawdata(rev)
2670
2709
2671 ifh = destrevlog.opener(
2710 ifh = destrevlog.opener(
2672 destrevlog.indexfile, b'a+', checkambig=False
2711 destrevlog.indexfile, b'a+', checkambig=False
2673 )
2712 )
2674 dfh = None
2713 dfh = None
2675 if not destrevlog._inline:
2714 if not destrevlog._inline:
2676 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2715 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2677 try:
2716 try:
2678 destrevlog._addrevision(
2717 destrevlog._addrevision(
2679 node,
2718 node,
2680 rawtext,
2719 rawtext,
2681 tr,
2720 tr,
2682 linkrev,
2721 linkrev,
2683 p1,
2722 p1,
2684 p2,
2723 p2,
2685 flags,
2724 flags,
2686 cachedelta,
2725 cachedelta,
2687 ifh,
2726 ifh,
2688 dfh,
2727 dfh,
2689 deltacomputer=deltacomputer,
2728 deltacomputer=deltacomputer,
2690 )
2729 )
2691 finally:
2730 finally:
2692 if dfh:
2731 if dfh:
2693 dfh.close()
2732 dfh.close()
2694 ifh.close()
2733 ifh.close()
2695
2734
2696 if addrevisioncb:
2735 if addrevisioncb:
2697 addrevisioncb(self, rev, node)
2736 addrevisioncb(self, rev, node)
2698
2737
2699 def censorrevision(self, tr, censornode, tombstone=b''):
2738 def censorrevision(self, tr, censornode, tombstone=b''):
2700 if (self.version & 0xFFFF) == REVLOGV0:
2739 if (self.version & 0xFFFF) == REVLOGV0:
2701 raise error.RevlogError(
2740 raise error.RevlogError(
2702 _(b'cannot censor with version %d revlogs') % self.version
2741 _(b'cannot censor with version %d revlogs') % self.version
2703 )
2742 )
2704
2743
2705 censorrev = self.rev(censornode)
2744 censorrev = self.rev(censornode)
2706 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2745 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2707
2746
2708 if len(tombstone) > self.rawsize(censorrev):
2747 if len(tombstone) > self.rawsize(censorrev):
2709 raise error.Abort(
2748 raise error.Abort(
2710 _(b'censor tombstone must be no longer than censored data')
2749 _(b'censor tombstone must be no longer than censored data')
2711 )
2750 )
2712
2751
2713 # Rewriting the revlog in place is hard. Our strategy for censoring is
2752 # Rewriting the revlog in place is hard. Our strategy for censoring is
2714 # to create a new revlog, copy all revisions to it, then replace the
2753 # to create a new revlog, copy all revisions to it, then replace the
2715 # revlogs on transaction close.
2754 # revlogs on transaction close.
2716
2755
2717 newindexfile = self.indexfile + b'.tmpcensored'
2756 newindexfile = self.indexfile + b'.tmpcensored'
2718 newdatafile = self.datafile + b'.tmpcensored'
2757 newdatafile = self.datafile + b'.tmpcensored'
2719
2758
2720 # This is a bit dangerous. We could easily have a mismatch of state.
2759 # This is a bit dangerous. We could easily have a mismatch of state.
2721 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2760 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2722 newrl.version = self.version
2761 newrl.version = self.version
2723 newrl._generaldelta = self._generaldelta
2762 newrl._generaldelta = self._generaldelta
2724 newrl._io = self._io
2763 newrl._io = self._io
2725
2764
2726 for rev in self.revs():
2765 for rev in self.revs():
2727 node = self.node(rev)
2766 node = self.node(rev)
2728 p1, p2 = self.parents(node)
2767 p1, p2 = self.parents(node)
2729
2768
2730 if rev == censorrev:
2769 if rev == censorrev:
2731 newrl.addrawrevision(
2770 newrl.addrawrevision(
2732 tombstone,
2771 tombstone,
2733 tr,
2772 tr,
2734 self.linkrev(censorrev),
2773 self.linkrev(censorrev),
2735 p1,
2774 p1,
2736 p2,
2775 p2,
2737 censornode,
2776 censornode,
2738 REVIDX_ISCENSORED,
2777 REVIDX_ISCENSORED,
2739 )
2778 )
2740
2779
2741 if newrl.deltaparent(rev) != nullrev:
2780 if newrl.deltaparent(rev) != nullrev:
2742 raise error.Abort(
2781 raise error.Abort(
2743 _(
2782 _(
2744 b'censored revision stored as delta; '
2783 b'censored revision stored as delta; '
2745 b'cannot censor'
2784 b'cannot censor'
2746 ),
2785 ),
2747 hint=_(
2786 hint=_(
2748 b'censoring of revlogs is not '
2787 b'censoring of revlogs is not '
2749 b'fully implemented; please report '
2788 b'fully implemented; please report '
2750 b'this bug'
2789 b'this bug'
2751 ),
2790 ),
2752 )
2791 )
2753 continue
2792 continue
2754
2793
2755 if self.iscensored(rev):
2794 if self.iscensored(rev):
2756 if self.deltaparent(rev) != nullrev:
2795 if self.deltaparent(rev) != nullrev:
2757 raise error.Abort(
2796 raise error.Abort(
2758 _(
2797 _(
2759 b'cannot censor due to censored '
2798 b'cannot censor due to censored '
2760 b'revision having delta stored'
2799 b'revision having delta stored'
2761 )
2800 )
2762 )
2801 )
2763 rawtext = self._chunk(rev)
2802 rawtext = self._chunk(rev)
2764 else:
2803 else:
2765 rawtext = self.rawdata(rev)
2804 rawtext = self.rawdata(rev)
2766
2805
2767 newrl.addrawrevision(
2806 newrl.addrawrevision(
2768 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2807 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2769 )
2808 )
2770
2809
2771 tr.addbackup(self.indexfile, location=b'store')
2810 tr.addbackup(self.indexfile, location=b'store')
2772 if not self._inline:
2811 if not self._inline:
2773 tr.addbackup(self.datafile, location=b'store')
2812 tr.addbackup(self.datafile, location=b'store')
2774
2813
2775 self.opener.rename(newrl.indexfile, self.indexfile)
2814 self.opener.rename(newrl.indexfile, self.indexfile)
2776 if not self._inline:
2815 if not self._inline:
2777 self.opener.rename(newrl.datafile, self.datafile)
2816 self.opener.rename(newrl.datafile, self.datafile)
2778
2817
2779 self.clearcaches()
2818 self.clearcaches()
2780 self._loadindex()
2819 self._loadindex()
2781
2820
2782 def verifyintegrity(self, state):
2821 def verifyintegrity(self, state):
2783 """Verifies the integrity of the revlog.
2822 """Verifies the integrity of the revlog.
2784
2823
2785 Yields ``revlogproblem`` instances describing problems that are
2824 Yields ``revlogproblem`` instances describing problems that are
2786 found.
2825 found.
2787 """
2826 """
2788 dd, di = self.checksize()
2827 dd, di = self.checksize()
2789 if dd:
2828 if dd:
2790 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2829 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2791 if di:
2830 if di:
2792 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2831 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2793
2832
2794 version = self.version & 0xFFFF
2833 version = self.version & 0xFFFF
2795
2834
2796 # The verifier tells us what version revlog we should be.
2835 # The verifier tells us what version revlog we should be.
2797 if version != state[b'expectedversion']:
2836 if version != state[b'expectedversion']:
2798 yield revlogproblem(
2837 yield revlogproblem(
2799 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2838 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2800 % (self.indexfile, version, state[b'expectedversion'])
2839 % (self.indexfile, version, state[b'expectedversion'])
2801 )
2840 )
2802
2841
2803 state[b'skipread'] = set()
2842 state[b'skipread'] = set()
2804
2843
        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #       header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM: length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).
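            #
            # For instance (hypothetical numbers): a rename revision whose
            # b'\1\n...\1\n' header occupies 30 bytes has
            # rawsize() == len(rawtext), satisfying the L1 == L2 check
            # below, while len(read()) is 30 bytes shorter because read()
            # strips the metadata header; only L1 != L2 is corruption.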
            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state[b'skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

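Because verifyintegrity() is a generator, a verifier can stream problems as they are found instead of collecting them up front. A small sketch of a consumer, assuming a revlog instance ``rl``; the state keys mirror the ones read above, but the driver itself is hypothetical:

    def collect_problems(rl, expectedversion):
        state = {
            b'expectedversion': expectedversion,  # compared to on-disk format
            b'skipflags': 0,                      # 0: hash-check every rev
            b'erroroncensored': True,             # report censored data
        }
        errors, warnings = [], []
        for problem in rl.verifyintegrity(state):
            if problem.error:
                errors.append(problem.error)
            if problem.warning:
                warnings.append(problem.warning)
        # state[b'skipread'] now holds nodes whose content went unchecked.
        return errors, warnings, state[b'skipread']
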
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
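
Each keyword argument toggles exactly one key in the returned dict, so callers pay only for what they request: trackedsize walks every revision to sum raw sizes, while storedsize stats the underlying files. A hypothetical helper showing the shape of a query:

    def storage_summary(rl):
        info = rl.storageinfo(
            revisionscount=True, trackedsize=True, storedsize=True
        )
        # Comparing tracked vs. stored size approximates how well delta
        # compression is doing for this revlog.
        return (
            info[b'revisionscount'],
            info[b'trackedsize'],
            info[b'storedsize'],
        )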