index: use `index.has_node` in `revlog.addrevision`...
marmoute, r43935:8b533de0 (default)
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVIDX_SIDEDATA,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False, {}


def ellipsiswriteprocessor(rl, text, sidedata):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def getoffset(q):
    return int(q >> 16)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
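
# Illustrative sketch (not part of the original file): the first field of
# an index entry packs the byte offset into the high bits and the 16-bit
# flag field into the low bits, so the three helpers above invert each
# other:
#
#   >>> v = offset_type(4096, REVIDX_ISCENSORED)
#   >>> getoffset(v), gettype(v) == REVIDX_ISCENSORED
#   (4096, True)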


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(b">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
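
# Sketch (assumption, not in the original file): the v0 record layout above
# is fixed-size, which is what lets revlogoldio.parseindex below slice the
# data at a constant stride:
#
#   >>> indexformatv0.size  # 4 * 4-byte ints + 3 * 20-byte nodeids
#   76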


class revlogoldindex(list):
    @util.propertycache
    def nodemap(self):
        nodemap = revlogutils.NodeMap({nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def has_node(self, node):
        """return True if the node exists in the index"""
        return node in self.nodemap

    def append(self, tup):
        self.nodemap[tup[7]] = len(self)
        super(revlogoldindex, self).append(tup)

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        for r in pycompat.xrange(i.start, len(self)):
            del self.nodemap[self[r][7]]
        super(revlogoldindex, self).__delitem__(i)

    def clearcaches(self):
        self.__dict__.pop('nodemap', None)

    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)
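
# Sketch (assumption, not in the original file): rev -1 resolves to the
# null entry without being stored, so callers may look up nullrev like any
# other revision:
#
#   >>> revlogoldindex([])[-1][7] == nullid
#   True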


class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = revlogutils.NodeMap({nullid: nullrev})
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        index = revlogoldindex(index)
        return index, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)


# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
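
# Sketch (assumption, not in the original file): the "ng" record is also
# fixed-size; the 20-byte nodeid is zero-padded to 32 bytes ("20s12x"),
# and for rev 0 packentry() overlays the 4-byte version/flags word on the
# first bytes of the entry, since rev 0 always has offset 0:
#
#   >>> indexformatng.size  # 8 + 6 * 4 + 20 + 12
#   64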

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodepos = None
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        if self.hassidedata:
            self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )

        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, self._chunkcache = d
        self.nodemap = self.index.nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            args['checkambig'] = self._checkambig
        if mode == b'w':
            args['atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = "revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False
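
    # Sketch (assumption, tying into this changeset's summary): membership
    # tests can go through the index directly rather than reaching into the
    # nodemap mapping. The two spellings below are equivalent for the old
    # index class defined above; `rl` and `somenode` are hypothetical
    # stand-ins for a revlog instance and a 20-byte node:
    #
    #   >>> rl.index.has_node(somenode)   # what addrevision moves to
    #   >>> somenode in rl.index.nodemap  # the older spelling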

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self.index.clearcaches()

    def rev(self, node):
        try:
            return self.index.nodemap[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
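
    # Sketch (assumption, not in the original file): on a generaldelta
    # revlog where rev 10 deltas against rev 7, rev 7 deltas against rev 4,
    # and rev 4 is stored in full (its own base); `rl` is a hypothetical
    # revlog instance:
    #
    #   >>> rl._deltachain(10)
    #   ([4, 7, 10], False)
    #   >>> rl._deltachain(10, stoprev=7)
    #   ([10], True)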

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, b'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)
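
    # Sketch (assumption, not in the original file): with a linear history
    # 0 -> 1 -> 2 -> 3, the revisions reachable from head 3 but not from
    # common rev 1 are (`rl` is a hypothetical revlog instance):
    #
    #   >>> rl.findmissingrevs(common=[1], heads=[3])
    #   [2, 3]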
979
979
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

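    # A minimal usage sketch; `rlog`, `c`, and `h` are hypothetical. The
    # result is topologically sorted, so iterating gives a valid order in
    # which the missing changesets could be applied:
    #
    #   for node in rlog.findmissing(common=[c], heads=[h]):
    #       process(node)  # ancestors are always seen before descendants
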
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents is a
                # descendant. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

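    # A minimal usage sketch; `rlog`, `r`, and `h` are hypothetical. The
    # three-tuple mirrors the docstring above:
    #
    #   nodes, outroots, outheads = rlog.nodesbetween([r], [h])
    #   # `nodes` walks from `r` toward `h` in topological order;
    #   # `outroots`/`outheads` are the subsets of the inputs actually
    #   # reached.
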
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

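    # A minimal usage sketch; `rlog` and the nodes are hypothetical:
    #
    #   rlog.heads()               # every childless node
    #   rlog.heads(start=n)        # only heads that descend from n
    #   rlog.heads(stop=[n1, n2])  # revs from n1/n2 act as childless
    #                              # cut-off points
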
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

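    # A minimal usage sketch; `n1`/`n2` are hypothetical nodes. When a
    # criss-cross merge produces several candidate ancestors, taking min()
    # over the binary node hashes picks a deterministic, order-independent
    # winner:
    #
    #   assert rlog.ancestor(n1, n2) == rlog.ancestor(n2, n1)
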
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

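    # A minimal usage sketch of the accepted forms; the values are
    # hypothetical. Each call returns a full binary node or raises
    # LookupError:
    #
    #   rlog.lookup(0)          # revision number
    #   rlog.lookup(b'0')       # str(revision number)
    #   rlog.lookup(b'1e4e2d')  # unambiguous hex nodeid prefix
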
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

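    # A minimal usage sketch; `node` is a hypothetical full binary nodeid:
    #
    #   rlog.shortest(node)               # e.g. b'1e4e' once four digits
    #                                     # are unambiguous in this revlog
    #   rlog.shortest(node, minlength=8)  # never shorter than 8 digits
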
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

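    # A worked example of the alignment arithmetic above, assuming the
    # power-of-two chunk cache size the revlog requires:
    #
    #   cachesize = 65536                # 64 KiB window
    #   offset, length = 70000, 100
    #   realoffset = 70000 & ~65535      # -> 65536
    #   reallength = (
    #       (70000 + 100 + 65536) & ~65535
    #   ) - 65536                        # -> 65536
    #   # the read is widened to [65536, 131072), cached, and the requested
    #   # [70000, 70100) slice is returned as a buffer
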
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

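    # A sketch of the inline adjustment above: in an inline revlog, index
    # entries and data chunks are interleaved in a single file, so the data
    # of revision r is preceded by r + 1 index records:
    #
    #   # with the 64-byte v1 index record size:
    #   # physical offset of rev 2's data = start(2) + (2 + 1) * 64
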
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

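    # A sketch of how the stored base is interpreted above:
    #
    #   # base == rev           -> full snapshot, deltaparent is nullrev
    #   # with generaldelta     -> deltaparent is the recorded base
    #   # legacy (non-general)  -> each delta applies on top of rev - 1
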
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

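    # A sketch of snapshot depth in a sparse revlog, where a delta chain may
    # run through intermediate snapshots (revision numbers hypothetical):
    #
    #   # chain: full snapshot (r0) -> intermediate snapshot (r5) -> delta
    #   # snapshotdepth(r0) == 0 and snapshotdepth(r5) == 1, i.e. the number
    #   # of snapshots that precede each one in its own delta chain
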
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflags(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

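    # A minimal sketch of the two read paths; `rlog` and `rev` are
    # hypothetical:
    #
    #   text = rlog.revision(rev)  # flag processors applied ("cooked" text)
    #   blob = rlog.rawdata(rev)   # bytes as stored, for changegroups and
    #                              # debug commands
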
    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # The text as stored inside the revlog. Might be the revision or might
        # need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw text is
            # cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

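    # A sketch of the chain resolution above (hypothetical revisions):
    #
    #   # chain == [base, r1, r2]
    #   # rawtext == mdiff.patches(chunk(base), [chunk(r1), chunk(r2)])
    #   # if the cached revision sits inside the chain, the walk stops there
    #   # and the cached rawtext stands in for chunk(base)
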
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

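    # A self-contained sketch of the default node hash, mirroring
    # storageutil.hashrevisionsha1: SHA-1 over the sorted parent nodes
    # followed by the text, which ties a revision's identity to its history.
    # `samplehash` is a hypothetical name used only for illustration:
    #
    #   import hashlib
    #
    #   def samplehash(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()
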
1845     def checkhash(self, text, node, p1=None, p2=None, rev=None):
1846         """Check node hash integrity.
1847
1848         Available as a function so that subclasses can extend hash mismatch
1849         behaviors as needed.
1850         """
1851         try:
1852             if p1 is None and p2 is None:
1853                 p1, p2 = self.parents(node)
1854             if node != self.hash(text, p1, p2):
1855                 # Clear the revision cache on hash failure. The revision cache
1856                 # only stores the raw revision and clearing the cache does have
1857                 # the side-effect that we won't have a cache hit when the raw
1858                 # revision data is accessed. But this case should be rare and
1859                 # it is extra work to teach the cache about the hash
1860                 # verification state.
1861                 if self._revisioncache and self._revisioncache[0] == node:
1862                     self._revisioncache = None
1863
1864                 revornode = rev
1865                 if revornode is None:
1866                     revornode = templatefilters.short(hex(node))
1867                 raise error.RevlogError(
1868                     _(b"integrity check failed on %s:%s")
1869                     % (self.indexfile, pycompat.bytestr(revornode))
1870                 )
1871         except error.RevlogError:
1872             if self._censorable and storageutil.iscensoredtext(text):
1873                 raise error.CensoredNodeError(self.indexfile, node, text)
1874             raise
1875
1876     def _enforceinlinesize(self, tr, fp=None):
1877         """Check if the revlog is too big for inline and convert if so.
1878
1879         This should be called after revisions are added to the revlog. If the
1880         revlog has grown too large to be an inline revlog, it will convert it
1881         to use multiple index and data files.
1882         """
1883         tiprev = len(self) - 1
1884         if (
1885             not self._inline
1886             or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1887         ):
1888             return
1889
1890         trinfo = tr.find(self.indexfile)
1891         if trinfo is None:
1892             raise error.RevlogError(
1893                 _(b"%s not found in the transaction") % self.indexfile
1894             )
1895
1896         trindex = trinfo[2]
1897         if trindex is not None:
1898             dataoff = self.start(trindex)
1899         else:
1900             # revlog was stripped at start of transaction, use all leftover data
1901             trindex = len(self) - 1
1902             dataoff = self.end(tiprev)
1903
1904         tr.add(self.datafile, dataoff)
1905
1906         if fp:
1907             fp.flush()
1908             fp.close()
1909             # We can't use the cached file handle after close(). So prevent
1910             # its usage.
1911             self._writinghandles = None
1912
1913         with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1914             for r in self:
1915                 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1916
1917         with self._indexfp(b'w') as fp:
1918             self.version &= ~FLAG_INLINE_DATA
1919             self._inline = False
1920             io = self._io
1921             for i in self:
1922                 e = io.packentry(self.index[i], self.node, self.version, i)
1923                 fp.write(e)
1924
1925             # the temp file replaces the real index when we exit the context
1926             # manager
1927
1928         tr.replace(self.indexfile, trindex * self._io.size)
1929         self._chunkclear()
1930
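# Editor's note: a sketch of the size check _enforceinlinesize() makes
# above. A revlog stays inline only while the end offset of its tip
# revision is under the module-level _maxinline threshold (assumed here
# to be 128 KiB, its value in this version of revlog.py).
def sketch_needs_split(inline, tip_start, tip_length, maxinline=131072):
    # mirrors the early-return condition: convert once the inline data
    # segment reaches the threshold
    return inline and (tip_start + tip_length) >= maxinline

assert sketch_needs_split(True, 131000, 100)        # tip ends past the limit
assert not sketch_needs_split(False, 10 ** 9, 100)  # already split: nothing to do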
1931     def _nodeduplicatecallback(self, transaction, node):
1932         """called when trying to add a node already stored.
1933         """
1934
1935     def addrevision(
1936         self,
1937         text,
1938         transaction,
1939         link,
1940         p1,
1941         p2,
1942         cachedelta=None,
1943         node=None,
1944         flags=REVIDX_DEFAULT_FLAGS,
1945         deltacomputer=None,
1946         sidedata=None,
1947     ):
1948         """add a revision to the log
1949
1950         text - the revision data to add
1951         transaction - the transaction object used for rollback
1952         link - the linkrev data to add
1953         p1, p2 - the parent nodeids of the revision
1954         cachedelta - an optional precomputed delta
1955         node - nodeid of revision; typically node is not specified, and it is
1956             computed by default as hash(text, p1, p2); however, subclasses might
1957             use a different hashing method (and override checkhash() in that case)
1958         flags - the known flags to set on the revision
1959         deltacomputer - an optional deltacomputer instance shared between
1960             multiple calls
1961         """
1962         if link == nullrev:
1963             raise error.RevlogError(
1964                 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1965             )
1966
1967         if sidedata is None:
1968             sidedata = {}
1969             flags = flags & ~REVIDX_SIDEDATA
1970         elif not self.hassidedata:
1971             raise error.ProgrammingError(
1972                 _(b"trying to add sidedata to a revlog that does not support them")
1973             )
1974         else:
1975             flags |= REVIDX_SIDEDATA
1976
1977         if flags:
1978             node = node or self.hash(text, p1, p2)
1979
1980         rawtext, validatehash = flagutil.processflagswrite(
1981             self, text, flags, sidedata=sidedata
1982         )
1983
1984         # If the flag processor modifies the revision data, ignore any provided
1985         # cachedelta.
1986         if rawtext != text:
1987             cachedelta = None
1988
1989         if len(rawtext) > _maxentrysize:
1990             raise error.RevlogError(
1991                 _(
1992                     b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
1993                 )
1994                 % (self.indexfile, len(rawtext))
1995             )
1996
1997         node = node or self.hash(rawtext, p1, p2)
1998         if node in self.nodemap:
1998         if self.index.has_node(node):
1999             return node
2000
2001         if validatehash:
2002             self.checkhash(rawtext, node, p1=p1, p2=p2)
2003
2004         return self.addrawrevision(
2005             rawtext,
2006             transaction,
2007             link,
2008             p1,
2009             p2,
2010             node,
2011             flags,
2012             cachedelta=cachedelta,
2013             deltacomputer=deltacomputer,
2014         )
2015
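# Editor's note: the change in this revision swaps the membership test
# `node in self.nodemap` in addrevision() for `self.index.has_node(node)`.
# A hedged sketch of the equivalence, assuming a dict-like nodemap that
# maps node -> rev:
class SketchIndex:
    def __init__(self):
        self._nodemap = {}  # node (20-byte bytes) -> rev (int)

    def has_node(self, node):
        # same truth value as `node in nodemap`, but phrased as an index
        # method so index implementations without a dict can answer it
        return node in self._nodemap

idx = SketchIndex()
assert not idx.has_node(b"\x11" * 20)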
2016     def addrawrevision(
2017         self,
2018         rawtext,
2019         transaction,
2020         link,
2021         p1,
2022         p2,
2023         node,
2024         flags,
2025         cachedelta=None,
2026         deltacomputer=None,
2027     ):
2028         """add a raw revision with known flags, node and parents
2029         useful when reusing a revision not stored in this revlog (e.g. received
2030         over the wire, or read from an external bundle).
2031         """
2032         dfh = None
2033         if not self._inline:
2034             dfh = self._datafp(b"a+")
2035         ifh = self._indexfp(b"a+")
2036         try:
2037             return self._addrevision(
2038                 node,
2039                 rawtext,
2040                 transaction,
2041                 link,
2042                 p1,
2043                 p2,
2044                 flags,
2045                 cachedelta,
2046                 ifh,
2047                 dfh,
2048                 deltacomputer=deltacomputer,
2049             )
2050         finally:
2051             if dfh:
2052                 dfh.close()
2053             ifh.close()
2054
2055     def compress(self, data):
2056         """Generate a possibly-compressed representation of data."""
2057         if not data:
2058             return b'', data
2059
2060         compressed = self._compressor.compress(data)
2061
2062         if compressed:
2063             # The revlog compressor added the header in the returned data.
2064             return b'', compressed
2065
2066         if data[0:1] == b'\0':
2067             return b'', data
2068         return b'u', data
2069
2070     def decompress(self, data):
2071         """Decompress a revlog chunk.
2072
2073         The chunk is expected to begin with a header identifying the
2074         format type so it can be routed to an appropriate decompressor.
2075         """
2076         if not data:
2077             return data
2078
2079         # Revlogs are read much more frequently than they are written and many
2080         # chunks only take microseconds to decompress, so performance is
2081         # important here.
2082         #
2083         # We can make a few assumptions about revlogs:
2084         #
2085         # 1) the majority of chunks will be compressed (as opposed to inline
2086         #    raw data).
2087         # 2) decompressing *any* data will likely be at least 10x slower than
2088         #    returning raw inline data.
2089         # 3) we want to prioritize common and officially supported compression
2090         #    engines
2091         #
2092         # It follows that we want to optimize for "decompress compressed data
2093         # when encoded with common and officially supported compression engines"
2094         # case over "raw data" and "data encoded by less common or non-official
2095         # compression engines." That is why we have the inline lookup first
2096         # followed by the compengines lookup.
2097         #
2098         # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2099         # compressed chunks. And this matters for changelog and manifest reads.
2100         t = data[0:1]
2101
2102         if t == b'x':
2103             try:
2104                 return _zlibdecompress(data)
2105             except zlib.error as e:
2106                 raise error.RevlogError(
2107                     _(b'revlog decompress error: %s')
2108                     % stringutil.forcebytestr(e)
2109                 )
2110         # '\0' is more common than 'u' so it goes first.
2111         elif t == b'\0':
2112             return data
2113         elif t == b'u':
2114             return util.buffer(data, 1)
2115
2116         try:
2117             compressor = self._decompressors[t]
2118         except KeyError:
2119             try:
2120                 engine = util.compengines.forrevlogheader(t)
2121                 compressor = engine.revlogcompressor(self._compengineopts)
2122                 self._decompressors[t] = compressor
2123             except KeyError:
2124                 raise error.RevlogError(_(b'unknown compression type %r') % t)
2125
2126         return compressor.decompress(data)
2127
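# Editor's note: a self-contained sketch of the chunk header convention
# used by compress()/decompress() above, with zlib standing in for the
# configured engine. Only the headers handled inline there are covered:
# 'x' (zlib), '\0' (raw data that is self-marking), 'u' (uncompressed,
# marker prepended). The real methods return (header, data) pairs; this
# sketch concatenates for brevity.
import zlib

def sketch_compress(data):
    compressed = zlib.compress(data)
    if len(compressed) < len(data):
        return compressed            # zlib output already starts with 'x'
    if data[:1] == b"\0":
        return data                  # a NUL first byte is unambiguous as-is
    return b"u" + data               # otherwise mark as uncompressed

def sketch_decompress(chunk):
    t = chunk[:1]
    if t == b"x":
        return zlib.decompress(chunk)
    if t == b"\0" or not chunk:
        return chunk
    if t == b"u":
        return chunk[1:]
    raise ValueError("unknown compression type %r" % t)

assert sketch_decompress(sketch_compress(b"some revision text")) == b"some revision text"
assert sketch_decompress(sketch_compress(b"abc" * 1000)) == b"abc" * 1000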
2128     def _addrevision(
2129         self,
2130         node,
2131         rawtext,
2132         transaction,
2133         link,
2134         p1,
2135         p2,
2136         flags,
2137         cachedelta,
2138         ifh,
2139         dfh,
2140         alwayscache=False,
2141         deltacomputer=None,
2142     ):
2143         """internal function to add revisions to the log
2144
2145         see addrevision for argument descriptions.
2146
2147         note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2148
2149         if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2150         be used.
2151
2152         invariants:
2153         - rawtext is optional (can be None); if not set, cachedelta must be set.
2154           if both are set, they must correspond to each other.
2155         """
2156         if node == nullid:
2157             raise error.RevlogError(
2158                 _(b"%s: attempt to add null revision") % self.indexfile
2159             )
2160         if node == wdirid or node in wdirfilenodeids:
2161             raise error.RevlogError(
2162                 _(b"%s: attempt to add wdir revision") % self.indexfile
2163             )
2164
2165         if self._inline:
2166             fh = ifh
2167         else:
2168             fh = dfh
2169
2170         btext = [rawtext]
2171
2172         curr = len(self)
2173         prev = curr - 1
2174         offset = self.end(prev)
2175         p1r, p2r = self.rev(p1), self.rev(p2)
2176
2177         # full versions are inserted when the needed deltas
2178         # become comparable to the uncompressed text
2179         if rawtext is None:
2180             # need rawtext size, before changed by flag processors, which is
2181             # the non-raw size. use revlog explicitly to avoid filelog's extra
2182             # logic that might remove metadata size.
2183             textlen = mdiff.patchedsize(
2184                 revlog.size(self, cachedelta[0]), cachedelta[1]
2185             )
2186         else:
2187             textlen = len(rawtext)
2188
2189         if deltacomputer is None:
2190             deltacomputer = deltautil.deltacomputer(self)
2191
2192         revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2193
2194         deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2195
2196         e = (
2197             offset_type(offset, flags),
2198             deltainfo.deltalen,
2199             textlen,
2200             deltainfo.base,
2201             link,
2202             p1r,
2203             p2r,
2204             node,
2205         )
2206         self.index.append(e)
2207
2208         # Reset the pure node cache start lookup offset to account for new
2209         # revision.
2210         if self._nodepos is not None:
2211             self._nodepos = curr
2212
2213         entry = self._io.packentry(e, self.node, self.version, curr)
2214         self._writeentry(
2215             transaction, ifh, dfh, entry, deltainfo.data, link, offset
2216         )
2217
2218         rawtext = btext[0]
2219
2220         if alwayscache and rawtext is None:
2221             rawtext = deltacomputer.buildtext(revinfo, fh)
2222
2223         if type(rawtext) == bytes:  # only accept immutable objects
2224             self._revisioncache = (node, curr, rawtext)
2225         self._chainbasecache[curr] = deltainfo.chainbase
2226         return node
2227
2228     def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2229         # Files opened in a+ mode have inconsistent behavior on various
2230         # platforms. Windows requires that a file positioning call be made
2231         # when the file handle transitions between reads and writes. See
2232         # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2233         # platforms, Python or the platform itself can be buggy. Some versions
2234         # of Solaris have been observed to not append at the end of the file
2235         # if the file was seeked to before the end. See issue4943 for more.
2236         #
2237         # We work around this issue by inserting a seek() before writing.
2238         # Note: This is likely not necessary on Python 3. However, because
2239         # the file handle is reused for reads and may be seeked there, we need
2240         # to be careful before changing this.
2241         ifh.seek(0, os.SEEK_END)
2242         if dfh:
2243             dfh.seek(0, os.SEEK_END)
2244
2245         curr = len(self) - 1
2246         if not self._inline:
2247             transaction.add(self.datafile, offset)
2248             transaction.add(self.indexfile, curr * len(entry))
2249             if data[0]:
2250                 dfh.write(data[0])
2251             dfh.write(data[1])
2252             ifh.write(entry)
2253         else:
2254             offset += curr * self._io.size
2255             transaction.add(self.indexfile, offset, curr)
2256             ifh.write(entry)
2257             ifh.write(data[0])
2258             ifh.write(data[1])
2259             self._enforceinlinesize(transaction, ifh)
2260
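# Editor's note: a sketch of the two on-disk layouts handled by
# _writeentry() above. Inline revlogs interleave each fixed-size index
# entry with its revision data in the .i file; non-inline revlogs
# append data to the .d file and entries to the .i file separately.
# The file handles below are illustrative stand-ins for ifh/dfh.
import io

def sketch_writeentry(inline, ifh, dfh, entry, data):
    if inline:
        ifh.write(entry)
        ifh.write(data)          # data follows its entry in the .i file
    else:
        dfh.write(data)          # revision data goes to the .d file
        ifh.write(entry)         # the entry goes to the .i file

ifh, dfh = io.BytesIO(), io.BytesIO()
sketch_writeentry(False, ifh, dfh, b"<entry>", b"<data>")
assert ifh.getvalue() == b"<entry>" and dfh.getvalue() == b"<data>"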
2261     def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2262         """
2263         add a delta group
2264
2265         given a set of deltas, add them to the revision log. the
2266         first delta is against its parent, which should be in our
2267         log, the rest are against the previous delta.
2268
2269         If ``addrevisioncb`` is defined, it will be called with arguments of
2270         this revlog and the node that was added.
2271         """
2272
2273         if self._writinghandles:
2274             raise error.ProgrammingError(b'cannot nest addgroup() calls')
2275
2276         nodes = []
2277
2278         r = len(self)
2279         end = 0
2280         if r:
2281             end = self.end(r - 1)
2282         ifh = self._indexfp(b"a+")
2283         isize = r * self._io.size
2284         if self._inline:
2285             transaction.add(self.indexfile, end + isize, r)
2286             dfh = None
2287         else:
2288             transaction.add(self.indexfile, isize, r)
2289             transaction.add(self.datafile, end)
2290             dfh = self._datafp(b"a+")
2291
2292         def flush():
2293             if dfh:
2294                 dfh.flush()
2295             ifh.flush()
2296
2297         self._writinghandles = (ifh, dfh)
2298
2299         try:
2300             deltacomputer = deltautil.deltacomputer(self)
2301             # loop through our set of deltas
2302             for data in deltas:
2303                 node, p1, p2, linknode, deltabase, delta, flags = data
2304                 link = linkmapper(linknode)
2305                 flags = flags or REVIDX_DEFAULT_FLAGS
2306
2307                 nodes.append(node)
2308
2309                 if node in self.nodemap:
2310                     self._nodeduplicatecallback(transaction, node)
2311                     # this can happen if two branches make the same change
2312                     continue
2313
2314                 for p in (p1, p2):
2315                     if p not in self.nodemap:
2316                         raise error.LookupError(
2317                             p, self.indexfile, _(b'unknown parent')
2318                         )
2319
2320                 if deltabase not in self.nodemap:
2321                     raise error.LookupError(
2322                         deltabase, self.indexfile, _(b'unknown delta base')
2323                     )
2324
2325                 baserev = self.rev(deltabase)
2326
2327                 if baserev != nullrev and self.iscensored(baserev):
2328                     # if base is censored, delta must be full replacement in a
2329                     # single patch operation
2330                     hlen = struct.calcsize(b">lll")
2331                     oldlen = self.rawsize(baserev)
2332                     newlen = len(delta) - hlen
2333                     if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2334                         raise error.CensoredBaseError(
2335                             self.indexfile, self.node(baserev)
2336                         )
2337
2338                 if not flags and self._peek_iscensored(baserev, delta, flush):
2339                     flags |= REVIDX_ISCENSORED
2340
2341                 # We assume consumers of addrevisioncb will want to retrieve
2342                 # the added revision, which will require a call to
2343                 # revision(). revision() will fast path if there is a cache
2344                 # hit. So, we tell _addrevision() to always cache in this case.
2345                 # We're only using addgroup() in the context of changegroup
2346                 # generation so the revision data can always be handled as raw
2347                 # by the flagprocessor.
2348                 self._addrevision(
2349                     node,
2350                     None,
2351                     transaction,
2352                     link,
2353                     p1,
2354                     p2,
2355                     flags,
2356                     (baserev, delta),
2357                     ifh,
2358                     dfh,
2359                     alwayscache=bool(addrevisioncb),
2360                     deltacomputer=deltacomputer,
2361                 )
2362
2363                 if addrevisioncb:
2364                     addrevisioncb(self, node)
2365
2366                 if not dfh and not self._inline:
2367                     # addrevision switched from inline to conventional
2368                     # reopen the index
2369                     ifh.close()
2370                     dfh = self._datafp(b"a+")
2371                     ifh = self._indexfp(b"a+")
2372                     self._writinghandles = (ifh, dfh)
2373         finally:
2374             self._writinghandles = None
2375
2376             if dfh:
2377                 dfh.close()
2378             ifh.close()
2379
2380         return nodes
2381
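# Editor's note: the shape of the items consumed from ``deltas`` in
# addgroup() above, as unpacked at the top of its loop. All values are
# illustrative placeholders.
delta_entry = (
    b"\x11" * 20,  # node: id of the revision being added
    b"\x22" * 20,  # p1
    b"\x33" * 20,  # p2
    b"\x44" * 20,  # linknode: mapped to a local linkrev via linkmapper()
    b"\x22" * 20,  # deltabase: node the delta applies against
    b"...",        # delta: the binary patch payload
    0,             # flags: falsy values fall back to REVIDX_DEFAULT_FLAGS
)
node, p1, p2, linknode, deltabase, delta, flags = delta_entry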
2382     def iscensored(self, rev):
2383         """Check if a file revision is censored."""
2384         if not self._censorable:
2385             return False
2386
2387         return self.flags(rev) & REVIDX_ISCENSORED
2388
2389     def _peek_iscensored(self, baserev, delta, flush):
2390         """Quickly check if a delta produces a censored revision."""
2391         if not self._censorable:
2392             return False
2393
2394         return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2395
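# Editor's note: iscensored() above is a single flag test. A sketch with
# the bit value assumed from revlogutils.flagutil (REVIDX_ISCENSORED is
# 1 << 15 in this version; treat that as an assumption, not API).
SKETCH_REVIDX_ISCENSORED = 1 << 15

def sketch_iscensored(flags, censorable=True):
    if not censorable:
        return False
    return bool(flags & SKETCH_REVIDX_ISCENSORED)

assert sketch_iscensored(SKETCH_REVIDX_ISCENSORED)
assert not sketch_iscensored(0)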
2396     def getstrippoint(self, minlink):
2397         """find the minimum rev that must be stripped to strip the linkrev
2398
2399         Returns a tuple containing the minimum rev and a set of all revs that
2400         have linkrevs that will be broken by this strip.
2401         """
2402         return storageutil.resolvestripinfo(
2403             minlink,
2404             len(self) - 1,
2405             self.headrevs(),
2406             self.linkrev,
2407             self.parentrevs,
2408         )
2409
2410     def strip(self, minlink, transaction):
2411         """truncate the revlog on the first revision with a linkrev >= minlink
2412
2413         This function is called when we're stripping revision minlink and
2414         its descendants from the repository.
2415
2416         We have to remove all revisions with linkrev >= minlink, because
2417         the equivalent changelog revisions will be renumbered after the
2418         strip.
2419
2420         So we truncate the revlog on the first of these revisions, and
2421         trust that the caller has saved the revisions that shouldn't be
2422         removed and that it'll re-add them after this truncation.
2423         """
2424         if len(self) == 0:
2425             return
2426
2427         rev, _ = self.getstrippoint(minlink)
2428         if rev == len(self):
2429             return
2430
2431         # first truncate the files on disk
2432         end = self.start(rev)
2433         if not self._inline:
2434             transaction.add(self.datafile, end)
2435             end = rev * self._io.size
2436         else:
2437             end += rev * self._io.size
2438
2439         transaction.add(self.indexfile, end)
2440
2441         # then reset internal state in memory to forget those revisions
2442         self._revisioncache = None
2443         self._chaininfocache = {}
2444         self._chunkclear()
2445
2446         del self.index[rev:-1]
2447         self._nodepos = None
2448
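# Editor's note: the truncation offsets strip() computes above, pulled
# out as a sketch. `iosize` is the fixed index entry size (self._io.size)
# and `datastart` is self.start(rev), the data offset of the first
# stripped revision.
def sketch_strip_offsets(inline, rev, datastart, iosize):
    if inline:
        # entries and data share the .i file, so both contribute
        return {"index": datastart + rev * iosize}
    # separate files: truncate data at its offset, index at whole entries
    return {"data": datastart, "index": rev * iosize}

assert sketch_strip_offsets(False, rev=3, datastart=500, iosize=64) == {
    "data": 500,
    "index": 192,
}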
2449     def checksize(self):
2450         """Check size of index and data files
2451
2452         return a (dd, di) tuple.
2453         - dd: extra bytes for the "data" file
2454         - di: extra bytes for the "index" file
2455
2456         A healthy revlog will return (0, 0).
2457         """
2458         expected = 0
2459         if len(self):
2460             expected = max(0, self.end(len(self) - 1))
2461
2462         try:
2463             with self._datafp() as f:
2464                 f.seek(0, io.SEEK_END)
2465                 actual = f.tell()
2466             dd = actual - expected
2467         except IOError as inst:
2468             if inst.errno != errno.ENOENT:
2469                 raise
2470             dd = 0
2471
2472         try:
2473             f = self.opener(self.indexfile)
2474             f.seek(0, io.SEEK_END)
2475             actual = f.tell()
2476             f.close()
2477             s = self._io.size
2478             i = max(0, actual // s)
2479             di = actual - (i * s)
2480             if self._inline:
2481                 databytes = 0
2482                 for r in self:
2483                     databytes += max(0, self.length(r))
2484                 dd = 0
2485                 di = actual - len(self) * s - databytes
2486         except IOError as inst:
2487             if inst.errno != errno.ENOENT:
2488                 raise
2489             di = 0
2490
2491         return (dd, di)
2492
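# Editor's note: a sketch of the index-side computation in checksize()
# above: di is what remains after dividing the index file size by the
# fixed entry size (self._io.size; 64 bytes per entry is assumed here
# for illustration).
def sketch_index_slack(indexsize, entrysize=64):
    whole_entries = indexsize // entrysize
    di = indexsize - whole_entries * entrysize
    return di

assert sketch_index_slack(640) == 0   # healthy: only whole entries
assert sketch_index_slack(650) == 10  # 10 trailing bytes -> corrupt tail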
2493     def files(self):
2494         res = [self.indexfile]
2495         if not self._inline:
2496             res.append(self.datafile)
2497         return res
2498
2499     def emitrevisions(
2500         self,
2501         nodes,
2502         nodesorder=None,
2503         revisiondata=False,
2504         assumehaveparentrevisions=False,
2505         deltamode=repository.CG_DELTAMODE_STD,
2506     ):
2507         if nodesorder not in (b'nodes', b'storage', b'linear', None):
2508             raise error.ProgrammingError(
2509                 b'unhandled value for nodesorder: %s' % nodesorder
2510             )
2511
2512         if nodesorder is None and not self._generaldelta:
2513             nodesorder = b'storage'
2514
2515         if (
2516             not self._storedeltachains
2517             and deltamode != repository.CG_DELTAMODE_PREV
2518         ):
2519             deltamode = repository.CG_DELTAMODE_FULL
2520
2521         return storageutil.emitrevisions(
2522             self,
2523             nodes,
2524             nodesorder,
2525             revlogrevisiondelta,
2526             deltaparentfn=self.deltaparent,
2527             candeltafn=self.candelta,
2528             rawsizefn=self.rawsize,
2529             revdifffn=self.revdiff,
2530             flagsfn=self.flags,
2531             deltamode=deltamode,
2532             revisiondata=revisiondata,
2533             assumehaveparentrevisions=assumehaveparentrevisions,
2534         )
2535
2536     DELTAREUSEALWAYS = b'always'
2537     DELTAREUSESAMEREVS = b'samerevs'
2538     DELTAREUSENEVER = b'never'
2539
2540     DELTAREUSEFULLADD = b'fulladd'
2541
2542     DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2543
2544     def clone(
2545         self,
2546         tr,
2547         destrevlog,
2548         addrevisioncb=None,
2549         deltareuse=DELTAREUSESAMEREVS,
2550         forcedeltabothparents=None,
2551         sidedatacompanion=None,
2552     ):
2553         """Copy this revlog to another, possibly with format changes.
2554
2555         The destination revlog will contain the same revisions and nodes.
2556         However, it may not be bit-for-bit identical due to e.g. delta encoding
2557         differences.
2558
2559         The ``deltareuse`` argument controls how deltas from the existing revlog
2560         are preserved in the destination revlog. The argument can have the
2561         following values:
2562
2563         DELTAREUSEALWAYS
2564           Deltas will always be reused (if possible), even if the destination
2565           revlog would not select the same revisions for the delta. This is the
2566           fastest mode of operation.
2567         DELTAREUSESAMEREVS
2568           Deltas will be reused if the destination revlog would pick the same
2569           revisions for the delta. This mode strikes a balance between speed
2570           and optimization.
2571         DELTAREUSENEVER
2572           Deltas will never be reused. This is the slowest mode of execution.
2573           This mode can be used to recompute deltas (e.g. if the diff/delta
2574           algorithm changes).
2575         DELTAREUSEFULLADD
2576           Revisions will be re-added as if they were new content. This is
2577           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2578           e.g. large file detection and handling.
2579
2580         Delta computation can be slow, so the choice of delta reuse policy can
2581         significantly affect run time.
2582
2583         The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2584         two extremes. Deltas will be reused if they are appropriate. But if the
2585         delta could choose a better revision, it will do so. This means if you
2586         are converting a non-generaldelta revlog to a generaldelta revlog,
2587         deltas will be recomputed if the delta's parent isn't a parent of the
2588         revision.
2589
2590         In addition to the delta policy, the ``forcedeltabothparents``
2591         argument controls whether to force compute deltas against both parents
2592         for merges. By default, the current default is used.
2593
2594         If not None, ``sidedatacompanion`` is a callable that accepts two
2595         arguments:
2596
2597         (srcrevlog, rev)
2598
2599         and returns a triplet that controls changes to sidedata content from the
2600         old revision to the new clone result:
2601
2602         (dropall, filterout, update)
2603
2604         * if `dropall` is True, all sidedata should be dropped
2605         * `filterout` is a set of sidedata keys that should be dropped
2606         * `update` is a mapping of additional/new key -> value
2607         """
2608         if deltareuse not in self.DELTAREUSEALL:
2609             raise ValueError(
2610                 _(b'value for deltareuse invalid: %s') % deltareuse
2611             )
2612
2613         if len(destrevlog):
2614             raise ValueError(_(b'destination revlog is not empty'))
2615
2616         if getattr(self, 'filteredrevs', None):
2617             raise ValueError(_(b'source revlog has filtered revisions'))
2618         if getattr(destrevlog, 'filteredrevs', None):
2619             raise ValueError(_(b'destination revlog has filtered revisions'))
2620
2621         # lazydelta and lazydeltabase control whether to reuse a cached delta,
2622         # if possible.
2623         oldlazydelta = destrevlog._lazydelta
2624         oldlazydeltabase = destrevlog._lazydeltabase
2625         oldamd = destrevlog._deltabothparents
2626
2627         try:
2628             if deltareuse == self.DELTAREUSEALWAYS:
2629                 destrevlog._lazydeltabase = True
2630                 destrevlog._lazydelta = True
2631             elif deltareuse == self.DELTAREUSESAMEREVS:
2632                 destrevlog._lazydeltabase = False
2633                 destrevlog._lazydelta = True
2634             elif deltareuse == self.DELTAREUSENEVER:
2635                 destrevlog._lazydeltabase = False
2636                 destrevlog._lazydelta = False
2637
2638             destrevlog._deltabothparents = forcedeltabothparents or oldamd
2639
2640             self._clone(
2641                 tr,
2642                 destrevlog,
2643                 addrevisioncb,
2644                 deltareuse,
2645                 forcedeltabothparents,
2646                 sidedatacompanion,
2647             )
2648
2649         finally:
2650             destrevlog._lazydelta = oldlazydelta
2651             destrevlog._lazydeltabase = oldlazydeltabase
2652             destrevlog._deltabothparents = oldamd
2653
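# Editor's note: a hedged sketch of a ``sidedatacompanion`` callable
# satisfying the contract documented in clone() above: it receives
# (srcrevlog, rev) and returns (dropall, filterout, update). The key
# used here is purely illustrative.
def sketch_sidedatacompanion(srcrevlog, rev):
    dropall = False   # keep existing sidedata entries...
    filterout = {42}  # ...except key 42, which is dropped
    update = {}       # and add no new keys
    return dropall, filterout, update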
2654     def _clone(
2655         self,
2656         tr,
2657         destrevlog,
2658         addrevisioncb,
2659         deltareuse,
2660         forcedeltabothparents,
2661         sidedatacompanion,
2662     ):
2663         """perform the core duty of `revlog.clone` after parameter processing"""
2664         deltacomputer = deltautil.deltacomputer(destrevlog)
2665         index = self.index
2666         for rev in self:
2667             entry = index[rev]
2668
2669             # Some classes override linkrev to take filtered revs into
2670             # account. Use raw entry from index.
2671             flags = entry[0] & 0xFFFF
2672             linkrev = entry[4]
2673             p1 = index[entry[5]][7]
2674             p2 = index[entry[6]][7]
2675             node = entry[7]
2676
2677             sidedataactions = (False, [], {})
2678             if sidedatacompanion is not None:
2679                 sidedataactions = sidedatacompanion(self, rev)
2680
2681             # (Possibly) reuse the delta from the revlog if allowed and
2682             # the revlog chunk is a delta.
2683             cachedelta = None
2684             rawtext = None
2685             if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2686                 dropall, filterout, update = sidedataactions
2687                 text, sidedata = self._revisiondata(rev)
2688                 if dropall:
2689                     sidedata = {}
2690                 for key in filterout:
2691                     sidedata.pop(key, None)
2692                 sidedata.update(update)
2693                 if not sidedata:
2694                     sidedata = None
2695                 destrevlog.addrevision(
2696                     text,
2697                     tr,
2698                     linkrev,
2699                     p1,
2700                     p2,
2701                     cachedelta=cachedelta,
2702                     node=node,
2703                     flags=flags,
2704                     deltacomputer=deltacomputer,
2705                     sidedata=sidedata,
2706                 )
2707             else:
2708                 if destrevlog._lazydelta:
2709                     dp = self.deltaparent(rev)
2710                     if dp != nullrev:
2711                         cachedelta = (dp, bytes(self._chunk(rev)))
2712
2713                 if not cachedelta:
2714                     rawtext = self.rawdata(rev)
2715
2716                 ifh = destrevlog.opener(
2717                     destrevlog.indexfile, b'a+', checkambig=False
2718                 )
2719                 dfh = None
2720                 if not destrevlog._inline:
2721                     dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2722                 try:
2723                     destrevlog._addrevision(
2724                         node,
2725                         rawtext,
2726                         tr,
2727                         linkrev,
2728                         p1,
2729                         p2,
2730                         flags,
2731                         cachedelta,
2732                         ifh,
2733                         dfh,
2734                         deltacomputer=deltacomputer,
2735                     )
2736                 finally:
2737                     if dfh:
2738                         dfh.close()
2739                     ifh.close()
2740
2741             if addrevisioncb:
2742                 addrevisioncb(self, rev, node)
2743
2744     def censorrevision(self, tr, censornode, tombstone=b''):
2745         if (self.version & 0xFFFF) == REVLOGV0:
2746             raise error.RevlogError(
2747                 _(b'cannot censor with version %d revlogs') % self.version
2748             )
2749
2750         censorrev = self.rev(censornode)
2751         tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2752
2753         if len(tombstone) > self.rawsize(censorrev):
2754             raise error.Abort(
2755                 _(b'censor tombstone must be no longer than censored data')
2756             )
2757
2758         # Rewriting the revlog in place is hard. Our strategy for censoring is
2759         # to create a new revlog, copy all revisions to it, then replace the
2760         # revlogs on transaction close.
2761
2762         newindexfile = self.indexfile + b'.tmpcensored'
2763         newdatafile = self.datafile + b'.tmpcensored'
2764
2765         # This is a bit dangerous. We could easily have a mismatch of state.
2766         newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2767         newrl.version = self.version
2768         newrl._generaldelta = self._generaldelta
2769         newrl._io = self._io
2770
2771         for rev in self.revs():
2772             node = self.node(rev)
2773             p1, p2 = self.parents(node)
2774
2775             if rev == censorrev:
2776                 newrl.addrawrevision(
2777                     tombstone,
2778                     tr,
2779                     self.linkrev(censorrev),
2780                     p1,
2781                     p2,
2782                     censornode,
2783                     REVIDX_ISCENSORED,
2784                 )
2785
2786                 if newrl.deltaparent(rev) != nullrev:
2787                     raise error.Abort(
2788                         _(
2789                             b'censored revision stored as delta; '
2790                             b'cannot censor'
2791                         ),
2792                         hint=_(
2793                             b'censoring of revlogs is not '
2794                             b'fully implemented; please report '
2795                             b'this bug'
2796                         ),
2797                     )
2798                 continue
2799
2800             if self.iscensored(rev):
2801                 if self.deltaparent(rev) != nullrev:
2802                     raise error.Abort(
2803                         _(
2804                             b'cannot censor due to censored '
2805                             b'revision having delta stored'
2806                         )
2807                     )
2808                 rawtext = self._chunk(rev)
2809             else:
2810                 rawtext = self.rawdata(rev)
2811
2812             newrl.addrawrevision(
2813                 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2814             )
2815
2816         tr.addbackup(self.indexfile, location=b'store')
2817         if not self._inline:
2818             tr.addbackup(self.datafile, location=b'store')
2819
2820         self.opener.rename(newrl.indexfile, self.indexfile)
2821         if not self._inline:
2822             self.opener.rename(newrl.datafile, self.datafile)
2823
2824         self.clearcaches()
2825         self._loadindex()
2826
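# Editor's note: a sketch of the tombstone produced via
# storageutil.packmeta in censorrevision() above. The censor marker is
# stored as filelog-style metadata (framed by \x01\n lines) with an
# empty body; the exact framing is an assumption based on filelog's
# metadata format, not a documented API.
def sketch_packmeta(meta, text):
    lines = [b"%s: %s\n" % (key, meta[key]) for key in sorted(meta)]
    return b"\x01\n" + b"".join(lines) + b"\x01\n" + text

tombstone_sketch = sketch_packmeta({b"censored": b"removed for legal reasons"}, b"")
assert tombstone_sketch.startswith(b"\x01\n")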
2827 def verifyintegrity(self, state):
2827 def verifyintegrity(self, state):
2828 """Verifies the integrity of the revlog.
2828 """Verifies the integrity of the revlog.
2829
2829
2830 Yields ``revlogproblem`` instances describing problems that are
2830 Yields ``revlogproblem`` instances describing problems that are
2831 found.
2831 found.
2832 """
2832 """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM: length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).
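            #
            # Illustrative example (assumed values, not read from any real
            # revlog): a rename stores the filelog metadata header in the
            # rawtext, so for
            #   rawtext = b'\1\ncopy: a\ncopyrev: <40 hex digits>\n\1\nhi\n'
            #   text    = b'hi\n'
            # LM is the length of everything up to and including the second
            # b'\1\n' marker, and len(read()) == L2 - LM.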

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)
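
                # After masking, ``skipflags`` is non-zero only when this
                # revision actually carries one of the flags the caller
                # asked to skip; only those revisions go unread below.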
                if skipflags:
                    state[b'skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)
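
    # A sketch of how a caller might drive verifyintegrity (illustrative;
    # the real driver in mercurial/verify.py builds a richer ``state``):
    #
    #   state = {b'expectedversion': 1, b'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       ui.warn((problem.error or problem.warning) + b'\n')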

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
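
    # Example call (illustrative): callers request only the fields they
    # need, and keys that were not requested simply stay out of the
    # returned dict:
    #
    #   info = rl.storageinfo(revisionscount=True, storedsize=True)
    #   total_bytes = info[b'storedsize']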