##// END OF EJS Templates
revlog: avoid hard-coded hash sizes...
Joerg Sonnenberger -
r45596:cc257292 default
parent child Browse files
Show More
@@ -1,3068 +1,3068 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import io
19 import io
20 import os
20 import os
21 import struct
21 import struct
22 import zlib
22 import zlib
23
23
24 # import stuff from node for others to import from revlog
24 # import stuff from node for others to import from revlog
25 from .node import (
25 from .node import (
26 bin,
26 bin,
27 hex,
27 hex,
28 nullhex,
28 nullhex,
29 nullid,
29 nullid,
30 nullrev,
30 nullrev,
31 short,
31 short,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .pycompat import getattr
38 from .pycompat import getattr
39 from .revlogutils.constants import (
39 from .revlogutils.constants import (
40 FLAG_GENERALDELTA,
40 FLAG_GENERALDELTA,
41 FLAG_INLINE_DATA,
41 FLAG_INLINE_DATA,
42 REVLOGV0,
42 REVLOGV0,
43 REVLOGV1,
43 REVLOGV1,
44 REVLOGV1_FLAGS,
44 REVLOGV1_FLAGS,
45 REVLOGV2,
45 REVLOGV2,
46 REVLOGV2_FLAGS,
46 REVLOGV2_FLAGS,
47 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FLAGS,
48 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_FORMAT,
49 REVLOG_DEFAULT_VERSION,
49 REVLOG_DEFAULT_VERSION,
50 )
50 )
51 from .revlogutils.flagutil import (
51 from .revlogutils.flagutil import (
52 REVIDX_DEFAULT_FLAGS,
52 REVIDX_DEFAULT_FLAGS,
53 REVIDX_ELLIPSIS,
53 REVIDX_ELLIPSIS,
54 REVIDX_EXTSTORED,
54 REVIDX_EXTSTORED,
55 REVIDX_FLAGS_ORDER,
55 REVIDX_FLAGS_ORDER,
56 REVIDX_ISCENSORED,
56 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 REVIDX_SIDEDATA,
58 REVIDX_SIDEDATA,
59 )
59 )
60 from .thirdparty import attr
60 from .thirdparty import attr
61 from . import (
61 from . import (
62 ancestor,
62 ancestor,
63 dagop,
63 dagop,
64 error,
64 error,
65 mdiff,
65 mdiff,
66 policy,
66 policy,
67 pycompat,
67 pycompat,
68 templatefilters,
68 templatefilters,
69 util,
69 util,
70 )
70 )
71 from .interfaces import (
71 from .interfaces import (
72 repository,
72 repository,
73 util as interfaceutil,
73 util as interfaceutil,
74 )
74 )
75 from .revlogutils import (
75 from .revlogutils import (
76 deltas as deltautil,
76 deltas as deltautil,
77 flagutil,
77 flagutil,
78 nodemap as nodemaputil,
78 nodemap as nodemaputil,
79 sidedata as sidedatautil,
79 sidedata as sidedatautil,
80 )
80 )
81 from .utils import (
81 from .utils import (
82 storageutil,
82 storageutil,
83 stringutil,
83 stringutil,
84 )
84 )
85
85
# Bare references to the imported names so they stay in this module's
# namespace (extensions import them from here) while keeping pyflakes
# from flagging them as unused imports.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS
105
105
# C implementation of the index parser (see policy.importmod).
parsers = policy.importmod('parsers')
# Optional Rust implementations — presumably None when the Rust
# extensions are unavailable (see policy.importrust).
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
# chunk size used when reading revlog data in pieces
_chunksize = 1048576
117
117
118 # Flag processors for REVIDX_ELLIPSIS.
118 # Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    """Read-side flag processor for REVIDX_ELLIPSIS: pass text through.

    Returns ``(text, validatehash, sidedata)`` with no transformation,
    no hash validation requested, and empty side data.
    """
    return (text, False, {})
121
121
122
122
def ellipsiswriteprocessor(rl, text, sidedata):
    """Write-side flag processor for REVIDX_ELLIPSIS: pass text through.

    Returns ``(text, validatehash)`` unchanged and with hash validation
    disabled.
    """
    return (text, False)
125
125
126
126
def ellipsisrawprocessor(rl, text):
    """Raw-side flag processor for REVIDX_ELLIPSIS.

    Always returns False, signalling that the raw text of an ellipsis
    revision should not be hash-validated.
    """
    return False
129
129
130
130
# (read, write, raw) flag-processor triple registered for REVIDX_ELLIPSIS
# when the 'enableellipsis' option is set (see revlog._loadindex).
ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
136
136
137
137
def getoffset(q):
    """Return the offset half of a packed ``offset << 16 | flags`` value."""
    return int(q // 0x10000)
140
140
141
141
def gettype(q):
    """Return the flag bits (low 16 bits) of a packed index entry value."""
    return int(q % 0x10000)
144
144
145
145
def offset_type(offset, type):
    """Pack ``offset`` and flag bits ``type`` into one index value.

    Raises ValueError when ``type`` contains bits outside the known
    revlog index flags.
    """
    unknown = type & ~flagutil.REVIDX_KNOWN_FLAGS
    if unknown != 0:
        raise ValueError(b'unknown revlog index flags')
    return int((int(offset) << 16) | type)
150
150
151
151
152 def _verify_revision(rl, skipflags, state, node):
152 def _verify_revision(rl, skipflags, state, node):
153 """Verify the integrity of the given revlog ``node`` while providing a hook
153 """Verify the integrity of the given revlog ``node`` while providing a hook
154 point for extensions to influence the operation."""
154 point for extensions to influence the operation."""
155 if skipflags:
155 if skipflags:
156 state[b'skipread'].add(node)
156 state[b'skipread'].add(node)
157 else:
157 else:
158 # Side-effect: read content and verify hash.
158 # Side-effect: read content and verify hash.
159 rl.revision(node)
159 rl.revision(node)
160
160
161
161
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()  # expected hash (nodeid) of the revision
    p1 = attr.ib()  # first parent revision
    p2 = attr.ib()  # second parent revision
    btext = attr.ib()  # one-element list caching the built fulltext
    textlen = attr.ib()  # length of the fulltext
    cachedelta = attr.ib()  # (baserev, uncompressed_delta) or None
    flags = attr.ib()  # storage flags for the revision
181
181
182
182
@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    """Describes a revision as either a fulltext or a delta.

    Implements the ``repository.irevisiondelta`` interface; field
    semantics are defined by that interface.
    """

    node = attr.ib()  # nodeid of the revision described
    p1node = attr.ib()  # nodeid of the first parent
    p2node = attr.ib()  # nodeid of the second parent
    basenode = attr.ib()  # nodeid the delta applies against
    flags = attr.ib()  # revlog index flags for the revision
    baserevisionsize = attr.ib()  # size of the base revision, if known
    revision = attr.ib()  # fulltext, when supplied instead of a delta
    delta = attr.ib()  # delta against basenode, when supplied
    linknode = attr.ib(default=None)  # linked changelog node, if any
195
195
196
196
@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    """A problem found while verifying a revlog (iverifyproblem)."""

    warning = attr.ib(default=None)  # warning message, when non-fatal
    error = attr.ib(default=None)  # error message, when fatal
    node = attr.ib(default=None)  # node the problem applies to, if any
203
203
204
204
# On-disk layout of one version 0 index record:
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(b">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack  # pre-bound for speed
indexformatv0_unpack = indexformatv0.unpack  # pre-bound for speed
216
216
217
217
class revlogoldindex(list):
    """Index object for version 0 revlogs, emulating the v1 index API.

    Entries are stored as v1-style 8-tuples; ``entry[7]`` is the nodeid.
    """

    @property
    def nodemap(self):
        """Deprecated accessor for the node -> rev mapping."""
        msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self._nodemap

    @util.propertycache
    def _nodemap(self):
        # Lazily built node -> rev mapping, seeded with the null node.
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        # Use pycompat.xrange for consistency with __delitem__ (and to
        # avoid materializing a list on Python 2).
        for r in pycompat.xrange(len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def has_node(self, node):
        """return True if the node exist in the index"""
        return node in self._nodemap

    def rev(self, node):
        """return a revision for a node

        If the node is unknown, raise a RevlogError"""
        return self._nodemap[node]

    def get_rev(self, node):
        """return a revision for a node

        If the node is unknown, return None"""
        return self._nodemap.get(node)

    def append(self, tup):
        # Keep the node map in sync as entries are appended.
        self._nodemap[tup[7]] = len(self)
        super(revlogoldindex, self).append(tup)

    def __delitem__(self, i):
        # Only truncation from the end (a:-1 slices) is supported.
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        for r in pycompat.xrange(i.start, len(self)):
            del self._nodemap[self[r][7]]
        super(revlogoldindex, self).__delitem__(i)

    def clearcaches(self):
        # Drop the lazily computed node map; it is rebuilt on next access.
        self.__dict__.pop('_nodemap', None)

    def __getitem__(self, i):
        # rev -1 is the virtual null revision.
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)
267
267
268
268
class revlogoldio(object):
    """I/O helper that reads and writes the version 0 revlog index format."""

    def __init__(self):
        # Fixed byte size of one v0 index record.
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        """Parse raw v0 index ``data`` into a revlogoldindex.

        Returns ``(index, None)``; v0 has no inline data cache. Each v0
        record is converted to a v1-style entry, translating parent
        nodeids into revision numbers via the node map built so far.
        """
        s = self.size
        index = []
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,  # v0 stores no uncompressed length
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        index = revlogoldindex(index)
        return index, None

    def packentry(self, entry, node, version, rev):
        """Pack a v1-style index ``entry`` into the v0 on-disk format.

        ``node`` is a callable mapping a revision number to its nodeid,
        used to convert parent revision numbers back to nodeids. Raises
        RevlogError if the entry carries flags, which v0 cannot store.
        """
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)
316
316
317
317
# On-disk layout of one "ng" (v1/v2) index record:
# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
# The 32-byte nodeid field is packed as a 20-byte hash plus 12 bytes of
# padding ("20s12x").
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack  # pre-bound for speed
# The first entry's leading 4 bytes hold the revlog version/flags header.
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF
337
337
338
338
class revlogio(object):
    """I/O helper for the v1/v2 ("ng") revlog index format."""

    def __init__(self):
        # Fixed byte size of one index record.
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # Delegate to the C implementation for speed.
        return parsers.parse_index2(data, inline)

    def packentry(self, entry, node, version, rev):
        packed = indexformatng_pack(*entry)
        if rev == 0:
            # The first entry's leading 4 bytes double as the revlog
            # version/flags header.
            packed = versionformat_pack(version) + packed[4:]
        return packed
353
353
354
354
# Replaced with a class below only when the C parser exposes the devel
# nodemap entry point; stays None otherwise.
NodemapRevlogIO = None

if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    class NodemapRevlogIO(revlogio):
        """A debug oriented IO class that return a PersistentNodeMapIndexObject

        The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
        """

        def parseindex(self, data, inline):
            index, cache = parsers.parse_index_devel_nodemap(data, inline)
            return index, cache
369
369
class rustrevlogio(revlogio):
    """revlogio variant that wraps the parsed index for the Rust code."""

    def parseindex(self, data, inline):
        # Parse with the regular (C) parser, then wrap the index in a
        # MixedIndex so Rust code can consume it.
        idx, cache = super(rustrevlogio, self).parseindex(data, inline)
        return rustrevlog.MixedIndex(idx), cache
374
374
375
375
376 class revlog(object):
376 class revlog(object):
377 """
377 """
378 the underlying revision storage object
378 the underlying revision storage object
379
379
380 A revlog consists of two parts, an index and the revision data.
380 A revlog consists of two parts, an index and the revision data.
381
381
382 The index is a file with a fixed record size containing
382 The index is a file with a fixed record size containing
383 information on each revision, including its nodeid (hash), the
383 information on each revision, including its nodeid (hash), the
384 nodeids of its parents, the position and offset of its data within
384 nodeids of its parents, the position and offset of its data within
385 the data file, and the revision it's based on. Finally, each entry
385 the data file, and the revision it's based on. Finally, each entry
386 contains a linkrev entry that can serve as a pointer to external
386 contains a linkrev entry that can serve as a pointer to external
387 data.
387 data.
388
388
389 The revision data itself is a linear collection of data chunks.
389 The revision data itself is a linear collection of data chunks.
390 Each chunk represents a revision and is usually represented as a
390 Each chunk represents a revision and is usually represented as a
391 delta against the previous chunk. To bound lookup time, runs of
391 delta against the previous chunk. To bound lookup time, runs of
392 deltas are limited to about 2 times the length of the original
392 deltas are limited to about 2 times the length of the original
393 version data. This makes retrieval of a version proportional to
393 version data. This makes retrieval of a version proportional to
394 its size, or O(1) relative to the number of revisions.
394 its size, or O(1) relative to the number of revisions.
395
395
396 Both pieces of the revlog are written to in an append-only
396 Both pieces of the revlog are written to in an append-only
397 fashion, which means we never need to rewrite a file to insert or
397 fashion, which means we never need to rewrite a file to insert or
398 remove data, and can use some simple techniques to avoid the need
398 remove data, and can use some simple techniques to avoid the need
399 for locking while reading.
399 for locking while reading.
400
400
401 If checkambig, indexfile is opened with checkambig=True at
401 If checkambig, indexfile is opened with checkambig=True at
402 writing, to avoid file stat ambiguity.
402 writing, to avoid file stat ambiguity.
403
403
404 If mmaplargeindex is True, and an mmapindexthreshold is set, the
404 If mmaplargeindex is True, and an mmapindexthreshold is set, the
405 index will be mmapped rather than read if it is larger than the
405 index will be mmapped rather than read if it is larger than the
406 configured threshold.
406 configured threshold.
407
407
408 If censorable is True, the revlog can have censored revisions.
408 If censorable is True, the revlog can have censored revisions.
409
409
410 If `upperboundcomp` is not None, this is the expected maximal gain from
410 If `upperboundcomp` is not None, this is the expected maximal gain from
411 compression for the data content.
411 compression for the data content.
412 """
412 """
413
413
414 _flagserrorclass = error.RevlogError
414 _flagserrorclass = error.RevlogError
415
415
416 def __init__(
416 def __init__(
417 self,
417 self,
418 opener,
418 opener,
419 indexfile,
419 indexfile,
420 datafile=None,
420 datafile=None,
421 checkambig=False,
421 checkambig=False,
422 mmaplargeindex=False,
422 mmaplargeindex=False,
423 censorable=False,
423 censorable=False,
424 upperboundcomp=None,
424 upperboundcomp=None,
425 persistentnodemap=False,
425 persistentnodemap=False,
426 ):
426 ):
427 """
427 """
428 create a revlog object
428 create a revlog object
429
429
430 opener is a function that abstracts the file opening operation
430 opener is a function that abstracts the file opening operation
431 and can be used to implement COW semantics or the like.
431 and can be used to implement COW semantics or the like.
432
432
433 """
433 """
434 self.upperboundcomp = upperboundcomp
434 self.upperboundcomp = upperboundcomp
435 self.indexfile = indexfile
435 self.indexfile = indexfile
436 self.datafile = datafile or (indexfile[:-2] + b".d")
436 self.datafile = datafile or (indexfile[:-2] + b".d")
437 self.nodemap_file = None
437 self.nodemap_file = None
438 if persistentnodemap:
438 if persistentnodemap:
439 if indexfile.endswith(b'.a'):
439 if indexfile.endswith(b'.a'):
440 pending_path = indexfile[:-4] + b".n.a"
440 pending_path = indexfile[:-4] + b".n.a"
441 if opener.exists(pending_path):
441 if opener.exists(pending_path):
442 self.nodemap_file = pending_path
442 self.nodemap_file = pending_path
443 else:
443 else:
444 self.nodemap_file = indexfile[:-4] + b".n"
444 self.nodemap_file = indexfile[:-4] + b".n"
445 else:
445 else:
446 self.nodemap_file = indexfile[:-2] + b".n"
446 self.nodemap_file = indexfile[:-2] + b".n"
447
447
448 self.opener = opener
448 self.opener = opener
449 # When True, indexfile is opened with checkambig=True at writing, to
449 # When True, indexfile is opened with checkambig=True at writing, to
450 # avoid file stat ambiguity.
450 # avoid file stat ambiguity.
451 self._checkambig = checkambig
451 self._checkambig = checkambig
452 self._mmaplargeindex = mmaplargeindex
452 self._mmaplargeindex = mmaplargeindex
453 self._censorable = censorable
453 self._censorable = censorable
454 # 3-tuple of (node, rev, text) for a raw revision.
454 # 3-tuple of (node, rev, text) for a raw revision.
455 self._revisioncache = None
455 self._revisioncache = None
456 # Maps rev to chain base rev.
456 # Maps rev to chain base rev.
457 self._chainbasecache = util.lrucachedict(100)
457 self._chainbasecache = util.lrucachedict(100)
458 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
458 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
459 self._chunkcache = (0, b'')
459 self._chunkcache = (0, b'')
460 # How much data to read and cache into the raw revlog data cache.
460 # How much data to read and cache into the raw revlog data cache.
461 self._chunkcachesize = 65536
461 self._chunkcachesize = 65536
462 self._maxchainlen = None
462 self._maxchainlen = None
463 self._deltabothparents = True
463 self._deltabothparents = True
464 self.index = None
464 self.index = None
465 self._nodemap_docket = None
465 self._nodemap_docket = None
466 # Mapping of partial identifiers to full nodes.
466 # Mapping of partial identifiers to full nodes.
467 self._pcache = {}
467 self._pcache = {}
468 # Mapping of revision integer to full node.
468 # Mapping of revision integer to full node.
469 self._compengine = b'zlib'
469 self._compengine = b'zlib'
470 self._compengineopts = {}
470 self._compengineopts = {}
471 self._maxdeltachainspan = -1
471 self._maxdeltachainspan = -1
472 self._withsparseread = False
472 self._withsparseread = False
473 self._sparserevlog = False
473 self._sparserevlog = False
474 self._srdensitythreshold = 0.50
474 self._srdensitythreshold = 0.50
475 self._srmingapsize = 262144
475 self._srmingapsize = 262144
476
476
477 # Make copy of flag processors so each revlog instance can support
477 # Make copy of flag processors so each revlog instance can support
478 # custom flags.
478 # custom flags.
479 self._flagprocessors = dict(flagutil.flagprocessors)
479 self._flagprocessors = dict(flagutil.flagprocessors)
480
480
481 # 2-tuple of file handles being used for active writing.
481 # 2-tuple of file handles being used for active writing.
482 self._writinghandles = None
482 self._writinghandles = None
483
483
484 self._loadindex()
484 self._loadindex()
485
485
486 def _loadindex(self):
486 def _loadindex(self):
487 mmapindexthreshold = None
487 mmapindexthreshold = None
488 opts = self.opener.options
488 opts = self.opener.options
489
489
490 if b'revlogv2' in opts:
490 if b'revlogv2' in opts:
491 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
491 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
492 elif b'revlogv1' in opts:
492 elif b'revlogv1' in opts:
493 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
493 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
494 if b'generaldelta' in opts:
494 if b'generaldelta' in opts:
495 newversionflags |= FLAG_GENERALDELTA
495 newversionflags |= FLAG_GENERALDELTA
496 elif b'revlogv0' in self.opener.options:
496 elif b'revlogv0' in self.opener.options:
497 newversionflags = REVLOGV0
497 newversionflags = REVLOGV0
498 else:
498 else:
499 newversionflags = REVLOG_DEFAULT_VERSION
499 newversionflags = REVLOG_DEFAULT_VERSION
500
500
501 if b'chunkcachesize' in opts:
501 if b'chunkcachesize' in opts:
502 self._chunkcachesize = opts[b'chunkcachesize']
502 self._chunkcachesize = opts[b'chunkcachesize']
503 if b'maxchainlen' in opts:
503 if b'maxchainlen' in opts:
504 self._maxchainlen = opts[b'maxchainlen']
504 self._maxchainlen = opts[b'maxchainlen']
505 if b'deltabothparents' in opts:
505 if b'deltabothparents' in opts:
506 self._deltabothparents = opts[b'deltabothparents']
506 self._deltabothparents = opts[b'deltabothparents']
507 self._lazydelta = bool(opts.get(b'lazydelta', True))
507 self._lazydelta = bool(opts.get(b'lazydelta', True))
508 self._lazydeltabase = False
508 self._lazydeltabase = False
509 if self._lazydelta:
509 if self._lazydelta:
510 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
510 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
511 if b'compengine' in opts:
511 if b'compengine' in opts:
512 self._compengine = opts[b'compengine']
512 self._compengine = opts[b'compengine']
513 if b'zlib.level' in opts:
513 if b'zlib.level' in opts:
514 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
514 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
515 if b'zstd.level' in opts:
515 if b'zstd.level' in opts:
516 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
516 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
517 if b'maxdeltachainspan' in opts:
517 if b'maxdeltachainspan' in opts:
518 self._maxdeltachainspan = opts[b'maxdeltachainspan']
518 self._maxdeltachainspan = opts[b'maxdeltachainspan']
519 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
519 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
520 mmapindexthreshold = opts[b'mmapindexthreshold']
520 mmapindexthreshold = opts[b'mmapindexthreshold']
521 self.hassidedata = bool(opts.get(b'side-data', False))
521 self.hassidedata = bool(opts.get(b'side-data', False))
522 if self.hassidedata:
522 if self.hassidedata:
523 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
523 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
524 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
524 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
525 withsparseread = bool(opts.get(b'with-sparse-read', False))
525 withsparseread = bool(opts.get(b'with-sparse-read', False))
526 # sparse-revlog forces sparse-read
526 # sparse-revlog forces sparse-read
527 self._withsparseread = self._sparserevlog or withsparseread
527 self._withsparseread = self._sparserevlog or withsparseread
528 if b'sparse-read-density-threshold' in opts:
528 if b'sparse-read-density-threshold' in opts:
529 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
529 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
530 if b'sparse-read-min-gap-size' in opts:
530 if b'sparse-read-min-gap-size' in opts:
531 self._srmingapsize = opts[b'sparse-read-min-gap-size']
531 self._srmingapsize = opts[b'sparse-read-min-gap-size']
532 if opts.get(b'enableellipsis'):
532 if opts.get(b'enableellipsis'):
533 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
533 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
534
534
535 # revlog v0 doesn't have flag processors
535 # revlog v0 doesn't have flag processors
536 for flag, processor in pycompat.iteritems(
536 for flag, processor in pycompat.iteritems(
537 opts.get(b'flagprocessors', {})
537 opts.get(b'flagprocessors', {})
538 ):
538 ):
539 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
539 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
540
540
541 if self._chunkcachesize <= 0:
541 if self._chunkcachesize <= 0:
542 raise error.RevlogError(
542 raise error.RevlogError(
543 _(b'revlog chunk cache size %r is not greater than 0')
543 _(b'revlog chunk cache size %r is not greater than 0')
544 % self._chunkcachesize
544 % self._chunkcachesize
545 )
545 )
546 elif self._chunkcachesize & (self._chunkcachesize - 1):
546 elif self._chunkcachesize & (self._chunkcachesize - 1):
547 raise error.RevlogError(
547 raise error.RevlogError(
548 _(b'revlog chunk cache size %r is not a power of 2')
548 _(b'revlog chunk cache size %r is not a power of 2')
549 % self._chunkcachesize
549 % self._chunkcachesize
550 )
550 )
551
551
552 indexdata = b''
552 indexdata = b''
553 self._initempty = True
553 self._initempty = True
554 try:
554 try:
555 with self._indexfp() as f:
555 with self._indexfp() as f:
556 if (
556 if (
557 mmapindexthreshold is not None
557 mmapindexthreshold is not None
558 and self.opener.fstat(f).st_size >= mmapindexthreshold
558 and self.opener.fstat(f).st_size >= mmapindexthreshold
559 ):
559 ):
560 # TODO: should .close() to release resources without
560 # TODO: should .close() to release resources without
561 # relying on Python GC
561 # relying on Python GC
562 indexdata = util.buffer(util.mmapread(f))
562 indexdata = util.buffer(util.mmapread(f))
563 else:
563 else:
564 indexdata = f.read()
564 indexdata = f.read()
565 if len(indexdata) > 0:
565 if len(indexdata) > 0:
566 versionflags = versionformat_unpack(indexdata[:4])[0]
566 versionflags = versionformat_unpack(indexdata[:4])[0]
567 self._initempty = False
567 self._initempty = False
568 else:
568 else:
569 versionflags = newversionflags
569 versionflags = newversionflags
570 except IOError as inst:
570 except IOError as inst:
571 if inst.errno != errno.ENOENT:
571 if inst.errno != errno.ENOENT:
572 raise
572 raise
573
573
574 versionflags = newversionflags
574 versionflags = newversionflags
575
575
576 self.version = versionflags
576 self.version = versionflags
577
577
578 flags = versionflags & ~0xFFFF
578 flags = versionflags & ~0xFFFF
579 fmt = versionflags & 0xFFFF
579 fmt = versionflags & 0xFFFF
580
580
581 if fmt == REVLOGV0:
581 if fmt == REVLOGV0:
582 if flags:
582 if flags:
583 raise error.RevlogError(
583 raise error.RevlogError(
584 _(b'unknown flags (%#04x) in version %d revlog %s')
584 _(b'unknown flags (%#04x) in version %d revlog %s')
585 % (flags >> 16, fmt, self.indexfile)
585 % (flags >> 16, fmt, self.indexfile)
586 )
586 )
587
587
588 self._inline = False
588 self._inline = False
589 self._generaldelta = False
589 self._generaldelta = False
590
590
591 elif fmt == REVLOGV1:
591 elif fmt == REVLOGV1:
592 if flags & ~REVLOGV1_FLAGS:
592 if flags & ~REVLOGV1_FLAGS:
593 raise error.RevlogError(
593 raise error.RevlogError(
594 _(b'unknown flags (%#04x) in version %d revlog %s')
594 _(b'unknown flags (%#04x) in version %d revlog %s')
595 % (flags >> 16, fmt, self.indexfile)
595 % (flags >> 16, fmt, self.indexfile)
596 )
596 )
597
597
598 self._inline = versionflags & FLAG_INLINE_DATA
598 self._inline = versionflags & FLAG_INLINE_DATA
599 self._generaldelta = versionflags & FLAG_GENERALDELTA
599 self._generaldelta = versionflags & FLAG_GENERALDELTA
600
600
601 elif fmt == REVLOGV2:
601 elif fmt == REVLOGV2:
602 if flags & ~REVLOGV2_FLAGS:
602 if flags & ~REVLOGV2_FLAGS:
603 raise error.RevlogError(
603 raise error.RevlogError(
604 _(b'unknown flags (%#04x) in version %d revlog %s')
604 _(b'unknown flags (%#04x) in version %d revlog %s')
605 % (flags >> 16, fmt, self.indexfile)
605 % (flags >> 16, fmt, self.indexfile)
606 )
606 )
607
607
608 self._inline = versionflags & FLAG_INLINE_DATA
608 self._inline = versionflags & FLAG_INLINE_DATA
609 # generaldelta implied by version 2 revlogs.
609 # generaldelta implied by version 2 revlogs.
610 self._generaldelta = True
610 self._generaldelta = True
611
611
612 else:
612 else:
613 raise error.RevlogError(
613 raise error.RevlogError(
614 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
614 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
615 )
615 )
616 # sparse-revlog can't be on without general-delta (issue6056)
616 # sparse-revlog can't be on without general-delta (issue6056)
617 if not self._generaldelta:
617 if not self._generaldelta:
618 self._sparserevlog = False
618 self._sparserevlog = False
619
619
620 self._storedeltachains = True
620 self._storedeltachains = True
621
621
622 devel_nodemap = (
622 devel_nodemap = (
623 self.nodemap_file
623 self.nodemap_file
624 and opts.get(b'devel-force-nodemap', False)
624 and opts.get(b'devel-force-nodemap', False)
625 and NodemapRevlogIO is not None
625 and NodemapRevlogIO is not None
626 )
626 )
627
627
628 use_rust_index = False
628 use_rust_index = False
629 if rustrevlog is not None:
629 if rustrevlog is not None:
630 if self.nodemap_file is not None:
630 if self.nodemap_file is not None:
631 use_rust_index = True
631 use_rust_index = True
632 else:
632 else:
633 use_rust_index = self.opener.options.get(b'rust.index')
633 use_rust_index = self.opener.options.get(b'rust.index')
634
634
635 self._io = revlogio()
635 self._io = revlogio()
636 if self.version == REVLOGV0:
636 if self.version == REVLOGV0:
637 self._io = revlogoldio()
637 self._io = revlogoldio()
638 elif devel_nodemap:
638 elif devel_nodemap:
639 self._io = NodemapRevlogIO()
639 self._io = NodemapRevlogIO()
640 elif use_rust_index:
640 elif use_rust_index:
641 self._io = rustrevlogio()
641 self._io = rustrevlogio()
642 try:
642 try:
643 d = self._io.parseindex(indexdata, self._inline)
643 d = self._io.parseindex(indexdata, self._inline)
644 index, _chunkcache = d
644 index, _chunkcache = d
645 use_nodemap = (
645 use_nodemap = (
646 not self._inline
646 not self._inline
647 and self.nodemap_file is not None
647 and self.nodemap_file is not None
648 and util.safehasattr(index, 'update_nodemap_data')
648 and util.safehasattr(index, 'update_nodemap_data')
649 )
649 )
650 if use_nodemap:
650 if use_nodemap:
651 nodemap_data = nodemaputil.persisted_data(self)
651 nodemap_data = nodemaputil.persisted_data(self)
652 if nodemap_data is not None:
652 if nodemap_data is not None:
653 docket = nodemap_data[0]
653 docket = nodemap_data[0]
654 if (
654 if (
655 len(d[0]) > docket.tip_rev
655 len(d[0]) > docket.tip_rev
656 and d[0][docket.tip_rev][7] == docket.tip_node
656 and d[0][docket.tip_rev][7] == docket.tip_node
657 ):
657 ):
658 # no changelog tampering
658 # no changelog tampering
659 self._nodemap_docket = docket
659 self._nodemap_docket = docket
660 index.update_nodemap_data(*nodemap_data)
660 index.update_nodemap_data(*nodemap_data)
661 except (ValueError, IndexError):
661 except (ValueError, IndexError):
662 raise error.RevlogError(
662 raise error.RevlogError(
663 _(b"index %s is corrupted") % self.indexfile
663 _(b"index %s is corrupted") % self.indexfile
664 )
664 )
665 self.index, self._chunkcache = d
665 self.index, self._chunkcache = d
666 if not self._chunkcache:
666 if not self._chunkcache:
667 self._chunkclear()
667 self._chunkclear()
668 # revnum -> (chain-length, sum-delta-length)
668 # revnum -> (chain-length, sum-delta-length)
669 self._chaininfocache = {}
669 self._chaininfocache = {}
670 # revlog header -> revlog compressor
670 # revlog header -> revlog compressor
671 self._decompressors = {}
671 self._decompressors = {}
672
672
673 @util.propertycache
673 @util.propertycache
674 def _compressor(self):
674 def _compressor(self):
675 engine = util.compengines[self._compengine]
675 engine = util.compengines[self._compengine]
676 return engine.revlogcompressor(self._compengineopts)
676 return engine.revlogcompressor(self._compengineopts)
677
677
678 def _indexfp(self, mode=b'r'):
678 def _indexfp(self, mode=b'r'):
679 """file object for the revlog's index file"""
679 """file object for the revlog's index file"""
680 args = {'mode': mode}
680 args = {'mode': mode}
681 if mode != b'r':
681 if mode != b'r':
682 args['checkambig'] = self._checkambig
682 args['checkambig'] = self._checkambig
683 if mode == b'w':
683 if mode == b'w':
684 args['atomictemp'] = True
684 args['atomictemp'] = True
685 return self.opener(self.indexfile, **args)
685 return self.opener(self.indexfile, **args)
686
686
    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file

        Unlike ``_indexfp`` no ambiguity/atomic-temp handling is applied here.
        """
        return self.opener(self.datafile, mode=mode)
690
690
    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data

        Yields, in order of preference: the caller-supplied ``existingfp``,
        a file handle already open for writing, or a freshly opened handle.
        For inline revlogs the data lives in the index file, so the index
        handle is used in that case.
        """
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                # inline: data is stored in the index file itself
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp
716
716
717 def tiprev(self):
717 def tiprev(self):
718 return len(self.index) - 1
718 return len(self.index) - 1
719
719
720 def tip(self):
720 def tip(self):
721 return self.node(self.tiprev())
721 return self.node(self.tiprev())
722
722
723 def __contains__(self, rev):
723 def __contains__(self, rev):
724 return 0 <= rev < len(self)
724 return 0 <= rev < len(self)
725
725
726 def __len__(self):
726 def __len__(self):
727 return len(self.index)
727 return len(self.index)
728
728
    def __iter__(self):
        """Iterate over all revision numbers, in ascending order."""
        # pycompat.xrange keeps the iteration lazy on both Python 2 and 3
        return iter(pycompat.xrange(len(self)))
731
731
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        # bounds handling (and the meaning of ``stop``) is delegated to
        # storageutil.iterrevs
        return storageutil.iterrevs(len(self), start=start, stop=stop)
735
735
    @property
    def nodemap(self):
        """Deprecated node->rev mapping accessor.

        Emits a deprecation warning and forwards to ``self.index.nodemap``.
        """
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap
744
744
    @property
    def _nodecache(self):
        """Deprecated alias for ``self.index.nodemap`` (warns on access)."""
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap
750
750
751 def hasnode(self, node):
751 def hasnode(self, node):
752 try:
752 try:
753 self.rev(node)
753 self.rev(node)
754 return True
754 return True
755 except KeyError:
755 except KeyError:
756 return False
756 return False
757
757
758 def candelta(self, baserev, rev):
758 def candelta(self, baserev, rev):
759 """whether two revisions (baserev, rev) can be delta-ed or not"""
759 """whether two revisions (baserev, rev) can be delta-ed or not"""
760 # Disable delta if either rev requires a content-changing flag
760 # Disable delta if either rev requires a content-changing flag
761 # processor (ex. LFS). This is because such flag processor can alter
761 # processor (ex. LFS). This is because such flag processor can alter
762 # the rawtext content that the delta will be based on, and two clients
762 # the rawtext content that the delta will be based on, and two clients
763 # could have a same revlog node with different flags (i.e. different
763 # could have a same revlog node with different flags (i.e. different
764 # rawtext contents) and the delta could be incompatible.
764 # rawtext contents) and the delta could be incompatible.
765 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
765 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
766 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
766 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
767 ):
767 ):
768 return False
768 return False
769 return True
769 return True
770
770
    def update_caches(self, transaction):
        """Update persisted cache files (currently the persistent nodemap).

        With ``transaction`` set to None the nodemap is updated immediately;
        otherwise the update is registered with the transaction machinery.
        No-op when this revlog has no nodemap file.
        """
        if self.nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)
777
777
    def clearcaches(self):
        """Drop all in-memory caches, then re-load persisted nodemap data."""
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self.nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                # nodemap_data is (docket, ...) — keep the docket around and
                # feed the rest to the index
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)
797
797
    def rev(self, node):
        """Return the revision number for ``node``.

        Raises ``WdirUnsupported`` for the working-directory pseudo-nodes
        and ``LookupError`` for unknown nodes.
        """
        try:
            return self.index.rev(node)
        except TypeError:
            # a non-bytes argument is a programming error, not a lookup miss
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))
808
808
809 # Accessors for index entries.
809 # Accessors for index entries.
810
810
811 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
811 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
812 # are flags.
812 # are flags.
813 def start(self, rev):
813 def start(self, rev):
814 return int(self.index[rev][0] >> 16)
814 return int(self.index[rev][0] >> 16)
815
815
816 def flags(self, rev):
816 def flags(self, rev):
817 return self.index[rev][0] & 0xFFFF
817 return self.index[rev][0] & 0xFFFF
818
818
819 def length(self, rev):
819 def length(self, rev):
820 return self.index[rev][1]
820 return self.index[rev][1]
821
821
822 def rawsize(self, rev):
822 def rawsize(self, rev):
823 """return the length of the uncompressed text for a given revision"""
823 """return the length of the uncompressed text for a given revision"""
824 l = self.index[rev][2]
824 l = self.index[rev][2]
825 if l >= 0:
825 if l >= 0:
826 return l
826 return l
827
827
828 t = self.rawdata(rev)
828 t = self.rawdata(rev)
829 return len(t)
829 return len(t)
830
830
831 def size(self, rev):
831 def size(self, rev):
832 """length of non-raw text (processed by a "read" flag processor)"""
832 """length of non-raw text (processed by a "read" flag processor)"""
833 # fast path: if no "read" flag processor could change the content,
833 # fast path: if no "read" flag processor could change the content,
834 # size is rawsize. note: ELLIPSIS is known to not change the content.
834 # size is rawsize. note: ELLIPSIS is known to not change the content.
835 flags = self.flags(rev)
835 flags = self.flags(rev)
836 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
836 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
837 return self.rawsize(rev)
837 return self.rawsize(rev)
838
838
839 return len(self.revision(rev, raw=False))
839 return len(self.revision(rev, raw=False))
840
840
841 def chainbase(self, rev):
841 def chainbase(self, rev):
842 base = self._chainbasecache.get(rev)
842 base = self._chainbasecache.get(rev)
843 if base is not None:
843 if base is not None:
844 return base
844 return base
845
845
846 index = self.index
846 index = self.index
847 iterrev = rev
847 iterrev = rev
848 base = index[iterrev][3]
848 base = index[iterrev][3]
849 while base != iterrev:
849 while base != iterrev:
850 iterrev = base
850 iterrev = base
851 base = index[iterrev][3]
851 base = index[iterrev][3]
852
852
853 self._chainbasecache[rev] = base
853 self._chainbasecache[rev] = base
854 return base
854 return base
855
855
856 def linkrev(self, rev):
856 def linkrev(self, rev):
857 return self.index[rev][4]
857 return self.index[rev][4]
858
858
859 def parentrevs(self, rev):
859 def parentrevs(self, rev):
860 try:
860 try:
861 entry = self.index[rev]
861 entry = self.index[rev]
862 except IndexError:
862 except IndexError:
863 if rev == wdirrev:
863 if rev == wdirrev:
864 raise error.WdirUnsupported
864 raise error.WdirUnsupported
865 raise
865 raise
866
866
867 return entry[5], entry[6]
867 return entry[5], entry[6]
868
868
    # fast parentrevs(rev) where rev isn't filtered
    # NOTE(review): presumably kept as a direct reference so callers can
    # bypass any filtering layered onto parentrevs by subclasses — confirm
    _uncheckedparentrevs = parentrevs
871
871
872 def node(self, rev):
872 def node(self, rev):
873 try:
873 try:
874 return self.index[rev][7]
874 return self.index[rev][7]
875 except IndexError:
875 except IndexError:
876 if rev == wdirrev:
876 if rev == wdirrev:
877 raise error.WdirUnsupported
877 raise error.WdirUnsupported
878 raise
878 raise
879
879
880 # Derived from index values.
880 # Derived from index values.
881
881
882 def end(self, rev):
882 def end(self, rev):
883 return self.start(rev) + self.length(rev)
883 return self.start(rev) + self.length(rev)
884
884
885 def parents(self, node):
885 def parents(self, node):
886 i = self.index
886 i = self.index
887 d = i[self.rev(node)]
887 d = i[self.rev(node)]
888 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
888 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
889
889
890 def chainlen(self, rev):
890 def chainlen(self, rev):
891 return self._chaininfo(rev)[0]
891 return self._chaininfo(rev)[0]
892
892
    def _chaininfo(self, rev):
        """Return ``(chain-length, sum-of-compressed-delta-lengths)`` for rev.

        Results are memoized in ``self._chaininfocache``; the walk can also
        terminate early when it reaches an already-cached intermediate rev.
        """
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        # walk toward the chain base; entry slot 3 is the delta-base rev and
        # a self-referencing entry marks the base (full snapshot)
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                # without general delta the base is always the previous rev
                iterrev -= 1
            if iterrev in chaininfocache:
                # splice in the cached tail of the chain and stop walking
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
923
923
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        # entry slot 3 is the delta-base rev; a self-reference marks the base
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                # without general delta the base is always the previous rev
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            # reached the chain base; it is part of the chain
            chain.append(iterrev)
            stopped = False

        # the walk collected revs base-last; callers want ascending order
        chain.reverse()
        return chain, stopped
964
964
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        # prefer the Rust implementation when available; it walks the index
        # directly, while the Python fallback uses the parentrevs callable
        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
985
985
    def descendants(self, revs):
        """Generate descendant revisions of ``revs`` (delegates to dagop)."""
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)
988
988
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            # set-like view over a lazy iterable plus explicitly added
            # members; membership checks may consume the lazy side
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        # "inclusive": common revs themselves (and nullrev) count as had
        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
1052
1052
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        # prefer the Rust implementation when available
        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)
1068
1068
1069 def findmissingrevs(self, common=None, heads=None):
1069 def findmissingrevs(self, common=None, heads=None):
1070 """Return the revision numbers of the ancestors of heads that
1070 """Return the revision numbers of the ancestors of heads that
1071 are not ancestors of common.
1071 are not ancestors of common.
1072
1072
1073 More specifically, return a list of revision numbers corresponding to
1073 More specifically, return a list of revision numbers corresponding to
1074 nodes N such that every N satisfies the following constraints:
1074 nodes N such that every N satisfies the following constraints:
1075
1075
1076 1. N is an ancestor of some node in 'heads'
1076 1. N is an ancestor of some node in 'heads'
1077 2. N is not an ancestor of any node in 'common'
1077 2. N is not an ancestor of any node in 'common'
1078
1078
1079 The list is sorted by revision number, meaning it is
1079 The list is sorted by revision number, meaning it is
1080 topologically sorted.
1080 topologically sorted.
1081
1081
1082 'heads' and 'common' are both lists of revision numbers. If heads is
1082 'heads' and 'common' are both lists of revision numbers. If heads is
1083 not supplied, uses all of the revlog's heads. If common is not
1083 not supplied, uses all of the revlog's heads. If common is not
1084 supplied, uses nullid."""
1084 supplied, uses nullid."""
1085 if common is None:
1085 if common is None:
1086 common = [nullrev]
1086 common = [nullrev]
1087 if heads is None:
1087 if heads is None:
1088 heads = self.headrevs()
1088 heads = self.headrevs()
1089
1089
1090 inc = self.incrementalmissingrevs(common=common)
1090 inc = self.incrementalmissingrevs(common=common)
1091 return inc.missingancestors(heads)
1091 return inc.missingancestors(heads)
1092
1092
1093 def findmissing(self, common=None, heads=None):
1093 def findmissing(self, common=None, heads=None):
1094 """Return the ancestors of heads that are not ancestors of common.
1094 """Return the ancestors of heads that are not ancestors of common.
1095
1095
1096 More specifically, return a list of nodes N such that every N
1096 More specifically, return a list of nodes N such that every N
1097 satisfies the following constraints:
1097 satisfies the following constraints:
1098
1098
1099 1. N is an ancestor of some node in 'heads'
1099 1. N is an ancestor of some node in 'heads'
1100 2. N is not an ancestor of any node in 'common'
1100 2. N is not an ancestor of any node in 'common'
1101
1101
1102 The list is sorted by revision number, meaning it is
1102 The list is sorted by revision number, meaning it is
1103 topologically sorted.
1103 topologically sorted.
1104
1104
1105 'heads' and 'common' are both lists of node IDs. If heads is
1105 'heads' and 'common' are both lists of node IDs. If heads is
1106 not supplied, uses all of the revlog's heads. If common is not
1106 not supplied, uses all of the revlog's heads. If common is not
1107 supplied, uses nullid."""
1107 supplied, uses nullid."""
1108 if common is None:
1108 if common is None:
1109 common = [nullid]
1109 common = [nullid]
1110 if heads is None:
1110 if heads is None:
1111 heads = self.heads()
1111 heads = self.heads()
1112
1112
1113 common = [self.rev(n) for n in common]
1113 common = [self.rev(n) for n in common]
1114 heads = [self.rev(n) for n in heads]
1114 heads = [self.rev(n) for n in heads]
1115
1115
1116 inc = self.incrementalmissingrevs(common=common)
1116 inc = self.incrementalmissingrevs(common=common)
1117 return [self.node(r) for r in inc.missingancestors(heads)]
1117 return [self.node(r) for r in inc.missingancestors(heads)]
1118
1118
1119 def nodesbetween(self, roots=None, heads=None):
1119 def nodesbetween(self, roots=None, heads=None):
1120 """Return a topological path from 'roots' to 'heads'.
1120 """Return a topological path from 'roots' to 'heads'.
1121
1121
1122 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1122 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1123 topologically sorted list of all nodes N that satisfy both of
1123 topologically sorted list of all nodes N that satisfy both of
1124 these constraints:
1124 these constraints:
1125
1125
1126 1. N is a descendant of some node in 'roots'
1126 1. N is a descendant of some node in 'roots'
1127 2. N is an ancestor of some node in 'heads'
1127 2. N is an ancestor of some node in 'heads'
1128
1128
1129 Every node is considered to be both a descendant and an ancestor
1129 Every node is considered to be both a descendant and an ancestor
1130 of itself, so every reachable node in 'roots' and 'heads' will be
1130 of itself, so every reachable node in 'roots' and 'heads' will be
1131 included in 'nodes'.
1131 included in 'nodes'.
1132
1132
1133 'outroots' is the list of reachable nodes in 'roots', i.e., the
1133 'outroots' is the list of reachable nodes in 'roots', i.e., the
1134 subset of 'roots' that is returned in 'nodes'. Likewise,
1134 subset of 'roots' that is returned in 'nodes'. Likewise,
1135 'outheads' is the subset of 'heads' that is also in 'nodes'.
1135 'outheads' is the subset of 'heads' that is also in 'nodes'.
1136
1136
1137 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1137 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1138 unspecified, uses nullid as the only root. If 'heads' is
1138 unspecified, uses nullid as the only root. If 'heads' is
1139 unspecified, uses list of all of the revlog's heads."""
1139 unspecified, uses list of all of the revlog's heads."""
1140 nonodes = ([], [], [])
1140 nonodes = ([], [], [])
1141 if roots is not None:
1141 if roots is not None:
1142 roots = list(roots)
1142 roots = list(roots)
1143 if not roots:
1143 if not roots:
1144 return nonodes
1144 return nonodes
1145 lowestrev = min([self.rev(n) for n in roots])
1145 lowestrev = min([self.rev(n) for n in roots])
1146 else:
1146 else:
1147 roots = [nullid] # Everybody's a descendant of nullid
1147 roots = [nullid] # Everybody's a descendant of nullid
1148 lowestrev = nullrev
1148 lowestrev = nullrev
1149 if (lowestrev == nullrev) and (heads is None):
1149 if (lowestrev == nullrev) and (heads is None):
1150 # We want _all_ the nodes!
1150 # We want _all_ the nodes!
1151 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1151 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1152 if heads is None:
1152 if heads is None:
1153 # All nodes are ancestors, so the latest ancestor is the last
1153 # All nodes are ancestors, so the latest ancestor is the last
1154 # node.
1154 # node.
1155 highestrev = len(self) - 1
1155 highestrev = len(self) - 1
1156 # Set ancestors to None to signal that every node is an ancestor.
1156 # Set ancestors to None to signal that every node is an ancestor.
1157 ancestors = None
1157 ancestors = None
1158 # Set heads to an empty dictionary for later discovery of heads
1158 # Set heads to an empty dictionary for later discovery of heads
1159 heads = {}
1159 heads = {}
1160 else:
1160 else:
1161 heads = list(heads)
1161 heads = list(heads)
1162 if not heads:
1162 if not heads:
1163 return nonodes
1163 return nonodes
1164 ancestors = set()
1164 ancestors = set()
1165 # Turn heads into a dictionary so we can remove 'fake' heads.
1165 # Turn heads into a dictionary so we can remove 'fake' heads.
1166 # Also, later we will be using it to filter out the heads we can't
1166 # Also, later we will be using it to filter out the heads we can't
1167 # find from roots.
1167 # find from roots.
1168 heads = dict.fromkeys(heads, False)
1168 heads = dict.fromkeys(heads, False)
1169 # Start at the top and keep marking parents until we're done.
1169 # Start at the top and keep marking parents until we're done.
1170 nodestotag = set(heads)
1170 nodestotag = set(heads)
1171 # Remember where the top was so we can use it as a limit later.
1171 # Remember where the top was so we can use it as a limit later.
1172 highestrev = max([self.rev(n) for n in nodestotag])
1172 highestrev = max([self.rev(n) for n in nodestotag])
1173 while nodestotag:
1173 while nodestotag:
1174 # grab a node to tag
1174 # grab a node to tag
1175 n = nodestotag.pop()
1175 n = nodestotag.pop()
1176 # Never tag nullid
1176 # Never tag nullid
1177 if n == nullid:
1177 if n == nullid:
1178 continue
1178 continue
1179 # A node's revision number represents its place in a
1179 # A node's revision number represents its place in a
1180 # topologically sorted list of nodes.
1180 # topologically sorted list of nodes.
1181 r = self.rev(n)
1181 r = self.rev(n)
1182 if r >= lowestrev:
1182 if r >= lowestrev:
1183 if n not in ancestors:
1183 if n not in ancestors:
1184 # If we are possibly a descendant of one of the roots
1184 # If we are possibly a descendant of one of the roots
1185 # and we haven't already been marked as an ancestor
1185 # and we haven't already been marked as an ancestor
1186 ancestors.add(n) # Mark as ancestor
1186 ancestors.add(n) # Mark as ancestor
1187 # Add non-nullid parents to list of nodes to tag.
1187 # Add non-nullid parents to list of nodes to tag.
1188 nodestotag.update(
1188 nodestotag.update(
1189 [p for p in self.parents(n) if p != nullid]
1189 [p for p in self.parents(n) if p != nullid]
1190 )
1190 )
1191 elif n in heads: # We've seen it before, is it a fake head?
1191 elif n in heads: # We've seen it before, is it a fake head?
1192 # So it is, real heads should not be the ancestors of
1192 # So it is, real heads should not be the ancestors of
1193 # any other heads.
1193 # any other heads.
1194 heads.pop(n)
1194 heads.pop(n)
1195 if not ancestors:
1195 if not ancestors:
1196 return nonodes
1196 return nonodes
1197 # Now that we have our set of ancestors, we want to remove any
1197 # Now that we have our set of ancestors, we want to remove any
1198 # roots that are not ancestors.
1198 # roots that are not ancestors.
1199
1199
1200 # If one of the roots was nullid, everything is included anyway.
1200 # If one of the roots was nullid, everything is included anyway.
1201 if lowestrev > nullrev:
1201 if lowestrev > nullrev:
1202 # But, since we weren't, let's recompute the lowest rev to not
1202 # But, since we weren't, let's recompute the lowest rev to not
1203 # include roots that aren't ancestors.
1203 # include roots that aren't ancestors.
1204
1204
1205 # Filter out roots that aren't ancestors of heads
1205 # Filter out roots that aren't ancestors of heads
1206 roots = [root for root in roots if root in ancestors]
1206 roots = [root for root in roots if root in ancestors]
1207 # Recompute the lowest revision
1207 # Recompute the lowest revision
1208 if roots:
1208 if roots:
1209 lowestrev = min([self.rev(root) for root in roots])
1209 lowestrev = min([self.rev(root) for root in roots])
1210 else:
1210 else:
1211 # No more roots? Return empty list
1211 # No more roots? Return empty list
1212 return nonodes
1212 return nonodes
1213 else:
1213 else:
1214 # We are descending from nullid, and don't need to care about
1214 # We are descending from nullid, and don't need to care about
1215 # any other roots.
1215 # any other roots.
1216 lowestrev = nullrev
1216 lowestrev = nullrev
1217 roots = [nullid]
1217 roots = [nullid]
1218 # Transform our roots list into a set.
1218 # Transform our roots list into a set.
1219 descendants = set(roots)
1219 descendants = set(roots)
1220 # Also, keep the original roots so we can filter out roots that aren't
1220 # Also, keep the original roots so we can filter out roots that aren't
1221 # 'real' roots (i.e. are descended from other roots).
1221 # 'real' roots (i.e. are descended from other roots).
1222 roots = descendants.copy()
1222 roots = descendants.copy()
1223 # Our topologically sorted list of output nodes.
1223 # Our topologically sorted list of output nodes.
1224 orderedout = []
1224 orderedout = []
1225 # Don't start at nullid since we don't want nullid in our output list,
1225 # Don't start at nullid since we don't want nullid in our output list,
1226 # and if nullid shows up in descendants, empty parents will look like
1226 # and if nullid shows up in descendants, empty parents will look like
1227 # they're descendants.
1227 # they're descendants.
1228 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1228 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1229 n = self.node(r)
1229 n = self.node(r)
1230 isdescendant = False
1230 isdescendant = False
1231 if lowestrev == nullrev: # Everybody is a descendant of nullid
1231 if lowestrev == nullrev: # Everybody is a descendant of nullid
1232 isdescendant = True
1232 isdescendant = True
1233 elif n in descendants:
1233 elif n in descendants:
1234 # n is already a descendant
1234 # n is already a descendant
1235 isdescendant = True
1235 isdescendant = True
1236 # This check only needs to be done here because all the roots
1236 # This check only needs to be done here because all the roots
1237 # will start being marked is descendants before the loop.
1237 # will start being marked is descendants before the loop.
1238 if n in roots:
1238 if n in roots:
1239 # If n was a root, check if it's a 'real' root.
1239 # If n was a root, check if it's a 'real' root.
1240 p = tuple(self.parents(n))
1240 p = tuple(self.parents(n))
1241 # If any of its parents are descendants, it's not a root.
1241 # If any of its parents are descendants, it's not a root.
1242 if (p[0] in descendants) or (p[1] in descendants):
1242 if (p[0] in descendants) or (p[1] in descendants):
1243 roots.remove(n)
1243 roots.remove(n)
1244 else:
1244 else:
1245 p = tuple(self.parents(n))
1245 p = tuple(self.parents(n))
1246 # A node is a descendant if either of its parents are
1246 # A node is a descendant if either of its parents are
1247 # descendants. (We seeded the dependents list with the roots
1247 # descendants. (We seeded the dependents list with the roots
1248 # up there, remember?)
1248 # up there, remember?)
1249 if (p[0] in descendants) or (p[1] in descendants):
1249 if (p[0] in descendants) or (p[1] in descendants):
1250 descendants.add(n)
1250 descendants.add(n)
1251 isdescendant = True
1251 isdescendant = True
1252 if isdescendant and ((ancestors is None) or (n in ancestors)):
1252 if isdescendant and ((ancestors is None) or (n in ancestors)):
1253 # Only include nodes that are both descendants and ancestors.
1253 # Only include nodes that are both descendants and ancestors.
1254 orderedout.append(n)
1254 orderedout.append(n)
1255 if (ancestors is not None) and (n in heads):
1255 if (ancestors is not None) and (n in heads):
1256 # We're trying to figure out which heads are reachable
1256 # We're trying to figure out which heads are reachable
1257 # from roots.
1257 # from roots.
1258 # Mark this head as having been reached
1258 # Mark this head as having been reached
1259 heads[n] = True
1259 heads[n] = True
1260 elif ancestors is None:
1260 elif ancestors is None:
1261 # Otherwise, we're trying to discover the heads.
1261 # Otherwise, we're trying to discover the heads.
1262 # Assume this is a head because if it isn't, the next step
1262 # Assume this is a head because if it isn't, the next step
1263 # will eventually remove it.
1263 # will eventually remove it.
1264 heads[n] = True
1264 heads[n] = True
1265 # But, obviously its parents aren't.
1265 # But, obviously its parents aren't.
1266 for p in self.parents(n):
1266 for p in self.parents(n):
1267 heads.pop(p, None)
1267 heads.pop(p, None)
1268 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1268 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1269 roots = list(roots)
1269 roots = list(roots)
1270 assert orderedout
1270 assert orderedout
1271 assert roots
1271 assert roots
1272 assert heads
1272 assert heads
1273 return (orderedout, roots, heads)
1273 return (orderedout, roots, heads)
1274
1274
1275 def headrevs(self, revs=None):
1275 def headrevs(self, revs=None):
1276 if revs is None:
1276 if revs is None:
1277 try:
1277 try:
1278 return self.index.headrevs()
1278 return self.index.headrevs()
1279 except AttributeError:
1279 except AttributeError:
1280 return self._headrevs()
1280 return self._headrevs()
1281 if rustdagop is not None:
1281 if rustdagop is not None:
1282 return rustdagop.headrevs(self.index, revs)
1282 return rustdagop.headrevs(self.index, revs)
1283 return dagop.headrevs(revs, self._uncheckedparentrevs)
1283 return dagop.headrevs(revs, self._uncheckedparentrevs)
1284
1284
1285 def computephases(self, roots):
1285 def computephases(self, roots):
1286 return self.index.computephasesmapsets(roots)
1286 return self.index.computephasesmapsets(roots)
1287
1287
1288 def _headrevs(self):
1288 def _headrevs(self):
1289 count = len(self)
1289 count = len(self)
1290 if not count:
1290 if not count:
1291 return [nullrev]
1291 return [nullrev]
1292 # we won't iter over filtered rev so nobody is a head at start
1292 # we won't iter over filtered rev so nobody is a head at start
1293 ishead = [0] * (count + 1)
1293 ishead = [0] * (count + 1)
1294 index = self.index
1294 index = self.index
1295 for r in self:
1295 for r in self:
1296 ishead[r] = 1 # I may be an head
1296 ishead[r] = 1 # I may be an head
1297 e = index[r]
1297 e = index[r]
1298 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1298 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1299 return [r for r, val in enumerate(ishead) if val]
1299 return [r for r, val in enumerate(ishead) if val]
1300
1300
1301 def heads(self, start=None, stop=None):
1301 def heads(self, start=None, stop=None):
1302 """return the list of all nodes that have no children
1302 """return the list of all nodes that have no children
1303
1303
1304 if start is specified, only heads that are descendants of
1304 if start is specified, only heads that are descendants of
1305 start will be returned
1305 start will be returned
1306 if stop is specified, it will consider all the revs from stop
1306 if stop is specified, it will consider all the revs from stop
1307 as if they had no children
1307 as if they had no children
1308 """
1308 """
1309 if start is None and stop is None:
1309 if start is None and stop is None:
1310 if not len(self):
1310 if not len(self):
1311 return [nullid]
1311 return [nullid]
1312 return [self.node(r) for r in self.headrevs()]
1312 return [self.node(r) for r in self.headrevs()]
1313
1313
1314 if start is None:
1314 if start is None:
1315 start = nullrev
1315 start = nullrev
1316 else:
1316 else:
1317 start = self.rev(start)
1317 start = self.rev(start)
1318
1318
1319 stoprevs = {self.rev(n) for n in stop or []}
1319 stoprevs = {self.rev(n) for n in stop or []}
1320
1320
1321 revs = dagop.headrevssubset(
1321 revs = dagop.headrevssubset(
1322 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1322 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1323 )
1323 )
1324
1324
1325 return [self.node(rev) for rev in revs]
1325 return [self.node(rev) for rev in revs]
1326
1326
1327 def children(self, node):
1327 def children(self, node):
1328 """find the children of a given node"""
1328 """find the children of a given node"""
1329 c = []
1329 c = []
1330 p = self.rev(node)
1330 p = self.rev(node)
1331 for r in self.revs(start=p + 1):
1331 for r in self.revs(start=p + 1):
1332 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1332 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1333 if prevs:
1333 if prevs:
1334 for pr in prevs:
1334 for pr in prevs:
1335 if pr == p:
1335 if pr == p:
1336 c.append(self.node(r))
1336 c.append(self.node(r))
1337 elif p == nullrev:
1337 elif p == nullrev:
1338 c.append(self.node(r))
1338 c.append(self.node(r))
1339 return c
1339 return c
1340
1340
1341 def commonancestorsheads(self, a, b):
1341 def commonancestorsheads(self, a, b):
1342 """calculate all the heads of the common ancestors of nodes a and b"""
1342 """calculate all the heads of the common ancestors of nodes a and b"""
1343 a, b = self.rev(a), self.rev(b)
1343 a, b = self.rev(a), self.rev(b)
1344 ancs = self._commonancestorsheads(a, b)
1344 ancs = self._commonancestorsheads(a, b)
1345 return pycompat.maplist(self.node, ancs)
1345 return pycompat.maplist(self.node, ancs)
1346
1346
1347 def _commonancestorsheads(self, *revs):
1347 def _commonancestorsheads(self, *revs):
1348 """calculate all the heads of the common ancestors of revs"""
1348 """calculate all the heads of the common ancestors of revs"""
1349 try:
1349 try:
1350 ancs = self.index.commonancestorsheads(*revs)
1350 ancs = self.index.commonancestorsheads(*revs)
1351 except (AttributeError, OverflowError): # C implementation failed
1351 except (AttributeError, OverflowError): # C implementation failed
1352 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1352 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1353 return ancs
1353 return ancs
1354
1354
1355 def isancestor(self, a, b):
1355 def isancestor(self, a, b):
1356 """return True if node a is an ancestor of node b
1356 """return True if node a is an ancestor of node b
1357
1357
1358 A revision is considered an ancestor of itself."""
1358 A revision is considered an ancestor of itself."""
1359 a, b = self.rev(a), self.rev(b)
1359 a, b = self.rev(a), self.rev(b)
1360 return self.isancestorrev(a, b)
1360 return self.isancestorrev(a, b)
1361
1361
1362 def isancestorrev(self, a, b):
1362 def isancestorrev(self, a, b):
1363 """return True if revision a is an ancestor of revision b
1363 """return True if revision a is an ancestor of revision b
1364
1364
1365 A revision is considered an ancestor of itself.
1365 A revision is considered an ancestor of itself.
1366
1366
1367 The implementation of this is trivial but the use of
1367 The implementation of this is trivial but the use of
1368 reachableroots is not."""
1368 reachableroots is not."""
1369 if a == nullrev:
1369 if a == nullrev:
1370 return True
1370 return True
1371 elif a == b:
1371 elif a == b:
1372 return True
1372 return True
1373 elif a > b:
1373 elif a > b:
1374 return False
1374 return False
1375 return bool(self.reachableroots(a, [b], [a], includepath=False))
1375 return bool(self.reachableroots(a, [b], [a], includepath=False))
1376
1376
1377 def reachableroots(self, minroot, heads, roots, includepath=False):
1377 def reachableroots(self, minroot, heads, roots, includepath=False):
1378 """return (heads(::(<roots> and <roots>::<heads>)))
1378 """return (heads(::(<roots> and <roots>::<heads>)))
1379
1379
1380 If includepath is True, return (<roots>::<heads>)."""
1380 If includepath is True, return (<roots>::<heads>)."""
1381 try:
1381 try:
1382 return self.index.reachableroots2(
1382 return self.index.reachableroots2(
1383 minroot, heads, roots, includepath
1383 minroot, heads, roots, includepath
1384 )
1384 )
1385 except AttributeError:
1385 except AttributeError:
1386 return dagop._reachablerootspure(
1386 return dagop._reachablerootspure(
1387 self.parentrevs, minroot, roots, heads, includepath
1387 self.parentrevs, minroot, roots, heads, includepath
1388 )
1388 )
1389
1389
1390 def ancestor(self, a, b):
1390 def ancestor(self, a, b):
1391 """calculate the "best" common ancestor of nodes a and b"""
1391 """calculate the "best" common ancestor of nodes a and b"""
1392
1392
1393 a, b = self.rev(a), self.rev(b)
1393 a, b = self.rev(a), self.rev(b)
1394 try:
1394 try:
1395 ancs = self.index.ancestors(a, b)
1395 ancs = self.index.ancestors(a, b)
1396 except (AttributeError, OverflowError):
1396 except (AttributeError, OverflowError):
1397 ancs = ancestor.ancestors(self.parentrevs, a, b)
1397 ancs = ancestor.ancestors(self.parentrevs, a, b)
1398 if ancs:
1398 if ancs:
1399 # choose a consistent winner when there's a tie
1399 # choose a consistent winner when there's a tie
1400 return min(map(self.node, ancs))
1400 return min(map(self.node, ancs))
1401 return nullid
1401 return nullid
1402
1402
1403 def _match(self, id):
1403 def _match(self, id):
1404 if isinstance(id, int):
1404 if isinstance(id, int):
1405 # rev
1405 # rev
1406 return self.node(id)
1406 return self.node(id)
1407 if len(id) == 20:
1407 if len(id) == 20:
1408 # possibly a binary node
1408 # possibly a binary node
1409 # odds of a binary node being all hex in ASCII are 1 in 10**25
1409 # odds of a binary node being all hex in ASCII are 1 in 10**25
1410 try:
1410 try:
1411 node = id
1411 node = id
1412 self.rev(node) # quick search the index
1412 self.rev(node) # quick search the index
1413 return node
1413 return node
1414 except error.LookupError:
1414 except error.LookupError:
1415 pass # may be partial hex id
1415 pass # may be partial hex id
1416 try:
1416 try:
1417 # str(rev)
1417 # str(rev)
1418 rev = int(id)
1418 rev = int(id)
1419 if b"%d" % rev != id:
1419 if b"%d" % rev != id:
1420 raise ValueError
1420 raise ValueError
1421 if rev < 0:
1421 if rev < 0:
1422 rev = len(self) + rev
1422 rev = len(self) + rev
1423 if rev < 0 or rev >= len(self):
1423 if rev < 0 or rev >= len(self):
1424 raise ValueError
1424 raise ValueError
1425 return self.node(rev)
1425 return self.node(rev)
1426 except (ValueError, OverflowError):
1426 except (ValueError, OverflowError):
1427 pass
1427 pass
1428 if len(id) == 40:
1428 if len(id) == 40:
1429 try:
1429 try:
1430 # a full hex nodeid?
1430 # a full hex nodeid?
1431 node = bin(id)
1431 node = bin(id)
1432 self.rev(node)
1432 self.rev(node)
1433 return node
1433 return node
1434 except (TypeError, error.LookupError):
1434 except (TypeError, error.LookupError):
1435 pass
1435 pass
1436
1436
1437 def _partialmatch(self, id):
1437 def _partialmatch(self, id):
1438 # we don't care wdirfilenodeids as they should be always full hash
1438 # we don't care wdirfilenodeids as they should be always full hash
1439 maybewdir = wdirhex.startswith(id)
1439 maybewdir = wdirhex.startswith(id)
1440 try:
1440 try:
1441 partial = self.index.partialmatch(id)
1441 partial = self.index.partialmatch(id)
1442 if partial and self.hasnode(partial):
1442 if partial and self.hasnode(partial):
1443 if maybewdir:
1443 if maybewdir:
1444 # single 'ff...' match in radix tree, ambiguous with wdir
1444 # single 'ff...' match in radix tree, ambiguous with wdir
1445 raise error.RevlogError
1445 raise error.RevlogError
1446 return partial
1446 return partial
1447 if maybewdir:
1447 if maybewdir:
1448 # no 'ff...' match in radix tree, wdir identified
1448 # no 'ff...' match in radix tree, wdir identified
1449 raise error.WdirUnsupported
1449 raise error.WdirUnsupported
1450 return None
1450 return None
1451 except error.RevlogError:
1451 except error.RevlogError:
1452 # parsers.c radix tree lookup gave multiple matches
1452 # parsers.c radix tree lookup gave multiple matches
1453 # fast path: for unfiltered changelog, radix tree is accurate
1453 # fast path: for unfiltered changelog, radix tree is accurate
1454 if not getattr(self, 'filteredrevs', None):
1454 if not getattr(self, 'filteredrevs', None):
1455 raise error.AmbiguousPrefixLookupError(
1455 raise error.AmbiguousPrefixLookupError(
1456 id, self.indexfile, _(b'ambiguous identifier')
1456 id, self.indexfile, _(b'ambiguous identifier')
1457 )
1457 )
1458 # fall through to slow path that filters hidden revisions
1458 # fall through to slow path that filters hidden revisions
1459 except (AttributeError, ValueError):
1459 except (AttributeError, ValueError):
1460 # we are pure python, or key was too short to search radix tree
1460 # we are pure python, or key was too short to search radix tree
1461 pass
1461 pass
1462
1462
1463 if id in self._pcache:
1463 if id in self._pcache:
1464 return self._pcache[id]
1464 return self._pcache[id]
1465
1465
1466 if len(id) <= 40:
1466 if len(id) <= 40:
1467 try:
1467 try:
1468 # hex(node)[:...]
1468 # hex(node)[:...]
1469 l = len(id) // 2 # grab an even number of digits
1469 l = len(id) // 2 # grab an even number of digits
1470 prefix = bin(id[: l * 2])
1470 prefix = bin(id[: l * 2])
1471 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1471 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1472 nl = [
1472 nl = [
1473 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1473 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1474 ]
1474 ]
1475 if nullhex.startswith(id):
1475 if nullhex.startswith(id):
1476 nl.append(nullid)
1476 nl.append(nullid)
1477 if len(nl) > 0:
1477 if len(nl) > 0:
1478 if len(nl) == 1 and not maybewdir:
1478 if len(nl) == 1 and not maybewdir:
1479 self._pcache[id] = nl[0]
1479 self._pcache[id] = nl[0]
1480 return nl[0]
1480 return nl[0]
1481 raise error.AmbiguousPrefixLookupError(
1481 raise error.AmbiguousPrefixLookupError(
1482 id, self.indexfile, _(b'ambiguous identifier')
1482 id, self.indexfile, _(b'ambiguous identifier')
1483 )
1483 )
1484 if maybewdir:
1484 if maybewdir:
1485 raise error.WdirUnsupported
1485 raise error.WdirUnsupported
1486 return None
1486 return None
1487 except TypeError:
1487 except TypeError:
1488 pass
1488 pass
1489
1489
1490 def lookup(self, id):
1490 def lookup(self, id):
1491 """locate a node based on:
1491 """locate a node based on:
1492 - revision number or str(revision number)
1492 - revision number or str(revision number)
1493 - nodeid or subset of hex nodeid
1493 - nodeid or subset of hex nodeid
1494 """
1494 """
1495 n = self._match(id)
1495 n = self._match(id)
1496 if n is not None:
1496 if n is not None:
1497 return n
1497 return n
1498 n = self._partialmatch(id)
1498 n = self._partialmatch(id)
1499 if n:
1499 if n:
1500 return n
1500 return n
1501
1501
1502 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1502 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1503
1503
1504 def shortest(self, node, minlength=1):
1504 def shortest(self, node, minlength=1):
1505 """Find the shortest unambiguous prefix that matches node."""
1505 """Find the shortest unambiguous prefix that matches node."""
1506
1506
1507 def isvalid(prefix):
1507 def isvalid(prefix):
1508 try:
1508 try:
1509 matchednode = self._partialmatch(prefix)
1509 matchednode = self._partialmatch(prefix)
1510 except error.AmbiguousPrefixLookupError:
1510 except error.AmbiguousPrefixLookupError:
1511 return False
1511 return False
1512 except error.WdirUnsupported:
1512 except error.WdirUnsupported:
1513 # single 'ff...' match
1513 # single 'ff...' match
1514 return True
1514 return True
1515 if matchednode is None:
1515 if matchednode is None:
1516 raise error.LookupError(node, self.indexfile, _(b'no node'))
1516 raise error.LookupError(node, self.indexfile, _(b'no node'))
1517 return True
1517 return True
1518
1518
1519 def maybewdir(prefix):
1519 def maybewdir(prefix):
1520 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1520 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1521
1521
1522 hexnode = hex(node)
1522 hexnode = hex(node)
1523
1523
1524 def disambiguate(hexnode, minlength):
1524 def disambiguate(hexnode, minlength):
1525 """Disambiguate against wdirid."""
1525 """Disambiguate against wdirid."""
1526 for length in range(minlength, 41):
1526 for length in range(minlength, len(hexnode) + 1):
1527 prefix = hexnode[:length]
1527 prefix = hexnode[:length]
1528 if not maybewdir(prefix):
1528 if not maybewdir(prefix):
1529 return prefix
1529 return prefix
1530
1530
1531 if not getattr(self, 'filteredrevs', None):
1531 if not getattr(self, 'filteredrevs', None):
1532 try:
1532 try:
1533 length = max(self.index.shortest(node), minlength)
1533 length = max(self.index.shortest(node), minlength)
1534 return disambiguate(hexnode, length)
1534 return disambiguate(hexnode, length)
1535 except error.RevlogError:
1535 except error.RevlogError:
1536 if node != wdirid:
1536 if node != wdirid:
1537 raise error.LookupError(node, self.indexfile, _(b'no node'))
1537 raise error.LookupError(node, self.indexfile, _(b'no node'))
1538 except AttributeError:
1538 except AttributeError:
1539 # Fall through to pure code
1539 # Fall through to pure code
1540 pass
1540 pass
1541
1541
1542 if node == wdirid:
1542 if node == wdirid:
1543 for length in range(minlength, 41):
1543 for length in range(minlength, len(hexnode) + 1):
1544 prefix = hexnode[:length]
1544 prefix = hexnode[:length]
1545 if isvalid(prefix):
1545 if isvalid(prefix):
1546 return prefix
1546 return prefix
1547
1547
1548 for length in range(minlength, 41):
1548 for length in range(minlength, len(hexnode) + 1):
1549 prefix = hexnode[:length]
1549 prefix = hexnode[:length]
1550 if isvalid(prefix):
1550 if isvalid(prefix):
1551 return disambiguate(hexnode, length)
1551 return disambiguate(hexnode, length)
1552
1552
1553 def cmp(self, node, text):
1553 def cmp(self, node, text):
1554 """compare text with a given file revision
1554 """compare text with a given file revision
1555
1555
1556 returns True if text is different than what is stored.
1556 returns True if text is different than what is stored.
1557 """
1557 """
1558 p1, p2 = self.parents(node)
1558 p1, p2 = self.parents(node)
1559 return storageutil.hashrevisionsha1(text, p1, p2) != node
1559 return storageutil.hashrevisionsha1(text, p1, p2) != node
1560
1560
1561 def _cachesegment(self, offset, data):
1561 def _cachesegment(self, offset, data):
1562 """Add a segment to the revlog cache.
1562 """Add a segment to the revlog cache.
1563
1563
1564 Accepts an absolute offset and the data that is at that location.
1564 Accepts an absolute offset and the data that is at that location.
1565 """
1565 """
1566 o, d = self._chunkcache
1566 o, d = self._chunkcache
1567 # try to add to existing cache
1567 # try to add to existing cache
1568 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1568 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1569 self._chunkcache = o, d + data
1569 self._chunkcache = o, d + data
1570 else:
1570 else:
1571 self._chunkcache = offset, data
1571 self._chunkcache = offset, data
1572
1572
1573 def _readsegment(self, offset, length, df=None):
1573 def _readsegment(self, offset, length, df=None):
1574 """Load a segment of raw data from the revlog.
1574 """Load a segment of raw data from the revlog.
1575
1575
1576 Accepts an absolute offset, length to read, and an optional existing
1576 Accepts an absolute offset, length to read, and an optional existing
1577 file handle to read from.
1577 file handle to read from.
1578
1578
1579 If an existing file handle is passed, it will be seeked and the
1579 If an existing file handle is passed, it will be seeked and the
1580 original seek position will NOT be restored.
1580 original seek position will NOT be restored.
1581
1581
1582 Returns a str or buffer of raw byte data.
1582 Returns a str or buffer of raw byte data.
1583
1583
1584 Raises if the requested number of bytes could not be read.
1584 Raises if the requested number of bytes could not be read.
1585 """
1585 """
1586 # Cache data both forward and backward around the requested
1586 # Cache data both forward and backward around the requested
1587 # data, in a fixed size window. This helps speed up operations
1587 # data, in a fixed size window. This helps speed up operations
1588 # involving reading the revlog backwards.
1588 # involving reading the revlog backwards.
1589 cachesize = self._chunkcachesize
1589 cachesize = self._chunkcachesize
1590 realoffset = offset & ~(cachesize - 1)
1590 realoffset = offset & ~(cachesize - 1)
1591 reallength = (
1591 reallength = (
1592 (offset + length + cachesize) & ~(cachesize - 1)
1592 (offset + length + cachesize) & ~(cachesize - 1)
1593 ) - realoffset
1593 ) - realoffset
1594 with self._datareadfp(df) as df:
1594 with self._datareadfp(df) as df:
1595 df.seek(realoffset)
1595 df.seek(realoffset)
1596 d = df.read(reallength)
1596 d = df.read(reallength)
1597
1597
1598 self._cachesegment(realoffset, d)
1598 self._cachesegment(realoffset, d)
1599 if offset != realoffset or reallength != length:
1599 if offset != realoffset or reallength != length:
1600 startoffset = offset - realoffset
1600 startoffset = offset - realoffset
1601 if len(d) - startoffset < length:
1601 if len(d) - startoffset < length:
1602 raise error.RevlogError(
1602 raise error.RevlogError(
1603 _(
1603 _(
1604 b'partial read of revlog %s; expected %d bytes from '
1604 b'partial read of revlog %s; expected %d bytes from '
1605 b'offset %d, got %d'
1605 b'offset %d, got %d'
1606 )
1606 )
1607 % (
1607 % (
1608 self.indexfile if self._inline else self.datafile,
1608 self.indexfile if self._inline else self.datafile,
1609 length,
1609 length,
1610 realoffset,
1610 realoffset,
1611 len(d) - startoffset,
1611 len(d) - startoffset,
1612 )
1612 )
1613 )
1613 )
1614
1614
1615 return util.buffer(d, startoffset, length)
1615 return util.buffer(d, startoffset, length)
1616
1616
1617 if len(d) < length:
1617 if len(d) < length:
1618 raise error.RevlogError(
1618 raise error.RevlogError(
1619 _(
1619 _(
1620 b'partial read of revlog %s; expected %d bytes from offset '
1620 b'partial read of revlog %s; expected %d bytes from offset '
1621 b'%d, got %d'
1621 b'%d, got %d'
1622 )
1622 )
1623 % (
1623 % (
1624 self.indexfile if self._inline else self.datafile,
1624 self.indexfile if self._inline else self.datafile,
1625 length,
1625 length,
1626 offset,
1626 offset,
1627 len(d),
1627 len(d),
1628 )
1628 )
1629 )
1629 )
1630
1630
1631 return d
1631 return d
1632
1632
1633 def _getsegment(self, offset, length, df=None):
1633 def _getsegment(self, offset, length, df=None):
1634 """Obtain a segment of raw data from the revlog.
1634 """Obtain a segment of raw data from the revlog.
1635
1635
1636 Accepts an absolute offset, length of bytes to obtain, and an
1636 Accepts an absolute offset, length of bytes to obtain, and an
1637 optional file handle to the already-opened revlog. If the file
1637 optional file handle to the already-opened revlog. If the file
1638 handle is used, it's original seek position will not be preserved.
1638 handle is used, it's original seek position will not be preserved.
1639
1639
1640 Requests for data may be returned from a cache.
1640 Requests for data may be returned from a cache.
1641
1641
1642 Returns a str or a buffer instance of raw byte data.
1642 Returns a str or a buffer instance of raw byte data.
1643 """
1643 """
1644 o, d = self._chunkcache
1644 o, d = self._chunkcache
1645 l = len(d)
1645 l = len(d)
1646
1646
1647 # is it in the cache?
1647 # is it in the cache?
1648 cachestart = offset - o
1648 cachestart = offset - o
1649 cacheend = cachestart + length
1649 cacheend = cachestart + length
1650 if cachestart >= 0 and cacheend <= l:
1650 if cachestart >= 0 and cacheend <= l:
1651 if cachestart == 0 and cacheend == l:
1651 if cachestart == 0 and cacheend == l:
1652 return d # avoid a copy
1652 return d # avoid a copy
1653 return util.buffer(d, cachestart, cacheend - cachestart)
1653 return util.buffer(d, cachestart, cacheend - cachestart)
1654
1654
1655 return self._readsegment(offset, length, df=df)
1655 return self._readsegment(offset, length, df=df)
1656
1656
1657 def _getsegmentforrevs(self, startrev, endrev, df=None):
1657 def _getsegmentforrevs(self, startrev, endrev, df=None):
1658 """Obtain a segment of raw data corresponding to a range of revisions.
1658 """Obtain a segment of raw data corresponding to a range of revisions.
1659
1659
1660 Accepts the start and end revisions and an optional already-open
1660 Accepts the start and end revisions and an optional already-open
1661 file handle to be used for reading. If the file handle is read, its
1661 file handle to be used for reading. If the file handle is read, its
1662 seek position will not be preserved.
1662 seek position will not be preserved.
1663
1663
1664 Requests for data may be satisfied by a cache.
1664 Requests for data may be satisfied by a cache.
1665
1665
1666 Returns a 2-tuple of (offset, data) for the requested range of
1666 Returns a 2-tuple of (offset, data) for the requested range of
1667 revisions. Offset is the integer offset from the beginning of the
1667 revisions. Offset is the integer offset from the beginning of the
1668 revlog and data is a str or buffer of the raw byte data.
1668 revlog and data is a str or buffer of the raw byte data.
1669
1669
1670 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1670 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1671 to determine where each revision's data begins and ends.
1671 to determine where each revision's data begins and ends.
1672 """
1672 """
1673 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1673 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1674 # (functions are expensive).
1674 # (functions are expensive).
1675 index = self.index
1675 index = self.index
1676 istart = index[startrev]
1676 istart = index[startrev]
1677 start = int(istart[0] >> 16)
1677 start = int(istart[0] >> 16)
1678 if startrev == endrev:
1678 if startrev == endrev:
1679 end = start + istart[1]
1679 end = start + istart[1]
1680 else:
1680 else:
1681 iend = index[endrev]
1681 iend = index[endrev]
1682 end = int(iend[0] >> 16) + iend[1]
1682 end = int(iend[0] >> 16) + iend[1]
1683
1683
1684 if self._inline:
1684 if self._inline:
1685 start += (startrev + 1) * self._io.size
1685 start += (startrev + 1) * self._io.size
1686 end += (endrev + 1) * self._io.size
1686 end += (endrev + 1) * self._io.size
1687 length = end - start
1687 length = end - start
1688
1688
1689 return start, self._getsegment(start, length, df=df)
1689 return start, self._getsegment(start, length, df=df)
1690
1690
1691 def _chunk(self, rev, df=None):
1691 def _chunk(self, rev, df=None):
1692 """Obtain a single decompressed chunk for a revision.
1692 """Obtain a single decompressed chunk for a revision.
1693
1693
1694 Accepts an integer revision and an optional already-open file handle
1694 Accepts an integer revision and an optional already-open file handle
1695 to be used for reading. If used, the seek position of the file will not
1695 to be used for reading. If used, the seek position of the file will not
1696 be preserved.
1696 be preserved.
1697
1697
1698 Returns a str holding uncompressed data for the requested revision.
1698 Returns a str holding uncompressed data for the requested revision.
1699 """
1699 """
1700 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1700 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1701
1701
1702 def _chunks(self, revs, df=None, targetsize=None):
1702 def _chunks(self, revs, df=None, targetsize=None):
1703 """Obtain decompressed chunks for the specified revisions.
1703 """Obtain decompressed chunks for the specified revisions.
1704
1704
1705 Accepts an iterable of numeric revisions that are assumed to be in
1705 Accepts an iterable of numeric revisions that are assumed to be in
1706 ascending order. Also accepts an optional already-open file handle
1706 ascending order. Also accepts an optional already-open file handle
1707 to be used for reading. If used, the seek position of the file will
1707 to be used for reading. If used, the seek position of the file will
1708 not be preserved.
1708 not be preserved.
1709
1709
1710 This function is similar to calling ``self._chunk()`` multiple times,
1710 This function is similar to calling ``self._chunk()`` multiple times,
1711 but is faster.
1711 but is faster.
1712
1712
1713 Returns a list with decompressed data for each requested revision.
1713 Returns a list with decompressed data for each requested revision.
1714 """
1714 """
1715 if not revs:
1715 if not revs:
1716 return []
1716 return []
1717 start = self.start
1717 start = self.start
1718 length = self.length
1718 length = self.length
1719 inline = self._inline
1719 inline = self._inline
1720 iosize = self._io.size
1720 iosize = self._io.size
1721 buffer = util.buffer
1721 buffer = util.buffer
1722
1722
1723 l = []
1723 l = []
1724 ladd = l.append
1724 ladd = l.append
1725
1725
1726 if not self._withsparseread:
1726 if not self._withsparseread:
1727 slicedchunks = (revs,)
1727 slicedchunks = (revs,)
1728 else:
1728 else:
1729 slicedchunks = deltautil.slicechunk(
1729 slicedchunks = deltautil.slicechunk(
1730 self, revs, targetsize=targetsize
1730 self, revs, targetsize=targetsize
1731 )
1731 )
1732
1732
1733 for revschunk in slicedchunks:
1733 for revschunk in slicedchunks:
1734 firstrev = revschunk[0]
1734 firstrev = revschunk[0]
1735 # Skip trailing revisions with empty diff
1735 # Skip trailing revisions with empty diff
1736 for lastrev in revschunk[::-1]:
1736 for lastrev in revschunk[::-1]:
1737 if length(lastrev) != 0:
1737 if length(lastrev) != 0:
1738 break
1738 break
1739
1739
1740 try:
1740 try:
1741 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1741 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1742 except OverflowError:
1742 except OverflowError:
1743 # issue4215 - we can't cache a run of chunks greater than
1743 # issue4215 - we can't cache a run of chunks greater than
1744 # 2G on Windows
1744 # 2G on Windows
1745 return [self._chunk(rev, df=df) for rev in revschunk]
1745 return [self._chunk(rev, df=df) for rev in revschunk]
1746
1746
1747 decomp = self.decompress
1747 decomp = self.decompress
1748 for rev in revschunk:
1748 for rev in revschunk:
1749 chunkstart = start(rev)
1749 chunkstart = start(rev)
1750 if inline:
1750 if inline:
1751 chunkstart += (rev + 1) * iosize
1751 chunkstart += (rev + 1) * iosize
1752 chunklength = length(rev)
1752 chunklength = length(rev)
1753 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1753 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1754
1754
1755 return l
1755 return l
1756
1756
1757 def _chunkclear(self):
1757 def _chunkclear(self):
1758 """Clear the raw chunk cache."""
1758 """Clear the raw chunk cache."""
1759 self._chunkcache = (0, b'')
1759 self._chunkcache = (0, b'')
1760
1760
1761 def deltaparent(self, rev):
1761 def deltaparent(self, rev):
1762 """return deltaparent of the given revision"""
1762 """return deltaparent of the given revision"""
1763 base = self.index[rev][3]
1763 base = self.index[rev][3]
1764 if base == rev:
1764 if base == rev:
1765 return nullrev
1765 return nullrev
1766 elif self._generaldelta:
1766 elif self._generaldelta:
1767 return base
1767 return base
1768 else:
1768 else:
1769 return rev - 1
1769 return rev - 1
1770
1770
1771 def issnapshot(self, rev):
1771 def issnapshot(self, rev):
1772 """tells whether rev is a snapshot
1772 """tells whether rev is a snapshot
1773 """
1773 """
1774 if not self._sparserevlog:
1774 if not self._sparserevlog:
1775 return self.deltaparent(rev) == nullrev
1775 return self.deltaparent(rev) == nullrev
1776 elif util.safehasattr(self.index, b'issnapshot'):
1776 elif util.safehasattr(self.index, b'issnapshot'):
1777 # directly assign the method to cache the testing and access
1777 # directly assign the method to cache the testing and access
1778 self.issnapshot = self.index.issnapshot
1778 self.issnapshot = self.index.issnapshot
1779 return self.issnapshot(rev)
1779 return self.issnapshot(rev)
1780 if rev == nullrev:
1780 if rev == nullrev:
1781 return True
1781 return True
1782 entry = self.index[rev]
1782 entry = self.index[rev]
1783 base = entry[3]
1783 base = entry[3]
1784 if base == rev:
1784 if base == rev:
1785 return True
1785 return True
1786 if base == nullrev:
1786 if base == nullrev:
1787 return True
1787 return True
1788 p1 = entry[5]
1788 p1 = entry[5]
1789 p2 = entry[6]
1789 p2 = entry[6]
1790 if base == p1 or base == p2:
1790 if base == p1 or base == p2:
1791 return False
1791 return False
1792 return self.issnapshot(base)
1792 return self.issnapshot(base)
1793
1793
1794 def snapshotdepth(self, rev):
1794 def snapshotdepth(self, rev):
1795 """number of snapshot in the chain before this one"""
1795 """number of snapshot in the chain before this one"""
1796 if not self.issnapshot(rev):
1796 if not self.issnapshot(rev):
1797 raise error.ProgrammingError(b'revision %d not a snapshot')
1797 raise error.ProgrammingError(b'revision %d not a snapshot')
1798 return len(self._deltachain(rev)[0]) - 1
1798 return len(self._deltachain(rev)[0]) - 1
1799
1799
1800 def revdiff(self, rev1, rev2):
1800 def revdiff(self, rev1, rev2):
1801 """return or calculate a delta between two revisions
1801 """return or calculate a delta between two revisions
1802
1802
1803 The delta calculated is in binary form and is intended to be written to
1803 The delta calculated is in binary form and is intended to be written to
1804 revlog data directly. So this function needs raw revision data.
1804 revlog data directly. So this function needs raw revision data.
1805 """
1805 """
1806 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1806 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1807 return bytes(self._chunk(rev2))
1807 return bytes(self._chunk(rev2))
1808
1808
1809 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1809 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1810
1810
1811 def _processflags(self, text, flags, operation, raw=False):
1811 def _processflags(self, text, flags, operation, raw=False):
1812 """deprecated entry point to access flag processors"""
1812 """deprecated entry point to access flag processors"""
1813 msg = b'_processflag(...) use the specialized variant'
1813 msg = b'_processflag(...) use the specialized variant'
1814 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1814 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1815 if raw:
1815 if raw:
1816 return text, flagutil.processflagsraw(self, text, flags)
1816 return text, flagutil.processflagsraw(self, text, flags)
1817 elif operation == b'read':
1817 elif operation == b'read':
1818 return flagutil.processflagsread(self, text, flags)
1818 return flagutil.processflagsread(self, text, flags)
1819 else: # write operation
1819 else: # write operation
1820 return flagutil.processflagswrite(self, text, flags)
1820 return flagutil.processflagswrite(self, text, flags)
1821
1821
1822 def revision(self, nodeorrev, _df=None, raw=False):
1822 def revision(self, nodeorrev, _df=None, raw=False):
1823 """return an uncompressed revision of a given node or revision
1823 """return an uncompressed revision of a given node or revision
1824 number.
1824 number.
1825
1825
1826 _df - an existing file handle to read from. (internal-only)
1826 _df - an existing file handle to read from. (internal-only)
1827 raw - an optional argument specifying if the revision data is to be
1827 raw - an optional argument specifying if the revision data is to be
1828 treated as raw data when applying flag transforms. 'raw' should be set
1828 treated as raw data when applying flag transforms. 'raw' should be set
1829 to True when generating changegroups or in debug commands.
1829 to True when generating changegroups or in debug commands.
1830 """
1830 """
1831 if raw:
1831 if raw:
1832 msg = (
1832 msg = (
1833 b'revlog.revision(..., raw=True) is deprecated, '
1833 b'revlog.revision(..., raw=True) is deprecated, '
1834 b'use revlog.rawdata(...)'
1834 b'use revlog.rawdata(...)'
1835 )
1835 )
1836 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1836 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1837 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1837 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1838
1838
1839 def sidedata(self, nodeorrev, _df=None):
1839 def sidedata(self, nodeorrev, _df=None):
1840 """a map of extra data related to the changeset but not part of the hash
1840 """a map of extra data related to the changeset but not part of the hash
1841
1841
1842 This function currently return a dictionary. However, more advanced
1842 This function currently return a dictionary. However, more advanced
1843 mapping object will likely be used in the future for a more
1843 mapping object will likely be used in the future for a more
1844 efficient/lazy code.
1844 efficient/lazy code.
1845 """
1845 """
1846 return self._revisiondata(nodeorrev, _df)[1]
1846 return self._revisiondata(nodeorrev, _df)[1]
1847
1847
1848 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1848 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1849 # deal with <nodeorrev> argument type
1849 # deal with <nodeorrev> argument type
1850 if isinstance(nodeorrev, int):
1850 if isinstance(nodeorrev, int):
1851 rev = nodeorrev
1851 rev = nodeorrev
1852 node = self.node(rev)
1852 node = self.node(rev)
1853 else:
1853 else:
1854 node = nodeorrev
1854 node = nodeorrev
1855 rev = None
1855 rev = None
1856
1856
1857 # fast path the special `nullid` rev
1857 # fast path the special `nullid` rev
1858 if node == nullid:
1858 if node == nullid:
1859 return b"", {}
1859 return b"", {}
1860
1860
1861 # ``rawtext`` is the text as stored inside the revlog. Might be the
1861 # ``rawtext`` is the text as stored inside the revlog. Might be the
1862 # revision or might need to be processed to retrieve the revision.
1862 # revision or might need to be processed to retrieve the revision.
1863 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1863 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1864
1864
1865 if raw and validated:
1865 if raw and validated:
1866 # if we don't want to process the raw text and that raw
1866 # if we don't want to process the raw text and that raw
1867 # text is cached, we can exit early.
1867 # text is cached, we can exit early.
1868 return rawtext, {}
1868 return rawtext, {}
1869 if rev is None:
1869 if rev is None:
1870 rev = self.rev(node)
1870 rev = self.rev(node)
1871 # the revlog's flag for this revision
1871 # the revlog's flag for this revision
1872 # (usually alter its state or content)
1872 # (usually alter its state or content)
1873 flags = self.flags(rev)
1873 flags = self.flags(rev)
1874
1874
1875 if validated and flags == REVIDX_DEFAULT_FLAGS:
1875 if validated and flags == REVIDX_DEFAULT_FLAGS:
1876 # no extra flags set, no flag processor runs, text = rawtext
1876 # no extra flags set, no flag processor runs, text = rawtext
1877 return rawtext, {}
1877 return rawtext, {}
1878
1878
1879 sidedata = {}
1879 sidedata = {}
1880 if raw:
1880 if raw:
1881 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1881 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1882 text = rawtext
1882 text = rawtext
1883 else:
1883 else:
1884 try:
1884 try:
1885 r = flagutil.processflagsread(self, rawtext, flags)
1885 r = flagutil.processflagsread(self, rawtext, flags)
1886 except error.SidedataHashError as exc:
1886 except error.SidedataHashError as exc:
1887 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1887 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1888 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1888 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1889 raise error.RevlogError(msg)
1889 raise error.RevlogError(msg)
1890 text, validatehash, sidedata = r
1890 text, validatehash, sidedata = r
1891 if validatehash:
1891 if validatehash:
1892 self.checkhash(text, node, rev=rev)
1892 self.checkhash(text, node, rev=rev)
1893 if not validated:
1893 if not validated:
1894 self._revisioncache = (node, rev, rawtext)
1894 self._revisioncache = (node, rev, rawtext)
1895
1895
1896 return text, sidedata
1896 return text, sidedata
1897
1897
1898 def _rawtext(self, node, rev, _df=None):
1898 def _rawtext(self, node, rev, _df=None):
1899 """return the possibly unvalidated rawtext for a revision
1899 """return the possibly unvalidated rawtext for a revision
1900
1900
1901 returns (rev, rawtext, validated)
1901 returns (rev, rawtext, validated)
1902 """
1902 """
1903
1903
1904 # revision in the cache (could be useful to apply delta)
1904 # revision in the cache (could be useful to apply delta)
1905 cachedrev = None
1905 cachedrev = None
1906 # An intermediate text to apply deltas to
1906 # An intermediate text to apply deltas to
1907 basetext = None
1907 basetext = None
1908
1908
1909 # Check if we have the entry in cache
1909 # Check if we have the entry in cache
1910 # The cache entry looks like (node, rev, rawtext)
1910 # The cache entry looks like (node, rev, rawtext)
1911 if self._revisioncache:
1911 if self._revisioncache:
1912 if self._revisioncache[0] == node:
1912 if self._revisioncache[0] == node:
1913 return (rev, self._revisioncache[2], True)
1913 return (rev, self._revisioncache[2], True)
1914 cachedrev = self._revisioncache[1]
1914 cachedrev = self._revisioncache[1]
1915
1915
1916 if rev is None:
1916 if rev is None:
1917 rev = self.rev(node)
1917 rev = self.rev(node)
1918
1918
1919 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1919 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1920 if stopped:
1920 if stopped:
1921 basetext = self._revisioncache[2]
1921 basetext = self._revisioncache[2]
1922
1922
1923 # drop cache to save memory, the caller is expected to
1923 # drop cache to save memory, the caller is expected to
1924 # update self._revisioncache after validating the text
1924 # update self._revisioncache after validating the text
1925 self._revisioncache = None
1925 self._revisioncache = None
1926
1926
1927 targetsize = None
1927 targetsize = None
1928 rawsize = self.index[rev][2]
1928 rawsize = self.index[rev][2]
1929 if 0 <= rawsize:
1929 if 0 <= rawsize:
1930 targetsize = 4 * rawsize
1930 targetsize = 4 * rawsize
1931
1931
1932 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1932 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1933 if basetext is None:
1933 if basetext is None:
1934 basetext = bytes(bins[0])
1934 basetext = bytes(bins[0])
1935 bins = bins[1:]
1935 bins = bins[1:]
1936
1936
1937 rawtext = mdiff.patches(basetext, bins)
1937 rawtext = mdiff.patches(basetext, bins)
1938 del basetext # let us have a chance to free memory early
1938 del basetext # let us have a chance to free memory early
1939 return (rev, rawtext, False)
1939 return (rev, rawtext, False)
1940
1940
1941 def rawdata(self, nodeorrev, _df=None):
1941 def rawdata(self, nodeorrev, _df=None):
1942 """return an uncompressed raw data of a given node or revision number.
1942 """return an uncompressed raw data of a given node or revision number.
1943
1943
1944 _df - an existing file handle to read from. (internal-only)
1944 _df - an existing file handle to read from. (internal-only)
1945 """
1945 """
1946 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1946 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1947
1947
1948 def hash(self, text, p1, p2):
1948 def hash(self, text, p1, p2):
1949 """Compute a node hash.
1949 """Compute a node hash.
1950
1950
1951 Available as a function so that subclasses can replace the hash
1951 Available as a function so that subclasses can replace the hash
1952 as needed.
1952 as needed.
1953 """
1953 """
1954 return storageutil.hashrevisionsha1(text, p1, p2)
1954 return storageutil.hashrevisionsha1(text, p1, p2)
1955
1955
1956 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1956 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1957 """Check node hash integrity.
1957 """Check node hash integrity.
1958
1958
1959 Available as a function so that subclasses can extend hash mismatch
1959 Available as a function so that subclasses can extend hash mismatch
1960 behaviors as needed.
1960 behaviors as needed.
1961 """
1961 """
1962 try:
1962 try:
1963 if p1 is None and p2 is None:
1963 if p1 is None and p2 is None:
1964 p1, p2 = self.parents(node)
1964 p1, p2 = self.parents(node)
1965 if node != self.hash(text, p1, p2):
1965 if node != self.hash(text, p1, p2):
1966 # Clear the revision cache on hash failure. The revision cache
1966 # Clear the revision cache on hash failure. The revision cache
1967 # only stores the raw revision and clearing the cache does have
1967 # only stores the raw revision and clearing the cache does have
1968 # the side-effect that we won't have a cache hit when the raw
1968 # the side-effect that we won't have a cache hit when the raw
1969 # revision data is accessed. But this case should be rare and
1969 # revision data is accessed. But this case should be rare and
1970 # it is extra work to teach the cache about the hash
1970 # it is extra work to teach the cache about the hash
1971 # verification state.
1971 # verification state.
1972 if self._revisioncache and self._revisioncache[0] == node:
1972 if self._revisioncache and self._revisioncache[0] == node:
1973 self._revisioncache = None
1973 self._revisioncache = None
1974
1974
1975 revornode = rev
1975 revornode = rev
1976 if revornode is None:
1976 if revornode is None:
1977 revornode = templatefilters.short(hex(node))
1977 revornode = templatefilters.short(hex(node))
1978 raise error.RevlogError(
1978 raise error.RevlogError(
1979 _(b"integrity check failed on %s:%s")
1979 _(b"integrity check failed on %s:%s")
1980 % (self.indexfile, pycompat.bytestr(revornode))
1980 % (self.indexfile, pycompat.bytestr(revornode))
1981 )
1981 )
1982 except error.RevlogError:
1982 except error.RevlogError:
1983 if self._censorable and storageutil.iscensoredtext(text):
1983 if self._censorable and storageutil.iscensoredtext(text):
1984 raise error.CensoredNodeError(self.indexfile, node, text)
1984 raise error.CensoredNodeError(self.indexfile, node, text)
1985 raise
1985 raise
1986
1986
1987 def _enforceinlinesize(self, tr, fp=None):
1987 def _enforceinlinesize(self, tr, fp=None):
1988 """Check if the revlog is too big for inline and convert if so.
1988 """Check if the revlog is too big for inline and convert if so.
1989
1989
1990 This should be called after revisions are added to the revlog. If the
1990 This should be called after revisions are added to the revlog. If the
1991 revlog has grown too large to be an inline revlog, it will convert it
1991 revlog has grown too large to be an inline revlog, it will convert it
1992 to use multiple index and data files.
1992 to use multiple index and data files.
1993 """
1993 """
1994 tiprev = len(self) - 1
1994 tiprev = len(self) - 1
1995 if (
1995 if (
1996 not self._inline
1996 not self._inline
1997 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1997 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1998 ):
1998 ):
1999 return
1999 return
2000
2000
2001 trinfo = tr.find(self.indexfile)
2001 trinfo = tr.find(self.indexfile)
2002 if trinfo is None:
2002 if trinfo is None:
2003 raise error.RevlogError(
2003 raise error.RevlogError(
2004 _(b"%s not found in the transaction") % self.indexfile
2004 _(b"%s not found in the transaction") % self.indexfile
2005 )
2005 )
2006
2006
2007 trindex = trinfo[2]
2007 trindex = trinfo[2]
2008 if trindex is not None:
2008 if trindex is not None:
2009 dataoff = self.start(trindex)
2009 dataoff = self.start(trindex)
2010 else:
2010 else:
2011 # revlog was stripped at start of transaction, use all leftover data
2011 # revlog was stripped at start of transaction, use all leftover data
2012 trindex = len(self) - 1
2012 trindex = len(self) - 1
2013 dataoff = self.end(tiprev)
2013 dataoff = self.end(tiprev)
2014
2014
2015 tr.add(self.datafile, dataoff)
2015 tr.add(self.datafile, dataoff)
2016
2016
2017 if fp:
2017 if fp:
2018 fp.flush()
2018 fp.flush()
2019 fp.close()
2019 fp.close()
2020 # We can't use the cached file handle after close(). So prevent
2020 # We can't use the cached file handle after close(). So prevent
2021 # its usage.
2021 # its usage.
2022 self._writinghandles = None
2022 self._writinghandles = None
2023
2023
2024 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2024 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2025 for r in self:
2025 for r in self:
2026 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2026 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2027
2027
2028 with self._indexfp(b'w') as fp:
2028 with self._indexfp(b'w') as fp:
2029 self.version &= ~FLAG_INLINE_DATA
2029 self.version &= ~FLAG_INLINE_DATA
2030 self._inline = False
2030 self._inline = False
2031 io = self._io
2031 io = self._io
2032 for i in self:
2032 for i in self:
2033 e = io.packentry(self.index[i], self.node, self.version, i)
2033 e = io.packentry(self.index[i], self.node, self.version, i)
2034 fp.write(e)
2034 fp.write(e)
2035
2035
2036 # the temp file replace the real index when we exit the context
2036 # the temp file replace the real index when we exit the context
2037 # manager
2037 # manager
2038
2038
2039 tr.replace(self.indexfile, trindex * self._io.size)
2039 tr.replace(self.indexfile, trindex * self._io.size)
2040 nodemaputil.setup_persistent_nodemap(tr, self)
2040 nodemaputil.setup_persistent_nodemap(tr, self)
2041 self._chunkclear()
2041 self._chunkclear()
2042
2042
2043 def _nodeduplicatecallback(self, transaction, node):
2043 def _nodeduplicatecallback(self, transaction, node):
2044 """called when trying to add a node already stored.
2044 """called when trying to add a node already stored.
2045 """
2045 """
2046
2046
    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        sidedata - optional mapping of extra data to store with the revision;
            only allowed when the revlog supports side data

        Returns the node of the stored revision; when the node is already
        known to the index, it is returned as-is without writing anything.
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        # normalize the sidedata argument and keep the sidedata flag bit in
        # sync with whether side data is actually being stored
        if sidedata is None:
            sidedata = {}
            flags = flags & ~REVIDX_SIDEDATA
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog who don't support them")
            )
        else:
            flags |= REVIDX_SIDEDATA

        if flags:
            # flag processors may transform the text below, so the node must
            # be computed from the original (non-raw) text here
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(
            self, text, flags, sidedata=sidedata
        )

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        # no flags: the node may still need to be computed, from the raw text
        node = node or self.hash(rawtext, p1, p2)
        if self.index.has_node(node):
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
        )
2127
2127
2128 def addrawrevision(
2128 def addrawrevision(
2129 self,
2129 self,
2130 rawtext,
2130 rawtext,
2131 transaction,
2131 transaction,
2132 link,
2132 link,
2133 p1,
2133 p1,
2134 p2,
2134 p2,
2135 node,
2135 node,
2136 flags,
2136 flags,
2137 cachedelta=None,
2137 cachedelta=None,
2138 deltacomputer=None,
2138 deltacomputer=None,
2139 ):
2139 ):
2140 """add a raw revision with known flags, node and parents
2140 """add a raw revision with known flags, node and parents
2141 useful when reusing a revision not stored in this revlog (ex: received
2141 useful when reusing a revision not stored in this revlog (ex: received
2142 over wire, or read from an external bundle).
2142 over wire, or read from an external bundle).
2143 """
2143 """
2144 dfh = None
2144 dfh = None
2145 if not self._inline:
2145 if not self._inline:
2146 dfh = self._datafp(b"a+")
2146 dfh = self._datafp(b"a+")
2147 ifh = self._indexfp(b"a+")
2147 ifh = self._indexfp(b"a+")
2148 try:
2148 try:
2149 return self._addrevision(
2149 return self._addrevision(
2150 node,
2150 node,
2151 rawtext,
2151 rawtext,
2152 transaction,
2152 transaction,
2153 link,
2153 link,
2154 p1,
2154 p1,
2155 p2,
2155 p2,
2156 flags,
2156 flags,
2157 cachedelta,
2157 cachedelta,
2158 ifh,
2158 ifh,
2159 dfh,
2159 dfh,
2160 deltacomputer=deltacomputer,
2160 deltacomputer=deltacomputer,
2161 )
2161 )
2162 finally:
2162 finally:
2163 if dfh:
2163 if dfh:
2164 dfh.close()
2164 dfh.close()
2165 ifh.close()
2165 ifh.close()
2166
2166
2167 def compress(self, data):
2167 def compress(self, data):
2168 """Generate a possibly-compressed representation of data."""
2168 """Generate a possibly-compressed representation of data."""
2169 if not data:
2169 if not data:
2170 return b'', data
2170 return b'', data
2171
2171
2172 compressed = self._compressor.compress(data)
2172 compressed = self._compressor.compress(data)
2173
2173
2174 if compressed:
2174 if compressed:
2175 # The revlog compressor added the header in the returned data.
2175 # The revlog compressor added the header in the returned data.
2176 return b'', compressed
2176 return b'', compressed
2177
2177
2178 if data[0:1] == b'\0':
2178 if data[0:1] == b'\0':
2179 return b'', data
2179 return b'', data
2180 return b'u', data
2180 return b'u', data
2181
2181
2182 def decompress(self, data):
2182 def decompress(self, data):
2183 """Decompress a revlog chunk.
2183 """Decompress a revlog chunk.
2184
2184
2185 The chunk is expected to begin with a header identifying the
2185 The chunk is expected to begin with a header identifying the
2186 format type so it can be routed to an appropriate decompressor.
2186 format type so it can be routed to an appropriate decompressor.
2187 """
2187 """
2188 if not data:
2188 if not data:
2189 return data
2189 return data
2190
2190
2191 # Revlogs are read much more frequently than they are written and many
2191 # Revlogs are read much more frequently than they are written and many
2192 # chunks only take microseconds to decompress, so performance is
2192 # chunks only take microseconds to decompress, so performance is
2193 # important here.
2193 # important here.
2194 #
2194 #
2195 # We can make a few assumptions about revlogs:
2195 # We can make a few assumptions about revlogs:
2196 #
2196 #
2197 # 1) the majority of chunks will be compressed (as opposed to inline
2197 # 1) the majority of chunks will be compressed (as opposed to inline
2198 # raw data).
2198 # raw data).
2199 # 2) decompressing *any* data will likely by at least 10x slower than
2199 # 2) decompressing *any* data will likely by at least 10x slower than
2200 # returning raw inline data.
2200 # returning raw inline data.
2201 # 3) we want to prioritize common and officially supported compression
2201 # 3) we want to prioritize common and officially supported compression
2202 # engines
2202 # engines
2203 #
2203 #
2204 # It follows that we want to optimize for "decompress compressed data
2204 # It follows that we want to optimize for "decompress compressed data
2205 # when encoded with common and officially supported compression engines"
2205 # when encoded with common and officially supported compression engines"
2206 # case over "raw data" and "data encoded by less common or non-official
2206 # case over "raw data" and "data encoded by less common or non-official
2207 # compression engines." That is why we have the inline lookup first
2207 # compression engines." That is why we have the inline lookup first
2208 # followed by the compengines lookup.
2208 # followed by the compengines lookup.
2209 #
2209 #
2210 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2210 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2211 # compressed chunks. And this matters for changelog and manifest reads.
2211 # compressed chunks. And this matters for changelog and manifest reads.
2212 t = data[0:1]
2212 t = data[0:1]
2213
2213
2214 if t == b'x':
2214 if t == b'x':
2215 try:
2215 try:
2216 return _zlibdecompress(data)
2216 return _zlibdecompress(data)
2217 except zlib.error as e:
2217 except zlib.error as e:
2218 raise error.RevlogError(
2218 raise error.RevlogError(
2219 _(b'revlog decompress error: %s')
2219 _(b'revlog decompress error: %s')
2220 % stringutil.forcebytestr(e)
2220 % stringutil.forcebytestr(e)
2221 )
2221 )
2222 # '\0' is more common than 'u' so it goes first.
2222 # '\0' is more common than 'u' so it goes first.
2223 elif t == b'\0':
2223 elif t == b'\0':
2224 return data
2224 return data
2225 elif t == b'u':
2225 elif t == b'u':
2226 return util.buffer(data, 1)
2226 return util.buffer(data, 1)
2227
2227
2228 try:
2228 try:
2229 compressor = self._decompressors[t]
2229 compressor = self._decompressors[t]
2230 except KeyError:
2230 except KeyError:
2231 try:
2231 try:
2232 engine = util.compengines.forrevlogheader(t)
2232 engine = util.compengines.forrevlogheader(t)
2233 compressor = engine.revlogcompressor(self._compengineopts)
2233 compressor = engine.revlogcompressor(self._compengineopts)
2234 self._decompressors[t] = compressor
2234 self._decompressors[t] = compressor
2235 except KeyError:
2235 except KeyError:
2236 raise error.RevlogError(_(b'unknown compression type %r') % t)
2236 raise error.RevlogError(_(b'unknown compression type %r') % t)
2237
2237
2238 return compressor.decompress(data)
2238 return compressor.decompress(data)
2239
2239
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        ifh, dfh - open index and data file handles; dfh is None for inline
            revlogs
        alwayscache - when True, make sure the full raw text ends up in the
            revision cache even if the delta computer did not need it

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        # inline revlogs store revision data in the index file itself
        if self._inline:
            fh = ifh
        else:
            fh = dfh

        # single-element list so the delta computer can fill in the resolved
        # raw text lazily as a side effect
        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        # build the new index tuple and append it to the in-memory index
        # before packing and writing it out
        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
        )
        self.index.append(e)

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction, ifh, dfh, entry, deltainfo.data, link, offset
        )

        # finddeltainfo() may have resolved the full text into btext
        rawtext = btext[0]

        if alwayscache and rawtext is None:
            # build the text now so an immediately following revision() call
            # hits the cache
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node
2334
2334
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        """Write a packed index entry plus its revision data to disk.

        transaction - transaction to notify of the write offsets so the
            write can be rolled back
        ifh, dfh - open index and data file handles; dfh is None/unused for
            inline revlogs, whose data lives in the index file
        entry - the packed index entry bytes
        data - (header, payload) pair of revision data as produced by
            compress()
        link - linkrev of the revision (not used by this function)
        offset - byte offset at which the revision data starts
        """
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            # inline: entry and revision data are appended to the index file;
            # the data offset must account for the index entries before it
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            # may convert an oversized inline revlog to the split
            # index/data form (see addgroup)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)
2368
2368
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.

        Returns the list of nodes seen in the group, including nodes that
        were already stored and therefore skipped.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        # register the current file sizes with the transaction so a failed
        # group can be rolled back
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            # allow _peek_iscensored() to read back data buffered so far
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if self.index.has_node(node):
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=bool(addrevisioncb),
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes
2489
2489
2490 def iscensored(self, rev):
2490 def iscensored(self, rev):
2491 """Check if a file revision is censored."""
2491 """Check if a file revision is censored."""
2492 if not self._censorable:
2492 if not self._censorable:
2493 return False
2493 return False
2494
2494
2495 return self.flags(rev) & REVIDX_ISCENSORED
2495 return self.flags(rev) & REVIDX_ISCENSORED
2496
2496
2497 def _peek_iscensored(self, baserev, delta, flush):
2497 def _peek_iscensored(self, baserev, delta, flush):
2498 """Quickly check if a delta produces a censored revision."""
2498 """Quickly check if a delta produces a censored revision."""
2499 if not self._censorable:
2499 if not self._censorable:
2500 return False
2500 return False
2501
2501
2502 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2502 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2503
2503
2504 def getstrippoint(self, minlink):
2504 def getstrippoint(self, minlink):
2505 """find the minimum rev that must be stripped to strip the linkrev
2505 """find the minimum rev that must be stripped to strip the linkrev
2506
2506
2507 Returns a tuple containing the minimum rev and a set of all revs that
2507 Returns a tuple containing the minimum rev and a set of all revs that
2508 have linkrevs that will be broken by this strip.
2508 have linkrevs that will be broken by this strip.
2509 """
2509 """
2510 return storageutil.resolvestripinfo(
2510 return storageutil.resolvestripinfo(
2511 minlink,
2511 minlink,
2512 len(self) - 1,
2512 len(self) - 1,
2513 self.headrevs(),
2513 self.headrevs(),
2514 self.linkrev,
2514 self.linkrev,
2515 self.parentrevs,
2515 self.parentrevs,
2516 )
2516 )
2517
2517
2518 def strip(self, minlink, transaction):
2518 def strip(self, minlink, transaction):
2519 """truncate the revlog on the first revision with a linkrev >= minlink
2519 """truncate the revlog on the first revision with a linkrev >= minlink
2520
2520
2521 This function is called when we're stripping revision minlink and
2521 This function is called when we're stripping revision minlink and
2522 its descendants from the repository.
2522 its descendants from the repository.
2523
2523
2524 We have to remove all revisions with linkrev >= minlink, because
2524 We have to remove all revisions with linkrev >= minlink, because
2525 the equivalent changelog revisions will be renumbered after the
2525 the equivalent changelog revisions will be renumbered after the
2526 strip.
2526 strip.
2527
2527
2528 So we truncate the revlog on the first of these revisions, and
2528 So we truncate the revlog on the first of these revisions, and
2529 trust that the caller has saved the revisions that shouldn't be
2529 trust that the caller has saved the revisions that shouldn't be
2530 removed and that it'll re-add them after this truncation.
2530 removed and that it'll re-add them after this truncation.
2531 """
2531 """
2532 if len(self) == 0:
2532 if len(self) == 0:
2533 return
2533 return
2534
2534
2535 rev, _ = self.getstrippoint(minlink)
2535 rev, _ = self.getstrippoint(minlink)
2536 if rev == len(self):
2536 if rev == len(self):
2537 return
2537 return
2538
2538
2539 # first truncate the files on disk
2539 # first truncate the files on disk
2540 end = self.start(rev)
2540 end = self.start(rev)
2541 if not self._inline:
2541 if not self._inline:
2542 transaction.add(self.datafile, end)
2542 transaction.add(self.datafile, end)
2543 end = rev * self._io.size
2543 end = rev * self._io.size
2544 else:
2544 else:
2545 end += rev * self._io.size
2545 end += rev * self._io.size
2546
2546
2547 transaction.add(self.indexfile, end)
2547 transaction.add(self.indexfile, end)
2548
2548
2549 # then reset internal state in memory to forget those revisions
2549 # then reset internal state in memory to forget those revisions
2550 self._revisioncache = None
2550 self._revisioncache = None
2551 self._chaininfocache = {}
2551 self._chaininfocache = {}
2552 self._chunkclear()
2552 self._chunkclear()
2553
2553
2554 del self.index[rev:-1]
2554 del self.index[rev:-1]
2555
2555
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        # on-disk sizes are compared against what the in-memory index implies
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            # a missing data file contributes no extra data bytes
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            # di is the trailing partial-entry residue of the index file
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                # inline revlogs interleave revision data with index entries
                # in a single file, so recompute both numbers from the index
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
2599
2599
2600 def files(self):
2600 def files(self):
2601 res = [self.indexfile]
2601 res = [self.indexfile]
2602 if not self._inline:
2602 if not self._inline:
2603 res.append(self.datafile)
2603 res.append(self.datafile)
2604 return res
2604 return res
2605
2605
2606 def emitrevisions(
2606 def emitrevisions(
2607 self,
2607 self,
2608 nodes,
2608 nodes,
2609 nodesorder=None,
2609 nodesorder=None,
2610 revisiondata=False,
2610 revisiondata=False,
2611 assumehaveparentrevisions=False,
2611 assumehaveparentrevisions=False,
2612 deltamode=repository.CG_DELTAMODE_STD,
2612 deltamode=repository.CG_DELTAMODE_STD,
2613 ):
2613 ):
2614 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2614 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2615 raise error.ProgrammingError(
2615 raise error.ProgrammingError(
2616 b'unhandled value for nodesorder: %s' % nodesorder
2616 b'unhandled value for nodesorder: %s' % nodesorder
2617 )
2617 )
2618
2618
2619 if nodesorder is None and not self._generaldelta:
2619 if nodesorder is None and not self._generaldelta:
2620 nodesorder = b'storage'
2620 nodesorder = b'storage'
2621
2621
2622 if (
2622 if (
2623 not self._storedeltachains
2623 not self._storedeltachains
2624 and deltamode != repository.CG_DELTAMODE_PREV
2624 and deltamode != repository.CG_DELTAMODE_PREV
2625 ):
2625 ):
2626 deltamode = repository.CG_DELTAMODE_FULL
2626 deltamode = repository.CG_DELTAMODE_FULL
2627
2627
2628 return storageutil.emitrevisions(
2628 return storageutil.emitrevisions(
2629 self,
2629 self,
2630 nodes,
2630 nodes,
2631 nodesorder,
2631 nodesorder,
2632 revlogrevisiondelta,
2632 revlogrevisiondelta,
2633 deltaparentfn=self.deltaparent,
2633 deltaparentfn=self.deltaparent,
2634 candeltafn=self.candelta,
2634 candeltafn=self.candelta,
2635 rawsizefn=self.rawsize,
2635 rawsizefn=self.rawsize,
2636 revdifffn=self.revdiff,
2636 revdifffn=self.revdiff,
2637 flagsfn=self.flags,
2637 flagsfn=self.flags,
2638 deltamode=deltamode,
2638 deltamode=deltamode,
2639 revisiondata=revisiondata,
2639 revisiondata=revisiondata,
2640 assumehaveparentrevisions=assumehaveparentrevisions,
2640 assumehaveparentrevisions=assumehaveparentrevisions,
2641 )
2641 )
2642
2642
    # Delta-reuse strategies accepted by clone()'s ``deltareuse`` argument.
    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    # the complete set of valid ``deltareuse`` values
    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2650
2650
def clone(
    self,
    tr,
    destrevlog,
    addrevisioncb=None,
    deltareuse=DELTAREUSESAMEREVS,
    forcedeltabothparents=None,
    sidedatacompanion=None,
):
    """Copy this revlog to another, possibly with format changes.

    The destination revlog will contain the same revisions and nodes.
    However, it may not be bit-for-bit identical due to e.g. delta
    encoding differences.

    The ``deltareuse`` argument controls how deltas from the existing
    revlog are preserved in the destination revlog. Valid values:

    DELTAREUSEALWAYS
      Deltas will always be reused (if possible), even if the
      destination revlog would not select the same revisions for the
      delta. This is the fastest mode of operation.
    DELTAREUSESAMEREVS
      Deltas will be reused if the destination revlog would pick the
      same revisions for the delta. This mode strikes a balance between
      speed and optimization.
    DELTAREUSENEVER
      Deltas will never be reused. This is the slowest mode of
      execution. This mode can be used to recompute deltas (e.g. if the
      diff/delta algorithm changes).
    DELTAREUSEFULLADD
      Revisions will be re-added as if they were new content. This is
      slower than DELTAREUSEALWAYS but allows more mechanisms to kick
      in, e.g. large file detection and handling.

    Delta computation can be slow, so the choice of delta reuse policy
    can significantly affect run time.

    The default policy (``DELTAREUSESAMEREVS``) strikes a balance
    between the two extremes. Deltas will be reused if they are
    appropriate. But if the delta could choose a better revision, it
    will do so. This means if you are converting a non-generaldelta
    revlog to a generaldelta revlog, deltas will be recomputed if the
    delta's parent isn't a parent of the revision.

    In addition to the delta policy, the ``forcedeltabothparents``
    argument controls whether to force compute deltas against both
    parents for merges. By default, the current default is used.

    If not None, ``sidedatacompanion`` is a callable that accepts two
    arguments:

      (srcrevlog, rev)

    and returns a triplet controlling changes to sidedata content from
    the old revision to the new clone result:

      (dropall, filterout, update)

    * if ``dropall`` is True, all sidedata should be dropped
    * ``filterout`` is a set of sidedata keys that should be dropped
    * ``update`` is a mapping of additional/new key -> value
    """
    if deltareuse not in self.DELTAREUSEALL:
        raise ValueError(
            _(b'value for deltareuse invalid: %s') % deltareuse
        )

    if len(destrevlog):
        raise ValueError(_(b'destination revlog is not empty'))

    if getattr(self, 'filteredrevs', None):
        raise ValueError(_(b'source revlog has filtered revisions'))
    if getattr(destrevlog, 'filteredrevs', None):
        raise ValueError(_(b'destination revlog has filtered revisions'))

    # Remember the destination's delta configuration so that it can be
    # restored unconditionally once the copy is finished.
    oldlazydelta = destrevlog._lazydelta
    oldlazydeltabase = destrevlog._lazydeltabase
    oldamd = destrevlog._deltabothparents

    # lazydelta and lazydeltabase control whether a cached delta is
    # reused, if possible. Map each reuse policy onto the pair
    # (_lazydeltabase, _lazydelta); DELTAREUSEFULLADD intentionally
    # leaves the destination's current settings untouched.
    policysettings = {
        self.DELTAREUSEALWAYS: (True, True),
        self.DELTAREUSESAMEREVS: (False, True),
        self.DELTAREUSENEVER: (False, False),
    }

    try:
        if deltareuse in policysettings:
            lazydeltabase, lazydelta = policysettings[deltareuse]
            destrevlog._lazydeltabase = lazydeltabase
            destrevlog._lazydelta = lazydelta

        destrevlog._deltabothparents = forcedeltabothparents or oldamd

        self._clone(
            tr,
            destrevlog,
            addrevisioncb,
            deltareuse,
            forcedeltabothparents,
            sidedatacompanion,
        )

    finally:
        # Restore the destination revlog's original delta settings.
        destrevlog._lazydelta = oldlazydelta
        destrevlog._lazydeltabase = oldlazydeltabase
        destrevlog._deltabothparents = oldamd
2760
2760
def _clone(
    self,
    tr,
    destrevlog,
    addrevisioncb,
    deltareuse,
    forcedeltabothparents,
    sidedatacompanion,
):
    """perform the core duty of `revlog.clone` after parameter processing"""
    deltacomputer = deltautil.deltacomputer(destrevlog)
    index = self.index
    for rev in self:
        entry = index[rev]

        # Some classes override linkrev to take filtered revs into
        # account. Use the raw entry from the index instead.
        flags = entry[0] & 0xFFFF
        linkrev = entry[4]
        p1 = index[entry[5]][7]
        p2 = index[entry[6]][7]
        node = entry[7]

        # Default: keep all sidedata as-is (nothing dropped, nothing added).
        sidedataactions = (False, [], {})
        if sidedatacompanion is not None:
            sidedataactions = sidedatacompanion(self, rev)

        # (Possibly) reuse the delta from the source revlog if allowed
        # and the revlog chunk is a delta.
        cachedelta = None
        rawtext = None
        if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
            # Sidedata must change, or a full re-add was requested:
            # resolve the full text and go through the regular
            # addrevision() path so all mechanisms kick in.
            dropall, filterout, update = sidedataactions
            text, sidedata = self._revisiondata(rev)
            if dropall:
                sidedata = {}
            for key in filterout:
                sidedata.pop(key, None)
            sidedata.update(update)
            if not sidedata:
                sidedata = None
            destrevlog.addrevision(
                text,
                tr,
                linkrev,
                p1,
                p2,
                cachedelta=cachedelta,
                node=node,
                flags=flags,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        else:
            if destrevlog._lazydelta:
                baserev = self.deltaparent(rev)
                if baserev != nullrev:
                    cachedelta = (baserev, bytes(self._chunk(rev)))

            if not cachedelta:
                rawtext = self.rawdata(rev)

            indexfh = destrevlog.opener(
                destrevlog.indexfile, b'a+', checkambig=False
            )
            datafh = None
            if not destrevlog._inline:
                datafh = destrevlog.opener(destrevlog.datafile, b'a+')
            try:
                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    indexfh,
                    datafh,
                    deltacomputer=deltacomputer,
                )
            finally:
                # Always release the file handles, even on failure.
                if datafh:
                    datafh.close()
                indexfh.close()

        if addrevisioncb:
            addrevisioncb(self, rev, node)
2850
2850
def censorrevision(self, tr, censornode, tombstone=b''):
    """Replace the content of ``censornode`` with a tombstone, in place."""
    if (self.version & 0xFFFF) == REVLOGV0:
        raise error.RevlogError(
            _(b'cannot censor with version %d revlogs') % self.version
        )

    censorrev = self.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # The tombstone must fit in the slot of the data it replaces.
    if len(tombstone) > self.rawsize(censorrev):
        raise error.Abort(
            _(b'censor tombstone must be no longer than censored data')
        )

    # Rewriting the revlog in place is hard. Our strategy for censoring
    # is to create a new revlog, copy all revisions to it, then replace
    # the revlogs on transaction close.

    newindexfile = self.indexfile + b'.tmpcensored'
    newdatafile = self.datafile + b'.tmpcensored'

    # This is a bit dangerous. We could easily have a mismatch of state.
    tmprevlog = revlog(
        self.opener, newindexfile, newdatafile, censorable=True
    )
    tmprevlog.version = self.version
    tmprevlog._generaldelta = self._generaldelta
    tmprevlog._io = self._io

    for rev in self.revs():
        node = self.node(rev)
        p1, p2 = self.parents(node)

        if rev == censorrev:
            # Substitute the tombstone for the censored revision.
            tmprevlog.addrawrevision(
                tombstone,
                tr,
                self.linkrev(censorrev),
                p1,
                p2,
                censornode,
                REVIDX_ISCENSORED,
            )

            if tmprevlog.deltaparent(rev) != nullrev:
                raise error.Abort(
                    _(b'censored revision stored as delta; cannot censor'),
                    hint=_(
                        b'censoring of revlogs is not '
                        b'fully implemented; please report '
                        b'this bug'
                    ),
                )
            continue

        if self.iscensored(rev):
            if self.deltaparent(rev) != nullrev:
                raise error.Abort(
                    _(
                        b'cannot censor due to censored '
                        b'revision having delta stored'
                    )
                )
            # A previously-censored full revision: copy the raw chunk
            # without attempting to resolve it.
            rawtext = self._chunk(rev)
        else:
            rawtext = self.rawdata(rev)

        tmprevlog.addrawrevision(
            rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
        )

    # Back up the current files, then swap in the rewritten revlog.
    tr.addbackup(self.indexfile, location=b'store')
    if not self._inline:
        tr.addbackup(self.datafile, location=b'store')

    self.opener.rename(tmprevlog.indexfile, self.indexfile)
    if not self._inline:
        self.opener.rename(tmprevlog.datafile, self.datafile)

    self.clearcaches()
    self._loadindex()
2933
2933
def verifyintegrity(self, state):
    """Verifies the integrity of the revlog.

    Yields ``revlogproblem`` instances describing problems that are
    found.
    """
    datadiff, indexdiff = self.checksize()
    if datadiff:
        yield revlogproblem(
            error=_(b'data length off by %d bytes') % datadiff
        )
    if indexdiff:
        yield revlogproblem(
            error=_(b'index contains %d extra bytes') % indexdiff
        )

    version = self.version & 0xFFFF

    # The verifier tells us what version revlog we should be.
    if version != state[b'expectedversion']:
        yield revlogproblem(
            warning=_(b"warning: '%s' uses revlog format %d; expected %d")
            % (self.indexfile, version, state[b'expectedversion'])
        )

    state[b'skipread'] = set()
    state[b'safe_renamed'] = set()

    for rev in self:
        node = self.node(rev)

        # Verify contents. 4 cases to care about:
        #
        #   common: the most common case
        #   rename: with a rename
        #   meta:   file content starts with b'\1\n', the metadata
        #           header defined in filelog.py, but without a rename
        #   ext:    content stored externally
        #
        # More formally, their differences are shown below:
        #
        #                       | common | rename | meta  | ext
        #  -------------------------------------------------------
        #  flags()              | 0      | 0      | 0     | not 0
        #  renamed()            | False  | True   | False | ?
        #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
        #
        # "rawtext" means the raw text stored in revlog data, which
        # could be retrieved by "rawdata(rev)". "text"
        # mentioned below is "revision(rev)".
        #
        # There are 3 different lengths stored physically:
        #  1. L1: rawsize, stored in revlog index
        #  2. L2: len(rawtext), stored in revlog data
        #  3. L3: len(text), stored in revlog data if flags==0, or
        #     possibly somewhere else if flags!=0
        #
        # L1 should be equal to L2. L3 could be different from them.
        # "text" may or may not affect commit hash depending on flag
        # processors (see flagutil.addflagprocessor).
        #
        #              | common  | rename | meta  | ext
        # -------------------------------------------------
        # rawsize()    | L1      | L1     | L1    | L1
        # size()       | L1      | L2-LM  | L1(*) | L1 (?)
        # len(rawtext) | L2      | L2     | L2    | L2
        # len(text)    | L2      | L2     | L2    | L3
        # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
        #
        # LM:  length of metadata, depending on rawtext
        # (*): not ideal, see comment in filelog.size
        # (?): could be "- len(meta)" if the resolved content has
        #      rename metadata
        #
        # Checks needed to be done:
        #  1. length check: L1 == L2, in all cases.
        #  2. hash check: depending on flag processor, we may need to
        #     use either "text" (external), or "rawtext" (in revlog).

        try:
            skipflags = state.get(b'skipflags', 0)
            if skipflags:
                skipflags &= self.flags(rev)

            _verify_revision(self, skipflags, state, node)

            indexedsize = self.rawsize(rev)
            actualsize = len(self.rawdata(node))

            if indexedsize != actualsize:
                yield revlogproblem(
                    error=_(b'unpacked size is %d, %d expected')
                    % (actualsize, indexedsize),
                    node=node,
                )

        except error.CensoredNodeError:
            if state[b'erroroncensored']:
                yield revlogproblem(
                    error=_(b'censored file data'), node=node
                )
            state[b'skipread'].add(node)
        except Exception as e:
            yield revlogproblem(
                error=_(b'unpacking %s: %s')
                % (short(node), stringutil.forcebytestr(e)),
                node=node,
            )
            state[b'skipread'].add(node)
3038
3038
def storageinfo(
    self,
    exclusivefiles=False,
    sharedfiles=False,
    revisionscount=False,
    trackedsize=False,
    storedsize=False,
):
    """Return a dict describing storage properties of this revlog.

    Each boolean argument requests one optional key in the result;
    only the requested keys are computed and returned.
    """
    result = {}

    if exclusivefiles:
        # The index file always belongs exclusively to this revlog;
        # the data file exists separately only for non-inline revlogs.
        result[b'exclusivefiles'] = [(self.opener, self.indexfile)]
        if not self._inline:
            result[b'exclusivefiles'].append((self.opener, self.datafile))

    if sharedfiles:
        # Revlogs never share storage files with other stores.
        result[b'sharedfiles'] = []

    if revisionscount:
        result[b'revisionscount'] = len(self)

    if trackedsize:
        result[b'trackedsize'] = sum(self.rawsize(rev) for rev in self)

    if storedsize:
        result[b'storedsize'] = sum(
            self.opener.stat(path).st_size for path in self.files()
        )

    return result
General Comments 0
You need to be logged in to leave comments. Login now