revlogv2: don't assume that the sidedata of the last rev is right after data...
Raphaël Gomès
r47444:4cd214c9 default
@@ -1,3178 +1,3199 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVIDX_SIDEDATA,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    nodemap as nodemaputil,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)
from .pure import parsers as pureparsers

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def getoffset(q):
    return int(q >> 16)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
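
# Illustrative example (not part of the original source): the first field of
# an index entry packs the data offset into the high bits and the 16 flag
# bits into the low bits, so getoffset()/gettype() invert offset_type():
#
#     packed = offset_type(4096, 0)   # == 4096 << 16 == 0x10000000
#     assert getoffset(packed) == 4096
#     assert gettype(packed) == 0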


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(b">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
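
# Illustrative example (not part of the original source): the v0 record
# layout described above is fixed-size, so entry boundaries are pure
# arithmetic:
#
#     assert indexformatv0.size == 4 * 4 + 3 * 20  # 76 bytes per entry
#     # entry i of a v0 index starts at byte offset i * 76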


class revlogoldindex(list):
    @property
    def nodemap(self):
        msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self._nodemap

    @util.propertycache
    def _nodemap(self):
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def has_node(self, node):
        """return True if the node exists in the index"""
        return node in self._nodemap

    def rev(self, node):
        """return a revision for a node

        If the node is unknown, raise a RevlogError"""
        return self._nodemap[node]

    def get_rev(self, node):
        """return a revision for a node

        If the node is unknown, return None"""
        return self._nodemap.get(node)

    def append(self, tup):
        self._nodemap[tup[7]] = len(self)
        super(revlogoldindex, self).append(tup)

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        for r in pycompat.xrange(i.start, len(self)):
            del self._nodemap[self[r][7]]
        super(revlogoldindex, self).__delitem__(i)

    def clearcaches(self):
        self.__dict__.pop('_nodemap', None)

    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)


class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        index = revlogoldindex(index)
        return index, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)


# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
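
# Illustrative example (not part of the original source): a v1 ("ng") entry
# is a fixed 64 bytes (8 + 6 * 4 + 20 + 12 bytes of padding), and the first
# entry shares its first 4 bytes with the version header that packentry()
# below splices in for rev 0:
#
#     assert indexformatng.size == 64
#     assert versionformat.size == 4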

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p


indexformatv2 = struct.Struct(pureparsers.Index2Mixin.index_format)
indexformatv2_pack = indexformatv2.pack


class revlogv2io(object):
    def __init__(self):
        self.size = indexformatv2.size

    def parseindex(self, data, inline):
        index, cache = parsers.parse_index2(data, inline, revlogv2=True)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = indexformatv2_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p


NodemapRevlogIO = None

if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    class NodemapRevlogIO(revlogio):
        """A debug-oriented IO class that returns a PersistentNodeMapIndexObject

        The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
        """

        def parseindex(self, data, inline):
            index, cache = parsers.parse_index_devel_nodemap(data, inline)
            return index, cache


class rustrevlogio(revlogio):
    def parseindex(self, data, inline):
        index, cache = super(rustrevlogio, self).parseindex(data, inline)
        return rustrevlog.MixedIndex(index), cache


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.nodemap_file = None
        if persistentnodemap:
            self.nodemap_file = nodemaputil.get_nodemap_file(
                opener, self.indexfile
            )

        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make a copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _loadindex(self):
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
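
        # Illustrative note (not part of the original source): `x & (x - 1)`
        # above is the classic power-of-two test, e.g.:
        #
        #     65536 & 65535 == 0    # 2**16, accepted
        #     65537 & 65536 != 0    # rejected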

        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF
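
        # Illustrative example (not part of the original source): the 4-byte
        # header mixes format and feature bits. Assuming FLAG_INLINE_DATA ==
        # 1 << 16 and FLAG_GENERALDELTA == 1 << 17, a header of 0x00030001
        # splits into flags == 0x00030000 (inline + generaldelta) and
        # fmt == 1 (REVLOGV1).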

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self.nodemap_file
            and opts.get(b'devel-force-nodemap', False)
            and NodemapRevlogIO is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self.nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        elif fmt == REVLOGV2:
            self._io = revlogv2io()
        elif devel_nodemap:
            self._io = NodemapRevlogIO()
        elif use_rust_index:
            self._io = rustrevlogio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self.nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            args['checkambig'] = self._checkambig
        if mode == b'w':
            args['atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self.nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, so
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self.nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if self.version & 0xFFFF != REVLOGV2:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
-        return self.start(rev) + self.length(rev) + self.sidedata_length(rev)
+        return self.start(rev) + self.length(rev)
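
    # Hedged sketch (not part of the original source): with this change,
    # end() covers the data chunk only. For revlogv2, assuming index entry
    # field 8 holds the sidedata offset (sidedata_length() above reads the
    # length from field 9), the end of a revision's sidedata would instead
    # be computed as:
    #
    #     sidedata_end = self.index[rev][8] + self.index[rev][9]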

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
1004
1004
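    # Usage sketch (``rl`` is a hypothetical open revlog instance):
    #
    #   chain, stopped = rl._deltachain(rev)
    #   # ``chain`` lists the revisions whose deltas must be applied in
    #   # order to rebuild ``rev``; ``stopped`` is True only when the walk
    #   # ended at ``stoprev`` instead of at a full snapshot.
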
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

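    # Example (illustrative; ``cl`` is a hypothetical changelog revlog):
    #
    #   has, missing = cl.findcommonmissing(common=[c], heads=[h])
    #   # ``has`` lazily answers membership tests for ``::common`` while
    #   # ``missing`` is the sorted list of nodes in (::heads) - (::common),
    #   # i.e. what a peer that already has ``common`` would still need.
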
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

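    # The two ``findmissing*`` variants share one engine and differ only in
    # whether they speak revision numbers or node IDs; a sketch of the
    # relationship (hypothetical instance ``rl`` with matching inputs):
    #
    #   revs = rl.findmissingrevs(common=crevs, heads=hrevs)
    #   nodes = rl.findmissing(common=cnodes, heads=hnodes)
    #   # nodes == [rl.node(r) for r in revs]
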
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

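    # Usage sketch (illustrative; ``rl`` is a hypothetical revlog):
    #
    #   nodes, outroots, outheads = rl.nodesbetween([r], [h])
    #   # ``nodes`` is roughly ``r::h`` in revset terms; ``outroots`` and
    #   # ``outheads`` are the members of the input lists that actually
    #   # bound the result.
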
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

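    # The pure-Python fallback above is a single linear pass: assume every
    # revision is a head, then clear the flag on each revision's parents.
    # A self-contained sketch of the same idea (illustrative only):
    #
    #   def headrevs(parentrevs, count):
    #       ishead = [True] * count
    #       for r in range(count):
    #           for p in parentrevs(r):
    #               if p >= 0:  # skip nullrev
    #                   ishead[p] = False
    #       return [r for r in range(count) if ishead[r]]
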
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

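    # Note on the fast paths above: revision numbers are assigned in
    # topological order, so a revision can never be an ancestor of one with
    # a smaller number; that is why ``a > b`` can return False immediately.
    # Usage sketch (hypothetical ``rl``):
    #
    #   if rl.isancestorrev(base, head):
    #       pass  # ``base`` is reachable from ``head`` via parent links
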
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

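    # ``lookup`` accepts several identifier shapes; a sketch of accepted
    # inputs (illustrative; ``rl`` is a hypothetical revlog):
    #
    #   rl.lookup(0)            # revision number
    #   rl.lookup(b'0')         # str(revision number)
    #   rl.lookup(node)         # 20-byte binary nodeid
    #   rl.lookup(b'1de3a')     # unambiguous hex prefix
    #   # ambiguous prefixes raise AmbiguousPrefixLookupError; anything
    #   # else raises LookupError.
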
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

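    # ``cmp`` never reconstructs the stored text: a Mercurial node is
    # sha1(min(p1, p2) + max(p1, p2) + text), so hashing the candidate text
    # with the stored parents and comparing against the node is enough.
    # Sketch of that hash (illustrative; mirrors what
    # storageutil.hashrevisionsha1 computes):
    #
    #   import hashlib
    #   def hashrevision(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2) + max(p1, p2))
    #       s.update(text)
    #       return s.digest()
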
    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

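    # The window arithmetic above relies on ``cachesize`` being a power of
    # two: ``offset & ~(cachesize - 1)`` rounds the offset down to a window
    # boundary, and the length is padded so the window still covers
    # ``[offset, offset + length)``. Worked example with cachesize = 65536:
    #
    #   offset, length = 70000, 100
    #   realoffset = 70000 & ~65535                            # 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # 65536
    #   # the read [65536, 131072) contains the requested [70000, 70100)
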
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

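    # Note on ``istart[0] >> 16``: the first field of an index entry packs
    # the data-file offset and the 16-bit revision flags into one integer,
    # which is why the offset is recovered with a shift. Sketch of the
    # packing:
    #
    #   offset_flags = (offset << 16) | flags
    #   offset = offset_flags >> 16
    #   flags = offset_flags & 0xFFFF
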
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

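    # In a sparse revlog a snapshot is either a full text (its base is
    # itself or nullrev) or an intermediate snapshot: a delta whose base is
    # *not* one of its parents and is itself a snapshot, hence the recursion
    # on ``base`` above. Usage sketch (hypothetical ``rl``):
    #
    #   if rl.issnapshot(rev):
    #       depth = rl.snapshotdepth(rev)  # snapshots below this one
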
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflags(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

1861 def revision(self, nodeorrev, _df=None, raw=False):
1861 def revision(self, nodeorrev, _df=None, raw=False):
1862 """return an uncompressed revision of a given node or revision
1862 """return an uncompressed revision of a given node or revision
1863 number.
1863 number.
1864
1864
1865 _df - an existing file handle to read from. (internal-only)
1865 _df - an existing file handle to read from. (internal-only)
1866 raw - an optional argument specifying if the revision data is to be
1866 raw - an optional argument specifying if the revision data is to be
1867 treated as raw data when applying flag transforms. 'raw' should be set
1867 treated as raw data when applying flag transforms. 'raw' should be set
1868 to True when generating changegroups or in debug commands.
1868 to True when generating changegroups or in debug commands.
1869 """
1869 """
1870 if raw:
1870 if raw:
1871 msg = (
1871 msg = (
1872 b'revlog.revision(..., raw=True) is deprecated, '
1872 b'revlog.revision(..., raw=True) is deprecated, '
1873 b'use revlog.rawdata(...)'
1873 b'use revlog.rawdata(...)'
1874 )
1874 )
1875 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1875 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1876 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1876 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1877
1877
1878 def sidedata(self, nodeorrev, _df=None):
1878 def sidedata(self, nodeorrev, _df=None):
1879 """a map of extra data related to the changeset but not part of the hash
1879 """a map of extra data related to the changeset but not part of the hash
1880
1880
1881 This function currently return a dictionary. However, more advanced
1881 This function currently return a dictionary. However, more advanced
1882 mapping object will likely be used in the future for a more
1882 mapping object will likely be used in the future for a more
1883 efficient/lazy code.
1883 efficient/lazy code.
1884 """
1884 """
1885 return self._revisiondata(nodeorrev, _df)[1]
1885 return self._revisiondata(nodeorrev, _df)[1]
1886
1886
1887 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1887 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1888 # deal with <nodeorrev> argument type
1888 # deal with <nodeorrev> argument type
1889 if isinstance(nodeorrev, int):
1889 if isinstance(nodeorrev, int):
1890 rev = nodeorrev
1890 rev = nodeorrev
1891 node = self.node(rev)
1891 node = self.node(rev)
1892 else:
1892 else:
1893 node = nodeorrev
1893 node = nodeorrev
1894 rev = None
1894 rev = None
1895
1895
1896 # fast path the special `nullid` rev
1896 # fast path the special `nullid` rev
1897 if node == nullid:
1897 if node == nullid:
1898 return b"", {}
1898 return b"", {}
1899
1899
1900 # ``rawtext`` is the text as stored inside the revlog. Might be the
1900 # ``rawtext`` is the text as stored inside the revlog. Might be the
1901 # revision or might need to be processed to retrieve the revision.
1901 # revision or might need to be processed to retrieve the revision.
1902 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1902 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1903
1903
1904 if self.version & 0xFFFF == REVLOGV2:
1904 if self.version & 0xFFFF == REVLOGV2:
1905 if rev is None:
1905 if rev is None:
1906 rev = self.rev(node)
1906 rev = self.rev(node)
1907 sidedata = self._sidedata(rev)
1907 sidedata = self._sidedata(rev)
1908 else:
1908 else:
1909 sidedata = {}
1909 sidedata = {}
1910
1910
1911 if raw and validated:
1911 if raw and validated:
1912 # if we don't want to process the raw text and that raw
1912 # if we don't want to process the raw text and that raw
1913 # text is cached, we can exit early.
1913 # text is cached, we can exit early.
1914 return rawtext, sidedata
1914 return rawtext, sidedata
1915 if rev is None:
1915 if rev is None:
1916 rev = self.rev(node)
1916 rev = self.rev(node)
1917 # the revlog's flag for this revision
1917 # the revlog's flag for this revision
1918 # (usually alter its state or content)
1918 # (usually alter its state or content)
1919 flags = self.flags(rev)
1919 flags = self.flags(rev)
1920
1920
1921 if validated and flags == REVIDX_DEFAULT_FLAGS:
1921 if validated and flags == REVIDX_DEFAULT_FLAGS:
1922 # no extra flags set, no flag processor runs, text = rawtext
1922 # no extra flags set, no flag processor runs, text = rawtext
1923 return rawtext, sidedata
1923 return rawtext, sidedata
1924
1924
1925 if raw:
1925 if raw:
1926 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1926 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1927 text = rawtext
1927 text = rawtext
1928 else:
1928 else:
1929 r = flagutil.processflagsread(self, rawtext, flags)
1929 r = flagutil.processflagsread(self, rawtext, flags)
1930 text, validatehash = r
1930 text, validatehash = r
1931 if validatehash:
1931 if validatehash:
1932 self.checkhash(text, node, rev=rev)
1932 self.checkhash(text, node, rev=rev)
1933 if not validated:
1933 if not validated:
1934 self._revisioncache = (node, rev, rawtext)
1934 self._revisioncache = (node, rev, rawtext)
1935
1935
1936 return text, sidedata
1936 return text, sidedata
1937
1937
1938 def _rawtext(self, node, rev, _df=None):
1938 def _rawtext(self, node, rev, _df=None):
1939 """return the possibly unvalidated rawtext for a revision
1939 """return the possibly unvalidated rawtext for a revision
1940
1940
1941 returns (rev, rawtext, validated)
1941 returns (rev, rawtext, validated)
1942 """
1942 """
1943
1943
1944 # revision in the cache (could be useful to apply delta)
1944 # revision in the cache (could be useful to apply delta)
1945 cachedrev = None
1945 cachedrev = None
1946 # An intermediate text to apply deltas to
1946 # An intermediate text to apply deltas to
1947 basetext = None
1947 basetext = None
1948
1948
1949 # Check if we have the entry in cache
1949 # Check if we have the entry in cache
1950 # The cache entry looks like (node, rev, rawtext)
1950 # The cache entry looks like (node, rev, rawtext)
1951 if self._revisioncache:
1951 if self._revisioncache:
1952 if self._revisioncache[0] == node:
1952 if self._revisioncache[0] == node:
1953 return (rev, self._revisioncache[2], True)
1953 return (rev, self._revisioncache[2], True)
1954 cachedrev = self._revisioncache[1]
1954 cachedrev = self._revisioncache[1]
1955
1955
1956 if rev is None:
1956 if rev is None:
1957 rev = self.rev(node)
1957 rev = self.rev(node)
1958
1958
1959 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1959 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1960 if stopped:
1960 if stopped:
1961 basetext = self._revisioncache[2]
1961 basetext = self._revisioncache[2]
1962
1962
1963 # drop cache to save memory, the caller is expected to
1963 # drop cache to save memory, the caller is expected to
1964 # update self._revisioncache after validating the text
1964 # update self._revisioncache after validating the text
1965 self._revisioncache = None
1965 self._revisioncache = None
1966
1966
1967 targetsize = None
1967 targetsize = None
1968 rawsize = self.index[rev][2]
1968 rawsize = self.index[rev][2]
1969 if 0 <= rawsize:
1969 if 0 <= rawsize:
1970 targetsize = 4 * rawsize
1970 targetsize = 4 * rawsize
1971
1971
1972 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1972 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1973 if basetext is None:
1973 if basetext is None:
1974 basetext = bytes(bins[0])
1974 basetext = bytes(bins[0])
1975 bins = bins[1:]
1975 bins = bins[1:]
1976
1976
1977 rawtext = mdiff.patches(basetext, bins)
1977 rawtext = mdiff.patches(basetext, bins)
1978 del basetext # let us have a chance to free memory early
1978 del basetext # let us have a chance to free memory early
1979 return (rev, rawtext, False)
1979 return (rev, rawtext, False)
1980
1980
1981 def _sidedata(self, rev):
1981 def _sidedata(self, rev):
1982 """Return the sidedata for a given revision number."""
1982 """Return the sidedata for a given revision number."""
1983 index_entry = self.index[rev]
1983 index_entry = self.index[rev]
1984 sidedata_offset = index_entry[8]
1984 sidedata_offset = index_entry[8]
1985 sidedata_size = index_entry[9]
1985 sidedata_size = index_entry[9]
1986
1986
1987 if self._inline:
1987 if self._inline:
1988 sidedata_offset += self._io.size * (1 + rev)
1988 sidedata_offset += self._io.size * (1 + rev)
1989 if sidedata_size == 0:
1989 if sidedata_size == 0:
1990 return {}
1990 return {}
1991
1991
1992 segment = self._getsegment(sidedata_offset, sidedata_size)
1992 segment = self._getsegment(sidedata_offset, sidedata_size)
1993 sidedata = sidedatautil.deserialize_sidedata(segment)
1993 sidedata = sidedatautil.deserialize_sidedata(segment)
1994 return sidedata
1994 return sidedata
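
    # Worked example (made-up numbers): for an inline revlog, index entries
    # and data chunks are interleaved in a single file, so a sidedata offset
    # recorded as 120 for rev 2 actually lives at
    # 120 + self._io.size * (1 + 2) bytes into the index file.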

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        troffset = tr.findoffset(self.indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )
        trindex = 0
        tr.add(self.datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data
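
    # Return convention, sketched with a zlib compressor (header and data are
    # concatenated when written):
    #
    #     compress(b'')        -> (b'', b'')      # empty passthrough
    #     compress(data)       -> (b'', b'x...')  # engine output embeds header
    #     compress(noisy_data) -> (b'u', noisy_data) when compression does not
    #                             help and the data does not start with b'\0'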

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)
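
    # Header byte dispatch, mirroring the branches above:
    #
    #     b'x'   -> zlib-compressed chunk, inflate it
    #     b'\0'  -> raw chunk stored verbatim (common case, checked early)
    #     b'u'   -> stored uncompressed, strip the one-byte marker
    #     other  -> resolve a revlog compression engine registered for that
    #               header (e.g. zstd, when enabled)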

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self.indexfile, offset + curr * self._io.size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self.indexfile, curr * self._io.size
                )
                self._concurrencychecker(dfh, self.datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        if self.version & 0xFFFF != REVLOGV2:
            e = e[:8]

        self.index.append(e)
        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr
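
    # Index entry layout as built above (a sketch; fields by position in the
    # tuple, not an official description of the on-disk format):
    #
    #     0: offset_type(offset, flags)    5: p1 rev
    #     1: compressed (delta) length     6: p2 rev
    #     2: uncompressed length           7: node
    #     3: delta base rev                8: sidedata offset
    #     4: link rev                      9: sidedata length
    #
    # Revlog versions other than v2 keep only the first 8 fields (e[:8]).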

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self.version & 0xFFFF != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset
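
    # Worked example for the scan above (made-up numbers): if rev 7's data
    # ends at byte 1000 but rev 6's sidedata was rewritten at bytes 1000-1050
    # after rev 7 was stored, max(self.end(7), offset, 1050) yields 1050, so
    # the next revision is appended after the relocated sidedata instead of
    # overwriting it.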

    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self.indexfile, isize)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()
        return not empty
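
    # Shape of one element of ``deltas``, matching the unpacking in the loop
    # above:
    #
    #     (node, p1, p2, linknode, deltabase, delta, flags)
    #
    # ``deltabase`` must already be known to this revlog (or added earlier in
    # the same group): the first delta applies against a parent, the rest
    # typically against the previously added revision.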

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
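
    # Reading the result (an interpretation sketch): (0, 0) means both files
    # match the index's own accounting; a positive ``dd`` suggests trailing
    # bytes in the data file, and a non-zero ``di`` a truncated or
    # partially-written index entry.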

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When it is None, the current default is used.

        If not None, ``sidedatacompanion`` is a callable that accepts two
        arguments:

          (srcrevlog, rev)

        and returns a quintet that controls changes to sidedata content from
        the old revision to the new clone result:

          (dropall, filterout, update, new_flags, dropped_flags)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        * `new_flags` is a bitfield of new flags that the revision should get
        * `dropped_flags` is a bitfield of flags that the revision should no
          longer have
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached
        # delta, if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

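    # Editor's note: an illustrative sketch, not part of the original
    # source. It shows the shape of a ``sidedatacompanion`` satisfying the
    # contract documented in ``clone`` above; the helper name and the
    # ``b'example-key'`` sidedata key are hypothetical.
    def _example_sidedatacompanion(srcrevlog, rev):
        """Drop one hypothetical sidedata key and leave flags untouched."""
        dropall = False  # keep the remaining sidedata
        filterout = {b'example-key'}  # sidedata keys to remove
        update = {}  # no new key -> value pairs to add
        new_flags = 0  # no revision flags to set
        dropped_flags = 0  # no revision flags to clear
        return (dropall, filterout, update, new_flags, dropped_flags)
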
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {}, 0, 0)
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall = sidedataactions[0]
                filterout = sidedataactions[1]
                update = sidedataactions[2]
                new_flags = sidedataactions[3]
                dropped_flags = sidedataactions[4]
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None

                flags |= new_flags
                flags &= ~dropped_flags

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

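    # Editor's note: an illustrative sketch, not part of the original
    # source. ``clone`` invokes ``addrevisioncb(srcrevlog, rev, node)`` once
    # per copied revision (see the end of ``_clone`` above); the helper and
    # callback names here are hypothetical.
    def _example_clone_usage(srcrevlog, destrevlog, tr):
        copied = []

        def onrevision(rl, rev, node):
            # record progress; a real caller might drive a ui progress bar
            copied.append(rev)

        srcrevlog.clone(
            tr,
            destrevlog,
            addrevisioncb=onrevision,
            # force deltas to be recomputed with the destination's settings
            deltareuse=srcrevlog.DELTAREUSENEVER,
        )
        return copied
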
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring
        # is to create a new revlog, copy all revisions to it, then replace
        # the revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

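    # Editor's note: an illustrative usage sketch, not part of the original
    # source. ``censorrevision`` packs the tombstone into censored-file
    # metadata and aborts if the result is longer than the censored
    # revision's rawsize, so tombstones should stay short.
    def _example_censor(rl, tr, censornode):
        rl.censorrevision(tr, censornode, tombstone=b'censored by admin')
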
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  ------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

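    # Editor's note: an illustrative sketch, not part of the original
    # source. It drives ``verifyintegrity`` with the ``state`` keys the
    # method reads above (b'expectedversion', b'erroroncensored'); REVLOGV1
    # is imported at the top of this module.
    def _example_verify(rl):
        state = {
            b'expectedversion': REVLOGV1,
            b'erroroncensored': True,
        }
        problems = list(rl.verifyintegrity(state))
        # the generator also fills state[b'skipread'] with nodes whose
        # content could not be checked
        return problems, state[b'skipread']
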
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
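    # Editor's note: an illustrative call, not part of the original source.
    # Only the requested keys are computed; ``trackedsize`` sums rawsize over
    # every revision and ``storedsize`` stats every file, so both can be
    # expensive on large revlogs.
    def _example_storageinfo(rl):
        info = rl.storageinfo(revisionscount=True, storedsize=True)
        return info[b'revisionscount'], info[b'storedsize']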