revlogv2: temporarily forbid inline revlogs...
Raphaël Gomès
r47450:c8bb7b89 default
@@ -1,3203 +1,3207 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVIDX_SIDEDATA,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    nodemap as nodemaputil,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)
from .pure import parsers as pureparsers

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def getoffset(q):
    return int(q >> 16)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)

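# ---- Editor's illustration (not part of upstream revlog.py) ----
# A minimal sketch of how the three helpers above cooperate: the first
# element of every index entry packs the data-file offset and a 16-bit
# flag field into one integer, offset in the high bits, flags in the
# low 16 bits.
#
#     packed = offset_type(4096, 0)    # == 4096 << 16 == 268435456
#     assert getoffset(packed) == 4096
#     assert gettype(packed) == 0
#
# Passing a flag bit outside flagutil.REVIDX_KNOWN_FLAGS raises ValueError.
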
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(b">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

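# ---- Editor's illustration (not part of upstream revlog.py) ----
# indexformatv0 describes one fixed 76-byte big-endian record: four 4-byte
# signed ints followed by three 20-byte binary node ids. A round-trip
# sketch, where p1/p2/node stand in for arbitrary 20-byte hashes:
#
#     p1 = p2 = node = b'\x00' * 20
#     raw = indexformatv0_pack(0, 11, 0, 0, p1, p2, node)
#     assert len(raw) == indexformatv0.size == 76
#     assert indexformatv0_unpack(raw) == (0, 11, 0, 0, p1, p2, node)
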
class revlogoldindex(list):
    @property
    def nodemap(self):
        msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self._nodemap

    @util.propertycache
    def _nodemap(self):
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def has_node(self, node):
        """return True if the node exists in the index"""
        return node in self._nodemap

    def rev(self, node):
        """return a revision for a node

        If the node is unknown, raise a RevlogError"""
        return self._nodemap[node]

    def get_rev(self, node):
        """return a revision for a node

        If the node is unknown, return None"""
        return self._nodemap.get(node)

    def append(self, tup):
        self._nodemap[tup[7]] = len(self)
        super(revlogoldindex, self).append(tup)

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        for r in pycompat.xrange(i.start, len(self)):
            del self._nodemap[self[r][7]]
        super(revlogoldindex, self).__delitem__(i)

    def clearcaches(self):
        self.__dict__.pop('_nodemap', None)

    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)


class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        index = revlogoldindex(index)
        return index, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)


# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

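# ---- Editor's illustration (not part of upstream revlog.py) ----
# Each "ng" (v1) index entry is exactly 64 bytes: 8 (packed offset+flags) +
# 6 * 4 (ints) + 20 (node) + 12 bytes of zero padding declared by "12x".
# In a non-inline revlog the entries are stored back to back, so the record
# for revision r begins at byte r * 64 of the ".i" file:
#
#     assert indexformatng.size == 64
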
class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

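# ---- Editor's note (illustrative, not upstream code) ----
# packentry() overlays the 4-byte version header on the very first index
# entry: revision 0 always starts at data offset 0, so the leading bytes of
# its packed offset+flags field are zero and replacing p[:4] with
# versionformat_pack(version) loses no information. The index parsers
# special-case revision 0 on the way back in, so start(0) still reads as 0.
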
indexformatv2 = struct.Struct(pureparsers.Index2Mixin.index_format)
indexformatv2_pack = indexformatv2.pack


class revlogv2io(object):
    def __init__(self):
        self.size = indexformatv2.size

    def parseindex(self, data, inline):
        index, cache = parsers.parse_index2(data, inline, revlogv2=True)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = indexformatv2_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p


NodemapRevlogIO = None

if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    class NodemapRevlogIO(revlogio):
        """A debug oriented IO class that returns a PersistentNodeMapIndexObject

        The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
        """

        def parseindex(self, data, inline):
            index, cache = parsers.parse_index_devel_nodemap(data, inline)
            return index, cache


class rustrevlogio(revlogio):
    def parseindex(self, data, inline):
        index, cache = super(rustrevlogio, self).parseindex(data, inline)
        return rustrevlog.MixedIndex(index), cache


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).
    """

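    # ---- Editor's note (illustrative, not upstream code) ----
    # When FLAG_INLINE_DATA is set there is no separate ".d" file: the
    # compressed revision chunks live in the ".i" file itself, interleaved
    # after each index entry, which saves a file handle and a seek for small
    # revlogs. Once the file grows past _maxinline (above), the revlog is
    # migrated to a split ".i"/".d" layout.
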
    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.nodemap_file = None
        if persistentnodemap:
            self.nodemap_file = nodemaputil.get_nodemap_file(
                opener, self.indexfile
            )

        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _loadindex(self):
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )

        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

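        # ---- Editor's worked example (not upstream code) ----
        # For a typical modern v1 revlog the index starts with the bytes
        # 00 03 00 01, i.e. versionflags == 0x00030001, which splits into
        # fmt == 0x0001 (REVLOGV1) and flags == 0x00030000, the combination
        # of FLAG_INLINE_DATA (bit 16) and FLAG_GENERALDELTA (bit 17).
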
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

-            self._inline = versionflags & FLAG_INLINE_DATA
+            # There is a bug in the transaction handling when going from an
+            # inline revlog to a separate index and data file. Turn it off until
+            # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
+            # See issue6485
+            self._inline = False
643 # generaldelta implied by version 2 revlogs.
647 # generaldelta implied by version 2 revlogs.
644 self._generaldelta = True
648 self._generaldelta = True
645
649
646 else:
650 else:
647 raise error.RevlogError(
651 raise error.RevlogError(
648 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
652 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
649 )
653 )
650 # sparse-revlog can't be on without general-delta (issue6056)
654 # sparse-revlog can't be on without general-delta (issue6056)
651 if not self._generaldelta:
655 if not self._generaldelta:
652 self._sparserevlog = False
656 self._sparserevlog = False
653
657
654 self._storedeltachains = True
658 self._storedeltachains = True
655
659
656 devel_nodemap = (
660 devel_nodemap = (
657 self.nodemap_file
661 self.nodemap_file
658 and opts.get(b'devel-force-nodemap', False)
662 and opts.get(b'devel-force-nodemap', False)
659 and NodemapRevlogIO is not None
663 and NodemapRevlogIO is not None
660 )
664 )
661
665
662 use_rust_index = False
666 use_rust_index = False
663 if rustrevlog is not None:
667 if rustrevlog is not None:
664 if self.nodemap_file is not None:
668 if self.nodemap_file is not None:
665 use_rust_index = True
669 use_rust_index = True
666 else:
670 else:
667 use_rust_index = self.opener.options.get(b'rust.index')
671 use_rust_index = self.opener.options.get(b'rust.index')
668
672
669 self._io = revlogio()
673 self._io = revlogio()
670 if self.version == REVLOGV0:
674 if self.version == REVLOGV0:
671 self._io = revlogoldio()
675 self._io = revlogoldio()
672 elif fmt == REVLOGV2:
676 elif fmt == REVLOGV2:
673 self._io = revlogv2io()
677 self._io = revlogv2io()
674 elif devel_nodemap:
678 elif devel_nodemap:
675 self._io = NodemapRevlogIO()
679 self._io = NodemapRevlogIO()
676 elif use_rust_index:
680 elif use_rust_index:
677 self._io = rustrevlogio()
681 self._io = rustrevlogio()
678 try:
682 try:
679 d = self._io.parseindex(indexdata, self._inline)
683 d = self._io.parseindex(indexdata, self._inline)
680 index, _chunkcache = d
684 index, _chunkcache = d
681 use_nodemap = (
685 use_nodemap = (
682 not self._inline
686 not self._inline
683 and self.nodemap_file is not None
687 and self.nodemap_file is not None
684 and util.safehasattr(index, 'update_nodemap_data')
688 and util.safehasattr(index, 'update_nodemap_data')
685 )
689 )
686 if use_nodemap:
690 if use_nodemap:
687 nodemap_data = nodemaputil.persisted_data(self)
691 nodemap_data = nodemaputil.persisted_data(self)
688 if nodemap_data is not None:
692 if nodemap_data is not None:
689 docket = nodemap_data[0]
693 docket = nodemap_data[0]
690 if (
694 if (
691 len(d[0]) > docket.tip_rev
695 len(d[0]) > docket.tip_rev
692 and d[0][docket.tip_rev][7] == docket.tip_node
696 and d[0][docket.tip_rev][7] == docket.tip_node
693 ):
697 ):
694 # no changelog tampering
698 # no changelog tampering
695 self._nodemap_docket = docket
699 self._nodemap_docket = docket
696 index.update_nodemap_data(*nodemap_data)
700 index.update_nodemap_data(*nodemap_data)
697 except (ValueError, IndexError):
701 except (ValueError, IndexError):
698 raise error.RevlogError(
702 raise error.RevlogError(
699 _(b"index %s is corrupted") % self.indexfile
703 _(b"index %s is corrupted") % self.indexfile
700 )
704 )
701 self.index, self._chunkcache = d
705 self.index, self._chunkcache = d
702 if not self._chunkcache:
706 if not self._chunkcache:
703 self._chunkclear()
707 self._chunkclear()
704 # revnum -> (chain-length, sum-delta-length)
708 # revnum -> (chain-length, sum-delta-length)
705 self._chaininfocache = util.lrucachedict(500)
709 self._chaininfocache = util.lrucachedict(500)
706 # revlog header -> revlog compressor
710 # revlog header -> revlog compressor
707 self._decompressors = {}
711 self._decompressors = {}
708
712
709 @util.propertycache
713 @util.propertycache
710 def _compressor(self):
714 def _compressor(self):
711 engine = util.compengines[self._compengine]
715 engine = util.compengines[self._compengine]
712 return engine.revlogcompressor(self._compengineopts)
716 return engine.revlogcompressor(self._compengineopts)
713
717
714 def _indexfp(self, mode=b'r'):
718 def _indexfp(self, mode=b'r'):
715 """file object for the revlog's index file"""
719 """file object for the revlog's index file"""
716 args = {'mode': mode}
720 args = {'mode': mode}
717 if mode != b'r':
721 if mode != b'r':
718 args['checkambig'] = self._checkambig
722 args['checkambig'] = self._checkambig
719 if mode == b'w':
723 if mode == b'w':
720 args['atomictemp'] = True
724 args['atomictemp'] = True
721 return self.opener(self.indexfile, **args)
725 return self.opener(self.indexfile, **args)
722
726
723 def _datafp(self, mode=b'r'):
727 def _datafp(self, mode=b'r'):
724 """file object for the revlog's data file"""
728 """file object for the revlog's data file"""
725 return self.opener(self.datafile, mode=mode)
729 return self.opener(self.datafile, mode=mode)
726
730
727 @contextlib.contextmanager
731 @contextlib.contextmanager
728 def _datareadfp(self, existingfp=None):
732 def _datareadfp(self, existingfp=None):
729 """file object suitable to read data"""
733 """file object suitable to read data"""
730 # Use explicit file handle, if given.
734 # Use explicit file handle, if given.
731 if existingfp is not None:
735 if existingfp is not None:
732 yield existingfp
736 yield existingfp
733
737
734 # Use a file handle being actively used for writes, if available.
738 # Use a file handle being actively used for writes, if available.
735 # There is some danger to doing this because reads will seek the
739 # There is some danger to doing this because reads will seek the
736 # file. However, _writeentry() performs a SEEK_END before all writes,
740 # file. However, _writeentry() performs a SEEK_END before all writes,
737 # so we should be safe.
741 # so we should be safe.
738 elif self._writinghandles:
742 elif self._writinghandles:
739 if self._inline:
743 if self._inline:
740 yield self._writinghandles[0]
744 yield self._writinghandles[0]
741 else:
745 else:
742 yield self._writinghandles[1]
746 yield self._writinghandles[1]
743
747
744 # Otherwise open a new file handle.
748 # Otherwise open a new file handle.
745 else:
749 else:
746 if self._inline:
750 if self._inline:
747 func = self._indexfp
751 func = self._indexfp
748 else:
752 else:
749 func = self._datafp
753 func = self._datafp
750 with func() as fp:
754 with func() as fp:
751 yield fp
755 yield fp
752
756
753 def tiprev(self):
757 def tiprev(self):
754 return len(self.index) - 1
758 return len(self.index) - 1
755
759
756 def tip(self):
760 def tip(self):
757 return self.node(self.tiprev())
761 return self.node(self.tiprev())
758
762
759 def __contains__(self, rev):
763 def __contains__(self, rev):
760 return 0 <= rev < len(self)
764 return 0 <= rev < len(self)
761
765
762 def __len__(self):
766 def __len__(self):
763 return len(self.index)
767 return len(self.index)
764
768
765 def __iter__(self):
769 def __iter__(self):
766 return iter(pycompat.xrange(len(self)))
770 return iter(pycompat.xrange(len(self)))
767
771
768 def revs(self, start=0, stop=None):
772 def revs(self, start=0, stop=None):
769 """iterate over all rev in this revlog (from start to stop)"""
773 """iterate over all rev in this revlog (from start to stop)"""
770 return storageutil.iterrevs(len(self), start=start, stop=stop)
774 return storageutil.iterrevs(len(self), start=start, stop=stop)
771
775
772 @property
776 @property
773 def nodemap(self):
777 def nodemap(self):
774 msg = (
778 msg = (
775 b"revlog.nodemap is deprecated, "
779 b"revlog.nodemap is deprecated, "
776 b"use revlog.index.[has_node|rev|get_rev]"
780 b"use revlog.index.[has_node|rev|get_rev]"
777 )
781 )
778 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
782 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
779 return self.index.nodemap
783 return self.index.nodemap
780
784
781 @property
785 @property
782 def _nodecache(self):
786 def _nodecache(self):
783 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
787 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
784 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
788 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
785 return self.index.nodemap
789 return self.index.nodemap
786
790
787 def hasnode(self, node):
791 def hasnode(self, node):
788 try:
792 try:
789 self.rev(node)
793 self.rev(node)
790 return True
794 return True
791 except KeyError:
795 except KeyError:
792 return False
796 return False
793
797
794 def candelta(self, baserev, rev):
798 def candelta(self, baserev, rev):
795 """whether two revisions (baserev, rev) can be delta-ed or not"""
799 """whether two revisions (baserev, rev) can be delta-ed or not"""
796 # Disable delta if either rev requires a content-changing flag
800 # Disable delta if either rev requires a content-changing flag
797 # processor (ex. LFS). This is because such flag processor can alter
801 # processor (ex. LFS). This is because such flag processor can alter
798 # the rawtext content that the delta will be based on, and two clients
802 # the rawtext content that the delta will be based on, and two clients
799 # could have a same revlog node with different flags (i.e. different
803 # could have a same revlog node with different flags (i.e. different
800 # rawtext contents) and the delta could be incompatible.
804 # rawtext contents) and the delta could be incompatible.
801 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
805 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
802 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
806 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
803 ):
807 ):
804 return False
808 return False
805 return True
809 return True
806
810
807 def update_caches(self, transaction):
811 def update_caches(self, transaction):
808 if self.nodemap_file is not None:
812 if self.nodemap_file is not None:
809 if transaction is None:
813 if transaction is None:
810 nodemaputil.update_persistent_nodemap(self)
814 nodemaputil.update_persistent_nodemap(self)
811 else:
815 else:
812 nodemaputil.setup_persistent_nodemap(transaction, self)
816 nodemaputil.setup_persistent_nodemap(transaction, self)
813
817
814 def clearcaches(self):
818 def clearcaches(self):
815 self._revisioncache = None
819 self._revisioncache = None
816 self._chainbasecache.clear()
820 self._chainbasecache.clear()
817 self._chunkcache = (0, b'')
821 self._chunkcache = (0, b'')
818 self._pcache = {}
822 self._pcache = {}
819 self._nodemap_docket = None
823 self._nodemap_docket = None
820 self.index.clearcaches()
824 self.index.clearcaches()
821 # The python code is the one responsible for validating the docket, we
825 # The python code is the one responsible for validating the docket, we
822 # end up having to refresh it here.
826 # end up having to refresh it here.
823 use_nodemap = (
827 use_nodemap = (
824 not self._inline
828 not self._inline
825 and self.nodemap_file is not None
829 and self.nodemap_file is not None
826 and util.safehasattr(self.index, 'update_nodemap_data')
830 and util.safehasattr(self.index, 'update_nodemap_data')
827 )
831 )
828 if use_nodemap:
832 if use_nodemap:
829 nodemap_data = nodemaputil.persisted_data(self)
833 nodemap_data = nodemaputil.persisted_data(self)
830 if nodemap_data is not None:
834 if nodemap_data is not None:
831 self._nodemap_docket = nodemap_data[0]
835 self._nodemap_docket = nodemap_data[0]
832 self.index.update_nodemap_data(*nodemap_data)
836 self.index.update_nodemap_data(*nodemap_data)
833
837
834 def rev(self, node):
838 def rev(self, node):
835 try:
839 try:
836 return self.index.rev(node)
840 return self.index.rev(node)
837 except TypeError:
841 except TypeError:
838 raise
842 raise
839 except error.RevlogError:
843 except error.RevlogError:
840 # parsers.c radix tree lookup failed
844 # parsers.c radix tree lookup failed
841 if node == wdirid or node in wdirfilenodeids:
845 if node == wdirid or node in wdirfilenodeids:
842 raise error.WdirUnsupported
846 raise error.WdirUnsupported
843 raise error.LookupError(node, self.indexfile, _(b'no node'))
847 raise error.LookupError(node, self.indexfile, _(b'no node'))
844
848
845 # Accessors for index entries.
849 # Accessors for index entries.
846
850
847 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
851 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
848 # are flags.
852 # are flags.
849 def start(self, rev):
853 def start(self, rev):
850 return int(self.index[rev][0] >> 16)
854 return int(self.index[rev][0] >> 16)
851
855
852 def flags(self, rev):
856 def flags(self, rev):
853 return self.index[rev][0] & 0xFFFF
857 return self.index[rev][0] & 0xFFFF
854
858
855 def length(self, rev):
859 def length(self, rev):
856 return self.index[rev][1]
860 return self.index[rev][1]
857
861
858 def sidedata_length(self, rev):
862 def sidedata_length(self, rev):
859 if self.version & 0xFFFF != REVLOGV2:
863 if self.version & 0xFFFF != REVLOGV2:
860 return 0
864 return 0
861 return self.index[rev][9]
865 return self.index[rev][9]
862
866
863 def rawsize(self, rev):
867 def rawsize(self, rev):
864 """return the length of the uncompressed text for a given revision"""
868 """return the length of the uncompressed text for a given revision"""
865 l = self.index[rev][2]
869 l = self.index[rev][2]
866 if l >= 0:
870 if l >= 0:
867 return l
871 return l
868
872
869 t = self.rawdata(rev)
873 t = self.rawdata(rev)
870 return len(t)
874 return len(t)
871
875
872 def size(self, rev):
876 def size(self, rev):
873 """length of non-raw text (processed by a "read" flag processor)"""
877 """length of non-raw text (processed by a "read" flag processor)"""
874 # fast path: if no "read" flag processor could change the content,
878 # fast path: if no "read" flag processor could change the content,
875 # size is rawsize. note: ELLIPSIS is known to not change the content.
879 # size is rawsize. note: ELLIPSIS is known to not change the content.
876 flags = self.flags(rev)
880 flags = self.flags(rev)
877 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
881 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
878 return self.rawsize(rev)
882 return self.rawsize(rev)
879
883
880 return len(self.revision(rev, raw=False))
884 return len(self.revision(rev, raw=False))
881
885
882 def chainbase(self, rev):
886 def chainbase(self, rev):
883 base = self._chainbasecache.get(rev)
887 base = self._chainbasecache.get(rev)
884 if base is not None:
888 if base is not None:
885 return base
889 return base
886
890
887 index = self.index
891 index = self.index
888 iterrev = rev
892 iterrev = rev
889 base = index[iterrev][3]
893 base = index[iterrev][3]
890 while base != iterrev:
894 while base != iterrev:
891 iterrev = base
895 iterrev = base
892 base = index[iterrev][3]
896 base = index[iterrev][3]
893
897
894 self._chainbasecache[rev] = base
898 self._chainbasecache[rev] = base
895 return base
899 return base
896
900
897 def linkrev(self, rev):
901 def linkrev(self, rev):
898 return self.index[rev][4]
902 return self.index[rev][4]
899
903
900 def parentrevs(self, rev):
904 def parentrevs(self, rev):
901 try:
905 try:
902 entry = self.index[rev]
906 entry = self.index[rev]
903 except IndexError:
907 except IndexError:
904 if rev == wdirrev:
908 if rev == wdirrev:
905 raise error.WdirUnsupported
909 raise error.WdirUnsupported
906 raise
910 raise
907
911
908 return entry[5], entry[6]
912 return entry[5], entry[6]
909
913
910 # fast parentrevs(rev) where rev isn't filtered
914 # fast parentrevs(rev) where rev isn't filtered
911 _uncheckedparentrevs = parentrevs
915 _uncheckedparentrevs = parentrevs
912
916
913 def node(self, rev):
917 def node(self, rev):
914 try:
918 try:
915 return self.index[rev][7]
919 return self.index[rev][7]
916 except IndexError:
920 except IndexError:
917 if rev == wdirrev:
921 if rev == wdirrev:
918 raise error.WdirUnsupported
922 raise error.WdirUnsupported
919 raise
923 raise
920
924
921 # Derived from index values.
925 # Derived from index values.
922
926
923 def end(self, rev):
927 def end(self, rev):
924 return self.start(rev) + self.length(rev)
928 return self.start(rev) + self.length(rev)
925
929
926 def parents(self, node):
930 def parents(self, node):
927 i = self.index
931 i = self.index
928 d = i[self.rev(node)]
932 d = i[self.rev(node)]
929 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
933 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
930
934
931 def chainlen(self, rev):
935 def chainlen(self, rev):
932 return self._chaininfo(rev)[0]
936 return self._chaininfo(rev)[0]
933
937
934 def _chaininfo(self, rev):
938 def _chaininfo(self, rev):
935 chaininfocache = self._chaininfocache
939 chaininfocache = self._chaininfocache
936 if rev in chaininfocache:
940 if rev in chaininfocache:
937 return chaininfocache[rev]
941 return chaininfocache[rev]
938 index = self.index
942 index = self.index
939 generaldelta = self._generaldelta
943 generaldelta = self._generaldelta
940 iterrev = rev
944 iterrev = rev
941 e = index[iterrev]
945 e = index[iterrev]
942 clen = 0
946 clen = 0
943 compresseddeltalen = 0
947 compresseddeltalen = 0
944 while iterrev != e[3]:
948 while iterrev != e[3]:
945 clen += 1
949 clen += 1
946 compresseddeltalen += e[1]
950 compresseddeltalen += e[1]
947 if generaldelta:
951 if generaldelta:
948 iterrev = e[3]
952 iterrev = e[3]
949 else:
953 else:
950 iterrev -= 1
954 iterrev -= 1
951 if iterrev in chaininfocache:
955 if iterrev in chaininfocache:
952 t = chaininfocache[iterrev]
956 t = chaininfocache[iterrev]
953 clen += t[0]
957 clen += t[0]
954 compresseddeltalen += t[1]
958 compresseddeltalen += t[1]
955 break
959 break
956 e = index[iterrev]
960 e = index[iterrev]
957 else:
961 else:
958 # Add text length of base since decompressing that also takes
962 # Add text length of base since decompressing that also takes
959 # work. For cache hits the length is already included.
963 # work. For cache hits the length is already included.
960 compresseddeltalen += e[1]
964 compresseddeltalen += e[1]
961 r = (clen, compresseddeltalen)
965 r = (clen, compresseddeltalen)
962 chaininfocache[rev] = r
966 chaininfocache[rev] = r
963 return r
967 return r
964
968
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

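    # Illustrative sketch (assumes ``rl`` is an opened revlog): when the
    # walk is not cut short by ``stoprev``, the chain bottoms out at a
    # revision stored as a full snapshot:
    #
    #   chain, stopped = rl._deltachain(rev)
    #   if not stopped:
    #       assert rl.deltaparent(chain[0]) == nullrev
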
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

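    # Illustrative sketch (hypothetical ``rl`` and ``someheadrev``): the
    # returned lazy object iterates ancestors newest-first and, with
    # ``inclusive=True``, yields the starting revisions themselves:
    #
    #   for r in rl.ancestors([someheadrev], inclusive=True):
    #       ...  # someheadrev first, then its ancestors
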
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

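    # Illustrative sketch of the two return values (``common``/``heads``
    # are hypothetical node lists): ``has`` is the inclusive ancestor set
    # ::common, while ``missing`` is (::heads) - (::common) as nodes,
    # sorted by revision number:
    #
    #   has, missing = rl.findcommonmissing(common, heads)
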
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

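    # Illustrative sketch (hypothetical revs): findmissingrevs() below is
    # essentially this two-liner, which discovery code can also reuse
    # across successive head sets:
    #
    #   inc = rl.incrementalmissingrevs(common=[commonrev])
    #   missingrevs = inc.missingancestors([headrev])
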
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

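    # Illustrative sketch: called with no arguments, the fast path above
    # returns the entire graph, so the reported roots/heads echo the
    # implicit defaults:
    #
    #   nodes, outroots, outheads = rl.nodesbetween()
    #   assert outroots == [nullid] and outheads == list(rl.heads())
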
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

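    # Illustrative sketch of the marking pass above: every revision starts
    # as a candidate head and is cleared as soon as it shows up as a
    # parent, so a linear history 0-1-2 ends with only ``ishead[2]`` set
    # (the extra slot at the end absorbs nullrev parents).
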
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

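    # Illustrative sketch (hypothetical nodes): restrict the search to the
    # descendants of ``startnode`` while pretending the nodes in ``stop``
    # have no children:
    #
    #   subheads = rl.heads(start=startnode, stop=[barriernode])
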
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

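    # Illustrative sketch: only the genuinely ambiguous case reaches
    # reachableroots(); the rev-number comparisons above settle the rest:
    #
    #   rl.isancestorrev(r, r)      # True, no graph walk
    #   rl.isancestorrev(r + 1, r)  # False: ancestors never have higher revs
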
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here as they should always be
        # full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

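    # Illustrative sketch (hypothetical values): each of these forms is
    # accepted and resolves to the same binary node when unambiguous:
    #
    #   rl.lookup(5)          # revision number
    #   rl.lookup(b'5')       # str(revision number)
    #   rl.lookup(b'1e4f2a')  # unambiguous hex nodeid prefix
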
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

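    # Illustrative sketch: for a node present in this revlog, the returned
    # prefix round-trips through prefix lookup (a sketch only; filtered
    # repos and wdir corner cases are not covered):
    #
    #   prefix = rl.shortest(node)
    #   assert rl._partialmatch(prefix) == node
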
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

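    # Illustrative arithmetic for the window rounding above, assuming a
    # 64 KiB cache window: both edges snap to window boundaries, so the
    # request [70000, 71000) turns into one aligned read.
    #
    #   cachesize = 65536
    #   realoffset = 70000 & ~(cachesize - 1)                    # 65536
    #   reallength = ((70000 + 1000 + cachesize)
    #                 & ~(cachesize - 1)) - realoffset           # 65536
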
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

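    # Illustrative sketch: for a delta-stored revision the two layouts
    # differ only in how the base field is read back:
    #
    #   rl.deltaparent(rev)  # generaldelta: the stored base revision
    #                        # pre-generaldelta: always rev - 1
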
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

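    # Illustrative sketch of the sparse-revlog recursion above: a revision
    # is an intermediate snapshot only when its delta base is neither of
    # its parents and that base is itself a snapshot; a delta against a
    # parent always answers False.
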
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

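    # Illustrative sketch (assumes ``rl`` is an opened revlog): applying
    # the returned binary delta to rev1's raw text reproduces rev2's raw
    # text in either branch above:
    #
    #   delta = rl.revdiff(rev1, rev2)
    #   assert bytes(mdiff.patch(rl.rawdata(rev1), delta)) == rl.rawdata(rev2)
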
    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

1862 def revision(self, nodeorrev, _df=None, raw=False):
1866 def revision(self, nodeorrev, _df=None, raw=False):
1863 """return an uncompressed revision of a given node or revision
1867 """return an uncompressed revision of a given node or revision
1864 number.
1868 number.
1865
1869
1866 _df - an existing file handle to read from. (internal-only)
1870 _df - an existing file handle to read from. (internal-only)
1867 raw - an optional argument specifying if the revision data is to be
1871 raw - an optional argument specifying if the revision data is to be
1868 treated as raw data when applying flag transforms. 'raw' should be set
1872 treated as raw data when applying flag transforms. 'raw' should be set
1869 to True when generating changegroups or in debug commands.
1873 to True when generating changegroups or in debug commands.
1870 """
1874 """
1871 if raw:
1875 if raw:
1872 msg = (
1876 msg = (
1873 b'revlog.revision(..., raw=True) is deprecated, '
1877 b'revlog.revision(..., raw=True) is deprecated, '
1874 b'use revlog.rawdata(...)'
1878 b'use revlog.rawdata(...)'
1875 )
1879 )
1876 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1880 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1877 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1881 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1878
1882
1879 def sidedata(self, nodeorrev, _df=None):
1883 def sidedata(self, nodeorrev, _df=None):
1880 """a map of extra data related to the changeset but not part of the hash
1884 """a map of extra data related to the changeset but not part of the hash
1881
1885
1882 This function currently return a dictionary. However, more advanced
1886 This function currently return a dictionary. However, more advanced
1883 mapping object will likely be used in the future for a more
1887 mapping object will likely be used in the future for a more
1884 efficient/lazy code.
1888 efficient/lazy code.
1885 """
1889 """
1886 return self._revisiondata(nodeorrev, _df)[1]
1890 return self._revisiondata(nodeorrev, _df)[1]
1887
1891
    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.version & 0xFFFF == REVLOGV2:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

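    # Companion sketch to _rawtext() (illustration only, same ">lll"
    # fragment assumption as the sketch after revdiff()): mdiff.patches()
    # folds each delta of the chain into the base text. This naive version
    # handles a single fragment per delta; the real mpatch applies many
    # fragments without repeated copying.
    @staticmethod
    def _applydeltachain_sketch(basetext, deltas):
        text = basetext
        for delta in deltas:
            start, end, newlen = struct.unpack(b">lll", delta[:12])
            new = delta[12 : 12 + newlen]
            # splice the replacement bytes over text[start:end]
            text = text[:start] + new + text[end:]
        return text
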
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self._io.size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

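    # Worked example for the inline adjustment above (hypothetical
    # numbers): in an inline revlog, index entries and data chunks share
    # the .i file, so an offset recorded "as if" in a separate .d file must
    # be shifted past the (1 + rev) fixed-size entries preceding it. If
    # self._io.size were 64 bytes, sidedata recorded at offset 1000 for
    # rev 3 would really start at 1000 + 64 * (1 + 3) = 1256 in the index
    # file.
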
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

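    # Usage sketch (assuming `rl` is an open revlog and `node` a node it
    # stores): revision() returns the flag-processed text while rawdata()
    # returns the bytes exactly as stored, which is what changegroup
    # generation and debug commands want. With no revision flags set the
    # two are equal:
    #
    #   text = rl.revision(node)
    #   raw = rl.rawdata(node)
    #   assert rl.flags(rl.rev(node)) or text == raw
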
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

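    # Illustrative sketch of the SHA-1 scheme behind
    # storageutil.hashrevisionsha1: the two parent nodes are hashed in
    # sorted order before the text, so the node does not depend on the
    # order in which the parents are given.
    @staticmethod
    def _hashrevision_sketch(text, p1, p2):
        import hashlib

        s = hashlib.sha1(min(p1, p2))  # bytes compare lexicographically
        s.update(max(p1, p2))
        s.update(text)
        return s.digest()
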
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        troffset = tr.findoffset(self.indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )
        trindex = 0
        tr.add(self.datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support it")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

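    # The three header cases above, summarized: an empty header means the
    # compressor's own output is self-describing (zlib output starts with
    # 'x'); b'u' marks data stored uncompressed; and data that already
    # begins with '\0' can be stored header-less because '\0' is itself a
    # marker that decompress() recognizes. For instance:
    #
    #   >>> import zlib
    #   >>> zlib.compress(b'some revision data')[0:1]
    #   b'x'
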
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

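    # Minimal restatement of the dispatch above (illustration only; the
    # real method also routes unknown headers through util.compengines and
    # returns zero-copy buffers where it can):
    @staticmethod
    def _decompress_sketch(data):
        if not data:
            return data
        t = data[0:1]
        if t == b'x':
            return zlib.decompress(data)  # a zlib-compressed chunk
        if t == b'\0':
            return data  # raw data stored without a header
        if t == b'u':
            return data[1:]  # stored uncompressed behind a 'u' marker
        raise ValueError('unknown compression type %r' % t)
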
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self.indexfile, offset + curr * self._io.size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self.indexfile, curr * self._io.size
                )
                self._concurrencychecker(dfh, self.datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        if self.version & 0xFFFF != REVLOGV2:
            e = e[:8]

        self.index.append(e)
        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

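    # Sketch of the first index field assembled above: offset_type() (a
    # module-level helper in this file) packs the data offset and the
    # 16-bit flag field into a single integer, so the low 16 bits of index
    # entry[0] always carry the REVIDX_* flags.
    @staticmethod
    def _offset_type_sketch(offset, flags):
        assert 0 <= flags < (1 << 16)  # flags must fit in 16 bits
        return (offset << 16) | flags
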
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self.version & 0xFFFF != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self.indexfile, isize)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

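    # Worked example for the truncation above (hypothetical sizes): with
    # 64-byte index entries, stripping at rev 10 in a non-inline revlog
    # truncates the data file at start(10) and the index at
    # 10 * 64 = 640 bytes; in an inline revlog both streams share the .i
    # file, so the single cut point is start(10) + 10 * 64.
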
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

2776 def clone(
2780 def clone(
2777 self,
2781 self,
2778 tr,
2782 tr,
2779 destrevlog,
2783 destrevlog,
2780 addrevisioncb=None,
2784 addrevisioncb=None,
2781 deltareuse=DELTAREUSESAMEREVS,
2785 deltareuse=DELTAREUSESAMEREVS,
2782 forcedeltabothparents=None,
2786 forcedeltabothparents=None,
2783 sidedatacompanion=None,
2787 sidedatacompanion=None,
2784 ):
2788 ):
2785 """Copy this revlog to another, possibly with format changes.
2789 """Copy this revlog to another, possibly with format changes.
2786
2790
2787 The destination revlog will contain the same revisions and nodes.
2791 The destination revlog will contain the same revisions and nodes.
2788 However, it may not be bit-for-bit identical due to e.g. delta encoding
2792 However, it may not be bit-for-bit identical due to e.g. delta encoding
2789 differences.
2793 differences.
2790
2794
2791 The ``deltareuse`` argument control how deltas from the existing revlog
2795 The ``deltareuse`` argument control how deltas from the existing revlog
2792 are preserved in the destination revlog. The argument can have the
2796 are preserved in the destination revlog. The argument can have the
2793 following values:
2797 following values:
2794
2798
2795 DELTAREUSEALWAYS
2799 DELTAREUSEALWAYS
2796 Deltas will always be reused (if possible), even if the destination
2800 Deltas will always be reused (if possible), even if the destination
2797 revlog would not select the same revisions for the delta. This is the
2801 revlog would not select the same revisions for the delta. This is the
2798 fastest mode of operation.
2802 fastest mode of operation.
2799 DELTAREUSESAMEREVS
2803 DELTAREUSESAMEREVS
2800 Deltas will be reused if the destination revlog would pick the same
2804 Deltas will be reused if the destination revlog would pick the same
2801 revisions for the delta. This mode strikes a balance between speed
2805 revisions for the delta. This mode strikes a balance between speed
2802 and optimization.
2806 and optimization.
2803 DELTAREUSENEVER
2807 DELTAREUSENEVER
2804 Deltas will never be reused. This is the slowest mode of execution.
2808 Deltas will never be reused. This is the slowest mode of execution.
2805 This mode can be used to recompute deltas (e.g. if the diff/delta
2809 This mode can be used to recompute deltas (e.g. if the diff/delta
2806 algorithm changes).
2810 algorithm changes).
2807 DELTAREUSEFULLADD
2811 DELTAREUSEFULLADD
2808 Revision will be re-added as if their were new content. This is
2812 Revision will be re-added as if their were new content. This is
2809 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2813 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2810 eg: large file detection and handling.
2814 eg: large file detection and handling.
2811
2815
2812 Delta computation can be slow, so the choice of delta reuse policy can
2816 Delta computation can be slow, so the choice of delta reuse policy can
2813 significantly affect run time.
2817 significantly affect run time.
2814
2818
2815 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2819 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2816 two extremes. Deltas will be reused if they are appropriate. But if the
2820 two extremes. Deltas will be reused if they are appropriate. But if the
2817 delta could choose a better revision, it will do so. This means if you
2821 delta could choose a better revision, it will do so. This means if you
2818 are converting a non-generaldelta revlog to a generaldelta revlog,
2822 are converting a non-generaldelta revlog to a generaldelta revlog,
2819 deltas will be recomputed if the delta's parent isn't a parent of the
2823 deltas will be recomputed if the delta's parent isn't a parent of the
2820 revision.
2824 revision.
2821
2825
2822 In addition to the delta policy, the ``forcedeltabothparents``
2826 In addition to the delta policy, the ``forcedeltabothparents``
2823 argument controls whether to force compute deltas against both parents
2827 argument controls whether to force compute deltas against both parents
2824 for merges. By default, the current default is used.
2828 for merges. By default, the current default is used.
2825
2829
2826 If not None, the `sidedatacompanion` is callable that accept two
2830 If not None, the `sidedatacompanion` is callable that accept two
2827 arguments:
2831 arguments:
2828
2832
2829 (srcrevlog, rev)
2833 (srcrevlog, rev)
2830
2834
2831 and return a quintet that control changes to sidedata content from the
2835 and return a quintet that control changes to sidedata content from the
2832 old revision to the new clone result:
2836 old revision to the new clone result:
2833
2837
2834 (dropall, filterout, update, new_flags, dropped_flags)
2838 (dropall, filterout, update, new_flags, dropped_flags)
2835
2839
2836 * if `dropall` is True, all sidedata should be dropped
2840 * if `dropall` is True, all sidedata should be dropped
2837 * `filterout` is a set of sidedata keys that should be dropped
2841 * `filterout` is a set of sidedata keys that should be dropped
2838 * `update` is a mapping of additionnal/new key -> value
2842 * `update` is a mapping of additionnal/new key -> value
2839 * new_flags is a bitfields of new flags that the revision should get
2843 * new_flags is a bitfields of new flags that the revision should get
2840 * dropped_flags is a bitfields of new flags that the revision shoudl not longer have
2844 * dropped_flags is a bitfields of new flags that the revision shoudl not longer have
2841 """
2845 """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached
        # delta, if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
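            # An index entry is a tuple laid out roughly as
            # (offset_and_flags, compressed_size, raw_size, delta_base_rev,
            #  link_rev, p1_rev, p2_rev, node); the low 16 bits of the first
            # field hold the storage flags, hence the 0xFFFF mask below.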
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {}, 0, 0)
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall = sidedataactions[0]
                filterout = sidedataactions[1]
                update = sidedataactions[2]
                new_flags = sidedataactions[3]
                dropped_flags = sidedataactions[4]
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None

                flags |= new_flags
                flags &= ~dropped_flags

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
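        # Illustrative sketch (not part of the original code): a caller such
        # as a censoring command would invoke this inside an open transaction,
        # roughly as follows, where ``flog`` is a filelog and ``fnode`` the
        # node to censor (names hypothetical):
        #
        #     flog.censorrevision(tr, fnode, tombstone=b'redacted')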

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
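    # Illustrative sketch (not part of the original code): querying storage
    # details for a revlog instance ``rl`` (hypothetical variable name). Only
    # the requested keys appear in the result:
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     # e.g. {b'revisionscount': 42, b'storedsize': 12345}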