revlog: change addrawrevision to return the revision...
Joerg Sonnenberger
r47257:9ee4e988 default
@@ -1,3086 +1,3086 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVIDX_SIDEDATA,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    nodemap as nodemaputil,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes warnings
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False, {}


def ellipsiswriteprocessor(rl, text, sidedata):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def getoffset(q):
    return int(q >> 16)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)


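# Illustrative example (editorial addition, not part of upstream revlog.py):
# ``offset_type`` packs a byte offset and 16 bits of storage flags into the
# single integer stored as the first field of an index entry; ``getoffset``
# and ``gettype`` recover the two halves.
#
#   >>> q = offset_type(76, 0)
#   >>> q == 76 << 16
#   True
#   >>> getoffset(q), gettype(q)
#   (76, 0)
#
# Passing a bit outside flagutil.REVIDX_KNOWN_FLAGS raises ValueError.
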
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(b">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack


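# Illustrative example (editorial addition, not part of upstream revlog.py):
# a v0 index entry is a fixed 76-byte record: four 4-byte big-endian signed
# integers followed by three 20-byte nodeids.
#
#   >>> indexformatv0.size
#   76
#   >>> raw = indexformatv0_pack(0, 10, 0, 0, nullid, nullid, b'\x11' * 20)
#   >>> indexformatv0_unpack(raw)[1]
#   10
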
class revlogoldindex(list):
    @property
    def nodemap(self):
        msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self._nodemap

    @util.propertycache
    def _nodemap(self):
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def has_node(self, node):
        """return True if the node exists in the index"""
        return node in self._nodemap

    def rev(self, node):
        """return a revision for a node

        If the node is unknown, raise a RevlogError"""
        return self._nodemap[node]

    def get_rev(self, node):
        """return a revision for a node

        If the node is unknown, return None"""
        return self._nodemap.get(node)

    def append(self, tup):
        self._nodemap[tup[7]] = len(self)
        super(revlogoldindex, self).append(tup)

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        for r in pycompat.xrange(i.start, len(self)):
            del self._nodemap[self[r][7]]
        super(revlogoldindex, self).__delitem__(i)

    def clearcaches(self):
        self.__dict__.pop('_nodemap', None)

    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)


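# Illustrative example (editorial addition, not part of upstream revlog.py):
# revlogoldindex pairs the entry list with a lazily built node->rev map and
# hard-codes index[-1] as the null revision, so callers can treat nullrev
# like any other parent.
#
#   >>> idx = revlogoldindex()
#   >>> idx[-1][7] == nullid
#   True
#   >>> idx.has_node(nullid), idx.get_rev(nullid)
#   (True, -1)
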
class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        index = revlogoldindex(index)
        return index, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)


# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

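# Illustrative example (editorial addition, not part of upstream revlog.py):
# a v1 ("ng") index entry is a fixed 64-byte record. The leading Q holds the
# offset/flags value produced by ``offset_type`` above, and the trailing 12
# pad bytes (12x) widen the 20-byte nodeid field to 32 bytes.
#
#   >>> indexformatng.size
#   64
#   >>> entry = (offset_type(0, 0), 10, 10, 0, 0, -1, -1, nullid)
#   >>> len(indexformatng_pack(*entry))
#   64
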
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p


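# Illustrative note (editorial addition, not part of upstream revlog.py):
# for rev 0, ``revlogio.packentry`` overwrites the first 4 bytes of the
# packed entry with the version header. That is safe because those bytes
# hold the high half of the offset field and rev 0 always starts at offset
# 0, so the value can be reconstructed when the index is parsed.
#
#   >>> io = revlogio()
#   >>> entry = (offset_type(0, 0), 10, 10, 0, 0, -1, -1, nullid)
#   >>> p = io.packentry(entry, None, REVLOG_DEFAULT_VERSION, 0)
#   >>> p[:4] == versionformat_pack(REVLOG_DEFAULT_VERSION)
#   True
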
NodemapRevlogIO = None

if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    class NodemapRevlogIO(revlogio):
        """A debug-oriented IO class that returns a PersistentNodeMapIndexObject

        The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
        """

        def parseindex(self, data, inline):
            index, cache = parsers.parse_index_devel_nodemap(data, inline)
            return index, cache


class rustrevlogio(revlogio):
    def parseindex(self, data, inline):
        index, cache = super(rustrevlogio, self).parseindex(data, inline)
        return rustrevlog.MixedIndex(index), cache


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.nodemap_file = None
        if persistentnodemap:
            self.nodemap_file = nodemaputil.get_nodemap_file(
                opener, self.indexfile
            )

        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        if self.hassidedata:
            self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )

        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

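        # Illustrative example (editorial addition, not part of upstream
        # revlog.py): the 32-bit header mixes the format number (low 16
        # bits) with feature flags (high 16 bits). A v1 inline revlog
        # splits as:
        #
        #   >>> vf = REVLOGV1 | FLAG_INLINE_DATA
        #   >>> (vf & 0xFFFF) == REVLOGV1, (vf & ~0xFFFF) == FLAG_INLINE_DATA
        #   (True, True)
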
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self.nodemap_file
            and opts.get(b'devel-force-nodemap', False)
            and NodemapRevlogIO is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self.nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        elif devel_nodemap:
            self._io = NodemapRevlogIO()
        elif use_rust_index:
            self._io = rustrevlogio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self.nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            args['checkambig'] = self._checkambig
        if mode == b'w':
            args['atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable for reading data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self.nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self.nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

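    # Editorial summary (addition, not part of upstream revlog.py): the
    # accessors above and below read fixed slots of the index entry tuple,
    # whose layout (see the e2 tuples built in parseindex above) is:
    #
    #   0: offset/flags packed with offset_type
    #   1: compressed length
    #   2: uncompressed length (-1 when not yet known)
    #   3: base revision of the delta chain
    #   4: link revision
    #   5: parent 1 revision
    #   6: parent 2 revision
    #   7: nodeid
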
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

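    # Illustrative example (editorial addition, not part of upstream
    # revlog.py), assuming a generaldelta revlog where rev 4 deltas against
    # rev 2, rev 2 against rev 1, and rev 1 is a full snapshot (its base is
    # itself). The pure-Python fallback above then yields:
    #
    #   _deltachain(4)            -> ([1, 2, 4], False)
    #   _deltachain(4, stoprev=2) -> ([4], True)
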
972 def ancestors(self, revs, stoprev=0, inclusive=False):
972 def ancestors(self, revs, stoprev=0, inclusive=False):
973 """Generate the ancestors of 'revs' in reverse revision order.
973 """Generate the ancestors of 'revs' in reverse revision order.
974 Does not generate revs lower than stoprev.
974 Does not generate revs lower than stoprev.
975
975
976 See the documentation for ancestor.lazyancestors for more details."""
976 See the documentation for ancestor.lazyancestors for more details."""
977
977
978 # first, make sure start revisions aren't filtered
978 # first, make sure start revisions aren't filtered
979 revs = list(revs)
979 revs = list(revs)
980 checkrev = self.node
980 checkrev = self.node
981 for r in revs:
981 for r in revs:
982 checkrev(r)
982 checkrev(r)
983 # and we're sure ancestors aren't filtered as well
983 # and we're sure ancestors aren't filtered as well
984
984
985 if rustancestor is not None:
985 if rustancestor is not None:
986 lazyancestors = rustancestor.LazyAncestors
986 lazyancestors = rustancestor.LazyAncestors
987 arg = self.index
987 arg = self.index
988 else:
988 else:
989 lazyancestors = ancestor.lazyancestors
989 lazyancestors = ancestor.lazyancestors
990 arg = self._uncheckedparentrevs
990 arg = self._uncheckedparentrevs
991 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
991 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
992
992
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

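    # Minimal sketch (not upstream code) of the lazyset idea used above:
    # membership consults both the eagerly added values and a lazily
    # evaluated ancestor container, so nothing is materialized up front.
    #
    #   has = lazyset(self.ancestors(common))  # lazy ancestor traversal
    #   has.add(nullrev)                       # nullrev is everyone's ancestor
    #   r in has                               # may trigger lazy evaluation
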
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

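    # Illustrative sketch (assumed calling convention, hypothetical revs):
    # the incremental object keeps traversal state between calls, which is
    # what discovery relies on when heads arrive in batches.
    #
    #   inc = rl.incrementalmissingrevs(common=[commonrev])
    #   batch1 = inc.missingancestors([head1])
    #   batch2 = inc.missingancestors([head2])  # reuses earlier traversal
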
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

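    # Conceptual sketch (hypothetical nodes): findmissing() is the node-ID
    # flavour of the revset "(::heads) - (::common)".
    #
    #   missing = rl.findmissing(common=[cnode], heads=[hnode])
    #   # every node in missing is an ancestor of hnode but not of cnode
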
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

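    # Illustrative sketch (hypothetical nodes): nodesbetween() answers
    # "root::head" style queries directly against the revlog DAG.
    #
    #   nodes, outroots, outheads = rl.nodesbetween([rootnode], [headnode])
    #   # nodes is topologically sorted and includes both endpoints when
    #   # headnode is actually reachable from rootnode
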
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

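    # Standalone sketch (toy data, not upstream code) of the marking pass in
    # _headrevs(): a rev is a head exactly when no rev names it as a parent.
    # The extra slot at the end absorbs writes indexed by nullrev (-1).
    #
    #   parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}
    #   ishead = [0] * (len(parents) + 1)
    #   for r, (p1, p2) in sorted(parents.items()):
    #       ishead[r] = 1
    #       ishead[p1] = ishead[p2] = 0
    #   [r for r, v in enumerate(ishead) if v]  # -> [1, 2]
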
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

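    # Worked note (not upstream code): revision numbers form a topological
    # order, so an ancestor always has a smaller rev than its descendant and
    # the ``a > b`` branch can answer False without touching the graph.
    #
    #   rl.isancestorrev(5, 3)  # False immediately, since 5 > 3
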
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

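    # Illustrative note (hypothetical nodes): when several "best" common
    # ancestors tie, taking min() over their binary node IDs keeps the
    # answer deterministic regardless of traversal order.
    #
    #   anc = rl.ancestor(node_a, node_b)
    #   # same node every time, even if the DAG has several tied candidates
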
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

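    # Worked note (not upstream code): bin() can only decode an even number
    # of hex digits, so an odd-length prefix keeps its last digit for the
    # string-comparison stage of the slow path above.
    #
    #   id = b'abc'
    #   l = len(id) // 2           # 1
    #   prefix = bin(id[: l * 2])  # decodes b'ab' only
    #   # hex(n).startswith(id) then re-checks the trailing b'c'
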
    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

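    # Illustrative sketch (hypothetical values): shortest() is what the
    # shortest() template function relies on.
    #
    #   rl.shortest(node)               # e.g. b'1f0e' (hypothetical)
    #   rl.shortest(node, minlength=6)  # never shorter than six digits
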
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

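    # Simplified sketch (an assumption about storageutil.hashrevisionsha1,
    # not a verbatim copy): the node is a SHA-1 over the sorted parents
    # followed by the fulltext, so cmp() never reads the stored revision.
    #
    #   import hashlib
    #   def _hashrevision_sketch(text, p1, p2):  # hypothetical helper
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()
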
    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

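    # Worked example (hypothetical numbers) for the window alignment above,
    # assuming a 64 KiB chunk cache; cachesize must be a power of two for
    # the mask arithmetic to hold.
    #
    #   cachesize = 65536
    #   offset, length = 70000, 1000
    #   realoffset = offset & ~(cachesize - 1)  # 65536
    #   reallength = (
    #       (offset + length + cachesize) & ~(cachesize - 1)
    #   ) - realoffset                          # 65536, one full block
    #   # the read is widened to whole cache-aligned blocks
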
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

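    # Worked note (not upstream code): in an inline revlog, data chunks are
    # interleaved with fixed-size index records, so a revision's physical
    # offset is shifted by one index record per revision up to and including
    # itself:
    #
    #   physical = logical + (rev + 1) * self._io.size
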
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

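    # Illustrative note (hypothetical layouts): without generaldelta the
    # delta base is implicitly the previous revision; with generaldelta the
    # index records an arbitrary base, typically a parent.
    #
    #   rl.deltaparent(7)  # legacy: 6 (unless rev 7 is a full snapshot)
    #   rl.deltaparent(7)  # generaldelta: whatever index[7][3] records
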
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags, None)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

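    # Illustrative sketch (hypothetical revlog ``rl``): revision() applies
    # flag processors while rawdata() returns the stored bytes; the two only
    # differ when flags such as censorship or external storage are set.
    #
    #   text = rl.revision(rev)
    #   raw = rl.rawdata(rev)
    #   # with REVIDX_DEFAULT_FLAGS, text == raw
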
    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

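`_rawtext` reconstructs a revision by fetching the chunks of its delta chain and patching them onto the base text. For readers unfamiliar with the format, here is a minimal, self-contained sketch of how such binary deltas apply; it mirrors the hunk encoding used by `mdiff`/`mpatch` (a big-endian `(start, end, length)` header followed by `length` replacement bytes) but is an illustration, not the actual implementation:

```python
import struct

def apply_delta(base, delta):
    """Apply one binary delta (a sequence of (start, end, len) hunks)."""
    out = []
    pos = 0  # cursor into the base text
    i = 0
    while i < len(delta):
        start, end, length = struct.unpack(b'>lll', delta[i : i + 12])
        i += 12
        out.append(base[pos:start])        # keep the unchanged prefix
        out.append(delta[i : i + length])  # insert the replacement bytes
        i += length
        pos = end                          # skip the replaced range
    out.append(base[pos:])                 # keep the unchanged suffix
    return b''.join(out)

def make_replace_hunk(start, end, data):
    """Encode a single hunk replacing base[start:end] with data."""
    return struct.pack(b'>lll', start, end, len(data)) + data

base = b'abcdef'
delta = make_replace_hunk(2, 4, b'XYZ')  # replace b'cd' with b'XYZ'
assert apply_delta(base, delta) == b'abXYZef'

# A full reconstruction walks the delta chain from the base revision:
text = base
for d in [delta, make_replace_hunk(0, 2, b'')]:  # chain of two deltas
    text = apply_delta(text, d)
assert text == b'XYZef'
```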
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

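The difference between `revision()` and `rawdata()` only matters when flag processors transform the stored bytes. The following self-contained toy (the `FLAG_ZLIB` flag and `ToyStore` class are invented for illustration; this is not the `flagutil` API) shows a store whose on-disk rawtext differs from the logical revision:

```python
import zlib

FLAG_ZLIB = 1 << 0  # hypothetical flag: rawtext is zlib-compressed

class ToyStore:
    def __init__(self):
        self._raw = {}    # rev -> rawtext as "stored"
        self._flags = {}  # rev -> flags

    def add(self, rev, text, compress=False):
        if compress:
            self._raw[rev] = zlib.compress(text)
            self._flags[rev] = FLAG_ZLIB
        else:
            self._raw[rev] = text
            self._flags[rev] = 0

    def rawdata(self, rev):
        # what revlog.rawdata() returns: the stored bytes, untransformed
        return self._raw[rev]

    def revision(self, rev):
        # what revlog.revision() returns: read transforms applied
        text = self._raw[rev]
        if self._flags[rev] & FLAG_ZLIB:
            text = zlib.decompress(text)
        return text

s = ToyStore()
s.add(0, b'hello world', compress=True)
assert s.revision(0) == b'hello world'  # the logical revision
assert s.rawdata(0) != s.revision(0)    # the stored form differs
```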
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

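For reference, `storageutil.hashrevisionsha1` computes the SHA-1 of the two parent nodes in sorted order followed by the revision text. A stand-alone sketch of that scheme:

```python
import hashlib

NULLID = b'\0' * 20  # the null node: 20 zero bytes

def hashrevisionsha1(text, p1, p2):
    """SHA-1 over the sorted parents plus text, revlog's default node hash."""
    a, b = (p1, p2) if p1 < p2 else (p2, p1)
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

node = hashrevisionsha1(b'file content\n', NULLID, NULLID)
assert len(node) == 20
# The same text with different parents hashes differently:
assert node != hashrevisionsha1(b'file content\n', node, NULLID)
```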
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        troffset = tr.findoffset(self.indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )
        trindex = 0
        tr.add(self.datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

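The conversion is triggered purely by size: once the inline data portion reaches `_maxinline` bytes, the revlog is split into separate index and data files. A sketch of the guard above; the 131072-byte value is an assumption about the module-level `_maxinline` constant, which is defined outside this excerpt:

```python
_maxinline = 131072  # assumed threshold (128 KiB), mirroring revlog.py

def should_split(inline, tip_start, tip_length):
    """Mirror the guard at the top of _enforceinlinesize: convert away
    from inline storage once the data portion reaches _maxinline bytes."""
    return inline and (tip_start + tip_length) >= _maxinline

assert not should_split(True, 100_000, 1_000)  # still small enough
assert should_split(True, 131_000, 5_000)      # data grew past the limit
assert not should_split(False, 10**9, 10**6)   # already split: nothing to do
```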
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
            flags = flags & ~REVIDX_SIDEDATA
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )
        else:
            flags |= REVIDX_SIDEDATA

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(
            self, text, flags, sidedata=sidedata
        )

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        if self.index.has_node(node):
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        rev = self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
        )
        return node

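Note the calling convention established by this changeset: `addrawrevision` (and `_addrevision` below) return the new revision number, while `addrevision` keeps returning the node, short-circuiting when the node is already stored. A toy model of that contract (an invented class, not the revlog API):

```python
import hashlib

class ToyLog:
    """Append-only log mirroring the addrevision/addrawrevision contract."""

    def __init__(self):
        self._entries = []  # rev -> (node, text)
        self._nodemap = {}  # node -> rev

    def _addrevision(self, node, rawtext):
        # like revlog._addrevision: returns the new revision number
        rev = len(self._entries)
        self._entries.append((node, rawtext))
        self._nodemap[node] = rev
        return rev

    def addrawrevision(self, rawtext, node):
        # after this change: forwards _addrevision's return value (the rev)
        return self._addrevision(node, rawtext)

    def addrevision(self, text):
        node = hashlib.sha1(text).digest()
        if node in self._nodemap:  # duplicate: short-circuit, keep the node
            return node
        rev = self.addrawrevision(text, node)
        assert self._entries[rev][0] == node
        return node  # callers of addrevision still get a node

log = ToyLog()
n = log.addrevision(b'v1')
assert log.addrevision(b'v1') == n  # duplicates return the same node
```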
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

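The header convention is easy to reproduce outside Mercurial: zlib streams happen to begin with `b'x'`, so compressed chunks need no extra marker; a `b'u'` prefix marks data stored uncompressed; and data starting with a NUL byte is stored as-is. A simplified, self-contained compress/decompress pair following the same convention (the size heuristic here is an illustration; the real decision lives in the compression engine):

```python
import zlib

def compress_chunk(data):
    """Store compressed only when it actually saves space (simplified)."""
    if not data:
        return data
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return comp        # zlib streams already start with b'x'
    if data[0:1] == b'\0':
        return data        # NUL-leading data is unambiguous as-is
    return b'u' + data     # otherwise mark it as uncompressed

def decompress_chunk(chunk):
    t = chunk[0:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t == b'\0' or not chunk:
        return chunk
    if t == b'u':
        return chunk[1:]
    raise ValueError('unknown compression type %r' % t)

for payload in (b'', b'\0binary', b'u-prefixed text', b'aaaa' * 100):
    assert decompress_chunk(compress_chunk(payload)) == payload
```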
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size, before it is changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
        )
        self.index.append(e)

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction, ifh, dfh, entry, deltainfo.data, link, offset
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

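Each index entry starts with the data-file offset and the 16-bit flag field packed into a single integer; `_clone` below recovers the flags with `entry[0] & 0xFFFF`, which implies the packing is `offset << 16 | flags`. A quick illustration (the unpack helpers are mine; `offset_type` itself is defined outside this excerpt):

```python
def offset_type(offset, flags):
    """Pack a data-file offset and a 16-bit flag field into one integer."""
    assert 0 <= flags < (1 << 16)
    return (offset << 16) | flags

def unpack_offset(value):
    return value >> 16     # the byte offset into the data file

def unpack_flags(value):
    return value & 0xFFFF  # REVIDX_* flags, e.g. REVIDX_ISCENSORED

packed = offset_type(4096, 0x0002)
assert unpack_offset(packed) == 4096
assert unpack_flags(packed) == 0x0002
```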
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self.indexfile, isize)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                if self.index.has_node(node):
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, node)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, node)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, node)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()
        return not empty

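The censored-base check in `addgroup` verifies that the incoming delta replaces the whole base text with one hunk: its 12-byte header must equal `mdiff.replacediffheader(oldlen, newlen)`, which, assuming the hunk encoding sketched earlier, is `struct.pack(b'>lll', 0, oldlen, newlen)`. A self-contained sketch:

```python
import struct

def replacediffheader(oldlen, newlen):
    # header of a one-hunk delta replacing base[0:oldlen] with newlen bytes
    return struct.pack(b'>lll', 0, oldlen, newlen)

def is_full_replacement(delta, oldlen):
    hlen = struct.calcsize(b'>lll')
    newlen = len(delta) - hlen
    return delta[:hlen] == replacediffheader(oldlen, newlen)

old = b'censored base text'
new = b'fresh full text'
delta = replacediffheader(len(old), len(new)) + new
assert is_full_replacement(delta, len(old))
# A partial delta (touching only part of the base) fails the check:
partial = struct.pack(b'>lll', 0, 4, 2) + b'XY'
assert not is_full_replacement(partial, len(old))
```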
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

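The real computation, `storageutil.resolvestripinfo`, walks the DAG down from the heads; for a linear history the idea reduces to the sketch below: the strip point is the first rev whose linkrev reaches `minlink`, and the "broken" revs are those above it whose linkrevs would otherwise have survived (this simplified version is mine, not the actual helper):

```python
def getstrippoint_linear(linkrevs, minlink):
    """Strip-point search for a *linear* history.

    linkrevs[rev] is the changelog revision this filelog rev belongs to.
    Returns (strippoint, brokenrevs).
    """
    strippoint = len(linkrevs)
    for rev, linkrev in enumerate(linkrevs):
        if linkrev >= minlink:
            strippoint = rev
            break
    # revs above the strip point get truncated even though their linked
    # changesets survive; the caller must save and re-add them
    broken = {
        rev
        for rev in range(strippoint, len(linkrevs))
        if linkrevs[rev] < minlink
    }
    return strippoint, broken

# revs 0..4 linked to changelog revs 0,2,5,3,7; strip changesets >= 4:
assert getstrippoint_linear([0, 2, 5, 3, 7], 4) == (2, {3})
```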
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

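The arithmetic behind `(dd, di)` is straightforward: the index should be an exact multiple of the entry size, and the data file should end exactly at the last revision's end. A tiny model (the 64-byte entry size is an assumption matching revlogv1's fixed-width entries):

```python
INDEX_ENTRY_SIZE = 64  # assumed: revlogv1 fixed-width index entries

def checksize_model(index_bytes, data_bytes, expected_data_end):
    """Return (dd, di): extra bytes in the data and index files."""
    dd = data_bytes - expected_data_end
    di = index_bytes - (index_bytes // INDEX_ENTRY_SIZE) * INDEX_ENTRY_SIZE
    return dd, di

assert checksize_model(640, 5000, 5000) == (0, 0)   # healthy revlog
assert checksize_model(650, 5003, 5000) == (3, 10)  # trailing garbage
```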
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

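For quick reference, here is how `clone()` below maps each reuse policy onto the destination revlog's lazy-delta switches; `fulladd` is special-cased and routes every revision through `addrevision()` instead (a sketch only; the attribute names mirror the private fields used below):

```python
# (lazydelta, lazydeltabase) as configured on the destination revlog
DELTAREUSE_SETTINGS = {
    b'always':   (True, True),    # reuse any cached delta as-is
    b'samerevs': (True, False),   # reuse only if the base would be the same
    b'never':    (False, False),  # always recompute deltas
}

def configure_destination(dest_settings, deltareuse):
    """Sketch of the switch in clone(); b'fulladd' leaves settings alone."""
    if deltareuse in DELTAREUSE_SETTINGS:
        lazydelta, lazydeltabase = DELTAREUSE_SETTINGS[deltareuse]
        dest_settings['lazydelta'] = lazydelta
        dest_settings['lazydeltabase'] = lazydeltabase
    return dest_settings

assert configure_destination({}, b'samerevs') == {
    'lazydelta': True,
    'lazydeltabase': False,
}
```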
    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If None, the destination revlog's current setting is kept.

        If not None, ``sidedatacompanion`` is a callable that accepts two
        arguments:

            (srcrevlog, rev)

        and returns a quintuple that controls changes to sidedata content from
        the old revision to the new clone result:

            (dropall, filterout, update, new_flags, dropped_flags)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        * `new_flags` is a bitfield of new flags that the revision should get
        * `dropped_flags` is a bitfield of flags that the revision should no
          longer have
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase controls whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

2771 def _clone(
2771 def _clone(
2772 self,
2772 self,
2773 tr,
2773 tr,
2774 destrevlog,
2774 destrevlog,
2775 addrevisioncb,
2775 addrevisioncb,
2776 deltareuse,
2776 deltareuse,
2777 forcedeltabothparents,
2777 forcedeltabothparents,
2778 sidedatacompanion,
2778 sidedatacompanion,
2779 ):
2779 ):
2780 """perform the core duty of `revlog.clone` after parameter processing"""
2780 """perform the core duty of `revlog.clone` after parameter processing"""
2781 deltacomputer = deltautil.deltacomputer(destrevlog)
2781 deltacomputer = deltautil.deltacomputer(destrevlog)
2782 index = self.index
2782 index = self.index
2783 for rev in self:
2783 for rev in self:
2784 entry = index[rev]
2784 entry = index[rev]
2785
2785
2786 # Some classes override linkrev to take filtered revs into
2786 # Some classes override linkrev to take filtered revs into
2787 # account. Use raw entry from index.
2787 # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

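            # A sidedata companion returns a 5-tuple:
            # (dropall, filterout, update, new_flags, dropped_flags),
            # matching the unpacking below; the default is a no-op.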
            sidedataactions = (False, [], {}, 0, 0)
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall = sidedataactions[0]
                filterout = sidedataactions[1]
                update = sidedataactions[2]
                new_flags = sidedataactions[3]
                dropped_flags = sidedataactions[4]
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None

                flags |= new_flags
                flags &= ~dropped_flags
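                # cachedelta is still None on this path, so addrevision()
                # below recomputes any delta against the destination's own
                # chains instead of reusing one from the source revlog.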

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)
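                # Exactly one of cachedelta and rawtext is set at this point;
                # _addrevision() can rebuild the full text from a reused
                # delta when it needs it.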

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
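                # Inline revlogs interleave index entries and revision data
                # in a single file, so only non-inline revlogs need a
                # separate data file handle.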
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io
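        # The replacement revlog mirrors the source's format settings so the
        # flags and delta bases of the revisions copied below remain valid.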

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
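                # For a revision that was already censored and is stored as
                # a full text, the stored chunk is the tombstone itself, so
                # copy it through verbatim.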
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)
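        # The backups registered above let the transaction restore the
        # original index and data files if it is rolled back after the
        # rename.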

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks that need to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)
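                    # only flags actually set on this revision can be
                    # skipped; checks for unrelated flags still run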

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

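        # storedsize is the on-disk footprint (index plus data files), while
        # trackedsize above is the sum of uncompressed revision lengths.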
        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d