rust-nodemap: automatically use the rust index for persistent nodemap...
marmoute - r45000:e7fff9c3 default
@@ -1,3048 +1,3055 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVIDX_SIDEDATA,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    nodemap as nodemaputil,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False, {}


def ellipsiswriteprocessor(rl, text, sidedata):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def getoffset(q):
    return int(q >> 16)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
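# A minimal illustration of the packing above (assumed values):
#   offset_type(0x1234, 0) == 0x12340000
#   getoffset(offset_type(0x1234, 0)) == 0x1234
#   gettype(offset_type(0x1234, 0)) == 0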


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(b">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
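# Each v0 entry is therefore indexformatv0.size == 76 bytes: four 4-byte
# big-endian integers followed by three 20-byte nodeids.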


class revlogoldindex(list):
    @property
    def nodemap(self):
        msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self._nodemap

    @util.propertycache
    def _nodemap(self):
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def has_node(self, node):
        """return True if the node exists in the index"""
        return node in self._nodemap

    def rev(self, node):
        """return a revision for a node

        If the node is unknown, raise a RevlogError"""
        return self._nodemap[node]

    def get_rev(self, node):
        """return a revision for a node

        If the node is unknown, return None"""
        return self._nodemap.get(node)

    def append(self, tup):
        self._nodemap[tup[7]] = len(self)
        super(revlogoldindex, self).append(tup)

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        for r in pycompat.xrange(i.start, len(self)):
            del self._nodemap[self[r][7]]
        super(revlogoldindex, self).__delitem__(i)

    def clearcaches(self):
        self.__dict__.pop('_nodemap', None)

    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)
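    # Note: the i == -1 case above returns the null entry, so parent
    # revisions of -1 (nullrev) can be indexed without special-casing.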


class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        index = revlogoldindex(index)
        return index, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)
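    # Note: v0 stores parent nodeids on disk; parseindex() converts them to
    # revision numbers (via the nodemap built so far) to match the v1
    # in-memory entry layout, and packentry() converts back on write.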


# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
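# Each "ng" entry is indexformatng.size == 64 bytes; the 20-byte nodeid is
# zero-padded to 32 bytes by the trailing 12x in the struct format.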

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p
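    # rev 0's offset is always zero, so packentry() can reuse its first four
    # bytes to store the revlog version/flags header.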


NodemapRevlogIO = None

if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    class NodemapRevlogIO(revlogio):
        """A debug-oriented IO class that returns a PersistentNodeMapIndexObject

        The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
        """

        def parseindex(self, data, inline):
            index, cache = parsers.parse_index_devel_nodemap(data, inline)
            return index, cache


class rustrevlogio(revlogio):
    def parseindex(self, data, inline):
        index, cache = super(rustrevlogio, self).parseindex(data, inline)
        return rustrevlog.MixedIndex(index), cache
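    # MixedIndex wraps the C-parsed index so that Rust-side code (including
    # the persistent nodemap support) can work with it while the existing
    # index API is delegated unchanged.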


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.nodemap_file = None
        if persistentnodemap:
            self.nodemap_file = indexfile[:-2] + b".n"
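            # e.g. b"00changelog.i" -> b"00changelog.n" (the persistent
            # nodemap's docket file).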

        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        if self.hassidedata:
            self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )

        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF
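        # The 32-bit header thus splits into a 16-bit format version in the
        # low bits and 16 feature-flag bits (e.g. FLAG_INLINE_DATA) above it.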

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self.nodemap_file
            and opts.get(b'devel-force-nodemap', False)
            and NodemapRevlogIO is not None
        )

+        use_rust_index = False
+        if rustrevlog is not None:
+            if self.nodemap_file is not None:
+                use_rust_index = True
+            else:
+                use_rust_index = self.opener.options.get(b'rust.index')
+
        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        elif devel_nodemap:
            self._io = NodemapRevlogIO()
-        elif rustrevlog is not None and self.opener.options.get(b'rust.index'):
+        elif use_rust_index:
            self._io = rustrevlogio()
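        # Net effect of this change: a revlog with a persistent nodemap now
        # selects the Rust-backed index automatically whenever the Rust
        # extensions are available; the rust.index option only matters when
        # no persistent nodemap is in use.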
        try:
            d = self._io.parseindex(indexdata, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self.nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
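            # i.e. only non-inline revlogs whose index object knows how to
            # ingest persisted nodemap data can use the on-disk nodemap.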
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    self._nodemap_docket = nodemap_data[0]
                    index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            args['checkambig'] = self._checkambig
        if mode == b'w':
            args['atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self.nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)
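        # Without a transaction the persistent nodemap is refreshed right
        # away; otherwise the update is hooked into the transaction
        # machinery (see nodemaputil).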

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self.nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
904 def _deltachain(self, rev, stoprev=None):
911 def _deltachain(self, rev, stoprev=None):
905 """Obtain the delta chain for a revision.
912 """Obtain the delta chain for a revision.
906
913
907 ``stoprev`` specifies a revision to stop at. If not specified, we
914 ``stoprev`` specifies a revision to stop at. If not specified, we
908 stop at the base of the chain.
915 stop at the base of the chain.
909
916
910 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
917 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
911 revs in ascending order and ``stopped`` is a bool indicating whether
918 revs in ascending order and ``stopped`` is a bool indicating whether
912 ``stoprev`` was hit.
919 ``stoprev`` was hit.
913 """
920 """
914 # Try C implementation.
921 # Try C implementation.
915 try:
922 try:
916 return self.index.deltachain(rev, stoprev, self._generaldelta)
923 return self.index.deltachain(rev, stoprev, self._generaldelta)
917 except AttributeError:
924 except AttributeError:
918 pass
925 pass
919
926
920 chain = []
927 chain = []
921
928
922 # Alias to prevent attribute lookup in tight loop.
929 # Alias to prevent attribute lookup in tight loop.
923 index = self.index
930 index = self.index
924 generaldelta = self._generaldelta
931 generaldelta = self._generaldelta
925
932
926 iterrev = rev
933 iterrev = rev
927 e = index[iterrev]
934 e = index[iterrev]
928 while iterrev != e[3] and iterrev != stoprev:
935 while iterrev != e[3] and iterrev != stoprev:
929 chain.append(iterrev)
936 chain.append(iterrev)
930 if generaldelta:
937 if generaldelta:
931 iterrev = e[3]
938 iterrev = e[3]
932 else:
939 else:
933 iterrev -= 1
940 iterrev -= 1
934 e = index[iterrev]
941 e = index[iterrev]
935
942
936 if iterrev == stoprev:
943 if iterrev == stoprev:
937 stopped = True
944 stopped = True
938 else:
945 else:
939 chain.append(iterrev)
946 chain.append(iterrev)
940 stopped = False
947 stopped = False
941
948
942 chain.reverse()
949 chain.reverse()
943 return chain, stopped
950 return chain, stopped
944
951
945 def ancestors(self, revs, stoprev=0, inclusive=False):
952 def ancestors(self, revs, stoprev=0, inclusive=False):
946 """Generate the ancestors of 'revs' in reverse revision order.
953 """Generate the ancestors of 'revs' in reverse revision order.
947 Does not generate revs lower than stoprev.
954 Does not generate revs lower than stoprev.
948
955
949 See the documentation for ancestor.lazyancestors for more details."""
956 See the documentation for ancestor.lazyancestors for more details."""
950
957
951 # first, make sure start revisions aren't filtered
958 # first, make sure start revisions aren't filtered
952 revs = list(revs)
959 revs = list(revs)
953 checkrev = self.node
960 checkrev = self.node
954 for r in revs:
961 for r in revs:
955 checkrev(r)
962 checkrev(r)
956 # and we're sure ancestors aren't filtered as well
963 # and we're sure ancestors aren't filtered as well
957
964
958 if rustancestor is not None:
965 if rustancestor is not None:
959 lazyancestors = rustancestor.LazyAncestors
966 lazyancestors = rustancestor.LazyAncestors
960 arg = self.index
967 arg = self.index
961 else:
968 else:
962 lazyancestors = ancestor.lazyancestors
969 lazyancestors = ancestor.lazyancestors
963 arg = self._uncheckedparentrevs
970 arg = self._uncheckedparentrevs
964 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
971 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
965
972
966 def descendants(self, revs):
973 def descendants(self, revs):
967 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
974 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
968
975
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

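    # Design note (added commentary, not in the original file): ``lazyset``
    # wraps the lazily-populated ancestor object so that membership tests
    # consult both the explicitly added revisions (nullrev and ``common``
    # itself) and the lazy ancestor set, without forcing the full ancestor
    # walk up front.
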
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

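    # Usage sketch (illustrative, assuming ``rl`` is a revlog instance and
    # that the returned object exposes ``missingancestors`` as used by
    # ``findmissingrevs`` below):
    #
    #     inc = rl.incrementalmissingrevs(common=[c])
    #     missing = inc.missingancestors([h])  # revs in ::h but not ::c
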
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

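    # Usage sketch (illustrative): ``findmissing`` works on node IDs and
    # ``findmissingrevs`` on revision numbers; both delegate to
    # ``incrementalmissingrevs``:
    #
    #     nodes = rl.findmissing(common=[cnode], heads=[hnode])
    #     revs = rl.findmissingrevs(common=[crev], heads=[hrev])
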
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

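    # Usage sketch (illustrative): a full topological slice of the DAG
    # between two nodes, inclusive of both endpoints:
    #
    #     nodes, outroots, outheads = rl.nodesbetween([rootnode], [headnode])
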
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

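    # Worked example (added commentary, not in the original file): for a
    # linear history 0 <- 1 <- 2, the loop first marks each rev as a
    # candidate head, then clears the flag on both parents of every rev,
    # so only rev 2 survives and [2] is returned.
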
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
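        # Added commentary (not in the original file): revision numbers are
        # assigned in topological order, so an ancestor always has a lower
        # revision number than its descendants; a > b can therefore never be
        # an ancestor relationship.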
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

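    # Usage sketch (illustrative): resolving user-supplied identifiers;
    # exact forms are tried first by _match, then unique hex prefixes by
    # _partialmatch:
    #
    #     node = rl.lookup(b'42')        # revision number as bytes
    #     node = rl.lookup(b'a1b2c3')    # unambiguous hex prefix
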
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

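    # Usage sketch (illustrative): the shortest prefix that still resolves
    # uniquely, e.g. for templates like {shortest(node)}:
    #
    #     prefix = rl.shortest(node)      # b'a1b' if 3 hex digits suffice
    #     prefix = rl.shortest(node, 8)   # never shorter than 8 digits
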
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
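        # Worked example (added commentary, not in the original file): with
        # cachesize = 65536, offset = 70000 and length = 1000, realoffset is
        # rounded down to 65536 and reallength becomes 65536, so the whole
        # 64 KiB window around the request is read and cached. This assumes
        # the configured chunk cache size is a power of two.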
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

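        # Added commentary (not in the original file): in an inline revlog
        # the data lives in the index file, with each revision's data stored
        # right after its index entry, so absolute offsets must skip the
        # (rev + 1) index entries that precede the data.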
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

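    # Design note (added commentary, not in the original file): with sparse
    # reads enabled, deltautil.slicechunk splits the requested revisions
    # into groups whose on-disk spans are dense, so each group is fetched
    # with one contiguous read instead of one read per revision, skipping
    # large gaps between the groups.
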
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

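    # Usage sketch (illustrative): fully processed text vs. the bytes as
    # stored (rawdata is the non-deprecated spelling of raw=True):
    #
    #     text = rl.revision(node)   # flag processors applied
    #     raw = rl.rawdata(node)     # stored bytes, hash-checked only
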
    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

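    # Editorial note (illustrative, not part of the upstream source): the
    # read entry points above differ only in how much flag processing they
    # apply. Assuming `rl` is an open revlog and `rev` a valid revision:
    #
    #     text = rl.revision(rev)   # fully processed revision text
    #     raw = rl.rawdata(rev)     # bytes as stored, flags left unprocessed
    #     extra = rl.sidedata(rev)  # dict of sidedata, possibly empty
    #
    # `rawdata` is the right call for changegroup generation and debugging;
    # `revision` is what ordinary consumers want.
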
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

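    # Editorial sketch (assumed to mirror storageutil.hashrevisionsha1): the
    # node is a SHA-1 over both parent nodeids, smallest first, followed by
    # the text, so the hash is insensitive to parent order:
    #
    #     import hashlib
    #
    #     def sketch_hash(text, p1, p2):
    #         a, b = sorted([p1, p2])
    #         s = hashlib.sha1(a)
    #         s.update(b)
    #         s.update(text)
    #         return s.digest()
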
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

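    # Editorial note (illustrative): while a revlog is inline, each index
    # entry in the ``.i`` file is immediately followed by its data chunk;
    # after the conversion above the two live in separate files, e.g. for a
    # tracked file ``foo``:
    #
    #     .hg/store/data/foo.i   # fixed-size index entries only
    #     .hg/store/data/foo.d   # variable-size, possibly compressed chunks
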
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
            flags = flags & ~REVIDX_SIDEDATA
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support it")
            )
        else:
            flags |= REVIDX_SIDEDATA

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(
            self, text, flags, sidedata=sidedata
        )

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        if self.index.has_node(node):
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
        )

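    # Usage sketch (editorial; `fl`, `tr` and friends are hypothetical): a
    # filelog-style caller adds a new file revision roughly like:
    #
    #     node = fl.addrevision(text, tr, linkrev, p1node, p2node)
    #
    # The returned node is either the freshly computed hash(text, p1, p2) or,
    # when that node is already present in the index, the existing one; the
    # revision is never stored twice.
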
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

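    # Editorial summary (illustrative): the first byte of a stored chunk
    # doubles as its compression header:
    #
    #     b'\0' -> stored verbatim (the raw data itself begins with NUL)
    #     b'u'  -> uncompressed; the marker byte must be stripped
    #     b'x'  -> zlib (0x78 is the usual zlib header byte)
    #     other -> resolved via util.compengines.forrevlogheader(), e.g. for
    #              repositories using the zstd engine
    #
    # A minimal dispatch sketch under those assumptions:
    #
    #     def sketch_decompress(data):
    #         t = data[0:1]
    #         if t == b'\0':
    #             return data
    #         if t == b'u':
    #             return data[1:]
    #         if t == b'x':
    #             return zlib.decompress(data)
    #         raise ValueError('unknown compression header %r' % t)
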
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
        )
        self.index.append(e)

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction, ifh, dfh, entry, deltainfo.data, link, offset
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if self.index.has_node(node):
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=bool(addrevisioncb),
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

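    # Editorial sketch: each element of ``deltas`` is the 7-tuple unpacked in
    # the loop above. A caller (names hypothetical) might feed it like:
    #
    #     def deltaiter():
    #         for chunk in changegroup_chunks():
    #             yield (node, p1, p2, linknode, deltabase, delta, flags)
    #
    #     added = rl.addgroup(deltaiter(), cl.rev, tr)
    #
    # where ``cl.rev`` maps a changelog node to its linkrev.
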
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

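    # Interpretation sketch (editorial): verification code can use the
    # (dd, di) pair to detect a truncated or over-long revlog:
    #
    #     dd, di = rl.checksize()
    #     if dd or di:
    #         print('revlog damaged: %d stray data bytes, '
    #               '%d stray index bytes' % (dd, di))
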
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. By default, the destination revlog's current setting is
        used.

        If not None, ``sidedatacompanion`` is a callable that accepts two
        arguments:

        (srcrevlog, rev)

        and returns a triplet that controls changes to sidedata content from
        the old revision to the new clone result:

        (dropall, filterout, update)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

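    # Usage sketch (editorial): an upgrade that recomputes every delta could
    # drive `clone` like this, assuming open revlogs `src`/`dst` and a
    # transaction `tr`:
    #
    #     src.clone(tr, dst, deltareuse=revlog.DELTAREUSENEVER)
    #
    # DELTAREUSEALWAYS would instead copy existing deltas wholesale, trading
    # delta optimality for speed.
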
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {})
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall, filterout, update = sidedataactions
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None
                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

2831 def censorrevision(self, tr, censornode, tombstone=b''):
2838 def censorrevision(self, tr, censornode, tombstone=b''):
2832 if (self.version & 0xFFFF) == REVLOGV0:
2839 if (self.version & 0xFFFF) == REVLOGV0:
2833 raise error.RevlogError(
2840 raise error.RevlogError(
2834 _(b'cannot censor with version %d revlogs') % self.version
2841 _(b'cannot censor with version %d revlogs') % self.version
2835 )
2842 )
2836
2843
2837 censorrev = self.rev(censornode)
2844 censorrev = self.rev(censornode)
2838 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2845 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2839
2846
2840 if len(tombstone) > self.rawsize(censorrev):
2847 if len(tombstone) > self.rawsize(censorrev):
2841 raise error.Abort(
2848 raise error.Abort(
2842 _(b'censor tombstone must be no longer than censored data')
2849 _(b'censor tombstone must be no longer than censored data')
2843 )
2850 )
2844
2851
2845 # Rewriting the revlog in place is hard. Our strategy for censoring is
2852 # Rewriting the revlog in place is hard. Our strategy for censoring is
2846 # to create a new revlog, copy all revisions to it, then replace the
2853 # to create a new revlog, copy all revisions to it, then replace the
2847 # revlogs on transaction close.
2854 # revlogs on transaction close.
2848
2855
2849 newindexfile = self.indexfile + b'.tmpcensored'
2856 newindexfile = self.indexfile + b'.tmpcensored'
2850 newdatafile = self.datafile + b'.tmpcensored'
2857 newdatafile = self.datafile + b'.tmpcensored'
2851
2858
2852 # This is a bit dangerous. We could easily have a mismatch of state.
2859 # This is a bit dangerous. We could easily have a mismatch of state.
2853 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2860 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2854 newrl.version = self.version
2861 newrl.version = self.version
2855 newrl._generaldelta = self._generaldelta
2862 newrl._generaldelta = self._generaldelta
2856 newrl._io = self._io
2863 newrl._io = self._io
2857
2864
2858 for rev in self.revs():
2865 for rev in self.revs():
2859 node = self.node(rev)
2866 node = self.node(rev)
2860 p1, p2 = self.parents(node)
2867 p1, p2 = self.parents(node)
2861
2868
2862 if rev == censorrev:
2869 if rev == censorrev:
2863 newrl.addrawrevision(
2870 newrl.addrawrevision(
2864 tombstone,
2871 tombstone,
2865 tr,
2872 tr,
2866 self.linkrev(censorrev),
2873 self.linkrev(censorrev),
2867 p1,
2874 p1,
2868 p2,
2875 p2,
2869 censornode,
2876 censornode,
2870 REVIDX_ISCENSORED,
2877 REVIDX_ISCENSORED,
2871 )
2878 )
2872
2879
2873 if newrl.deltaparent(rev) != nullrev:
2880 if newrl.deltaparent(rev) != nullrev:
2874 raise error.Abort(
2881 raise error.Abort(
2875 _(
2882 _(
2876 b'censored revision stored as delta; '
2883 b'censored revision stored as delta; '
2877 b'cannot censor'
2884 b'cannot censor'
2878 ),
2885 ),
2879 hint=_(
2886 hint=_(
2880 b'censoring of revlogs is not '
2887 b'censoring of revlogs is not '
2881 b'fully implemented; please report '
2888 b'fully implemented; please report '
2882 b'this bug'
2889 b'this bug'
2883 ),
2890 ),
2884 )
2891 )
2885 continue
2892 continue
2886
2893
2887 if self.iscensored(rev):
2894 if self.iscensored(rev):
2888 if self.deltaparent(rev) != nullrev:
2895 if self.deltaparent(rev) != nullrev:
2889 raise error.Abort(
2896 raise error.Abort(
2890 _(
2897 _(
2891 b'cannot censor due to censored '
2898 b'cannot censor due to censored '
2892 b'revision having delta stored'
2899 b'revision having delta stored'
2893 )
2900 )
2894 )
2901 )
2895 rawtext = self._chunk(rev)
2902 rawtext = self._chunk(rev)
2896 else:
2903 else:
2897 rawtext = self.rawdata(rev)
2904 rawtext = self.rawdata(rev)
2898
2905
2899 newrl.addrawrevision(
2906 newrl.addrawrevision(
2900 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2907 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2901 )
2908 )
2902
2909
2903 tr.addbackup(self.indexfile, location=b'store')
2910 tr.addbackup(self.indexfile, location=b'store')
2904 if not self._inline:
2911 if not self._inline:
2905 tr.addbackup(self.datafile, location=b'store')
2912 tr.addbackup(self.datafile, location=b'store')
2906
2913
2907 self.opener.rename(newrl.indexfile, self.indexfile)
2914 self.opener.rename(newrl.indexfile, self.indexfile)
2908 if not self._inline:
2915 if not self._inline:
2909 self.opener.rename(newrl.datafile, self.datafile)
2916 self.opener.rename(newrl.datafile, self.datafile)
2910
2917
2911 self.clearcaches()
2918 self.clearcaches()
2912 self._loadindex()
2919 self._loadindex()
2913
2920
2914 def verifyintegrity(self, state):
2921 def verifyintegrity(self, state):
2915 """Verifies the integrity of the revlog.
2922 """Verifies the integrity of the revlog.
2916
2923
2917 Yields ``revlogproblem`` instances describing problems that are
2924 Yields ``revlogproblem`` instances describing problems that are
2918 found.
2925 found.
2919 """
2926 """
2920 dd, di = self.checksize()
2927 dd, di = self.checksize()
2921 if dd:
2928 if dd:
2922 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2929 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2923 if di:
2930 if di:
2924 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2931 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2925
2932
2926 version = self.version & 0xFFFF
2933 version = self.version & 0xFFFF
2927
2934
2928 # The verifier tells us what version revlog we should be.
2935 # The verifier tells us what version revlog we should be.
2929 if version != state[b'expectedversion']:
2936 if version != state[b'expectedversion']:
2930 yield revlogproblem(
2937 yield revlogproblem(
2931 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2938 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2932 % (self.indexfile, version, state[b'expectedversion'])
2939 % (self.indexfile, version, state[b'expectedversion'])
2933 )
2940 )
2934
2941
2935 state[b'skipread'] = set()
2942 state[b'skipread'] = set()
2936 state[b'safe_renamed'] = set()
2943 state[b'safe_renamed'] = set()
2937
2944
2938 for rev in self:
2945 for rev in self:
2939 node = self.node(rev)
2946 node = self.node(rev)
2940
2947
2941 # Verify contents. 4 cases to care about:
2948 # Verify contents. 4 cases to care about:
2942 #
2949 #
2943 # common: the most common case
2950 # common: the most common case
2944 # rename: with a rename
2951 # rename: with a rename
2945 # meta: file content starts with b'\1\n', the metadata
2952 # meta: file content starts with b'\1\n', the metadata
2946 # header defined in filelog.py, but without a rename
2953 # header defined in filelog.py, but without a rename
2947 # ext: content stored externally
2954 # ext: content stored externally
2948 #
2955 #
2949 # More formally, their differences are shown below:
2956 # More formally, their differences are shown below:
2950 #
2957 #
2951 # | common | rename | meta | ext
2958 # | common | rename | meta | ext
2952 # -------------------------------------------------------
2959 # -------------------------------------------------------
2953 # flags() | 0 | 0 | 0 | not 0
2960 # flags() | 0 | 0 | 0 | not 0
2954 # renamed() | False | True | False | ?
2961 # renamed() | False | True | False | ?
2955 # rawtext[0:2]=='\1\n'| False | True | True | ?
2962 # rawtext[0:2]=='\1\n'| False | True | True | ?
2956 #
2963 #
2957 # "rawtext" means the raw text stored in revlog data, which
2964 # "rawtext" means the raw text stored in revlog data, which
2958 # could be retrieved by "rawdata(rev)". "text"
2965 # could be retrieved by "rawdata(rev)". "text"
2959 # mentioned below is "revision(rev)".
2966 # mentioned below is "revision(rev)".
2960 #
2967 #
2961 # There are 3 different lengths stored physically:
2968 # There are 3 different lengths stored physically:
2962 # 1. L1: rawsize, stored in revlog index
2969 # 1. L1: rawsize, stored in revlog index
2963 # 2. L2: len(rawtext), stored in revlog data
2970 # 2. L2: len(rawtext), stored in revlog data
2964 # 3. L3: len(text), stored in revlog data if flags==0, or
2971 # 3. L3: len(text), stored in revlog data if flags==0, or
2965 # possibly somewhere else if flags!=0
2972 # possibly somewhere else if flags!=0
2966 #
2973 #
2967 # L1 should be equal to L2. L3 could be different from them.
2974 # L1 should be equal to L2. L3 could be different from them.
2968 # "text" may or may not affect commit hash depending on flag
2975 # "text" may or may not affect commit hash depending on flag
2969 # processors (see flagutil.addflagprocessor).
2976 # processors (see flagutil.addflagprocessor).
2970 #
2977 #
2971 # | common | rename | meta | ext
2978 # | common | rename | meta | ext
2972 # -------------------------------------------------
2979 # -------------------------------------------------
2973 # rawsize() | L1 | L1 | L1 | L1
2980 # rawsize() | L1 | L1 | L1 | L1
2974 # size() | L1 | L2-LM | L1(*) | L1 (?)
2981 # size() | L1 | L2-LM | L1(*) | L1 (?)
2975 # len(rawtext) | L2 | L2 | L2 | L2
2982 # len(rawtext) | L2 | L2 | L2 | L2
2976 # len(text) | L2 | L2 | L2 | L3
2983 # len(text) | L2 | L2 | L2 | L3
2977 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2984 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2978 #
2985 #
2979 # LM: length of metadata, depending on rawtext
2986 # LM: length of metadata, depending on rawtext
2980 # (*): not ideal, see comment in filelog.size
2987 # (*): not ideal, see comment in filelog.size
2981 # (?): could be "- len(meta)" if the resolved content has
2988 # (?): could be "- len(meta)" if the resolved content has
2982 # rename metadata
2989 # rename metadata
2983 #
2990 #
2984 # Checks needed to be done:
2991 # Checks needed to be done:
2985 # 1. length check: L1 == L2, in all cases.
2992 # 1. length check: L1 == L2, in all cases.
2986 # 2. hash check: depending on flag processor, we may need to
2993 # 2. hash check: depending on flag processor, we may need to
2987 # use either "text" (external), or "rawtext" (in revlog).
2994 # use either "text" (external), or "rawtext" (in revlog).
2988
2995
2989 try:
2996 try:
2990 skipflags = state.get(b'skipflags', 0)
2997 skipflags = state.get(b'skipflags', 0)
2991 if skipflags:
2998 if skipflags:
2992 skipflags &= self.flags(rev)
2999 skipflags &= self.flags(rev)
2993
3000
2994 _verify_revision(self, skipflags, state, node)
3001 _verify_revision(self, skipflags, state, node)
2995
3002
2996 l1 = self.rawsize(rev)
3003 l1 = self.rawsize(rev)
2997 l2 = len(self.rawdata(node))
3004 l2 = len(self.rawdata(node))
2998
3005
2999 if l1 != l2:
3006 if l1 != l2:
3000 yield revlogproblem(
3007 yield revlogproblem(
3001 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3008 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3002 node=node,
3009 node=node,
3003 )
3010 )
3004
3011
3005 except error.CensoredNodeError:
3012 except error.CensoredNodeError:
3006 if state[b'erroroncensored']:
3013 if state[b'erroroncensored']:
3007 yield revlogproblem(
3014 yield revlogproblem(
3008 error=_(b'censored file data'), node=node
3015 error=_(b'censored file data'), node=node
3009 )
3016 )
3010 state[b'skipread'].add(node)
3017 state[b'skipread'].add(node)
3011 except Exception as e:
3018 except Exception as e:
3012 yield revlogproblem(
3019 yield revlogproblem(
3013 error=_(b'unpacking %s: %s')
3020 error=_(b'unpacking %s: %s')
3014 % (short(node), stringutil.forcebytestr(e)),
3021 % (short(node), stringutil.forcebytestr(e)),
3015 node=node,
3022 node=node,
3016 )
3023 )
3017 state[b'skipread'].add(node)
3024 state[b'skipread'].add(node)
3018
3025
3019 def storageinfo(
3026 def storageinfo(
3020 self,
3027 self,
3021 exclusivefiles=False,
3028 exclusivefiles=False,
3022 sharedfiles=False,
3029 sharedfiles=False,
3023 revisionscount=False,
3030 revisionscount=False,
3024 trackedsize=False,
3031 trackedsize=False,
3025 storedsize=False,
3032 storedsize=False,
3026 ):
3033 ):
3027 d = {}
3034 d = {}
3028
3035
3029 if exclusivefiles:
3036 if exclusivefiles:
3030 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3037 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3031 if not self._inline:
3038 if not self._inline:
3032 d[b'exclusivefiles'].append((self.opener, self.datafile))
3039 d[b'exclusivefiles'].append((self.opener, self.datafile))
3033
3040
3034 if sharedfiles:
3041 if sharedfiles:
3035 d[b'sharedfiles'] = []
3042 d[b'sharedfiles'] = []
3036
3043
3037 if revisionscount:
3044 if revisionscount:
3038 d[b'revisionscount'] = len(self)
3045 d[b'revisionscount'] = len(self)
3039
3046
3040 if trackedsize:
3047 if trackedsize:
3041 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3048 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3042
3049
3043 if storedsize:
3050 if storedsize:
3044 d[b'storedsize'] = sum(
3051 d[b'storedsize'] = sum(
3045 self.opener.stat(path).st_size for path in self.files()
3052 self.opener.stat(path).st_size for path in self.files()
3046 )
3053 )
3047
3054
3048 return d
3055 return d
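
Since `verifyintegrity` above is a generator, nothing is actually checked until a caller iterates it. Below is a minimal consumption sketch, not code from this changeset: `rl` stands for an already-open revlog instance, and the `state` keys and `revlogproblem` attributes are the ones visible in the method above.

    # hypothetical caller; `rl` is assumed to be an existing revlog instance
    state = {
        b'expectedversion': 1,      # REVLOGV1, the format the verifier expects
        b'erroroncensored': False,  # report censored data as skipped, not fatal
    }
    problems = list(rl.verifyintegrity(state))
    errors = [p.error for p in problems if p.error is not None]
    warnings = [p.warning for p in problems if p.warning is not None]
    # nodes that could not be fully verified were recorded by the generator
    unchecked = state[b'skipread']
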
@@ -1,185 +1,231 @@
 ===================================
 Test the persistent on-disk nodemap
 ===================================
 
-
   $ hg init test-repo
   $ cd test-repo
   $ cat << EOF >> .hg/hgrc
   > [experimental]
   > exp-persistent-nodemap=yes
   > [devel]
   > persistent-nodemap=yes
   > EOF
   $ hg debugbuilddag .+5000
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5000
   data-length: 122880
   data-unused: 0
   $ f --size .hg/store/00changelog.n
   .hg/store/00changelog.n: size=42
 
 Simple lookup works
 
   $ ANYNODE=`hg log --template '{node|short}\n' --rev tip`
   $ hg log -r "$ANYNODE" --template '{rev}\n'
   5000
 
+
+#if rust
+
+  $ f --sha256 .hg/store/00changelog-*.nd
+  .hg/store/00changelog-????????????????.nd: sha256=1e38e9ffaa45cad13f15c1a9880ad606f4241e8beea2f61b4d5365abadfb55f6 (glob)
+  $ hg debugnodemap --dump-new | f --sha256 --size
+  size=122880, sha256=1e38e9ffaa45cad13f15c1a9880ad606f4241e8beea2f61b4d5365abadfb55f6
+  $ hg debugnodemap --dump-disk | f --sha256 --bytes=256 --hexdump --size
+  size=122880, sha256=1e38e9ffaa45cad13f15c1a9880ad606f4241e8beea2f61b4d5365abadfb55f6
+  0000: 00 00 00 76 00 00 01 65 00 00 00 95 00 00 01 34 |...v...e.......4|
+  0010: 00 00 00 19 00 00 01 69 00 00 00 ab 00 00 00 4b |.......i.......K|
+  0020: 00 00 00 07 00 00 01 4c 00 00 00 f8 00 00 00 8f |.......L........|
+  0030: 00 00 00 c0 00 00 00 a7 00 00 00 89 00 00 01 46 |...............F|
+  0040: 00 00 00 92 00 00 01 bc 00 00 00 71 00 00 00 ac |...........q....|
+  0050: 00 00 00 af 00 00 00 b4 00 00 00 34 00 00 01 ca |...........4....|
+  0060: 00 00 00 23 00 00 01 45 00 00 00 2d 00 00 00 b2 |...#...E...-....|
+  0070: 00 00 00 56 00 00 01 0f 00 00 00 4e 00 00 02 4c |...V.......N...L|
+  0080: 00 00 00 e7 00 00 00 cd 00 00 01 5b 00 00 00 78 |...........[...x|
+  0090: 00 00 00 e3 00 00 01 8e 00 00 00 4f 00 00 00 b1 |...........O....|
+  00a0: 00 00 00 30 00 00 00 11 00 00 00 25 00 00 00 d2 |...0.......%....|
+  00b0: 00 00 00 ec 00 00 00 69 00 00 01 2b 00 00 01 2e |.......i...+....|
+  00c0: 00 00 00 aa 00 00 00 15 00 00 00 3a 00 00 01 4e |...........:...N|
+  00d0: 00 00 00 4d 00 00 00 9d 00 00 00 8e 00 00 00 a4 |...M............|
+  00e0: 00 00 00 c3 00 00 00 eb 00 00 00 29 00 00 00 ad |...........)....|
+  00f0: 00 00 01 3a 00 00 01 32 00 00 00 04 00 00 00 53 |...:...2.......S|
+
+
+#else
+
   $ f --sha256 .hg/store/00changelog-*.nd
   .hg/store/00changelog-????????????????.nd: sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7 (glob)
   $ hg debugnodemap --dump-new | f --sha256 --size
   size=122880, sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7
   $ hg debugnodemap --dump-disk | f --sha256 --bytes=256 --hexdump --size
   size=122880, sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7
   0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
   0010: ff ff ff ff ff ff ff ff ff ff fa c2 ff ff ff ff |................|
   0020: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
   0030: ff ff ff ff ff ff ed b3 ff ff ff ff ff ff ff ff |................|
   0040: ff ff ff ff ff ff ee 34 00 00 00 00 ff ff ff ff |.......4........|
   0050: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
   0060: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
   0070: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
   0080: ff ff ff ff ff ff f8 50 ff ff ff ff ff ff ff ff |.......P........|
   0090: ff ff ff ff ff ff ff ff ff ff ec c7 ff ff ff ff |................|
   00a0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
   00b0: ff ff ff ff ff ff fa be ff ff f2 fc ff ff ff ff |................|
   00c0: ff ff ff ff ff ff ef ea ff ff ff ff ff ff f9 17 |................|
   00d0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
   00e0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
   00f0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+
+#endif
+
   $ hg debugnodemap --check
   revision in index: 5001
   revision in nodemap: 5001
 
 add a new commit
 
   $ hg up
   0 files updated, 0 files merged, 0 files removed, 0 files unresolved
   $ echo foo > foo
   $ hg add foo
   $ hg ci -m 'foo'
 
-#if pure
+#if no-pure no-rust
+  $ hg debugnodemap --metadata
+  uid: ???????????????? (glob)
+  tip-rev: 5001
+  data-length: 122880
+  data-unused: 0
+#else
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5001
   data-length: 123072
   data-unused: 192
-#else
-  $ hg debugnodemap --metadata
-  uid: ???????????????? (glob)
-  tip-rev: 5001
-  data-length: 122880
-  data-unused: 0
 #endif
+
   $ f --size .hg/store/00changelog.n
   .hg/store/00changelog.n: size=42
 
 (The pure code uses the debug code that performs an incremental update; the C code re-encodes from scratch)
 
 #if pure
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-????????????????.nd: size=123072, sha256=136472751566c8198ff09e306a7d2f9bd18bd32298d614752b73da4d6df23340 (glob)
+#endif
 
-#else
+#if rust
+  $ f --sha256 .hg/store/00changelog-*.nd --size
+  .hg/store/00changelog-????????????????.nd: size=123072, sha256=ccc8a43310ace13812fcc648683e259346754ef934c12dd238cf9b7fadfe9a4b (glob)
+#endif
+
+#if no-pure no-rust
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-????????????????.nd: size=122880, sha256=bfafebd751c4f6d116a76a37a1dee2a251747affe7efbcc4f4842ccc746d4db9 (glob)
-
 #endif
 
   $ hg debugnodemap --check
   revision in index: 5002
   revision in nodemap: 5002
 
 Test code path without mmap
 ---------------------------
 
   $ echo bar > bar
   $ hg add bar
   $ hg ci -m 'bar' --config experimental.exp-persistent-nodemap.mmap=no
 
   $ hg debugnodemap --check --config experimental.exp-persistent-nodemap.mmap=yes
   revision in index: 5003
   revision in nodemap: 5003
   $ hg debugnodemap --check --config experimental.exp-persistent-nodemap.mmap=no
   revision in index: 5003
   revision in nodemap: 5003
 
 
 #if pure
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5002
   data-length: 123328
   data-unused: 384
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-????????????????.nd: size=123328, sha256=10d26e9776b6596af0f89143a54eba8cc581e929c38242a02a7b0760698c6c70 (glob)
-
-#else
+#endif
+#if rust
+  $ hg debugnodemap --metadata
+  uid: ???????????????? (glob)
+  tip-rev: 5002
+  data-length: 123328
+  data-unused: 384
+  $ f --sha256 .hg/store/00changelog-*.nd --size
+  .hg/store/00changelog-????????????????.nd: size=123328, sha256=081eec9eb6708f2bf085d939b4c97bc0b6762bc8336bc4b93838f7fffa1516bf (glob)
+#endif
+#if no-pure no-rust
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5002
   data-length: 122944
   data-unused: 0
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-????????????????.nd: size=122944, sha256=755976b22b64ab680401b45395953504e64e7fa8c31ac570f58dee21e15f9bc0 (glob)
 #endif
 
 Test force warming the cache
 
   $ rm .hg/store/00changelog.n
   $ hg debugnodemap --metadata
   $ hg debugupdatecache
 #if pure
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5002
   data-length: 122944
   data-unused: 0
 #else
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5002
   data-length: 122944
   data-unused: 0
 #endif
 
 Check out of sync nodemap
 =========================
 
 First copy old data on the side.
 
   $ mkdir ../tmp-copies
   $ cp .hg/store/00changelog-????????????????.nd .hg/store/00changelog.n ../tmp-copies
 
 Nodemap lagging behind
 ----------------------
 
 make a new commit
 
   $ echo bar2 > bar
   $ hg ci -m 'bar2'
   $ NODE=`hg log -r tip -T '{node}\n'`
   $ hg log -r "$NODE" -T '{rev}\n'
   5003
 
 If the nodemap is lagging behind, it can catch up fine
 
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5003
   data-length: 123200 (pure !)
   data-length: 123200 (rust !)
   data-length: 122944 (no-rust no-pure !)
   data-unused: 256 (pure !)
   data-unused: 256 (rust !)
   data-unused: 0 (no-rust no-pure !)
   $ cp -f ../tmp-copies/* .hg/store/
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5002
   data-length: 122944
   data-unused: 0
   $ hg log -r "$NODE" -T '{rev}\n'
   5003
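
The hexdumps in the test above are raw blocks of the persistent nodemap, a trie keyed by the hex nibbles of a node hash and serialized as fixed-size blocks of sixteen big-endian 4-byte slots (unused slots read back as ff ff ff ff in the C/pure layout shown in the #else branch). The sketch below is purely illustrative: the slot encoding it assumes (zero for empty, -(rev + 1) for a leaf, a positive block index for a child, block 0 as the root) was chosen for clarity and is not the exact encoding Mercurial writes.

    import struct

    BLOCK_SLOTS = 16            # one slot per hex nibble
    SLOT = struct.Struct('>i')  # big-endian signed 32-bit slot

    def lookup(data, hexnode):
        """Walk a nibble trie serialized as 16-slot blocks (sketch only)."""
        block = 0  # assumption for this sketch: block 0 is the root
        for nibble in hexnode:
            slot = int(nibble, 16)
            (value,) = SLOT.unpack_from(
                data, (block * BLOCK_SLOTS + slot) * SLOT.size
            )
            if value == 0:
                return None           # empty slot: node is not in the map
            if value < 0:
                return -(value + 1)   # leaf slot: decoded revision number
            block = value             # internal slot: descend to child block
        return None

With an encoding of this shape, a lookup touches at most one block per nibble, which is why the file can be mmapped and queried without decoding it up front, and why appending new blocks only grows `data-length` while superseded blocks show up as `data-unused`.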