revlog: refactor logic to compute nodemap file in separate function...
Pulkit Goyal
r47196:835aafb2 default
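The change replaces the inline computation of the persistent-nodemap file name with a call to a new helper, nodemaputil.get_nodemap_file(). Judging from the removed lines in the hunk below, the extracted helper plausibly looks like this sketch (the body is reconstructed from the deleted branch; the actual implementation lives in mercurial/revlogutils/nodemap.py and is not shown in this diff):

    def get_nodemap_file(opener, indexfile):
        # '.a' index files belong to a pending transaction; prefer the
        # pending nodemap file when one exists (reconstructed logic).
        if indexfile.endswith(b'.a'):
            pending_path = indexfile[:-4] + b".n.a"
            if opener.exists(pending_path):
                return pending_path
            return indexfile[:-4] + b".n"
        else:
            return indexfile[:-2] + b".n"
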
@@ -1,3090 +1,3085 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVIDX_SIDEDATA,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    nodemap as nodemaputil,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the name to prevent pyflakes constraints
# We need these name available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False, {}


def ellipsiswriteprocessor(rl, text, sidedata):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def getoffset(q):
    return int(q >> 16)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)


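# The three helpers above pack a byte offset and a 16-bit flag field into a
# single integer: the offset occupies the high bits, the flags the low 16.
# A round-trip sketch (illustration only, not part of the original file;
# REVIDX_ISCENSORED is used just because it is one of the known flag bits):
_example_packed = offset_type(1024, REVIDX_ISCENSORED)
assert getoffset(_example_packed) == 1024
assert gettype(_example_packed) == REVIDX_ISCENSORED
# Any bit outside flagutil.REVIDX_KNOWN_FLAGS makes offset_type() raise
# ValueError instead.
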
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance consideration (and a
# wheelbarrow of other slowness source)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(b">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack


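# The struct above describes a fixed 76-byte record: four 4-byte big-endian
# integers plus three 20-byte nodeids.  revlogoldio.parseindex below walks
# the file in steps of this size.  A decoding sketch with a placeholder
# buffer (illustration only, not part of the original file):
assert indexformatv0.size == 4 * 4 + 3 * 20 == 76
_example_record = b'\x00' * indexformatv0.size  # stand-in for one record
_offset, _clen, _base, _link, _p1, _p2, _node = indexformatv0_unpack(
    _example_record
)
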
class revlogoldindex(list):
    @property
    def nodemap(self):
        msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self._nodemap

    @util.propertycache
    def _nodemap(self):
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def has_node(self, node):
        """return True if the node exist in the index"""
        return node in self._nodemap

    def rev(self, node):
        """return a revision for a node

        If the node is unknown, raise a RevlogError"""
        return self._nodemap[node]

    def get_rev(self, node):
        """return a revision for a node

        If the node is unknown, return None"""
        return self._nodemap.get(node)

    def append(self, tup):
        self._nodemap[tup[7]] = len(self)
        super(revlogoldindex, self).append(tup)

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        for r in pycompat.xrange(i.start, len(self)):
            del self._nodemap[self[r][7]]
        super(revlogoldindex, self).__delitem__(i)

    def clearcaches(self):
        self.__dict__.pop('_nodemap', None)

    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)


class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        index = revlogoldindex(index)
        return index, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)


# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p


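# Note: a v1/v2 index has no standalone header.  revlogio.packentry above
# splices the version word over the first 4 bytes of the rev 0 entry
# (versionformat_pack(version) + p[4:]), and _loadindex further down splits
# that word back into a format number and feature flags.  A decode sketch
# (illustration only, not part of the original file):
_example_header = versionformat_pack(REVLOGV1 | FLAG_INLINE_DATA)
_example_versionflags = versionformat_unpack(_example_header)[0]
assert _example_versionflags & 0xFFFF == REVLOGV1  # format number
assert _example_versionflags & ~0xFFFF == FLAG_INLINE_DATA  # feature flags
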
NodemapRevlogIO = None

if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    class NodemapRevlogIO(revlogio):
        """A debug oriented IO class that return a PersistentNodeMapIndexObject

        The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
        """

        def parseindex(self, data, inline):
            index, cache = parsers.parse_index_devel_nodemap(data, inline)
            return index, cache


class rustrevlogio(revlogio):
    def parseindex(self, data, inline):
        index, cache = super(rustrevlogio, self).parseindex(data, inline)
        return rustrevlog.MixedIndex(index), cache


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.nodemap_file = None
        if persistentnodemap:
-            if indexfile.endswith(b'.a'):
-                pending_path = indexfile[:-4] + b".n.a"
-                if opener.exists(pending_path):
-                    self.nodemap_file = pending_path
-                else:
-                    self.nodemap_file = indexfile[:-4] + b".n"
-            else:
-                self.nodemap_file = indexfile[:-2] + b".n"
+            self.nodemap_file = nodemaputil.get_nodemap_file(
+                opener, self.indexfile
+            )

        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        if self.hassidedata:
            self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )

        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self.nodemap_file
            and opts.get(b'devel-force-nodemap', False)
            and NodemapRevlogIO is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self.nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        elif devel_nodemap:
            self._io = NodemapRevlogIO()
        elif use_rust_index:
            self._io = rustrevlogio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self.nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            args['checkambig'] = self._checkambig
        if mode == b'w':
            args['atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

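    # A typical caller of _datareadfp() seeks to a known offset and reads a
    # raw segment, reusing an already-open handle when one is passed in.  A
    # minimal usage sketch (illustration only, not part of the original
    # file; assumes `rl` is a revlog and offset/length are valid):
    #
    #   with rl._datareadfp() as fp:
    #       fp.seek(offset)
    #       segment = fp.read(length)
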
    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self.nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self.nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

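    # To make _deltachain's return value concrete: in a hypothetical
    # generaldelta revlog where rev 4 deltas against rev 2 and rev 2 against
    # the full-text base rev 0, the walk over base pointers gives
    # (illustrative values only, not part of the original file):
    #
    #   rl._deltachain(4)             -> ([0, 2, 4], False)
    #   rl._deltachain(4, stoprev=2)  -> ([4], True): rev 2 itself excluded
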
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

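    # Usage sketch for the traversal helpers above (hypothetical ``rlog`` and
    # ``tiprev``; not part of any test suite):
    #
    #     for r in rlog.ancestors([tiprev], stoprev=0, inclusive=True):
    #         ...  # the start revs themselves (inclusive=True), then their
    #              # ancestors, in reverse revision order
    #     list(rlog.descendants([0]))  # revs descending from rev 0, in
    #                                  # revision order
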
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

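    # Usage sketch relating the "missing" helpers (hypothetical ``rlog``,
    # ``commonnodes`` and ``headnodes``):
    #
    #     missingnodes = rlog.findmissing(commonnodes, headnodes)
    #     missingrevs = rlog.findmissingrevs(
    #         [rlog.rev(n) for n in commonnodes],
    #         [rlog.rev(n) for n in headnodes],
    #     )
    #     # both compute (::heads) - (::common); findmissing speaks nodes,
    #     # findmissingrevs speaks revision numbers, and both delegate to
    #     # incrementalmissingrevs() for the actual walk.
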
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

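    # Usage sketch for ``nodesbetween`` (hypothetical ``rlog``; ``rootnode``
    # and ``headnode`` are assumed existing nodes):
    #
    #     nodes, outroots, outheads = rlog.nodesbetween(
    #         [rootnode], [headnode]
    #     )
    #     # ``nodes`` is the topologically sorted span rootnode::headnode;
    #     # ``outroots`` and ``outheads`` are the subsets of the inputs that
    #     # are actually connected to that span.
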
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so no filtered rev can be a
        # head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

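    # Usage sketch for ``heads`` (hypothetical ``rlog`` and ``somenode``):
    #
    #     rlog.heads()                 # all childless nodes
    #     rlog.heads(start=somenode)   # only heads descending from somenode
    #     rlog.heads(stop=[somenode])  # treat somenode as if it had no
    #                                  # children when computing heads
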
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

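    # Usage sketch for the ancestry predicates (hypothetical ``rlog``,
    # ``a_node`` and ``anyrev``):
    #
    #     rlog.isancestor(a_node, a_node)      # True: reflexive
    #     rlog.isancestorrev(nullrev, anyrev)  # True: nullrev precedes all
    #     # isancestorrev(a, b) can return False early for a > b, because a
    #     # revision number is always greater than those of its ancestors.
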
    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

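    # Usage sketch for ``ancestor`` (hypothetical ``rlog``; ``n1`` and ``n2``
    # are assumed nodes):
    #
    #     base = rlog.ancestor(n1, n2)
    #     # ``base`` is one "best" common ancestor; when several heads of the
    #     # common ancestors tie, the minimum node wins, so the answer is
    #     # deterministic across runs and implementations.
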
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here, as they should always be
        # full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

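    # Usage sketch for ``lookup`` (hypothetical ``rlog``; the hex prefix is
    # illustrative):
    #
    #     rlog.lookup(0)          # revision number -> node
    #     rlog.lookup(b'0')       # str(revision number) -> node
    #     rlog.lookup(b'1de3a4')  # unambiguous hex prefix -> node
    #     # ambiguous prefixes raise AmbiguousPrefixLookupError; identifiers
    #     # with no match raise LookupError.
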
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

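    # Usage sketch for ``shortest`` (hypothetical ``rlog``/``somenode``):
    #
    #     prefix = rlog.shortest(somenode, minlength=4)
    #     # the returned hex prefix is at least 4 characters, unambiguous
    #     # among stored nodes, and disambiguated against wdirid
    #     assert rlog.lookup(prefix) == somenode
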
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

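    # Worked example of the window rounding above (a sketch; the numbers are
    # illustrative and assume a power-of-two _chunkcachesize of 65536):
    #
    #     offset, length = 70000, 100
    #     realoffset = 70000 & ~65535                            # -> 65536
    #     reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # -> 65536
    #     # so a full 64KiB window starting at 65536 is read and cached, and
    #     # util.buffer slices bytes 70000..70100 back out of it.
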
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

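    # Usage sketch for the chunk helpers (hypothetical ``rlog``; revision
    # numbers are illustrative):
    #
    #     one = rlog._chunk(7)               # one decompressed delta/fulltext
    #     several = rlog._chunks([5, 6, 7])  # same, batched
    #     # _chunks reads one contiguous segment and slices it per revision,
    #     # which is why it is faster than repeated _chunk calls.
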
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

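    # Usage sketch for the snapshot helpers (hypothetical ``rlog``/``rev``):
    #
    #     if rlog.issnapshot(rev):
    #         depth = rlog.snapshotdepth(rev)
    #         # depth counts the snapshots stored below this one; a full-text
    #         # revision (delta chain of length one) has depth 0.
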
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags, None)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

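    # Illustrative sketch (not part of the original module): revision()
    # returns flag-processed text while rawdata() returns the bytes as
    # stored; for a revision with no extra flags set, the two are identical.
    # `rl` is a hypothetical revlog instance:
    #
    #   text = rl.revision(rev)   # flag processors applied on read
    #   raw = rl.rawdata(rev)     # stored bytes, the form that is hashed
    #   if not rl.flags(rev):
    #       assert text == raw
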
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

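    # Illustrative sketch (not part of the original module): to a first
    # approximation, storageutil.hashrevisionsha1 hashes the two parent
    # nodes in sorted order followed by the text. Roughly:
    #
    #   import hashlib
    #   s = hashlib.sha1(min(p1, p2))
    #   s.update(max(p1, p2))
    #   s.update(text)
    #   node = s.digest()
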
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        troffset = tr.findoffset(self.indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )
        trindex = 0
        tr.add(self.datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
            flags = flags & ~REVIDX_SIDEDATA
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )
        else:
            flags |= REVIDX_SIDEDATA

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(
            self, text, flags, sidedata=sidedata
        )

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        if self.index.has_node(node):
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
        )

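    # Illustrative sketch (not part of the original module): a minimal
    # addrevision() call, assuming an open transaction `tr`, a linkrev
    # `linkrev`, and parent nodes `p1`/`p2` already stored in this revlog:
    #
    #   node = rl.addrevision(b'new file content', tr, linkrev, p1, p2)
    #   assert rl.index.has_node(node)
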
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

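    # Illustrative sketch (not part of the original module): compress()
    # returns a (header, data) pair whose concatenation is the chunk that
    # gets written. Compressor output carries its own header byte; data
    # stored uncompressed is prefixed with b'u', or with nothing when it
    # already starts with b'\0':
    #
    #   header, data = rl.compress(b'some revision text')
    #   chunk = header + data
    #   assert bytes(rl.decompress(chunk)) == b'some revision text'
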
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

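    # Illustrative summary (not part of the original module): the first
    # byte of a chunk routes decompression. Headers handled above:
    #
    #   b'x'  -> zlib-compressed data (zlib streams start with 0x78)
    #   b'\0' -> uncompressed data stored verbatim
    #   b'u'  -> uncompressed data, header byte stripped on read
    #   other -> resolved via util.compengines.forrevlogheader(), e.g. the
    #            zstd engine when a repository is configured to use it
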
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size before it is changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
        )
        self.index.append(e)

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction, ifh, dfh, entry, deltainfo.data, link, offset
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

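    # Illustrative note (not part of the original module): the index entry
    # tuple appended in _addrevision() above has eight fields, in order:
    #
    #   (offset_type(offset, flags),  # byte offset packed with flag bits
    #    deltainfo.deltalen,          # length of the stored (delta) chunk
    #    textlen,                     # length of the full rawtext
    #    deltainfo.base,              # revision the delta is computed against
    #    link,                        # linkrev into the changelog
    #    p1r, p2r,                    # parent revision numbers
    #    node)                        # the revision's node id
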
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

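        # Each element of `deltas` is unpacked below as a 7-tuple:
        #   (node, p1, p2, linknode, deltabase, delta, flags)
        # where `delta` is a binary patch against `deltabase`, which must
        # already be present in this revlog (or be an earlier member of the
        # group).
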
        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self.indexfile, isize)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                if self.index.has_node(node):
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, node)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, node)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, node)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

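    # Illustrative sketch (not part of the original module): stripping
    # everything linked to changelog revision `minlink` and later, assuming
    # a revlog `rl` and an open transaction `tr`:
    #
    #   rev, broken = rl.getstrippoint(minlink)
    #   # `broken` lists revs whose linkrevs the strip will invalidate
    #   rl.strip(minlink, tr)
    #   assert len(rl) == rev
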
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

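    # Illustrative sketch (not part of the original module): checksize() as
    # a consistency probe, e.g. for verify-style tooling:
    #
    #   dd, di = rl.checksize()
    #   if dd or di:
    #       print('revlog has %d stray data bytes, %d stray index bytes'
    #             % (dd, di))
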
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. If None, the destination revlog's current setting
        is kept.

        If not None, `sidedatacompanion` is a callable that accepts two
        arguments:

            (srcrevlog, rev)

        and returns a quintet that controls changes to sidedata content from
        the old revision to the new clone result:

            (dropall, filterout, update, new_flags, dropped_flags)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        * `new_flags` is a bitfield of new flags that the revision should get
        * `dropped_flags` is a bitfield of flags that the revision should no
          longer have
        """
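        # Illustrative sketch (not part of the original module): a
        # hypothetical companion that drops one sidedata key everywhere and
        # leaves flags untouched:
        #
        #   def companion(srcrevlog, rev):
        #       dropall = False
        #       filterout = {some_sidedata_key}
        #       update = {}
        #       return (dropall, filterout, update, 0, 0)
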
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

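    # Illustrative sketch (not part of the original module): cloning into a
    # freshly created revlog while recomputing every delta, assuming an open
    # transaction `tr` and an empty destination `dest`:
    #
    #   src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
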
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {}, 0, 0)
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall = sidedataactions[0]
                filterout = sidedataactions[1]
                update = sidedataactions[2]
                new_flags = sidedataactions[3]
                dropped_flags = sidedataactions[4]
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None

                flags |= new_flags
                flags &= ~dropped_flags

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
2865 if addrevisioncb:
2871 addrevisioncb(self, rev, node)
2866 addrevisioncb(self, rev, node)
2872
2867
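    # A hedged usage sketch (`srcrevlog`, `destrevlog` and `tr` are
    # hypothetical placeholders): copy a revlog while reusing only deltas
    # computed against the same revisions:
    #
    #     srcrevlog.clone(tr, destrevlog,
    #                     deltareuse=srcrevlog.DELTAREUSESAMEREVS)
    #
    # DELTAREUSEALWAYS additionally reuses the cached delta base, while
    # DELTAREUSENEVER forces every delta to be recomputed on the fly.
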
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            # -------------------------------------------------------
            # flags()               | 0      | 0      | 0     | not 0
            # renamed()             | False  | True   | False | ?
            # rawtext[0:2]=='\1\n'  | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

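            # A concrete (hypothetical) rename example: for a rawtext of
            #
            #     b'\1\ncopy: a\ncopyrev: <40 hex digits>\n\1\nfile body'
            #
            # the metadata header runs from the first b'\1\n' through the
            # second one (LM bytes), so len(read()) == len(rawtext) - LM,
            # while rawsize() and len(rawtext) still agree (L1 == L2).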
            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
@@ -1,636 +1,647
# nodemap.py - nodemap related code and utilities
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
# Copyright 2019 George Racinet <georges.racinet@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import errno
import os
import re
import struct

from ..node import hex

from .. import (
    error,
    util,
)


class NodeMap(dict):
    def __missing__(self, x):
        raise error.RevlogError(b'unknown node: %s' % x)


def persisted_data(revlog):
    """read the nodemap for a revlog from disk"""
    if revlog.nodemap_file is None:
        return None
    pdata = revlog.opener.tryread(revlog.nodemap_file)
    if not pdata:
        return None
    offset = 0
    (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
    if version != ONDISK_VERSION:
        return None
    offset += S_VERSION.size
    headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
    uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
    offset += S_HEADER.size
    docket = NodeMapDocket(pdata[offset : offset + uid_size])
    offset += uid_size
    docket.tip_rev = tip_rev
    docket.tip_node = pdata[offset : offset + tip_node_size]
    docket.data_length = data_length
    docket.data_unused = data_unused

    filename = _rawdata_filepath(revlog, docket)
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
    try:
        with revlog.opener(filename) as fd:
            if use_mmap:
                data = util.buffer(util.mmapread(fd, data_length))
            else:
                data = fd.read(data_length)
    except (IOError, OSError) as e:
        if e.errno == errno.ENOENT:
            return None
        else:
            raise
    if len(data) < data_length:
        return None
    return docket, data


def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed on the transaction side to persist a
    nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog.nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    # this needs to run after the changelog finalization, whose callback id
    # starts with "cl-" and therefore sorts before this "nm-" one
    callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog.nodemap_file
    if tr.hasfinalize(callback_id):
        return  # no need to register again
    tr.addpending(
        callback_id, lambda tr: persist_nodemap(tr, revlog, pending=True)
    )
    tr.addfinalize(callback_id, lambda tr: persist_nodemap(tr, revlog))


class _NoTransaction(object):
    """transaction-like object to update the nodemap outside a transaction"""

    def __init__(self):
        self._postclose = {}

    def addpostclose(self, callback_id, callback_func):
        self._postclose[callback_id] = callback_func

    def registertmp(self, *args, **kwargs):
        pass

    def addbackup(self, *args, **kwargs):
        pass

    def add(self, *args, **kwargs):
        pass

    def addabort(self, *args, **kwargs):
        pass

    def _report(self, *args):
        pass


def update_persistent_nodemap(revlog):
    """update the persistent nodemap right now

    To be used for updating the nodemap on disk outside of a normal transaction
    setup (eg, `debugupdatecache`).
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog.nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    notr = _NoTransaction()
    persist_nodemap(notr, revlog)
    for k in sorted(notr._postclose):
        notr._postclose[k](None)


def persist_nodemap(tr, revlog, pending=False):
    """Write nodemap data on disk for a given revlog"""
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog.nodemap_file is None:
        msg = "calling persist nodemap on a revlog without the feature enabled"
        raise error.ProgrammingError(msg)

    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
    ondisk_docket = revlog._nodemap_docket
    feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")

    data = None
    # first attempt an incremental update of the data
    if can_incremental and ondisk_docket is not None:
        target_docket = revlog._nodemap_docket.copy()
        (
            src_docket,
            data_changed_count,
            data,
        ) = revlog.index.nodemap_data_incremental()
        new_length = target_docket.data_length + len(data)
        new_unused = target_docket.data_unused + data_changed_count
        if src_docket != target_docket:
            data = None
        elif new_length <= (new_unused * 10):
            # more than 10% of the data file would be unused
            data = None
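            # For instance (hypothetical numbers): with new_length = 1000
            # and new_unused = 150, 1000 <= 1500 holds, so at least 10% of
            # the file would be dead data and a full rewrite is chosen;
            # with new_unused = 50 the incremental append below is kept.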
        else:
            datafile = _rawdata_filepath(revlog, target_docket)
            # EXP-TODO: if this is a cache, this should use a cache vfs, not a
            # store vfs
            tr.add(datafile, target_docket.data_length)
            with revlog.opener(datafile, b'r+') as fd:
                fd.seek(target_docket.data_length)
                fd.write(data)
                if feed_data:
                    if use_mmap:
                        fd.seek(0)
                        new_data = fd.read(new_length)
                    else:
                        fd.flush()
                        new_data = util.buffer(util.mmapread(fd, new_length))
            target_docket.data_length = new_length
            target_docket.data_unused = new_unused

    if data is None:
        # otherwise fall back to a full new export
        target_docket = NodeMapDocket()
        datafile = _rawdata_filepath(revlog, target_docket)
        if util.safehasattr(revlog.index, "nodemap_data_all"):
            data = revlog.index.nodemap_data_all()
        else:
            data = persistent_data(revlog.index)
        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
        # store vfs

        tryunlink = revlog.opener.tryunlink

        def abortck(tr):
            tryunlink(datafile)

        callback_id = b"delete-%s" % datafile

        # some flavors of transaction abort do not clean up new files; they
        # simply empty them.
        tr.addabort(callback_id, abortck)
        with revlog.opener(datafile, b'w+') as fd:
            fd.write(data)
            if feed_data:
                if use_mmap:
                    new_data = data
                else:
                    fd.flush()
                    new_data = util.buffer(util.mmapread(fd, len(data)))
        target_docket.data_length = len(data)
    target_docket.tip_rev = revlog.tiprev()
    target_docket.tip_node = revlog.node(target_docket.tip_rev)
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    file_path = revlog.nodemap_file
    if pending:
        file_path += b'.a'
        tr.registertmp(file_path)
    else:
        tr.addbackup(file_path)

    with revlog.opener(file_path, b'w', atomictemp=True) as fp:
        fp.write(target_docket.serialize())
    revlog._nodemap_docket = target_docket
    if feed_data:
        revlog.index.update_nodemap_data(target_docket, new_data)

    # search for old data files in all cases; some older process might have
    # left one behind.
    olds = _other_rawdata_filepath(revlog, target_docket)
    if olds:
        realvfs = getattr(revlog, '_realopener', revlog.opener)

        def cleanup(tr):
            for oldfile in olds:
                realvfs.tryunlink(oldfile)

        callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
        tr.addpostclose(callback_id, cleanup)

### Nodemap docket file
#
# The nodemap data are stored on disk using 2 files:
#
# * a raw data file containing a persistent nodemap
#   (see `Nodemap Trie` section)
#
# * a small "docket" file containing metadata
#
# While the nodemap data can be multiple tens of megabytes, the "docket" is
# small, so it is easy to update it automatically or to duplicate its content
# during a transaction.
#
# Multiple raw data files can exist at the same time (the currently valid one
# and a new one being written by an in-progress transaction). To accommodate
# this, the filename hosting the raw data has a variable part. The exact
# filename is specified inside the "docket" file.
#
# The docket file contains information to find, qualify and validate the raw
# data. Its content is currently very light, but it will expand as the on disk
# nodemap gains the necessary features to be used in production.

ONDISK_VERSION = 1
S_VERSION = struct.Struct(">B")
S_HEADER = struct.Struct(">BQQQQ")

ID_SIZE = 8


def _make_uid():
    """return a new unique identifier.

    The identifier is random and composed of ascii characters."""
    return hex(os.urandom(ID_SIZE))


class NodeMapDocket(object):
    """metadata associated with persistent nodemap data

    The persistent data may come from disk or be on its way to disk.
    """

    def __init__(self, uid=None):
        if uid is None:
            uid = _make_uid()
        # a unique identifier for the data file:
        #   - When new data are appended, it is preserved.
        #   - When a new data file is created, a new identifier is generated.
        self.uid = uid
        # the tipmost revision stored in the data file. This revision and all
        # revisions before it are expected to be encoded in the data file.
        self.tip_rev = None
        # the node of that tipmost revision: if it mismatches the current
        # index data, the docket is not valid for the current index and
        # should be discarded.
        #
        # note: this check is not perfect as some destructive operation could
        # preserve the same tip_rev + tip_node while altering lower revisions.
        # However, multiple other caches have the same vulnerability (eg:
        # branchmap cache).
        self.tip_node = None
        # the size (in bytes) of the persisted data to encode the nodemap
        # valid for `tip_rev`.
        #   - data files shorter than this are corrupted,
        #   - any extra data should be ignored.
        self.data_length = None
        # the amount (in bytes) of "dead" data, still in the data file but no
        # longer used for the nodemap.
        self.data_unused = 0

    def copy(self):
        new = NodeMapDocket(uid=self.uid)
        new.tip_rev = self.tip_rev
        new.tip_node = self.tip_node
        new.data_length = self.data_length
        new.data_unused = self.data_unused
        return new

    def __cmp__(self, other):
        if self.uid < other.uid:
            return -1
        if self.uid > other.uid:
            return 1
        elif self.data_length < other.data_length:
            return -1
        elif self.data_length > other.data_length:
            return 1
        return 0

    def __eq__(self, other):
        return self.uid == other.uid and self.data_length == other.data_length

    def serialize(self):
        """return the serialized bytes for this docket"""
        data = []
        data.append(S_VERSION.pack(ONDISK_VERSION))
        headers = (
            len(self.uid),
            self.tip_rev,
            self.data_length,
            self.data_unused,
            len(self.tip_node),
        )
        data.append(S_HEADER.pack(*headers))
        data.append(self.uid)
        data.append(self.tip_node)
        return b''.join(data)

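# A sketch of the resulting byte layout (hypothetical values): for a docket
# with a 16-character uid, tip_rev=42, data_length=4096, data_unused=0 and a
# 20-byte tip node, serialize() emits
# 1 (">B" version) + 33 (">BQQQQ" header) + 16 (uid) + 20 (tip node)
# = 70 bytes in total.
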
def _rawdata_filepath(revlog, docket):
    """The (vfs relative) nodemap's rawdata file for a given uid"""
    if revlog.nodemap_file.endswith(b'.n.a'):
        prefix = revlog.nodemap_file[:-4]
    else:
        prefix = revlog.nodemap_file[:-2]
    return b"%s-%s.nd" % (prefix, docket.uid)


def _other_rawdata_filepath(revlog, docket):
    prefix = revlog.nodemap_file[:-2]
    pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
    new_file_path = _rawdata_filepath(revlog, docket)
    new_file_name = revlog.opener.basename(new_file_path)
    dirpath = revlog.opener.dirname(new_file_path)
    others = []
    for f in revlog.opener.listdir(dirpath):
        if pattern.match(f) and f != new_file_name:
            others.append(f)
    return others


### Nodemap Trie
#
# This is a simple reference implementation to compute and persist a nodemap
# trie. This reference implementation is write only. The python version of
# this is not expected to be actually used, since it won't provide any
# performance improvement over the existing non-persistent C implementation.
#
# The nodemap is persisted as a trie using 4-bit-address/16-entry blocks.
# Each revision can be addressed using the shortest unique prefix of its
# node.
#
# The trie is stored as a sequence of blocks. Each block contains 16 entries
# (signed 32bit integers, big endian, per the ">l" format of S_BLOCK below).
# Each entry can be one of the following:
#
# * value >= 0 -> index of sub-block
# * value == -1 -> no value
# * value < -1 -> encoded revision: rev = -(value+2)
#
# See REV_OFFSET and _transform_rev below.
#
# The implementation focuses on simplicity, not on performance. A Rust
# implementation should provide an efficient version of the same binary
# persistence. This reference python implementation is never meant to be
# used extensively in production.

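# A minimal worked example (hypothetical nodes): with rev 0 at a node whose
# hex starts with "a3" and rev 1 at a node starting with "a4", both share
# the first nibble 0xa, so the root delegates to a sub-block:
#
#   root = {0xa: sub}
#   sub  = {0x3: 0, 0x4: 1}
#
# Blocks are persisted children first: `sub` is written as block #0 with
# entries -2 and -3 (revs 0 and 1 via rev -> -(rev + 2)) at positions 3 and
# 4, then the root as block #1 with value 0 (the index of `sub`) at
# position 10; every other entry is NO_ENTRY (-1).
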
def persistent_data(index):
    """return the persistent binary form for a nodemap for a given index"""
    trie = _build_trie(index)
    return _persist_trie(trie)


def update_persistent_data(index, root, max_idx, last_rev):
    """return the incremental update for persistent nodemap from a given index"""
    changed_block, trie = _update_trie(index, root, last_rev)
    return (
        changed_block * S_BLOCK.size,
        _persist_trie(trie, existing_idx=max_idx),
    )


S_BLOCK = struct.Struct(">" + ("l" * 16))

NO_ENTRY = -1
# rev 0 needs to be -2 because 0 is used by blocks and -1 is a special value.
REV_OFFSET = 2


def _transform_rev(rev):
    """Return the number used to represent the rev in the tree.

    (or retrieve a rev number from such representation)

    Note that this is an involution, a function equal to its inverse (i.e.
    which gives the identity when applied to itself).
    """
    return -(rev + REV_OFFSET)

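# For example: _transform_rev(0) == -2 and _transform_rev(42) == -44;
# applying it again returns the original rev, e.g. _transform_rev(-44) == 42.
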
def _to_int(hex_digit):
    """turn a hexadecimal digit into a proper integer"""
    return int(hex_digit, 16)


class Block(dict):
    """represent a block of the Trie

    contains up to 16 entries indexed from 0 to 15"""

    def __init__(self):
        super(Block, self).__init__()
        # If this block exists on disk, here is its ID
        self.ondisk_id = None

    def __iter__(self):
        return iter(self.get(i) for i in range(16))


def _build_trie(index):
    """build a nodemap trie

    The nodemap stores revision numbers for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.
    """
    root = Block()
    for rev in range(len(index)):
        current_hex = hex(index[rev][7])
        _insert_into_block(index, 0, root, rev, current_hex)
    return root


def _update_trie(index, root, last_rev):
    """update a trie with the revisions added since `last_rev`"""
    changed = 0
    for rev in range(last_rev + 1, len(index)):
        current_hex = hex(index[rev][7])
        changed += _insert_into_block(index, 0, root, rev, current_hex)
    return changed, root


def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding revision for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of the node of that revision
    """
    changed = 1
    if block.ondisk_id is not None:
        block.ondisk_id = None
    hex_digit = _to_int(current_hex[level : level + 1])
    entry = block.get(hex_digit)
    if entry is None:
        # no entry, simply store the revision number
        block[hex_digit] = current_rev
    elif isinstance(entry, dict):
        # need to recurse to an underlying block
        changed += _insert_into_block(
            index, level + 1, entry, current_rev, current_hex
        )
    else:
        # collision with a previously unique prefix, insert new
        # vertices to fit both entries.
        other_hex = hex(index[entry][7])
        other_rev = entry
        new = Block()
        block[hex_digit] = new
        _insert_into_block(index, level + 1, new, other_rev, other_hex)
        _insert_into_block(index, level + 1, new, current_rev, current_hex)
    return changed

def _persist_trie(root, existing_idx=None):
    """turn a nodemap trie into persistent binary data

    See `_build_trie` for nodemap trie structure"""
    block_map = {}
    if existing_idx is not None:
        base_idx = existing_idx + 1
    else:
        base_idx = 0
    chunks = []
    for tn in _walk_trie(root):
        if tn.ondisk_id is not None:
            block_map[id(tn)] = tn.ondisk_id
        else:
            block_map[id(tn)] = len(chunks) + base_idx
            chunks.append(_persist_block(tn, block_map))
    return b''.join(chunks)


def _walk_trie(block):
    """yield all the blocks in a trie

    Child blocks are always yielded before their parent block.
    """
    for (__, item) in sorted(block.items()):
        if isinstance(item, dict):
            for sub_block in _walk_trie(item):
                yield sub_block
    yield block


def _persist_block(block_node, block_map):
    """produce persistent binary data for a single block

    Child blocks are assumed to be already persisted and present in
    block_map.
    """
    data = tuple(_to_value(v, block_map) for v in block_node)
    return S_BLOCK.pack(*data)


def _to_value(item, block_map):
    """persist any value as an integer"""
    if item is None:
        return NO_ENTRY
    elif isinstance(item, dict):
        return block_map[id(item)]
    else:
        return _transform_rev(item)


def parse_data(data):
    """parse nodemap data into a nodemap Trie"""
    if (len(data) % S_BLOCK.size) != 0:
        msg = "nodemap data size is not a multiple of block size (%d): %d"
        raise error.Abort(msg % (S_BLOCK.size, len(data)))
    if not data:
        return Block(), None
    block_map = {}
    new_blocks = []
    for i in range(0, len(data), S_BLOCK.size):
        block = Block()
        block.ondisk_id = len(block_map)
        block_map[block.ondisk_id] = block
        block_data = data[i : i + S_BLOCK.size]
        values = S_BLOCK.unpack(block_data)
        new_blocks.append((block, values))
    for b, values in new_blocks:
        for idx, v in enumerate(values):
            if v == NO_ENTRY:
                continue
            elif v >= 0:
                b[idx] = block_map[v]
            else:
                b[idx] = _transform_rev(v)
    return block, i // S_BLOCK.size

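# Round-trip sketch (assuming `index` is a revlog index object with at least
# one revision):
#
#     data = persistent_data(index)
#     root, last_block_idx = parse_data(data)
#     assert _find_node(root, hex(index[0][7])) == 0
#
# i.e. parse_data is the read-side inverse of _persist_trie; since the root
# is persisted last, the last block parsed is returned as the root.
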
# debug utility


def check_data(ui, index, data):
    """verify that the provided nodemap data are valid for the given index"""
    ret = 0
    ui.status((b"revision in index: %d\n") % len(index))
    root, __ = parse_data(data)
    all_revs = set(_all_revisions(root))
    ui.status((b"revision in nodemap: %d\n") % len(all_revs))
    for r in range(len(index)):
        if r not in all_revs:
            msg = b" revision missing from nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
        else:
            all_revs.remove(r)
        nm_rev = _find_node(root, hex(index[r][7]))
        if nm_rev is None:
            msg = b" revision node does not match any entries: %d\n" % r
            ui.write_err(msg)
            ret = 1
        elif nm_rev != r:
            msg = (
                b" revision node does not match the expected revision: "
                b"%d != %d\n" % (r, nm_rev)
            )
            ui.write_err(msg)
            ret = 1

    if all_revs:
        for r in sorted(all_revs):
            msg = b" extra revision in nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
    return ret


def _all_revisions(root):
    """return all revisions stored in a Trie"""
    for block in _walk_trie(root):
        for v in block:
            if v is None or isinstance(v, Block):
                continue
            yield v


def _find_node(block, node):
    """find the revision associated with a given node"""
    entry = block.get(_to_int(node[0:1]))
    if isinstance(entry, dict):
        return _find_node(entry, node[1:])
    return entry


def get_nodemap_file(opener, indexfile):
    if indexfile.endswith(b'.a'):
        pending_path = indexfile[:-4] + b".n.a"
        if opener.exists(pending_path):
            return pending_path
        else:
            return indexfile[:-4] + b".n"
    else:
        return indexfile[:-2] + b".n"
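
For instance (a sketch; `opener` stands for any vfs-like object providing
`exists`), the new helper maps index file names to their nodemap docket names:

    get_nodemap_file(opener, b'00changelog.i')    # -> b'00changelog.n'
    # during a pending transaction, if b'00changelog.n.a' exists on disk:
    get_nodemap_file(opener, b'00changelog.i.a')  # -> b'00changelog.n.a'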