revlog: move the "index header" struct inside revlog.utils.constants...
marmoute
r47618:c6e23fb4 default
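
The revlog index header is a single 4-byte big-endian unsigned integer packing the format version and feature flags. Until this change, revlog.py kept a private `versionformat` struct (plus `versionformat_pack`/`versionformat_unpack` aliases) to read and write it; the commit drops those in favor of a shared `INDEX_HEADER` struct in `revlogutils.constants`. A minimal sketch of the shared struct and its use, with an illustrative value (assuming REVLOGV1 | FLAG_INLINE_DATA == 0x00010001):

    import struct

    INDEX_HEADER = struct.Struct(b">I")  # 4-byte big-endian unsigned int

    header = INDEX_HEADER.pack(0x00010001)         # written into the index
    versionflags = INDEX_HEADER.unpack(header)[0]  # what _loadindex reads back
    assert versionflags == 0x00010001
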
@@ -1,3252 +1,3249 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    sha1nodeconstants,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_ENTRY_V0,
    INDEX_ENTRY_V1,
    INDEX_ENTRY_V2,
+    INDEX_HEADER,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVIDX_SIDEDATA,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    nodemap as nodemaputil,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blank usage of all the names to prevent pyflakes complaints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def getoffset(q):
    return int(q >> 16)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)


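# --- illustrative note (editor's sketch, not part of revlog.py or this
# change): offset_type() packs a data offset and a 16-bit flag field into
# a single integer; getoffset()/gettype() split it back apart:
#
#     v = offset_type(1024, REVIDX_ISCENSORED)  # (1024 << 16) | flags
#     assert getoffset(v) == 1024
#     assert gettype(v) == REVIDX_ISCENSORED
#
# Flags outside flagutil.REVIDX_KNOWN_FLAGS raise ValueError.
# ---------------------------------------------------------------------
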
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


class revlogoldindex(list):
    @property
    def nodemap(self):
        msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self._nodemap

    @util.propertycache
    def _nodemap(self):
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def has_node(self, node):
        """return True if the node exists in the index"""
        return node in self._nodemap

    def rev(self, node):
        """return a revision for a node

        If the node is unknown, raise a RevlogError"""
        return self._nodemap[node]

    def get_rev(self, node):
        """return a revision for a node

        If the node is unknown, return None"""
        return self._nodemap.get(node)

    def append(self, tup):
        self._nodemap[tup[7]] = len(self)
        super(revlogoldindex, self).append(tup)

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        for r in pycompat.xrange(i.start, len(self)):
            del self._nodemap[self[r][7]]
        super(revlogoldindex, self).__delitem__(i)

    def clearcaches(self):
        self.__dict__.pop('_nodemap', None)

    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)


class revlogoldio(object):
    def __init__(self):
        self.size = INDEX_ENTRY_V0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = INDEX_ENTRY_V0.unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        index = revlogoldindex(index)
        return index, None

    def packentry(self, entry, node, version, rev):
        """return the binary representation of an entry

        entry:   a tuple containing all the values (see index.__getitem__)
        node:    a callback to convert a revision to nodeid
        version: the changelog version
        rev:     the revision number
        """
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return INDEX_ENTRY_V0.pack(*e2)


-versionformat = struct.Struct(b">I")
-versionformat_pack = versionformat.pack
-versionformat_unpack = versionformat.unpack
-
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlogio(object):
    def __init__(self):
        self.size = INDEX_ENTRY_V1.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = INDEX_ENTRY_V1.pack(*entry)
        if rev == 0:
-            p = versionformat_pack(version) + p[4:]
+            p = INDEX_HEADER.pack(version) + p[4:]
        return p


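# --- illustrative note (editor's sketch, not part of revlog.py or this
# change): for rev 0 the 4-byte version header is spliced over the first
# four bytes of the packed entry, so the index file needs no separate
# header block:
#
#     p = INDEX_ENTRY_V1.pack(*entry)
#     p = INDEX_HEADER.pack(version) + p[4:]           # rev 0 only
#     assert INDEX_HEADER.unpack(p[:4])[0] == version  # _loadindex reads this
# ---------------------------------------------------------------------
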
class revlogv2io(object):
    def __init__(self):
        self.size = INDEX_ENTRY_V2.size

    def parseindex(self, data, inline):
        index, cache = parsers.parse_index2(data, inline, revlogv2=True)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = INDEX_ENTRY_V2.pack(*entry)
        if rev == 0:
-            p = versionformat_pack(version) + p[4:]
+            p = INDEX_HEADER.pack(version) + p[4:]
        return p


NodemapRevlogIO = None

if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    class NodemapRevlogIO(revlogio):
        """A debug-oriented IO class that returns a PersistentNodeMapIndexObject

        The PersistentNodeMapIndexObject object is meant to test the
        persistent nodemap feature.
        """

        def parseindex(self, data, inline):
            index, cache = parsers.parse_index_devel_nodemap(data, inline)
            return index, cache


class rustrevlogio(revlogio):
    def parseindex(self, data, inline):
        index, cache = super(rustrevlogio, self).parseindex(data, inline)
        return rustrevlog.MixedIndex(index), cache


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.nodemap_file = None
        if persistentnodemap:
            self.nodemap_file = nodemaputil.get_nodemap_file(
                opener, self.indexfile
            )

        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _loadindex(self):
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )

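# --- illustrative note (editor's sketch, not part of revlog.py or this
# change): n & (n - 1) clears the lowest set bit, so the result is zero
# exactly when n is a power of two (or zero, excluded by the check above);
# e.g. 65536 & 65535 == 0 passes, while 65000 & 64999 != 0 is rejected.
# ---------------------------------------------------------------------
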
        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
-                versionflags = versionformat_unpack(indexdata[:4])[0]
+                versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

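# --- illustrative note (editor's sketch, not part of revlog.py or this
# change): the header keeps the format version in the low 16 bits and the
# feature flags in the high bits. For example, assuming REVLOGV1 == 1 and
# FLAG_INLINE_DATA == 1 << 16:
#
#     versionflags = 0x00010001
#     versionflags & 0xFFFF   # -> 1        (fmt: REVLOGV1)
#     versionflags & ~0xFFFF  # -> 0x10000  (flags: FLAG_INLINE_DATA)
# ---------------------------------------------------------------------
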
597 if fmt == REVLOGV0:
594 if fmt == REVLOGV0:
598 if flags:
595 if flags:
599 raise error.RevlogError(
596 raise error.RevlogError(
600 _(b'unknown flags (%#04x) in version %d revlog %s')
597 _(b'unknown flags (%#04x) in version %d revlog %s')
601 % (flags >> 16, fmt, self.indexfile)
598 % (flags >> 16, fmt, self.indexfile)
602 )
599 )
603
600
604 self._inline = False
601 self._inline = False
605 self._generaldelta = False
602 self._generaldelta = False
606
603
607 elif fmt == REVLOGV1:
604 elif fmt == REVLOGV1:
608 if flags & ~REVLOGV1_FLAGS:
605 if flags & ~REVLOGV1_FLAGS:
609 raise error.RevlogError(
606 raise error.RevlogError(
610 _(b'unknown flags (%#04x) in version %d revlog %s')
607 _(b'unknown flags (%#04x) in version %d revlog %s')
611 % (flags >> 16, fmt, self.indexfile)
608 % (flags >> 16, fmt, self.indexfile)
612 )
609 )
613
610
614 self._inline = versionflags & FLAG_INLINE_DATA
611 self._inline = versionflags & FLAG_INLINE_DATA
615 self._generaldelta = versionflags & FLAG_GENERALDELTA
612 self._generaldelta = versionflags & FLAG_GENERALDELTA
616
613
617 elif fmt == REVLOGV2:
614 elif fmt == REVLOGV2:
618 if flags & ~REVLOGV2_FLAGS:
615 if flags & ~REVLOGV2_FLAGS:
619 raise error.RevlogError(
616 raise error.RevlogError(
620 _(b'unknown flags (%#04x) in version %d revlog %s')
617 _(b'unknown flags (%#04x) in version %d revlog %s')
621 % (flags >> 16, fmt, self.indexfile)
618 % (flags >> 16, fmt, self.indexfile)
622 )
619 )
623
620
624 # There is a bug in the transaction handling when going from an
621 # There is a bug in the transaction handling when going from an
625 # inline revlog to a separate index and data file. Turn it off until
622 # inline revlog to a separate index and data file. Turn it off until
626 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
623 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
627 # See issue6485
624 # See issue6485
628 self._inline = False
625 self._inline = False
629 # generaldelta implied by version 2 revlogs.
626 # generaldelta implied by version 2 revlogs.
630 self._generaldelta = True
627 self._generaldelta = True
631
628
632 else:
629 else:
633 raise error.RevlogError(
630 raise error.RevlogError(
634 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
631 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
635 )
632 )
636
633
637 self.nodeconstants = sha1nodeconstants
634 self.nodeconstants = sha1nodeconstants
638 self.nullid = self.nodeconstants.nullid
635 self.nullid = self.nodeconstants.nullid
639
636
640 # sparse-revlog can't be on without general-delta (issue6056)
637 # sparse-revlog can't be on without general-delta (issue6056)
641 if not self._generaldelta:
638 if not self._generaldelta:
642 self._sparserevlog = False
639 self._sparserevlog = False
643
640
644 self._storedeltachains = True
641 self._storedeltachains = True
645
642
646 devel_nodemap = (
643 devel_nodemap = (
647 self.nodemap_file
644 self.nodemap_file
648 and opts.get(b'devel-force-nodemap', False)
645 and opts.get(b'devel-force-nodemap', False)
649 and NodemapRevlogIO is not None
646 and NodemapRevlogIO is not None
650 )
647 )
651
648
652 use_rust_index = False
649 use_rust_index = False
653 if rustrevlog is not None:
650 if rustrevlog is not None:
654 if self.nodemap_file is not None:
651 if self.nodemap_file is not None:
655 use_rust_index = True
652 use_rust_index = True
656 else:
653 else:
657 use_rust_index = self.opener.options.get(b'rust.index')
654 use_rust_index = self.opener.options.get(b'rust.index')
658
655
659 self._io = revlogio()
656 self._io = revlogio()
660 if self.version == REVLOGV0:
657 if self.version == REVLOGV0:
661 self._io = revlogoldio()
658 self._io = revlogoldio()
662 elif fmt == REVLOGV2:
659 elif fmt == REVLOGV2:
663 self._io = revlogv2io()
660 self._io = revlogv2io()
664 elif devel_nodemap:
661 elif devel_nodemap:
665 self._io = NodemapRevlogIO()
662 self._io = NodemapRevlogIO()
666 elif use_rust_index:
663 elif use_rust_index:
667 self._io = rustrevlogio()
664 self._io = rustrevlogio()
668 try:
665 try:
669 d = self._io.parseindex(indexdata, self._inline)
666 d = self._io.parseindex(indexdata, self._inline)
670 index, _chunkcache = d
667 index, _chunkcache = d
671 use_nodemap = (
668 use_nodemap = (
672 not self._inline
669 not self._inline
673 and self.nodemap_file is not None
670 and self.nodemap_file is not None
674 and util.safehasattr(index, 'update_nodemap_data')
671 and util.safehasattr(index, 'update_nodemap_data')
675 )
672 )
676 if use_nodemap:
673 if use_nodemap:
677 nodemap_data = nodemaputil.persisted_data(self)
674 nodemap_data = nodemaputil.persisted_data(self)
678 if nodemap_data is not None:
675 if nodemap_data is not None:
679 docket = nodemap_data[0]
676 docket = nodemap_data[0]
680 if (
677 if (
681 len(d[0]) > docket.tip_rev
678 len(d[0]) > docket.tip_rev
682 and d[0][docket.tip_rev][7] == docket.tip_node
679 and d[0][docket.tip_rev][7] == docket.tip_node
683 ):
680 ):
684 # no changelog tampering
681 # no changelog tampering
685 self._nodemap_docket = docket
682 self._nodemap_docket = docket
686 index.update_nodemap_data(*nodemap_data)
683 index.update_nodemap_data(*nodemap_data)
687 except (ValueError, IndexError):
684 except (ValueError, IndexError):
688 raise error.RevlogError(
685 raise error.RevlogError(
689 _(b"index %s is corrupted") % self.indexfile
686 _(b"index %s is corrupted") % self.indexfile
690 )
687 )
691 self.index, self._chunkcache = d
688 self.index, self._chunkcache = d
692 if not self._chunkcache:
689 if not self._chunkcache:
693 self._chunkclear()
690 self._chunkclear()
694 # revnum -> (chain-length, sum-delta-length)
691 # revnum -> (chain-length, sum-delta-length)
695 self._chaininfocache = util.lrucachedict(500)
692 self._chaininfocache = util.lrucachedict(500)
696 # revlog header -> revlog compressor
693 # revlog header -> revlog compressor
697 self._decompressors = {}
694 self._decompressors = {}
698
695
699 @util.propertycache
696 @util.propertycache
700 def _compressor(self):
697 def _compressor(self):
701 engine = util.compengines[self._compengine]
698 engine = util.compengines[self._compengine]
702 return engine.revlogcompressor(self._compengineopts)
699 return engine.revlogcompressor(self._compengineopts)
703
700
704 def _indexfp(self, mode=b'r'):
701 def _indexfp(self, mode=b'r'):
705 """file object for the revlog's index file"""
702 """file object for the revlog's index file"""
706 args = {'mode': mode}
703 args = {'mode': mode}
707 if mode != b'r':
704 if mode != b'r':
708 args['checkambig'] = self._checkambig
705 args['checkambig'] = self._checkambig
709 if mode == b'w':
706 if mode == b'w':
710 args['atomictemp'] = True
707 args['atomictemp'] = True
711 return self.opener(self.indexfile, **args)
708 return self.opener(self.indexfile, **args)
712
709
713 def _datafp(self, mode=b'r'):
710 def _datafp(self, mode=b'r'):
714 """file object for the revlog's data file"""
711 """file object for the revlog's data file"""
715 return self.opener(self.datafile, mode=mode)
712 return self.opener(self.datafile, mode=mode)
716
713
717 @contextlib.contextmanager
714 @contextlib.contextmanager
718 def _datareadfp(self, existingfp=None):
715 def _datareadfp(self, existingfp=None):
719 """file object suitable to read data"""
716 """file object suitable to read data"""
720 # Use explicit file handle, if given.
717 # Use explicit file handle, if given.
721 if existingfp is not None:
718 if existingfp is not None:
722 yield existingfp
719 yield existingfp
723
720
724 # Use a file handle being actively used for writes, if available.
721 # Use a file handle being actively used for writes, if available.
725 # There is some danger to doing this because reads will seek the
722 # There is some danger to doing this because reads will seek the
726 # file. However, _writeentry() performs a SEEK_END before all writes,
723 # file. However, _writeentry() performs a SEEK_END before all writes,
727 # so we should be safe.
724 # so we should be safe.
728 elif self._writinghandles:
725 elif self._writinghandles:
729 if self._inline:
726 if self._inline:
730 yield self._writinghandles[0]
727 yield self._writinghandles[0]
731 else:
728 else:
732 yield self._writinghandles[1]
729 yield self._writinghandles[1]
733
730
734 # Otherwise open a new file handle.
731 # Otherwise open a new file handle.
735 else:
732 else:
736 if self._inline:
733 if self._inline:
737 func = self._indexfp
734 func = self._indexfp
738 else:
735 else:
739 func = self._datafp
736 func = self._datafp
740 with func() as fp:
737 with func() as fp:
741 yield fp
738 yield fp
742
739
743 def tiprev(self):
740 def tiprev(self):
744 return len(self.index) - 1
741 return len(self.index) - 1
745
742
746 def tip(self):
743 def tip(self):
747 return self.node(self.tiprev())
744 return self.node(self.tiprev())
748
745
749 def __contains__(self, rev):
746 def __contains__(self, rev):
750 return 0 <= rev < len(self)
747 return 0 <= rev < len(self)
751
748
752 def __len__(self):
749 def __len__(self):
753 return len(self.index)
750 return len(self.index)
754
751
755 def __iter__(self):
752 def __iter__(self):
756 return iter(pycompat.xrange(len(self)))
753 return iter(pycompat.xrange(len(self)))
757
754
758 def revs(self, start=0, stop=None):
755 def revs(self, start=0, stop=None):
759 """iterate over all rev in this revlog (from start to stop)"""
756 """iterate over all rev in this revlog (from start to stop)"""
760 return storageutil.iterrevs(len(self), start=start, stop=stop)
757 return storageutil.iterrevs(len(self), start=start, stop=stop)
761
758
762 @property
759 @property
763 def nodemap(self):
760 def nodemap(self):
764 msg = (
761 msg = (
765 b"revlog.nodemap is deprecated, "
762 b"revlog.nodemap is deprecated, "
766 b"use revlog.index.[has_node|rev|get_rev]"
763 b"use revlog.index.[has_node|rev|get_rev]"
767 )
764 )
768 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
765 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
769 return self.index.nodemap
766 return self.index.nodemap
770
767
771 @property
768 @property
772 def _nodecache(self):
769 def _nodecache(self):
773 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
770 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
774 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
771 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
775 return self.index.nodemap
772 return self.index.nodemap
776
773
777 def hasnode(self, node):
774 def hasnode(self, node):
778 try:
775 try:
779 self.rev(node)
776 self.rev(node)
780 return True
777 return True
781 except KeyError:
778 except KeyError:
782 return False
779 return False
783
780
784 def candelta(self, baserev, rev):
781 def candelta(self, baserev, rev):
785 """whether two revisions (baserev, rev) can be delta-ed or not"""
782 """whether two revisions (baserev, rev) can be delta-ed or not"""
786 # Disable delta if either rev requires a content-changing flag
783 # Disable delta if either rev requires a content-changing flag
787 # processor (ex. LFS). This is because such flag processor can alter
784 # processor (ex. LFS). This is because such flag processor can alter
788 # the rawtext content that the delta will be based on, and two clients
785 # the rawtext content that the delta will be based on, and two clients
789 # could have a same revlog node with different flags (i.e. different
786 # could have a same revlog node with different flags (i.e. different
790 # rawtext contents) and the delta could be incompatible.
787 # rawtext contents) and the delta could be incompatible.
791 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
788 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
792 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
789 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
793 ):
790 ):
794 return False
791 return False
795 return True
792 return True
796
793
797 def update_caches(self, transaction):
794 def update_caches(self, transaction):
798 if self.nodemap_file is not None:
795 if self.nodemap_file is not None:
799 if transaction is None:
796 if transaction is None:
800 nodemaputil.update_persistent_nodemap(self)
797 nodemaputil.update_persistent_nodemap(self)
801 else:
798 else:
802 nodemaputil.setup_persistent_nodemap(transaction, self)
799 nodemaputil.setup_persistent_nodemap(transaction, self)
803
800
804 def clearcaches(self):
801 def clearcaches(self):
805 self._revisioncache = None
802 self._revisioncache = None
806 self._chainbasecache.clear()
803 self._chainbasecache.clear()
807 self._chunkcache = (0, b'')
804 self._chunkcache = (0, b'')
808 self._pcache = {}
805 self._pcache = {}
809 self._nodemap_docket = None
806 self._nodemap_docket = None
810 self.index.clearcaches()
807 self.index.clearcaches()
811 # The python code is the one responsible for validating the docket, we
808 # The python code is the one responsible for validating the docket, we
812 # end up having to refresh it here.
809 # end up having to refresh it here.
813 use_nodemap = (
810 use_nodemap = (
814 not self._inline
811 not self._inline
815 and self.nodemap_file is not None
812 and self.nodemap_file is not None
816 and util.safehasattr(self.index, 'update_nodemap_data')
813 and util.safehasattr(self.index, 'update_nodemap_data')
817 )
814 )
818 if use_nodemap:
815 if use_nodemap:
819 nodemap_data = nodemaputil.persisted_data(self)
816 nodemap_data = nodemaputil.persisted_data(self)
820 if nodemap_data is not None:
817 if nodemap_data is not None:
821 self._nodemap_docket = nodemap_data[0]
818 self._nodemap_docket = nodemap_data[0]
822 self.index.update_nodemap_data(*nodemap_data)
819 self.index.update_nodemap_data(*nodemap_data)
823
820
824 def rev(self, node):
821 def rev(self, node):
825 try:
822 try:
826 return self.index.rev(node)
823 return self.index.rev(node)
827 except TypeError:
824 except TypeError:
828 raise
825 raise
829 except error.RevlogError:
826 except error.RevlogError:
830 # parsers.c radix tree lookup failed
827 # parsers.c radix tree lookup failed
831 if node == wdirid or node in wdirfilenodeids:
828 if node == wdirid or node in wdirfilenodeids:
832 raise error.WdirUnsupported
829 raise error.WdirUnsupported
833 raise error.LookupError(node, self.indexfile, _(b'no node'))
830 raise error.LookupError(node, self.indexfile, _(b'no node'))
834
831
835 # Accessors for index entries.
832 # Accessors for index entries.
836
833
837 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
834 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
838 # are flags.
835 # are flags.
839 def start(self, rev):
836 def start(self, rev):
840 return int(self.index[rev][0] >> 16)
837 return int(self.index[rev][0] >> 16)
841
838
842 def flags(self, rev):
839 def flags(self, rev):
843 return self.index[rev][0] & 0xFFFF
840 return self.index[rev][0] & 0xFFFF
844
841
845 def length(self, rev):
842 def length(self, rev):
846 return self.index[rev][1]
843 return self.index[rev][1]
847
844
848 def sidedata_length(self, rev):
845 def sidedata_length(self, rev):
849 if self.version & 0xFFFF != REVLOGV2:
846 if self.version & 0xFFFF != REVLOGV2:
850 return 0
847 return 0
851 return self.index[rev][9]
848 return self.index[rev][9]
852
849
853 def rawsize(self, rev):
850 def rawsize(self, rev):
854 """return the length of the uncompressed text for a given revision"""
851 """return the length of the uncompressed text for a given revision"""
855 l = self.index[rev][2]
852 l = self.index[rev][2]
856 if l >= 0:
853 if l >= 0:
857 return l
854 return l
858
855
859 t = self.rawdata(rev)
856 t = self.rawdata(rev)
860 return len(t)
857 return len(t)
861
858
862 def size(self, rev):
859 def size(self, rev):
863 """length of non-raw text (processed by a "read" flag processor)"""
860 """length of non-raw text (processed by a "read" flag processor)"""
864 # fast path: if no "read" flag processor could change the content,
861 # fast path: if no "read" flag processor could change the content,
865 # size is rawsize. note: ELLIPSIS is known to not change the content.
862 # size is rawsize. note: ELLIPSIS is known to not change the content.
866 flags = self.flags(rev)
863 flags = self.flags(rev)
867 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
864 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
868 return self.rawsize(rev)
865 return self.rawsize(rev)
869
866
870 return len(self.revision(rev, raw=False))
867 return len(self.revision(rev, raw=False))
871
868
872 def chainbase(self, rev):
869 def chainbase(self, rev):
873 base = self._chainbasecache.get(rev)
870 base = self._chainbasecache.get(rev)
874 if base is not None:
871 if base is not None:
875 return base
872 return base
876
873
877 index = self.index
874 index = self.index
878 iterrev = rev
875 iterrev = rev
879 base = index[iterrev][3]
876 base = index[iterrev][3]
880 while base != iterrev:
877 while base != iterrev:
881 iterrev = base
878 iterrev = base
882 base = index[iterrev][3]
879 base = index[iterrev][3]
883
880
884 self._chainbasecache[rev] = base
881 self._chainbasecache[rev] = base
885 return base
882 return base
886
883
887 def linkrev(self, rev):
884 def linkrev(self, rev):
888 return self.index[rev][4]
885 return self.index[rev][4]
889
886
890 def parentrevs(self, rev):
887 def parentrevs(self, rev):
891 try:
888 try:
892 entry = self.index[rev]
889 entry = self.index[rev]
893 except IndexError:
890 except IndexError:
894 if rev == wdirrev:
891 if rev == wdirrev:
895 raise error.WdirUnsupported
892 raise error.WdirUnsupported
896 raise
893 raise
897 if entry[5] == nullrev:
894 if entry[5] == nullrev:
898 return entry[6], entry[5]
895 return entry[6], entry[5]
899 else:
896 else:
900 return entry[5], entry[6]
897 return entry[5], entry[6]
901
898
902 # fast parentrevs(rev) where rev isn't filtered
899 # fast parentrevs(rev) where rev isn't filtered
903 _uncheckedparentrevs = parentrevs
900 _uncheckedparentrevs = parentrevs
904
901
905 def node(self, rev):
902 def node(self, rev):
906 try:
903 try:
907 return self.index[rev][7]
904 return self.index[rev][7]
908 except IndexError:
905 except IndexError:
909 if rev == wdirrev:
906 if rev == wdirrev:
910 raise error.WdirUnsupported
907 raise error.WdirUnsupported
911 raise
908 raise
912
909
913 # Derived from index values.
910 # Derived from index values.
914
911
915 def end(self, rev):
912 def end(self, rev):
916 return self.start(rev) + self.length(rev)
913 return self.start(rev) + self.length(rev)
917
914
918 def parents(self, node):
915 def parents(self, node):
919 i = self.index
916 i = self.index
920 d = i[self.rev(node)]
917 d = i[self.rev(node)]
921 # inline node() to avoid function call overhead
918 # inline node() to avoid function call overhead
922 if d[5] == nullid:
919 if d[5] == nullid:
923 return i[d[6]][7], i[d[5]][7]
920 return i[d[6]][7], i[d[5]][7]
924 else:
921 else:
925 return i[d[5]][7], i[d[6]][7]
922 return i[d[5]][7], i[d[6]][7]
926
923
927 def chainlen(self, rev):
924 def chainlen(self, rev):
928 return self._chaininfo(rev)[0]
925 return self._chaininfo(rev)[0]
929
926
930 def _chaininfo(self, rev):
927 def _chaininfo(self, rev):
931 chaininfocache = self._chaininfocache
928 chaininfocache = self._chaininfocache
932 if rev in chaininfocache:
929 if rev in chaininfocache:
933 return chaininfocache[rev]
930 return chaininfocache[rev]
934 index = self.index
931 index = self.index
935 generaldelta = self._generaldelta
932 generaldelta = self._generaldelta
936 iterrev = rev
933 iterrev = rev
937 e = index[iterrev]
934 e = index[iterrev]
938 clen = 0
935 clen = 0
939 compresseddeltalen = 0
936 compresseddeltalen = 0
940 while iterrev != e[3]:
937 while iterrev != e[3]:
941 clen += 1
938 clen += 1
942 compresseddeltalen += e[1]
939 compresseddeltalen += e[1]
943 if generaldelta:
940 if generaldelta:
944 iterrev = e[3]
941 iterrev = e[3]
945 else:
942 else:
946 iterrev -= 1
943 iterrev -= 1
947 if iterrev in chaininfocache:
944 if iterrev in chaininfocache:
948 t = chaininfocache[iterrev]
945 t = chaininfocache[iterrev]
949 clen += t[0]
946 clen += t[0]
950 compresseddeltalen += t[1]
947 compresseddeltalen += t[1]
951 break
948 break
952 e = index[iterrev]
949 e = index[iterrev]
953 else:
950 else:
954 # Add text length of base since decompressing that also takes
951 # Add text length of base since decompressing that also takes
955 # work. For cache hits the length is already included.
952 # work. For cache hits the length is already included.
956 compresseddeltalen += e[1]
953 compresseddeltalen += e[1]
957 r = (clen, compresseddeltalen)
954 r = (clen, compresseddeltalen)
958 chaininfocache[rev] = r
955 chaininfocache[rev] = r
959 return r
956 return r
960
957
def _deltachain(self, rev, stoprev=None):
    """Obtain the delta chain for a revision.

    ``stoprev`` specifies a revision to stop at. If not specified, we
    stop at the base of the chain.

    Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
    revs in ascending order and ``stopped`` is a bool indicating whether
    ``stoprev`` was hit.
    """
    # Try C implementation.
    try:
        return self.index.deltachain(rev, stoprev, self._generaldelta)
    except AttributeError:
        pass

    chain = []

    # Alias to prevent attribute lookup in tight loop.
    index = self.index
    generaldelta = self._generaldelta

    iterrev = rev
    e = index[iterrev]
    while iterrev != e[3] and iterrev != stoprev:
        chain.append(iterrev)
        if generaldelta:
            iterrev = e[3]
        else:
            iterrev -= 1
        e = index[iterrev]

    if iterrev == stoprev:
        stopped = True
    else:
        chain.append(iterrev)
        stopped = False

    chain.reverse()
    return chain, stopped

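# Illustration: the same walk as the pure-Python fallback above, on the toy
# index shape used in the previous sketch (generaldelta assumed; the
# ``iterrev -= 1`` branch for older revlogs is omitted). Hypothetical helper.
def toy_deltachain(index, rev, stoprev=None):
    chain = []
    while index[rev][3] != rev and rev != stoprev:
        chain.append(rev)
        rev = index[rev][3]
    stopped = rev == stoprev
    if not stopped:
        chain.append(rev)  # include the snapshot at the base of the chain
    chain.reverse()  # callers want application order, oldest first
    return chain, stopped

# toy_deltachain({0: (0, 10, 0, 0), 1: (0, 3, 0, 0), 2: (0, 2, 0, 1)}, 2)
# returns ([0, 1, 2], False); with stoprev=1 it returns ([2], True)
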
def ancestors(self, revs, stoprev=0, inclusive=False):
    """Generate the ancestors of 'revs' in reverse revision order.
    Does not generate revs lower than stoprev.

    See the documentation for ancestor.lazyancestors for more details."""

    # first, make sure start revisions aren't filtered
    revs = list(revs)
    checkrev = self.node
    for r in revs:
        checkrev(r)
    # and we're sure ancestors aren't filtered as well

    if rustancestor is not None:
        lazyancestors = rustancestor.LazyAncestors
        arg = self.index
    else:
        lazyancestors = ancestor.lazyancestors
        arg = self._uncheckedparentrevs
    return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

def descendants(self, revs):
    return dagop.descendantrevs(revs, self.revs, self.parentrevs)

def findcommonmissing(self, common=None, heads=None):
    """Return a tuple of the ancestors of common and the ancestors of heads
    that are not ancestors of common. In revset terminology, we return the
    tuple:

    ::common, (::heads) - (::common)

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    common = [self.rev(n) for n in common]
    heads = [self.rev(n) for n in heads]

    # we want the ancestors, but inclusive
    class lazyset(object):
        def __init__(self, lazyvalues):
            self.addedvalues = set()
            self.lazyvalues = lazyvalues

        def __contains__(self, value):
            return value in self.addedvalues or value in self.lazyvalues

        def __iter__(self):
            added = self.addedvalues
            for r in added:
                yield r
            for r in self.lazyvalues:
                if r not in added:
                    yield r

        def add(self, value):
            self.addedvalues.add(value)

        def update(self, values):
            self.addedvalues.update(values)

    has = lazyset(self.ancestors(common))
    has.add(nullrev)
    has.update(common)

    # take all ancestors from heads that aren't in has
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        else:
            missing.add(r)
            for p in self.parentrevs(r):
                if p not in has:
                    visit.append(p)
    missing = list(missing)
    missing.sort()
    return has, [self.node(miss) for miss in missing]

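# Illustration: ``lazyset`` above unions an eager ``set`` with a lazily
# evaluated ancestor iterator so membership tests don't force a full
# ancestor walk up front. A rough standalone equivalent (hypothetical names;
# the real lazy ancestor object also remembers what it already yielded):
class toyunion(object):
    def __init__(self, lazyvalues):
        self.added = set()  # eagerly known members
        self.lazy = lazyvalues  # anything supporting __contains__

    def __contains__(self, value):
        return value in self.added or value in self.lazy

has = toyunion(range(0, 5))  # "ancestors" 0..4, tested lazily
has.added.update([-1, 7])  # nullrev plus an explicitly added rev
assert 3 in has and 7 in has and 8 not in has
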
def incrementalmissingrevs(self, common=None):
    """Return an object that can be used to incrementally compute the
    revision numbers of the ancestors of arbitrary sets that are not
    ancestors of common. This is an ancestor.incrementalmissingancestors
    object.

    'common' is a list of revision numbers. If common is not supplied, uses
    nullrev.
    """
    if common is None:
        common = [nullrev]

    if rustancestor is not None:
        return rustancestor.MissingAncestors(self.index, common)
    return ancestor.incrementalmissingancestors(self.parentrevs, common)

def findmissingrevs(self, common=None, heads=None):
    """Return the revision numbers of the ancestors of heads that
    are not ancestors of common.

    More specifically, return a list of revision numbers corresponding to
    nodes N such that every N satisfies the following constraints:

    1. N is an ancestor of some node in 'heads'
    2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of revision numbers. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullrev."""
    if common is None:
        common = [nullrev]
    if heads is None:
        heads = self.headrevs()

    inc = self.incrementalmissingrevs(common=common)
    return inc.missingancestors(heads)

def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

    1. N is an ancestor of some node in 'heads'
    2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    common = [self.rev(n) for n in common]
    heads = [self.rev(n) for n in heads]

    inc = self.incrementalmissingrevs(common=common)
    return [self.node(r) for r in inc.missingancestors(heads)]

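# Illustration: the common/heads contract on a toy single-parent DAG
# (0 <- 1 <- 2 and 1 <- 3). Relative to common=[2], only rev 3 is missing
# for heads=[3], because 0 and 1 are already ancestors of common.
# Hypothetical helper, not the incremental algorithm used above.
toyparents = {0: -1, 1: 0, 2: 1, 3: 1}

def toy_missing(common, heads):
    def ancs(rev):
        while rev != -1:
            yield rev
            rev = toyparents[rev]
    known = {a for c in common for a in ancs(c)}
    return sorted({a for h in heads for a in ancs(h)} - known)

assert toy_missing([2], [3]) == [3]
assert toy_missing([], [2]) == [0, 1, 2]
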
def nodesbetween(self, roots=None, heads=None):
    """Return a topological path from 'roots' to 'heads'.

    Return a tuple (nodes, outroots, outheads) where 'nodes' is a
    topologically sorted list of all nodes N that satisfy both of
    these constraints:

    1. N is a descendant of some node in 'roots'
    2. N is an ancestor of some node in 'heads'

    Every node is considered to be both a descendant and an ancestor
    of itself, so every reachable node in 'roots' and 'heads' will be
    included in 'nodes'.

    'outroots' is the list of reachable nodes in 'roots', i.e., the
    subset of 'roots' that is returned in 'nodes'. Likewise,
    'outheads' is the subset of 'heads' that is also in 'nodes'.

    'roots' and 'heads' are both lists of node IDs. If 'roots' is
    unspecified, uses nullid as the only root. If 'heads' is
    unspecified, uses list of all of the revlog's heads."""
    nonodes = ([], [], [])
    if roots is not None:
        roots = list(roots)
        if not roots:
            return nonodes
        lowestrev = min([self.rev(n) for n in roots])
    else:
        roots = [nullid]  # Everybody's a descendant of nullid
        lowestrev = nullrev
    if (lowestrev == nullrev) and (heads is None):
        # We want _all_ the nodes!
        return ([self.node(r) for r in self], [nullid], list(self.heads()))
    if heads is None:
        # All nodes are ancestors, so the latest ancestor is the last
        # node.
        highestrev = len(self) - 1
        # Set ancestors to None to signal that every node is an ancestor.
        ancestors = None
        # Set heads to an empty dictionary for later discovery of heads
        heads = {}
    else:
        heads = list(heads)
        if not heads:
            return nonodes
        ancestors = set()
        # Turn heads into a dictionary so we can remove 'fake' heads.
        # Also, later we will be using it to filter out the heads we can't
        # find from roots.
        heads = dict.fromkeys(heads, False)
        # Start at the top and keep marking parents until we're done.
        nodestotag = set(heads)
        # Remember where the top was so we can use it as a limit later.
        highestrev = max([self.rev(n) for n in nodestotag])
        while nodestotag:
            # grab a node to tag
            n = nodestotag.pop()
            # Never tag nullid
            if n == nullid:
                continue
            # A node's revision number represents its place in a
            # topologically sorted list of nodes.
            r = self.rev(n)
            if r >= lowestrev:
                if n not in ancestors:
                    # If we are possibly a descendant of one of the roots
                    # and we haven't already been marked as an ancestor
                    ancestors.add(n)  # Mark as ancestor
                    # Add non-nullid parents to list of nodes to tag.
                    nodestotag.update(
                        [p for p in self.parents(n) if p != nullid]
                    )
                elif n in heads:  # We've seen it before, is it a fake head?
                    # So it is, real heads should not be the ancestors of
                    # any other heads.
                    heads.pop(n)
        if not ancestors:
            return nonodes
        # Now that we have our set of ancestors, we want to remove any
        # roots that are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # But, since we weren't, let's recompute the lowest rev to not
            # include roots that aren't ancestors.

            # Filter out roots that aren't ancestors of heads
            roots = [root for root in roots if root in ancestors]
            # Recompute the lowest revision
            if roots:
                lowestrev = min([self.rev(root) for root in roots])
            else:
                # No more roots? Return empty list
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [nullid]
    # Transform our roots list into a set.
    descendants = set(roots)
    # Also, keep the original roots so we can filter out roots that aren't
    # 'real' roots (i.e. are descended from other roots).
    roots = descendants.copy()
    # Our topologically sorted list of output nodes.
    orderedout = []
    # Don't start at nullid since we don't want nullid in our output list,
    # and if nullid shows up in descendants, empty parents will look like
    # they're descendants.
    for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
        n = self.node(r)
        isdescendant = False
        if lowestrev == nullrev:  # Everybody is a descendant of nullid
            isdescendant = True
        elif n in descendants:
            # n is already a descendant
            isdescendant = True
            # This check only needs to be done here because all the roots
            # will start being marked as descendants before the loop.
            if n in roots:
                # If n was a root, check if it's a 'real' root.
                p = tuple(self.parents(n))
                # If any of its parents are descendants, it's not a root.
                if (p[0] in descendants) or (p[1] in descendants):
                    roots.remove(n)
        else:
            p = tuple(self.parents(n))
            # A node is a descendant if either of its parents are
            # descendants. (We seeded the descendants set with the roots
            # up there, remember?)
            if (p[0] in descendants) or (p[1] in descendants):
                descendants.add(n)
                isdescendant = True
        if isdescendant and ((ancestors is None) or (n in ancestors)):
            # Only include nodes that are both descendants and ancestors.
            orderedout.append(n)
            if (ancestors is not None) and (n in heads):
                # We're trying to figure out which heads are reachable
                # from roots.
                # Mark this head as having been reached
                heads[n] = True
            elif ancestors is None:
                # Otherwise, we're trying to discover the heads.
                # Assume this is a head because if it isn't, the next step
                # will eventually remove it.
                heads[n] = True
                # But, obviously its parents aren't.
                for p in self.parents(n):
                    heads.pop(p, None)
    heads = [head for head, flag in pycompat.iteritems(heads) if flag]
    roots = list(roots)
    assert orderedout
    assert roots
    assert heads
    return (orderedout, roots, heads)

def headrevs(self, revs=None):
    if revs is None:
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()
    if rustdagop is not None:
        return rustdagop.headrevs(self.index, revs)
    return dagop.headrevs(revs, self._uncheckedparentrevs)

def computephases(self, roots):
    return self.index.computephasesmapsets(roots)

def _headrevs(self):
    count = len(self)
    if not count:
        return [nullrev]
    # we won't iter over filtered rev so nobody is a head at start
    ishead = [0] * (count + 1)
    index = self.index
    for r in self:
        ishead[r] = 1  # I may be a head
        e = index[r]
        ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
    return [r for r, val in enumerate(ishead) if val]

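# Illustration: the head computation above as a standalone sketch
# (hypothetical helper). Every rev starts as a candidate head and is struck
# out whenever it shows up as somebody's parent; the extra trailing slot
# absorbs nullrev (-1) parents, which is why ``ishead`` is sized count + 1.
def toy_headrevs(parents):
    n = len(parents)
    ishead = [0] * (n + 1)  # last slot swallows writes for nullrev (-1)
    for r in range(n):
        ishead[r] = 1  # r may be a head...
        p1, p2 = parents[r]
        ishead[p1] = ishead[p2] = 0  # ...but its parents are not
    return [r for r in range(n) if ishead[r]]

# parents as (p1, p2), -1 for none: 0 <- 1 <- 2 and 1 <- 3 has heads 2 and 3
assert toy_headrevs([(-1, -1), (0, -1), (1, -1), (1, -1)]) == [2, 3]
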
def heads(self, start=None, stop=None):
    """return the list of all nodes that have no children

    if start is specified, only heads that are descendants of
    start will be returned
    if stop is specified, it will consider all the revs from stop
    as if they had no children
    """
    if start is None and stop is None:
        if not len(self):
            return [nullid]
        return [self.node(r) for r in self.headrevs()]

    if start is None:
        start = nullrev
    else:
        start = self.rev(start)

    stoprevs = {self.rev(n) for n in stop or []}

    revs = dagop.headrevssubset(
        self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
    )

    return [self.node(rev) for rev in revs]

def children(self, node):
    """find the children of a given node"""
    c = []
    p = self.rev(node)
    for r in self.revs(start=p + 1):
        prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
        if prevs:
            for pr in prevs:
                if pr == p:
                    c.append(self.node(r))
        elif p == nullrev:
            c.append(self.node(r))
    return c

def commonancestorsheads(self, a, b):
    """calculate all the heads of the common ancestors of nodes a and b"""
    a, b = self.rev(a), self.rev(b)
    ancs = self._commonancestorsheads(a, b)
    return pycompat.maplist(self.node, ancs)

def _commonancestorsheads(self, *revs):
    """calculate all the heads of the common ancestors of revs"""
    try:
        ancs = self.index.commonancestorsheads(*revs)
    except (AttributeError, OverflowError):  # C implementation failed
        ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
    return ancs

def isancestor(self, a, b):
    """return True if node a is an ancestor of node b

    A revision is considered an ancestor of itself."""
    a, b = self.rev(a), self.rev(b)
    return self.isancestorrev(a, b)

def isancestorrev(self, a, b):
    """return True if revision a is an ancestor of revision b

    A revision is considered an ancestor of itself.

    The implementation of this is trivial but the use of
    reachableroots is not."""
    if a == nullrev:
        return True
    elif a == b:
        return True
    elif a > b:
        return False
    return bool(self.reachableroots(a, [b], [a], includepath=False))

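# Illustration: the key property behind the short-circuits above is that an
# ancestor always has a smaller revision number than any of its descendants,
# so ``a > b`` can answer False without touching the graph. A hypothetical
# pure-Python stand-in for the final reachability test:
def toy_isancestorrev(parents, a, b):
    if a == -1 or a == b:
        return True
    if a > b:
        return False  # revs are topologically ordered
    stack, seen = [b], set()
    while stack:
        r = stack.pop()
        if r == a:
            return True
        if r > a and r not in seen:  # never walk below a
            seen.add(r)
            stack.extend(p for p in parents[r] if p != -1)
    return False

toydag = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
assert toy_isancestorrev(toydag, 1, 3)
assert not toy_isancestorrev(toydag, 2, 3)
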
def reachableroots(self, minroot, heads, roots, includepath=False):
    """return (heads(::(<roots> and <roots>::<heads>)))

    If includepath is True, return (<roots>::<heads>)."""
    try:
        return self.index.reachableroots2(
            minroot, heads, roots, includepath
        )
    except AttributeError:
        return dagop._reachablerootspure(
            self.parentrevs, minroot, roots, heads, includepath
        )

def ancestor(self, a, b):
    """calculate the "best" common ancestor of nodes a and b"""

    a, b = self.rev(a), self.rev(b)
    try:
        ancs = self.index.ancestors(a, b)
    except (AttributeError, OverflowError):
        ancs = ancestor.ancestors(self.parentrevs, a, b)
    if ancs:
        # choose a consistent winner when there's a tie
        return min(map(self.node, ancs))
    return nullid

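# Note: when several "best" common ancestors tie, ``min()`` over the binary
# node IDs is used only to make the winner deterministic across runs and
# implementations; which candidate wins is otherwise arbitrary. For example:
assert min([b'\x9a' * 20, b'\x1c' * 20]) == b'\x1c' * 20
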
def _match(self, id):
    if isinstance(id, int):
        # rev
        return self.node(id)
    if len(id) == 20:
        # possibly a binary node
        # odds of a binary node being all hex in ASCII are 1 in 10**25
        try:
            node = id
            self.rev(node)  # quick search the index
            return node
        except error.LookupError:
            pass  # may be partial hex id
    try:
        # str(rev)
        rev = int(id)
        if b"%d" % rev != id:
            raise ValueError
        if rev < 0:
            rev = len(self) + rev
        if rev < 0 or rev >= len(self):
            raise ValueError
        return self.node(rev)
    except (ValueError, OverflowError):
        pass
    if len(id) == 40:
        try:
            # a full hex nodeid?
            node = bin(id)
            self.rev(node)
            return node
        except (TypeError, error.LookupError):
            pass

def _partialmatch(self, id):
    # we don't care about wdirfilenodeids as they should always be full
    # hashes
    maybewdir = wdirhex.startswith(id)
    try:
        partial = self.index.partialmatch(id)
        if partial and self.hasnode(partial):
            if maybewdir:
                # single 'ff...' match in radix tree, ambiguous with wdir
                raise error.RevlogError
            return partial
        if maybewdir:
            # no 'ff...' match in radix tree, wdir identified
            raise error.WdirUnsupported
        return None
    except error.RevlogError:
        # parsers.c radix tree lookup gave multiple matches
        # fast path: for unfiltered changelog, radix tree is accurate
        if not getattr(self, 'filteredrevs', None):
            raise error.AmbiguousPrefixLookupError(
                id, self.indexfile, _(b'ambiguous identifier')
            )
        # fall through to slow path that filters hidden revisions
    except (AttributeError, ValueError):
        # we are pure python, or key was too short to search radix tree
        pass

    if id in self._pcache:
        return self._pcache[id]

    if len(id) <= 40:
        try:
            # hex(node)[:...]
            l = len(id) // 2  # grab an even number of digits
            prefix = bin(id[: l * 2])
            nl = [e[7] for e in self.index if e[7].startswith(prefix)]
            nl = [
                n for n in nl if hex(n).startswith(id) and self.hasnode(n)
            ]
            if nullhex.startswith(id):
                nl.append(nullid)
            if len(nl) > 0:
                if len(nl) == 1 and not maybewdir:
                    self._pcache[id] = nl[0]
                    return nl[0]
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            if maybewdir:
                raise error.WdirUnsupported
            return None
        except TypeError:
            pass

def lookup(self, id):
    """locate a node based on:
    - revision number or str(revision number)
    - nodeid or subset of hex nodeid
    """
    n = self._match(id)
    if n is not None:
        return n
    n = self._partialmatch(id)
    if n:
        return n

    raise error.LookupError(id, self.indexfile, _(b'no match found'))

def shortest(self, node, minlength=1):
    """Find the shortest unambiguous prefix that matches node."""

    def isvalid(prefix):
        try:
            matchednode = self._partialmatch(prefix)
        except error.AmbiguousPrefixLookupError:
            return False
        except error.WdirUnsupported:
            # single 'ff...' match
            return True
        if matchednode is None:
            raise error.LookupError(node, self.indexfile, _(b'no node'))
        return True

    def maybewdir(prefix):
        return all(c == b'f' for c in pycompat.iterbytestr(prefix))

    hexnode = hex(node)

    def disambiguate(hexnode, minlength):
        """Disambiguate against wdirid."""
        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if not maybewdir(prefix):
                return prefix

    if not getattr(self, 'filteredrevs', None):
        try:
            length = max(self.index.shortest(node), minlength)
            return disambiguate(hexnode, length)
        except error.RevlogError:
            if node != wdirid:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
        except AttributeError:
            # Fall through to pure code
            pass

    if node == wdirid:
        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return prefix

    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if isvalid(prefix):
            return disambiguate(hexnode, length)

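# Illustration: shortest-unique-prefix search over plain hex strings,
# ignoring the wdir special case handled above (hypothetical helper). A
# prefix is valid once it matches exactly one stored node.
def toy_shortest(allhexnodes, hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if [n for n in allhexnodes if n.startswith(prefix)] == [hexnode]:
            return prefix
    return hexnode

nodes = ['d4f0a5b2', 'd4e913cc', '0a3e1f00']
assert toy_shortest(nodes, '0a3e1f00') == '0'
assert toy_shortest(nodes, 'd4f0a5b2') == 'd4f'
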
def cmp(self, node, text):
    """compare text with a given file revision

    returns True if text is different than what is stored.
    """
    p1, p2 = self.parents(node)
    return storageutil.hashrevisionsha1(text, p1, p2) != node

def _cachesegment(self, offset, data):
    """Add a segment to the revlog cache.

    Accepts an absolute offset and the data that is at that location.
    """
    o, d = self._chunkcache
    # try to add to existing cache
    if o + len(d) == offset and len(d) + len(data) < _chunksize:
        self._chunkcache = o, d + data
    else:
        self._chunkcache = offset, data

def _readsegment(self, offset, length, df=None):
    """Load a segment of raw data from the revlog.

    Accepts an absolute offset, length to read, and an optional existing
    file handle to read from.

    If an existing file handle is passed, it will be seeked and the
    original seek position will NOT be restored.

    Returns a str or buffer of raw byte data.

    Raises if the requested number of bytes could not be read.
    """
    # Cache data both forward and backward around the requested
    # data, in a fixed size window. This helps speed up operations
    # involving reading the revlog backwards.
    cachesize = self._chunkcachesize
    realoffset = offset & ~(cachesize - 1)
    reallength = (
        (offset + length + cachesize) & ~(cachesize - 1)
    ) - realoffset
    with self._datareadfp(df) as df:
        df.seek(realoffset)
        d = df.read(reallength)

    self._cachesegment(realoffset, d)
    if offset != realoffset or reallength != length:
        startoffset = offset - realoffset
        if len(d) - startoffset < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from '
                    b'offset %d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    realoffset,
                    len(d) - startoffset,
                )
            )

        return util.buffer(d, startoffset, length)

    if len(d) < length:
        raise error.RevlogError(
            _(
                b'partial read of revlog %s; expected %d bytes from offset '
                b'%d, got %d'
            )
            % (
                self.indexfile if self._inline else self.datafile,
                length,
                offset,
                len(d),
            )
        )

    return d

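# Illustration: the window arithmetic above, isolated (hypothetical helper).
# The bit masks require ``cachesize`` to be a power of two; the start is
# rounded down to a window boundary and the end is rounded up (gaining a
# full extra window of readahead when it was already aligned).
def toy_window(offset, length, cachesize=65536):
    realoffset = offset & ~(cachesize - 1)  # round start down
    reallength = (
        (offset + length + cachesize) & ~(cachesize - 1)
    ) - realoffset  # round end up
    return realoffset, reallength

# a 100-byte read at offset 70000 becomes one aligned 64 KiB read...
assert toy_window(70000, 100) == (65536, 65536)
# ...and a read straddling a boundary becomes two windows
assert toy_window(65530, 100) == (0, 131072)
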
def _getsegment(self, offset, length, df=None):
    """Obtain a segment of raw data from the revlog.

    Accepts an absolute offset, length of bytes to obtain, and an
    optional file handle to the already-opened revlog. If the file
    handle is used, its original seek position will not be preserved.

    Requests for data may be returned from a cache.

    Returns a str or a buffer instance of raw byte data.
    """
    o, d = self._chunkcache
    l = len(d)

    # is it in the cache?
    cachestart = offset - o
    cacheend = cachestart + length
    if cachestart >= 0 and cacheend <= l:
        if cachestart == 0 and cacheend == l:
            return d  # avoid a copy
        return util.buffer(d, cachestart, cacheend - cachestart)

    return self._readsegment(offset, length, df=df)

def _getsegmentforrevs(self, startrev, endrev, df=None):
    """Obtain a segment of raw data corresponding to a range of revisions.

    Accepts the start and end revisions and an optional already-open
    file handle to be used for reading. If the file handle is read, its
    seek position will not be preserved.

    Requests for data may be satisfied by a cache.

    Returns a 2-tuple of (offset, data) for the requested range of
    revisions. Offset is the integer offset from the beginning of the
    revlog and data is a str or buffer of the raw byte data.

    Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
    to determine where each revision's data begins and ends.
    """
    # Inlined self.start(startrev) & self.end(endrev) for perf reasons
    # (functions are expensive).
    index = self.index
    istart = index[startrev]
    start = int(istart[0] >> 16)
    if startrev == endrev:
        end = start + istart[1]
    else:
        iend = index[endrev]
        end = int(iend[0] >> 16) + iend[1]

    if self._inline:
        start += (startrev + 1) * self._io.size
        end += (endrev + 1) * self._io.size
    length = end - start

    return start, self._getsegment(start, length, df=df)

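# Illustration: in an inline revlog the data chunks are interleaved with the
# fixed-size index entries in the same file, so a logical data offset must
# be shifted by one index entry per revision; ``rev + 1`` counts the entries
# for revs 0..rev. A sketch assuming a 64-byte entry size (hypothetical
# helper):
def toy_inline_offset(dataoffset, rev, iosize=64):
    # physical file position = logical data offset plus every index entry
    # that precedes this revision's data
    return dataoffset + (rev + 1) * iosize

assert toy_inline_offset(0, 0) == 64  # rev 0's data follows its own entry
assert toy_inline_offset(100, 2) == 100 + 3 * 64
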
def _chunk(self, rev, df=None):
    """Obtain a single decompressed chunk for a revision.

    Accepts an integer revision and an optional already-open file handle
    to be used for reading. If used, the seek position of the file will not
    be preserved.

    Returns a str holding uncompressed data for the requested revision.
    """
    return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

def _chunks(self, revs, df=None, targetsize=None):
    """Obtain decompressed chunks for the specified revisions.

    Accepts an iterable of numeric revisions that are assumed to be in
    ascending order. Also accepts an optional already-open file handle
    to be used for reading. If used, the seek position of the file will
    not be preserved.

    This function is similar to calling ``self._chunk()`` multiple times,
    but is faster.

    Returns a list with decompressed data for each requested revision.
    """
    if not revs:
        return []
    start = self.start
    length = self.length
    inline = self._inline
    iosize = self._io.size
    buffer = util.buffer

    l = []
    ladd = l.append

    if not self._withsparseread:
        slicedchunks = (revs,)
    else:
        slicedchunks = deltautil.slicechunk(
            self, revs, targetsize=targetsize
        )

    for revschunk in slicedchunks:
        firstrev = revschunk[0]
        # Skip trailing revisions with empty diff
        for lastrev in revschunk[::-1]:
            if length(lastrev) != 0:
                break

        try:
            offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revschunk]

        decomp = self.decompress
        for rev in revschunk:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

    return l

def _chunkclear(self):
    """Clear the raw chunk cache."""
    self._chunkcache = (0, b'')

def deltaparent(self, rev):
    """return deltaparent of the given revision"""
    base = self.index[rev][3]
    if base == rev:
        return nullrev
    elif self._generaldelta:
        return base
    else:
        return rev - 1

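# Illustration: the two delta-parent policies side by side (hypothetical
# helper over the toy index shape used earlier, base in slot 3). With
# generaldelta the stored base *is* the delta parent; in the older layout a
# revision always deltas against the one physically before it.
def toy_deltaparent(index, rev, generaldelta):
    base = index[rev][3]
    if base == rev:
        return -1  # nullrev: stored as a full snapshot
    return base if generaldelta else rev - 1

assert toy_deltaparent({2: (0, 0, 0, 0)}, 2, generaldelta=True) == 0
assert toy_deltaparent({2: (0, 0, 0, 0)}, 2, generaldelta=False) == 1
assert toy_deltaparent({2: (0, 0, 0, 2)}, 2, True) == -1  # snapshot
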
def issnapshot(self, rev):
    """tells whether rev is a snapshot"""
    if not self._sparserevlog:
        return self.deltaparent(rev) == nullrev
    elif util.safehasattr(self.index, b'issnapshot'):
        # directly assign the method to cache the testing and access
        self.issnapshot = self.index.issnapshot
        return self.issnapshot(rev)
    if rev == nullrev:
        return True
    entry = self.index[rev]
    base = entry[3]
    if base == rev:
        return True
    if base == nullrev:
        return True
    p1 = entry[5]
    p2 = entry[6]
    if base == p1 or base == p2:
        return False
    return self.issnapshot(base)

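# Illustration: the sparse-revlog rules above, restated over a toy index of
# (base, p1, p2) tuples (hypothetical helper). A revision that deltas
# against something other than its parents is a snapshot exactly when its
# base is.
def toy_issnapshot(index, rev):
    if rev == -1:
        return True
    base, p1, p2 = index[rev]
    if base == rev or base == -1:
        return True  # full snapshot, or delta against the empty text
    if base in (p1, p2):
        return False  # ordinary delta against a parent
    return toy_issnapshot(index, base)  # intermediate snapshot candidate

toyidx = {0: (0, -1, -1), 1: (0, 0, -1), 2: (0, 1, -1)}
assert toy_issnapshot(toyidx, 0)  # full snapshot
assert not toy_issnapshot(toyidx, 1)  # plain delta against its parent
assert toy_issnapshot(toyidx, 2)  # intermediate snapshot
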
def snapshotdepth(self, rev):
    """number of snapshots in the chain before this one"""
    if not self.issnapshot(rev):
        raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
    return len(self._deltachain(rev)[0]) - 1

def revdiff(self, rev1, rev2):
    """return or calculate a delta between two revisions

    The delta calculated is in binary form and is intended to be written to
    revlog data directly. So this function needs raw revision data.
    """
    if rev1 != nullrev and self.deltaparent(rev2) == rev1:
        return bytes(self._chunk(rev2))

    return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

def _processflags(self, text, flags, operation, raw=False):
    """deprecated entry point to access flag processors"""
    msg = b'_processflag(...) use the specialized variant'
    util.nouideprecwarn(msg, b'5.2', stacklevel=2)
    if raw:
        return text, flagutil.processflagsraw(self, text, flags)
    elif operation == b'read':
        return flagutil.processflagsread(self, text, flags)
    else:  # write operation
        return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.version & 0xFFFF == REVLOGV2:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self._io.size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

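    # Illustrative sketch (not part of the original source): to the best of
    # our knowledge, the default implementation hashes the parents in sorted
    # order followed by the text, roughly:
    #
    #   import hashlib
    #
    #   def sketch_hashrevisionsha1(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()
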
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        troffset = tr.findoffset(self.indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )
        trindex = 0
        tr.add(self.datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

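    # Note (not part of the original source): ``_maxinline`` is a module-level
    # threshold defined earlier in this file; once the inline revision data in
    # the ``.i`` file grows past it, the method above splits the revlog into
    # separate ``.i`` (index) and ``.d`` (data) files.
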
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such a
        case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

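    # Illustrative note (not part of the original source): compress() returns
    # a (header, data) pair whose concatenation is what gets stored:
    #
    #   rl.compress(b'')         -> (b'', b'')          # empty stays empty
    #   rl.compress(b'\0raw')    -> (b'', b'\0raw')     # already self-marking
    #   rl.compress(b'tiny')     -> (b'u', b'tiny')     # when compression
    #                                                   # does not pay off
    #
    # and (b'', <compressed bytes>) when the configured engine wins, since the
    # engine embeds its own header in the compressed payload.
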
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

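    # Illustrative routing table (not part of the original source), mirroring
    # the headers compress() emits:
    #
    #   b'x'   -> zlib stream (0x78 is the first byte of the zlib data itself)
    #   b'\0'  -> raw data, returned as-is
    #   b'u'   -> uncompressed payload following a one-byte marker
    #   other  -> resolved through the registered compression engines
    #             (e.g. zstd chunks, which carry their own header byte)
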
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self.indexfile, offset + curr * self._io.size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self.indexfile, curr * self._io.size
                )
                self._concurrencychecker(dfh, self.datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        if self.version & 0xFFFF != REVLOGV2:
            e = e[:8]

        self.index.append(e)
        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self.version & 0xFFFF != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

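    # Illustrative example (not part of the original source): suppose rev 4's
    # data ends at byte 1000, but rev 3's sidedata was rewritten later and now
    # occupies bytes 1000..1200 (entry[8] == 1000, entry[9] == 200). The next
    # revision must then be appended at offset 1200, which is exactly what
    # taking max(self.end(rev), offset, sidedata_end) over all entries yields.
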
    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self.indexfile, isize)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

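    # Illustrative example (not part of the original source): with revlog v1's
    # 64-byte index entries, stripping at ``rev`` truncates a split revlog's
    # ``.d`` file at self.start(rev) and its ``.i`` file at rev * 64; for an
    # inline revlog, entries and data share the ``.i`` file, so it is
    # truncated at self.start(rev) + rev * 64 instead.
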
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

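    # Illustrative note (not part of the original source): positive dd/di
    # values indicate trailing bytes the index does not account for (typically
    # remnants of an interrupted write), while negative values suggest
    # truncated files; verification tooling can surface both conditions.
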
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

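    # Hypothetical usage sketch (not part of the original source; names are
    # illustrative):
    #
    #   with repo.transaction(b'upgrade') as tr:
    #       src.clone(tr, dst, deltareuse=revlog.DELTAREUSESAMEREVS)
    #
    # where ``src`` and ``dst`` are revlog instances and ``dst`` is empty,
    # as enforced by the checks at the top of clone() below.
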
2774 def clone(
2771 def clone(
2775 self,
2772 self,
2776 tr,
2773 tr,
2777 destrevlog,
2774 destrevlog,
2778 addrevisioncb=None,
2775 addrevisioncb=None,
2779 deltareuse=DELTAREUSESAMEREVS,
2776 deltareuse=DELTAREUSESAMEREVS,
2780 forcedeltabothparents=None,
2777 forcedeltabothparents=None,
2781 sidedatacompanion=None,
2778 sidedatacompanion=None,
2782 ):
2779 ):
2783 """Copy this revlog to another, possibly with format changes.
2780 """Copy this revlog to another, possibly with format changes.
2784
2781
2785 The destination revlog will contain the same revisions and nodes.
2782 The destination revlog will contain the same revisions and nodes.
2786 However, it may not be bit-for-bit identical due to e.g. delta encoding
2783 However, it may not be bit-for-bit identical due to e.g. delta encoding
2787 differences.
2784 differences.
2788
2785
2789 The ``deltareuse`` argument control how deltas from the existing revlog
2786 The ``deltareuse`` argument control how deltas from the existing revlog
2790 are preserved in the destination revlog. The argument can have the
2787 are preserved in the destination revlog. The argument can have the
2791 following values:
2788 following values:
2792
2789
2793 DELTAREUSEALWAYS
2790 DELTAREUSEALWAYS
2794 Deltas will always be reused (if possible), even if the destination
2791 Deltas will always be reused (if possible), even if the destination
2795 revlog would not select the same revisions for the delta. This is the
2792 revlog would not select the same revisions for the delta. This is the
2796 fastest mode of operation.
2793 fastest mode of operation.
2797 DELTAREUSESAMEREVS
2794 DELTAREUSESAMEREVS
2798 Deltas will be reused if the destination revlog would pick the same
2795 Deltas will be reused if the destination revlog would pick the same
2799 revisions for the delta. This mode strikes a balance between speed
2796 revisions for the delta. This mode strikes a balance between speed
2800 and optimization.
2797 and optimization.
2801 DELTAREUSENEVER
2798 DELTAREUSENEVER
2802 Deltas will never be reused. This is the slowest mode of execution.
2799 Deltas will never be reused. This is the slowest mode of execution.
2803 This mode can be used to recompute deltas (e.g. if the diff/delta
2800 This mode can be used to recompute deltas (e.g. if the diff/delta
2804 algorithm changes).
2801 algorithm changes).
2805 DELTAREUSEFULLADD
2802 DELTAREUSEFULLADD
2806 Revision will be re-added as if their were new content. This is
2803 Revision will be re-added as if their were new content. This is
2807 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2804 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2808 eg: large file detection and handling.
2805 eg: large file detection and handling.
2809
2806
2810 Delta computation can be slow, so the choice of delta reuse policy can
2807 Delta computation can be slow, so the choice of delta reuse policy can
2811 significantly affect run time.
2808 significantly affect run time.
2812
2809
2813 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2810 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2814 two extremes. Deltas will be reused if they are appropriate. But if the
2811 two extremes. Deltas will be reused if they are appropriate. But if the
2815 delta could choose a better revision, it will do so. This means if you
2812 delta could choose a better revision, it will do so. This means if you
2816 are converting a non-generaldelta revlog to a generaldelta revlog,
2813 are converting a non-generaldelta revlog to a generaldelta revlog,
2817 deltas will be recomputed if the delta's parent isn't a parent of the
2814 deltas will be recomputed if the delta's parent isn't a parent of the
2818 revision.
2815 revision.
2819
2816
2820 In addition to the delta policy, the ``forcedeltabothparents``
2817 In addition to the delta policy, the ``forcedeltabothparents``
2821 argument controls whether to force compute deltas against both parents
2818 argument controls whether to force compute deltas against both parents
2822 for merges. By default, the current default is used.
2819 for merges. By default, the current default is used.
2823
2820
2824 If not None, the `sidedatacompanion` is callable that accept two
2821 If not None, the `sidedatacompanion` is callable that accept two
2825 arguments:
2822 arguments:
2826
2823
2827 (srcrevlog, rev)
2824 (srcrevlog, rev)
2828
2825
2829 and return a quintet that control changes to sidedata content from the
2826 and return a quintet that control changes to sidedata content from the
2830 old revision to the new clone result:
2827 old revision to the new clone result:
2831
2828
2832 (dropall, filterout, update, new_flags, dropped_flags)
2829 (dropall, filterout, update, new_flags, dropped_flags)
2833
2830
2834 * if `dropall` is True, all sidedata should be dropped
2831 * if `dropall` is True, all sidedata should be dropped
2835 * `filterout` is a set of sidedata keys that should be dropped
2832 * `filterout` is a set of sidedata keys that should be dropped
2836 * `update` is a mapping of additionnal/new key -> value
2833 * `update` is a mapping of additionnal/new key -> value
2837 * new_flags is a bitfields of new flags that the revision should get
2834 * new_flags is a bitfields of new flags that the revision should get
2838 * dropped_flags is a bitfields of new flags that the revision shoudl not longer have
2835 * dropped_flags is a bitfields of new flags that the revision shoudl not longer have
2839 """
2836 """
2837 if deltareuse not in self.DELTAREUSEALL:
2838 raise ValueError(
2839 _(b'value for deltareuse invalid: %s') % deltareuse
2840 )
2841
2842 if len(destrevlog):
2843 raise ValueError(_(b'destination revlog is not empty'))
2844
2845 if getattr(self, 'filteredrevs', None):
2846 raise ValueError(_(b'source revlog has filtered revisions'))
2847 if getattr(destrevlog, 'filteredrevs', None):
2848 raise ValueError(_(b'destination revlog has filtered revisions'))
2849
2850 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2851 # if possible.
2852 oldlazydelta = destrevlog._lazydelta
2853 oldlazydeltabase = destrevlog._lazydeltabase
2854 oldamd = destrevlog._deltabothparents
2855
2856 try:
2857 if deltareuse == self.DELTAREUSEALWAYS:
2858 destrevlog._lazydeltabase = True
2859 destrevlog._lazydelta = True
2860 elif deltareuse == self.DELTAREUSESAMEREVS:
2861 destrevlog._lazydeltabase = False
2862 destrevlog._lazydelta = True
2863 elif deltareuse == self.DELTAREUSENEVER:
2864 destrevlog._lazydeltabase = False
2865 destrevlog._lazydelta = False
2866
2867 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2868
2869 self._clone(
2870 tr,
2871 destrevlog,
2872 addrevisioncb,
2873 deltareuse,
2874 forcedeltabothparents,
2875 sidedatacompanion,
2876 )
2877
2878 finally:
2879 destrevlog._lazydelta = oldlazydelta
2880 destrevlog._lazydeltabase = oldlazydeltabase
2881 destrevlog._deltabothparents = oldamd
2882
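A minimal usage sketch of ``clone`` with a ``sidedatacompanion``; the revlogs ``src`` and ``dest``, the transaction ``tr``, and the sidedata key ``b'example-key'`` are hypothetical stand-ins:

    def drop_example_key(srcrevlog, rev):
        # quintet: (dropall, filterout, update, new_flags, dropped_flags)
        # drop the hypothetical b'example-key' from every revision's sidedata
        return (False, {b'example-key'}, {}, 0, 0)

    src.clone(
        tr,
        dest,
        deltareuse=src.DELTAREUSESAMEREVS,
        sidedatacompanion=drop_example_key,
    )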
2883 def _clone(
2884 self,
2885 tr,
2886 destrevlog,
2887 addrevisioncb,
2888 deltareuse,
2889 forcedeltabothparents,
2890 sidedatacompanion,
2891 ):
2892 """perform the core duty of `revlog.clone` after parameter processing"""
2893 deltacomputer = deltautil.deltacomputer(destrevlog)
2894 index = self.index
2895 for rev in self:
2896 entry = index[rev]
2897
2898 # Some classes override linkrev to take filtered revs into
2899 # account. Use raw entry from index.
2900 flags = entry[0] & 0xFFFF
2901 linkrev = entry[4]
2902 p1 = index[entry[5]][7]
2903 p2 = index[entry[6]][7]
2904 node = entry[7]
2905
2906 sidedataactions = (False, [], {}, 0, 0)
2907 if sidedatacompanion is not None:
2908 sidedataactions = sidedatacompanion(self, rev)
2909
2910 # (Possibly) reuse the delta from the revlog if allowed and
2911 # the revlog chunk is a delta.
2912 cachedelta = None
2913 rawtext = None
2914 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2915 dropall = sidedataactions[0]
2916 filterout = sidedataactions[1]
2917 update = sidedataactions[2]
2918 new_flags = sidedataactions[3]
2919 dropped_flags = sidedataactions[4]
2920 text, sidedata = self._revisiondata(rev)
2921 if dropall:
2922 sidedata = {}
2923 for key in filterout:
2924 sidedata.pop(key, None)
2925 sidedata.update(update)
2926 if not sidedata:
2927 sidedata = None
2928
2929 flags |= new_flags
2930 flags &= ~dropped_flags
2931
2932 destrevlog.addrevision(
2933 text,
2934 tr,
2935 linkrev,
2936 p1,
2937 p2,
2938 cachedelta=cachedelta,
2939 node=node,
2940 flags=flags,
2941 deltacomputer=deltacomputer,
2942 sidedata=sidedata,
2943 )
2944 else:
2945 if destrevlog._lazydelta:
2946 dp = self.deltaparent(rev)
2947 if dp != nullrev:
2948 cachedelta = (dp, bytes(self._chunk(rev)))
2949
2950 if not cachedelta:
2951 rawtext = self.rawdata(rev)
2952
2953 ifh = destrevlog.opener(
2954 destrevlog.indexfile, b'a+', checkambig=False
2955 )
2956 dfh = None
2957 if not destrevlog._inline:
2958 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2959 try:
2960 destrevlog._addrevision(
2961 node,
2962 rawtext,
2963 tr,
2964 linkrev,
2965 p1,
2966 p2,
2967 flags,
2968 cachedelta,
2969 ifh,
2970 dfh,
2971 deltacomputer=deltacomputer,
2972 )
2973 finally:
2974 if dfh:
2975 dfh.close()
2976 ifh.close()
2977
2978 if addrevisioncb:
2979 addrevisioncb(self, rev, node)
2980
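For reference, a sketch of the positional index entry layout that ``_clone`` indexes into above (the names are descriptive labels, not identifiers from the source):

    # entry[0]: offset_flags -- data offset << 16 | flags, hence the `& 0xFFFF`
    # entry[1]: compressed length
    # entry[2]: uncompressed length
    # entry[3]: delta base revision
    # entry[4]: link revision
    # entry[5]: first parent revision
    # entry[6]: second parent revision
    # entry[7]: binary node id
    flags = entry[0] & 0xFFFF
    data_offset = entry[0] >> 16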
2981 def censorrevision(self, tr, censornode, tombstone=b''):
2982 if (self.version & 0xFFFF) == REVLOGV0:
2983 raise error.RevlogError(
2984 _(b'cannot censor with version %d revlogs') % self.version
2985 )
2986
2987 censorrev = self.rev(censornode)
2988 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2989
2990 if len(tombstone) > self.rawsize(censorrev):
2991 raise error.Abort(
2992 _(b'censor tombstone must be no longer than censored data')
2993 )
2994
2995 # Rewriting the revlog in place is hard. Our strategy for censoring is
2996 # to create a new revlog, copy all revisions to it, then replace the
2997 # revlogs on transaction close.
2998
2999 newindexfile = self.indexfile + b'.tmpcensored'
3000 newdatafile = self.datafile + b'.tmpcensored'
3001
3002 # This is a bit dangerous. We could easily have a mismatch of state.
3003 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
3004 newrl.version = self.version
3005 newrl._generaldelta = self._generaldelta
3006 newrl._io = self._io
3007
3008 for rev in self.revs():
3009 node = self.node(rev)
3010 p1, p2 = self.parents(node)
3011
3012 if rev == censorrev:
3013 newrl.addrawrevision(
3014 tombstone,
3015 tr,
3016 self.linkrev(censorrev),
3017 p1,
3018 p2,
3019 censornode,
3020 REVIDX_ISCENSORED,
3021 )
3022
3023 if newrl.deltaparent(rev) != nullrev:
3024 raise error.Abort(
3025 _(
3026 b'censored revision stored as delta; '
3027 b'cannot censor'
3028 ),
3029 hint=_(
3030 b'censoring of revlogs is not '
3031 b'fully implemented; please report '
3032 b'this bug'
3033 ),
3034 )
3035 continue
3036
3037 if self.iscensored(rev):
3038 if self.deltaparent(rev) != nullrev:
3039 raise error.Abort(
3040 _(
3041 b'cannot censor due to censored '
3042 b'revision having delta stored'
3043 )
3044 )
3045 rawtext = self._chunk(rev)
3046 else:
3047 rawtext = self.rawdata(rev)
3048
3049 newrl.addrawrevision(
3050 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3051 )
3052
3053 tr.addbackup(self.indexfile, location=b'store')
3054 if not self._inline:
3055 tr.addbackup(self.datafile, location=b'store')
3056
3057 self.opener.rename(newrl.indexfile, self.indexfile)
3058 if not self._inline:
3059 self.opener.rename(newrl.datafile, self.datafile)
3060
3061 self.clearcaches()
3062 self._loadindex()
3063
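A usage sketch, assuming a revlog ``rl``, a repository object ``repo`` providing the transaction, and a node ``badnode`` to censor:

    with repo.transaction(b'censor') as tr:
        rl.censorrevision(tr, badnode, tombstone=b'removed for legal reasons')

The tombstone must fit inside the space of the censored revision, as checked above.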
3064 def verifyintegrity(self, state):
3065 """Verifies the integrity of the revlog.
3066
3067 Yields ``revlogproblem`` instances describing problems that are
3068 found.
3069 """
3070 dd, di = self.checksize()
3071 if dd:
3072 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3073 if di:
3074 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3075
3076 version = self.version & 0xFFFF
3077
3078 # The verifier tells us what version revlog we should be.
3079 if version != state[b'expectedversion']:
3080 yield revlogproblem(
3081 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3082 % (self.indexfile, version, state[b'expectedversion'])
3083 )
3084
3085 state[b'skipread'] = set()
3086 state[b'safe_renamed'] = set()
3087
3088 for rev in self:
3089 node = self.node(rev)
3090
3091 # Verify contents. 4 cases to care about:
3092 #
3093 # common: the most common case
3094 # rename: with a rename
3095 # meta: file content starts with b'\1\n', the metadata
3096 # header defined in filelog.py, but without a rename
3097 # ext: content stored externally
3098 #
3099 # More formally, their differences are shown below:
3100 #
3101 # | common | rename | meta | ext
3102 # -------------------------------------------------------
3103 # flags() | 0 | 0 | 0 | not 0
3104 # renamed() | False | True | False | ?
3105 # rawtext[0:2]=='\1\n'| False | True | True | ?
3106 #
3107 # "rawtext" means the raw text stored in revlog data, which
3108 # could be retrieved by "rawdata(rev)". "text"
3109 # mentioned below is "revision(rev)".
3110 #
3111 # There are 3 different lengths stored physically:
3112 # 1. L1: rawsize, stored in revlog index
3113 # 2. L2: len(rawtext), stored in revlog data
3114 # 3. L3: len(text), stored in revlog data if flags==0, or
3115 # possibly somewhere else if flags!=0
3116 #
3117 # L1 should be equal to L2. L3 could be different from them.
3118 # "text" may or may not affect commit hash depending on flag
3119 # processors (see flagutil.addflagprocessor).
3120 #
3121 # | common | rename | meta | ext
3122 # -------------------------------------------------
3123 # rawsize() | L1 | L1 | L1 | L1
3124 # size() | L1 | L2-LM | L1(*) | L1 (?)
3125 # len(rawtext) | L2 | L2 | L2 | L2
3126 # len(text) | L2 | L2 | L2 | L3
3127 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3128 #
3129 # LM: length of metadata, depending on rawtext
3130 # (*): not ideal, see comment in filelog.size
3131 # (?): could be "- len(meta)" if the resolved content has
3132 # rename metadata
3133 #
3134 # Checks that need to be done:
3135 # 1. length check: L1 == L2, in all cases.
3136 # 2. hash check: depending on flag processor, we may need to
3137 # use either "text" (external), or "rawtext" (in revlog).
3138
3139 try:
3140 skipflags = state.get(b'skipflags', 0)
3141 if skipflags:
3142 skipflags &= self.flags(rev)
3143
3144 _verify_revision(self, skipflags, state, node)
3145
3146 l1 = self.rawsize(rev)
3147 l2 = len(self.rawdata(node))
3148
3149 if l1 != l2:
3150 yield revlogproblem(
3151 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3152 node=node,
3153 )
3154
3155 except error.CensoredNodeError:
3156 if state[b'erroroncensored']:
3157 yield revlogproblem(
3158 error=_(b'censored file data'), node=node
3159 )
3160 state[b'skipread'].add(node)
3161 except Exception as e:
3162 yield revlogproblem(
3163 error=_(b'unpacking %s: %s')
3164 % (short(node), stringutil.forcebytestr(e)),
3165 node=node,
3166 )
3167 state[b'skipread'].add(node)
3168
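A sketch of consuming this generator, assuming the ``revlogproblem`` attrs class used elsewhere in this module (fields ``warning``, ``error`` and ``node``) and a revlog ``rl``:

    state = {
        b'expectedversion': 1,      # revlog format the verifier expects
        b'erroroncensored': False,  # whether censored data is reported as an error
    }
    for problem in rl.verifyintegrity(state):
        if problem.error is not None:
            handle_error(problem.error, problem.node)    # hypothetical handlers
        elif problem.warning is not None:
            handle_warning(problem.warning)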
3169 def storageinfo(
3170 self,
3171 exclusivefiles=False,
3172 sharedfiles=False,
3173 revisionscount=False,
3174 trackedsize=False,
3175 storedsize=False,
3176 ):
3177 d = {}
3178
3179 if exclusivefiles:
3180 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3181 if not self._inline:
3182 d[b'exclusivefiles'].append((self.opener, self.datafile))
3183
3184 if sharedfiles:
3185 d[b'sharedfiles'] = []
3186
3187 if revisionscount:
3188 d[b'revisionscount'] = len(self)
3189
3190 if trackedsize:
3191 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3192
3193 if storedsize:
3194 d[b'storedsize'] = sum(
3195 self.opener.stat(path).st_size for path in self.files()
3196 )
3197
3198 return d
3199
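Callers opt into each piece of information explicitly, e.g. (a sketch, with ``rl`` a revlog instance):

    info = rl.storageinfo(revisionscount=True, trackedsize=True)
    # e.g. {b'revisionscount': 42, b'trackedsize': 16384}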
3200 def rewrite_sidedata(self, helpers, startrev, endrev):
3201 if self.version & 0xFFFF != REVLOGV2:
3202 return
3203 # inline revlogs are not yet supported because they suffer from an
3204 # issue when rewriting them (since it's not an append-only operation).
3205 # See issue6485.
3206 assert not self._inline
3207 if not helpers[1] and not helpers[2]:
3208 # Nothing to generate or remove
3209 return
3210
3211 new_entries = []
3212 # append the new sidedata
3213 with self._datafp(b'a+') as fp:
3214 # Maybe this bug still exists, see revlog._writeentry
3215 fp.seek(0, os.SEEK_END)
3216 current_offset = fp.tell()
3217 for rev in range(startrev, endrev + 1):
3218 entry = self.index[rev]
3219 new_sidedata = storageutil.run_sidedata_helpers(
3220 store=self,
3221 sidedata_helpers=helpers,
3222 sidedata={},
3223 rev=rev,
3224 )
3225
3226 serialized_sidedata = sidedatautil.serialize_sidedata(
3227 new_sidedata
3228 )
3229 if entry[8] != 0 or entry[9] != 0:
3230 # rewriting entries that already have sidedata is not
3231 # supported yet, because it introduces garbage data in the
3232 # revlog.
3233 msg = b"Rewriting existing sidedata is not supported yet"
3234 raise error.Abort(msg)
3235 entry = entry[:8]
3236 entry += (current_offset, len(serialized_sidedata))
3237
3238 fp.write(serialized_sidedata)
3239 new_entries.append(entry)
3240 current_offset += len(serialized_sidedata)
3241
3242 # rewrite the new index entries
3243 with self._indexfp(b'w+') as fp:
3244 fp.seek(startrev * self._io.size)
3245 for i, entry in enumerate(new_entries):
3246 rev = startrev + i
3247 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3248 packed = self._io.packentry(entry, self.node, self.version, rev)
3249 fp.write(packed)
@@ -1,105 +1,107 b''
1 # revlogdeltas.py - constants used for revlog logic
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
9
10 from __future__ import absolute_import
11
12 import struct
13
14 from ..interfaces import repository
15
16 ### main revlog header
17
18 INDEX_HEADER = struct.Struct(b">I")
19
20 ## revlog version
21 REVLOGV0 = 0
22 REVLOGV1 = 1
23 # Dummy value until file format is finalized.
24 REVLOGV2 = 0xDEAD
25
26 ## global revlog header flags
27 # Shared across v1 and v2.
28 FLAG_INLINE_DATA = 1 << 16
29 # Only used by v1, implied by v2.
30 FLAG_GENERALDELTA = 1 << 17
31 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
32 REVLOG_DEFAULT_FORMAT = REVLOGV1
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
35 REVLOGV2_FLAGS = FLAG_INLINE_DATA
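A sketch of how the 4-byte header read by ``INDEX_HEADER`` is typically decoded (``index_data`` stands for the raw index bytes): the version lives in the low 16 bits, the feature flags in the high bits:

    header = INDEX_HEADER.unpack(index_data[:4])[0]
    version = header & 0xFFFF  # REVLOGV0, REVLOGV1, ...
    flags = header & ~0xFFFF   # e.g. FLAG_INLINE_DATA | FLAG_GENERALDELTA
    inline = bool(flags & FLAG_INLINE_DATA)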
36
37 ### individual entry
38
39 ## index v0:
40 # 4 bytes: offset
41 # 4 bytes: compressed length
42 # 4 bytes: base rev
43 # 4 bytes: link rev
44 # 20 bytes: parent 1 nodeid
45 # 20 bytes: parent 2 nodeid
46 # 20 bytes: nodeid
47 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
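For example, packing a v0 entry with placeholder values (the three 20-byte fields are binary node ids):

    entry = INDEX_ENTRY_V0.pack(0, 11, 0, 0, b'\x00' * 20, b'\x00' * 20, b'\x01' * 20)
    assert len(entry) == INDEX_ENTRY_V0.size  # 76 bytes
    offset, comp_len, base_rev, link_rev, p1, p2, node = INDEX_ENTRY_V0.unpack(entry)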
48
49 ## index v1
50 # 6 bytes: offset
51 # 2 bytes: flags
52 # 4 bytes: compressed length
53 # 4 bytes: uncompressed length
54 # 4 bytes: base rev
55 # 4 bytes: link rev
56 # 4 bytes: parent 1 rev
57 # 4 bytes: parent 2 rev
58 # 32 bytes: nodeid
59 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
60 assert INDEX_ENTRY_V1.size == 32 * 2
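Note that the 6-byte offset and the 2-byte flags share the single ``Q`` field; a sketch with placeholder values (``-1`` stands for a missing parent revision):

    offset_flags = (4096 << 16) | 0  # offset in the high 48 bits, flags in the low 16
    entry = INDEX_ENTRY_V1.pack(offset_flags, 10, 25, 0, 0, -1, -1, b'\x00' * 20)
    assert len(entry) == INDEX_ENTRY_V1.size  # 64 bytes, nodeid padded to 32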
61
62 # 6 bytes: offset
63 # 2 bytes: flags
64 # 4 bytes: compressed length
65 # 4 bytes: uncompressed length
66 # 4 bytes: base rev
67 # 4 bytes: link rev
68 # 4 bytes: parent 1 rev
69 # 4 bytes: parent 2 rev
70 # 32 bytes: nodeid
71 # 8 bytes: sidedata offset
72 # 4 bytes: sidedata compressed length
73 # 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
74 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQi20x")
75 assert INDEX_ENTRY_V2.size == 32 * 3
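The v2 entry extends the v1 layout with the two sidedata fields and trailing padding; a sketch with placeholder values:

    entry = INDEX_ENTRY_V2.pack(
        (4096 << 16) | 0, 10, 25, 0, 0, -1, -1, b'\x00' * 20,
        131072, 64,  # sidedata offset and sidedata compressed length
    )
    assert len(entry) == INDEX_ENTRY_V2.size  # 96 bytes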
76
77 # revlog index flags
78
79 # For historical reasons, revlog's internal flags were exposed via the
80 # wire protocol and are even exposed in parts of the storage APIs.
81
82 # revision has censor metadata, must be verified
83 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
84 # revision hash does not match data (narrowhg)
85 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
86 # revision data is stored externally
87 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
88 # revision data contains extra metadata not part of the official digest
89 REVIDX_SIDEDATA = repository.REVISION_FLAG_SIDEDATA
90 # revision changes files in a way that could affect copy tracing.
91 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
92 REVIDX_DEFAULT_FLAGS = 0
93 # stable order in which flags need to be processed and their processors applied
94 REVIDX_FLAGS_ORDER = [
95 REVIDX_ISCENSORED,
96 REVIDX_ELLIPSIS,
97 REVIDX_EXTSTORED,
98 REVIDX_SIDEDATA,
99 REVIDX_HASCOPIESINFO,
100 ]
101
102 # bitmask for flags that could cause rawdata content change
103 REVIDX_RAWTEXT_CHANGING_FLAGS = (
104 REVIDX_ISCENSORED | REVIDX_EXTSTORED | REVIDX_SIDEDATA
105 )
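A small sketch of how this mask is meant to be used: a revision whose flags intersect it cannot be assumed to store a rawtext equal to the resolved text:

    def rawtext_may_differ(flags):
        # True when any flag that can alter the stored rawtext is set
        return bool(flags & REVIDX_RAWTEXT_CHANGING_FLAGS)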
106
107 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000