revlog: code for `revlogv0` in its own module...
marmoute
r47812:724db234 default
@@ -0,0 +1,144 @@
1 # revlogv0 - code related to revlog format "V0"
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7 from __future__ import absolute_import
8
9
10 from ..node import sha1nodeconstants
11 from .constants import (
12 INDEX_ENTRY_V0,
13 )
14 from ..i18n import _
15
16 from .. import (
17 error,
18 node,
19 pycompat,
20 util,
21 )
22
23 from . import (
24 flagutil,
25 nodemap as nodemaputil,
26 )
27
28
29 def getoffset(q):
30 return int(q >> 16)
31
32
33 def gettype(q):
34 return int(q & 0xFFFF)
35
36
37 def offset_type(offset, type):
38 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
39 raise ValueError(b'unknown revlog index flags')
40 return int(int(offset) << 16 | type)
41
42
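# A round-trip sketch of the packing implemented by the three helpers
# above (hedged illustration; a flags value of 0 is always accepted
# since it carries no unknown bits):
#
#     packed = offset_type(4096, 0)     # offset in the high bits, flags in the low 16
#     assert getoffset(packed) == 4096
#     assert gettype(packed) == 0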
43 class revlogoldindex(list):
44 entry_size = INDEX_ENTRY_V0.size
45
46 @property
47 def nodemap(self):
48 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
49 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
50 return self._nodemap
51
52 @util.propertycache
53 def _nodemap(self):
54 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: node.nullrev})
55 for r in range(0, len(self)):
56 n = self[r][7]
57 nodemap[n] = r
58 return nodemap
59
60 def has_node(self, node):
61 """return True if the node exist in the index"""
62 return node in self._nodemap
63
64 def rev(self, node):
65 """return a revision for a node
66
67 If the node is unknown, raise a RevlogError"""
68 return self._nodemap[node]
69
70 def get_rev(self, node):
71 """return a revision for a node
72
73 If the node is unknown, return None"""
74 return self._nodemap.get(node)
75
76 def append(self, tup):
77 self._nodemap[tup[7]] = len(self)
78 super(revlogoldindex, self).append(tup)
79
80 def __delitem__(self, i):
81 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
82 raise ValueError(b"deleting slices only supports a:-1 with step 1")
83 for r in pycompat.xrange(i.start, len(self)):
84 del self._nodemap[self[r][7]]
85 super(revlogoldindex, self).__delitem__(i)
86
87 def clearcaches(self):
88 self.__dict__.pop('_nodemap', None)
89
90 def __getitem__(self, i):
91 if i == -1:
92 return (0, 0, 0, -1, -1, -1, -1, node.nullid)
93 return list.__getitem__(self, i)
94
95 def pack_header(self, header):
96 """pack header information in binary"""
97 return b''
98
99 def entry_binary(self, rev):
100 """return the raw binary string representing a revision"""
101 entry = self[rev]
102 if gettype(entry[0]):
103 raise error.RevlogError(
104 _(b'index entry flags need revlog version 1')
105 )
106 e2 = (
107 getoffset(entry[0]),
108 entry[1],
109 entry[3],
110 entry[4],
111 self[entry[5]][7],
112 self[entry[6]][7],
113 entry[7],
114 )
115 return INDEX_ENTRY_V0.pack(*e2)
116
117
118 def parse_index_v0(data, inline):
119 s = INDEX_ENTRY_V0.size
120 index = []
121 nodemap = nodemaputil.NodeMap({node.nullid: node.nullrev})
122 n = off = 0
123 l = len(data)
124 while off + s <= l:
125 cur = data[off : off + s]
126 off += s
127 e = INDEX_ENTRY_V0.unpack(cur)
128 # transform to revlogv1 format
129 e2 = (
130 offset_type(e[0], 0),
131 e[1],
132 -1,
133 e[2],
134 e[3],
135 nodemap.get(e[4], node.nullrev),
136 nodemap.get(e[5], node.nullrev),
137 e[6],
138 )
139 index.append(e2)
140 nodemap[e[6]] = n
141 n += 1
142
143 index = revlogoldindex(index)
144 return index, None
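For orientation, `INDEX_ENTRY_V0` is defined in `revlogutils/constants.py` and is not shown in this diff. A hedged sketch of decoding one raw v0 record, assuming the classic `>4l20s20s20s` layout (four big-endian 32-bit integers followed by three 20-byte nodeids):

    import struct

    # assumed field order: offset, compressed size, base rev, link rev,
    # p1 node, p2 node, node -- the order parse_index_v0 consumes above
    INDEX_V0 = struct.Struct(b'>4l20s20s20s')

    def read_first_entry(data):
        # decode only the first fixed-size record, for illustration
        return INDEX_V0.unpack(data[:INDEX_V0.size])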
@@ -1,3233 +1,3121 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 FLAG_GENERALDELTA,
38 38 FLAG_INLINE_DATA,
39 INDEX_ENTRY_V0,
40 39 INDEX_HEADER,
41 40 REVLOGV0,
42 41 REVLOGV1,
43 42 REVLOGV1_FLAGS,
44 43 REVLOGV2,
45 44 REVLOGV2_FLAGS,
46 45 REVLOG_DEFAULT_FLAGS,
47 46 REVLOG_DEFAULT_FORMAT,
48 47 REVLOG_DEFAULT_VERSION,
49 48 )
50 49 from .revlogutils.flagutil import (
51 50 REVIDX_DEFAULT_FLAGS,
52 51 REVIDX_ELLIPSIS,
53 52 REVIDX_EXTSTORED,
54 53 REVIDX_FLAGS_ORDER,
55 54 REVIDX_HASCOPIESINFO,
56 55 REVIDX_ISCENSORED,
57 56 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 57 REVIDX_SIDEDATA,
59 58 )
60 59 from .thirdparty import attr
61 60 from . import (
62 61 ancestor,
63 62 dagop,
64 63 error,
65 64 mdiff,
66 65 policy,
67 66 pycompat,
68 67 templatefilters,
69 68 util,
70 69 )
71 70 from .interfaces import (
72 71 repository,
73 72 util as interfaceutil,
74 73 )
75 74 from .revlogutils import (
76 75 deltas as deltautil,
77 76 flagutil,
78 77 nodemap as nodemaputil,
78 revlogv0,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanked usage of all the names to prevent pyflakes constraints
87 87 # We need these names available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_HASCOPIESINFO
102 102 REVIDX_EXTSTORED
103 103 REVIDX_DEFAULT_FLAGS
104 104 REVIDX_FLAGS_ORDER
105 105 REVIDX_RAWTEXT_CHANGING_FLAGS
106 106
107 107 parsers = policy.importmod('parsers')
108 108 rustancestor = policy.importrust('ancestor')
109 109 rustdagop = policy.importrust('dagop')
110 110 rustrevlog = policy.importrust('revlog')
111 111
112 112 # Aliased for performance.
113 113 _zlibdecompress = zlib.decompress
114 114
115 115 # max size of revlog with inline data
116 116 _maxinline = 131072
117 117 _chunksize = 1048576
118 118
119 119 # Flag processors for REVIDX_ELLIPSIS.
120 120 def ellipsisreadprocessor(rl, text):
121 121 return text, False
122 122
123 123
124 124 def ellipsiswriteprocessor(rl, text):
125 125 return text, False
126 126
127 127
128 128 def ellipsisrawprocessor(rl, text):
129 129 return False
130 130
131 131
132 132 ellipsisprocessor = (
133 133 ellipsisreadprocessor,
134 134 ellipsiswriteprocessor,
135 135 ellipsisrawprocessor,
136 136 )
137 137
138 138
139 def getoffset(q):
140 return int(q >> 16)
141
142
143 def gettype(q):
144 return int(q & 0xFFFF)
145
146
147 139 def offset_type(offset, type):
148 140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
149 141 raise ValueError(b'unknown revlog index flags')
150 142 return int(int(offset) << 16 | type)
151 143
152 144
153 145 def _verify_revision(rl, skipflags, state, node):
154 146 """Verify the integrity of the given revlog ``node`` while providing a hook
155 147 point for extensions to influence the operation."""
156 148 if skipflags:
157 149 state[b'skipread'].add(node)
158 150 else:
159 151 # Side-effect: read content and verify hash.
160 152 rl.revision(node)
161 153
162 154
163 155 # True if a fast implementation for persistent-nodemap is available
164 156 #
165 157 # We also consider we have a "fast" implementation in "pure" python because
166 158 # people using pure don't really have performance considerations (and a
167 159 # wheelbarrow of other slowness sources)
168 160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
169 161 parsers, 'BaseIndexObject'
170 162 )
171 163
172 164
173 165 @attr.s(slots=True, frozen=True)
174 166 class _revisioninfo(object):
175 167 """Information about a revision that allows building its fulltext
176 168 node: expected hash of the revision
177 169 p1, p2: parent revs of the revision
178 170 btext: built text cache consisting of a one-element list
179 171 cachedelta: (baserev, uncompressed_delta) or None
180 172 flags: flags associated with the revision storage
181 173
182 174 One of btext[0] or cachedelta must be set.
183 175 """
184 176
185 177 node = attr.ib()
186 178 p1 = attr.ib()
187 179 p2 = attr.ib()
188 180 btext = attr.ib()
189 181 textlen = attr.ib()
190 182 cachedelta = attr.ib()
191 183 flags = attr.ib()
192 184
193 185
194 186 @interfaceutil.implementer(repository.irevisiondelta)
195 187 @attr.s(slots=True)
196 188 class revlogrevisiondelta(object):
197 189 node = attr.ib()
198 190 p1node = attr.ib()
199 191 p2node = attr.ib()
200 192 basenode = attr.ib()
201 193 flags = attr.ib()
202 194 baserevisionsize = attr.ib()
203 195 revision = attr.ib()
204 196 delta = attr.ib()
205 197 sidedata = attr.ib()
206 198 linknode = attr.ib(default=None)
207 199
208 200
209 201 @interfaceutil.implementer(repository.iverifyproblem)
210 202 @attr.s(frozen=True)
211 203 class revlogproblem(object):
212 204 warning = attr.ib(default=None)
213 205 error = attr.ib(default=None)
214 206 node = attr.ib(default=None)
215 207
216 208
217 class revlogoldindex(list):
218 entry_size = INDEX_ENTRY_V0.size
219
220 @property
221 def nodemap(self):
222 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
223 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
224 return self._nodemap
225
226 @util.propertycache
227 def _nodemap(self):
228 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
229 for r in range(0, len(self)):
230 n = self[r][7]
231 nodemap[n] = r
232 return nodemap
233
234 def has_node(self, node):
235 """return True if the node exist in the index"""
236 return node in self._nodemap
237
238 def rev(self, node):
239 """return a revision for a node
240
241 If the node is unknown, raise a RevlogError"""
242 return self._nodemap[node]
243
244 def get_rev(self, node):
245 """return a revision for a node
246
247 If the node is unknown, return None"""
248 return self._nodemap.get(node)
249
250 def append(self, tup):
251 self._nodemap[tup[7]] = len(self)
252 super(revlogoldindex, self).append(tup)
253
254 def __delitem__(self, i):
255 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
256 raise ValueError(b"deleting slices only supports a:-1 with step 1")
257 for r in pycompat.xrange(i.start, len(self)):
258 del self._nodemap[self[r][7]]
259 super(revlogoldindex, self).__delitem__(i)
260
261 def clearcaches(self):
262 self.__dict__.pop('_nodemap', None)
263
264 def __getitem__(self, i):
265 if i == -1:
266 return (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
267 return list.__getitem__(self, i)
268
269 def entry_binary(self, rev):
270 """return the raw binary string representing a revision"""
271 entry = self[rev]
272 if gettype(entry[0]):
273 raise error.RevlogError(
274 _(b'index entry flags need revlog version 1')
275 )
276 e2 = (
277 getoffset(entry[0]),
278 entry[1],
279 entry[3],
280 entry[4],
281 self[entry[5]][7],
282 self[entry[6]][7],
283 entry[7],
284 )
285 return INDEX_ENTRY_V0.pack(*e2)
286
287 def pack_header(self, header):
288 """Pack header information in binary"""
289 return b''
290
291
292 def parse_index_v0(data, inline):
293 s = INDEX_ENTRY_V0.size
294 index = []
295 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
296 n = off = 0
297 l = len(data)
298 while off + s <= l:
299 cur = data[off : off + s]
300 off += s
301 e = INDEX_ENTRY_V0.unpack(cur)
302 # transform to revlogv1 format
303 e2 = (
304 offset_type(e[0], 0),
305 e[1],
306 -1,
307 e[2],
308 e[3],
309 nodemap.get(e[4], nullrev),
310 nodemap.get(e[5], nullrev),
311 e[6],
312 )
313 index.append(e2)
314 nodemap[e[6]] = n
315 n += 1
316
317 index = revlogoldindex(index)
318 return index, None
319
320
321 209 def parse_index_v1(data, inline):
322 210 # call the C implementation to parse the index data
323 211 index, cache = parsers.parse_index2(data, inline)
324 212 return index, cache
325 213
326 214
327 215 def parse_index_v2(data, inline):
328 216 # call the C implementation to parse the index data
329 217 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
330 218 return index, cache
331 219
332 220
333 221 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
334 222
335 223 def parse_index_v1_nodemap(data, inline):
336 224 index, cache = parsers.parse_index_devel_nodemap(data, inline)
337 225 return index, cache
338 226
339 227
340 228 else:
341 229 parse_index_v1_nodemap = None
342 230
343 231
344 232 def parse_index_v1_mixed(data, inline):
345 233 index, cache = parse_index_v1(data, inline)
346 234 return rustrevlog.MixedIndex(index), cache
347 235
348 236
349 237 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
350 238 # signed integer)
351 239 _maxentrysize = 0x7FFFFFFF
352 240
353 241
354 242 class revlog(object):
355 243 """
356 244 the underlying revision storage object
357 245
358 246 A revlog consists of two parts, an index and the revision data.
359 247
360 248 The index is a file with a fixed record size containing
361 249 information on each revision, including its nodeid (hash), the
362 250 nodeids of its parents, the position and offset of its data within
363 251 the data file, and the revision it's based on. Finally, each entry
364 252 contains a linkrev entry that can serve as a pointer to external
365 253 data.
366 254
367 255 The revision data itself is a linear collection of data chunks.
368 256 Each chunk represents a revision and is usually represented as a
369 257 delta against the previous chunk. To bound lookup time, runs of
370 258 deltas are limited to about 2 times the length of the original
371 259 version data. This makes retrieval of a version proportional to
372 260 its size, or O(1) relative to the number of revisions.
373 261
374 262 Both pieces of the revlog are written to in an append-only
375 263 fashion, which means we never need to rewrite a file to insert or
376 264 remove data, and can use some simple techniques to avoid the need
377 265 for locking while reading.
378 266
379 267 If checkambig, indexfile is opened with checkambig=True at
380 268 writing, to avoid file stat ambiguity.
381 269
382 270 If mmaplargeindex is True, and an mmapindexthreshold is set, the
383 271 index will be mmapped rather than read if it is larger than the
384 272 configured threshold.
385 273
386 274 If censorable is True, the revlog can have censored revisions.
387 275
388 276 If `upperboundcomp` is not None, this is the expected maximal gain from
389 277 compression for the data content.
390 278
391 279 `concurrencychecker` is an optional function that receives 3 arguments: a
392 280 file handle, a filename, and an expected position. It should check whether
393 281 the current position in the file handle is valid, and log/warn/fail (by
394 282 raising).
395 283 """
396 284
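# A usage sketch of the two-file layout described above (hedged;
# `opener` stands for any vfs-style opener and the filename is
# illustrative):
#
#     rl = revlog(opener, b'data/somefile.i')  # datafile defaults to the b'.d' twin
#     text = rl.revision(rl.tip())             # fulltext of the tip revision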
397 285 _flagserrorclass = error.RevlogError
398 286
399 287 def __init__(
400 288 self,
401 289 opener,
402 290 indexfile,
403 291 datafile=None,
404 292 checkambig=False,
405 293 mmaplargeindex=False,
406 294 censorable=False,
407 295 upperboundcomp=None,
408 296 persistentnodemap=False,
409 297 concurrencychecker=None,
410 298 ):
411 299 """
412 300 create a revlog object
413 301
414 302 opener is a function that abstracts the file opening operation
415 303 and can be used to implement COW semantics or the like.
416 304
417 305 """
418 306 self.upperboundcomp = upperboundcomp
419 307 self.indexfile = indexfile
420 308 self.datafile = datafile or (indexfile[:-2] + b".d")
421 309 self.nodemap_file = None
422 310 if persistentnodemap:
423 311 self.nodemap_file = nodemaputil.get_nodemap_file(
424 312 opener, self.indexfile
425 313 )
426 314
427 315 self.opener = opener
428 316 # When True, indexfile is opened with checkambig=True at writing, to
429 317 # avoid file stat ambiguity.
430 318 self._checkambig = checkambig
431 319 self._mmaplargeindex = mmaplargeindex
432 320 self._censorable = censorable
433 321 # 3-tuple of (node, rev, text) for a raw revision.
434 322 self._revisioncache = None
435 323 # Maps rev to chain base rev.
436 324 self._chainbasecache = util.lrucachedict(100)
437 325 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
438 326 self._chunkcache = (0, b'')
439 327 # How much data to read and cache into the raw revlog data cache.
440 328 self._chunkcachesize = 65536
441 329 self._maxchainlen = None
442 330 self._deltabothparents = True
443 331 self.index = None
444 332 self._nodemap_docket = None
445 333 # Mapping of partial identifiers to full nodes.
446 334 self._pcache = {}
447 335 # Mapping of revision integer to full node.
448 336 self._compengine = b'zlib'
449 337 self._compengineopts = {}
450 338 self._maxdeltachainspan = -1
451 339 self._withsparseread = False
452 340 self._sparserevlog = False
453 341 self._srdensitythreshold = 0.50
454 342 self._srmingapsize = 262144
455 343
456 344 # Make copy of flag processors so each revlog instance can support
457 345 # custom flags.
458 346 self._flagprocessors = dict(flagutil.flagprocessors)
459 347
460 348 # 2-tuple of file handles being used for active writing.
461 349 self._writinghandles = None
462 350
463 351 self._loadindex()
464 352
465 353 self._concurrencychecker = concurrencychecker
466 354
467 355 def _loadindex(self):
468 356 mmapindexthreshold = None
469 357 opts = self.opener.options
470 358
471 359 if b'revlogv2' in opts:
472 360 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
473 361 elif b'revlogv1' in opts:
474 362 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
475 363 if b'generaldelta' in opts:
476 364 newversionflags |= FLAG_GENERALDELTA
477 365 elif b'revlogv0' in self.opener.options:
478 366 newversionflags = REVLOGV0
479 367 else:
480 368 newversionflags = REVLOG_DEFAULT_VERSION
481 369
482 370 if b'chunkcachesize' in opts:
483 371 self._chunkcachesize = opts[b'chunkcachesize']
484 372 if b'maxchainlen' in opts:
485 373 self._maxchainlen = opts[b'maxchainlen']
486 374 if b'deltabothparents' in opts:
487 375 self._deltabothparents = opts[b'deltabothparents']
488 376 self._lazydelta = bool(opts.get(b'lazydelta', True))
489 377 self._lazydeltabase = False
490 378 if self._lazydelta:
491 379 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
492 380 if b'compengine' in opts:
493 381 self._compengine = opts[b'compengine']
494 382 if b'zlib.level' in opts:
495 383 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
496 384 if b'zstd.level' in opts:
497 385 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
498 386 if b'maxdeltachainspan' in opts:
499 387 self._maxdeltachainspan = opts[b'maxdeltachainspan']
500 388 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
501 389 mmapindexthreshold = opts[b'mmapindexthreshold']
502 390 self.hassidedata = bool(opts.get(b'side-data', False))
503 391 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
504 392 withsparseread = bool(opts.get(b'with-sparse-read', False))
505 393 # sparse-revlog forces sparse-read
506 394 self._withsparseread = self._sparserevlog or withsparseread
507 395 if b'sparse-read-density-threshold' in opts:
508 396 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
509 397 if b'sparse-read-min-gap-size' in opts:
510 398 self._srmingapsize = opts[b'sparse-read-min-gap-size']
511 399 if opts.get(b'enableellipsis'):
512 400 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
513 401
514 402 # revlog v0 doesn't have flag processors
515 403 for flag, processor in pycompat.iteritems(
516 404 opts.get(b'flagprocessors', {})
517 405 ):
518 406 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
519 407
520 408 if self._chunkcachesize <= 0:
521 409 raise error.RevlogError(
522 410 _(b'revlog chunk cache size %r is not greater than 0')
523 411 % self._chunkcachesize
524 412 )
525 413 elif self._chunkcachesize & (self._chunkcachesize - 1):
526 414 raise error.RevlogError(
527 415 _(b'revlog chunk cache size %r is not a power of 2')
528 416 % self._chunkcachesize
529 417 )
530 418
531 419 indexdata = b''
532 420 self._initempty = True
533 421 try:
534 422 with self._indexfp() as f:
535 423 if (
536 424 mmapindexthreshold is not None
537 425 and self.opener.fstat(f).st_size >= mmapindexthreshold
538 426 ):
539 427 # TODO: should .close() to release resources without
540 428 # relying on Python GC
541 429 indexdata = util.buffer(util.mmapread(f))
542 430 else:
543 431 indexdata = f.read()
544 432 if len(indexdata) > 0:
545 433 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
546 434 self._initempty = False
547 435 else:
548 436 versionflags = newversionflags
549 437 except IOError as inst:
550 438 if inst.errno != errno.ENOENT:
551 439 raise
552 440
553 441 versionflags = newversionflags
554 442
555 443 self.version = versionflags
556 444
557 445 flags = versionflags & ~0xFFFF
558 446 fmt = versionflags & 0xFFFF
559 447
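# Worked example of the split above (values assume the usual constants:
# REVLOGV1 == 1, FLAG_INLINE_DATA == 1 << 16): a v1 inline revlog has
# versionflags == 0x00010001, so fmt == 1 and flags == 0x00010000.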
560 448 if fmt == REVLOGV0:
561 449 if flags:
562 450 raise error.RevlogError(
563 451 _(b'unknown flags (%#04x) in version %d revlog %s')
564 452 % (flags >> 16, fmt, self.indexfile)
565 453 )
566 454
567 455 self._inline = False
568 456 self._generaldelta = False
569 457
570 458 elif fmt == REVLOGV1:
571 459 if flags & ~REVLOGV1_FLAGS:
572 460 raise error.RevlogError(
573 461 _(b'unknown flags (%#04x) in version %d revlog %s')
574 462 % (flags >> 16, fmt, self.indexfile)
575 463 )
576 464
577 465 self._inline = versionflags & FLAG_INLINE_DATA
578 466 self._generaldelta = versionflags & FLAG_GENERALDELTA
579 467
580 468 elif fmt == REVLOGV2:
581 469 if flags & ~REVLOGV2_FLAGS:
582 470 raise error.RevlogError(
583 471 _(b'unknown flags (%#04x) in version %d revlog %s')
584 472 % (flags >> 16, fmt, self.indexfile)
585 473 )
586 474
587 475 # There is a bug in the transaction handling when going from an
588 476 # inline revlog to a separate index and data file. Turn it off until
589 477 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
590 478 # See issue6485
591 479 self._inline = False
592 480 # generaldelta implied by version 2 revlogs.
593 481 self._generaldelta = True
594 482
595 483 else:
596 484 raise error.RevlogError(
597 485 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
598 486 )
599 487
600 488 self.nodeconstants = sha1nodeconstants
601 489 self.nullid = self.nodeconstants.nullid
602 490
603 491 # sparse-revlog can't be on without general-delta (issue6056)
604 492 if not self._generaldelta:
605 493 self._sparserevlog = False
606 494
607 495 self._storedeltachains = True
608 496
609 497 devel_nodemap = (
610 498 self.nodemap_file
611 499 and opts.get(b'devel-force-nodemap', False)
612 500 and parse_index_v1_nodemap is not None
613 501 )
614 502
615 503 use_rust_index = False
616 504 if rustrevlog is not None:
617 505 if self.nodemap_file is not None:
618 506 use_rust_index = True
619 507 else:
620 508 use_rust_index = self.opener.options.get(b'rust.index')
621 509
622 510 self._parse_index = parse_index_v1
623 511 if self.version == REVLOGV0:
624 self._parse_index = parse_index_v0
512 self._parse_index = revlogv0.parse_index_v0
625 513 elif fmt == REVLOGV2:
626 514 self._parse_index = parse_index_v2
627 515 elif devel_nodemap:
628 516 self._parse_index = parse_index_v1_nodemap
629 517 elif use_rust_index:
630 518 self._parse_index = parse_index_v1_mixed
631 519 try:
632 520 d = self._parse_index(indexdata, self._inline)
633 521 index, _chunkcache = d
634 522 use_nodemap = (
635 523 not self._inline
636 524 and self.nodemap_file is not None
637 525 and util.safehasattr(index, 'update_nodemap_data')
638 526 )
639 527 if use_nodemap:
640 528 nodemap_data = nodemaputil.persisted_data(self)
641 529 if nodemap_data is not None:
642 530 docket = nodemap_data[0]
643 531 if (
644 532 len(d[0]) > docket.tip_rev
645 533 and d[0][docket.tip_rev][7] == docket.tip_node
646 534 ):
647 535 # no changelog tampering
648 536 self._nodemap_docket = docket
649 537 index.update_nodemap_data(*nodemap_data)
650 538 except (ValueError, IndexError):
651 539 raise error.RevlogError(
652 540 _(b"index %s is corrupted") % self.indexfile
653 541 )
654 542 self.index, self._chunkcache = d
655 543 if not self._chunkcache:
656 544 self._chunkclear()
657 545 # revnum -> (chain-length, sum-delta-length)
658 546 self._chaininfocache = util.lrucachedict(500)
659 547 # revlog header -> revlog compressor
660 548 self._decompressors = {}
661 549
662 550 @util.propertycache
663 551 def _compressor(self):
664 552 engine = util.compengines[self._compengine]
665 553 return engine.revlogcompressor(self._compengineopts)
666 554
667 555 def _indexfp(self, mode=b'r'):
668 556 """file object for the revlog's index file"""
669 557 args = {'mode': mode}
670 558 if mode != b'r':
671 559 args['checkambig'] = self._checkambig
672 560 if mode == b'w':
673 561 args['atomictemp'] = True
674 562 return self.opener(self.indexfile, **args)
675 563
676 564 def _datafp(self, mode=b'r'):
677 565 """file object for the revlog's data file"""
678 566 return self.opener(self.datafile, mode=mode)
679 567
680 568 @contextlib.contextmanager
681 569 def _datareadfp(self, existingfp=None):
682 570 """file object suitable to read data"""
683 571 # Use explicit file handle, if given.
684 572 if existingfp is not None:
685 573 yield existingfp
686 574
687 575 # Use a file handle being actively used for writes, if available.
688 576 # There is some danger to doing this because reads will seek the
689 577 # file. However, _writeentry() performs a SEEK_END before all writes,
690 578 # so we should be safe.
691 579 elif self._writinghandles:
692 580 if self._inline:
693 581 yield self._writinghandles[0]
694 582 else:
695 583 yield self._writinghandles[1]
696 584
697 585 # Otherwise open a new file handle.
698 586 else:
699 587 if self._inline:
700 588 func = self._indexfp
701 589 else:
702 590 func = self._datafp
703 591 with func() as fp:
704 592 yield fp
705 593
706 594 def tiprev(self):
707 595 return len(self.index) - 1
708 596
709 597 def tip(self):
710 598 return self.node(self.tiprev())
711 599
712 600 def __contains__(self, rev):
713 601 return 0 <= rev < len(self)
714 602
715 603 def __len__(self):
716 604 return len(self.index)
717 605
718 606 def __iter__(self):
719 607 return iter(pycompat.xrange(len(self)))
720 608
721 609 def revs(self, start=0, stop=None):
722 610 """iterate over all revs in this revlog (from start to stop)"""
723 611 return storageutil.iterrevs(len(self), start=start, stop=stop)
724 612
725 613 @property
726 614 def nodemap(self):
727 615 msg = (
728 616 b"revlog.nodemap is deprecated, "
729 617 b"use revlog.index.[has_node|rev|get_rev]"
730 618 )
731 619 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
732 620 return self.index.nodemap
733 621
734 622 @property
735 623 def _nodecache(self):
736 624 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
737 625 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
738 626 return self.index.nodemap
739 627
740 628 def hasnode(self, node):
741 629 try:
742 630 self.rev(node)
743 631 return True
744 632 except KeyError:
745 633 return False
746 634
747 635 def candelta(self, baserev, rev):
748 636 """whether two revisions (baserev, rev) can be delta-ed or not"""
749 637 # Disable delta if either rev requires a content-changing flag
750 638 # processor (ex. LFS). This is because such a flag processor can alter
751 639 # the rawtext content that the delta will be based on, and two clients
752 640 # could have the same revlog node with different flags (i.e. different
753 641 # rawtext contents) and the delta could be incompatible.
754 642 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
755 643 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
756 644 ):
757 645 return False
758 646 return True
759 647
760 648 def update_caches(self, transaction):
761 649 if self.nodemap_file is not None:
762 650 if transaction is None:
763 651 nodemaputil.update_persistent_nodemap(self)
764 652 else:
765 653 nodemaputil.setup_persistent_nodemap(transaction, self)
766 654
767 655 def clearcaches(self):
768 656 self._revisioncache = None
769 657 self._chainbasecache.clear()
770 658 self._chunkcache = (0, b'')
771 659 self._pcache = {}
772 660 self._nodemap_docket = None
773 661 self.index.clearcaches()
774 662 # The Python code is responsible for validating the docket; we
775 663 # end up having to refresh it here.
776 664 use_nodemap = (
777 665 not self._inline
778 666 and self.nodemap_file is not None
779 667 and util.safehasattr(self.index, 'update_nodemap_data')
780 668 )
781 669 if use_nodemap:
782 670 nodemap_data = nodemaputil.persisted_data(self)
783 671 if nodemap_data is not None:
784 672 self._nodemap_docket = nodemap_data[0]
785 673 self.index.update_nodemap_data(*nodemap_data)
786 674
787 675 def rev(self, node):
788 676 try:
789 677 return self.index.rev(node)
790 678 except TypeError:
791 679 raise
792 680 except error.RevlogError:
793 681 # parsers.c radix tree lookup failed
794 682 if (
795 683 node == self.nodeconstants.wdirid
796 684 or node in self.nodeconstants.wdirfilenodeids
797 685 ):
798 686 raise error.WdirUnsupported
799 687 raise error.LookupError(node, self.indexfile, _(b'no node'))
800 688
801 689 # Accessors for index entries.
802 690
803 691 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
804 692 # are flags.
805 693 def start(self, rev):
806 694 return int(self.index[rev][0] >> 16)
807 695
808 696 def flags(self, rev):
809 697 return self.index[rev][0] & 0xFFFF
810 698
811 699 def length(self, rev):
812 700 return self.index[rev][1]
813 701
814 702 def sidedata_length(self, rev):
815 703 if self.version & 0xFFFF != REVLOGV2:
816 704 return 0
817 705 return self.index[rev][9]
818 706
819 707 def rawsize(self, rev):
820 708 """return the length of the uncompressed text for a given revision"""
821 709 l = self.index[rev][2]
822 710 if l >= 0:
823 711 return l
824 712
825 713 t = self.rawdata(rev)
826 714 return len(t)
827 715
828 716 def size(self, rev):
829 717 """length of non-raw text (processed by a "read" flag processor)"""
830 718 # fast path: if no "read" flag processor could change the content,
831 719 # size is rawsize. note: ELLIPSIS is known to not change the content.
832 720 flags = self.flags(rev)
833 721 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
834 722 return self.rawsize(rev)
835 723
836 724 return len(self.revision(rev, raw=False))
837 725
838 726 def chainbase(self, rev):
839 727 base = self._chainbasecache.get(rev)
840 728 if base is not None:
841 729 return base
842 730
843 731 index = self.index
844 732 iterrev = rev
845 733 base = index[iterrev][3]
846 734 while base != iterrev:
847 735 iterrev = base
848 736 base = index[iterrev][3]
849 737
850 738 self._chainbasecache[rev] = base
851 739 return base
852 740
853 741 def linkrev(self, rev):
854 742 return self.index[rev][4]
855 743
856 744 def parentrevs(self, rev):
857 745 try:
858 746 entry = self.index[rev]
859 747 except IndexError:
860 748 if rev == wdirrev:
861 749 raise error.WdirUnsupported
862 750 raise
863 751 if entry[5] == nullrev:
864 752 return entry[6], entry[5]
865 753 else:
866 754 return entry[5], entry[6]
867 755
868 756 # fast parentrevs(rev) where rev isn't filtered
869 757 _uncheckedparentrevs = parentrevs
870 758
871 759 def node(self, rev):
872 760 try:
873 761 return self.index[rev][7]
874 762 except IndexError:
875 763 if rev == wdirrev:
876 764 raise error.WdirUnsupported
877 765 raise
878 766
879 767 # Derived from index values.
880 768
881 769 def end(self, rev):
882 770 return self.start(rev) + self.length(rev)
883 771
884 772 def parents(self, node):
885 773 i = self.index
886 774 d = i[self.rev(node)]
887 775 # inline node() to avoid function call overhead
888 776 if d[5] == nullrev:
889 777 return i[d[6]][7], i[d[5]][7]
890 778 else:
891 779 return i[d[5]][7], i[d[6]][7]
892 780
893 781 def chainlen(self, rev):
894 782 return self._chaininfo(rev)[0]
895 783
896 784 def _chaininfo(self, rev):
897 785 chaininfocache = self._chaininfocache
898 786 if rev in chaininfocache:
899 787 return chaininfocache[rev]
900 788 index = self.index
901 789 generaldelta = self._generaldelta
902 790 iterrev = rev
903 791 e = index[iterrev]
904 792 clen = 0
905 793 compresseddeltalen = 0
906 794 while iterrev != e[3]:
907 795 clen += 1
908 796 compresseddeltalen += e[1]
909 797 if generaldelta:
910 798 iterrev = e[3]
911 799 else:
912 800 iterrev -= 1
913 801 if iterrev in chaininfocache:
914 802 t = chaininfocache[iterrev]
915 803 clen += t[0]
916 804 compresseddeltalen += t[1]
917 805 break
918 806 e = index[iterrev]
919 807 else:
920 808 # Add text length of base since decompressing that also takes
921 809 # work. For cache hits the length is already included.
922 810 compresseddeltalen += e[1]
923 811 r = (clen, compresseddeltalen)
924 812 chaininfocache[rev] = r
925 813 return r
926 814
927 815 def _deltachain(self, rev, stoprev=None):
928 816 """Obtain the delta chain for a revision.
929 817
930 818 ``stoprev`` specifies a revision to stop at. If not specified, we
931 819 stop at the base of the chain.
932 820
933 821 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
934 822 revs in ascending order and ``stopped`` is a bool indicating whether
935 823 ``stoprev`` was hit.
936 824 """
937 825 # Try C implementation.
938 826 try:
939 827 return self.index.deltachain(rev, stoprev, self._generaldelta)
940 828 except AttributeError:
941 829 pass
942 830
943 831 chain = []
944 832
945 833 # Alias to prevent attribute lookup in tight loop.
946 834 index = self.index
947 835 generaldelta = self._generaldelta
948 836
949 837 iterrev = rev
950 838 e = index[iterrev]
951 839 while iterrev != e[3] and iterrev != stoprev:
952 840 chain.append(iterrev)
953 841 if generaldelta:
954 842 iterrev = e[3]
955 843 else:
956 844 iterrev -= 1
957 845 e = index[iterrev]
958 846
959 847 if iterrev == stoprev:
960 848 stopped = True
961 849 else:
962 850 chain.append(iterrev)
963 851 stopped = False
964 852
965 853 chain.reverse()
966 854 return chain, stopped
967 855
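# A short usage sketch for the chain walk above (hedged; `rl` is any
# revlog instance):
#
#     chain, stopped = rl._deltachain(rev)
#
# `chain` lists revs in ascending order ending at `rev`; `stopped` is
# True only when `stoprev` cut the walk before the chain's natural base.
# With generaldelta each step follows the recorded base in e[3]; without
# it the chain is simply rev, rev - 1, rev - 2, ...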
968 856 def ancestors(self, revs, stoprev=0, inclusive=False):
969 857 """Generate the ancestors of 'revs' in reverse revision order.
970 858 Does not generate revs lower than stoprev.
971 859
972 860 See the documentation for ancestor.lazyancestors for more details."""
973 861
974 862 # first, make sure start revisions aren't filtered
975 863 revs = list(revs)
976 864 checkrev = self.node
977 865 for r in revs:
978 866 checkrev(r)
979 867 # and we're sure ancestors aren't filtered as well
980 868
981 869 if rustancestor is not None:
982 870 lazyancestors = rustancestor.LazyAncestors
983 871 arg = self.index
984 872 else:
985 873 lazyancestors = ancestor.lazyancestors
986 874 arg = self._uncheckedparentrevs
987 875 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
988 876
989 877 def descendants(self, revs):
990 878 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
991 879
992 880 def findcommonmissing(self, common=None, heads=None):
993 881 """Return a tuple of the ancestors of common and the ancestors of heads
994 882 that are not ancestors of common. In revset terminology, we return the
995 883 tuple:
996 884
997 885 ::common, (::heads) - (::common)
998 886
999 887 The list is sorted by revision number, meaning it is
1000 888 topologically sorted.
1001 889
1002 890 'heads' and 'common' are both lists of node IDs. If heads is
1003 891 not supplied, uses all of the revlog's heads. If common is not
1004 892 supplied, uses nullid."""
1005 893 if common is None:
1006 894 common = [self.nullid]
1007 895 if heads is None:
1008 896 heads = self.heads()
1009 897
1010 898 common = [self.rev(n) for n in common]
1011 899 heads = [self.rev(n) for n in heads]
1012 900
1013 901 # we want the ancestors, but inclusive
1014 902 class lazyset(object):
1015 903 def __init__(self, lazyvalues):
1016 904 self.addedvalues = set()
1017 905 self.lazyvalues = lazyvalues
1018 906
1019 907 def __contains__(self, value):
1020 908 return value in self.addedvalues or value in self.lazyvalues
1021 909
1022 910 def __iter__(self):
1023 911 added = self.addedvalues
1024 912 for r in added:
1025 913 yield r
1026 914 for r in self.lazyvalues:
1027 915 if r not in added:
1028 916 yield r
1029 917
1030 918 def add(self, value):
1031 919 self.addedvalues.add(value)
1032 920
1033 921 def update(self, values):
1034 922 self.addedvalues.update(values)
1035 923
1036 924 has = lazyset(self.ancestors(common))
1037 925 has.add(nullrev)
1038 926 has.update(common)
1039 927
1040 928 # take all ancestors from heads that aren't in has
1041 929 missing = set()
1042 930 visit = collections.deque(r for r in heads if r not in has)
1043 931 while visit:
1044 932 r = visit.popleft()
1045 933 if r in missing:
1046 934 continue
1047 935 else:
1048 936 missing.add(r)
1049 937 for p in self.parentrevs(r):
1050 938 if p not in has:
1051 939 visit.append(p)
1052 940 missing = list(missing)
1053 941 missing.sort()
1054 942 return has, [self.node(miss) for miss in missing]
1055 943
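# A usage sketch (hedged; `n1` and `n2` are hypothetical node ids):
#
#     has, missing = rl.findcommonmissing(common=[n1], heads=[n2])
#
# `has` lazily covers ::n1 (inclusive); `missing` holds the nodes of
# (::n2) - (::n1), sorted by revision number.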
1056 944 def incrementalmissingrevs(self, common=None):
1057 945 """Return an object that can be used to incrementally compute the
1058 946 revision numbers of the ancestors of arbitrary sets that are not
1059 947 ancestors of common. This is an ancestor.incrementalmissingancestors
1060 948 object.
1061 949
1062 950 'common' is a list of revision numbers. If common is not supplied, uses
1063 951 nullrev.
1064 952 """
1065 953 if common is None:
1066 954 common = [nullrev]
1067 955
1068 956 if rustancestor is not None:
1069 957 return rustancestor.MissingAncestors(self.index, common)
1070 958 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1071 959
1072 960 def findmissingrevs(self, common=None, heads=None):
1073 961 """Return the revision numbers of the ancestors of heads that
1074 962 are not ancestors of common.
1075 963
1076 964 More specifically, return a list of revision numbers corresponding to
1077 965 nodes N such that every N satisfies the following constraints:
1078 966
1079 967 1. N is an ancestor of some node in 'heads'
1080 968 2. N is not an ancestor of any node in 'common'
1081 969
1082 970 The list is sorted by revision number, meaning it is
1083 971 topologically sorted.
1084 972
1085 973 'heads' and 'common' are both lists of revision numbers. If heads is
1086 974 not supplied, uses all of the revlog's heads. If common is not
1087 975 supplied, uses nullid."""
1088 976 if common is None:
1089 977 common = [nullrev]
1090 978 if heads is None:
1091 979 heads = self.headrevs()
1092 980
1093 981 inc = self.incrementalmissingrevs(common=common)
1094 982 return inc.missingancestors(heads)
1095 983
1096 984 def findmissing(self, common=None, heads=None):
1097 985 """Return the ancestors of heads that are not ancestors of common.
1098 986
1099 987 More specifically, return a list of nodes N such that every N
1100 988 satisfies the following constraints:
1101 989
1102 990 1. N is an ancestor of some node in 'heads'
1103 991 2. N is not an ancestor of any node in 'common'
1104 992
1105 993 The list is sorted by revision number, meaning it is
1106 994 topologically sorted.
1107 995
1108 996 'heads' and 'common' are both lists of node IDs. If heads is
1109 997 not supplied, uses all of the revlog's heads. If common is not
1110 998 supplied, uses nullid."""
1111 999 if common is None:
1112 1000 common = [self.nullid]
1113 1001 if heads is None:
1114 1002 heads = self.heads()
1115 1003
1116 1004 common = [self.rev(n) for n in common]
1117 1005 heads = [self.rev(n) for n in heads]
1118 1006
1119 1007 inc = self.incrementalmissingrevs(common=common)
1120 1008 return [self.node(r) for r in inc.missingancestors(heads)]
1121 1009
1122 1010 def nodesbetween(self, roots=None, heads=None):
1123 1011 """Return a topological path from 'roots' to 'heads'.
1124 1012
1125 1013 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1126 1014 topologically sorted list of all nodes N that satisfy both of
1127 1015 these constraints:
1128 1016
1129 1017 1. N is a descendant of some node in 'roots'
1130 1018 2. N is an ancestor of some node in 'heads'
1131 1019
1132 1020 Every node is considered to be both a descendant and an ancestor
1133 1021 of itself, so every reachable node in 'roots' and 'heads' will be
1134 1022 included in 'nodes'.
1135 1023
1136 1024 'outroots' is the list of reachable nodes in 'roots', i.e., the
1137 1025 subset of 'roots' that is returned in 'nodes'. Likewise,
1138 1026 'outheads' is the subset of 'heads' that is also in 'nodes'.
1139 1027
1140 1028 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1141 1029 unspecified, uses nullid as the only root. If 'heads' is
1142 1030 unspecified, uses list of all of the revlog's heads."""
1143 1031 nonodes = ([], [], [])
1144 1032 if roots is not None:
1145 1033 roots = list(roots)
1146 1034 if not roots:
1147 1035 return nonodes
1148 1036 lowestrev = min([self.rev(n) for n in roots])
1149 1037 else:
1150 1038 roots = [self.nullid] # Everybody's a descendant of nullid
1151 1039 lowestrev = nullrev
1152 1040 if (lowestrev == nullrev) and (heads is None):
1153 1041 # We want _all_ the nodes!
1154 1042 return (
1155 1043 [self.node(r) for r in self],
1156 1044 [self.nullid],
1157 1045 list(self.heads()),
1158 1046 )
1159 1047 if heads is None:
1160 1048 # All nodes are ancestors, so the latest ancestor is the last
1161 1049 # node.
1162 1050 highestrev = len(self) - 1
1163 1051 # Set ancestors to None to signal that every node is an ancestor.
1164 1052 ancestors = None
1165 1053 # Set heads to an empty dictionary for later discovery of heads
1166 1054 heads = {}
1167 1055 else:
1168 1056 heads = list(heads)
1169 1057 if not heads:
1170 1058 return nonodes
1171 1059 ancestors = set()
1172 1060 # Turn heads into a dictionary so we can remove 'fake' heads.
1173 1061 # Also, later we will be using it to filter out the heads we can't
1174 1062 # find from roots.
1175 1063 heads = dict.fromkeys(heads, False)
1176 1064 # Start at the top and keep marking parents until we're done.
1177 1065 nodestotag = set(heads)
1178 1066 # Remember where the top was so we can use it as a limit later.
1179 1067 highestrev = max([self.rev(n) for n in nodestotag])
1180 1068 while nodestotag:
1181 1069 # grab a node to tag
1182 1070 n = nodestotag.pop()
1183 1071 # Never tag nullid
1184 1072 if n == self.nullid:
1185 1073 continue
1186 1074 # A node's revision number represents its place in a
1187 1075 # topologically sorted list of nodes.
1188 1076 r = self.rev(n)
1189 1077 if r >= lowestrev:
1190 1078 if n not in ancestors:
1191 1079 # If we are possibly a descendant of one of the roots
1192 1080 # and we haven't already been marked as an ancestor
1193 1081 ancestors.add(n) # Mark as ancestor
1194 1082 # Add non-nullid parents to list of nodes to tag.
1195 1083 nodestotag.update(
1196 1084 [p for p in self.parents(n) if p != self.nullid]
1197 1085 )
1198 1086 elif n in heads: # We've seen it before, is it a fake head?
1199 1087 # So it is; real heads should not be the ancestors of
1200 1088 # any other heads.
1201 1089 heads.pop(n)
1202 1090 if not ancestors:
1203 1091 return nonodes
1204 1092 # Now that we have our set of ancestors, we want to remove any
1205 1093 # roots that are not ancestors.
1206 1094
1207 1095 # If one of the roots was nullid, everything is included anyway.
1208 1096 if lowestrev > nullrev:
1209 1097 # But, since we weren't, let's recompute the lowest rev to not
1210 1098 # include roots that aren't ancestors.
1211 1099
1212 1100 # Filter out roots that aren't ancestors of heads
1213 1101 roots = [root for root in roots if root in ancestors]
1214 1102 # Recompute the lowest revision
1215 1103 if roots:
1216 1104 lowestrev = min([self.rev(root) for root in roots])
1217 1105 else:
1218 1106 # No more roots? Return empty list
1219 1107 return nonodes
1220 1108 else:
1221 1109 # We are descending from nullid, and don't need to care about
1222 1110 # any other roots.
1223 1111 lowestrev = nullrev
1224 1112 roots = [self.nullid]
1225 1113 # Transform our roots list into a set.
1226 1114 descendants = set(roots)
1227 1115 # Also, keep the original roots so we can filter out roots that aren't
1228 1116 # 'real' roots (i.e. are descended from other roots).
1229 1117 roots = descendants.copy()
1230 1118 # Our topologically sorted list of output nodes.
1231 1119 orderedout = []
1232 1120 # Don't start at nullid since we don't want nullid in our output list,
1233 1121 # and if nullid shows up in descendants, empty parents will look like
1234 1122 # they're descendants.
1235 1123 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1236 1124 n = self.node(r)
1237 1125 isdescendant = False
1238 1126 if lowestrev == nullrev: # Everybody is a descendant of nullid
1239 1127 isdescendant = True
1240 1128 elif n in descendants:
1241 1129 # n is already a descendant
1242 1130 isdescendant = True
1243 1131 # This check only needs to be done here because all the roots
1244 1132 # will start being marked as descendants before the loop.
1245 1133 if n in roots:
1246 1134 # If n was a root, check if it's a 'real' root.
1247 1135 p = tuple(self.parents(n))
1248 1136 # If any of its parents are descendants, it's not a root.
1249 1137 if (p[0] in descendants) or (p[1] in descendants):
1250 1138 roots.remove(n)
1251 1139 else:
1252 1140 p = tuple(self.parents(n))
1253 1141 # A node is a descendant if either of its parents is a
1254 1142 # descendant. (We seeded the descendants set with the roots
1255 1143 # up there, remember?)
1256 1144 if (p[0] in descendants) or (p[1] in descendants):
1257 1145 descendants.add(n)
1258 1146 isdescendant = True
1259 1147 if isdescendant and ((ancestors is None) or (n in ancestors)):
1260 1148 # Only include nodes that are both descendants and ancestors.
1261 1149 orderedout.append(n)
1262 1150 if (ancestors is not None) and (n in heads):
1263 1151 # We're trying to figure out which heads are reachable
1264 1152 # from roots.
1265 1153 # Mark this head as having been reached
1266 1154 heads[n] = True
1267 1155 elif ancestors is None:
1268 1156 # Otherwise, we're trying to discover the heads.
1269 1157 # Assume this is a head because if it isn't, the next step
1270 1158 # will eventually remove it.
1271 1159 heads[n] = True
1272 1160 # But, obviously its parents aren't.
1273 1161 for p in self.parents(n):
1274 1162 heads.pop(p, None)
1275 1163 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1276 1164 roots = list(roots)
1277 1165 assert orderedout
1278 1166 assert roots
1279 1167 assert heads
1280 1168 return (orderedout, roots, heads)
1281 1169
1282 1170 def headrevs(self, revs=None):
1283 1171 if revs is None:
1284 1172 try:
1285 1173 return self.index.headrevs()
1286 1174 except AttributeError:
1287 1175 return self._headrevs()
1288 1176 if rustdagop is not None:
1289 1177 return rustdagop.headrevs(self.index, revs)
1290 1178 return dagop.headrevs(revs, self._uncheckedparentrevs)
1291 1179
1292 1180 def computephases(self, roots):
1293 1181 return self.index.computephasesmapsets(roots)
1294 1182
1295 1183 def _headrevs(self):
1296 1184 count = len(self)
1297 1185 if not count:
1298 1186 return [nullrev]
1299 1187 # we won't iterate over filtered revs so nobody is a head at start
1300 1188 ishead = [0] * (count + 1)
1301 1189 index = self.index
1302 1190 for r in self:
1303 1191 ishead[r] = 1 # I may be a head
1304 1192 e = index[r]
1305 1193 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1306 1194 return [r for r, val in enumerate(ishead) if val]
1307 1195
1308 1196 def heads(self, start=None, stop=None):
1309 1197 """return the list of all nodes that have no children
1310 1198
1311 1199 if start is specified, only heads that are descendants of
1312 1200 start will be returned.
1313 1201 if stop is specified, it will consider all the revs from stop
1314 1202 as if they had no children
1315 1203 """
1316 1204 if start is None and stop is None:
1317 1205 if not len(self):
1318 1206 return [self.nullid]
1319 1207 return [self.node(r) for r in self.headrevs()]
1320 1208
1321 1209 if start is None:
1322 1210 start = nullrev
1323 1211 else:
1324 1212 start = self.rev(start)
1325 1213
1326 1214 stoprevs = {self.rev(n) for n in stop or []}
1327 1215
1328 1216 revs = dagop.headrevssubset(
1329 1217 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1330 1218 )
1331 1219
1332 1220 return [self.node(rev) for rev in revs]
1333 1221
1334 1222 def children(self, node):
1335 1223 """find the children of a given node"""
1336 1224 c = []
1337 1225 p = self.rev(node)
1338 1226 for r in self.revs(start=p + 1):
1339 1227 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1340 1228 if prevs:
1341 1229 for pr in prevs:
1342 1230 if pr == p:
1343 1231 c.append(self.node(r))
1344 1232 elif p == nullrev:
1345 1233 c.append(self.node(r))
1346 1234 return c
1347 1235
1348 1236 def commonancestorsheads(self, a, b):
1349 1237 """calculate all the heads of the common ancestors of nodes a and b"""
1350 1238 a, b = self.rev(a), self.rev(b)
1351 1239 ancs = self._commonancestorsheads(a, b)
1352 1240 return pycompat.maplist(self.node, ancs)
1353 1241
1354 1242 def _commonancestorsheads(self, *revs):
1355 1243 """calculate all the heads of the common ancestors of revs"""
1356 1244 try:
1357 1245 ancs = self.index.commonancestorsheads(*revs)
1358 1246 except (AttributeError, OverflowError): # C implementation failed
1359 1247 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1360 1248 return ancs
1361 1249
1362 1250 def isancestor(self, a, b):
1363 1251 """return True if node a is an ancestor of node b
1364 1252
1365 1253 A revision is considered an ancestor of itself."""
1366 1254 a, b = self.rev(a), self.rev(b)
1367 1255 return self.isancestorrev(a, b)
1368 1256
1369 1257 def isancestorrev(self, a, b):
1370 1258 """return True if revision a is an ancestor of revision b
1371 1259
1372 1260 A revision is considered an ancestor of itself.
1373 1261
1374 1262 The implementation of this is trivial but the use of
1375 1263 reachableroots is not."""
1376 1264 if a == nullrev:
1377 1265 return True
1378 1266 elif a == b:
1379 1267 return True
1380 1268 elif a > b:
1381 1269 return False
1382 1270 return bool(self.reachableroots(a, [b], [a], includepath=False))
1383 1271
1384 1272 def reachableroots(self, minroot, heads, roots, includepath=False):
1385 1273 """return (heads(::(<roots> and <roots>::<heads>)))
1386 1274
1387 1275 If includepath is True, return (<roots>::<heads>)."""
1388 1276 try:
1389 1277 return self.index.reachableroots2(
1390 1278 minroot, heads, roots, includepath
1391 1279 )
1392 1280 except AttributeError:
1393 1281 return dagop._reachablerootspure(
1394 1282 self.parentrevs, minroot, roots, heads, includepath
1395 1283 )
1396 1284
1397 1285 def ancestor(self, a, b):
1398 1286 """calculate the "best" common ancestor of nodes a and b"""
1399 1287
1400 1288 a, b = self.rev(a), self.rev(b)
1401 1289 try:
1402 1290 ancs = self.index.ancestors(a, b)
1403 1291 except (AttributeError, OverflowError):
1404 1292 ancs = ancestor.ancestors(self.parentrevs, a, b)
1405 1293 if ancs:
1406 1294 # choose a consistent winner when there's a tie
1407 1295 return min(map(self.node, ancs))
1408 1296 return self.nullid
1409 1297
1410 1298 def _match(self, id):
1411 1299 if isinstance(id, int):
1412 1300 # rev
1413 1301 return self.node(id)
1414 1302 if len(id) == 20:
1415 1303 # possibly a binary node
1416 1304 # odds of a binary node being all hex in ASCII are 1 in 10**25
1417 1305 try:
1418 1306 node = id
1419 1307 self.rev(node) # quick search the index
1420 1308 return node
1421 1309 except error.LookupError:
1422 1310 pass # may be partial hex id
1423 1311 try:
1424 1312 # str(rev)
1425 1313 rev = int(id)
1426 1314 if b"%d" % rev != id:
1427 1315 raise ValueError
1428 1316 if rev < 0:
1429 1317 rev = len(self) + rev
1430 1318 if rev < 0 or rev >= len(self):
1431 1319 raise ValueError
1432 1320 return self.node(rev)
1433 1321 except (ValueError, OverflowError):
1434 1322 pass
1435 1323 if len(id) == 40:
1436 1324 try:
1437 1325 # a full hex nodeid?
1438 1326 node = bin(id)
1439 1327 self.rev(node)
1440 1328 return node
1441 1329 except (TypeError, error.LookupError):
1442 1330 pass
1443 1331
1444 1332 def _partialmatch(self, id):
1445 1333 # we don't care about wdirfilenodeids as they should always be full hashes
1446 1334 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1447 1335 try:
1448 1336 partial = self.index.partialmatch(id)
1449 1337 if partial and self.hasnode(partial):
1450 1338 if maybewdir:
1451 1339 # single 'ff...' match in radix tree, ambiguous with wdir
1452 1340 raise error.RevlogError
1453 1341 return partial
1454 1342 if maybewdir:
1455 1343 # no 'ff...' match in radix tree, wdir identified
1456 1344 raise error.WdirUnsupported
1457 1345 return None
1458 1346 except error.RevlogError:
1459 1347 # parsers.c radix tree lookup gave multiple matches
1460 1348 # fast path: for unfiltered changelog, radix tree is accurate
1461 1349 if not getattr(self, 'filteredrevs', None):
1462 1350 raise error.AmbiguousPrefixLookupError(
1463 1351 id, self.indexfile, _(b'ambiguous identifier')
1464 1352 )
1465 1353 # fall through to slow path that filters hidden revisions
1466 1354 except (AttributeError, ValueError):
1467 1355 # we are pure python, or key was too short to search radix tree
1468 1356 pass
1469 1357
1470 1358 if id in self._pcache:
1471 1359 return self._pcache[id]
1472 1360
1473 1361 if len(id) <= 40:
1474 1362 try:
1475 1363 # hex(node)[:...]
1476 1364 l = len(id) // 2 # grab an even number of digits
1477 1365 prefix = bin(id[: l * 2])
1478 1366 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1479 1367 nl = [
1480 1368 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1481 1369 ]
1482 1370 if self.nodeconstants.nullhex.startswith(id):
1483 1371 nl.append(self.nullid)
1484 1372 if len(nl) > 0:
1485 1373 if len(nl) == 1 and not maybewdir:
1486 1374 self._pcache[id] = nl[0]
1487 1375 return nl[0]
1488 1376 raise error.AmbiguousPrefixLookupError(
1489 1377 id, self.indexfile, _(b'ambiguous identifier')
1490 1378 )
1491 1379 if maybewdir:
1492 1380 raise error.WdirUnsupported
1493 1381 return None
1494 1382 except TypeError:
1495 1383 pass
1496 1384
1497 1385 def lookup(self, id):
1498 1386 """locate a node based on:
1499 1387 - revision number or str(revision number)
1500 1388 - nodeid or subset of hex nodeid
1501 1389 """
1502 1390 n = self._match(id)
1503 1391 if n is not None:
1504 1392 return n
1505 1393 n = self._partialmatch(id)
1506 1394 if n:
1507 1395 return n
1508 1396
1509 1397 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1510 1398
1511 1399 def shortest(self, node, minlength=1):
1512 1400 """Find the shortest unambiguous prefix that matches node."""
1513 1401
1514 1402 def isvalid(prefix):
1515 1403 try:
1516 1404 matchednode = self._partialmatch(prefix)
1517 1405 except error.AmbiguousPrefixLookupError:
1518 1406 return False
1519 1407 except error.WdirUnsupported:
1520 1408 # single 'ff...' match
1521 1409 return True
1522 1410 if matchednode is None:
1523 1411 raise error.LookupError(node, self.indexfile, _(b'no node'))
1524 1412 return True
1525 1413
1526 1414 def maybewdir(prefix):
1527 1415 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1528 1416
1529 1417 hexnode = hex(node)
1530 1418
1531 1419 def disambiguate(hexnode, minlength):
1532 1420 """Disambiguate against wdirid."""
1533 1421 for length in range(minlength, len(hexnode) + 1):
1534 1422 prefix = hexnode[:length]
1535 1423 if not maybewdir(prefix):
1536 1424 return prefix
1537 1425
1538 1426 if not getattr(self, 'filteredrevs', None):
1539 1427 try:
1540 1428 length = max(self.index.shortest(node), minlength)
1541 1429 return disambiguate(hexnode, length)
1542 1430 except error.RevlogError:
1543 1431 if node != self.nodeconstants.wdirid:
1544 1432 raise error.LookupError(node, self.indexfile, _(b'no node'))
1545 1433 except AttributeError:
1546 1434 # Fall through to pure code
1547 1435 pass
1548 1436
1549 1437 if node == self.nodeconstants.wdirid:
1550 1438 for length in range(minlength, len(hexnode) + 1):
1551 1439 prefix = hexnode[:length]
1552 1440 if isvalid(prefix):
1553 1441 return prefix
1554 1442
1555 1443 for length in range(minlength, len(hexnode) + 1):
1556 1444 prefix = hexnode[:length]
1557 1445 if isvalid(prefix):
1558 1446 return disambiguate(hexnode, length)
1559 1447
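# The same minlength-to-full-length scan as shortest() above, sketched
# over a plain set of hex strings instead of a revlog; `known` is
# hypothetical data, and ambiguity is tested by counting matches.
known = {'a1b2c3', 'a1f0e9', 'bb0000'}

def toy_shortest(hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if sum(1 for n in known if n.startswith(prefix)) == 1:
            return prefix
    return hexnode

assert toy_shortest('a1b2c3') == 'a1b'
assert toy_shortest('bb0000') == 'b'
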
1560 1448 def cmp(self, node, text):
1561 1449 """compare text with a given file revision
1562 1450
1563 1451 returns True if text is different from what is stored.
1564 1452 """
1565 1453 p1, p2 = self.parents(node)
1566 1454 return storageutil.hashrevisionsha1(text, p1, p2) != node
1567 1455
1568 1456 def _cachesegment(self, offset, data):
1569 1457 """Add a segment to the revlog cache.
1570 1458
1571 1459 Accepts an absolute offset and the data that is at that location.
1572 1460 """
1573 1461 o, d = self._chunkcache
1574 1462 # try to add to existing cache
1575 1463 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1576 1464 self._chunkcache = o, d + data
1577 1465 else:
1578 1466 self._chunkcache = offset, data
1579 1467
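# A small model of the extend-or-replace rule in _cachesegment above:
# the new segment is appended only when it is exactly contiguous with
# the cached one and the merged size stays under a cap (_chunksize in
# the real code; 8 bytes here for the demo).
def cache_add(cache, offset, data, cap=8):
    o, d = cache
    if o + len(d) == offset and len(d) + len(data) < cap:
        return o, d + data           # contiguous: grow the cached segment
    return offset, data              # gap or too large: restart the cache

assert cache_add((0, b'ab'), 2, b'cd') == (0, b'abcd')
assert cache_add((0, b'ab'), 5, b'cd') == (5, b'cd')
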
1580 1468 def _readsegment(self, offset, length, df=None):
1581 1469 """Load a segment of raw data from the revlog.
1582 1470
1583 1471 Accepts an absolute offset, length to read, and an optional existing
1584 1472 file handle to read from.
1585 1473
1586 1474 If an existing file handle is passed, it will be seeked and the
1587 1475 original seek position will NOT be restored.
1588 1476
1589 1477 Returns a str or buffer of raw byte data.
1590 1478
1591 1479 Raises if the requested number of bytes could not be read.
1592 1480 """
1593 1481 # Cache data both forward and backward around the requested
1594 1482 # data, in a fixed size window. This helps speed up operations
1595 1483 # involving reading the revlog backwards.
1596 1484 cachesize = self._chunkcachesize
1597 1485 realoffset = offset & ~(cachesize - 1)
1598 1486 reallength = (
1599 1487 (offset + length + cachesize) & ~(cachesize - 1)
1600 1488 ) - realoffset
1601 1489 with self._datareadfp(df) as df:
1602 1490 df.seek(realoffset)
1603 1491 d = df.read(reallength)
1604 1492
1605 1493 self._cachesegment(realoffset, d)
1606 1494 if offset != realoffset or reallength != length:
1607 1495 startoffset = offset - realoffset
1608 1496 if len(d) - startoffset < length:
1609 1497 raise error.RevlogError(
1610 1498 _(
1611 1499 b'partial read of revlog %s; expected %d bytes from '
1612 1500 b'offset %d, got %d'
1613 1501 )
1614 1502 % (
1615 1503 self.indexfile if self._inline else self.datafile,
1616 1504 length,
1617 1505 realoffset,
1618 1506 len(d) - startoffset,
1619 1507 )
1620 1508 )
1621 1509
1622 1510 return util.buffer(d, startoffset, length)
1623 1511
1624 1512 if len(d) < length:
1625 1513 raise error.RevlogError(
1626 1514 _(
1627 1515 b'partial read of revlog %s; expected %d bytes from offset '
1628 1516 b'%d, got %d'
1629 1517 )
1630 1518 % (
1631 1519 self.indexfile if self._inline else self.datafile,
1632 1520 length,
1633 1521 offset,
1634 1522 len(d),
1635 1523 )
1636 1524 )
1637 1525
1638 1526 return d
1639 1527
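# The window-rounding arithmetic from _readsegment above, with concrete
# numbers. It relies on cachesize being a power of two, which the chunk
# cache size is required to be.
cachesize = 65536
offset, length = 70000, 100
realoffset = offset & ~(cachesize - 1)
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
assert (realoffset, reallength) == (65536, 65536)
assert realoffset <= offset
assert realoffset + reallength >= offset + length
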
1640 1528 def _getsegment(self, offset, length, df=None):
1641 1529 """Obtain a segment of raw data from the revlog.
1642 1530
1643 1531 Accepts an absolute offset, length of bytes to obtain, and an
1644 1532 optional file handle to the already-opened revlog. If the file
1645 1533 handle is used, its original seek position will not be preserved.
1646 1534
1647 1535 Requests for data may be returned from a cache.
1648 1536
1649 1537 Returns a str or a buffer instance of raw byte data.
1650 1538 """
1651 1539 o, d = self._chunkcache
1652 1540 l = len(d)
1653 1541
1654 1542 # is it in the cache?
1655 1543 cachestart = offset - o
1656 1544 cacheend = cachestart + length
1657 1545 if cachestart >= 0 and cacheend <= l:
1658 1546 if cachestart == 0 and cacheend == l:
1659 1547 return d # avoid a copy
1660 1548 return util.buffer(d, cachestart, cacheend - cachestart)
1661 1549
1662 1550 return self._readsegment(offset, length, df=df)
1663 1551
1664 1552 def _getsegmentforrevs(self, startrev, endrev, df=None):
1665 1553 """Obtain a segment of raw data corresponding to a range of revisions.
1666 1554
1667 1555 Accepts the start and end revisions and an optional already-open
1668 1556 file handle to be used for reading. If the file handle is read, its
1669 1557 seek position will not be preserved.
1670 1558
1671 1559 Requests for data may be satisfied by a cache.
1672 1560
1673 1561 Returns a 2-tuple of (offset, data) for the requested range of
1674 1562 revisions. Offset is the integer offset from the beginning of the
1675 1563 revlog and data is a str or buffer of the raw byte data.
1676 1564
1677 1565 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1678 1566 to determine where each revision's data begins and ends.
1679 1567 """
1680 1568 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1681 1569 # (functions are expensive).
1682 1570 index = self.index
1683 1571 istart = index[startrev]
1684 1572 start = int(istart[0] >> 16)
1685 1573 if startrev == endrev:
1686 1574 end = start + istart[1]
1687 1575 else:
1688 1576 iend = index[endrev]
1689 1577 end = int(iend[0] >> 16) + iend[1]
1690 1578
1691 1579 if self._inline:
1692 1580 start += (startrev + 1) * self.index.entry_size
1693 1581 end += (endrev + 1) * self.index.entry_size
1694 1582 length = end - start
1695 1583
1696 1584 return start, self._getsegment(start, length, df=df)
1697 1585
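# How the inlined `istart[0] >> 16` above works: the first index field
# packs (offset << 16) | flags, so a shift recovers the data offset and
# a 0xFFFF mask would recover the flags.
def pack(offset, flags):
    return (offset << 16) | flags

entry0 = pack(8192, 1)
assert entry0 >> 16 == 8192       # data offset
assert entry0 & 0xFFFF == 1       # flags
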
1698 1586 def _chunk(self, rev, df=None):
1699 1587 """Obtain a single decompressed chunk for a revision.
1700 1588
1701 1589 Accepts an integer revision and an optional already-open file handle
1702 1590 to be used for reading. If used, the seek position of the file will not
1703 1591 be preserved.
1704 1592
1705 1593 Returns a str holding uncompressed data for the requested revision.
1706 1594 """
1707 1595 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1708 1596
1709 1597 def _chunks(self, revs, df=None, targetsize=None):
1710 1598 """Obtain decompressed chunks for the specified revisions.
1711 1599
1712 1600 Accepts an iterable of numeric revisions that are assumed to be in
1713 1601 ascending order. Also accepts an optional already-open file handle
1714 1602 to be used for reading. If used, the seek position of the file will
1715 1603 not be preserved.
1716 1604
1717 1605 This function is similar to calling ``self._chunk()`` multiple times,
1718 1606 but is faster.
1719 1607
1720 1608 Returns a list with decompressed data for each requested revision.
1721 1609 """
1722 1610 if not revs:
1723 1611 return []
1724 1612 start = self.start
1725 1613 length = self.length
1726 1614 inline = self._inline
1727 1615 iosize = self.index.entry_size
1728 1616 buffer = util.buffer
1729 1617
1730 1618 l = []
1731 1619 ladd = l.append
1732 1620
1733 1621 if not self._withsparseread:
1734 1622 slicedchunks = (revs,)
1735 1623 else:
1736 1624 slicedchunks = deltautil.slicechunk(
1737 1625 self, revs, targetsize=targetsize
1738 1626 )
1739 1627
1740 1628 for revschunk in slicedchunks:
1741 1629 firstrev = revschunk[0]
1742 1630 # Skip trailing revisions with empty diff
1743 1631 for lastrev in revschunk[::-1]:
1744 1632 if length(lastrev) != 0:
1745 1633 break
1746 1634
1747 1635 try:
1748 1636 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1749 1637 except OverflowError:
1750 1638 # issue4215 - we can't cache a run of chunks greater than
1751 1639 # 2G on Windows
1752 1640 return [self._chunk(rev, df=df) for rev in revschunk]
1753 1641
1754 1642 decomp = self.decompress
1755 1643 for rev in revschunk:
1756 1644 chunkstart = start(rev)
1757 1645 if inline:
1758 1646 chunkstart += (rev + 1) * iosize
1759 1647 chunklength = length(rev)
1760 1648 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1761 1649
1762 1650 return l
1763 1651
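# Why `chunkstart` grows by (rev + 1) * iosize above: in an inline
# revlog every revision's data is preceded by its index entry, so
# physical offsets trail logical ones by one entry per revision
# (sizes below are illustrative only).
entry_size = 64
logical_start = 1000              # start(rev) as recorded in the index
rev = 3
physical_start = logical_start + (rev + 1) * entry_size
assert physical_start == 1256
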
1764 1652 def _chunkclear(self):
1765 1653 """Clear the raw chunk cache."""
1766 1654 self._chunkcache = (0, b'')
1767 1655
1768 1656 def deltaparent(self, rev):
1769 1657 """return deltaparent of the given revision"""
1770 1658 base = self.index[rev][3]
1771 1659 if base == rev:
1772 1660 return nullrev
1773 1661 elif self._generaldelta:
1774 1662 return base
1775 1663 else:
1776 1664 return rev - 1
1777 1665
1778 1666 def issnapshot(self, rev):
1779 1667 """tells whether rev is a snapshot"""
1780 1668 if not self._sparserevlog:
1781 1669 return self.deltaparent(rev) == nullrev
1782 1670 elif util.safehasattr(self.index, b'issnapshot'):
1783 1671 # directly assign the method to cache the testing and access
1784 1672 self.issnapshot = self.index.issnapshot
1785 1673 return self.issnapshot(rev)
1786 1674 if rev == nullrev:
1787 1675 return True
1788 1676 entry = self.index[rev]
1789 1677 base = entry[3]
1790 1678 if base == rev:
1791 1679 return True
1792 1680 if base == nullrev:
1793 1681 return True
1794 1682 p1 = entry[5]
1795 1683 p2 = entry[6]
1796 1684 if base == p1 or base == p2:
1797 1685 return False
1798 1686 return self.issnapshot(base)
1799 1687
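# The sparse-revlog snapshot test above, sketched over a toy table of
# (base, p1, p2) tuples instead of a real index; the data is
# hypothetical.
nullrev = -1
entries = {
    0: (-1, -1, -1),   # full snapshot: delta against null
    1: (0, 0, -1),     # delta against its own parent: not a snapshot
    2: (0, 1, -1),     # delta against a non-parent rev: intermediate snapshot
}

def toy_issnapshot(rev):
    if rev == nullrev:
        return True
    base, p1, p2 = entries[rev]
    if base == rev or base == nullrev:
        return True
    if base == p1 or base == p2:
        return False
    return toy_issnapshot(base)

assert toy_issnapshot(0) and not toy_issnapshot(1) and toy_issnapshot(2)
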
1800 1688 def snapshotdepth(self, rev):
1801 1689 """number of snapshot in the chain before this one"""
1802 1690 if not self.issnapshot(rev):
1803 1691 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1804 1692 return len(self._deltachain(rev)[0]) - 1
1805 1693
1806 1694 def revdiff(self, rev1, rev2):
1807 1695 """return or calculate a delta between two revisions
1808 1696
1809 1697 The delta calculated is in binary form and is intended to be written to
1810 1698 revlog data directly. So this function needs raw revision data.
1811 1699 """
1812 1700 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1813 1701 return bytes(self._chunk(rev2))
1814 1702
1815 1703 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1816 1704
1817 1705 def _processflags(self, text, flags, operation, raw=False):
1818 1706 """deprecated entry point to access flag processors"""
1819 1707 msg = b'_processflag(...) use the specialized variant'
1820 1708 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1821 1709 if raw:
1822 1710 return text, flagutil.processflagsraw(self, text, flags)
1823 1711 elif operation == b'read':
1824 1712 return flagutil.processflagsread(self, text, flags)
1825 1713 else: # write operation
1826 1714 return flagutil.processflagswrite(self, text, flags)
1827 1715
1828 1716 def revision(self, nodeorrev, _df=None, raw=False):
1829 1717 """return an uncompressed revision of a given node or revision
1830 1718 number.
1831 1719
1832 1720 _df - an existing file handle to read from. (internal-only)
1833 1721 raw - an optional argument specifying if the revision data is to be
1834 1722 treated as raw data when applying flag transforms. 'raw' should be set
1835 1723 to True when generating changegroups or in debug commands.
1836 1724 """
1837 1725 if raw:
1838 1726 msg = (
1839 1727 b'revlog.revision(..., raw=True) is deprecated, '
1840 1728 b'use revlog.rawdata(...)'
1841 1729 )
1842 1730 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1843 1731 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1844 1732
1845 1733 def sidedata(self, nodeorrev, _df=None):
1846 1734 """a map of extra data related to the changeset but not part of the hash
1847 1735
1848 1736 This function currently returns a dictionary. However, a more
1849 1737 advanced mapping object will likely be used in the future for more
1850 1738 efficient/lazy code.
1851 1739 """
1852 1740 return self._revisiondata(nodeorrev, _df)[1]
1853 1741
1854 1742 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1855 1743 # deal with <nodeorrev> argument type
1856 1744 if isinstance(nodeorrev, int):
1857 1745 rev = nodeorrev
1858 1746 node = self.node(rev)
1859 1747 else:
1860 1748 node = nodeorrev
1861 1749 rev = None
1862 1750
1863 1751 # fast path the special `nullid` rev
1864 1752 if node == self.nullid:
1865 1753 return b"", {}
1866 1754
1867 1755 # ``rawtext`` is the text as stored inside the revlog. Might be the
1868 1756 # revision or might need to be processed to retrieve the revision.
1869 1757 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1870 1758
1871 1759 if self.version & 0xFFFF == REVLOGV2:
1872 1760 if rev is None:
1873 1761 rev = self.rev(node)
1874 1762 sidedata = self._sidedata(rev)
1875 1763 else:
1876 1764 sidedata = {}
1877 1765
1878 1766 if raw and validated:
1879 1767 # if we don't want to process the raw text and the raw
1880 1768 # text is cached, we can exit early.
1881 1769 return rawtext, sidedata
1882 1770 if rev is None:
1883 1771 rev = self.rev(node)
1884 1772 # the revlog's flags for this revision
1885 1773 # (usually altering its state or content)
1886 1774 flags = self.flags(rev)
1887 1775
1888 1776 if validated and flags == REVIDX_DEFAULT_FLAGS:
1889 1777 # no extra flags set, no flag processor runs, text = rawtext
1890 1778 return rawtext, sidedata
1891 1779
1892 1780 if raw:
1893 1781 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1894 1782 text = rawtext
1895 1783 else:
1896 1784 r = flagutil.processflagsread(self, rawtext, flags)
1897 1785 text, validatehash = r
1898 1786 if validatehash:
1899 1787 self.checkhash(text, node, rev=rev)
1900 1788 if not validated:
1901 1789 self._revisioncache = (node, rev, rawtext)
1902 1790
1903 1791 return text, sidedata
1904 1792
1905 1793 def _rawtext(self, node, rev, _df=None):
1906 1794 """return the possibly unvalidated rawtext for a revision
1907 1795
1908 1796 returns (rev, rawtext, validated)
1909 1797 """
1910 1798
1911 1799 # revision in the cache (could be useful to apply delta)
1912 1800 cachedrev = None
1913 1801 # An intermediate text to apply deltas to
1914 1802 basetext = None
1915 1803
1916 1804 # Check if we have the entry in cache
1917 1805 # The cache entry looks like (node, rev, rawtext)
1918 1806 if self._revisioncache:
1919 1807 if self._revisioncache[0] == node:
1920 1808 return (rev, self._revisioncache[2], True)
1921 1809 cachedrev = self._revisioncache[1]
1922 1810
1923 1811 if rev is None:
1924 1812 rev = self.rev(node)
1925 1813
1926 1814 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1927 1815 if stopped:
1928 1816 basetext = self._revisioncache[2]
1929 1817
1930 1818 # drop cache to save memory; the caller is expected to
1931 1819 # update self._revisioncache after validating the text
1932 1820 self._revisioncache = None
1933 1821
1934 1822 targetsize = None
1935 1823 rawsize = self.index[rev][2]
1936 1824 if 0 <= rawsize:
1937 1825 targetsize = 4 * rawsize
1938 1826
1939 1827 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1940 1828 if basetext is None:
1941 1829 basetext = bytes(bins[0])
1942 1830 bins = bins[1:]
1943 1831
1944 1832 rawtext = mdiff.patches(basetext, bins)
1945 1833 del basetext # let us have a chance to free memory early
1946 1834 return (rev, rawtext, False)
1947 1835
1948 1836 def _sidedata(self, rev):
1949 1837 """Return the sidedata for a given revision number."""
1950 1838 index_entry = self.index[rev]
1951 1839 sidedata_offset = index_entry[8]
1952 1840 sidedata_size = index_entry[9]
1953 1841
1954 1842 if self._inline:
1955 1843 sidedata_offset += self.index.entry_size * (1 + rev)
1956 1844 if sidedata_size == 0:
1957 1845 return {}
1958 1846
1959 1847 segment = self._getsegment(sidedata_offset, sidedata_size)
1960 1848 sidedata = sidedatautil.deserialize_sidedata(segment)
1961 1849 return sidedata
1962 1850
1963 1851 def rawdata(self, nodeorrev, _df=None):
1964 1852 """return an uncompressed raw data of a given node or revision number.
1965 1853
1966 1854 _df - an existing file handle to read from. (internal-only)
1967 1855 """
1968 1856 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1969 1857
1970 1858 def hash(self, text, p1, p2):
1971 1859 """Compute a node hash.
1972 1860
1973 1861 Available as a function so that subclasses can replace the hash
1974 1862 as needed.
1975 1863 """
1976 1864 return storageutil.hashrevisionsha1(text, p1, p2)
1977 1865
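# What storageutil.hashrevisionsha1 computes, as a standalone sketch:
# SHA-1 over the two parent nodes in sorted order followed by the text.
import hashlib

def toy_hashrevision(text, p1, p2):
    s = hashlib.sha1(min(p1, p2) + max(p1, p2))
    s.update(text)
    return s.digest()

nullid = b'\0' * 20
assert len(toy_hashrevision(b'file content\n', nullid, nullid)) == 20
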
1978 1866 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1979 1867 """Check node hash integrity.
1980 1868
1981 1869 Available as a function so that subclasses can extend hash mismatch
1982 1870 behaviors as needed.
1983 1871 """
1984 1872 try:
1985 1873 if p1 is None and p2 is None:
1986 1874 p1, p2 = self.parents(node)
1987 1875 if node != self.hash(text, p1, p2):
1988 1876 # Clear the revision cache on hash failure. The revision cache
1989 1877 # only stores the raw revision and clearing the cache does have
1990 1878 # the side-effect that we won't have a cache hit when the raw
1991 1879 # revision data is accessed. But this case should be rare and
1992 1880 # it is extra work to teach the cache about the hash
1993 1881 # verification state.
1994 1882 if self._revisioncache and self._revisioncache[0] == node:
1995 1883 self._revisioncache = None
1996 1884
1997 1885 revornode = rev
1998 1886 if revornode is None:
1999 1887 revornode = templatefilters.short(hex(node))
2000 1888 raise error.RevlogError(
2001 1889 _(b"integrity check failed on %s:%s")
2002 1890 % (self.indexfile, pycompat.bytestr(revornode))
2003 1891 )
2004 1892 except error.RevlogError:
2005 1893 if self._censorable and storageutil.iscensoredtext(text):
2006 1894 raise error.CensoredNodeError(self.indexfile, node, text)
2007 1895 raise
2008 1896
2009 1897 def _enforceinlinesize(self, tr, fp=None):
2010 1898 """Check if the revlog is too big for inline and convert if so.
2011 1899
2012 1900 This should be called after revisions are added to the revlog. If the
2013 1901 revlog has grown too large to be an inline revlog, it will convert it
2014 1902 to use multiple index and data files.
2015 1903 """
2016 1904 tiprev = len(self) - 1
2017 1905 if (
2018 1906 not self._inline
2019 1907 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2020 1908 ):
2021 1909 return
2022 1910
2023 1911 troffset = tr.findoffset(self.indexfile)
2024 1912 if troffset is None:
2025 1913 raise error.RevlogError(
2026 1914 _(b"%s not found in the transaction") % self.indexfile
2027 1915 )
2028 1916 trindex = 0
2029 1917 tr.add(self.datafile, 0)
2030 1918
2031 1919 if fp:
2032 1920 fp.flush()
2033 1921 fp.close()
2034 1922 # We can't use the cached file handle after close(). So prevent
2035 1923 # its usage.
2036 1924 self._writinghandles = None
2037 1925
2038 1926 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2039 1927 for r in self:
2040 1928 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2041 1929 if troffset <= self.start(r):
2042 1930 trindex = r
2043 1931
2044 1932 with self._indexfp(b'w') as fp:
2045 1933 self.version &= ~FLAG_INLINE_DATA
2046 1934 self._inline = False
2047 1935 for i in self:
2048 1936 e = self.index.entry_binary(i)
2049 1937 if i == 0:
2050 1938 header = self.index.pack_header(self.version)
2051 1939 e = header + e
2052 1940 fp.write(e)
2053 1941
2054 1942 # the temp file replaces the real index when we exit the context
2055 1943 # manager
2056 1944
2057 1945 tr.replace(self.indexfile, trindex * self.index.entry_size)
2058 1946 nodemaputil.setup_persistent_nodemap(tr, self)
2059 1947 self._chunkclear()
2060 1948
2061 1949 def _nodeduplicatecallback(self, transaction, node):
2062 1950 """called when trying to add a node already stored."""
2063 1951
2064 1952 def addrevision(
2065 1953 self,
2066 1954 text,
2067 1955 transaction,
2068 1956 link,
2069 1957 p1,
2070 1958 p2,
2071 1959 cachedelta=None,
2072 1960 node=None,
2073 1961 flags=REVIDX_DEFAULT_FLAGS,
2074 1962 deltacomputer=None,
2075 1963 sidedata=None,
2076 1964 ):
2077 1965 """add a revision to the log
2078 1966
2079 1967 text - the revision data to add
2080 1968 transaction - the transaction object used for rollback
2081 1969 link - the linkrev data to add
2082 1970 p1, p2 - the parent nodeids of the revision
2083 1971 cachedelta - an optional precomputed delta
2084 1972 node - nodeid of revision; typically node is not specified, and it is
2085 1973 computed by default as hash(text, p1, p2); however, subclasses might
2086 1974 use a different hashing method (and override checkhash() in that case)
2087 1975 flags - the known flags to set on the revision
2088 1976 deltacomputer - an optional deltacomputer instance shared between
2089 1977 multiple calls
2090 1978 """
2091 1979 if link == nullrev:
2092 1980 raise error.RevlogError(
2093 1981 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2094 1982 )
2095 1983
2096 1984 if sidedata is None:
2097 1985 sidedata = {}
2098 1986 elif not self.hassidedata:
2099 1987 raise error.ProgrammingError(
2100 1988 _(b"trying to add sidedata to a revlog that does not support them")
2101 1989 )
2102 1990
2103 1991 if flags:
2104 1992 node = node or self.hash(text, p1, p2)
2105 1993
2106 1994 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2107 1995
2108 1996 # If the flag processor modifies the revision data, ignore any provided
2109 1997 # cachedelta.
2110 1998 if rawtext != text:
2111 1999 cachedelta = None
2112 2000
2113 2001 if len(rawtext) > _maxentrysize:
2114 2002 raise error.RevlogError(
2115 2003 _(
2116 2004 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2117 2005 )
2118 2006 % (self.indexfile, len(rawtext))
2119 2007 )
2120 2008
2121 2009 node = node or self.hash(rawtext, p1, p2)
2122 2010 rev = self.index.get_rev(node)
2123 2011 if rev is not None:
2124 2012 return rev
2125 2013
2126 2014 if validatehash:
2127 2015 self.checkhash(rawtext, node, p1=p1, p2=p2)
2128 2016
2129 2017 return self.addrawrevision(
2130 2018 rawtext,
2131 2019 transaction,
2132 2020 link,
2133 2021 p1,
2134 2022 p2,
2135 2023 node,
2136 2024 flags,
2137 2025 cachedelta=cachedelta,
2138 2026 deltacomputer=deltacomputer,
2139 2027 sidedata=sidedata,
2140 2028 )
2141 2029
2142 2030 def addrawrevision(
2143 2031 self,
2144 2032 rawtext,
2145 2033 transaction,
2146 2034 link,
2147 2035 p1,
2148 2036 p2,
2149 2037 node,
2150 2038 flags,
2151 2039 cachedelta=None,
2152 2040 deltacomputer=None,
2153 2041 sidedata=None,
2154 2042 ):
2155 2043 """add a raw revision with known flags, node and parents
2156 2044 useful when reusing a revision not stored in this revlog (e.g. received
2157 2045 over the wire, or read from an external bundle).
2158 2046 """
2159 2047 dfh = None
2160 2048 if not self._inline:
2161 2049 dfh = self._datafp(b"a+")
2162 2050 ifh = self._indexfp(b"a+")
2163 2051 try:
2164 2052 return self._addrevision(
2165 2053 node,
2166 2054 rawtext,
2167 2055 transaction,
2168 2056 link,
2169 2057 p1,
2170 2058 p2,
2171 2059 flags,
2172 2060 cachedelta,
2173 2061 ifh,
2174 2062 dfh,
2175 2063 deltacomputer=deltacomputer,
2176 2064 sidedata=sidedata,
2177 2065 )
2178 2066 finally:
2179 2067 if dfh:
2180 2068 dfh.close()
2181 2069 ifh.close()
2182 2070
2183 2071 def compress(self, data):
2184 2072 """Generate a possibly-compressed representation of data."""
2185 2073 if not data:
2186 2074 return b'', data
2187 2075
2188 2076 compressed = self._compressor.compress(data)
2189 2077
2190 2078 if compressed:
2191 2079 # The revlog compressor added the header in the returned data.
2192 2080 return b'', compressed
2193 2081
2194 2082 if data[0:1] == b'\0':
2195 2083 return b'', data
2196 2084 return b'u', data
2197 2085
2198 2086 def decompress(self, data):
2199 2087 """Decompress a revlog chunk.
2200 2088
2201 2089 The chunk is expected to begin with a header identifying the
2202 2090 format type so it can be routed to an appropriate decompressor.
2203 2091 """
2204 2092 if not data:
2205 2093 return data
2206 2094
2207 2095 # Revlogs are read much more frequently than they are written and many
2208 2096 # chunks only take microseconds to decompress, so performance is
2209 2097 # important here.
2210 2098 #
2211 2099 # We can make a few assumptions about revlogs:
2212 2100 #
2213 2101 # 1) the majority of chunks will be compressed (as opposed to inline
2214 2102 # raw data).
2215 2103 # 2) decompressing *any* data will likely be at least 10x slower than
2216 2104 # returning raw inline data.
2217 2105 # 3) we want to prioritize common and officially supported compression
2218 2106 # engines
2219 2107 #
2220 2108 # It follows that we want to optimize for "decompress compressed data
2221 2109 # when encoded with common and officially supported compression engines"
2222 2110 # case over "raw data" and "data encoded by less common or non-official
2223 2111 # compression engines." That is why we have the inline lookup first
2224 2112 # followed by the compengines lookup.
2225 2113 #
2226 2114 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2227 2115 # compressed chunks. And this matters for changelog and manifest reads.
2228 2116 t = data[0:1]
2229 2117
2230 2118 if t == b'x':
2231 2119 try:
2232 2120 return _zlibdecompress(data)
2233 2121 except zlib.error as e:
2234 2122 raise error.RevlogError(
2235 2123 _(b'revlog decompress error: %s')
2236 2124 % stringutil.forcebytestr(e)
2237 2125 )
2238 2126 # '\0' is more common than 'u' so it goes first.
2239 2127 elif t == b'\0':
2240 2128 return data
2241 2129 elif t == b'u':
2242 2130 return util.buffer(data, 1)
2243 2131
2244 2132 try:
2245 2133 compressor = self._decompressors[t]
2246 2134 except KeyError:
2247 2135 try:
2248 2136 engine = util.compengines.forrevlogheader(t)
2249 2137 compressor = engine.revlogcompressor(self._compengineopts)
2250 2138 self._decompressors[t] = compressor
2251 2139 except KeyError:
2252 2140 raise error.RevlogError(
2253 2141 _(b'unknown compression type %s') % binascii.hexlify(t)
2254 2142 )
2255 2143
2256 2144 return compressor.decompress(data)
2257 2145
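# The one-byte chunk header convention dispatched on above, restricted
# to the zlib engine for brevity: b'x' is simply the first byte of a
# zlib stream, b'u' marks data stored uncompressed, and b'\0' or empty
# chunks pass through as-is. (The real compress() returns the header
# separately, as a (header, data) pair.)
import zlib

def toy_compress(data):
    comp = zlib.compress(data)
    if comp and len(comp) < len(data):
        return comp                     # begins with b'x' (0x78)
    if not data or data[0:1] == b'\0':
        return data
    return b'u' + data

def toy_decompress(chunk):
    t = chunk[0:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t == b'u':
        return chunk[1:]
    return chunk                        # empty or b'\0'-led raw data

payload = b'hello world' * 50
assert toy_decompress(toy_compress(payload)) == payload
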
2258 2146 def _addrevision(
2259 2147 self,
2260 2148 node,
2261 2149 rawtext,
2262 2150 transaction,
2263 2151 link,
2264 2152 p1,
2265 2153 p2,
2266 2154 flags,
2267 2155 cachedelta,
2268 2156 ifh,
2269 2157 dfh,
2270 2158 alwayscache=False,
2271 2159 deltacomputer=None,
2272 2160 sidedata=None,
2273 2161 ):
2274 2162 """internal function to add revisions to the log
2275 2163
2276 2164 see addrevision for argument descriptions.
2277 2165
2278 2166 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2279 2167
2280 2168 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2281 2169 be used.
2282 2170
2283 2171 invariants:
2284 2172 - rawtext is optional (can be None); if not set, cachedelta must be set.
2285 2173 if both are set, they must correspond to each other.
2286 2174 """
2287 2175 if node == self.nullid:
2288 2176 raise error.RevlogError(
2289 2177 _(b"%s: attempt to add null revision") % self.indexfile
2290 2178 )
2291 2179 if (
2292 2180 node == self.nodeconstants.wdirid
2293 2181 or node in self.nodeconstants.wdirfilenodeids
2294 2182 ):
2295 2183 raise error.RevlogError(
2296 2184 _(b"%s: attempt to add wdir revision") % self.indexfile
2297 2185 )
2298 2186
2299 2187 if self._inline:
2300 2188 fh = ifh
2301 2189 else:
2302 2190 fh = dfh
2303 2191
2304 2192 btext = [rawtext]
2305 2193
2306 2194 curr = len(self)
2307 2195 prev = curr - 1
2308 2196
2309 2197 offset = self._get_data_offset(prev)
2310 2198
2311 2199 if self._concurrencychecker:
2312 2200 if self._inline:
2313 2201 # offset is "as if" it were in the .d file, so we need to add on
2314 2202 # the size of the entry metadata.
2315 2203 self._concurrencychecker(
2316 2204 ifh, self.indexfile, offset + curr * self.index.entry_size
2317 2205 )
2318 2206 else:
2319 2207 # Entries in the .i are a consistent size.
2320 2208 self._concurrencychecker(
2321 2209 ifh, self.indexfile, curr * self.index.entry_size
2322 2210 )
2323 2211 self._concurrencychecker(dfh, self.datafile, offset)
2324 2212
2325 2213 p1r, p2r = self.rev(p1), self.rev(p2)
2326 2214
2327 2215 # full versions are inserted when the needed deltas
2328 2216 # become comparable to the uncompressed text
2329 2217 if rawtext is None:
2330 2218 # need the rawtext size before it is changed by flag processors, which
2331 2219 # is the non-raw size. use revlog explicitly to avoid filelog's extra
2332 2220 # logic that might remove metadata size.
2333 2221 textlen = mdiff.patchedsize(
2334 2222 revlog.size(self, cachedelta[0]), cachedelta[1]
2335 2223 )
2336 2224 else:
2337 2225 textlen = len(rawtext)
2338 2226
2339 2227 if deltacomputer is None:
2340 2228 deltacomputer = deltautil.deltacomputer(self)
2341 2229
2342 2230 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2343 2231
2344 2232 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2345 2233
2346 2234 if sidedata:
2347 2235 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2348 2236 sidedata_offset = offset + deltainfo.deltalen
2349 2237 else:
2350 2238 serialized_sidedata = b""
2351 2239 # Don't store the offset if the sidedata is empty; that way we can
2352 2240 # easily detect empty sidedata, and it will be no different from
2353 2241 # sidedata we add manually.
2354 2242 sidedata_offset = 0
2355 2243
2356 2244 e = (
2357 2245 offset_type(offset, flags),
2358 2246 deltainfo.deltalen,
2359 2247 textlen,
2360 2248 deltainfo.base,
2361 2249 link,
2362 2250 p1r,
2363 2251 p2r,
2364 2252 node,
2365 2253 sidedata_offset,
2366 2254 len(serialized_sidedata),
2367 2255 )
2368 2256
2369 2257 if self.version & 0xFFFF != REVLOGV2:
2370 2258 e = e[:8]
2371 2259
2372 2260 self.index.append(e)
2373 2261 entry = self.index.entry_binary(curr)
2374 2262 if curr == 0:
2375 2263 header = self.index.pack_header(self.version)
2376 2264 entry = header + entry
2377 2265 self._writeentry(
2378 2266 transaction,
2379 2267 ifh,
2380 2268 dfh,
2381 2269 entry,
2382 2270 deltainfo.data,
2383 2271 link,
2384 2272 offset,
2385 2273 serialized_sidedata,
2386 2274 )
2387 2275
2388 2276 rawtext = btext[0]
2389 2277
2390 2278 if alwayscache and rawtext is None:
2391 2279 rawtext = deltacomputer.buildtext(revinfo, fh)
2392 2280
2393 2281 if type(rawtext) == bytes: # only accept immutable objects
2394 2282 self._revisioncache = (node, curr, rawtext)
2395 2283 self._chainbasecache[curr] = deltainfo.chainbase
2396 2284 return curr
2397 2285
2398 2286 def _get_data_offset(self, prev):
2399 2287 """Returns the current offset in the (in-transaction) data file.
2400 2288 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2401 2289 file to store that information: since sidedata can be rewritten to the
2402 2290 end of the data file within a transaction, you can have cases where, for
2403 2291 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2404 2292 to `n - 1`'s sidedata being written after `n`'s data.
2405 2293
2406 2294 TODO cache this in a docket file before getting out of experimental."""
2407 2295 if self.version & 0xFFFF != REVLOGV2:
2408 2296 return self.end(prev)
2409 2297
2410 2298 offset = 0
2411 2299 for rev, entry in enumerate(self.index):
2412 2300 sidedata_end = entry[8] + entry[9]
2413 2301 # Sidedata for a previous rev has potentially been written after
2414 2302 # this rev's end, so take the max.
2415 2303 offset = max(self.end(rev), offset, sidedata_end)
2416 2304 return offset
2417 2305
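# The O(n) scan above with concrete, hypothetical numbers: under revlog
# v2 a revision's sidedata may be rewritten past a later revision's
# data, so the next write offset is the max over both ends of every
# revision.
ends = [(100, 0), (220, 380), (300, 0)]   # (data end, sidedata end) per rev
offset = 0
for data_end, sidedata_end in ends:
    offset = max(offset, data_end, sidedata_end)
assert offset == 380
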
2418 2306 def _writeentry(
2419 2307 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2420 2308 ):
2421 2309 # Files opened in a+ mode have inconsistent behavior on various
2422 2310 # platforms. Windows requires that a file positioning call be made
2423 2311 # when the file handle transitions between reads and writes. See
2424 2312 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2425 2313 # platforms, Python or the platform itself can be buggy. Some versions
2426 2314 # of Solaris have been observed to not append at the end of the file
2427 2315 # if the file was seeked to before the end. See issue4943 for more.
2428 2316 #
2429 2317 # We work around this issue by inserting a seek() before writing.
2430 2318 # Note: This is likely not necessary on Python 3. However, because
2431 2319 # the file handle is reused for reads and may be seeked there, we need
2432 2320 # to be careful before changing this.
2433 2321 ifh.seek(0, os.SEEK_END)
2434 2322 if dfh:
2435 2323 dfh.seek(0, os.SEEK_END)
2436 2324
2437 2325 curr = len(self) - 1
2438 2326 if not self._inline:
2439 2327 transaction.add(self.datafile, offset)
2440 2328 transaction.add(self.indexfile, curr * len(entry))
2441 2329 if data[0]:
2442 2330 dfh.write(data[0])
2443 2331 dfh.write(data[1])
2444 2332 if sidedata:
2445 2333 dfh.write(sidedata)
2446 2334 ifh.write(entry)
2447 2335 else:
2448 2336 offset += curr * self.index.entry_size
2449 2337 transaction.add(self.indexfile, offset)
2450 2338 ifh.write(entry)
2451 2339 ifh.write(data[0])
2452 2340 ifh.write(data[1])
2453 2341 if sidedata:
2454 2342 ifh.write(sidedata)
2455 2343 self._enforceinlinesize(transaction, ifh)
2456 2344 nodemaputil.setup_persistent_nodemap(transaction, self)
2457 2345
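# The defensive append pattern used above, in isolation: never trust
# the current position of a shared read/write handle; reposition to EOF
# explicitly before every append.
import os
import tempfile

with tempfile.TemporaryFile() as fh:
    fh.write(b'abc')
    fh.seek(0)                    # a read elsewhere moved the position...
    fh.read(1)
    fh.seek(0, os.SEEK_END)       # ...so seek back to EOF before appending
    fh.write(b'def')
    fh.seek(0)
    assert fh.read() == b'abcdef'
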
2458 2346 def addgroup(
2459 2347 self,
2460 2348 deltas,
2461 2349 linkmapper,
2462 2350 transaction,
2463 2351 alwayscache=False,
2464 2352 addrevisioncb=None,
2465 2353 duplicaterevisioncb=None,
2466 2354 ):
2467 2355 """
2468 2356 add a delta group
2469 2357
2470 2358 given a set of deltas, add them to the revision log. the
2471 2359 first delta is against its parent, which should be in our
2472 2360 log, the rest are against the previous delta.
2473 2361
2474 2362 If ``addrevisioncb`` is defined, it will be called with arguments of
2475 2363 this revlog and the node that was added.
2476 2364 """
2477 2365
2478 2366 if self._writinghandles:
2479 2367 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2480 2368
2481 2369 r = len(self)
2482 2370 end = 0
2483 2371 if r:
2484 2372 end = self.end(r - 1)
2485 2373 ifh = self._indexfp(b"a+")
2486 2374 isize = r * self.index.entry_size
2487 2375 if self._inline:
2488 2376 transaction.add(self.indexfile, end + isize)
2489 2377 dfh = None
2490 2378 else:
2491 2379 transaction.add(self.indexfile, isize)
2492 2380 transaction.add(self.datafile, end)
2493 2381 dfh = self._datafp(b"a+")
2494 2382
2495 2383 def flush():
2496 2384 if dfh:
2497 2385 dfh.flush()
2498 2386 ifh.flush()
2499 2387
2500 2388 self._writinghandles = (ifh, dfh)
2501 2389 empty = True
2502 2390
2503 2391 try:
2504 2392 deltacomputer = deltautil.deltacomputer(self)
2505 2393 # loop through our set of deltas
2506 2394 for data in deltas:
2507 2395 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2508 2396 link = linkmapper(linknode)
2509 2397 flags = flags or REVIDX_DEFAULT_FLAGS
2510 2398
2511 2399 rev = self.index.get_rev(node)
2512 2400 if rev is not None:
2513 2401 # this can happen if two branches make the same change
2514 2402 self._nodeduplicatecallback(transaction, rev)
2515 2403 if duplicaterevisioncb:
2516 2404 duplicaterevisioncb(self, rev)
2517 2405 empty = False
2518 2406 continue
2519 2407
2520 2408 for p in (p1, p2):
2521 2409 if not self.index.has_node(p):
2522 2410 raise error.LookupError(
2523 2411 p, self.indexfile, _(b'unknown parent')
2524 2412 )
2525 2413
2526 2414 if not self.index.has_node(deltabase):
2527 2415 raise error.LookupError(
2528 2416 deltabase, self.indexfile, _(b'unknown delta base')
2529 2417 )
2530 2418
2531 2419 baserev = self.rev(deltabase)
2532 2420
2533 2421 if baserev != nullrev and self.iscensored(baserev):
2534 2422 # if base is censored, delta must be full replacement in a
2535 2423 # single patch operation
2536 2424 hlen = struct.calcsize(b">lll")
2537 2425 oldlen = self.rawsize(baserev)
2538 2426 newlen = len(delta) - hlen
2539 2427 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2540 2428 raise error.CensoredBaseError(
2541 2429 self.indexfile, self.node(baserev)
2542 2430 )
2543 2431
2544 2432 if not flags and self._peek_iscensored(baserev, delta, flush):
2545 2433 flags |= REVIDX_ISCENSORED
2546 2434
2547 2435 # We assume consumers of addrevisioncb will want to retrieve
2548 2436 # the added revision, which will require a call to
2549 2437 # revision(). revision() will fast path if there is a cache
2550 2438 # hit. So, we tell _addrevision() to always cache in this case.
2551 2439 # We're only using addgroup() in the context of changegroup
2552 2440 # generation so the revision data can always be handled as raw
2553 2441 # by the flagprocessor.
2554 2442 rev = self._addrevision(
2555 2443 node,
2556 2444 None,
2557 2445 transaction,
2558 2446 link,
2559 2447 p1,
2560 2448 p2,
2561 2449 flags,
2562 2450 (baserev, delta),
2563 2451 ifh,
2564 2452 dfh,
2565 2453 alwayscache=alwayscache,
2566 2454 deltacomputer=deltacomputer,
2567 2455 sidedata=sidedata,
2568 2456 )
2569 2457
2570 2458 if addrevisioncb:
2571 2459 addrevisioncb(self, rev)
2572 2460 empty = False
2573 2461
2574 2462 if not dfh and not self._inline:
2575 2463 # addrevision switched from inline to conventional
2576 2464 # reopen the index
2577 2465 ifh.close()
2578 2466 dfh = self._datafp(b"a+")
2579 2467 ifh = self._indexfp(b"a+")
2580 2468 self._writinghandles = (ifh, dfh)
2581 2469 finally:
2582 2470 self._writinghandles = None
2583 2471
2584 2472 if dfh:
2585 2473 dfh.close()
2586 2474 ifh.close()
2587 2475 return not empty
2588 2476
2589 2477 def iscensored(self, rev):
2590 2478 """Check if a file revision is censored."""
2591 2479 if not self._censorable:
2592 2480 return False
2593 2481
2594 2482 return self.flags(rev) & REVIDX_ISCENSORED
2595 2483
2596 2484 def _peek_iscensored(self, baserev, delta, flush):
2597 2485 """Quickly check if a delta produces a censored revision."""
2598 2486 if not self._censorable:
2599 2487 return False
2600 2488
2601 2489 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2602 2490
2603 2491 def getstrippoint(self, minlink):
2604 2492 """find the minimum rev that must be stripped to strip the linkrev
2605 2493
2606 2494 Returns a tuple containing the minimum rev and a set of all revs that
2607 2495 have linkrevs that will be broken by this strip.
2608 2496 """
2609 2497 return storageutil.resolvestripinfo(
2610 2498 minlink,
2611 2499 len(self) - 1,
2612 2500 self.headrevs(),
2613 2501 self.linkrev,
2614 2502 self.parentrevs,
2615 2503 )
2616 2504
2617 2505 def strip(self, minlink, transaction):
2618 2506 """truncate the revlog on the first revision with a linkrev >= minlink
2619 2507
2620 2508 This function is called when we're stripping revision minlink and
2621 2509 its descendants from the repository.
2622 2510
2623 2511 We have to remove all revisions with linkrev >= minlink, because
2624 2512 the equivalent changelog revisions will be renumbered after the
2625 2513 strip.
2626 2514
2627 2515 So we truncate the revlog on the first of these revisions, and
2628 2516 trust that the caller has saved the revisions that shouldn't be
2629 2517 removed and that it'll re-add them after this truncation.
2630 2518 """
2631 2519 if len(self) == 0:
2632 2520 return
2633 2521
2634 2522 rev, _ = self.getstrippoint(minlink)
2635 2523 if rev == len(self):
2636 2524 return
2637 2525
2638 2526 # first truncate the files on disk
2639 2527 end = self.start(rev)
2640 2528 if not self._inline:
2641 2529 transaction.add(self.datafile, end)
2642 2530 end = rev * self.index.entry_size
2643 2531 else:
2644 2532 end += rev * self.index.entry_size
2645 2533
2646 2534 transaction.add(self.indexfile, end)
2647 2535
2648 2536 # then reset internal state in memory to forget those revisions
2649 2537 self._revisioncache = None
2650 2538 self._chaininfocache = util.lrucachedict(500)
2651 2539 self._chunkclear()
2652 2540
2653 2541 del self.index[rev:-1]
2654 2542
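# The two truncation offsets computed above, side by side (sizes are
# illustrative): a split revlog truncates .i at a whole number of
# entries, while an inline revlog truncates a single file holding
# interleaved entries and data.
entry_size = 64
rev = 10
data_start = 4096                              # self.start(rev)
split_index_end = rev * entry_size             # offset for the .i file
inline_end = data_start + rev * entry_size     # offset for the single file
assert (split_index_end, inline_end) == (640, 4736)
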
2655 2543 def checksize(self):
2656 2544 """Check size of index and data files
2657 2545
2658 2546 return a (dd, di) tuple.
2659 2547 - dd: extra bytes for the "data" file
2660 2548 - di: extra bytes for the "index" file
2661 2549
2662 2550 A healthy revlog will return (0, 0).
2663 2551 """
2664 2552 expected = 0
2665 2553 if len(self):
2666 2554 expected = max(0, self.end(len(self) - 1))
2667 2555
2668 2556 try:
2669 2557 with self._datafp() as f:
2670 2558 f.seek(0, io.SEEK_END)
2671 2559 actual = f.tell()
2672 2560 dd = actual - expected
2673 2561 except IOError as inst:
2674 2562 if inst.errno != errno.ENOENT:
2675 2563 raise
2676 2564 dd = 0
2677 2565
2678 2566 try:
2679 2567 f = self.opener(self.indexfile)
2680 2568 f.seek(0, io.SEEK_END)
2681 2569 actual = f.tell()
2682 2570 f.close()
2683 2571 s = self.index.entry_size
2684 2572 i = max(0, actual // s)
2685 2573 di = actual - (i * s)
2686 2574 if self._inline:
2687 2575 databytes = 0
2688 2576 for r in self:
2689 2577 databytes += max(0, self.length(r))
2690 2578 dd = 0
2691 2579 di = actual - len(self) * s - databytes
2692 2580 except IOError as inst:
2693 2581 if inst.errno != errno.ENOENT:
2694 2582 raise
2695 2583 di = 0
2696 2584
2697 2585 return (dd, di)
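# A worked example of the (dd, di) health check above for a non-inline
# revlog, with hypothetical sizes: di is whatever the index file holds
# beyond a whole number of entries; a healthy revlog yields (0, 0).
entry_size = 64
expected_data_end = 5000                   # self.end(len(self) - 1)
actual_data_size = 5000
actual_index_size = 10 * entry_size + 3    # three trailing garbage bytes
dd = actual_data_size - expected_data_end
di = actual_index_size - (actual_index_size // entry_size) * entry_size
assert (dd, di) == (0, 3)
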
2698 2586
2699 2587 def files(self):
2700 2588 res = [self.indexfile]
2701 2589 if not self._inline:
2702 2590 res.append(self.datafile)
2703 2591 return res
2704 2592
2705 2593 def emitrevisions(
2706 2594 self,
2707 2595 nodes,
2708 2596 nodesorder=None,
2709 2597 revisiondata=False,
2710 2598 assumehaveparentrevisions=False,
2711 2599 deltamode=repository.CG_DELTAMODE_STD,
2712 2600 sidedata_helpers=None,
2713 2601 ):
2714 2602 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2715 2603 raise error.ProgrammingError(
2716 2604 b'unhandled value for nodesorder: %s' % nodesorder
2717 2605 )
2718 2606
2719 2607 if nodesorder is None and not self._generaldelta:
2720 2608 nodesorder = b'storage'
2721 2609
2722 2610 if (
2723 2611 not self._storedeltachains
2724 2612 and deltamode != repository.CG_DELTAMODE_PREV
2725 2613 ):
2726 2614 deltamode = repository.CG_DELTAMODE_FULL
2727 2615
2728 2616 return storageutil.emitrevisions(
2729 2617 self,
2730 2618 nodes,
2731 2619 nodesorder,
2732 2620 revlogrevisiondelta,
2733 2621 deltaparentfn=self.deltaparent,
2734 2622 candeltafn=self.candelta,
2735 2623 rawsizefn=self.rawsize,
2736 2624 revdifffn=self.revdiff,
2737 2625 flagsfn=self.flags,
2738 2626 deltamode=deltamode,
2739 2627 revisiondata=revisiondata,
2740 2628 assumehaveparentrevisions=assumehaveparentrevisions,
2741 2629 sidedata_helpers=sidedata_helpers,
2742 2630 )
2743 2631
2744 2632 DELTAREUSEALWAYS = b'always'
2745 2633 DELTAREUSESAMEREVS = b'samerevs'
2746 2634 DELTAREUSENEVER = b'never'
2747 2635
2748 2636 DELTAREUSEFULLADD = b'fulladd'
2749 2637
2750 2638 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2751 2639
2752 2640 def clone(
2753 2641 self,
2754 2642 tr,
2755 2643 destrevlog,
2756 2644 addrevisioncb=None,
2757 2645 deltareuse=DELTAREUSESAMEREVS,
2758 2646 forcedeltabothparents=None,
2759 2647 sidedatacompanion=None,
2760 2648 ):
2761 2649 """Copy this revlog to another, possibly with format changes.
2762 2650
2763 2651 The destination revlog will contain the same revisions and nodes.
2764 2652 However, it may not be bit-for-bit identical due to e.g. delta encoding
2765 2653 differences.
2766 2654
2767 2655 The ``deltareuse`` argument controls how deltas from the existing revlog
2768 2656 are preserved in the destination revlog. The argument can have the
2769 2657 following values:
2770 2658
2771 2659 DELTAREUSEALWAYS
2772 2660 Deltas will always be reused (if possible), even if the destination
2773 2661 revlog would not select the same revisions for the delta. This is the
2774 2662 fastest mode of operation.
2775 2663 DELTAREUSESAMEREVS
2776 2664 Deltas will be reused if the destination revlog would pick the same
2777 2665 revisions for the delta. This mode strikes a balance between speed
2778 2666 and optimization.
2779 2667 DELTAREUSENEVER
2780 2668 Deltas will never be reused. This is the slowest mode of execution.
2781 2669 This mode can be used to recompute deltas (e.g. if the diff/delta
2782 2670 algorithm changes).
2783 2671 DELTAREUSEFULLADD
2784 2672 Revisions will be re-added as if they were new content. This is
2785 2673 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2786 2674 e.g. large file detection and handling.
2787 2675
2788 2676 Delta computation can be slow, so the choice of delta reuse policy can
2789 2677 significantly affect run time.
2790 2678
2791 2679 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2792 2680 two extremes. Deltas will be reused if they are appropriate. But if the
2793 2681 delta could choose a better revision, it will do so. This means if you
2794 2682 are converting a non-generaldelta revlog to a generaldelta revlog,
2795 2683 deltas will be recomputed if the delta's parent isn't a parent of the
2796 2684 revision.
2797 2685
2798 2686 In addition to the delta policy, the ``forcedeltabothparents``
2799 2687 argument controls whether to force computing deltas against both parents
2800 2688 for merges. When None, the destination revlog's current setting is used.
2801 2689
2802 2690 If not None, `sidedatacompanion` is a callable that accepts two
2803 2691 arguments:
2804 2692
2805 2693 (srcrevlog, rev)
2806 2694
2807 2695 and returns a quintuple that controls changes to sidedata content from the
2808 2696 old revision to the new clone result:
2809 2697
2810 2698 (dropall, filterout, update, new_flags, dropped_flags)
2811 2699
2812 2700 * if `dropall` is True, all sidedata should be dropped
2813 2701 * `filterout` is a set of sidedata keys that should be dropped
2814 2702 * `update` is a mapping of additional/new key -> value
2815 2703 * `new_flags` is a bitfield of new flags that the revision should get
2816 2704 * `dropped_flags` is a bitfield of flags that the revision should no longer have
2817 2705 """
2818 2706 if deltareuse not in self.DELTAREUSEALL:
2819 2707 raise ValueError(
2820 2708 _(b'value for deltareuse invalid: %s') % deltareuse
2821 2709 )
2822 2710
2823 2711 if len(destrevlog):
2824 2712 raise ValueError(_(b'destination revlog is not empty'))
2825 2713
2826 2714 if getattr(self, 'filteredrevs', None):
2827 2715 raise ValueError(_(b'source revlog has filtered revisions'))
2828 2716 if getattr(destrevlog, 'filteredrevs', None):
2829 2717 raise ValueError(_(b'destination revlog has filtered revisions'))
2830 2718
2831 2719 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2832 2720 # if possible.
2833 2721 oldlazydelta = destrevlog._lazydelta
2834 2722 oldlazydeltabase = destrevlog._lazydeltabase
2835 2723 oldamd = destrevlog._deltabothparents
2836 2724
2837 2725 try:
2838 2726 if deltareuse == self.DELTAREUSEALWAYS:
2839 2727 destrevlog._lazydeltabase = True
2840 2728 destrevlog._lazydelta = True
2841 2729 elif deltareuse == self.DELTAREUSESAMEREVS:
2842 2730 destrevlog._lazydeltabase = False
2843 2731 destrevlog._lazydelta = True
2844 2732 elif deltareuse == self.DELTAREUSENEVER:
2845 2733 destrevlog._lazydeltabase = False
2846 2734 destrevlog._lazydelta = False
2847 2735
2848 2736 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2849 2737
2850 2738 self._clone(
2851 2739 tr,
2852 2740 destrevlog,
2853 2741 addrevisioncb,
2854 2742 deltareuse,
2855 2743 forcedeltabothparents,
2856 2744 sidedatacompanion,
2857 2745 )
2858 2746
2859 2747 finally:
2860 2748 destrevlog._lazydelta = oldlazydelta
2861 2749 destrevlog._lazydeltabase = oldlazydeltabase
2862 2750 destrevlog._deltabothparents = oldamd
2863 2751
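# A hypothetical use of clone(): `src`, `dst` and `tr` stand for an
# existing revlog, an empty destination revlog and an open transaction
# (none of them defined here); DELTAREUSENEVER forces every delta to be
# recomputed in the destination.
#
#   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
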
2864 2752 def _clone(
2865 2753 self,
2866 2754 tr,
2867 2755 destrevlog,
2868 2756 addrevisioncb,
2869 2757 deltareuse,
2870 2758 forcedeltabothparents,
2871 2759 sidedatacompanion,
2872 2760 ):
2873 2761 """perform the core duty of `revlog.clone` after parameter processing"""
2874 2762 deltacomputer = deltautil.deltacomputer(destrevlog)
2875 2763 index = self.index
2876 2764 for rev in self:
2877 2765 entry = index[rev]
2878 2766
2879 2767 # Some classes override linkrev to take filtered revs into
2880 2768 # account. Use raw entry from index.
2881 2769 flags = entry[0] & 0xFFFF
2882 2770 linkrev = entry[4]
2883 2771 p1 = index[entry[5]][7]
2884 2772 p2 = index[entry[6]][7]
2885 2773 node = entry[7]
2886 2774
2887 2775 sidedataactions = (False, [], {}, 0, 0)
2888 2776 if sidedatacompanion is not None:
2889 2777 sidedataactions = sidedatacompanion(self, rev)
2890 2778
2891 2779 # (Possibly) reuse the delta from the revlog if allowed and
2892 2780 # the revlog chunk is a delta.
2893 2781 cachedelta = None
2894 2782 rawtext = None
2895 2783 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2896 2784 dropall = sidedataactions[0]
2897 2785 filterout = sidedataactions[1]
2898 2786 update = sidedataactions[2]
2899 2787 new_flags = sidedataactions[3]
2900 2788 dropped_flags = sidedataactions[4]
2901 2789 text, sidedata = self._revisiondata(rev)
2902 2790 if dropall:
2903 2791 sidedata = {}
2904 2792 for key in filterout:
2905 2793 sidedata.pop(key, None)
2906 2794 sidedata.update(update)
2907 2795 if not sidedata:
2908 2796 sidedata = None
2909 2797
2910 2798 flags |= new_flags
2911 2799 flags &= ~dropped_flags
2912 2800
2913 2801 destrevlog.addrevision(
2914 2802 text,
2915 2803 tr,
2916 2804 linkrev,
2917 2805 p1,
2918 2806 p2,
2919 2807 cachedelta=cachedelta,
2920 2808 node=node,
2921 2809 flags=flags,
2922 2810 deltacomputer=deltacomputer,
2923 2811 sidedata=sidedata,
2924 2812 )
2925 2813 else:
2926 2814 if destrevlog._lazydelta:
2927 2815 dp = self.deltaparent(rev)
2928 2816 if dp != nullrev:
2929 2817 cachedelta = (dp, bytes(self._chunk(rev)))
2930 2818
2931 2819 if not cachedelta:
2932 2820 rawtext = self.rawdata(rev)
2933 2821
2934 2822 ifh = destrevlog.opener(
2935 2823 destrevlog.indexfile, b'a+', checkambig=False
2936 2824 )
2937 2825 dfh = None
2938 2826 if not destrevlog._inline:
2939 2827 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2940 2828 try:
2941 2829 destrevlog._addrevision(
2942 2830 node,
2943 2831 rawtext,
2944 2832 tr,
2945 2833 linkrev,
2946 2834 p1,
2947 2835 p2,
2948 2836 flags,
2949 2837 cachedelta,
2950 2838 ifh,
2951 2839 dfh,
2952 2840 deltacomputer=deltacomputer,
2953 2841 )
2954 2842 finally:
2955 2843 if dfh:
2956 2844 dfh.close()
2957 2845 ifh.close()
2958 2846
2959 2847 if addrevisioncb:
2960 2848 addrevisioncb(self, rev, node)
2961 2849
2962 2850 def censorrevision(self, tr, censornode, tombstone=b''):
2963 2851 if (self.version & 0xFFFF) == REVLOGV0:
2964 2852 raise error.RevlogError(
2965 2853 _(b'cannot censor with version %d revlogs') % self.version
2966 2854 )
2967 2855
2968 2856 censorrev = self.rev(censornode)
2969 2857 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2970 2858
2971 2859 if len(tombstone) > self.rawsize(censorrev):
2972 2860 raise error.Abort(
2973 2861 _(b'censor tombstone must be no longer than censored data')
2974 2862 )
2975 2863
2976 2864 # Rewriting the revlog in place is hard. Our strategy for censoring is
2977 2865 # to create a new revlog, copy all revisions to it, then replace the
2978 2866 # revlogs on transaction close.
2979 2867
2980 2868 newindexfile = self.indexfile + b'.tmpcensored'
2981 2869 newdatafile = self.datafile + b'.tmpcensored'
2982 2870
2983 2871 # This is a bit dangerous. We could easily have a mismatch of state.
2984 2872 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2985 2873 newrl.version = self.version
2986 2874 newrl._generaldelta = self._generaldelta
2987 2875 newrl._parse_index = self._parse_index
2988 2876
2989 2877 for rev in self.revs():
2990 2878 node = self.node(rev)
2991 2879 p1, p2 = self.parents(node)
2992 2880
2993 2881 if rev == censorrev:
2994 2882 newrl.addrawrevision(
2995 2883 tombstone,
2996 2884 tr,
2997 2885 self.linkrev(censorrev),
2998 2886 p1,
2999 2887 p2,
3000 2888 censornode,
3001 2889 REVIDX_ISCENSORED,
3002 2890 )
3003 2891
3004 2892 if newrl.deltaparent(rev) != nullrev:
3005 2893 raise error.Abort(
3006 2894 _(
3007 2895 b'censored revision stored as delta; '
3008 2896 b'cannot censor'
3009 2897 ),
3010 2898 hint=_(
3011 2899 b'censoring of revlogs is not '
3012 2900 b'fully implemented; please report '
3013 2901 b'this bug'
3014 2902 ),
3015 2903 )
3016 2904 continue
3017 2905
3018 2906 if self.iscensored(rev):
3019 2907 if self.deltaparent(rev) != nullrev:
3020 2908 raise error.Abort(
3021 2909 _(
3022 2910 b'cannot censor due to censored '
3023 2911 b'revision having delta stored'
3024 2912 )
3025 2913 )
3026 2914 rawtext = self._chunk(rev)
3027 2915 else:
3028 2916 rawtext = self.rawdata(rev)
3029 2917
3030 2918 newrl.addrawrevision(
3031 2919 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3032 2920 )
3033 2921
3034 2922 tr.addbackup(self.indexfile, location=b'store')
3035 2923 if not self._inline:
3036 2924 tr.addbackup(self.datafile, location=b'store')
3037 2925
3038 2926 self.opener.rename(newrl.indexfile, self.indexfile)
3039 2927 if not self._inline:
3040 2928 self.opener.rename(newrl.datafile, self.datafile)
3041 2929
3042 2930 self.clearcaches()
3043 2931 self._loadindex()
3044 2932
3045 2933 def verifyintegrity(self, state):
3046 2934 """Verifies the integrity of the revlog.
3047 2935
3048 2936 Yields ``revlogproblem`` instances describing problems that are
3049 2937 found.
3050 2938 """
3051 2939 dd, di = self.checksize()
3052 2940 if dd:
3053 2941 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3054 2942 if di:
3055 2943 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3056 2944
3057 2945 version = self.version & 0xFFFF
3058 2946
3059 2947 # The verifier tells us what version revlog we should be.
3060 2948 if version != state[b'expectedversion']:
3061 2949 yield revlogproblem(
3062 2950 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3063 2951 % (self.indexfile, version, state[b'expectedversion'])
3064 2952 )
3065 2953
3066 2954 state[b'skipread'] = set()
3067 2955 state[b'safe_renamed'] = set()
3068 2956
3069 2957 for rev in self:
3070 2958 node = self.node(rev)
3071 2959
3072 2960 # Verify contents. 4 cases to care about:
3073 2961 #
3074 2962 # common: the most common case
3075 2963 # rename: with a rename
3076 2964 # meta: file content starts with b'\1\n', the metadata
3077 2965 # header defined in filelog.py, but without a rename
3078 2966 # ext: content stored externally
3079 2967 #
3080 2968 # More formally, their differences are shown below:
3081 2969 #
3082 2970 # | common | rename | meta | ext
3083 2971 # -------------------------------------------------------
3084 2972 # flags() | 0 | 0 | 0 | not 0
3085 2973 # renamed() | False | True | False | ?
3086 2974 # rawtext[0:2]=='\1\n'| False | True | True | ?
3087 2975 #
3088 2976 # "rawtext" means the raw text stored in revlog data, which
3089 2977 # could be retrieved by "rawdata(rev)". "text"
3090 2978 # mentioned below is "revision(rev)".
3091 2979 #
3092 2980 # There are 3 different lengths stored physically:
3093 2981 # 1. L1: rawsize, stored in revlog index
3094 2982 # 2. L2: len(rawtext), stored in revlog data
3095 2983 # 3. L3: len(text), stored in revlog data if flags==0, or
3096 2984 # possibly somewhere else if flags!=0
3097 2985 #
3098 2986 # L1 should be equal to L2. L3 could be different from them.
3099 2987 # "text" may or may not affect commit hash depending on flag
3100 2988 # processors (see flagutil.addflagprocessor).
3101 2989 #
3102 2990 # | common | rename | meta | ext
3103 2991 # -------------------------------------------------
3104 2992 # rawsize() | L1 | L1 | L1 | L1
3105 2993 # size() | L1 | L2-LM | L1(*) | L1 (?)
3106 2994 # len(rawtext) | L2 | L2 | L2 | L2
3107 2995 # len(text) | L2 | L2 | L2 | L3
3108 2996 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3109 2997 #
3110 2998 # LM: length of metadata, depending on rawtext
3111 2999 # (*): not ideal, see comment in filelog.size
3112 3000 # (?): could be "- len(meta)" if the resolved content has
3113 3001 # rename metadata
3114 3002 #
3115 3003 # Checks needed to be done:
3116 3004 # 1. length check: L1 == L2, in all cases.
3117 3005 # 2. hash check: depending on flag processor, we may need to
3118 3006 # use either "text" (external), or "rawtext" (in revlog).
3119 3007
3120 3008 try:
3121 3009 skipflags = state.get(b'skipflags', 0)
3122 3010 if skipflags:
3123 3011 skipflags &= self.flags(rev)
3124 3012
3125 3013 _verify_revision(self, skipflags, state, node)
3126 3014
3127 3015 l1 = self.rawsize(rev)
3128 3016 l2 = len(self.rawdata(node))
3129 3017
3130 3018 if l1 != l2:
3131 3019 yield revlogproblem(
3132 3020 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3133 3021 node=node,
3134 3022 )
3135 3023
3136 3024 except error.CensoredNodeError:
3137 3025 if state[b'erroroncensored']:
3138 3026 yield revlogproblem(
3139 3027 error=_(b'censored file data'), node=node
3140 3028 )
3141 3029 state[b'skipread'].add(node)
3142 3030 except Exception as e:
3143 3031 yield revlogproblem(
3144 3032 error=_(b'unpacking %s: %s')
3145 3033 % (short(node), stringutil.forcebytestr(e)),
3146 3034 node=node,
3147 3035 )
3148 3036 state[b'skipread'].add(node)
3149 3037
3150 3038 def storageinfo(
3151 3039 self,
3152 3040 exclusivefiles=False,
3153 3041 sharedfiles=False,
3154 3042 revisionscount=False,
3155 3043 trackedsize=False,
3156 3044 storedsize=False,
3157 3045 ):
3158 3046 d = {}
3159 3047
3160 3048 if exclusivefiles:
3161 3049 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3162 3050 if not self._inline:
3163 3051 d[b'exclusivefiles'].append((self.opener, self.datafile))
3164 3052
3165 3053 if sharedfiles:
3166 3054 d[b'sharedfiles'] = []
3167 3055
3168 3056 if revisionscount:
3169 3057 d[b'revisionscount'] = len(self)
3170 3058
3171 3059 if trackedsize:
3172 3060 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3173 3061
3174 3062 if storedsize:
3175 3063 d[b'storedsize'] = sum(
3176 3064 self.opener.stat(path).st_size for path in self.files()
3177 3065 )
3178 3066
3179 3067 return d
3180 3068
3181 3069 def rewrite_sidedata(self, helpers, startrev, endrev):
3182 3070 if self.version & 0xFFFF != REVLOGV2:
3183 3071 return
3184 3072 # inline revlogs are not yet supported because they suffer from an issue
3185 3073 # when rewriting them (since it's not an append-only operation).
3186 3074 # See issue6485.
3187 3075 assert not self._inline
3188 3076 if not helpers[1] and not helpers[2]:
3189 3077 # Nothing to generate or remove
3190 3078 return
3191 3079
3192 3080 new_entries = []
3193 3081 # append the new sidedata
3194 3082 with self._datafp(b'a+') as fp:
3195 3083 # Maybe this bug still exists, see revlog._writeentry
3196 3084 fp.seek(0, os.SEEK_END)
3197 3085 current_offset = fp.tell()
3198 3086 for rev in range(startrev, endrev + 1):
3199 3087 entry = self.index[rev]
3200 3088 new_sidedata = storageutil.run_sidedata_helpers(
3201 3089 store=self,
3202 3090 sidedata_helpers=helpers,
3203 3091 sidedata={},
3204 3092 rev=rev,
3205 3093 )
3206 3094
3207 3095 serialized_sidedata = sidedatautil.serialize_sidedata(
3208 3096 new_sidedata
3209 3097 )
3210 3098 if entry[8] != 0 or entry[9] != 0:
3211 3099 # rewriting entries that already have sidedata is not
3212 3100 # supported yet, because it introduces garbage data in the
3213 3101 # revlog.
3214 3102 msg = b"Rewriting existing sidedata is not supported yet"
3215 3103 raise error.Abort(msg)
3216 3104 entry = entry[:8]
3217 3105 entry += (current_offset, len(serialized_sidedata))
3218 3106
3219 3107 fp.write(serialized_sidedata)
3220 3108 new_entries.append(entry)
3221 3109 current_offset += len(serialized_sidedata)
3222 3110
3223 3111 # rewrite the new index entries
3224 3112 with self._indexfp(b'w+') as fp:
3225 3113 fp.seek(startrev * self.index.entry_size)
3226 3114 for i, entry in enumerate(new_entries):
3227 3115 rev = startrev + i
3228 3116 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3229 3117 packed = self.index.entry_binary(rev)
3230 3118 if rev == 0:
3231 3119 header = self.index.pack_header(self.version)
3232 3120 packed = header + packed
3233 3121 fp.write(packed)