transaction: rename find to findoffset and drop backup file support...
Joerg Sonnenberger
r46482:a6f08085 default
@@ -1,3082 +1,3081 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_HASCOPIESINFO,
57 57 REVIDX_ISCENSORED,
58 58 REVIDX_RAWTEXT_CHANGING_FLAGS,
59 59 REVIDX_SIDEDATA,
60 60 )
61 61 from .thirdparty import attr
62 62 from . import (
63 63 ancestor,
64 64 dagop,
65 65 error,
66 66 mdiff,
67 67 policy,
68 68 pycompat,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 nodemap as nodemaputil,
80 80 sidedata as sidedatautil,
81 81 )
82 82 from .utils import (
83 83 storageutil,
84 84 stringutil,
85 85 )
86 86
87 87 # blanket usage of all the names to prevent pyflakes complaints
88 88 # We need these names available in the module for extensions.
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_SIDEDATA
102 102 REVIDX_HASCOPIESINFO
103 103 REVIDX_EXTSTORED
104 104 REVIDX_DEFAULT_FLAGS
105 105 REVIDX_FLAGS_ORDER
106 106 REVIDX_RAWTEXT_CHANGING_FLAGS
107 107
108 108 parsers = policy.importmod('parsers')
109 109 rustancestor = policy.importrust('ancestor')
110 110 rustdagop = policy.importrust('dagop')
111 111 rustrevlog = policy.importrust('revlog')
112 112
113 113 # Aliased for performance.
114 114 _zlibdecompress = zlib.decompress
115 115
116 116 # max size of revlog with inline data
117 117 _maxinline = 131072
118 118 _chunksize = 1048576
119 119
120 120 # Flag processors for REVIDX_ELLIPSIS.
121 121 def ellipsisreadprocessor(rl, text):
122 122 return text, False, {}
123 123
124 124
125 125 def ellipsiswriteprocessor(rl, text, sidedata):
126 126 return text, False
127 127
128 128
129 129 def ellipsisrawprocessor(rl, text):
130 130 return False
131 131
132 132
133 133 ellipsisprocessor = (
134 134 ellipsisreadprocessor,
135 135 ellipsiswriteprocessor,
136 136 ellipsisrawprocessor,
137 137 )
138 138
139 139
140 140 def getoffset(q):
141 141 return int(q >> 16)
142 142
143 143
144 144 def gettype(q):
145 145 return int(q & 0xFFFF)
146 146
147 147
148 148 def offset_type(offset, type):
149 149 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
150 150 raise ValueError(b'unknown revlog index flags')
151 151 return int(int(offset) << 16 | type)
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 @attr.s(slots=True, frozen=True)
165 165 class _revisioninfo(object):
166 166 """Information about a revision that allows building its fulltext
167 167 node: expected hash of the revision
168 168 p1, p2: parent revs of the revision
169 169 btext: built text cache consisting of a one-element list
170 170 cachedelta: (baserev, uncompressed_delta) or None
171 171 flags: flags associated with the revision storage
172 172
173 173 One of btext[0] or cachedelta must be set.
174 174 """
175 175
176 176 node = attr.ib()
177 177 p1 = attr.ib()
178 178 p2 = attr.ib()
179 179 btext = attr.ib()
180 180 textlen = attr.ib()
181 181 cachedelta = attr.ib()
182 182 flags = attr.ib()
183 183
184 184
185 185 @interfaceutil.implementer(repository.irevisiondelta)
186 186 @attr.s(slots=True)
187 187 class revlogrevisiondelta(object):
188 188 node = attr.ib()
189 189 p1node = attr.ib()
190 190 p2node = attr.ib()
191 191 basenode = attr.ib()
192 192 flags = attr.ib()
193 193 baserevisionsize = attr.ib()
194 194 revision = attr.ib()
195 195 delta = attr.ib()
196 196 linknode = attr.ib(default=None)
197 197
198 198
199 199 @interfaceutil.implementer(repository.iverifyproblem)
200 200 @attr.s(frozen=True)
201 201 class revlogproblem(object):
202 202 warning = attr.ib(default=None)
203 203 error = attr.ib(default=None)
204 204 node = attr.ib(default=None)
205 205
206 206
207 207 # index v0:
208 208 # 4 bytes: offset
209 209 # 4 bytes: compressed length
210 210 # 4 bytes: base rev
211 211 # 4 bytes: link rev
212 212 # 20 bytes: parent 1 nodeid
213 213 # 20 bytes: parent 2 nodeid
214 214 # 20 bytes: nodeid
215 215 indexformatv0 = struct.Struct(b">4l20s20s20s")
216 216 indexformatv0_pack = indexformatv0.pack
217 217 indexformatv0_unpack = indexformatv0.unpack
218 218
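# Each v0 index entry is therefore indexformatv0.size == 76 bytes
# (four 4-byte big-endian ints followed by three 20-byte node fields);
# revlogoldio.parseindex below walks the raw data in steps of that size.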
219 219
220 220 class revlogoldindex(list):
221 221 @property
222 222 def nodemap(self):
223 223 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
224 224 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
225 225 return self._nodemap
226 226
227 227 @util.propertycache
228 228 def _nodemap(self):
229 229 nodemap = nodemaputil.NodeMap({nullid: nullrev})
230 230 for r in range(0, len(self)):
231 231 n = self[r][7]
232 232 nodemap[n] = r
233 233 return nodemap
234 234
235 235 def has_node(self, node):
236 236 """return True if the node exists in the index"""
237 237 return node in self._nodemap
238 238
239 239 def rev(self, node):
240 240 """return a revision for a node
241 241
242 242 If the node is unknown, raise a RevlogError"""
243 243 return self._nodemap[node]
244 244
245 245 def get_rev(self, node):
246 246 """return a revision for a node
247 247
248 248 If the node is unknown, return None"""
249 249 return self._nodemap.get(node)
250 250
251 251 def append(self, tup):
252 252 self._nodemap[tup[7]] = len(self)
253 253 super(revlogoldindex, self).append(tup)
254 254
255 255 def __delitem__(self, i):
256 256 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
257 257 raise ValueError(b"deleting slices only supports a:-1 with step 1")
258 258 for r in pycompat.xrange(i.start, len(self)):
259 259 del self._nodemap[self[r][7]]
260 260 super(revlogoldindex, self).__delitem__(i)
261 261
262 262 def clearcaches(self):
263 263 self.__dict__.pop('_nodemap', None)
264 264
265 265 def __getitem__(self, i):
266 266 if i == -1:
267 267 return (0, 0, 0, -1, -1, -1, -1, nullid)
268 268 return list.__getitem__(self, i)
269 269
270 270
271 271 class revlogoldio(object):
272 272 def __init__(self):
273 273 self.size = indexformatv0.size
274 274
275 275 def parseindex(self, data, inline):
276 276 s = self.size
277 277 index = []
278 278 nodemap = nodemaputil.NodeMap({nullid: nullrev})
279 279 n = off = 0
280 280 l = len(data)
281 281 while off + s <= l:
282 282 cur = data[off : off + s]
283 283 off += s
284 284 e = indexformatv0_unpack(cur)
285 285 # transform to revlogv1 format
286 286 e2 = (
287 287 offset_type(e[0], 0),
288 288 e[1],
289 289 -1,
290 290 e[2],
291 291 e[3],
292 292 nodemap.get(e[4], nullrev),
293 293 nodemap.get(e[5], nullrev),
294 294 e[6],
295 295 )
296 296 index.append(e2)
297 297 nodemap[e[6]] = n
298 298 n += 1
299 299
300 300 index = revlogoldindex(index)
301 301 return index, None
302 302
303 303 def packentry(self, entry, node, version, rev):
304 304 if gettype(entry[0]):
305 305 raise error.RevlogError(
306 306 _(b'index entry flags need revlog version 1')
307 307 )
308 308 e2 = (
309 309 getoffset(entry[0]),
310 310 entry[1],
311 311 entry[3],
312 312 entry[4],
313 313 node(entry[5]),
314 314 node(entry[6]),
315 315 entry[7],
316 316 )
317 317 return indexformatv0_pack(*e2)
318 318
319 319
320 320 # index ng:
321 321 # 6 bytes: offset
322 322 # 2 bytes: flags
323 323 # 4 bytes: compressed length
324 324 # 4 bytes: uncompressed length
325 325 # 4 bytes: base rev
326 326 # 4 bytes: link rev
327 327 # 4 bytes: parent 1 rev
328 328 # 4 bytes: parent 2 rev
329 329 # 32 bytes: nodeid
330 330 indexformatng = struct.Struct(b">Qiiiiii20s12x")
331 331 indexformatng_pack = indexformatng.pack
332 332 versionformat = struct.Struct(b">I")
333 333 versionformat_pack = versionformat.pack
334 334 versionformat_unpack = versionformat.unpack
335 335
336 336 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
337 337 # signed integer)
338 338 _maxentrysize = 0x7FFFFFFF
339 339
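# For the v1 ("ng") format each index entry is indexformatng.size == 64
# bytes: an 8-byte offset/flags field, six 4-byte ints, a 20-byte nodeid
# and 12 bytes of padding.  As revlogio.packentry below shows, the first
# 4 bytes of the entry for rev 0 are overlaid with the version/flags header.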
340 340
341 341 class revlogio(object):
342 342 def __init__(self):
343 343 self.size = indexformatng.size
344 344
345 345 def parseindex(self, data, inline):
346 346 # call the C implementation to parse the index data
347 347 index, cache = parsers.parse_index2(data, inline)
348 348 return index, cache
349 349
350 350 def packentry(self, entry, node, version, rev):
351 351 p = indexformatng_pack(*entry)
352 352 if rev == 0:
353 353 p = versionformat_pack(version) + p[4:]
354 354 return p
355 355
356 356
357 357 NodemapRevlogIO = None
358 358
359 359 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
360 360
361 361 class NodemapRevlogIO(revlogio):
362 362 """A debug-oriented IO class that returns a PersistentNodeMapIndexObject
363 363
364 364 The PersistentNodeMapIndexObject is meant to test the persistent nodemap feature.
365 365 """
366 366
367 367 def parseindex(self, data, inline):
368 368 index, cache = parsers.parse_index_devel_nodemap(data, inline)
369 369 return index, cache
370 370
371 371
372 372 class rustrevlogio(revlogio):
373 373 def parseindex(self, data, inline):
374 374 index, cache = super(rustrevlogio, self).parseindex(data, inline)
375 375 return rustrevlog.MixedIndex(index), cache
376 376
377 377
378 378 class revlog(object):
379 379 """
380 380 the underlying revision storage object
381 381
382 382 A revlog consists of two parts, an index and the revision data.
383 383
384 384 The index is a file with a fixed record size containing
385 385 information on each revision, including its nodeid (hash), the
386 386 nodeids of its parents, the position and offset of its data within
387 387 the data file, and the revision it's based on. Finally, each entry
388 388 contains a linkrev entry that can serve as a pointer to external
389 389 data.
390 390
391 391 The revision data itself is a linear collection of data chunks.
392 392 Each chunk represents a revision and is usually represented as a
393 393 delta against the previous chunk. To bound lookup time, runs of
394 394 deltas are limited to about 2 times the length of the original
395 395 version data. This makes retrieval of a version proportional to
396 396 its size, or O(1) relative to the number of revisions.
397 397
398 398 Both pieces of the revlog are written to in an append-only
399 399 fashion, which means we never need to rewrite a file to insert or
400 400 remove data, and can use some simple techniques to avoid the need
401 401 for locking while reading.
402 402
403 403 If checkambig, indexfile is opened with checkambig=True at
404 404 writing, to avoid file stat ambiguity.
405 405
406 406 If mmaplargeindex is True, and an mmapindexthreshold is set, the
407 407 index will be mmapped rather than read if it is larger than the
408 408 configured threshold.
409 409
410 410 If censorable is True, the revlog can have censored revisions.
411 411
412 412 If `upperboundcomp` is not None, this is the expected maximal gain from
413 413 compression for the data content.
414 414 """
415 415
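# A hypothetical usage sketch (``opener`` is an assumed vfs-style callable
# supplied by the caller, not defined in this module):
#
#   rl = revlog(opener, b'data/foo.txt.i')
#   node = rl.node(0)          # nodeid of the first stored revision
#   text = rl.revision(node)   # fulltext with flag processors applied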
416 416 _flagserrorclass = error.RevlogError
417 417
418 418 def __init__(
419 419 self,
420 420 opener,
421 421 indexfile,
422 422 datafile=None,
423 423 checkambig=False,
424 424 mmaplargeindex=False,
425 425 censorable=False,
426 426 upperboundcomp=None,
427 427 persistentnodemap=False,
428 428 ):
429 429 """
430 430 create a revlog object
431 431
432 432 opener is a function that abstracts the file opening operation
433 433 and can be used to implement COW semantics or the like.
434 434
435 435 """
436 436 self.upperboundcomp = upperboundcomp
437 437 self.indexfile = indexfile
438 438 self.datafile = datafile or (indexfile[:-2] + b".d")
439 439 self.nodemap_file = None
440 440 if persistentnodemap:
441 441 if indexfile.endswith(b'.a'):
442 442 pending_path = indexfile[:-4] + b".n.a"
443 443 if opener.exists(pending_path):
444 444 self.nodemap_file = pending_path
445 445 else:
446 446 self.nodemap_file = indexfile[:-4] + b".n"
447 447 else:
448 448 self.nodemap_file = indexfile[:-2] + b".n"
449 449
450 450 self.opener = opener
451 451 # When True, indexfile is opened with checkambig=True at writing, to
452 452 # avoid file stat ambiguity.
453 453 self._checkambig = checkambig
454 454 self._mmaplargeindex = mmaplargeindex
455 455 self._censorable = censorable
456 456 # 3-tuple of (node, rev, text) for a raw revision.
457 457 self._revisioncache = None
458 458 # Maps rev to chain base rev.
459 459 self._chainbasecache = util.lrucachedict(100)
460 460 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
461 461 self._chunkcache = (0, b'')
462 462 # How much data to read and cache into the raw revlog data cache.
463 463 self._chunkcachesize = 65536
464 464 self._maxchainlen = None
465 465 self._deltabothparents = True
466 466 self.index = None
467 467 self._nodemap_docket = None
468 468 # Mapping of partial identifiers to full nodes.
469 469 self._pcache = {}
470 470 # Mapping of revision integer to full node.
471 471 self._compengine = b'zlib'
472 472 self._compengineopts = {}
473 473 self._maxdeltachainspan = -1
474 474 self._withsparseread = False
475 475 self._sparserevlog = False
476 476 self._srdensitythreshold = 0.50
477 477 self._srmingapsize = 262144
478 478
479 479 # Make copy of flag processors so each revlog instance can support
480 480 # custom flags.
481 481 self._flagprocessors = dict(flagutil.flagprocessors)
482 482
483 483 # 2-tuple of file handles being used for active writing.
484 484 self._writinghandles = None
485 485
486 486 self._loadindex()
487 487
488 488 def _loadindex(self):
489 489 mmapindexthreshold = None
490 490 opts = self.opener.options
491 491
492 492 if b'revlogv2' in opts:
493 493 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
494 494 elif b'revlogv1' in opts:
495 495 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
496 496 if b'generaldelta' in opts:
497 497 newversionflags |= FLAG_GENERALDELTA
498 498 elif b'revlogv0' in self.opener.options:
499 499 newversionflags = REVLOGV0
500 500 else:
501 501 newversionflags = REVLOG_DEFAULT_VERSION
502 502
503 503 if b'chunkcachesize' in opts:
504 504 self._chunkcachesize = opts[b'chunkcachesize']
505 505 if b'maxchainlen' in opts:
506 506 self._maxchainlen = opts[b'maxchainlen']
507 507 if b'deltabothparents' in opts:
508 508 self._deltabothparents = opts[b'deltabothparents']
509 509 self._lazydelta = bool(opts.get(b'lazydelta', True))
510 510 self._lazydeltabase = False
511 511 if self._lazydelta:
512 512 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
513 513 if b'compengine' in opts:
514 514 self._compengine = opts[b'compengine']
515 515 if b'zlib.level' in opts:
516 516 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
517 517 if b'zstd.level' in opts:
518 518 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
519 519 if b'maxdeltachainspan' in opts:
520 520 self._maxdeltachainspan = opts[b'maxdeltachainspan']
521 521 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
522 522 mmapindexthreshold = opts[b'mmapindexthreshold']
523 523 self.hassidedata = bool(opts.get(b'side-data', False))
524 524 if self.hassidedata:
525 525 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
526 526 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
527 527 withsparseread = bool(opts.get(b'with-sparse-read', False))
528 528 # sparse-revlog forces sparse-read
529 529 self._withsparseread = self._sparserevlog or withsparseread
530 530 if b'sparse-read-density-threshold' in opts:
531 531 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
532 532 if b'sparse-read-min-gap-size' in opts:
533 533 self._srmingapsize = opts[b'sparse-read-min-gap-size']
534 534 if opts.get(b'enableellipsis'):
535 535 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
536 536
537 537 # revlog v0 doesn't have flag processors
538 538 for flag, processor in pycompat.iteritems(
539 539 opts.get(b'flagprocessors', {})
540 540 ):
541 541 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
542 542
543 543 if self._chunkcachesize <= 0:
544 544 raise error.RevlogError(
545 545 _(b'revlog chunk cache size %r is not greater than 0')
546 546 % self._chunkcachesize
547 547 )
548 548 elif self._chunkcachesize & (self._chunkcachesize - 1):
549 549 raise error.RevlogError(
550 550 _(b'revlog chunk cache size %r is not a power of 2')
551 551 % self._chunkcachesize
552 552 )
553 553
554 554 indexdata = b''
555 555 self._initempty = True
556 556 try:
557 557 with self._indexfp() as f:
558 558 if (
559 559 mmapindexthreshold is not None
560 560 and self.opener.fstat(f).st_size >= mmapindexthreshold
561 561 ):
562 562 # TODO: should call .close() to release resources without
563 563 # relying on Python GC
564 564 indexdata = util.buffer(util.mmapread(f))
565 565 else:
566 566 indexdata = f.read()
567 567 if len(indexdata) > 0:
568 568 versionflags = versionformat_unpack(indexdata[:4])[0]
569 569 self._initempty = False
570 570 else:
571 571 versionflags = newversionflags
572 572 except IOError as inst:
573 573 if inst.errno != errno.ENOENT:
574 574 raise
575 575
576 576 versionflags = newversionflags
577 577
578 578 self.version = versionflags
579 579
580 580 flags = versionflags & ~0xFFFF
581 581 fmt = versionflags & 0xFFFF
582 582
583 583 if fmt == REVLOGV0:
584 584 if flags:
585 585 raise error.RevlogError(
586 586 _(b'unknown flags (%#04x) in version %d revlog %s')
587 587 % (flags >> 16, fmt, self.indexfile)
588 588 )
589 589
590 590 self._inline = False
591 591 self._generaldelta = False
592 592
593 593 elif fmt == REVLOGV1:
594 594 if flags & ~REVLOGV1_FLAGS:
595 595 raise error.RevlogError(
596 596 _(b'unknown flags (%#04x) in version %d revlog %s')
597 597 % (flags >> 16, fmt, self.indexfile)
598 598 )
599 599
600 600 self._inline = versionflags & FLAG_INLINE_DATA
601 601 self._generaldelta = versionflags & FLAG_GENERALDELTA
602 602
603 603 elif fmt == REVLOGV2:
604 604 if flags & ~REVLOGV2_FLAGS:
605 605 raise error.RevlogError(
606 606 _(b'unknown flags (%#04x) in version %d revlog %s')
607 607 % (flags >> 16, fmt, self.indexfile)
608 608 )
609 609
610 610 self._inline = versionflags & FLAG_INLINE_DATA
611 611 # generaldelta is implied by version 2 revlogs.
612 612 self._generaldelta = True
613 613
614 614 else:
615 615 raise error.RevlogError(
616 616 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
617 617 )
618 618 # sparse-revlog can't be on without general-delta (issue6056)
619 619 if not self._generaldelta:
620 620 self._sparserevlog = False
621 621
622 622 self._storedeltachains = True
623 623
624 624 devel_nodemap = (
625 625 self.nodemap_file
626 626 and opts.get(b'devel-force-nodemap', False)
627 627 and NodemapRevlogIO is not None
628 628 )
629 629
630 630 use_rust_index = False
631 631 if rustrevlog is not None:
632 632 if self.nodemap_file is not None:
633 633 use_rust_index = True
634 634 else:
635 635 use_rust_index = self.opener.options.get(b'rust.index')
636 636
637 637 self._io = revlogio()
638 638 if self.version == REVLOGV0:
639 639 self._io = revlogoldio()
640 640 elif devel_nodemap:
641 641 self._io = NodemapRevlogIO()
642 642 elif use_rust_index:
643 643 self._io = rustrevlogio()
644 644 try:
645 645 d = self._io.parseindex(indexdata, self._inline)
646 646 index, _chunkcache = d
647 647 use_nodemap = (
648 648 not self._inline
649 649 and self.nodemap_file is not None
650 650 and util.safehasattr(index, 'update_nodemap_data')
651 651 )
652 652 if use_nodemap:
653 653 nodemap_data = nodemaputil.persisted_data(self)
654 654 if nodemap_data is not None:
655 655 docket = nodemap_data[0]
656 656 if (
657 657 len(d[0]) > docket.tip_rev
658 658 and d[0][docket.tip_rev][7] == docket.tip_node
659 659 ):
660 660 # no changelog tampering
661 661 self._nodemap_docket = docket
662 662 index.update_nodemap_data(*nodemap_data)
663 663 except (ValueError, IndexError):
664 664 raise error.RevlogError(
665 665 _(b"index %s is corrupted") % self.indexfile
666 666 )
667 667 self.index, self._chunkcache = d
668 668 if not self._chunkcache:
669 669 self._chunkclear()
670 670 # revnum -> (chain-length, sum-delta-length)
671 671 self._chaininfocache = util.lrucachedict(500)
672 672 # revlog header -> revlog compressor
673 673 self._decompressors = {}
674 674
675 675 @util.propertycache
676 676 def _compressor(self):
677 677 engine = util.compengines[self._compengine]
678 678 return engine.revlogcompressor(self._compengineopts)
679 679
680 680 def _indexfp(self, mode=b'r'):
681 681 """file object for the revlog's index file"""
682 682 args = {'mode': mode}
683 683 if mode != b'r':
684 684 args['checkambig'] = self._checkambig
685 685 if mode == b'w':
686 686 args['atomictemp'] = True
687 687 return self.opener(self.indexfile, **args)
688 688
689 689 def _datafp(self, mode=b'r'):
690 690 """file object for the revlog's data file"""
691 691 return self.opener(self.datafile, mode=mode)
692 692
693 693 @contextlib.contextmanager
694 694 def _datareadfp(self, existingfp=None):
695 695 """file object suitable to read data"""
696 696 # Use explicit file handle, if given.
697 697 if existingfp is not None:
698 698 yield existingfp
699 699
700 700 # Use a file handle being actively used for writes, if available.
701 701 # There is some danger to doing this because reads will seek the
702 702 # file. However, _writeentry() performs a SEEK_END before all writes,
703 703 # so we should be safe.
704 704 elif self._writinghandles:
705 705 if self._inline:
706 706 yield self._writinghandles[0]
707 707 else:
708 708 yield self._writinghandles[1]
709 709
710 710 # Otherwise open a new file handle.
711 711 else:
712 712 if self._inline:
713 713 func = self._indexfp
714 714 else:
715 715 func = self._datafp
716 716 with func() as fp:
717 717 yield fp
718 718
719 719 def tiprev(self):
720 720 return len(self.index) - 1
721 721
722 722 def tip(self):
723 723 return self.node(self.tiprev())
724 724
725 725 def __contains__(self, rev):
726 726 return 0 <= rev < len(self)
727 727
728 728 def __len__(self):
729 729 return len(self.index)
730 730
731 731 def __iter__(self):
732 732 return iter(pycompat.xrange(len(self)))
733 733
734 734 def revs(self, start=0, stop=None):
735 735 """iterate over all rev in this revlog (from start to stop)"""
736 736 return storageutil.iterrevs(len(self), start=start, stop=stop)
737 737
738 738 @property
739 739 def nodemap(self):
740 740 msg = (
741 741 b"revlog.nodemap is deprecated, "
742 742 b"use revlog.index.[has_node|rev|get_rev]"
743 743 )
744 744 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
745 745 return self.index.nodemap
746 746
747 747 @property
748 748 def _nodecache(self):
749 749 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
750 750 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
751 751 return self.index.nodemap
752 752
753 753 def hasnode(self, node):
754 754 try:
755 755 self.rev(node)
756 756 return True
757 757 except KeyError:
758 758 return False
759 759
760 760 def candelta(self, baserev, rev):
761 761 """whether two revisions (baserev, rev) can be delta-ed or not"""
762 762 # Disable delta if either rev requires a content-changing flag
763 763 # processor (ex. LFS). This is because such a flag processor can alter
764 764 # the rawtext content that the delta will be based on, and two clients
765 765 # could have the same revlog node with different flags (i.e. different
766 766 # rawtext contents) and the delta could be incompatible.
767 767 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
768 768 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
769 769 ):
770 770 return False
771 771 return True
772 772
773 773 def update_caches(self, transaction):
774 774 if self.nodemap_file is not None:
775 775 if transaction is None:
776 776 nodemaputil.update_persistent_nodemap(self)
777 777 else:
778 778 nodemaputil.setup_persistent_nodemap(transaction, self)
779 779
780 780 def clearcaches(self):
781 781 self._revisioncache = None
782 782 self._chainbasecache.clear()
783 783 self._chunkcache = (0, b'')
784 784 self._pcache = {}
785 785 self._nodemap_docket = None
786 786 self.index.clearcaches()
787 787 # The Python code is responsible for validating the docket, so we
788 788 # end up having to refresh it here.
789 789 use_nodemap = (
790 790 not self._inline
791 791 and self.nodemap_file is not None
792 792 and util.safehasattr(self.index, 'update_nodemap_data')
793 793 )
794 794 if use_nodemap:
795 795 nodemap_data = nodemaputil.persisted_data(self)
796 796 if nodemap_data is not None:
797 797 self._nodemap_docket = nodemap_data[0]
798 798 self.index.update_nodemap_data(*nodemap_data)
799 799
800 800 def rev(self, node):
801 801 try:
802 802 return self.index.rev(node)
803 803 except TypeError:
804 804 raise
805 805 except error.RevlogError:
806 806 # parsers.c radix tree lookup failed
807 807 if node == wdirid or node in wdirfilenodeids:
808 808 raise error.WdirUnsupported
809 809 raise error.LookupError(node, self.indexfile, _(b'no node'))
810 810
811 811 # Accessors for index entries.
812 812
813 813 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
814 814 # are flags.
815 815 def start(self, rev):
816 816 return int(self.index[rev][0] >> 16)
817 817
818 818 def flags(self, rev):
819 819 return self.index[rev][0] & 0xFFFF
820 820
821 821 def length(self, rev):
822 822 return self.index[rev][1]
823 823
824 824 def rawsize(self, rev):
825 825 """return the length of the uncompressed text for a given revision"""
826 826 l = self.index[rev][2]
827 827 if l >= 0:
828 828 return l
829 829
830 830 t = self.rawdata(rev)
831 831 return len(t)
832 832
833 833 def size(self, rev):
834 834 """length of non-raw text (processed by a "read" flag processor)"""
835 835 # fast path: if no "read" flag processor could change the content,
836 836 # size is rawsize. note: ELLIPSIS is known to not change the content.
837 837 flags = self.flags(rev)
838 838 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
839 839 return self.rawsize(rev)
840 840
841 841 return len(self.revision(rev, raw=False))
842 842
843 843 def chainbase(self, rev):
844 844 base = self._chainbasecache.get(rev)
845 845 if base is not None:
846 846 return base
847 847
848 848 index = self.index
849 849 iterrev = rev
850 850 base = index[iterrev][3]
851 851 while base != iterrev:
852 852 iterrev = base
853 853 base = index[iterrev][3]
854 854
855 855 self._chainbasecache[rev] = base
856 856 return base
857 857
858 858 def linkrev(self, rev):
859 859 return self.index[rev][4]
860 860
861 861 def parentrevs(self, rev):
862 862 try:
863 863 entry = self.index[rev]
864 864 except IndexError:
865 865 if rev == wdirrev:
866 866 raise error.WdirUnsupported
867 867 raise
868 868
869 869 return entry[5], entry[6]
870 870
871 871 # fast parentrevs(rev) where rev isn't filtered
872 872 _uncheckedparentrevs = parentrevs
873 873
874 874 def node(self, rev):
875 875 try:
876 876 return self.index[rev][7]
877 877 except IndexError:
878 878 if rev == wdirrev:
879 879 raise error.WdirUnsupported
880 880 raise
881 881
882 882 # Derived from index values.
883 883
884 884 def end(self, rev):
885 885 return self.start(rev) + self.length(rev)
886 886
887 887 def parents(self, node):
888 888 i = self.index
889 889 d = i[self.rev(node)]
890 890 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
891 891
892 892 def chainlen(self, rev):
893 893 return self._chaininfo(rev)[0]
894 894
895 895 def _chaininfo(self, rev):
896 896 chaininfocache = self._chaininfocache
897 897 if rev in chaininfocache:
898 898 return chaininfocache[rev]
899 899 index = self.index
900 900 generaldelta = self._generaldelta
901 901 iterrev = rev
902 902 e = index[iterrev]
903 903 clen = 0
904 904 compresseddeltalen = 0
905 905 while iterrev != e[3]:
906 906 clen += 1
907 907 compresseddeltalen += e[1]
908 908 if generaldelta:
909 909 iterrev = e[3]
910 910 else:
911 911 iterrev -= 1
912 912 if iterrev in chaininfocache:
913 913 t = chaininfocache[iterrev]
914 914 clen += t[0]
915 915 compresseddeltalen += t[1]
916 916 break
917 917 e = index[iterrev]
918 918 else:
919 919 # Add text length of base since decompressing that also takes
920 920 # work. For cache hits the length is already included.
921 921 compresseddeltalen += e[1]
922 922 r = (clen, compresseddeltalen)
923 923 chaininfocache[rev] = r
924 924 return r
925 925
926 926 def _deltachain(self, rev, stoprev=None):
927 927 """Obtain the delta chain for a revision.
928 928
929 929 ``stoprev`` specifies a revision to stop at. If not specified, we
930 930 stop at the base of the chain.
931 931
932 932 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
933 933 revs in ascending order and ``stopped`` is a bool indicating whether
934 934 ``stoprev`` was hit.
935 935 """
936 936 # Try C implementation.
937 937 try:
938 938 return self.index.deltachain(rev, stoprev, self._generaldelta)
939 939 except AttributeError:
940 940 pass
941 941
942 942 chain = []
943 943
944 944 # Alias to prevent attribute lookup in tight loop.
945 945 index = self.index
946 946 generaldelta = self._generaldelta
947 947
948 948 iterrev = rev
949 949 e = index[iterrev]
950 950 while iterrev != e[3] and iterrev != stoprev:
951 951 chain.append(iterrev)
952 952 if generaldelta:
953 953 iterrev = e[3]
954 954 else:
955 955 iterrev -= 1
956 956 e = index[iterrev]
957 957
958 958 if iterrev == stoprev:
959 959 stopped = True
960 960 else:
961 961 chain.append(iterrev)
962 962 stopped = False
963 963
964 964 chain.reverse()
965 965 return chain, stopped
966 966
967 967 def ancestors(self, revs, stoprev=0, inclusive=False):
968 968 """Generate the ancestors of 'revs' in reverse revision order.
969 969 Does not generate revs lower than stoprev.
970 970
971 971 See the documentation for ancestor.lazyancestors for more details."""
972 972
973 973 # first, make sure start revisions aren't filtered
974 974 revs = list(revs)
975 975 checkrev = self.node
976 976 for r in revs:
977 977 checkrev(r)
978 978 # and we're sure ancestors aren't filtered as well
979 979
980 980 if rustancestor is not None:
981 981 lazyancestors = rustancestor.LazyAncestors
982 982 arg = self.index
983 983 else:
984 984 lazyancestors = ancestor.lazyancestors
985 985 arg = self._uncheckedparentrevs
986 986 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
987 987
988 988 def descendants(self, revs):
989 989 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
990 990
991 991 def findcommonmissing(self, common=None, heads=None):
992 992 """Return a tuple of the ancestors of common and the ancestors of heads
993 993 that are not ancestors of common. In revset terminology, we return the
994 994 tuple:
995 995
996 996 ::common, (::heads) - (::common)
997 997
998 998 The list is sorted by revision number, meaning it is
999 999 topologically sorted.
1000 1000
1001 1001 'heads' and 'common' are both lists of node IDs. If heads is
1002 1002 not supplied, uses all of the revlog's heads. If common is not
1003 1003 supplied, uses nullid."""
1004 1004 if common is None:
1005 1005 common = [nullid]
1006 1006 if heads is None:
1007 1007 heads = self.heads()
1008 1008
1009 1009 common = [self.rev(n) for n in common]
1010 1010 heads = [self.rev(n) for n in heads]
1011 1011
1012 1012 # we want the ancestors, but inclusive
1013 1013 class lazyset(object):
1014 1014 def __init__(self, lazyvalues):
1015 1015 self.addedvalues = set()
1016 1016 self.lazyvalues = lazyvalues
1017 1017
1018 1018 def __contains__(self, value):
1019 1019 return value in self.addedvalues or value in self.lazyvalues
1020 1020
1021 1021 def __iter__(self):
1022 1022 added = self.addedvalues
1023 1023 for r in added:
1024 1024 yield r
1025 1025 for r in self.lazyvalues:
1026 1026 if not r in added:
1027 1027 yield r
1028 1028
1029 1029 def add(self, value):
1030 1030 self.addedvalues.add(value)
1031 1031
1032 1032 def update(self, values):
1033 1033 self.addedvalues.update(values)
1034 1034
1035 1035 has = lazyset(self.ancestors(common))
1036 1036 has.add(nullrev)
1037 1037 has.update(common)
1038 1038
1039 1039 # take all ancestors from heads that aren't in has
1040 1040 missing = set()
1041 1041 visit = collections.deque(r for r in heads if r not in has)
1042 1042 while visit:
1043 1043 r = visit.popleft()
1044 1044 if r in missing:
1045 1045 continue
1046 1046 else:
1047 1047 missing.add(r)
1048 1048 for p in self.parentrevs(r):
1049 1049 if p not in has:
1050 1050 visit.append(p)
1051 1051 missing = list(missing)
1052 1052 missing.sort()
1053 1053 return has, [self.node(miss) for miss in missing]
1054 1054
1055 1055 def incrementalmissingrevs(self, common=None):
1056 1056 """Return an object that can be used to incrementally compute the
1057 1057 revision numbers of the ancestors of arbitrary sets that are not
1058 1058 ancestors of common. This is an ancestor.incrementalmissingancestors
1059 1059 object.
1060 1060
1061 1061 'common' is a list of revision numbers. If common is not supplied, uses
1062 1062 nullrev.
1063 1063 """
1064 1064 if common is None:
1065 1065 common = [nullrev]
1066 1066
1067 1067 if rustancestor is not None:
1068 1068 return rustancestor.MissingAncestors(self.index, common)
1069 1069 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1070 1070
1071 1071 def findmissingrevs(self, common=None, heads=None):
1072 1072 """Return the revision numbers of the ancestors of heads that
1073 1073 are not ancestors of common.
1074 1074
1075 1075 More specifically, return a list of revision numbers corresponding to
1076 1076 nodes N such that every N satisfies the following constraints:
1077 1077
1078 1078 1. N is an ancestor of some node in 'heads'
1079 1079 2. N is not an ancestor of any node in 'common'
1080 1080
1081 1081 The list is sorted by revision number, meaning it is
1082 1082 topologically sorted.
1083 1083
1084 1084 'heads' and 'common' are both lists of revision numbers. If heads is
1085 1085 not supplied, uses all of the revlog's heads. If common is not
1086 1086 supplied, uses nullid."""
1087 1087 if common is None:
1088 1088 common = [nullrev]
1089 1089 if heads is None:
1090 1090 heads = self.headrevs()
1091 1091
1092 1092 inc = self.incrementalmissingrevs(common=common)
1093 1093 return inc.missingancestors(heads)
1094 1094
1095 1095 def findmissing(self, common=None, heads=None):
1096 1096 """Return the ancestors of heads that are not ancestors of common.
1097 1097
1098 1098 More specifically, return a list of nodes N such that every N
1099 1099 satisfies the following constraints:
1100 1100
1101 1101 1. N is an ancestor of some node in 'heads'
1102 1102 2. N is not an ancestor of any node in 'common'
1103 1103
1104 1104 The list is sorted by revision number, meaning it is
1105 1105 topologically sorted.
1106 1106
1107 1107 'heads' and 'common' are both lists of node IDs. If heads is
1108 1108 not supplied, uses all of the revlog's heads. If common is not
1109 1109 supplied, uses nullid."""
1110 1110 if common is None:
1111 1111 common = [nullid]
1112 1112 if heads is None:
1113 1113 heads = self.heads()
1114 1114
1115 1115 common = [self.rev(n) for n in common]
1116 1116 heads = [self.rev(n) for n in heads]
1117 1117
1118 1118 inc = self.incrementalmissingrevs(common=common)
1119 1119 return [self.node(r) for r in inc.missingancestors(heads)]
1120 1120
1121 1121 def nodesbetween(self, roots=None, heads=None):
1122 1122 """Return a topological path from 'roots' to 'heads'.
1123 1123
1124 1124 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1125 1125 topologically sorted list of all nodes N that satisfy both of
1126 1126 these constraints:
1127 1127
1128 1128 1. N is a descendant of some node in 'roots'
1129 1129 2. N is an ancestor of some node in 'heads'
1130 1130
1131 1131 Every node is considered to be both a descendant and an ancestor
1132 1132 of itself, so every reachable node in 'roots' and 'heads' will be
1133 1133 included in 'nodes'.
1134 1134
1135 1135 'outroots' is the list of reachable nodes in 'roots', i.e., the
1136 1136 subset of 'roots' that is returned in 'nodes'. Likewise,
1137 1137 'outheads' is the subset of 'heads' that is also in 'nodes'.
1138 1138
1139 1139 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1140 1140 unspecified, uses nullid as the only root. If 'heads' is
1141 1141 unspecified, uses list of all of the revlog's heads."""
1142 1142 nonodes = ([], [], [])
1143 1143 if roots is not None:
1144 1144 roots = list(roots)
1145 1145 if not roots:
1146 1146 return nonodes
1147 1147 lowestrev = min([self.rev(n) for n in roots])
1148 1148 else:
1149 1149 roots = [nullid] # Everybody's a descendant of nullid
1150 1150 lowestrev = nullrev
1151 1151 if (lowestrev == nullrev) and (heads is None):
1152 1152 # We want _all_ the nodes!
1153 1153 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1154 1154 if heads is None:
1155 1155 # All nodes are ancestors, so the latest ancestor is the last
1156 1156 # node.
1157 1157 highestrev = len(self) - 1
1158 1158 # Set ancestors to None to signal that every node is an ancestor.
1159 1159 ancestors = None
1160 1160 # Set heads to an empty dictionary for later discovery of heads
1161 1161 heads = {}
1162 1162 else:
1163 1163 heads = list(heads)
1164 1164 if not heads:
1165 1165 return nonodes
1166 1166 ancestors = set()
1167 1167 # Turn heads into a dictionary so we can remove 'fake' heads.
1168 1168 # Also, later we will be using it to filter out the heads we can't
1169 1169 # find from roots.
1170 1170 heads = dict.fromkeys(heads, False)
1171 1171 # Start at the top and keep marking parents until we're done.
1172 1172 nodestotag = set(heads)
1173 1173 # Remember where the top was so we can use it as a limit later.
1174 1174 highestrev = max([self.rev(n) for n in nodestotag])
1175 1175 while nodestotag:
1176 1176 # grab a node to tag
1177 1177 n = nodestotag.pop()
1178 1178 # Never tag nullid
1179 1179 if n == nullid:
1180 1180 continue
1181 1181 # A node's revision number represents its place in a
1182 1182 # topologically sorted list of nodes.
1183 1183 r = self.rev(n)
1184 1184 if r >= lowestrev:
1185 1185 if n not in ancestors:
1186 1186 # If we are possibly a descendant of one of the roots
1187 1187 # and we haven't already been marked as an ancestor
1188 1188 ancestors.add(n) # Mark as ancestor
1189 1189 # Add non-nullid parents to list of nodes to tag.
1190 1190 nodestotag.update(
1191 1191 [p for p in self.parents(n) if p != nullid]
1192 1192 )
1193 1193 elif n in heads: # We've seen it before, is it a fake head?
1194 1194 # So it is, real heads should not be the ancestors of
1195 1195 # any other heads.
1196 1196 heads.pop(n)
1197 1197 if not ancestors:
1198 1198 return nonodes
1199 1199 # Now that we have our set of ancestors, we want to remove any
1200 1200 # roots that are not ancestors.
1201 1201
1202 1202 # If one of the roots was nullid, everything is included anyway.
1203 1203 if lowestrev > nullrev:
1204 1204 # But, since we weren't, let's recompute the lowest rev to not
1205 1205 # include roots that aren't ancestors.
1206 1206
1207 1207 # Filter out roots that aren't ancestors of heads
1208 1208 roots = [root for root in roots if root in ancestors]
1209 1209 # Recompute the lowest revision
1210 1210 if roots:
1211 1211 lowestrev = min([self.rev(root) for root in roots])
1212 1212 else:
1213 1213 # No more roots? Return empty list
1214 1214 return nonodes
1215 1215 else:
1216 1216 # We are descending from nullid, and don't need to care about
1217 1217 # any other roots.
1218 1218 lowestrev = nullrev
1219 1219 roots = [nullid]
1220 1220 # Transform our roots list into a set.
1221 1221 descendants = set(roots)
1222 1222 # Also, keep the original roots so we can filter out roots that aren't
1223 1223 # 'real' roots (i.e. are descended from other roots).
1224 1224 roots = descendants.copy()
1225 1225 # Our topologically sorted list of output nodes.
1226 1226 orderedout = []
1227 1227 # Don't start at nullid since we don't want nullid in our output list,
1228 1228 # and if nullid shows up in descendants, empty parents will look like
1229 1229 # they're descendants.
1230 1230 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1231 1231 n = self.node(r)
1232 1232 isdescendant = False
1233 1233 if lowestrev == nullrev: # Everybody is a descendant of nullid
1234 1234 isdescendant = True
1235 1235 elif n in descendants:
1236 1236 # n is already a descendant
1237 1237 isdescendant = True
1238 1238 # This check only needs to be done here because all the roots
1239 1239 # will start being marked as descendants before the loop.
1240 1240 if n in roots:
1241 1241 # If n was a root, check if it's a 'real' root.
1242 1242 p = tuple(self.parents(n))
1243 1243 # If any of its parents are descendants, it's not a root.
1244 1244 if (p[0] in descendants) or (p[1] in descendants):
1245 1245 roots.remove(n)
1246 1246 else:
1247 1247 p = tuple(self.parents(n))
1248 1248 # A node is a descendant if either of its parents are
1249 1249 # descendants. (We seeded the descendants list with the roots
1250 1250 # up there, remember?)
1251 1251 if (p[0] in descendants) or (p[1] in descendants):
1252 1252 descendants.add(n)
1253 1253 isdescendant = True
1254 1254 if isdescendant and ((ancestors is None) or (n in ancestors)):
1255 1255 # Only include nodes that are both descendants and ancestors.
1256 1256 orderedout.append(n)
1257 1257 if (ancestors is not None) and (n in heads):
1258 1258 # We're trying to figure out which heads are reachable
1259 1259 # from roots.
1260 1260 # Mark this head as having been reached
1261 1261 heads[n] = True
1262 1262 elif ancestors is None:
1263 1263 # Otherwise, we're trying to discover the heads.
1264 1264 # Assume this is a head because if it isn't, the next step
1265 1265 # will eventually remove it.
1266 1266 heads[n] = True
1267 1267 # But, obviously its parents aren't.
1268 1268 for p in self.parents(n):
1269 1269 heads.pop(p, None)
1270 1270 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1271 1271 roots = list(roots)
1272 1272 assert orderedout
1273 1273 assert roots
1274 1274 assert heads
1275 1275 return (orderedout, roots, heads)
1276 1276
1277 1277 def headrevs(self, revs=None):
1278 1278 if revs is None:
1279 1279 try:
1280 1280 return self.index.headrevs()
1281 1281 except AttributeError:
1282 1282 return self._headrevs()
1283 1283 if rustdagop is not None:
1284 1284 return rustdagop.headrevs(self.index, revs)
1285 1285 return dagop.headrevs(revs, self._uncheckedparentrevs)
1286 1286
1287 1287 def computephases(self, roots):
1288 1288 return self.index.computephasesmapsets(roots)
1289 1289
1290 1290 def _headrevs(self):
1291 1291 count = len(self)
1292 1292 if not count:
1293 1293 return [nullrev]
1294 1294 # we won't iter over filtered rev so nobody is a head at start
1295 1295 ishead = [0] * (count + 1)
1296 1296 index = self.index
1297 1297 for r in self:
1298 1298 ishead[r] = 1 # I may be a head
1299 1299 e = index[r]
1300 1300 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1301 1301 return [r for r, val in enumerate(ishead) if val]
1302 1302
1303 1303 def heads(self, start=None, stop=None):
1304 1304 """return the list of all nodes that have no children
1305 1305
1306 1306 if start is specified, only heads that are descendants of
1307 1307 start will be returned
1308 1308 if stop is specified, it will consider all the revs from stop
1309 1309 as if they had no children
1310 1310 """
1311 1311 if start is None and stop is None:
1312 1312 if not len(self):
1313 1313 return [nullid]
1314 1314 return [self.node(r) for r in self.headrevs()]
1315 1315
1316 1316 if start is None:
1317 1317 start = nullrev
1318 1318 else:
1319 1319 start = self.rev(start)
1320 1320
1321 1321 stoprevs = {self.rev(n) for n in stop or []}
1322 1322
1323 1323 revs = dagop.headrevssubset(
1324 1324 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1325 1325 )
1326 1326
1327 1327 return [self.node(rev) for rev in revs]
1328 1328
1329 1329 def children(self, node):
1330 1330 """find the children of a given node"""
1331 1331 c = []
1332 1332 p = self.rev(node)
1333 1333 for r in self.revs(start=p + 1):
1334 1334 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1335 1335 if prevs:
1336 1336 for pr in prevs:
1337 1337 if pr == p:
1338 1338 c.append(self.node(r))
1339 1339 elif p == nullrev:
1340 1340 c.append(self.node(r))
1341 1341 return c
1342 1342
1343 1343 def commonancestorsheads(self, a, b):
1344 1344 """calculate all the heads of the common ancestors of nodes a and b"""
1345 1345 a, b = self.rev(a), self.rev(b)
1346 1346 ancs = self._commonancestorsheads(a, b)
1347 1347 return pycompat.maplist(self.node, ancs)
1348 1348
1349 1349 def _commonancestorsheads(self, *revs):
1350 1350 """calculate all the heads of the common ancestors of revs"""
1351 1351 try:
1352 1352 ancs = self.index.commonancestorsheads(*revs)
1353 1353 except (AttributeError, OverflowError): # C implementation failed
1354 1354 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1355 1355 return ancs
1356 1356
1357 1357 def isancestor(self, a, b):
1358 1358 """return True if node a is an ancestor of node b
1359 1359
1360 1360 A revision is considered an ancestor of itself."""
1361 1361 a, b = self.rev(a), self.rev(b)
1362 1362 return self.isancestorrev(a, b)
1363 1363
1364 1364 def isancestorrev(self, a, b):
1365 1365 """return True if revision a is an ancestor of revision b
1366 1366
1367 1367 A revision is considered an ancestor of itself.
1368 1368
1369 1369 The implementation of this is trivial but the use of
1370 1370 reachableroots is not."""
1371 1371 if a == nullrev:
1372 1372 return True
1373 1373 elif a == b:
1374 1374 return True
1375 1375 elif a > b:
1376 1376 return False
1377 1377 return bool(self.reachableroots(a, [b], [a], includepath=False))
1378 1378
1379 1379 def reachableroots(self, minroot, heads, roots, includepath=False):
1380 1380 """return (heads(::(<roots> and <roots>::<heads>)))
1381 1381
1382 1382 If includepath is True, return (<roots>::<heads>)."""
1383 1383 try:
1384 1384 return self.index.reachableroots2(
1385 1385 minroot, heads, roots, includepath
1386 1386 )
1387 1387 except AttributeError:
1388 1388 return dagop._reachablerootspure(
1389 1389 self.parentrevs, minroot, roots, heads, includepath
1390 1390 )
1391 1391
1392 1392 def ancestor(self, a, b):
1393 1393 """calculate the "best" common ancestor of nodes a and b"""
1394 1394
1395 1395 a, b = self.rev(a), self.rev(b)
1396 1396 try:
1397 1397 ancs = self.index.ancestors(a, b)
1398 1398 except (AttributeError, OverflowError):
1399 1399 ancs = ancestor.ancestors(self.parentrevs, a, b)
1400 1400 if ancs:
1401 1401 # choose a consistent winner when there's a tie
1402 1402 return min(map(self.node, ancs))
1403 1403 return nullid
1404 1404
1405 1405 def _match(self, id):
1406 1406 if isinstance(id, int):
1407 1407 # rev
1408 1408 return self.node(id)
1409 1409 if len(id) == 20:
1410 1410 # possibly a binary node
1411 1411 # odds of a binary node being all hex in ASCII are 1 in 10**25
1412 1412 try:
1413 1413 node = id
1414 1414 self.rev(node) # quick search the index
1415 1415 return node
1416 1416 except error.LookupError:
1417 1417 pass # may be partial hex id
1418 1418 try:
1419 1419 # str(rev)
1420 1420 rev = int(id)
1421 1421 if b"%d" % rev != id:
1422 1422 raise ValueError
1423 1423 if rev < 0:
1424 1424 rev = len(self) + rev
1425 1425 if rev < 0 or rev >= len(self):
1426 1426 raise ValueError
1427 1427 return self.node(rev)
1428 1428 except (ValueError, OverflowError):
1429 1429 pass
1430 1430 if len(id) == 40:
1431 1431 try:
1432 1432 # a full hex nodeid?
1433 1433 node = bin(id)
1434 1434 self.rev(node)
1435 1435 return node
1436 1436 except (TypeError, error.LookupError):
1437 1437 pass
1438 1438
1439 1439 def _partialmatch(self, id):
1440 1440 # we don't care about wdirfilenodeids as they should always be full hashes
1441 1441 maybewdir = wdirhex.startswith(id)
1442 1442 try:
1443 1443 partial = self.index.partialmatch(id)
1444 1444 if partial and self.hasnode(partial):
1445 1445 if maybewdir:
1446 1446 # single 'ff...' match in radix tree, ambiguous with wdir
1447 1447 raise error.RevlogError
1448 1448 return partial
1449 1449 if maybewdir:
1450 1450 # no 'ff...' match in radix tree, wdir identified
1451 1451 raise error.WdirUnsupported
1452 1452 return None
1453 1453 except error.RevlogError:
1454 1454 # parsers.c radix tree lookup gave multiple matches
1455 1455 # fast path: for unfiltered changelog, radix tree is accurate
1456 1456 if not getattr(self, 'filteredrevs', None):
1457 1457 raise error.AmbiguousPrefixLookupError(
1458 1458 id, self.indexfile, _(b'ambiguous identifier')
1459 1459 )
1460 1460 # fall through to slow path that filters hidden revisions
1461 1461 except (AttributeError, ValueError):
1462 1462 # we are pure python, or key was too short to search radix tree
1463 1463 pass
1464 1464
1465 1465 if id in self._pcache:
1466 1466 return self._pcache[id]
1467 1467
1468 1468 if len(id) <= 40:
1469 1469 try:
1470 1470 # hex(node)[:...]
1471 1471 l = len(id) // 2 # grab an even number of digits
1472 1472 prefix = bin(id[: l * 2])
1473 1473 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1474 1474 nl = [
1475 1475 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1476 1476 ]
1477 1477 if nullhex.startswith(id):
1478 1478 nl.append(nullid)
1479 1479 if len(nl) > 0:
1480 1480 if len(nl) == 1 and not maybewdir:
1481 1481 self._pcache[id] = nl[0]
1482 1482 return nl[0]
1483 1483 raise error.AmbiguousPrefixLookupError(
1484 1484 id, self.indexfile, _(b'ambiguous identifier')
1485 1485 )
1486 1486 if maybewdir:
1487 1487 raise error.WdirUnsupported
1488 1488 return None
1489 1489 except TypeError:
1490 1490 pass
1491 1491
1492 1492 def lookup(self, id):
1493 1493 """locate a node based on:
1494 1494 - revision number or str(revision number)
1495 1495 - nodeid or subset of hex nodeid
1496 1496 """
1497 1497 n = self._match(id)
1498 1498 if n is not None:
1499 1499 return n
1500 1500 n = self._partialmatch(id)
1501 1501 if n:
1502 1502 return n
1503 1503
1504 1504 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1505 1505
1506 1506 def shortest(self, node, minlength=1):
1507 1507 """Find the shortest unambiguous prefix that matches node."""
1508 1508
1509 1509 def isvalid(prefix):
1510 1510 try:
1511 1511 matchednode = self._partialmatch(prefix)
1512 1512 except error.AmbiguousPrefixLookupError:
1513 1513 return False
1514 1514 except error.WdirUnsupported:
1515 1515 # single 'ff...' match
1516 1516 return True
1517 1517 if matchednode is None:
1518 1518 raise error.LookupError(node, self.indexfile, _(b'no node'))
1519 1519 return True
1520 1520
1521 1521 def maybewdir(prefix):
1522 1522 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1523 1523
1524 1524 hexnode = hex(node)
1525 1525
1526 1526 def disambiguate(hexnode, minlength):
1527 1527 """Disambiguate against wdirid."""
1528 1528 for length in range(minlength, len(hexnode) + 1):
1529 1529 prefix = hexnode[:length]
1530 1530 if not maybewdir(prefix):
1531 1531 return prefix
1532 1532
1533 1533 if not getattr(self, 'filteredrevs', None):
1534 1534 try:
1535 1535 length = max(self.index.shortest(node), minlength)
1536 1536 return disambiguate(hexnode, length)
1537 1537 except error.RevlogError:
1538 1538 if node != wdirid:
1539 1539 raise error.LookupError(node, self.indexfile, _(b'no node'))
1540 1540 except AttributeError:
1541 1541 # Fall through to pure code
1542 1542 pass
1543 1543
1544 1544 if node == wdirid:
1545 1545 for length in range(minlength, len(hexnode) + 1):
1546 1546 prefix = hexnode[:length]
1547 1547 if isvalid(prefix):
1548 1548 return prefix
1549 1549
1550 1550 for length in range(minlength, len(hexnode) + 1):
1551 1551 prefix = hexnode[:length]
1552 1552 if isvalid(prefix):
1553 1553 return disambiguate(hexnode, length)
1554 1554
1555 1555 def cmp(self, node, text):
1556 1556 """compare text with a given file revision
1557 1557
1558 1558 returns True if text is different than what is stored.
1559 1559 """
1560 1560 p1, p2 = self.parents(node)
1561 1561 return storageutil.hashrevisionsha1(text, p1, p2) != node
1562 1562
1563 1563 def _cachesegment(self, offset, data):
1564 1564 """Add a segment to the revlog cache.
1565 1565
1566 1566 Accepts an absolute offset and the data that is at that location.
1567 1567 """
1568 1568 o, d = self._chunkcache
1569 1569 # try to add to existing cache
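# e.g. if the cache currently holds (0, <64KiB of data>) and a new segment
# arrives at offset 65536, the two are simply concatenated, provided the
# combined size stays below _chunksize (1MiB); otherwise the cache is
# replaced with the new segment.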
1570 1570 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1571 1571 self._chunkcache = o, d + data
1572 1572 else:
1573 1573 self._chunkcache = offset, data
1574 1574
1575 1575 def _readsegment(self, offset, length, df=None):
1576 1576 """Load a segment of raw data from the revlog.
1577 1577
1578 1578 Accepts an absolute offset, length to read, and an optional existing
1579 1579 file handle to read from.
1580 1580
1581 1581 If an existing file handle is passed, it will be seeked and the
1582 1582 original seek position will NOT be restored.
1583 1583
1584 1584 Returns a str or buffer of raw byte data.
1585 1585
1586 1586 Raises if the requested number of bytes could not be read.
1587 1587 """
1588 1588 # Cache data both forward and backward around the requested
1589 1589 # data, in a fixed size window. This helps speed up operations
1590 1590 # involving reading the revlog backwards.
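# For example, with the default _chunkcachesize of 65536, a 100-byte read
# at offset 70000 is widened to realoffset 65536 and reallength 65536; the
# surplus bytes are cached for neighbouring requests.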
1591 1591 cachesize = self._chunkcachesize
1592 1592 realoffset = offset & ~(cachesize - 1)
1593 1593 reallength = (
1594 1594 (offset + length + cachesize) & ~(cachesize - 1)
1595 1595 ) - realoffset
1596 1596 with self._datareadfp(df) as df:
1597 1597 df.seek(realoffset)
1598 1598 d = df.read(reallength)
1599 1599
1600 1600 self._cachesegment(realoffset, d)
1601 1601 if offset != realoffset or reallength != length:
1602 1602 startoffset = offset - realoffset
1603 1603 if len(d) - startoffset < length:
1604 1604 raise error.RevlogError(
1605 1605 _(
1606 1606 b'partial read of revlog %s; expected %d bytes from '
1607 1607 b'offset %d, got %d'
1608 1608 )
1609 1609 % (
1610 1610 self.indexfile if self._inline else self.datafile,
1611 1611 length,
1612 1612 realoffset,
1613 1613 len(d) - startoffset,
1614 1614 )
1615 1615 )
1616 1616
1617 1617 return util.buffer(d, startoffset, length)
1618 1618
1619 1619 if len(d) < length:
1620 1620 raise error.RevlogError(
1621 1621 _(
1622 1622 b'partial read of revlog %s; expected %d bytes from offset '
1623 1623 b'%d, got %d'
1624 1624 )
1625 1625 % (
1626 1626 self.indexfile if self._inline else self.datafile,
1627 1627 length,
1628 1628 offset,
1629 1629 len(d),
1630 1630 )
1631 1631 )
1632 1632
1633 1633 return d
1634 1634
1635 1635 def _getsegment(self, offset, length, df=None):
1636 1636 """Obtain a segment of raw data from the revlog.
1637 1637
1638 1638 Accepts an absolute offset, length of bytes to obtain, and an
1639 1639 optional file handle to the already-opened revlog. If the file
1640 1640 handle is used, its original seek position will not be preserved.
1641 1641
1642 1642 Requests for data may be returned from a cache.
1643 1643
1644 1644 Returns a str or a buffer instance of raw byte data.
1645 1645 """
1646 1646 o, d = self._chunkcache
1647 1647 l = len(d)
1648 1648
1649 1649 # is it in the cache?
1650 1650 cachestart = offset - o
1651 1651 cacheend = cachestart + length
1652 1652 if cachestart >= 0 and cacheend <= l:
1653 1653 if cachestart == 0 and cacheend == l:
1654 1654 return d # avoid a copy
1655 1655 return util.buffer(d, cachestart, cacheend - cachestart)
1656 1656
1657 1657 return self._readsegment(offset, length, df=df)
1658 1658
1659 1659 def _getsegmentforrevs(self, startrev, endrev, df=None):
1660 1660 """Obtain a segment of raw data corresponding to a range of revisions.
1661 1661
1662 1662 Accepts the start and end revisions and an optional already-open
1663 1663 file handle to be used for reading. If the file handle is used, its
1664 1664 seek position will not be preserved.
1665 1665
1666 1666 Requests for data may be satisfied by a cache.
1667 1667
1668 1668 Returns a 2-tuple of (offset, data) for the requested range of
1669 1669 revisions. Offset is the integer offset from the beginning of the
1670 1670 revlog and data is a str or buffer of the raw byte data.
1671 1671
1672 1672 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1673 1673 to determine where each revision's data begins and ends.
1674 1674 """
1675 1675 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1676 1676 # (functions are expensive).
1677 1677 index = self.index
1678 1678 istart = index[startrev]
1679 1679 start = int(istart[0] >> 16)
1680 1680 if startrev == endrev:
1681 1681 end = start + istart[1]
1682 1682 else:
1683 1683 iend = index[endrev]
1684 1684 end = int(iend[0] >> 16) + iend[1]
1685 1685
1686 1686 if self._inline:
1687 1687 start += (startrev + 1) * self._io.size
1688 1688 end += (endrev + 1) * self._io.size
1689 1689 length = end - start
1690 1690
1691 1691 return start, self._getsegment(start, length, df=df)
1692 1692
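# Illustrative sketch (not part of revlog.py): how a caller could slice the
# (offset, data) pair returned by _getsegmentforrevs() into one raw chunk per
# revision, using start()/length() as the docstring describes. This mirrors
# the loop in _chunks() below; `rl` stands for any revlog instance.
def rawchunks_sketch(rl, startrev, endrev):
    offset, data = rl._getsegmentforrevs(startrev, endrev)
    out = []
    for rev in range(startrev, endrev + 1):
        chunkstart = rl.start(rev)
        if rl._inline:
            # inline revlogs interleave an index entry before each chunk
            chunkstart += (rev + 1) * rl._io.size
        begin = chunkstart - offset
        out.append(bytes(data[begin:begin + rl.length(rev)]))
    return out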
1693 1693 def _chunk(self, rev, df=None):
1694 1694 """Obtain a single decompressed chunk for a revision.
1695 1695
1696 1696 Accepts an integer revision and an optional already-open file handle
1697 1697 to be used for reading. If used, the seek position of the file will not
1698 1698 be preserved.
1699 1699
1700 1700 Returns a str holding uncompressed data for the requested revision.
1701 1701 """
1702 1702 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1703 1703
1704 1704 def _chunks(self, revs, df=None, targetsize=None):
1705 1705 """Obtain decompressed chunks for the specified revisions.
1706 1706
1707 1707 Accepts an iterable of numeric revisions that are assumed to be in
1708 1708 ascending order. Also accepts an optional already-open file handle
1709 1709 to be used for reading. If used, the seek position of the file will
1710 1710 not be preserved.
1711 1711
1712 1712 This function is similar to calling ``self._chunk()`` multiple times,
1713 1713 but is faster.
1714 1714
1715 1715 Returns a list with decompressed data for each requested revision.
1716 1716 """
1717 1717 if not revs:
1718 1718 return []
1719 1719 start = self.start
1720 1720 length = self.length
1721 1721 inline = self._inline
1722 1722 iosize = self._io.size
1723 1723 buffer = util.buffer
1724 1724
1725 1725 l = []
1726 1726 ladd = l.append
1727 1727
1728 1728 if not self._withsparseread:
1729 1729 slicedchunks = (revs,)
1730 1730 else:
1731 1731 slicedchunks = deltautil.slicechunk(
1732 1732 self, revs, targetsize=targetsize
1733 1733 )
1734 1734
1735 1735 for revschunk in slicedchunks:
1736 1736 firstrev = revschunk[0]
1737 1737 # Skip trailing revisions with empty diff
1738 1738 for lastrev in revschunk[::-1]:
1739 1739 if length(lastrev) != 0:
1740 1740 break
1741 1741
1742 1742 try:
1743 1743 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1744 1744 except OverflowError:
1745 1745 # issue4215 - we can't cache a run of chunks greater than
1746 1746 # 2G on Windows
1747 1747 return [self._chunk(rev, df=df) for rev in revschunk]
1748 1748
1749 1749 decomp = self.decompress
1750 1750 for rev in revschunk:
1751 1751 chunkstart = start(rev)
1752 1752 if inline:
1753 1753 chunkstart += (rev + 1) * iosize
1754 1754 chunklength = length(rev)
1755 1755 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1756 1756
1757 1757 return l
1758 1758
1759 1759 def _chunkclear(self):
1760 1760 """Clear the raw chunk cache."""
1761 1761 self._chunkcache = (0, b'')
1762 1762
1763 1763 def deltaparent(self, rev):
1764 1764 """return deltaparent of the given revision"""
1765 1765 base = self.index[rev][3]
1766 1766 if base == rev:
1767 1767 return nullrev
1768 1768 elif self._generaldelta:
1769 1769 return base
1770 1770 else:
1771 1771 return rev - 1
1772 1772
1773 1773 def issnapshot(self, rev):
1774 1774 """tells whether rev is a snapshot
1775 1775 """
1776 1776 if not self._sparserevlog:
1777 1777 return self.deltaparent(rev) == nullrev
1778 1778 elif util.safehasattr(self.index, b'issnapshot'):
1779 1779 # directly assign the method to cache the testing and access
1780 1780 self.issnapshot = self.index.issnapshot
1781 1781 return self.issnapshot(rev)
1782 1782 if rev == nullrev:
1783 1783 return True
1784 1784 entry = self.index[rev]
1785 1785 base = entry[3]
1786 1786 if base == rev:
1787 1787 return True
1788 1788 if base == nullrev:
1789 1789 return True
1790 1790 p1 = entry[5]
1791 1791 p2 = entry[6]
1792 1792 if base == p1 or base == p2:
1793 1793 return False
1794 1794 return self.issnapshot(base)
1795 1795
1796 1796 def snapshotdepth(self, rev):
1797 1797 """number of snapshot in the chain before this one"""
1798 1798 if not self.issnapshot(rev):
1799 1799 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1800 1800 return len(self._deltachain(rev)[0]) - 1
1801 1801
1802 1802 def revdiff(self, rev1, rev2):
1803 1803 """return or calculate a delta between two revisions
1804 1804
1805 1805 The delta calculated is in binary form and is intended to be written to
1806 1806 revlog data directly. So this function needs raw revision data.
1807 1807 """
1808 1808 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1809 1809 return bytes(self._chunk(rev2))
1810 1810
1811 1811 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1812 1812
1813 1813 def _processflags(self, text, flags, operation, raw=False):
1814 1814 """deprecated entry point to access flag processors"""
1815 1815 msg = b'_processflag(...) use the specialized variant'
1816 1816 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1817 1817 if raw:
1818 1818 return text, flagutil.processflagsraw(self, text, flags)
1819 1819 elif operation == b'read':
1820 1820 return flagutil.processflagsread(self, text, flags)
1821 1821 else: # write operation
1822 1822 return flagutil.processflagswrite(self, text, flags, None)
1823 1823
1824 1824 def revision(self, nodeorrev, _df=None, raw=False):
1825 1825 """return an uncompressed revision of a given node or revision
1826 1826 number.
1827 1827
1828 1828 _df - an existing file handle to read from. (internal-only)
1829 1829 raw - an optional argument specifying if the revision data is to be
1830 1830 treated as raw data when applying flag transforms. 'raw' should be set
1831 1831 to True when generating changegroups or in debug commands.
1832 1832 """
1833 1833 if raw:
1834 1834 msg = (
1835 1835 b'revlog.revision(..., raw=True) is deprecated, '
1836 1836 b'use revlog.rawdata(...)'
1837 1837 )
1838 1838 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1839 1839 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1840 1840
1841 1841 def sidedata(self, nodeorrev, _df=None):
1842 1842 """a map of extra data related to the changeset but not part of the hash
1843 1843
1844 1844 This function currently returns a dictionary. However, a more advanced
1845 1845 mapping object will likely be used in the future for more
1846 1846 efficient/lazy code.
1847 1847 """
1848 1848 return self._revisiondata(nodeorrev, _df)[1]
1849 1849
1850 1850 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1851 1851 # deal with <nodeorrev> argument type
1852 1852 if isinstance(nodeorrev, int):
1853 1853 rev = nodeorrev
1854 1854 node = self.node(rev)
1855 1855 else:
1856 1856 node = nodeorrev
1857 1857 rev = None
1858 1858
1859 1859 # fast path the special `nullid` rev
1860 1860 if node == nullid:
1861 1861 return b"", {}
1862 1862
1863 1863 # ``rawtext`` is the text as stored inside the revlog. Might be the
1864 1864 # revision or might need to be processed to retrieve the revision.
1865 1865 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1866 1866
1867 1867 if raw and validated:
1868 1868 # if we don't want to process the raw text and that raw
1869 1869 # text is cached, we can exit early.
1870 1870 return rawtext, {}
1871 1871 if rev is None:
1872 1872 rev = self.rev(node)
1873 1873 # the revlog's flag for this revision
1874 1874 # (usually alter its state or content)
1875 1875 flags = self.flags(rev)
1876 1876
1877 1877 if validated and flags == REVIDX_DEFAULT_FLAGS:
1878 1878 # no extra flags set, no flag processor runs, text = rawtext
1879 1879 return rawtext, {}
1880 1880
1881 1881 sidedata = {}
1882 1882 if raw:
1883 1883 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1884 1884 text = rawtext
1885 1885 else:
1886 1886 try:
1887 1887 r = flagutil.processflagsread(self, rawtext, flags)
1888 1888 except error.SidedataHashError as exc:
1889 1889 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1890 1890 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1891 1891 raise error.RevlogError(msg)
1892 1892 text, validatehash, sidedata = r
1893 1893 if validatehash:
1894 1894 self.checkhash(text, node, rev=rev)
1895 1895 if not validated:
1896 1896 self._revisioncache = (node, rev, rawtext)
1897 1897
1898 1898 return text, sidedata
1899 1899
1900 1900 def _rawtext(self, node, rev, _df=None):
1901 1901 """return the possibly unvalidated rawtext for a revision
1902 1902
1903 1903 returns (rev, rawtext, validated)
1904 1904 """
1905 1905
1906 1906 # revision in the cache (could be useful to apply delta)
1907 1907 cachedrev = None
1908 1908 # An intermediate text to apply deltas to
1909 1909 basetext = None
1910 1910
1911 1911 # Check if we have the entry in cache
1912 1912 # The cache entry looks like (node, rev, rawtext)
1913 1913 if self._revisioncache:
1914 1914 if self._revisioncache[0] == node:
1915 1915 return (rev, self._revisioncache[2], True)
1916 1916 cachedrev = self._revisioncache[1]
1917 1917
1918 1918 if rev is None:
1919 1919 rev = self.rev(node)
1920 1920
1921 1921 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1922 1922 if stopped:
1923 1923 basetext = self._revisioncache[2]
1924 1924
1925 1925 # drop cache to save memory, the caller is expected to
1926 1926 # update self._revisioncache after validating the text
1927 1927 self._revisioncache = None
1928 1928
1929 1929 targetsize = None
1930 1930 rawsize = self.index[rev][2]
1931 1931 if 0 <= rawsize:
1932 1932 targetsize = 4 * rawsize
1933 1933
1934 1934 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1935 1935 if basetext is None:
1936 1936 basetext = bytes(bins[0])
1937 1937 bins = bins[1:]
1938 1938
1939 1939 rawtext = mdiff.patches(basetext, bins)
1940 1940 del basetext # let us have a chance to free memory early
1941 1941 return (rev, rawtext, False)
1942 1942
1943 1943 def rawdata(self, nodeorrev, _df=None):
1944 1944 """return an uncompressed raw data of a given node or revision number.
1945 1945
1946 1946 _df - an existing file handle to read from. (internal-only)
1947 1947 """
1948 1948 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1949 1949
1950 1950 def hash(self, text, p1, p2):
1951 1951 """Compute a node hash.
1952 1952
1953 1953 Available as a function so that subclasses can replace the hash
1954 1954 as needed.
1955 1955 """
1956 1956 return storageutil.hashrevisionsha1(text, p1, p2)
1957 1957
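# Illustrative sketch (not part of revlog.py) of what
# storageutil.hashrevisionsha1() computes: a SHA-1 over the two parent nodes,
# ordered so the result is independent of parent order, followed by the
# revision text. This is a simplified restatement, not the canonical
# implementation.
import hashlib

def hashrevision_sketch(text, p1, p2):
    s = hashlib.sha1()
    a, b = sorted([p1, p2])
    s.update(a)
    s.update(b)
    s.update(text)
    return s.digest()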
1958 1958 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1959 1959 """Check node hash integrity.
1960 1960
1961 1961 Available as a function so that subclasses can extend hash mismatch
1962 1962 behaviors as needed.
1963 1963 """
1964 1964 try:
1965 1965 if p1 is None and p2 is None:
1966 1966 p1, p2 = self.parents(node)
1967 1967 if node != self.hash(text, p1, p2):
1968 1968 # Clear the revision cache on hash failure. The revision cache
1969 1969 # only stores the raw revision and clearing the cache does have
1970 1970 # the side-effect that we won't have a cache hit when the raw
1971 1971 # revision data is accessed. But this case should be rare and
1972 1972 # it is extra work to teach the cache about the hash
1973 1973 # verification state.
1974 1974 if self._revisioncache and self._revisioncache[0] == node:
1975 1975 self._revisioncache = None
1976 1976
1977 1977 revornode = rev
1978 1978 if revornode is None:
1979 1979 revornode = templatefilters.short(hex(node))
1980 1980 raise error.RevlogError(
1981 1981 _(b"integrity check failed on %s:%s")
1982 1982 % (self.indexfile, pycompat.bytestr(revornode))
1983 1983 )
1984 1984 except error.RevlogError:
1985 1985 if self._censorable and storageutil.iscensoredtext(text):
1986 1986 raise error.CensoredNodeError(self.indexfile, node, text)
1987 1987 raise
1988 1988
1989 1989 def _enforceinlinesize(self, tr, fp=None):
1990 1990 """Check if the revlog is too big for inline and convert if so.
1991 1991
1992 1992 This should be called after revisions are added to the revlog. If the
1993 1993 revlog has grown too large to be an inline revlog, it will convert it
1994 1994 to use multiple index and data files.
1995 1995 """
1996 1996 tiprev = len(self) - 1
1997 1997 if (
1998 1998 not self._inline
1999 1999 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2000 2000 ):
2001 2001 return
2002 2002
2003 trinfo = tr.find(self.indexfile)
2004 if trinfo is None:
2003 troffset = tr.findoffset(self.indexfile)
2004 if troffset is None:
2005 2005 raise error.RevlogError(
2006 2006 _(b"%s not found in the transaction") % self.indexfile
2007 2007 )
2008 troffset = trinfo[1]
2009 2008 trindex = 0
2010 2009 tr.add(self.datafile, 0)
2011 2010
2012 2011 if fp:
2013 2012 fp.flush()
2014 2013 fp.close()
2015 2014 # We can't use the cached file handle after close(). So prevent
2016 2015 # its usage.
2017 2016 self._writinghandles = None
2018 2017
2019 2018 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2020 2019 for r in self:
2021 2020 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2022 2021 if troffset <= self.start(r):
2023 2022 trindex = r
2024 2023
2025 2024 with self._indexfp(b'w') as fp:
2026 2025 self.version &= ~FLAG_INLINE_DATA
2027 2026 self._inline = False
2028 2027 io = self._io
2029 2028 for i in self:
2030 2029 e = io.packentry(self.index[i], self.node, self.version, i)
2031 2030 fp.write(e)
2032 2031
2034 2033 # the temp file replaces the real index when we exit the context
2034 2033 # manager
2035 2034
2036 2035 tr.replace(self.indexfile, trindex * self._io.size)
2037 2036 nodemaputil.setup_persistent_nodemap(tr, self)
2038 2037 self._chunkclear()
2039 2038
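# Illustrative sketch (not part of revlog.py): the threshold test behind
# _enforceinlinesize() above. A revlog stays inline while the data portion
# (start of the tip revision plus its length) is below _maxinline (128 KiB);
# once it grows past that, it is rewritten as separate .i and .d files.
def needs_split_sketch(rl, maxinline=131072):
    tiprev = len(rl) - 1
    return rl._inline and rl.start(tiprev) + rl.length(tiprev) >= maxinline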
2040 2039 def _nodeduplicatecallback(self, transaction, node):
2041 2040 """called when trying to add a node already stored.
2042 2041 """
2043 2042
2044 2043 def addrevision(
2045 2044 self,
2046 2045 text,
2047 2046 transaction,
2048 2047 link,
2049 2048 p1,
2050 2049 p2,
2051 2050 cachedelta=None,
2052 2051 node=None,
2053 2052 flags=REVIDX_DEFAULT_FLAGS,
2054 2053 deltacomputer=None,
2055 2054 sidedata=None,
2056 2055 ):
2057 2056 """add a revision to the log
2058 2057
2059 2058 text - the revision data to add
2060 2059 transaction - the transaction object used for rollback
2061 2060 link - the linkrev data to add
2062 2061 p1, p2 - the parent nodeids of the revision
2063 2062 cachedelta - an optional precomputed delta
2064 2063 node - nodeid of revision; typically node is not specified, and it is
2065 2064 computed by default as hash(text, p1, p2), however subclasses might
2066 2065 use a different hashing method (and override checkhash() in that case)
2067 2066 flags - the known flags to set on the revision
2068 2067 deltacomputer - an optional deltacomputer instance shared between
2069 2068 multiple calls
2070 2069 """
2071 2070 if link == nullrev:
2072 2071 raise error.RevlogError(
2073 2072 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2074 2073 )
2075 2074
2076 2075 if sidedata is None:
2077 2076 sidedata = {}
2078 2077 flags = flags & ~REVIDX_SIDEDATA
2079 2078 elif not self.hassidedata:
2080 2079 raise error.ProgrammingError(
2081 2080 _(b"trying to add sidedata to a revlog that does not support it")
2082 2081 )
2083 2082 else:
2084 2083 flags |= REVIDX_SIDEDATA
2085 2084
2086 2085 if flags:
2087 2086 node = node or self.hash(text, p1, p2)
2088 2087
2089 2088 rawtext, validatehash = flagutil.processflagswrite(
2090 2089 self, text, flags, sidedata=sidedata
2091 2090 )
2092 2091
2093 2092 # If the flag processor modifies the revision data, ignore any provided
2094 2093 # cachedelta.
2095 2094 if rawtext != text:
2096 2095 cachedelta = None
2097 2096
2098 2097 if len(rawtext) > _maxentrysize:
2099 2098 raise error.RevlogError(
2100 2099 _(
2101 2100 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2102 2101 )
2103 2102 % (self.indexfile, len(rawtext))
2104 2103 )
2105 2104
2106 2105 node = node or self.hash(rawtext, p1, p2)
2107 2106 if self.index.has_node(node):
2108 2107 return node
2109 2108
2110 2109 if validatehash:
2111 2110 self.checkhash(rawtext, node, p1=p1, p2=p2)
2112 2111
2113 2112 return self.addrawrevision(
2114 2113 rawtext,
2115 2114 transaction,
2116 2115 link,
2117 2116 p1,
2118 2117 p2,
2119 2118 node,
2120 2119 flags,
2121 2120 cachedelta=cachedelta,
2122 2121 deltacomputer=deltacomputer,
2123 2122 )
2124 2123
2125 2124 def addrawrevision(
2126 2125 self,
2127 2126 rawtext,
2128 2127 transaction,
2129 2128 link,
2130 2129 p1,
2131 2130 p2,
2132 2131 node,
2133 2132 flags,
2134 2133 cachedelta=None,
2135 2134 deltacomputer=None,
2136 2135 ):
2137 2136 """add a raw revision with known flags, node and parents
2138 2137 useful when reusing a revision not stored in this revlog (e.g. received
2139 2138 over the wire, or read from an external bundle).
2140 2139 """
2141 2140 dfh = None
2142 2141 if not self._inline:
2143 2142 dfh = self._datafp(b"a+")
2144 2143 ifh = self._indexfp(b"a+")
2145 2144 try:
2146 2145 return self._addrevision(
2147 2146 node,
2148 2147 rawtext,
2149 2148 transaction,
2150 2149 link,
2151 2150 p1,
2152 2151 p2,
2153 2152 flags,
2154 2153 cachedelta,
2155 2154 ifh,
2156 2155 dfh,
2157 2156 deltacomputer=deltacomputer,
2158 2157 )
2159 2158 finally:
2160 2159 if dfh:
2161 2160 dfh.close()
2162 2161 ifh.close()
2163 2162
2164 2163 def compress(self, data):
2165 2164 """Generate a possibly-compressed representation of data."""
2166 2165 if not data:
2167 2166 return b'', data
2168 2167
2169 2168 compressed = self._compressor.compress(data)
2170 2169
2171 2170 if compressed:
2172 2171 # The revlog compressor added the header in the returned data.
2173 2172 return b'', compressed
2174 2173
2175 2174 if data[0:1] == b'\0':
2176 2175 return b'', data
2177 2176 return b'u', data
2178 2177
2179 2178 def decompress(self, data):
2180 2179 """Decompress a revlog chunk.
2181 2180
2182 2181 The chunk is expected to begin with a header identifying the
2183 2182 format type so it can be routed to an appropriate decompressor.
2184 2183 """
2185 2184 if not data:
2186 2185 return data
2187 2186
2188 2187 # Revlogs are read much more frequently than they are written and many
2189 2188 # chunks only take microseconds to decompress, so performance is
2190 2189 # important here.
2191 2190 #
2192 2191 # We can make a few assumptions about revlogs:
2193 2192 #
2194 2193 # 1) the majority of chunks will be compressed (as opposed to inline
2195 2194 # raw data).
2196 2195 # 2) decompressing *any* data will likely be at least 10x slower than
2197 2196 # returning raw inline data.
2198 2197 # 3) we want to prioritize common and officially supported compression
2199 2198 # engines
2200 2199 #
2201 2200 # It follows that we want to optimize for "decompress compressed data
2202 2201 # when encoded with common and officially supported compression engines"
2203 2202 # case over "raw data" and "data encoded by less common or non-official
2204 2203 # compression engines." That is why we have the inline lookup first
2205 2204 # followed by the compengines lookup.
2206 2205 #
2207 2206 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2208 2207 # compressed chunks. And this matters for changelog and manifest reads.
2209 2208 t = data[0:1]
2210 2209
2211 2210 if t == b'x':
2212 2211 try:
2213 2212 return _zlibdecompress(data)
2214 2213 except zlib.error as e:
2215 2214 raise error.RevlogError(
2216 2215 _(b'revlog decompress error: %s')
2217 2216 % stringutil.forcebytestr(e)
2218 2217 )
2219 2218 # '\0' is more common than 'u' so it goes first.
2220 2219 elif t == b'\0':
2221 2220 return data
2222 2221 elif t == b'u':
2223 2222 return util.buffer(data, 1)
2224 2223
2225 2224 try:
2226 2225 compressor = self._decompressors[t]
2227 2226 except KeyError:
2228 2227 try:
2229 2228 engine = util.compengines.forrevlogheader(t)
2230 2229 compressor = engine.revlogcompressor(self._compengineopts)
2231 2230 self._decompressors[t] = compressor
2232 2231 except KeyError:
2233 2232 raise error.RevlogError(_(b'unknown compression type %r') % t)
2234 2233
2235 2234 return compressor.decompress(data)
2236 2235
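# Illustrative sketch (not part of revlog.py): the header-byte routing done by
# decompress() above, reduced to the three built-in cases. Chunks framed by
# zlib start with 'x', raw chunks whose first byte is NUL are stored verbatim,
# and 'u' marks explicitly-uncompressed data.
import zlib

def decompress_sketch(data):
    if not data:
        return data
    t = data[0:1]
    if t == b'x':      # zlib-framed chunk
        return zlib.decompress(data)
    if t == b'\0':     # raw data, stored as-is
        return data
    if t == b'u':      # stored uncompressed behind an explicit marker
        return data[1:]
    raise ValueError('unknown compression type %r' % t)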
2237 2236 def _addrevision(
2238 2237 self,
2239 2238 node,
2240 2239 rawtext,
2241 2240 transaction,
2242 2241 link,
2243 2242 p1,
2244 2243 p2,
2245 2244 flags,
2246 2245 cachedelta,
2247 2246 ifh,
2248 2247 dfh,
2249 2248 alwayscache=False,
2250 2249 deltacomputer=None,
2251 2250 ):
2252 2251 """internal function to add revisions to the log
2253 2252
2254 2253 see addrevision for argument descriptions.
2255 2254
2256 2255 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2257 2256
2258 2257 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2259 2258 be used.
2260 2259
2261 2260 invariants:
2262 2261 - rawtext is optional (can be None); if not set, cachedelta must be set.
2263 2262 if both are set, they must correspond to each other.
2264 2263 """
2265 2264 if node == nullid:
2266 2265 raise error.RevlogError(
2267 2266 _(b"%s: attempt to add null revision") % self.indexfile
2268 2267 )
2269 2268 if node == wdirid or node in wdirfilenodeids:
2270 2269 raise error.RevlogError(
2271 2270 _(b"%s: attempt to add wdir revision") % self.indexfile
2272 2271 )
2273 2272
2274 2273 if self._inline:
2275 2274 fh = ifh
2276 2275 else:
2277 2276 fh = dfh
2278 2277
2279 2278 btext = [rawtext]
2280 2279
2281 2280 curr = len(self)
2282 2281 prev = curr - 1
2283 2282 offset = self.end(prev)
2284 2283 p1r, p2r = self.rev(p1), self.rev(p2)
2285 2284
2286 2285 # full versions are inserted when the needed deltas
2287 2286 # become comparable to the uncompressed text
2288 2287 if rawtext is None:
2289 2288 # need rawtext size, before changed by flag processors, which is
2290 2289 # the non-raw size. use revlog explicitly to avoid filelog's extra
2291 2290 # logic that might remove metadata size.
2292 2291 textlen = mdiff.patchedsize(
2293 2292 revlog.size(self, cachedelta[0]), cachedelta[1]
2294 2293 )
2295 2294 else:
2296 2295 textlen = len(rawtext)
2297 2296
2298 2297 if deltacomputer is None:
2299 2298 deltacomputer = deltautil.deltacomputer(self)
2300 2299
2301 2300 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2302 2301
2303 2302 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2304 2303
2305 2304 e = (
2306 2305 offset_type(offset, flags),
2307 2306 deltainfo.deltalen,
2308 2307 textlen,
2309 2308 deltainfo.base,
2310 2309 link,
2311 2310 p1r,
2312 2311 p2r,
2313 2312 node,
2314 2313 )
2315 2314 self.index.append(e)
2316 2315
2317 2316 entry = self._io.packentry(e, self.node, self.version, curr)
2318 2317 self._writeentry(
2319 2318 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2320 2319 )
2321 2320
2322 2321 rawtext = btext[0]
2323 2322
2324 2323 if alwayscache and rawtext is None:
2325 2324 rawtext = deltacomputer.buildtext(revinfo, fh)
2326 2325
2327 2326 if type(rawtext) == bytes: # only accept immutable objects
2328 2327 self._revisioncache = (node, curr, rawtext)
2329 2328 self._chainbasecache[curr] = deltainfo.chainbase
2330 2329 return node
2331 2330
2332 2331 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2333 2332 # Files opened in a+ mode have inconsistent behavior on various
2334 2333 # platforms. Windows requires that a file positioning call be made
2335 2334 # when the file handle transitions between reads and writes. See
2336 2335 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2337 2336 # platforms, Python or the platform itself can be buggy. Some versions
2338 2337 # of Solaris have been observed to not append at the end of the file
2339 2338 # if the file was seeked to before the end. See issue4943 for more.
2340 2339 #
2341 2340 # We work around this issue by inserting a seek() before writing.
2342 2341 # Note: This is likely not necessary on Python 3. However, because
2343 2342 # the file handle is reused for reads and may be seeked there, we need
2344 2343 # to be careful before changing this.
2345 2344 ifh.seek(0, os.SEEK_END)
2346 2345 if dfh:
2347 2346 dfh.seek(0, os.SEEK_END)
2348 2347
2349 2348 curr = len(self) - 1
2350 2349 if not self._inline:
2351 2350 transaction.add(self.datafile, offset)
2352 2351 transaction.add(self.indexfile, curr * len(entry))
2353 2352 if data[0]:
2354 2353 dfh.write(data[0])
2355 2354 dfh.write(data[1])
2356 2355 ifh.write(entry)
2357 2356 else:
2358 2357 offset += curr * self._io.size
2359 2358 transaction.add(self.indexfile, offset)
2360 2359 ifh.write(entry)
2361 2360 ifh.write(data[0])
2362 2361 ifh.write(data[1])
2363 2362 self._enforceinlinesize(transaction, ifh)
2364 2363 nodemaputil.setup_persistent_nodemap(transaction, self)
2365 2364
2366 2365 def addgroup(
2367 2366 self,
2368 2367 deltas,
2369 2368 linkmapper,
2370 2369 transaction,
2371 2370 addrevisioncb=None,
2372 2371 duplicaterevisioncb=None,
2373 2372 ):
2374 2373 """
2375 2374 add a delta group
2376 2375
2377 2376 given a set of deltas, add them to the revision log. the
2378 2377 first delta is against its parent, which should be in our
2379 2378 log, the rest are against the previous delta.
2380 2379
2381 2380 If ``addrevisioncb`` is defined, it will be called with arguments of
2382 2381 this revlog and the node that was added.
2383 2382 """
2384 2383
2385 2384 if self._writinghandles:
2386 2385 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2387 2386
2388 2387 r = len(self)
2389 2388 end = 0
2390 2389 if r:
2391 2390 end = self.end(r - 1)
2392 2391 ifh = self._indexfp(b"a+")
2393 2392 isize = r * self._io.size
2394 2393 if self._inline:
2395 2394 transaction.add(self.indexfile, end + isize)
2396 2395 dfh = None
2397 2396 else:
2398 2397 transaction.add(self.indexfile, isize)
2399 2398 transaction.add(self.datafile, end)
2400 2399 dfh = self._datafp(b"a+")
2401 2400
2402 2401 def flush():
2403 2402 if dfh:
2404 2403 dfh.flush()
2405 2404 ifh.flush()
2406 2405
2407 2406 self._writinghandles = (ifh, dfh)
2408 2407 empty = True
2409 2408
2410 2409 try:
2411 2410 deltacomputer = deltautil.deltacomputer(self)
2412 2411 # loop through our set of deltas
2413 2412 for data in deltas:
2414 2413 node, p1, p2, linknode, deltabase, delta, flags = data
2415 2414 link = linkmapper(linknode)
2416 2415 flags = flags or REVIDX_DEFAULT_FLAGS
2417 2416
2418 2417 if self.index.has_node(node):
2419 2418 # this can happen if two branches make the same change
2420 2419 self._nodeduplicatecallback(transaction, node)
2421 2420 if duplicaterevisioncb:
2422 2421 duplicaterevisioncb(self, node)
2423 2422 empty = False
2424 2423 continue
2425 2424
2426 2425 for p in (p1, p2):
2427 2426 if not self.index.has_node(p):
2428 2427 raise error.LookupError(
2429 2428 p, self.indexfile, _(b'unknown parent')
2430 2429 )
2431 2430
2432 2431 if not self.index.has_node(deltabase):
2433 2432 raise error.LookupError(
2434 2433 deltabase, self.indexfile, _(b'unknown delta base')
2435 2434 )
2436 2435
2437 2436 baserev = self.rev(deltabase)
2438 2437
2439 2438 if baserev != nullrev and self.iscensored(baserev):
2440 2439 # if base is censored, delta must be full replacement in a
2441 2440 # single patch operation
2442 2441 hlen = struct.calcsize(b">lll")
2443 2442 oldlen = self.rawsize(baserev)
2444 2443 newlen = len(delta) - hlen
2445 2444 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2446 2445 raise error.CensoredBaseError(
2447 2446 self.indexfile, self.node(baserev)
2448 2447 )
2449 2448
2450 2449 if not flags and self._peek_iscensored(baserev, delta, flush):
2451 2450 flags |= REVIDX_ISCENSORED
2452 2451
2453 2452 # We assume consumers of addrevisioncb will want to retrieve
2454 2453 # the added revision, which will require a call to
2455 2454 # revision(). revision() will fast path if there is a cache
2456 2455 # hit. So, we tell _addrevision() to always cache in this case.
2457 2456 # We're only using addgroup() in the context of changegroup
2458 2457 # generation so the revision data can always be handled as raw
2459 2458 # by the flagprocessor.
2460 2459 self._addrevision(
2461 2460 node,
2462 2461 None,
2463 2462 transaction,
2464 2463 link,
2465 2464 p1,
2466 2465 p2,
2467 2466 flags,
2468 2467 (baserev, delta),
2469 2468 ifh,
2470 2469 dfh,
2471 2470 alwayscache=bool(addrevisioncb),
2472 2471 deltacomputer=deltacomputer,
2473 2472 )
2474 2473
2475 2474 if addrevisioncb:
2476 2475 addrevisioncb(self, node)
2477 2476 empty = False
2478 2477
2479 2478 if not dfh and not self._inline:
2480 2479 # addrevision switched from inline to conventional
2481 2480 # reopen the index
2482 2481 ifh.close()
2483 2482 dfh = self._datafp(b"a+")
2484 2483 ifh = self._indexfp(b"a+")
2485 2484 self._writinghandles = (ifh, dfh)
2486 2485 finally:
2487 2486 self._writinghandles = None
2488 2487
2489 2488 if dfh:
2490 2489 dfh.close()
2491 2490 ifh.close()
2492 2491 return not empty
2493 2492
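# Illustrative sketch (not part of revlog.py): the shape of one element of the
# ``deltas`` iterable consumed by addgroup() above, matching the unpacking
# done at the top of its loop. All names here are placeholders.
def make_delta_entry(node, p1, p2, linknode, deltabase, delta, flags=0):
    # (node, p1, p2, linknode, deltabase, delta, flags)
    return (node, p1, p2, linknode, deltabase, delta, flags)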
2494 2493 def iscensored(self, rev):
2495 2494 """Check if a file revision is censored."""
2496 2495 if not self._censorable:
2497 2496 return False
2498 2497
2499 2498 return self.flags(rev) & REVIDX_ISCENSORED
2500 2499
2501 2500 def _peek_iscensored(self, baserev, delta, flush):
2502 2501 """Quickly check if a delta produces a censored revision."""
2503 2502 if not self._censorable:
2504 2503 return False
2505 2504
2506 2505 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2507 2506
2508 2507 def getstrippoint(self, minlink):
2509 2508 """find the minimum rev that must be stripped to strip the linkrev
2510 2509
2511 2510 Returns a tuple containing the minimum rev and a set of all revs that
2512 2511 have linkrevs that will be broken by this strip.
2513 2512 """
2514 2513 return storageutil.resolvestripinfo(
2515 2514 minlink,
2516 2515 len(self) - 1,
2517 2516 self.headrevs(),
2518 2517 self.linkrev,
2519 2518 self.parentrevs,
2520 2519 )
2521 2520
2522 2521 def strip(self, minlink, transaction):
2523 2522 """truncate the revlog on the first revision with a linkrev >= minlink
2524 2523
2525 2524 This function is called when we're stripping revision minlink and
2526 2525 its descendants from the repository.
2527 2526
2528 2527 We have to remove all revisions with linkrev >= minlink, because
2529 2528 the equivalent changelog revisions will be renumbered after the
2530 2529 strip.
2531 2530
2532 2531 So we truncate the revlog on the first of these revisions, and
2533 2532 trust that the caller has saved the revisions that shouldn't be
2534 2533 removed and that it'll re-add them after this truncation.
2535 2534 """
2536 2535 if len(self) == 0:
2537 2536 return
2538 2537
2539 2538 rev, _ = self.getstrippoint(minlink)
2540 2539 if rev == len(self):
2541 2540 return
2542 2541
2543 2542 # first truncate the files on disk
2544 2543 end = self.start(rev)
2545 2544 if not self._inline:
2546 2545 transaction.add(self.datafile, end)
2547 2546 end = rev * self._io.size
2548 2547 else:
2549 2548 end += rev * self._io.size
2550 2549
2551 2550 transaction.add(self.indexfile, end)
2552 2551
2553 2552 # then reset internal state in memory to forget those revisions
2554 2553 self._revisioncache = None
2555 2554 self._chaininfocache = util.lrucachedict(500)
2556 2555 self._chunkclear()
2557 2556
2558 2557 del self.index[rev:-1]
2559 2558
2560 2559 def checksize(self):
2561 2560 """Check size of index and data files
2562 2561
2563 2562 return a (dd, di) tuple.
2564 2563 - dd: extra bytes for the "data" file
2565 2564 - di: extra bytes for the "index" file
2566 2565
2567 2566 A healthy revlog will return (0, 0).
2568 2567 """
2569 2568 expected = 0
2570 2569 if len(self):
2571 2570 expected = max(0, self.end(len(self) - 1))
2572 2571
2573 2572 try:
2574 2573 with self._datafp() as f:
2575 2574 f.seek(0, io.SEEK_END)
2576 2575 actual = f.tell()
2577 2576 dd = actual - expected
2578 2577 except IOError as inst:
2579 2578 if inst.errno != errno.ENOENT:
2580 2579 raise
2581 2580 dd = 0
2582 2581
2583 2582 try:
2584 2583 f = self.opener(self.indexfile)
2585 2584 f.seek(0, io.SEEK_END)
2586 2585 actual = f.tell()
2587 2586 f.close()
2588 2587 s = self._io.size
2589 2588 i = max(0, actual // s)
2590 2589 di = actual - (i * s)
2591 2590 if self._inline:
2592 2591 databytes = 0
2593 2592 for r in self:
2594 2593 databytes += max(0, self.length(r))
2595 2594 dd = 0
2596 2595 di = actual - len(self) * s - databytes
2597 2596 except IOError as inst:
2598 2597 if inst.errno != errno.ENOENT:
2599 2598 raise
2600 2599 di = 0
2601 2600
2602 2601 return (dd, di)
2603 2602
2604 2603 def files(self):
2605 2604 res = [self.indexfile]
2606 2605 if not self._inline:
2607 2606 res.append(self.datafile)
2608 2607 return res
2609 2608
2610 2609 def emitrevisions(
2611 2610 self,
2612 2611 nodes,
2613 2612 nodesorder=None,
2614 2613 revisiondata=False,
2615 2614 assumehaveparentrevisions=False,
2616 2615 deltamode=repository.CG_DELTAMODE_STD,
2617 2616 ):
2618 2617 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2619 2618 raise error.ProgrammingError(
2620 2619 b'unhandled value for nodesorder: %s' % nodesorder
2621 2620 )
2622 2621
2623 2622 if nodesorder is None and not self._generaldelta:
2624 2623 nodesorder = b'storage'
2625 2624
2626 2625 if (
2627 2626 not self._storedeltachains
2628 2627 and deltamode != repository.CG_DELTAMODE_PREV
2629 2628 ):
2630 2629 deltamode = repository.CG_DELTAMODE_FULL
2631 2630
2632 2631 return storageutil.emitrevisions(
2633 2632 self,
2634 2633 nodes,
2635 2634 nodesorder,
2636 2635 revlogrevisiondelta,
2637 2636 deltaparentfn=self.deltaparent,
2638 2637 candeltafn=self.candelta,
2639 2638 rawsizefn=self.rawsize,
2640 2639 revdifffn=self.revdiff,
2641 2640 flagsfn=self.flags,
2642 2641 deltamode=deltamode,
2643 2642 revisiondata=revisiondata,
2644 2643 assumehaveparentrevisions=assumehaveparentrevisions,
2645 2644 )
2646 2645
2647 2646 DELTAREUSEALWAYS = b'always'
2648 2647 DELTAREUSESAMEREVS = b'samerevs'
2649 2648 DELTAREUSENEVER = b'never'
2650 2649
2651 2650 DELTAREUSEFULLADD = b'fulladd'
2652 2651
2653 2652 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2654 2653
2655 2654 def clone(
2656 2655 self,
2657 2656 tr,
2658 2657 destrevlog,
2659 2658 addrevisioncb=None,
2660 2659 deltareuse=DELTAREUSESAMEREVS,
2661 2660 forcedeltabothparents=None,
2662 2661 sidedatacompanion=None,
2663 2662 ):
2664 2663 """Copy this revlog to another, possibly with format changes.
2665 2664
2666 2665 The destination revlog will contain the same revisions and nodes.
2667 2666 However, it may not be bit-for-bit identical due to e.g. delta encoding
2668 2667 differences.
2669 2668
2670 2669 The ``deltareuse`` argument control how deltas from the existing revlog
2671 2670 are preserved in the destination revlog. The argument can have the
2672 2671 following values:
2673 2672
2674 2673 DELTAREUSEALWAYS
2675 2674 Deltas will always be reused (if possible), even if the destination
2676 2675 revlog would not select the same revisions for the delta. This is the
2677 2676 fastest mode of operation.
2678 2677 DELTAREUSESAMEREVS
2679 2678 Deltas will be reused if the destination revlog would pick the same
2680 2679 revisions for the delta. This mode strikes a balance between speed
2681 2680 and optimization.
2682 2681 DELTAREUSENEVER
2683 2682 Deltas will never be reused. This is the slowest mode of execution.
2684 2683 This mode can be used to recompute deltas (e.g. if the diff/delta
2685 2684 algorithm changes).
2686 2685 DELTAREUSEFULLADD
2687 2686 Revisions will be re-added as if they were new content. This is
2688 2687 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2689 2688 e.g. large file detection and handling.
2690 2689
2691 2690 Delta computation can be slow, so the choice of delta reuse policy can
2692 2691 significantly affect run time.
2693 2692
2694 2693 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2695 2694 two extremes. Deltas will be reused if they are appropriate. But if the
2696 2695 delta could choose a better revision, it will do so. This means if you
2697 2696 are converting a non-generaldelta revlog to a generaldelta revlog,
2698 2697 deltas will be recomputed if the delta's parent isn't a parent of the
2699 2698 revision.
2700 2699
2701 2700 In addition to the delta policy, the ``forcedeltabothparents``
2702 2701 argument controls whether to force compute deltas against both parents
2703 2702 for merges. If None, the destination revlog's current setting is kept.
2704 2703
2705 2704 If not None, `sidedatacompanion` is a callable that accepts two
2706 2705 arguments:
2707 2706
2708 2707 (srcrevlog, rev)
2709 2708
2710 2709 and returns a quintet that controls changes to sidedata content from the
2711 2710 old revision to the new clone result:
2712 2711
2713 2712 (dropall, filterout, update, new_flags, dropped_flags)
2714 2713
2715 2714 * if `dropall` is True, all sidedata should be dropped
2716 2715 * `filterout` is a set of sidedata keys that should be dropped
2717 2716 * `update` is a mapping of additional/new key -> value
2718 2717 * new_flags is a bitfield of new flags that the revision should get
2719 2718 * dropped_flags is a bitfield of flags that the revision should no longer have
2720 2719 """
2721 2720 if deltareuse not in self.DELTAREUSEALL:
2722 2721 raise ValueError(
2723 2722 _(b'value for deltareuse invalid: %s') % deltareuse
2724 2723 )
2725 2724
2726 2725 if len(destrevlog):
2727 2726 raise ValueError(_(b'destination revlog is not empty'))
2728 2727
2729 2728 if getattr(self, 'filteredrevs', None):
2730 2729 raise ValueError(_(b'source revlog has filtered revisions'))
2731 2730 if getattr(destrevlog, 'filteredrevs', None):
2732 2731 raise ValueError(_(b'destination revlog has filtered revisions'))
2733 2732
2734 2733 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2735 2734 # if possible.
2736 2735 oldlazydelta = destrevlog._lazydelta
2737 2736 oldlazydeltabase = destrevlog._lazydeltabase
2738 2737 oldamd = destrevlog._deltabothparents
2739 2738
2740 2739 try:
2741 2740 if deltareuse == self.DELTAREUSEALWAYS:
2742 2741 destrevlog._lazydeltabase = True
2743 2742 destrevlog._lazydelta = True
2744 2743 elif deltareuse == self.DELTAREUSESAMEREVS:
2745 2744 destrevlog._lazydeltabase = False
2746 2745 destrevlog._lazydelta = True
2747 2746 elif deltareuse == self.DELTAREUSENEVER:
2748 2747 destrevlog._lazydeltabase = False
2749 2748 destrevlog._lazydelta = False
2750 2749
2751 2750 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2752 2751
2753 2752 self._clone(
2754 2753 tr,
2755 2754 destrevlog,
2756 2755 addrevisioncb,
2757 2756 deltareuse,
2758 2757 forcedeltabothparents,
2759 2758 sidedatacompanion,
2760 2759 )
2761 2760
2762 2761 finally:
2763 2762 destrevlog._lazydelta = oldlazydelta
2764 2763 destrevlog._lazydeltabase = oldlazydeltabase
2765 2764 destrevlog._deltabothparents = oldamd
2766 2765
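# Illustrative sketch (not part of revlog.py): a hypothetical caller of
# clone() during a storage upgrade. It keeps the default policy of reusing a
# delta only when the destination would pick the same base, and lets the
# destination keep its own setting for deltas against both parents.
def upgrade_copy_sketch(srcrevlog, destrevlog, tr):
    srcrevlog.clone(
        tr,
        destrevlog,
        deltareuse=srcrevlog.DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
    )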
2767 2766 def _clone(
2768 2767 self,
2769 2768 tr,
2770 2769 destrevlog,
2771 2770 addrevisioncb,
2772 2771 deltareuse,
2773 2772 forcedeltabothparents,
2774 2773 sidedatacompanion,
2775 2774 ):
2776 2775 """perform the core duty of `revlog.clone` after parameter processing"""
2777 2776 deltacomputer = deltautil.deltacomputer(destrevlog)
2778 2777 index = self.index
2779 2778 for rev in self:
2780 2779 entry = index[rev]
2781 2780
2782 2781 # Some classes override linkrev to take filtered revs into
2783 2782 # account. Use raw entry from index.
2784 2783 flags = entry[0] & 0xFFFF
2785 2784 linkrev = entry[4]
2786 2785 p1 = index[entry[5]][7]
2787 2786 p2 = index[entry[6]][7]
2788 2787 node = entry[7]
2789 2788
2790 2789 sidedataactions = (False, [], {}, 0, 0)
2791 2790 if sidedatacompanion is not None:
2792 2791 sidedataactions = sidedatacompanion(self, rev)
2793 2792
2794 2793 # (Possibly) reuse the delta from the revlog if allowed and
2795 2794 # the revlog chunk is a delta.
2796 2795 cachedelta = None
2797 2796 rawtext = None
2798 2797 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2799 2798 dropall = sidedataactions[0]
2800 2799 filterout = sidedataactions[1]
2801 2800 update = sidedataactions[2]
2802 2801 new_flags = sidedataactions[3]
2803 2802 dropped_flags = sidedataactions[4]
2804 2803 text, sidedata = self._revisiondata(rev)
2805 2804 if dropall:
2806 2805 sidedata = {}
2807 2806 for key in filterout:
2808 2807 sidedata.pop(key, None)
2809 2808 sidedata.update(update)
2810 2809 if not sidedata:
2811 2810 sidedata = None
2812 2811
2813 2812 flags |= new_flags
2814 2813 flags &= ~dropped_flags
2815 2814
2816 2815 destrevlog.addrevision(
2817 2816 text,
2818 2817 tr,
2819 2818 linkrev,
2820 2819 p1,
2821 2820 p2,
2822 2821 cachedelta=cachedelta,
2823 2822 node=node,
2824 2823 flags=flags,
2825 2824 deltacomputer=deltacomputer,
2826 2825 sidedata=sidedata,
2827 2826 )
2828 2827 else:
2829 2828 if destrevlog._lazydelta:
2830 2829 dp = self.deltaparent(rev)
2831 2830 if dp != nullrev:
2832 2831 cachedelta = (dp, bytes(self._chunk(rev)))
2833 2832
2834 2833 if not cachedelta:
2835 2834 rawtext = self.rawdata(rev)
2836 2835
2837 2836 ifh = destrevlog.opener(
2838 2837 destrevlog.indexfile, b'a+', checkambig=False
2839 2838 )
2840 2839 dfh = None
2841 2840 if not destrevlog._inline:
2842 2841 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2843 2842 try:
2844 2843 destrevlog._addrevision(
2845 2844 node,
2846 2845 rawtext,
2847 2846 tr,
2848 2847 linkrev,
2849 2848 p1,
2850 2849 p2,
2851 2850 flags,
2852 2851 cachedelta,
2853 2852 ifh,
2854 2853 dfh,
2855 2854 deltacomputer=deltacomputer,
2856 2855 )
2857 2856 finally:
2858 2857 if dfh:
2859 2858 dfh.close()
2860 2859 ifh.close()
2861 2860
2862 2861 if addrevisioncb:
2863 2862 addrevisioncb(self, rev, node)
2864 2863
2865 2864 def censorrevision(self, tr, censornode, tombstone=b''):
2866 2865 if (self.version & 0xFFFF) == REVLOGV0:
2867 2866 raise error.RevlogError(
2868 2867 _(b'cannot censor with version %d revlogs') % self.version
2869 2868 )
2870 2869
2871 2870 censorrev = self.rev(censornode)
2872 2871 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2873 2872
2874 2873 if len(tombstone) > self.rawsize(censorrev):
2875 2874 raise error.Abort(
2876 2875 _(b'censor tombstone must be no longer than censored data')
2877 2876 )
2878 2877
2879 2878 # Rewriting the revlog in place is hard. Our strategy for censoring is
2880 2879 # to create a new revlog, copy all revisions to it, then replace the
2881 2880 # revlogs on transaction close.
2882 2881
2883 2882 newindexfile = self.indexfile + b'.tmpcensored'
2884 2883 newdatafile = self.datafile + b'.tmpcensored'
2885 2884
2886 2885 # This is a bit dangerous. We could easily have a mismatch of state.
2887 2886 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2888 2887 newrl.version = self.version
2889 2888 newrl._generaldelta = self._generaldelta
2890 2889 newrl._io = self._io
2891 2890
2892 2891 for rev in self.revs():
2893 2892 node = self.node(rev)
2894 2893 p1, p2 = self.parents(node)
2895 2894
2896 2895 if rev == censorrev:
2897 2896 newrl.addrawrevision(
2898 2897 tombstone,
2899 2898 tr,
2900 2899 self.linkrev(censorrev),
2901 2900 p1,
2902 2901 p2,
2903 2902 censornode,
2904 2903 REVIDX_ISCENSORED,
2905 2904 )
2906 2905
2907 2906 if newrl.deltaparent(rev) != nullrev:
2908 2907 raise error.Abort(
2909 2908 _(
2910 2909 b'censored revision stored as delta; '
2911 2910 b'cannot censor'
2912 2911 ),
2913 2912 hint=_(
2914 2913 b'censoring of revlogs is not '
2915 2914 b'fully implemented; please report '
2916 2915 b'this bug'
2917 2916 ),
2918 2917 )
2919 2918 continue
2920 2919
2921 2920 if self.iscensored(rev):
2922 2921 if self.deltaparent(rev) != nullrev:
2923 2922 raise error.Abort(
2924 2923 _(
2925 2924 b'cannot censor due to censored '
2926 2925 b'revision having delta stored'
2927 2926 )
2928 2927 )
2929 2928 rawtext = self._chunk(rev)
2930 2929 else:
2931 2930 rawtext = self.rawdata(rev)
2932 2931
2933 2932 newrl.addrawrevision(
2934 2933 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2935 2934 )
2936 2935
2937 2936 tr.addbackup(self.indexfile, location=b'store')
2938 2937 if not self._inline:
2939 2938 tr.addbackup(self.datafile, location=b'store')
2940 2939
2941 2940 self.opener.rename(newrl.indexfile, self.indexfile)
2942 2941 if not self._inline:
2943 2942 self.opener.rename(newrl.datafile, self.datafile)
2944 2943
2945 2944 self.clearcaches()
2946 2945 self._loadindex()
2947 2946
2948 2947 def verifyintegrity(self, state):
2949 2948 """Verifies the integrity of the revlog.
2950 2949
2951 2950 Yields ``revlogproblem`` instances describing problems that are
2952 2951 found.
2953 2952 """
2954 2953 dd, di = self.checksize()
2955 2954 if dd:
2956 2955 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2957 2956 if di:
2958 2957 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2959 2958
2960 2959 version = self.version & 0xFFFF
2961 2960
2962 2961 # The verifier tells us what version revlog we should be.
2963 2962 if version != state[b'expectedversion']:
2964 2963 yield revlogproblem(
2965 2964 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2966 2965 % (self.indexfile, version, state[b'expectedversion'])
2967 2966 )
2968 2967
2969 2968 state[b'skipread'] = set()
2970 2969 state[b'safe_renamed'] = set()
2971 2970
2972 2971 for rev in self:
2973 2972 node = self.node(rev)
2974 2973
2975 2974 # Verify contents. 4 cases to care about:
2976 2975 #
2977 2976 # common: the most common case
2978 2977 # rename: with a rename
2979 2978 # meta: file content starts with b'\1\n', the metadata
2980 2979 # header defined in filelog.py, but without a rename
2981 2980 # ext: content stored externally
2982 2981 #
2983 2982 # More formally, their differences are shown below:
2984 2983 #
2985 2984 # | common | rename | meta | ext
2986 2985 # -------------------------------------------------------
2987 2986 # flags() | 0 | 0 | 0 | not 0
2988 2987 # renamed() | False | True | False | ?
2989 2988 # rawtext[0:2]=='\1\n'| False | True | True | ?
2990 2989 #
2991 2990 # "rawtext" means the raw text stored in revlog data, which
2992 2991 # could be retrieved by "rawdata(rev)". "text"
2993 2992 # mentioned below is "revision(rev)".
2994 2993 #
2995 2994 # There are 3 different lengths stored physically:
2996 2995 # 1. L1: rawsize, stored in revlog index
2997 2996 # 2. L2: len(rawtext), stored in revlog data
2998 2997 # 3. L3: len(text), stored in revlog data if flags==0, or
2999 2998 # possibly somewhere else if flags!=0
3000 2999 #
3001 3000 # L1 should be equal to L2. L3 could be different from them.
3002 3001 # "text" may or may not affect commit hash depending on flag
3003 3002 # processors (see flagutil.addflagprocessor).
3004 3003 #
3005 3004 # | common | rename | meta | ext
3006 3005 # -------------------------------------------------
3007 3006 # rawsize() | L1 | L1 | L1 | L1
3008 3007 # size() | L1 | L2-LM | L1(*) | L1 (?)
3009 3008 # len(rawtext) | L2 | L2 | L2 | L2
3010 3009 # len(text) | L2 | L2 | L2 | L3
3011 3010 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3012 3011 #
3013 3012 # LM: length of metadata, depending on rawtext
3014 3013 # (*): not ideal, see comment in filelog.size
3015 3014 # (?): could be "- len(meta)" if the resolved content has
3016 3015 # rename metadata
3017 3016 #
3018 3017 # Checks needed to be done:
3019 3018 # 1. length check: L1 == L2, in all cases.
3020 3019 # 2. hash check: depending on flag processor, we may need to
3021 3020 # use either "text" (external), or "rawtext" (in revlog).
3022 3021
3023 3022 try:
3024 3023 skipflags = state.get(b'skipflags', 0)
3025 3024 if skipflags:
3026 3025 skipflags &= self.flags(rev)
3027 3026
3028 3027 _verify_revision(self, skipflags, state, node)
3029 3028
3030 3029 l1 = self.rawsize(rev)
3031 3030 l2 = len(self.rawdata(node))
3032 3031
3033 3032 if l1 != l2:
3034 3033 yield revlogproblem(
3035 3034 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3036 3035 node=node,
3037 3036 )
3038 3037
3039 3038 except error.CensoredNodeError:
3040 3039 if state[b'erroroncensored']:
3041 3040 yield revlogproblem(
3042 3041 error=_(b'censored file data'), node=node
3043 3042 )
3044 3043 state[b'skipread'].add(node)
3045 3044 except Exception as e:
3046 3045 yield revlogproblem(
3047 3046 error=_(b'unpacking %s: %s')
3048 3047 % (short(node), stringutil.forcebytestr(e)),
3049 3048 node=node,
3050 3049 )
3051 3050 state[b'skipread'].add(node)
3052 3051
3053 3052 def storageinfo(
3054 3053 self,
3055 3054 exclusivefiles=False,
3056 3055 sharedfiles=False,
3057 3056 revisionscount=False,
3058 3057 trackedsize=False,
3059 3058 storedsize=False,
3060 3059 ):
3061 3060 d = {}
3062 3061
3063 3062 if exclusivefiles:
3064 3063 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3065 3064 if not self._inline:
3066 3065 d[b'exclusivefiles'].append((self.opener, self.datafile))
3067 3066
3068 3067 if sharedfiles:
3069 3068 d[b'sharedfiles'] = []
3070 3069
3071 3070 if revisionscount:
3072 3071 d[b'revisionscount'] = len(self)
3073 3072
3074 3073 if trackedsize:
3075 3074 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3076 3075
3077 3076 if storedsize:
3078 3077 d[b'storedsize'] = sum(
3079 3078 self.opener.stat(path).st_size for path in self.files()
3080 3079 )
3081 3080
3082 3081 return d
@@ -1,734 +1,732 b''
1 1 # transaction.py - simple journaling scheme for mercurial
2 2 #
3 3 # This transaction scheme is intended to gracefully handle program
4 4 # errors and interruptions. More serious failures like system crashes
5 5 # can be recovered with an fsck-like tool. As the whole repository is
6 6 # effectively log-structured, this should amount to simply truncating
7 7 # anything that isn't referenced in the changelog.
8 8 #
9 9 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
10 10 #
11 11 # This software may be used and distributed according to the terms of the
12 12 # GNU General Public License version 2 or any later version.
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import errno
17 17
18 18 from .i18n import _
19 19 from . import (
20 20 error,
21 21 pycompat,
22 22 util,
23 23 )
24 24 from .utils import stringutil
25 25
26 26 version = 2
27 27
28 28 # These are the file generators that should only be executed after the
29 29 # finalizers are done, since they rely on the output of the finalizers (like
30 30 # the changelog having been written).
31 31 postfinalizegenerators = {b'bookmarks', b'dirstate'}
32 32
33 33 GEN_GROUP_ALL = b'all'
34 34 GEN_GROUP_PRE_FINALIZE = b'prefinalize'
35 35 GEN_GROUP_POST_FINALIZE = b'postfinalize'
36 36
37 37
38 38 def active(func):
39 39 def _active(self, *args, **kwds):
40 40 if self._count == 0:
41 41 raise error.ProgrammingError(
42 42 b'cannot use transaction when it is already committed/aborted'
43 43 )
44 44 return func(self, *args, **kwds)
45 45
46 46 return _active
47 47
48 48
49 49 def _playback(
50 50 journal,
51 51 report,
52 52 opener,
53 53 vfsmap,
54 54 entries,
55 55 backupentries,
56 56 unlink=True,
57 57 checkambigfiles=None,
58 58 ):
59 59 for f, o in entries:
60 60 if o or not unlink:
61 61 checkambig = checkambigfiles and (f, b'') in checkambigfiles
62 62 try:
63 63 fp = opener(f, b'a', checkambig=checkambig)
64 64 if fp.tell() < o:
65 65 raise error.Abort(
66 66 _(
67 67 b"attempted to truncate %s to %d bytes, but it was "
68 68 b"already %d bytes\n"
69 69 )
70 70 % (f, o, fp.tell())
71 71 )
72 72 fp.truncate(o)
73 73 fp.close()
74 74 except IOError:
75 75 report(_(b"failed to truncate %s\n") % f)
76 76 raise
77 77 else:
78 78 try:
79 79 opener.unlink(f)
80 80 except (IOError, OSError) as inst:
81 81 if inst.errno != errno.ENOENT:
82 82 raise
83 83
84 84 backupfiles = []
85 85 for l, f, b, c in backupentries:
86 86 if l not in vfsmap and c:
87 87 report(b"couldn't handle %s: unknown cache location %s\n" % (b, l))
88 88 vfs = vfsmap[l]
89 89 try:
90 90 if f and b:
91 91 filepath = vfs.join(f)
92 92 backuppath = vfs.join(b)
93 93 checkambig = checkambigfiles and (f, l) in checkambigfiles
94 94 try:
95 95 util.copyfile(backuppath, filepath, checkambig=checkambig)
96 96 backupfiles.append(b)
97 97 except IOError:
98 98 report(_(b"failed to recover %s\n") % f)
99 99 else:
100 100 target = f or b
101 101 try:
102 102 vfs.unlink(target)
103 103 except (IOError, OSError) as inst:
104 104 if inst.errno != errno.ENOENT:
105 105 raise
106 106 except (IOError, OSError, error.Abort):
107 107 if not c:
108 108 raise
109 109
110 110 backuppath = b"%s.backupfiles" % journal
111 111 if opener.exists(backuppath):
112 112 opener.unlink(backuppath)
113 113 opener.unlink(journal)
114 114 try:
115 115 for f in backupfiles:
116 116 if opener.exists(f):
117 117 opener.unlink(f)
118 118 except (IOError, OSError, error.Abort):
119 119 # only pure backup file remains, it is sage to ignore any error
120 120 pass
121 121
122 122
123 123 class transaction(util.transactional):
124 124 def __init__(
125 125 self,
126 126 report,
127 127 opener,
128 128 vfsmap,
129 129 journalname,
130 130 undoname=None,
131 131 after=None,
132 132 createmode=None,
133 133 validator=None,
134 134 releasefn=None,
135 135 checkambigfiles=None,
136 136 name='<unnamed>',
137 137 ):
138 138 """Begin a new transaction
139 139
140 140 Begins a new transaction that allows rolling back writes in the event of
141 141 an exception.
142 142
143 143 * `after`: called after the transaction has been committed
144 144 * `createmode`: the mode of the journal file that will be created
145 145 * `releasefn`: called after releasing (with transaction and result)
146 146
147 147 `checkambigfiles` is a set of (path, vfs-location) tuples,
148 148 which determine whether file stat ambiguity should be avoided
149 149 for the corresponding files.
150 150 """
151 151 self._count = 1
152 152 self._usages = 1
153 153 self._report = report
154 154 # a vfs to the store content
155 155 self._opener = opener
156 156 # a map to access file in various {location -> vfs}
157 157 vfsmap = vfsmap.copy()
158 158 vfsmap[b''] = opener # set default value
159 159 self._vfsmap = vfsmap
160 160 self._after = after
161 161 self._entries = []
162 162 self._map = {}
163 163 self._journal = journalname
164 164 self._undoname = undoname
165 165 self._queue = []
166 166 # A callback to do something just after releasing the transaction.
167 167 if releasefn is None:
168 168 releasefn = lambda tr, success: None
169 169 self._releasefn = releasefn
170 170
171 171 self._checkambigfiles = set()
172 172 if checkambigfiles:
173 173 self._checkambigfiles.update(checkambigfiles)
174 174
175 175 self._names = [name]
176 176
177 177 # A dict dedicated to precisely tracking the changes introduced in the
178 178 # transaction.
179 179 self.changes = {}
180 180
181 181 # a dict of arguments to be passed to hooks
182 182 self.hookargs = {}
183 183 self._file = opener.open(self._journal, b"w")
184 184
185 185 # a list of ('location', 'path', 'backuppath', cache) entries.
186 186 # - if 'backuppath' is empty, no file existed at backup time
187 187 # - if 'path' is empty, this is a temporary transaction file
188 188 # - if 'location' is not empty, the path is outside main opener reach.
189 189 # use 'location' value as a key in a vfsmap to find the right 'vfs'
190 190 # (cache is currently unused)
191 191 self._backupentries = []
192 192 self._backupmap = {}
193 193 self._backupjournal = b"%s.backupfiles" % self._journal
194 194 self._backupsfile = opener.open(self._backupjournal, b'w')
195 195 self._backupsfile.write(b'%d\n' % version)
196 196
197 197 if createmode is not None:
198 198 opener.chmod(self._journal, createmode & 0o666)
199 199 opener.chmod(self._backupjournal, createmode & 0o666)
200 200
201 201 # hold file generations to be performed on commit
202 202 self._filegenerators = {}
203 203 # hold callback to write pending data for hooks
204 204 self._pendingcallback = {}
205 205 # True is any pending data have been written ever
206 206 self._anypending = False
207 207 # holds callback to call when writing the transaction
208 208 self._finalizecallback = {}
209 209 # holds callback to call when validating the transaction
210 210 # should raise exception if anything is wrong
211 211 self._validatecallback = {}
212 212 if validator is not None:
213 213 self._validatecallback[b'001-userhooks'] = validator
214 214 # hold callback for post transaction close
215 215 self._postclosecallback = {}
216 216 # holds callbacks to call during abort
217 217 self._abortcallback = {}
218 218
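A minimal usage sketch (hedged, not part of this change): within Mercurial a transaction is normally obtained via repo.transaction(b'...'); constructing one directly looks roughly like this, where `storevfs` stands for a vfs rooted at the store and the journal/undo names are illustrative:

    from mercurial import transaction as txnmod

    tr = txnmod.transaction(
        report=lambda msg: None,   # ui.warn in real code
        opener=storevfs,           # vfs that holds the journal file
        vfsmap={},                 # the b'' -> opener default is added for us
        journalname=b'journal',
        undoname=b'undo',
    )
    try:
        tr.add(b'data/foo.i', 0)   # record the pre-transaction size of an append-only file
        tr.close()                 # commit
    finally:
        tr.release()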
219 219 def __repr__(self):
220 220 name = '/'.join(self._names)
221 221 return '<transaction name=%s, count=%d, usages=%d>' % (
222 222 name,
223 223 self._count,
224 224 self._usages,
225 225 )
226 226
227 227 def __del__(self):
228 228 if self._journal:
229 229 self._abort()
230 230
231 231 @active
232 232 def startgroup(self):
233 233 """delay registration of file entry
234 234
235 235 This is used by strip to delay vision of strip offset. The transaction
236 236 sees either none or all of the strip actions to be done."""
237 237 self._queue.append([])
238 238
239 239 @active
240 240 def endgroup(self):
241 241 """apply delayed registration of file entry.
242 242
243 243 This is used by strip to delay vision of strip offset. The transaction
244 244 sees either none or all of the strip actions to be done."""
245 245 q = self._queue.pop()
246 246 for f, o in q:
247 247 self._addentry(f, o)
248 248
249 249 @active
250 250 def add(self, file, offset):
251 251 """record the state of an append-only file before update"""
252 252 if file in self._map or file in self._backupmap:
253 253 return
254 254 if self._queue:
255 255 self._queue[-1].append((file, offset))
256 256 return
257 257
258 258 self._addentry(file, offset)
259 259
260 260 def _addentry(self, file, offset):
261 261         """add an append-only entry to memory and on-disk state"""
262 262 if file in self._map or file in self._backupmap:
263 263 return
264 264 self._entries.append((file, offset))
265 265 self._map[file] = len(self._entries) - 1
266 266 # add enough data to the journal to do the truncate
267 267 self._file.write(b"%s\0%d\n" % (file, offset))
268 268 self._file.flush()
269 269
270 270 @active
271 271 def addbackup(self, file, hardlink=True, location=b''):
272 272 """Adds a backup of the file to the transaction
273 273
274 274 Calling addbackup() creates a hardlink backup of the specified file
275 275 that is used to recover the file in the event of the transaction
276 276 aborting.
277 277
278 278 * `file`: the file path, relative to .hg/store
279 279 * `hardlink`: use a hardlink to quickly create the backup
280 280 """
281 281 if self._queue:
282 282 msg = b'cannot use transaction.addbackup inside "group"'
283 283 raise error.ProgrammingError(msg)
284 284
285 285 if file in self._map or file in self._backupmap:
286 286 return
287 287 vfs = self._vfsmap[location]
288 288 dirname, filename = vfs.split(file)
289 289 backupfilename = b"%s.backup.%s" % (self._journal, filename)
290 290 backupfile = vfs.reljoin(dirname, backupfilename)
291 291 if vfs.exists(file):
292 292 filepath = vfs.join(file)
293 293 backuppath = vfs.join(backupfile)
294 294 util.copyfile(filepath, backuppath, hardlink=hardlink)
295 295 else:
296 296 backupfile = b''
297 297
298 298 self._addbackupentry((location, file, backupfile, False))
299 299
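A hedged usage sketch (the file name and `storevfs` are illustrative): before rewriting a file that is not append-only, a caller registers a backup so that an abort restores the previous contents:

    tr.addbackup(b'somefile')               # hardlink snapshot next to the journal
    with storevfs(b'somefile', b'w') as fp:
        fp.write(b'new content\n')          # safe: rollback copies the backup back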
300 300 def _addbackupentry(self, entry):
301 301 """register a new backup entry and write it to disk"""
302 302 self._backupentries.append(entry)
303 303 self._backupmap[entry[1]] = len(self._backupentries) - 1
304 304 self._backupsfile.write(b"%s\0%s\0%s\0%d\n" % entry)
305 305 self._backupsfile.flush()
306 306
307 307 @active
308 308 def registertmp(self, tmpfile, location=b''):
309 309 """register a temporary transaction file
310 310
311 311 Such files will be deleted when the transaction exits (on both
312 312 failure and success).
313 313 """
314 314 self._addbackupentry((location, b'', tmpfile, False))
315 315
316 316 @active
317 317 def addfilegenerator(
318 318 self, genid, filenames, genfunc, order=0, location=b''
319 319 ):
320 320         """add a function to generate some files at transaction commit
321 321
322 322 The `genfunc` argument is a function capable of generating proper
323 323         content for each entry in the `filenames` tuple.
324 324
325 325 At transaction close time, `genfunc` will be called with one file
326 326         object argument per entry in `filenames`.
327 327
328 328 The transaction itself is responsible for the backup, creation and
329 329         final write of such files.
330 330
331 331         The `genid` argument is used to ensure the same set of files is only
332 332         generated once. Calling `addfilegenerator` with a `genid` already
333 333 present will overwrite the old entry.
334 334
335 335 The `order` argument may be used to control the order in which multiple
336 336         generators will be executed.
337 337
338 338         The `location` argument may be used to indicate the files are located
339 339         outside of the standard directory for transactions. It should match
340 340         one of the keys of the `transaction.vfsmap` dictionary.
341 341 """
342 342         # For now, we are unable to do proper backup and restore of custom vfs
343 343         # locations, except for bookmarks, which are handled outside this mechanism.
344 344 self._filegenerators[genid] = (order, filenames, genfunc, location)
345 345
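A hedged sketch of registering a file generator (identifiers illustrative): the callback receives one open file object per name in `filenames` and only writes the content; backup, the '.pending' copies shown to hooks, and the final atomic write are handled by the transaction:

    def _writefoo(fp):
        fp.write(b'generated content\n')

    tr.addfilegenerator(b'foo', (b'foo',), _writefoo, location=b'')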
346 346 @active
347 347 def removefilegenerator(self, genid):
348 348 """reverse of addfilegenerator, remove a file generator function"""
349 349 if genid in self._filegenerators:
350 350 del self._filegenerators[genid]
351 351
352 352 def _generatefiles(self, suffix=b'', group=GEN_GROUP_ALL):
353 353 # write files registered for generation
354 354 any = False
355 355
356 356 if group == GEN_GROUP_ALL:
357 357 skip_post = skip_pre = False
358 358 else:
359 359 skip_pre = group == GEN_GROUP_POST_FINALIZE
360 360 skip_post = group == GEN_GROUP_PRE_FINALIZE
361 361
362 362 for id, entry in sorted(pycompat.iteritems(self._filegenerators)):
363 363 any = True
364 364 order, filenames, genfunc, location = entry
365 365
366 366 # for generation at closing, check if it's before or after finalize
367 367 is_post = id in postfinalizegenerators
368 368 if skip_post and is_post:
369 369 continue
370 370 elif skip_pre and not is_post:
371 371 continue
372 372
373 373 vfs = self._vfsmap[location]
374 374 files = []
375 375 try:
376 376 for name in filenames:
377 377 name += suffix
378 378 if suffix:
379 379 self.registertmp(name, location=location)
380 380 checkambig = False
381 381 else:
382 382 self.addbackup(name, location=location)
383 383 checkambig = (name, location) in self._checkambigfiles
384 384 files.append(
385 385 vfs(name, b'w', atomictemp=True, checkambig=checkambig)
386 386 )
387 387 genfunc(*files)
388 388 for f in files:
389 389 f.close()
390 390 # skip discard() loop since we're sure no open file remains
391 391 del files[:]
392 392 finally:
393 393 for f in files:
394 394 f.discard()
395 395 return any
396 396
397 397 @active
398 def find(self, file):
398 def findoffset(self, file):
399 399 if file in self._map:
400 return self._entries[self._map[file]]
401 if file in self._backupmap:
402 return self._backupentries[self._backupmap[file]]
400 return self._entries[self._map[file]][1]
403 401 return None
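The rename above also narrows the contract: `findoffset` returns only the offset recorded by `add()` and no longer consults the backup entries that the old `find()` also searched. A hedged caller sketch (path illustrative):

    offset = tr.findoffset(b'data/foo.i')
    if offset is not None:
        # the file was registered via tr.add(); offset is its pre-transaction size
        pass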
404 402
405 403 @active
406 404 def replace(self, file, offset):
407 405 '''
408 406 replace can only replace already committed entries
409 407 that are not pending in the queue
410 408 '''
411 409
412 410 if file not in self._map:
413 411 raise KeyError(file)
414 412 index = self._map[file]
415 413 self._entries[index] = (file, offset)
416 414 self._file.write(b"%s\0%d\n" % (file, offset))
417 415 self._file.flush()
418 416
419 417 @active
420 418 def nest(self, name='<unnamed>'):
421 419 self._count += 1
422 420 self._usages += 1
423 421 self._names.append(name)
424 422 return self
425 423
426 424 def release(self):
427 425 if self._count > 0:
428 426 self._usages -= 1
429 427 if self._names:
430 428 self._names.pop()
431 429 # if the transaction scopes are left without being closed, fail
432 430 if self._count > 0 and self._usages == 0:
433 431 self._abort()
434 432
435 433 def running(self):
436 434 return self._count > 0
437 435
438 436 def addpending(self, category, callback):
439 437 """add a callback to be called when the transaction is pending
440 438
441 439 The transaction will be given as callback's first argument.
442 440
443 441 Category is a unique identifier to allow overwriting an old callback
444 442 with a newer callback.
445 443 """
446 444 self._pendingcallback[category] = callback
447 445
448 446 @active
449 447 def writepending(self):
450 448 '''write pending file to temporary version
451 449
452 450 This is used to allow hooks to view a transaction before commit'''
453 451 categories = sorted(self._pendingcallback)
454 452 for cat in categories:
455 453 # remove callback since the data will have been flushed
456 454 any = self._pendingcallback.pop(cat)(self)
457 455 self._anypending = self._anypending or any
458 456 self._anypending |= self._generatefiles(suffix=b'.pending')
459 457 return self._anypending
460 458
461 459 @active
462 460 def hasfinalize(self, category):
463 461         """check if a callback already exists for a category
464 462 """
465 463 return category in self._finalizecallback
466 464
467 465 @active
468 466 def addfinalize(self, category, callback):
469 467 """add a callback to be called when the transaction is closed
470 468
471 469 The transaction will be given as callback's first argument.
472 470
473 471 Category is a unique identifier to allow overwriting old callbacks with
474 472 newer callbacks.
475 473 """
476 474 self._finalizecallback[category] = callback
477 475
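A hedged sketch (category name illustrative): finalize callbacks run while the transaction is closing, after the pre-finalize file generators and before the post-finalize ones; validators registered via `addvalidator` run first and can veto the close by raising:

    def _updatecaches(tr):
        pass   # e.g. refresh a cache that must reflect the committed data

    tr.addfinalize(b'example-cache', _updatecaches)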
478 476 @active
479 477 def addpostclose(self, category, callback):
480 478 """add or replace a callback to be called after the transaction closed
481 479
482 480 The transaction will be given as callback's first argument.
483 481
484 482 Category is a unique identifier to allow overwriting an old callback
485 483 with a newer callback.
486 484 """
487 485 self._postclosecallback[category] = callback
488 486
489 487 @active
490 488 def getpostclose(self, category):
491 489 """return a postclose callback added before, or None"""
492 490 return self._postclosecallback.get(category, None)
493 491
494 492 @active
495 493 def addabort(self, category, callback):
496 494 """add a callback to be called when the transaction is aborted.
497 495
498 496 The transaction will be given as the first argument to the callback.
499 497
500 498 Category is a unique identifier to allow overwriting an old callback
501 499 with a newer callback.
502 500 """
503 501 self._abortcallback[category] = callback
504 502
505 503 @active
506 504 def addvalidator(self, category, callback):
507 505 """ adds a callback to be called when validating the transaction.
508 506
509 507 The transaction will be given as the first argument to the callback.
510 508
511 509         callback should raise an exception to abort the transaction """
512 510 self._validatecallback[category] = callback
513 511
514 512 @active
515 513 def close(self):
516 514 '''commit the transaction'''
517 515 if self._count == 1:
518 516 for category in sorted(self._validatecallback):
519 517 self._validatecallback[category](self)
520 518 self._validatecallback = None # Help prevent cycles.
521 519 self._generatefiles(group=GEN_GROUP_PRE_FINALIZE)
522 520 while self._finalizecallback:
523 521 callbacks = self._finalizecallback
524 522 self._finalizecallback = {}
525 523 categories = sorted(callbacks)
526 524 for cat in categories:
527 525 callbacks[cat](self)
528 526 # Prevent double usage and help clear cycles.
529 527 self._finalizecallback = None
530 528 self._generatefiles(group=GEN_GROUP_POST_FINALIZE)
531 529
532 530 self._count -= 1
533 531 if self._count != 0:
534 532 return
535 533 self._file.close()
536 534 self._backupsfile.close()
537 535 # cleanup temporary files
538 536 for l, f, b, c in self._backupentries:
539 537 if l not in self._vfsmap and c:
540 538 self._report(
541 539 b"couldn't remove %s: unknown cache location %s\n" % (b, l)
542 540 )
543 541 continue
544 542 vfs = self._vfsmap[l]
545 543 if not f and b and vfs.exists(b):
546 544 try:
547 545 vfs.unlink(b)
548 546 except (IOError, OSError, error.Abort) as inst:
549 547 if not c:
550 548 raise
551 549                     # Abort may be raised by a read-only opener
552 550 self._report(
553 551 b"couldn't remove %s: %s\n" % (vfs.join(b), inst)
554 552 )
555 553 self._entries = []
556 554 self._writeundo()
557 555 if self._after:
558 556 self._after()
559 557 self._after = None # Help prevent cycles.
560 558 if self._opener.isfile(self._backupjournal):
561 559 self._opener.unlink(self._backupjournal)
562 560 if self._opener.isfile(self._journal):
563 561 self._opener.unlink(self._journal)
564 562 for l, _f, b, c in self._backupentries:
565 563 if l not in self._vfsmap and c:
566 564 self._report(
567 565 b"couldn't remove %s: unknown cache location"
568 566 b"%s\n" % (b, l)
569 567 )
570 568 continue
571 569 vfs = self._vfsmap[l]
572 570 if b and vfs.exists(b):
573 571 try:
574 572 vfs.unlink(b)
575 573 except (IOError, OSError, error.Abort) as inst:
576 574 if not c:
577 575 raise
578 576                     # Abort may be raised by a read-only opener
579 577 self._report(
580 578 b"couldn't remove %s: %s\n" % (vfs.join(b), inst)
581 579 )
582 580 self._backupentries = []
583 581 self._journal = None
584 582
585 583 self._releasefn(self, True) # notify success of closing transaction
586 584 self._releasefn = None # Help prevent cycles.
587 585
588 586 # run post close action
589 587 categories = sorted(self._postclosecallback)
590 588 for cat in categories:
591 589 self._postclosecallback[cat](self)
592 590 # Prevent double usage and help clear cycles.
593 591 self._postclosecallback = None
594 592
595 593 @active
596 594 def abort(self):
597 595 '''abort the transaction (generally called on error, or when the
598 596 transaction is not explicitly committed before going out of
599 597 scope)'''
600 598 self._abort()
601 599
602 600 def _writeundo(self):
603 601 """write transaction data for possible future undo call"""
604 602 if self._undoname is None:
605 603 return
606 604 undobackupfile = self._opener.open(
607 605 b"%s.backupfiles" % self._undoname, b'w'
608 606 )
609 607 undobackupfile.write(b'%d\n' % version)
610 608 for l, f, b, c in self._backupentries:
611 609 if not f: # temporary file
612 610 continue
613 611 if not b:
614 612 u = b''
615 613 else:
616 614 if l not in self._vfsmap and c:
617 615 self._report(
618 616 b"couldn't remove %s: unknown cache location"
619 617 b"%s\n" % (b, l)
620 618 )
621 619 continue
622 620 vfs = self._vfsmap[l]
623 621 base, name = vfs.split(b)
624 622 assert name.startswith(self._journal), name
625 623 uname = name.replace(self._journal, self._undoname, 1)
626 624 u = vfs.reljoin(base, uname)
627 625 util.copyfile(vfs.join(b), vfs.join(u), hardlink=True)
628 626 undobackupfile.write(b"%s\0%s\0%s\0%d\n" % (l, f, u, c))
629 627 undobackupfile.close()
630 628
631 629 def _abort(self):
632 630 self._count = 0
633 631 self._usages = 0
634 632 self._file.close()
635 633 self._backupsfile.close()
636 634
637 635 try:
638 636 if not self._entries and not self._backupentries:
639 637 if self._backupjournal:
640 638 self._opener.unlink(self._backupjournal)
641 639 if self._journal:
642 640 self._opener.unlink(self._journal)
643 641 return
644 642
645 643 self._report(_(b"transaction abort!\n"))
646 644
647 645 try:
648 646 for cat in sorted(self._abortcallback):
649 647 self._abortcallback[cat](self)
650 648 # Prevent double usage and help clear cycles.
651 649 self._abortcallback = None
652 650 _playback(
653 651 self._journal,
654 652 self._report,
655 653 self._opener,
656 654 self._vfsmap,
657 655 self._entries,
658 656 self._backupentries,
659 657 False,
660 658 checkambigfiles=self._checkambigfiles,
661 659 )
662 660 self._report(_(b"rollback completed\n"))
663 661 except BaseException as exc:
664 662 self._report(_(b"rollback failed - please run hg recover\n"))
665 663 self._report(
666 664 _(b"(failure reason: %s)\n") % stringutil.forcebytestr(exc)
667 665 )
668 666 finally:
669 667 self._journal = None
670 668 self._releasefn(self, False) # notify failure of transaction
671 669 self._releasefn = None # Help prevent cycles.
672 670
673 671
674 672 def rollback(opener, vfsmap, file, report, checkambigfiles=None):
675 673 """Rolls back the transaction contained in the given file
676 674
677 675 Reads the entries in the specified file, and the corresponding
678 676 '*.backupfiles' file, to recover from an incomplete transaction.
679 677
680 678 * `file`: a file containing a list of entries, specifying where
681 679 to truncate each file. The file should contain a list of
682 680         file\0offset pairs, delimited by newlines. The corresponding
683 681         '*.backupfiles' file should contain a version line followed by
684 682         location\0file\0backupfile\0cache entries, delimited by newlines.
685 683
686 684 `checkambigfiles` is a set of (path, vfs-location) tuples,
687 685 which determine whether file stat ambiguity should be avoided at
688 686 restoring corresponded files.
689 687 """
690 688 entries = []
691 689 backupentries = []
692 690
693 691 fp = opener.open(file)
694 692 lines = fp.readlines()
695 693 fp.close()
696 694 for l in lines:
697 695 try:
698 696 f, o = l.split(b'\0')
699 697 entries.append((f, int(o)))
700 698 except ValueError:
701 699 report(
702 700 _(b"couldn't read journal entry %r!\n") % pycompat.bytestr(l)
703 701 )
704 702
705 703 backupjournal = b"%s.backupfiles" % file
706 704 if opener.exists(backupjournal):
707 705 fp = opener.open(backupjournal)
708 706 lines = fp.readlines()
709 707 if lines:
710 708 ver = lines[0][:-1]
711 709 if ver == (b'%d' % version):
712 710 for line in lines[1:]:
713 711 if line:
714 712 # Shave off the trailing newline
715 713 line = line[:-1]
716 714 l, f, b, c = line.split(b'\0')
717 715 backupentries.append((l, f, b, bool(c)))
718 716 else:
719 717 report(
720 718 _(
721 719 b"journal was created by a different version of "
722 720 b"Mercurial\n"
723 721 )
724 722 )
725 723
726 724 _playback(
727 725 file,
728 726 report,
729 727 opener,
730 728 vfsmap,
731 729 entries,
732 730 backupentries,
733 731 checkambigfiles=checkambigfiles,
734 732 )
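A hedged recovery sketch (`storevfs` assumed as above): replaying a leftover journal after a crash mirrors what `hg recover` does; the journal contains file\0offset lines, and 'journal.backupfiles' a version line followed by location\0file\0backupfile\0cache entries:

    report = lambda msg: None      # ui.warn in real code
    rollback(storevfs, {b'': storevfs}, b'journal', report)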