revlog: move tiprev() from changelog up to revlog...
Martin von Zweigbergk
r43745:ec7ba79b default
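The refactor is small: the unfiltered tiprev()/tip() pair moves onto the revlog base class, and changelog keeps only a filtered override of tiprev(). A minimal sketch of the resulting methods, condensed from the hunks below:

    # revlog (unfiltered): the tip is simply the last index entry
    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    # changelog (filtered): walk down from the top, skipping filtered
    # revisions; the -2 bound lets the loop return -1 (nullrev) when
    # every revision is filtered
    def tiprev(self):
        """filtered version of revlog.tiprev"""
        for i in pycompat.xrange(len(self) - 1, -2, -1):
            if i not in self.filteredrevs:
                return i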
@@ -1,729 +1,726 @@ mercurial/changelog.py
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import attr
17 17
18 18 from . import (
19 19 copies,
20 20 encoding,
21 21 error,
22 22 pycompat,
23 23 revlog,
24 24 util,
25 25 )
26 26 from .utils import (
27 27 dateutil,
28 28 stringutil,
29 29 )
30 30
31 31 from .revlogutils import sidedata as sidedatamod
32 32
33 33 _defaultextra = {b'branch': b'default'}
34 34
35 35
36 36 def _string_escape(text):
37 37 """
38 38 >>> from .pycompat import bytechr as chr
39 39 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
40 40 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
41 41 >>> s
42 42 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
43 43 >>> res = _string_escape(s)
44 44 >>> s == _string_unescape(res)
45 45 True
46 46 """
47 47 # subset of the string_escape codec
48 48 text = (
49 49 text.replace(b'\\', b'\\\\')
50 50 .replace(b'\n', b'\\n')
51 51 .replace(b'\r', b'\\r')
52 52 )
53 53 return text.replace(b'\0', b'\\0')
54 54
55 55
56 56 def _string_unescape(text):
57 57 if b'\\0' in text:
58 58 # fix up \0 without getting into trouble with \\0
59 59 text = text.replace(b'\\\\', b'\\\\\n')
60 60 text = text.replace(b'\\0', b'\0')
61 61 text = text.replace(b'\n', b'')
62 62 return stringutil.unescapestr(text)
63 63
64 64
65 65 def decodeextra(text):
66 66 """
67 67 >>> from .pycompat import bytechr as chr
68 68 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
69 69 ... ).items())
70 70 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
71 71 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
72 72 ... b'baz': chr(92) + chr(0) + b'2'})
73 73 ... ).items())
74 74 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
75 75 """
76 76 extra = _defaultextra.copy()
77 77 for l in text.split(b'\0'):
78 78 if l:
79 79 k, v = _string_unescape(l).split(b':', 1)
80 80 extra[k] = v
81 81 return extra
82 82
83 83
84 84 def encodeextra(d):
85 85 # keys must be sorted to produce a deterministic changelog entry
86 86 items = [
87 87 _string_escape(b'%s:%s' % (k, pycompat.bytestr(d[k])))
88 88 for k in sorted(d)
89 89 ]
90 90 return b"\0".join(items)
91 91
92 92
93 93 def stripdesc(desc):
94 94 """strip trailing whitespace and leading and trailing empty lines"""
95 95 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
96 96
97 97
98 98 class appender(object):
99 99 '''the changelog index must be updated last on disk, so we use this class
100 100 to delay writes to it'''
101 101
102 102 def __init__(self, vfs, name, mode, buf):
103 103 self.data = buf
104 104 fp = vfs(name, mode)
105 105 self.fp = fp
106 106 self.offset = fp.tell()
107 107 self.size = vfs.fstat(fp).st_size
108 108 self._end = self.size
109 109
110 110 def end(self):
111 111 return self._end
112 112
113 113 def tell(self):
114 114 return self.offset
115 115
116 116 def flush(self):
117 117 pass
118 118
119 119 @property
120 120 def closed(self):
121 121 return self.fp.closed
122 122
123 123 def close(self):
124 124 self.fp.close()
125 125
126 126 def seek(self, offset, whence=0):
127 127 '''virtual file offset spans real file and data'''
128 128 if whence == 0:
129 129 self.offset = offset
130 130 elif whence == 1:
131 131 self.offset += offset
132 132 elif whence == 2:
133 133 self.offset = self.end() + offset
134 134 if self.offset < self.size:
135 135 self.fp.seek(self.offset)
136 136
137 137 def read(self, count=-1):
138 138 '''only trick here is reads that span real file and data'''
139 139 ret = b""
140 140 if self.offset < self.size:
141 141 s = self.fp.read(count)
142 142 ret = s
143 143 self.offset += len(s)
144 144 if count > 0:
145 145 count -= len(s)
146 146 if count != 0:
147 147 doff = self.offset - self.size
148 148 self.data.insert(0, b"".join(self.data))
149 149 del self.data[1:]
150 150 s = self.data[0][doff : doff + count]
151 151 self.offset += len(s)
152 152 ret += s
153 153 return ret
154 154
155 155 def write(self, s):
156 156 self.data.append(bytes(s))
157 157 self.offset += len(s)
158 158 self._end += len(s)
159 159
160 160 def __enter__(self):
161 161 self.fp.__enter__()
162 162 return self
163 163
164 164 def __exit__(self, *args):
165 165 return self.fp.__exit__(*args)
166 166
167 167
168 168 def _divertopener(opener, target):
169 169 """build an opener that writes in 'target.a' instead of 'target'"""
170 170
171 171 def _divert(name, mode=b'r', checkambig=False):
172 172 if name != target:
173 173 return opener(name, mode)
174 174 return opener(name + b".a", mode)
175 175
176 176 return _divert
177 177
178 178
179 179 def _delayopener(opener, target, buf):
180 180 """build an opener that stores chunks in 'buf' instead of 'target'"""
181 181
182 182 def _delay(name, mode=b'r', checkambig=False):
183 183 if name != target:
184 184 return opener(name, mode)
185 185 return appender(opener, name, mode, buf)
186 186
187 187 return _delay
188 188
189 189
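For intuition about the two openers above: _divertopener reroutes writes for one specific file to a sibling "<name>.a" file, while _delayopener buffers them in memory. A toy sketch using the _divertopener defined above, assuming a hypothetical stub opener that merely records the names it is asked to open (the stub and its None return value are illustration only, not Mercurial's vfs API):

    opened = []

    def stubopener(name, mode=b'r', checkambig=False):
        opened.append(name)  # a real vfs opener would return a file object

    divert = _divertopener(stubopener, b'00changelog.i')
    divert(b'00changelog.d')  # not the target: opened as-is
    divert(b'00changelog.i')  # the target: diverted to 00changelog.i.a
    assert opened == [b'00changelog.d', b'00changelog.i.a']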
190 190 @attr.s
191 191 class _changelogrevision(object):
192 192 # Extensions might modify _defaultextra, so let the constructor below pass
193 193 # it in
194 194 extra = attr.ib()
195 195 manifest = attr.ib(default=nullid)
196 196 user = attr.ib(default=b'')
197 197 date = attr.ib(default=(0, 0))
198 198 files = attr.ib(default=attr.Factory(list))
199 199 filesadded = attr.ib(default=None)
200 200 filesremoved = attr.ib(default=None)
201 201 p1copies = attr.ib(default=None)
202 202 p2copies = attr.ib(default=None)
203 203 description = attr.ib(default=b'')
204 204
205 205
206 206 class changelogrevision(object):
207 207 """Holds results of a parsed changelog revision.
208 208
209 209 Changelog revisions consist of multiple pieces of data, including
210 210 the manifest node, user, and date. This object exposes a view into
211 211 the parsed object.
212 212 """
213 213
214 214 __slots__ = (
215 215 r'_offsets',
216 216 r'_text',
217 217 r'_sidedata',
218 218 r'_cpsd',
219 219 )
220 220
221 221 def __new__(cls, text, sidedata, cpsd):
222 222 if not text:
223 223 return _changelogrevision(extra=_defaultextra)
224 224
225 225 self = super(changelogrevision, cls).__new__(cls)
226 226 # We could return here and implement the following as an __init__.
227 227 # But doing it here is equivalent and saves an extra function call.
228 228
229 229 # format used:
230 230 # nodeid\n : manifest node in ascii
231 231 # user\n : user, no \n or \r allowed
232 232 # time tz extra\n : date (time is int or float, timezone is int)
233 233 # : extra is metadata, encoded and separated by '\0'
234 234 # : older versions ignore it
235 235 # files\n\n : files modified by the cset, no \n or \r allowed
236 236 # (.*) : comment (free text, ideally utf-8)
237 237 #
238 238 # changelog v0 doesn't use extra
239 239
240 240 nl1 = text.index(b'\n')
241 241 nl2 = text.index(b'\n', nl1 + 1)
242 242 nl3 = text.index(b'\n', nl2 + 1)
243 243
244 244 # The list of files may be empty, in which case nl3 is the first of
245 245 # the double newline that precedes the description.
246 246 if text[nl3 + 1 : nl3 + 2] == b'\n':
247 247 doublenl = nl3
248 248 else:
249 249 doublenl = text.index(b'\n\n', nl3 + 1)
250 250
251 251 self._offsets = (nl1, nl2, nl3, doublenl)
252 252 self._text = text
253 253 self._sidedata = sidedata
254 254 self._cpsd = cpsd
255 255
256 256 return self
257 257
258 258 @property
259 259 def manifest(self):
260 260 return bin(self._text[0 : self._offsets[0]])
261 261
262 262 @property
263 263 def user(self):
264 264 off = self._offsets
265 265 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
266 266
267 267 @property
268 268 def _rawdate(self):
269 269 off = self._offsets
270 270 dateextra = self._text[off[1] + 1 : off[2]]
271 271 return dateextra.split(b' ', 2)[0:2]
272 272
273 273 @property
274 274 def _rawextra(self):
275 275 off = self._offsets
276 276 dateextra = self._text[off[1] + 1 : off[2]]
277 277 fields = dateextra.split(b' ', 2)
278 278 if len(fields) != 3:
279 279 return None
280 280
281 281 return fields[2]
282 282
283 283 @property
284 284 def date(self):
285 285 raw = self._rawdate
286 286 time = float(raw[0])
287 287 # Various tools did silly things with the timezone.
288 288 try:
289 289 timezone = int(raw[1])
290 290 except ValueError:
291 291 timezone = 0
292 292
293 293 return time, timezone
294 294
295 295 @property
296 296 def extra(self):
297 297 raw = self._rawextra
298 298 if raw is None:
299 299 return _defaultextra
300 300
301 301 return decodeextra(raw)
302 302
303 303 @property
304 304 def files(self):
305 305 off = self._offsets
306 306 if off[2] == off[3]:
307 307 return []
308 308
309 309 return self._text[off[2] + 1 : off[3]].split(b'\n')
310 310
311 311 @property
312 312 def filesadded(self):
313 313 if self._cpsd:
314 314 rawindices = self._sidedata.get(sidedatamod.SD_FILESADDED)
315 315 if not rawindices:
316 316 return []
317 317 else:
318 318 rawindices = self.extra.get(b'filesadded')
319 319 if rawindices is None:
320 320 return None
321 321 return copies.decodefileindices(self.files, rawindices)
322 322
323 323 @property
324 324 def filesremoved(self):
325 325 if self._cpsd:
326 326 rawindices = self._sidedata.get(sidedatamod.SD_FILESREMOVED)
327 327 if not rawindices:
328 328 return []
329 329 else:
330 330 rawindices = self.extra.get(b'filesremoved')
331 331 if rawindices is None:
332 332 return None
333 333 return copies.decodefileindices(self.files, rawindices)
334 334
335 335 @property
336 336 def p1copies(self):
337 337 if self._cpsd:
338 338 rawcopies = self._sidedata.get(sidedatamod.SD_P1COPIES)
339 339 if not rawcopies:
340 340 return {}
341 341 else:
342 342 rawcopies = self.extra.get(b'p1copies')
343 343 if rawcopies is None:
344 344 return None
345 345 return copies.decodecopies(self.files, rawcopies)
346 346
347 347 @property
348 348 def p2copies(self):
349 349 if self._cpsd:
350 350 rawcopies = self._sidedata.get(sidedatamod.SD_P2COPIES)
351 351 if not rawcopies:
352 352 return {}
353 353 else:
354 354 rawcopies = self.extra.get(b'p2copies')
355 355 if rawcopies is None:
356 356 return None
357 357 return copies.decodecopies(self.files, rawcopies)
358 358
359 359 @property
360 360 def description(self):
361 361 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
362 362
363 363
364 364 class changelog(revlog.revlog):
365 365 def __init__(self, opener, trypending=False):
366 366 """Load a changelog revlog using an opener.
367 367
368 368 If ``trypending`` is true, we attempt to load the index from a
369 369 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
370 370 The ``00changelog.i.a`` file contains index (and possibly inline
371 371 revision) data for a transaction that hasn't been finalized yet.
372 372 It exists in a separate file to facilitate readers (such as
373 373 hook processes) accessing data before a transaction is finalized.
374 374 """
375 375 if trypending and opener.exists(b'00changelog.i.a'):
376 376 indexfile = b'00changelog.i.a'
377 377 else:
378 378 indexfile = b'00changelog.i'
379 379
380 380 datafile = b'00changelog.d'
381 381 revlog.revlog.__init__(
382 382 self,
383 383 opener,
384 384 indexfile,
385 385 datafile=datafile,
386 386 checkambig=True,
387 387 mmaplargeindex=True,
388 388 )
389 389
390 390 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
391 391 # changelogs don't benefit from generaldelta.
392 392
393 393 self.version &= ~revlog.FLAG_GENERALDELTA
394 394 self._generaldelta = False
395 395
396 396 # Delta chains for changelogs tend to be very small because entries
397 397 # tend to be small and don't delta well with each other. So disable delta
398 398 # chains.
399 399 self._storedeltachains = False
400 400
401 401 self._realopener = opener
402 402 self._delayed = False
403 403 self._delaybuf = None
404 404 self._divert = False
405 405 self.filteredrevs = frozenset()
406 406 self._copiesstorage = opener.options.get(b'copies-storage')
407 407
408 408 def tiprev(self):
409 """filtered version of revlog.tiprev"""
409 410 for i in pycompat.xrange(len(self) - 1, -2, -1):
410 411 if i not in self.filteredrevs:
411 412 return i
412 413
413 def tip(self):
414 """filtered version of revlog.tip"""
415 return self.node(self.tiprev())
416
417 414 def __contains__(self, rev):
418 415 """filtered version of revlog.__contains__"""
419 416 return 0 <= rev < len(self) and rev not in self.filteredrevs
420 417
421 418 def __iter__(self):
422 419 """filtered version of revlog.__iter__"""
423 420 if len(self.filteredrevs) == 0:
424 421 return revlog.revlog.__iter__(self)
425 422
426 423 def filterediter():
427 424 for i in pycompat.xrange(len(self)):
428 425 if i not in self.filteredrevs:
429 426 yield i
430 427
431 428 return filterediter()
432 429
433 430 def revs(self, start=0, stop=None):
434 431 """filtered version of revlog.revs"""
435 432 for i in super(changelog, self).revs(start, stop):
436 433 if i not in self.filteredrevs:
437 434 yield i
438 435
439 436 def _checknofilteredinrevs(self, revs):
440 437 """raise the appropriate error if 'revs' contains a filtered revision
441 438
442 439 This returns a version of 'revs' to be used thereafter by the caller.
443 440 In particular, if revs is an iterator, it is converted into a set.
444 441 """
445 442 safehasattr = util.safehasattr
446 443 if safehasattr(revs, '__next__'):
447 444 # Note that inspect.isgenerator() is not true for iterators.
448 445 revs = set(revs)
449 446
450 447 filteredrevs = self.filteredrevs
451 448 if safehasattr(revs, 'first'): # smartset
452 449 offenders = revs & filteredrevs
453 450 else:
454 451 offenders = filteredrevs.intersection(revs)
455 452
456 453 for rev in offenders:
457 454 raise error.FilteredIndexError(rev)
458 455 return revs
459 456
460 457 def headrevs(self, revs=None):
461 458 if revs is None and self.filteredrevs:
462 459 try:
463 460 return self.index.headrevsfiltered(self.filteredrevs)
464 461 # AttributeError covers non-c-extension environments and
465 462 # old c extensions without filter handling.
466 463 except AttributeError:
467 464 return self._headrevs()
468 465
469 466 if self.filteredrevs:
470 467 revs = self._checknofilteredinrevs(revs)
471 468 return super(changelog, self).headrevs(revs)
472 469
473 470 def strip(self, *args, **kwargs):
474 471 # XXX make something better than assert
475 472 # We can't expect proper strip behavior if we are filtered.
476 473 assert not self.filteredrevs
477 474 super(changelog, self).strip(*args, **kwargs)
478 475
479 476 def rev(self, node):
480 477 """filtered version of revlog.rev"""
481 478 r = super(changelog, self).rev(node)
482 479 if r in self.filteredrevs:
483 480 raise error.FilteredLookupError(
484 481 hex(node), self.indexfile, _(b'filtered node')
485 482 )
486 483 return r
487 484
488 485 def node(self, rev):
489 486 """filtered version of revlog.node"""
490 487 if rev in self.filteredrevs:
491 488 raise error.FilteredIndexError(rev)
492 489 return super(changelog, self).node(rev)
493 490
494 491 def linkrev(self, rev):
495 492 """filtered version of revlog.linkrev"""
496 493 if rev in self.filteredrevs:
497 494 raise error.FilteredIndexError(rev)
498 495 return super(changelog, self).linkrev(rev)
499 496
500 497 def parentrevs(self, rev):
501 498 """filtered version of revlog.parentrevs"""
502 499 if rev in self.filteredrevs:
503 500 raise error.FilteredIndexError(rev)
504 501 return super(changelog, self).parentrevs(rev)
505 502
506 503 def flags(self, rev):
507 504 """filtered version of revlog.flags"""
508 505 if rev in self.filteredrevs:
509 506 raise error.FilteredIndexError(rev)
510 507 return super(changelog, self).flags(rev)
511 508
512 509 def delayupdate(self, tr):
513 510 b"delay visibility of index updates to other readers"
514 511
515 512 if not self._delayed:
516 513 if len(self) == 0:
517 514 self._divert = True
518 515 if self._realopener.exists(self.indexfile + b'.a'):
519 516 self._realopener.unlink(self.indexfile + b'.a')
520 517 self.opener = _divertopener(self._realopener, self.indexfile)
521 518 else:
522 519 self._delaybuf = []
523 520 self.opener = _delayopener(
524 521 self._realopener, self.indexfile, self._delaybuf
525 522 )
526 523 self._delayed = True
527 524 tr.addpending(b'cl-%i' % id(self), self._writepending)
528 525 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
529 526
530 527 def _finalize(self, tr):
531 528 b"finalize index updates"
532 529 self._delayed = False
533 530 self.opener = self._realopener
534 531 # move redirected index data back into place
535 532 if self._divert:
536 533 assert not self._delaybuf
537 534 tmpname = self.indexfile + b".a"
538 535 nfile = self.opener.open(tmpname)
539 536 nfile.close()
540 537 self.opener.rename(tmpname, self.indexfile, checkambig=True)
541 538 elif self._delaybuf:
542 539 fp = self.opener(self.indexfile, b'a', checkambig=True)
543 540 fp.write(b"".join(self._delaybuf))
544 541 fp.close()
545 542 self._delaybuf = None
546 543 self._divert = False
547 544 # split when we're done
548 545 self._enforceinlinesize(tr)
549 546
550 547 def _writepending(self, tr):
551 548 b"create a file containing the unfinalized state for pretxnchangegroup"
552 549 if self._delaybuf:
553 550 # make a temporary copy of the index
554 551 fp1 = self._realopener(self.indexfile)
555 552 pendingfilename = self.indexfile + b".a"
556 553 # register as a temp file to ensure cleanup on failure
557 554 tr.registertmp(pendingfilename)
558 555 # write existing data
559 556 fp2 = self._realopener(pendingfilename, b"w")
560 557 fp2.write(fp1.read())
561 558 # add pending data
562 559 fp2.write(b"".join(self._delaybuf))
563 560 fp2.close()
564 561 # switch modes so finalize can simply rename
565 562 self._delaybuf = None
566 563 self._divert = True
567 564 self.opener = _divertopener(self._realopener, self.indexfile)
568 565
569 566 if self._divert:
570 567 return True
571 568
572 569 return False
573 570
574 571 def _enforceinlinesize(self, tr, fp=None):
575 572 if not self._delayed:
576 573 revlog.revlog._enforceinlinesize(self, tr, fp)
577 574
578 575 def read(self, node):
579 576 """Obtain data from a parsed changelog revision.
580 577
581 578 Returns a 6-tuple of:
582 579
583 580 - manifest node in binary
584 581 - author/user as a localstr
585 582 - date as a 2-tuple of (time, timezone)
586 583 - list of files
587 584 - commit message as a localstr
588 585 - dict of extra metadata
589 586
590 587 Unless you need to access all fields, consider calling
591 588 ``changelogrevision`` instead, as it is faster for partial object
592 589 access.
593 590 """
594 591 d, s = self._revisiondata(node)
595 592 c = changelogrevision(
596 593 d, s, self._copiesstorage == b'changeset-sidedata'
597 594 )
598 595 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
599 596
600 597 def changelogrevision(self, nodeorrev):
601 598 """Obtain a ``changelogrevision`` for a node or revision."""
602 599 text, sidedata = self._revisiondata(nodeorrev)
603 600 return changelogrevision(
604 601 text, sidedata, self._copiesstorage == b'changeset-sidedata'
605 602 )
606 603
607 604 def readfiles(self, node):
608 605 """
609 606 short version of read that only returns the files modified by the cset
610 607 """
611 608 text = self.revision(node)
612 609 if not text:
613 610 return []
614 611 last = text.index(b"\n\n")
615 612 l = text[:last].split(b'\n')
616 613 return l[3:]
617 614
618 615 def add(
619 616 self,
620 617 manifest,
621 618 files,
622 619 desc,
623 620 transaction,
624 621 p1,
625 622 p2,
626 623 user,
627 624 date=None,
628 625 extra=None,
629 626 p1copies=None,
630 627 p2copies=None,
631 628 filesadded=None,
632 629 filesremoved=None,
633 630 ):
634 631 # Convert to UTF-8 encoded bytestrings as the very first
635 632 # thing: calling any method on a localstr object will turn it
636 633 # into a str object and the cached UTF-8 string is thus lost.
637 634 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
638 635
639 636 user = user.strip()
640 637 # An empty username or a username with a "\n" will make the
641 638 # revision text contain two "\n\n" sequences -> corrupt
642 639 # repository since read cannot unpack the revision.
643 640 if not user:
644 641 raise error.StorageError(_(b"empty username"))
645 642 if b"\n" in user:
646 643 raise error.StorageError(
647 644 _(b"username %r contains a newline") % pycompat.bytestr(user)
648 645 )
649 646
650 647 desc = stripdesc(desc)
651 648
652 649 if date:
653 650 parseddate = b"%d %d" % dateutil.parsedate(date)
654 651 else:
655 652 parseddate = b"%d %d" % dateutil.makedate()
656 653 if extra:
657 654 branch = extra.get(b"branch")
658 655 if branch in (b"default", b""):
659 656 del extra[b"branch"]
660 657 elif branch in (b".", b"null", b"tip"):
661 658 raise error.StorageError(
662 659 _(b'the name \'%s\' is reserved') % branch
663 660 )
664 661 sortedfiles = sorted(files)
665 662 sidedata = None
666 663 if extra is not None:
667 664 for name in (
668 665 b'p1copies',
669 666 b'p2copies',
670 667 b'filesadded',
671 668 b'filesremoved',
672 669 ):
673 670 extra.pop(name, None)
674 671 if p1copies is not None:
675 672 p1copies = copies.encodecopies(sortedfiles, p1copies)
676 673 if p2copies is not None:
677 674 p2copies = copies.encodecopies(sortedfiles, p2copies)
678 675 if filesadded is not None:
679 676 filesadded = copies.encodefileindices(sortedfiles, filesadded)
680 677 if filesremoved is not None:
681 678 filesremoved = copies.encodefileindices(sortedfiles, filesremoved)
682 679 if self._copiesstorage == b'extra':
683 680 extrasentries = p1copies, p2copies, filesadded, filesremoved
684 681 if extra is None and any(x is not None for x in extrasentries):
685 682 extra = {}
686 683 if p1copies is not None:
687 684 extra[b'p1copies'] = p1copies
688 685 if p2copies is not None:
689 686 extra[b'p2copies'] = p2copies
690 687 if filesadded is not None:
691 688 extra[b'filesadded'] = filesadded
692 689 if filesremoved is not None:
693 690 extra[b'filesremoved'] = filesremoved
694 691 elif self._copiesstorage == b'changeset-sidedata':
695 692 sidedata = {}
696 693 if p1copies:
697 694 sidedata[sidedatamod.SD_P1COPIES] = p1copies
698 695 if p2copies:
699 696 sidedata[sidedatamod.SD_P2COPIES] = p2copies
700 697 if filesadded:
701 698 sidedata[sidedatamod.SD_FILESADDED] = filesadded
702 699 if filesremoved:
703 700 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
704 701 if not sidedata:
705 702 sidedata = None
706 703
707 704 if extra:
708 705 extra = encodeextra(extra)
709 706 parseddate = b"%s %s" % (parseddate, extra)
710 707 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
711 708 text = b"\n".join(l)
712 709 return self.addrevision(
713 710 text, transaction, len(self), p1, p2, sidedata=sidedata
714 711 )
715 712
716 713 def branchinfo(self, rev):
717 714 """return the branch name and open/close state of a revision
718 715
719 716 This function exists because creating a changectx object
720 717 just to access this is costly."""
721 718 extra = self.read(rev)[5]
722 719 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
723 720
724 721 def _nodeduplicatecallback(self, transaction, node):
725 722 # keep track of revisions that got "re-added", e.g. an unbundle of a known rev.
726 723 #
727 724 # We track them in a list to preserve their order from the source bundle
728 725 duplicates = transaction.changes.setdefault(b'revduplicates', [])
729 726 duplicates.append(self.rev(node))
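Before moving on to revlog.py, a standalone restatement of the filtered tiprev() loop may help: it scans from the highest revision down and returns the first unfiltered one, and the -2 bound lets the scan fall through to -1 (nullrev) when every revision is filtered. The helper name below is hypothetical, for illustration only:

    def filteredtiprev(nrevs, filteredrevs):
        # mirrors pycompat.xrange(len(self) - 1, -2, -1) in changelog.tiprev
        for i in range(nrevs - 1, -2, -1):
            if i not in filteredrevs:
                return i

    assert filteredtiprev(5, frozenset()) == 4        # nothing filtered
    assert filteredtiprev(5, frozenset({3, 4})) == 2  # top revs hidden
    assert filteredtiprev(1, frozenset({0})) == -1    # all filtered: nullrev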
@@ -1,2958 +1,2961 @@ mercurial/revlog.py
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 templatefilters,
69 69 util,
70 70 )
71 71 from .interfaces import (
72 72 repository,
73 73 util as interfaceutil,
74 74 )
75 75 from .revlogutils import (
76 76 deltas as deltautil,
77 77 flagutil,
78 78 sidedata as sidedatautil,
79 79 )
80 80 from .utils import (
81 81 storageutil,
82 82 stringutil,
83 83 )
84 84
85 85 # blanket usage of all the names, to keep pyflakes from flagging them
86 86 # as unused. We need these names available in the module for extensions.
87 87 REVLOGV0
88 88 REVLOGV1
89 89 REVLOGV2
90 90 FLAG_INLINE_DATA
91 91 FLAG_GENERALDELTA
92 92 REVLOG_DEFAULT_FLAGS
93 93 REVLOG_DEFAULT_FORMAT
94 94 REVLOG_DEFAULT_VERSION
95 95 REVLOGV1_FLAGS
96 96 REVLOGV2_FLAGS
97 97 REVIDX_ISCENSORED
98 98 REVIDX_ELLIPSIS
99 99 REVIDX_SIDEDATA
100 100 REVIDX_EXTSTORED
101 101 REVIDX_DEFAULT_FLAGS
102 102 REVIDX_FLAGS_ORDER
103 103 REVIDX_RAWTEXT_CHANGING_FLAGS
104 104
105 105 parsers = policy.importmod(r'parsers')
106 106 rustancestor = policy.importrust(r'ancestor')
107 107 rustdagop = policy.importrust(r'dagop')
108 108
109 109 # Aliased for performance.
110 110 _zlibdecompress = zlib.decompress
111 111
112 112 # max size of revlog with inline data
113 113 _maxinline = 131072
114 114 _chunksize = 1048576
115 115
116 116 # Flag processors for REVIDX_ELLIPSIS.
117 117 def ellipsisreadprocessor(rl, text):
118 118 return text, False, {}
119 119
120 120
121 121 def ellipsiswriteprocessor(rl, text, sidedata):
122 122 return text, False
123 123
124 124
125 125 def ellipsisrawprocessor(rl, text):
126 126 return False
127 127
128 128
129 129 ellipsisprocessor = (
130 130 ellipsisreadprocessor,
131 131 ellipsiswriteprocessor,
132 132 ellipsisrawprocessor,
133 133 )
134 134
135 135
136 136 def getoffset(q):
137 137 return int(q >> 16)
138 138
139 139
140 140 def gettype(q):
141 141 return int(q & 0xFFFF)
142 142
143 143
144 144 def offset_type(offset, type):
145 145 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
146 146 raise ValueError(b'unknown revlog index flags')
147 147 return int(int(offset) << 16 | type)
148 148
149 149
150 150 @attr.s(slots=True, frozen=True)
151 151 class _revisioninfo(object):
152 152 """Information about a revision that allows building its fulltext
153 153 node: expected hash of the revision
154 154 p1, p2: parent revs of the revision
155 155 btext: built text cache consisting of a one-element list
156 156 cachedelta: (baserev, uncompressed_delta) or None
157 157 flags: flags associated to the revision storage
158 158
159 159 One of btext[0] or cachedelta must be set.
160 160 """
161 161
162 162 node = attr.ib()
163 163 p1 = attr.ib()
164 164 p2 = attr.ib()
165 165 btext = attr.ib()
166 166 textlen = attr.ib()
167 167 cachedelta = attr.ib()
168 168 flags = attr.ib()
169 169
170 170
171 171 @interfaceutil.implementer(repository.irevisiondelta)
172 172 @attr.s(slots=True)
173 173 class revlogrevisiondelta(object):
174 174 node = attr.ib()
175 175 p1node = attr.ib()
176 176 p2node = attr.ib()
177 177 basenode = attr.ib()
178 178 flags = attr.ib()
179 179 baserevisionsize = attr.ib()
180 180 revision = attr.ib()
181 181 delta = attr.ib()
182 182 linknode = attr.ib(default=None)
183 183
184 184
185 185 @interfaceutil.implementer(repository.iverifyproblem)
186 186 @attr.s(frozen=True)
187 187 class revlogproblem(object):
188 188 warning = attr.ib(default=None)
189 189 error = attr.ib(default=None)
190 190 node = attr.ib(default=None)
191 191
192 192
193 193 # index v0:
194 194 # 4 bytes: offset
195 195 # 4 bytes: compressed length
196 196 # 4 bytes: base rev
197 197 # 4 bytes: link rev
198 198 # 20 bytes: parent 1 nodeid
199 199 # 20 bytes: parent 2 nodeid
200 200 # 20 bytes: nodeid
201 201 indexformatv0 = struct.Struct(b">4l20s20s20s")
202 202 indexformatv0_pack = indexformatv0.pack
203 203 indexformatv0_unpack = indexformatv0.unpack
204 204
205 205
206 206 class revlogoldindex(list):
207 207 def __getitem__(self, i):
208 208 if i == -1:
209 209 return (0, 0, 0, -1, -1, -1, -1, nullid)
210 210 return list.__getitem__(self, i)
211 211
212 212
213 213 class revlogoldio(object):
214 214 def __init__(self):
215 215 self.size = indexformatv0.size
216 216
217 217 def parseindex(self, data, inline):
218 218 s = self.size
219 219 index = []
220 220 nodemap = {nullid: nullrev}
221 221 n = off = 0
222 222 l = len(data)
223 223 while off + s <= l:
224 224 cur = data[off : off + s]
225 225 off += s
226 226 e = indexformatv0_unpack(cur)
227 227 # transform to revlogv1 format
228 228 e2 = (
229 229 offset_type(e[0], 0),
230 230 e[1],
231 231 -1,
232 232 e[2],
233 233 e[3],
234 234 nodemap.get(e[4], nullrev),
235 235 nodemap.get(e[5], nullrev),
236 236 e[6],
237 237 )
238 238 index.append(e2)
239 239 nodemap[e[6]] = n
240 240 n += 1
241 241
242 242 return revlogoldindex(index), nodemap, None
243 243
244 244 def packentry(self, entry, node, version, rev):
245 245 if gettype(entry[0]):
246 246 raise error.RevlogError(
247 247 _(b'index entry flags need revlog version 1')
248 248 )
249 249 e2 = (
250 250 getoffset(entry[0]),
251 251 entry[1],
252 252 entry[3],
253 253 entry[4],
254 254 node(entry[5]),
255 255 node(entry[6]),
256 256 entry[7],
257 257 )
258 258 return indexformatv0_pack(*e2)
259 259
260 260
261 261 # index ng:
262 262 # 6 bytes: offset
263 263 # 2 bytes: flags
264 264 # 4 bytes: compressed length
265 265 # 4 bytes: uncompressed length
266 266 # 4 bytes: base rev
267 267 # 4 bytes: link rev
268 268 # 4 bytes: parent 1 rev
269 269 # 4 bytes: parent 2 rev
270 270 # 32 bytes: nodeid
271 271 indexformatng = struct.Struct(b">Qiiiiii20s12x")
272 272 indexformatng_pack = indexformatng.pack
273 273 versionformat = struct.Struct(b">I")
274 274 versionformat_pack = versionformat.pack
275 275 versionformat_unpack = versionformat.unpack
276 276
277 277 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
278 278 # signed integer)
279 279 _maxentrysize = 0x7FFFFFFF
280 280
281 281
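The v1 ("ng") entry layout above is a fixed 64-byte record. A small sketch packing and unpacking one entry with the struct defined above (the field values are made up for illustration):

    import struct

    indexformatng = struct.Struct(b">Qiiiiii20s12x")  # 8 + 24 + 20 + 12 = 64 bytes

    entry = (
        (0 << 16) | 0,  # 6-byte offset and 2-byte flags share one uint64
        120,            # compressed length
        200,            # uncompressed length
        0,              # base rev of the delta chain
        0,              # link rev
        -1, -1,         # parent revs (nullrev for a root revision)
        b"\x00" * 20,   # nodeid; 12 pad bytes complete the 32-byte field
    )
    packed = indexformatng.pack(*entry)
    assert indexformatng.size == len(packed) == 64
    assert indexformatng.unpack(packed) == entry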
282 282 class revlogio(object):
283 283 def __init__(self):
284 284 self.size = indexformatng.size
285 285
286 286 def parseindex(self, data, inline):
287 287 # call the C implementation to parse the index data
288 288 index, cache = parsers.parse_index2(data, inline)
289 289 return index, getattr(index, 'nodemap', None), cache
290 290
291 291 def packentry(self, entry, node, version, rev):
292 292 p = indexformatng_pack(*entry)
293 293 if rev == 0:
294 294 p = versionformat_pack(version) + p[4:]
295 295 return p
296 296
297 297
298 298 class revlog(object):
299 299 """
300 300 the underlying revision storage object
301 301
302 302 A revlog consists of two parts, an index and the revision data.
303 303
304 304 The index is a file with a fixed record size containing
305 305 information on each revision, including its nodeid (hash), the
306 306 nodeids of its parents, the position and offset of its data within
307 307 the data file, and the revision it's based on. Finally, each entry
308 308 contains a linkrev entry that can serve as a pointer to external
309 309 data.
310 310
311 311 The revision data itself is a linear collection of data chunks.
312 312 Each chunk represents a revision and is usually represented as a
313 313 delta against the previous chunk. To bound lookup time, runs of
314 314 deltas are limited to about 2 times the length of the original
315 315 version data. This makes retrieval of a version proportional to
316 316 its size, or O(1) relative to the number of revisions.
317 317
318 318 Both pieces of the revlog are written to in an append-only
319 319 fashion, which means we never need to rewrite a file to insert or
320 320 remove data, and can use some simple techniques to avoid the need
321 321 for locking while reading.
322 322
323 323 If checkambig, indexfile is opened with checkambig=True at
324 324 writing, to avoid file stat ambiguity.
325 325
326 326 If mmaplargeindex is True, and an mmapindexthreshold is set, the
327 327 index will be mmapped rather than read if it is larger than the
328 328 configured threshold.
329 329
330 330 If censorable is True, the revlog can have censored revisions.
331 331
332 332 If `upperboundcomp` is not None, this is the expected maximal gain from
333 333 compression for the data content.
334 334 """
335 335
336 336 _flagserrorclass = error.RevlogError
337 337
338 338 def __init__(
339 339 self,
340 340 opener,
341 341 indexfile,
342 342 datafile=None,
343 343 checkambig=False,
344 344 mmaplargeindex=False,
345 345 censorable=False,
346 346 upperboundcomp=None,
347 347 ):
348 348 """
349 349 create a revlog object
350 350
351 351 opener is a function that abstracts the file opening operation
352 352 and can be used to implement COW semantics or the like.
353 353
354 354 """
355 355 self.upperboundcomp = upperboundcomp
356 356 self.indexfile = indexfile
357 357 self.datafile = datafile or (indexfile[:-2] + b".d")
358 358 self.opener = opener
359 359 # When True, indexfile is opened with checkambig=True at writing, to
360 360 # avoid file stat ambiguity.
361 361 self._checkambig = checkambig
362 362 self._mmaplargeindex = mmaplargeindex
363 363 self._censorable = censorable
364 364 # 3-tuple of (node, rev, text) for a raw revision.
365 365 self._revisioncache = None
366 366 # Maps rev to chain base rev.
367 367 self._chainbasecache = util.lrucachedict(100)
368 368 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
369 369 self._chunkcache = (0, b'')
370 370 # How much data to read and cache into the raw revlog data cache.
371 371 self._chunkcachesize = 65536
372 372 self._maxchainlen = None
373 373 self._deltabothparents = True
374 374 self.index = []
375 375 # Mapping of partial identifiers to full nodes.
376 376 self._pcache = {}
377 377 # Mapping of revision integer to full node.
378 378 self._nodecache = {nullid: nullrev}
379 379 self._nodepos = None
380 380 self._compengine = b'zlib'
381 381 self._compengineopts = {}
382 382 self._maxdeltachainspan = -1
383 383 self._withsparseread = False
384 384 self._sparserevlog = False
385 385 self._srdensitythreshold = 0.50
386 386 self._srmingapsize = 262144
387 387
388 388 # Make copy of flag processors so each revlog instance can support
389 389 # custom flags.
390 390 self._flagprocessors = dict(flagutil.flagprocessors)
391 391
392 392 # 2-tuple of file handles being used for active writing.
393 393 self._writinghandles = None
394 394
395 395 self._loadindex()
396 396
397 397 def _loadindex(self):
398 398 mmapindexthreshold = None
399 399 opts = self.opener.options
400 400
401 401 if b'revlogv2' in opts:
402 402 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
403 403 elif b'revlogv1' in opts:
404 404 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
405 405 if b'generaldelta' in opts:
406 406 newversionflags |= FLAG_GENERALDELTA
407 407 elif b'revlogv0' in self.opener.options:
408 408 newversionflags = REVLOGV0
409 409 else:
410 410 newversionflags = REVLOG_DEFAULT_VERSION
411 411
412 412 if b'chunkcachesize' in opts:
413 413 self._chunkcachesize = opts[b'chunkcachesize']
414 414 if b'maxchainlen' in opts:
415 415 self._maxchainlen = opts[b'maxchainlen']
416 416 if b'deltabothparents' in opts:
417 417 self._deltabothparents = opts[b'deltabothparents']
418 418 self._lazydelta = bool(opts.get(b'lazydelta', True))
419 419 self._lazydeltabase = False
420 420 if self._lazydelta:
421 421 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
422 422 if b'compengine' in opts:
423 423 self._compengine = opts[b'compengine']
424 424 if b'zlib.level' in opts:
425 425 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
426 426 if b'zstd.level' in opts:
427 427 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
428 428 if b'maxdeltachainspan' in opts:
429 429 self._maxdeltachainspan = opts[b'maxdeltachainspan']
430 430 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
431 431 mmapindexthreshold = opts[b'mmapindexthreshold']
432 432 self.hassidedata = bool(opts.get(b'side-data', False))
433 433 if self.hassidedata:
434 434 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
435 435 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
436 436 withsparseread = bool(opts.get(b'with-sparse-read', False))
437 437 # sparse-revlog forces sparse-read
438 438 self._withsparseread = self._sparserevlog or withsparseread
439 439 if b'sparse-read-density-threshold' in opts:
440 440 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
441 441 if b'sparse-read-min-gap-size' in opts:
442 442 self._srmingapsize = opts[b'sparse-read-min-gap-size']
443 443 if opts.get(b'enableellipsis'):
444 444 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
445 445
446 446 # revlog v0 doesn't have flag processors
447 447 for flag, processor in pycompat.iteritems(
448 448 opts.get(b'flagprocessors', {})
449 449 ):
450 450 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
451 451
452 452 if self._chunkcachesize <= 0:
453 453 raise error.RevlogError(
454 454 _(b'revlog chunk cache size %r is not greater than 0')
455 455 % self._chunkcachesize
456 456 )
457 457 elif self._chunkcachesize & (self._chunkcachesize - 1):
458 458 raise error.RevlogError(
459 459 _(b'revlog chunk cache size %r is not a power of 2')
460 460 % self._chunkcachesize
461 461 )
462 462
463 463 indexdata = b''
464 464 self._initempty = True
465 465 try:
466 466 with self._indexfp() as f:
467 467 if (
468 468 mmapindexthreshold is not None
469 469 and self.opener.fstat(f).st_size >= mmapindexthreshold
470 470 ):
471 471 # TODO: we should .close() this to release resources without
472 472 # relying on Python GC
473 473 indexdata = util.buffer(util.mmapread(f))
474 474 else:
475 475 indexdata = f.read()
476 476 if len(indexdata) > 0:
477 477 versionflags = versionformat_unpack(indexdata[:4])[0]
478 478 self._initempty = False
479 479 else:
480 480 versionflags = newversionflags
481 481 except IOError as inst:
482 482 if inst.errno != errno.ENOENT:
483 483 raise
484 484
485 485 versionflags = newversionflags
486 486
487 487 self.version = versionflags
488 488
489 489 flags = versionflags & ~0xFFFF
490 490 fmt = versionflags & 0xFFFF
491 491
492 492 if fmt == REVLOGV0:
493 493 if flags:
494 494 raise error.RevlogError(
495 495 _(b'unknown flags (%#04x) in version %d revlog %s')
496 496 % (flags >> 16, fmt, self.indexfile)
497 497 )
498 498
499 499 self._inline = False
500 500 self._generaldelta = False
501 501
502 502 elif fmt == REVLOGV1:
503 503 if flags & ~REVLOGV1_FLAGS:
504 504 raise error.RevlogError(
505 505 _(b'unknown flags (%#04x) in version %d revlog %s')
506 506 % (flags >> 16, fmt, self.indexfile)
507 507 )
508 508
509 509 self._inline = versionflags & FLAG_INLINE_DATA
510 510 self._generaldelta = versionflags & FLAG_GENERALDELTA
511 511
512 512 elif fmt == REVLOGV2:
513 513 if flags & ~REVLOGV2_FLAGS:
514 514 raise error.RevlogError(
515 515 _(b'unknown flags (%#04x) in version %d revlog %s')
516 516 % (flags >> 16, fmt, self.indexfile)
517 517 )
518 518
519 519 self._inline = versionflags & FLAG_INLINE_DATA
520 520 # generaldelta implied by version 2 revlogs.
521 521 self._generaldelta = True
522 522
523 523 else:
524 524 raise error.RevlogError(
525 525 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
526 526 )
527 527 # sparse-revlog can't be on without general-delta (issue6056)
528 528 if not self._generaldelta:
529 529 self._sparserevlog = False
530 530
531 531 self._storedeltachains = True
532 532
533 533 self._io = revlogio()
534 534 if self.version == REVLOGV0:
535 535 self._io = revlogoldio()
536 536 try:
537 537 d = self._io.parseindex(indexdata, self._inline)
538 538 except (ValueError, IndexError):
539 539 raise error.RevlogError(
540 540 _(b"index %s is corrupted") % self.indexfile
541 541 )
542 542 self.index, nodemap, self._chunkcache = d
543 543 if nodemap is not None:
544 544 self.nodemap = self._nodecache = nodemap
545 545 if not self._chunkcache:
546 546 self._chunkclear()
547 547 # revnum -> (chain-length, sum-delta-length)
548 548 self._chaininfocache = {}
549 549 # revlog header -> revlog compressor
550 550 self._decompressors = {}
551 551
552 552 @util.propertycache
553 553 def _compressor(self):
554 554 engine = util.compengines[self._compengine]
555 555 return engine.revlogcompressor(self._compengineopts)
556 556
557 557 def _indexfp(self, mode=b'r'):
558 558 """file object for the revlog's index file"""
559 559 args = {r'mode': mode}
560 560 if mode != b'r':
561 561 args[r'checkambig'] = self._checkambig
562 562 if mode == b'w':
563 563 args[r'atomictemp'] = True
564 564 return self.opener(self.indexfile, **args)
565 565
566 566 def _datafp(self, mode=b'r'):
567 567 """file object for the revlog's data file"""
568 568 return self.opener(self.datafile, mode=mode)
569 569
570 570 @contextlib.contextmanager
571 571 def _datareadfp(self, existingfp=None):
572 572 """file object suitable to read data"""
573 573 # Use explicit file handle, if given.
574 574 if existingfp is not None:
575 575 yield existingfp
576 576
577 577 # Use a file handle being actively used for writes, if available.
578 578 # There is some danger to doing this because reads will seek the
579 579 # file. However, _writeentry() performs a SEEK_END before all writes,
580 580 # so we should be safe.
581 581 elif self._writinghandles:
582 582 if self._inline:
583 583 yield self._writinghandles[0]
584 584 else:
585 585 yield self._writinghandles[1]
586 586
587 587 # Otherwise open a new file handle.
588 588 else:
589 589 if self._inline:
590 590 func = self._indexfp
591 591 else:
592 592 func = self._datafp
593 593 with func() as fp:
594 594 yield fp
595 595
596 def tiprev(self):
597 return len(self.index) - 1
598
596 599 def tip(self):
597 return self.node(len(self.index) - 1)
600 return self.node(self.tiprev())
598 601
599 602 def __contains__(self, rev):
600 603 return 0 <= rev < len(self)
601 604
602 605 def __len__(self):
603 606 return len(self.index)
604 607
605 608 def __iter__(self):
606 609 return iter(pycompat.xrange(len(self)))
607 610
608 611 def revs(self, start=0, stop=None):
609 612 """iterate over all rev in this revlog (from start to stop)"""
610 613 return storageutil.iterrevs(len(self), start=start, stop=stop)
611 614
612 615 @util.propertycache
613 616 def nodemap(self):
614 617 if self.index:
615 618 # populate mapping down to the initial node
616 619 node0 = self.index[0][7] # get around changelog filtering
617 620 self.rev(node0)
618 621 return self._nodecache
619 622
620 623 def hasnode(self, node):
621 624 try:
622 625 self.rev(node)
623 626 return True
624 627 except KeyError:
625 628 return False
626 629
627 630 def candelta(self, baserev, rev):
628 631 """whether two revisions (baserev, rev) can be delta-ed or not"""
629 632 # Disable delta if either rev requires a content-changing flag
630 633 # processor (ex. LFS). This is because such flag processor can alter
631 634 # the rawtext content that the delta will be based on, and two clients
632 635 # could have a same revlog node with different flags (i.e. different
633 636 # rawtext contents) and the delta could be incompatible.
634 637 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
635 638 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
636 639 ):
637 640 return False
638 641 return True
639 642
640 643 def clearcaches(self):
641 644 self._revisioncache = None
642 645 self._chainbasecache.clear()
643 646 self._chunkcache = (0, b'')
644 647 self._pcache = {}
645 648
646 649 try:
647 650 # If we are using the native C version, we are in a fun case
648 651 # where self.index, self.nodemap and self._nodecache are the same
649 652 # object.
650 653 self._nodecache.clearcaches()
651 654 except AttributeError:
652 655 self._nodecache = {nullid: nullrev}
653 656 self._nodepos = None
654 657
655 658 def rev(self, node):
656 659 try:
657 660 return self._nodecache[node]
658 661 except TypeError:
659 662 raise
660 663 except error.RevlogError:
661 664 # parsers.c radix tree lookup failed
662 665 if node == wdirid or node in wdirfilenodeids:
663 666 raise error.WdirUnsupported
664 667 raise error.LookupError(node, self.indexfile, _(b'no node'))
665 668 except KeyError:
666 669 # pure python cache lookup failed
667 670 n = self._nodecache
668 671 i = self.index
669 672 p = self._nodepos
670 673 if p is None:
671 674 p = len(i) - 1
672 675 else:
673 676 assert p < len(i)
674 677 for r in pycompat.xrange(p, -1, -1):
675 678 v = i[r][7]
676 679 n[v] = r
677 680 if v == node:
678 681 self._nodepos = r - 1
679 682 return r
680 683 if node == wdirid or node in wdirfilenodeids:
681 684 raise error.WdirUnsupported
682 685 raise error.LookupError(node, self.indexfile, _(b'no node'))
683 686
684 687 # Accessors for index entries.
685 688
686 689 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
687 690 # are flags.
688 691 def start(self, rev):
689 692 return int(self.index[rev][0] >> 16)
690 693
691 694 def flags(self, rev):
692 695 return self.index[rev][0] & 0xFFFF
693 696
694 697 def length(self, rev):
695 698 return self.index[rev][1]
696 699
697 700 def rawsize(self, rev):
698 701 """return the length of the uncompressed text for a given revision"""
699 702 l = self.index[rev][2]
700 703 if l >= 0:
701 704 return l
702 705
703 706 t = self.rawdata(rev)
704 707 return len(t)
705 708
706 709 def size(self, rev):
707 710 """length of non-raw text (processed by a "read" flag processor)"""
708 711 # fast path: if no "read" flag processor could change the content,
709 712 # size is rawsize. note: ELLIPSIS is known to not change the content.
710 713 flags = self.flags(rev)
711 714 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
712 715 return self.rawsize(rev)
713 716
714 717 return len(self.revision(rev, raw=False))
715 718
716 719 def chainbase(self, rev):
717 720 base = self._chainbasecache.get(rev)
718 721 if base is not None:
719 722 return base
720 723
721 724 index = self.index
722 725 iterrev = rev
723 726 base = index[iterrev][3]
724 727 while base != iterrev:
725 728 iterrev = base
726 729 base = index[iterrev][3]
727 730
728 731 self._chainbasecache[rev] = base
729 732 return base
730 733
731 734 def linkrev(self, rev):
732 735 return self.index[rev][4]
733 736
734 737 def parentrevs(self, rev):
735 738 try:
736 739 entry = self.index[rev]
737 740 except IndexError:
738 741 if rev == wdirrev:
739 742 raise error.WdirUnsupported
740 743 raise
741 744
742 745 return entry[5], entry[6]
743 746
744 747 # fast parentrevs(rev) where rev isn't filtered
745 748 _uncheckedparentrevs = parentrevs
746 749
747 750 def node(self, rev):
748 751 try:
749 752 return self.index[rev][7]
750 753 except IndexError:
751 754 if rev == wdirrev:
752 755 raise error.WdirUnsupported
753 756 raise
754 757
755 758 # Derived from index values.
756 759
757 760 def end(self, rev):
758 761 return self.start(rev) + self.length(rev)
759 762
760 763 def parents(self, node):
761 764 i = self.index
762 765 d = i[self.rev(node)]
763 766 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
764 767
765 768 def chainlen(self, rev):
766 769 return self._chaininfo(rev)[0]
767 770
768 771 def _chaininfo(self, rev):
769 772 chaininfocache = self._chaininfocache
770 773 if rev in chaininfocache:
771 774 return chaininfocache[rev]
772 775 index = self.index
773 776 generaldelta = self._generaldelta
774 777 iterrev = rev
775 778 e = index[iterrev]
776 779 clen = 0
777 780 compresseddeltalen = 0
778 781 while iterrev != e[3]:
779 782 clen += 1
780 783 compresseddeltalen += e[1]
781 784 if generaldelta:
782 785 iterrev = e[3]
783 786 else:
784 787 iterrev -= 1
785 788 if iterrev in chaininfocache:
786 789 t = chaininfocache[iterrev]
787 790 clen += t[0]
788 791 compresseddeltalen += t[1]
789 792 break
790 793 e = index[iterrev]
791 794 else:
792 795 # Add text length of base since decompressing that also takes
793 796 # work. For cache hits the length is already included.
794 797 compresseddeltalen += e[1]
795 798 r = (clen, compresseddeltalen)
796 799 chaininfocache[rev] = r
797 800 return r
798 801
799 802 def _deltachain(self, rev, stoprev=None):
800 803 """Obtain the delta chain for a revision.
801 804
802 805 ``stoprev`` specifies a revision to stop at. If not specified, we
803 806 stop at the base of the chain.
804 807
805 808 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
806 809 revs in ascending order and ``stopped`` is a bool indicating whether
807 810 ``stoprev`` was hit.
808 811 """
809 812 # Try C implementation.
810 813 try:
811 814 return self.index.deltachain(rev, stoprev, self._generaldelta)
812 815 except AttributeError:
813 816 pass
814 817
815 818 chain = []
816 819
817 820 # Alias to prevent attribute lookup in tight loop.
818 821 index = self.index
819 822 generaldelta = self._generaldelta
820 823
821 824 iterrev = rev
822 825 e = index[iterrev]
823 826 while iterrev != e[3] and iterrev != stoprev:
824 827 chain.append(iterrev)
825 828 if generaldelta:
826 829 iterrev = e[3]
827 830 else:
828 831 iterrev -= 1
829 832 e = index[iterrev]
830 833
831 834 if iterrev == stoprev:
832 835 stopped = True
833 836 else:
834 837 chain.append(iterrev)
835 838 stopped = False
836 839
837 840 chain.reverse()
838 841 return chain, stopped
839 842
840 843 def ancestors(self, revs, stoprev=0, inclusive=False):
841 844 """Generate the ancestors of 'revs' in reverse revision order.
842 845 Does not generate revs lower than stoprev.
843 846
844 847 See the documentation for ancestor.lazyancestors for more details."""
845 848
846 849 # first, make sure start revisions aren't filtered
847 850 revs = list(revs)
848 851 checkrev = self.node
849 852 for r in revs:
850 853 checkrev(r)
851 854 # and we're sure ancestors aren't filtered as well
852 855
853 856 if rustancestor is not None:
854 857 lazyancestors = rustancestor.LazyAncestors
855 858 arg = self.index
856 859 elif util.safehasattr(parsers, b'rustlazyancestors'):
857 860 lazyancestors = ancestor.rustlazyancestors
858 861 arg = self.index
859 862 else:
860 863 lazyancestors = ancestor.lazyancestors
861 864 arg = self._uncheckedparentrevs
862 865 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
863 866
864 867 def descendants(self, revs):
865 868 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
866 869
867 870 def findcommonmissing(self, common=None, heads=None):
868 871 """Return a tuple of the ancestors of common and the ancestors of heads
869 872 that are not ancestors of common. In revset terminology, we return the
870 873 tuple:
871 874
872 875 ::common, (::heads) - (::common)
873 876
874 877 The list is sorted by revision number, meaning it is
875 878 topologically sorted.
876 879
877 880 'heads' and 'common' are both lists of node IDs. If heads is
878 881 not supplied, uses all of the revlog's heads. If common is not
879 882 supplied, uses nullid."""
880 883 if common is None:
881 884 common = [nullid]
882 885 if heads is None:
883 886 heads = self.heads()
884 887
885 888 common = [self.rev(n) for n in common]
886 889 heads = [self.rev(n) for n in heads]
887 890
888 891 # we want the ancestors, but inclusive
889 892 class lazyset(object):
890 893 def __init__(self, lazyvalues):
891 894 self.addedvalues = set()
892 895 self.lazyvalues = lazyvalues
893 896
894 897 def __contains__(self, value):
895 898 return value in self.addedvalues or value in self.lazyvalues
896 899
897 900 def __iter__(self):
898 901 added = self.addedvalues
899 902 for r in added:
900 903 yield r
901 904 for r in self.lazyvalues:
902 905 if not r in added:
903 906 yield r
904 907
905 908 def add(self, value):
906 909 self.addedvalues.add(value)
907 910
908 911 def update(self, values):
909 912 self.addedvalues.update(values)
910 913
911 914 has = lazyset(self.ancestors(common))
912 915 has.add(nullrev)
913 916 has.update(common)
914 917
915 918 # take all ancestors from heads that aren't in has
916 919 missing = set()
917 920 visit = collections.deque(r for r in heads if r not in has)
918 921 while visit:
919 922 r = visit.popleft()
920 923 if r in missing:
921 924 continue
922 925 else:
923 926 missing.add(r)
924 927 for p in self.parentrevs(r):
925 928 if p not in has:
926 929 visit.append(p)
927 930 missing = list(missing)
928 931 missing.sort()
929 932 return has, [self.node(miss) for miss in missing]
930 933
931 934 def incrementalmissingrevs(self, common=None):
932 935 """Return an object that can be used to incrementally compute the
933 936 revision numbers of the ancestors of arbitrary sets that are not
934 937 ancestors of common. This is an ancestor.incrementalmissingancestors
935 938 object.
936 939
937 940 'common' is a list of revision numbers. If common is not supplied, uses
938 941 nullrev.
939 942 """
940 943 if common is None:
941 944 common = [nullrev]
942 945
943 946 if rustancestor is not None:
944 947 return rustancestor.MissingAncestors(self.index, common)
945 948 return ancestor.incrementalmissingancestors(self.parentrevs, common)
946 949
947 950 def findmissingrevs(self, common=None, heads=None):
948 951 """Return the revision numbers of the ancestors of heads that
949 952 are not ancestors of common.
950 953
951 954 More specifically, return a list of revision numbers corresponding to
952 955 nodes N such that every N satisfies the following constraints:
953 956
954 957 1. N is an ancestor of some node in 'heads'
955 958 2. N is not an ancestor of any node in 'common'
956 959
957 960 The list is sorted by revision number, meaning it is
958 961 topologically sorted.
959 962
960 963 'heads' and 'common' are both lists of revision numbers. If heads is
961 964 not supplied, uses all of the revlog's heads. If common is not
962 965 supplied, uses nullid."""
963 966 if common is None:
964 967 common = [nullrev]
965 968 if heads is None:
966 969 heads = self.headrevs()
967 970
968 971 inc = self.incrementalmissingrevs(common=common)
969 972 return inc.missingancestors(heads)
970 973
971 974 def findmissing(self, common=None, heads=None):
972 975 """Return the ancestors of heads that are not ancestors of common.
973 976
974 977 More specifically, return a list of nodes N such that every N
975 978 satisfies the following constraints:
976 979
977 980 1. N is an ancestor of some node in 'heads'
978 981 2. N is not an ancestor of any node in 'common'
979 982
980 983 The list is sorted by revision number, meaning it is
981 984 topologically sorted.
982 985
983 986 'heads' and 'common' are both lists of node IDs. If heads is
984 987 not supplied, uses all of the revlog's heads. If common is not
985 988 supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is; real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
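        # Revision numbers are topologically sorted: a parent always has a
        # smaller revision number than its children. So if a > b, a cannot
        # possibly be an ancestor of b and we can bail out early.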
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
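        # Resolution order, as tried below: integer revision number, 20-byte
        # binary node, stringified revision number ("12" or "-2"), then full
        # 40-character hex nodeid. Returns None when nothing matches.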
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))
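        # The virtual working-directory node (wdirid) is forty 'f' characters
        # in hex, so any prefix consisting solely of 'f's stays ambiguous with
        # it and needs the special handling seen in isvalid() above.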

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
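        # Worked example (assuming a 64KiB cache size; the chunk cache size is
        # validated elsewhere to be a power of two): offset=70000, length=1000
        # gives
        #   realoffset = 70000 & ~65535 = 65536
        #   reallength = ((70000 + 1000 + 65536) & ~65535) - 65536 = 65536
        # i.e. we read the whole aligned 64KiB window containing the request.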
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
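            # In an inline revlog, each data chunk is preceded by its index
            # entry, so rev r's data sits after (r + 1) index entries of
            # self._io.size bytes each; adjust both bounds accordingly.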
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
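        # A delta against either parent is an ordinary delta, not a snapshot.
        # A delta against any other revision is an intermediate snapshot if,
        # and only if, its base is itself a snapshot.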
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # The text as stored inside the revlog. Might be the revision or might
        # need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
            flags = flags & ~REVIDX_SIDEDATA
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )
        else:
            flags |= REVIDX_SIDEDATA

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(
            self, text, flags, sidedata=sidedata
        )

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
        )
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction, ifh, dfh, entry, deltainfo.data, link, offset
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if deltabase not in self.nodemap:
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=bool(addrevisioncb),
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
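        # Truncation offsets: with a separate data file we cut the data file
        # at start(rev) and the index at rev * self._io.size; for an inline
        # revlog both live in one file, so the cut point is start(rev) plus
        # the rev index entries written before that data.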
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
2452 2455 expected = 0
2453 2456 if len(self):
2454 2457 expected = max(0, self.end(len(self) - 1))
2455 2458
2456 2459 try:
2457 2460 with self._datafp() as f:
2458 2461 f.seek(0, io.SEEK_END)
2459 2462 actual = f.tell()
2460 2463 dd = actual - expected
2461 2464 except IOError as inst:
2462 2465 if inst.errno != errno.ENOENT:
2463 2466 raise
2464 2467 dd = 0
2465 2468
2466 2469 try:
2467 2470 f = self.opener(self.indexfile)
2468 2471 f.seek(0, io.SEEK_END)
2469 2472 actual = f.tell()
2470 2473 f.close()
2471 2474 s = self._io.size
2472 2475 i = max(0, actual // s)
2473 2476 di = actual - (i * s)
2474 2477 if self._inline:
2475 2478 databytes = 0
2476 2479 for r in self:
2477 2480 databytes += max(0, self.length(r))
2478 2481 dd = 0
2479 2482 di = actual - len(self) * s - databytes
2480 2483 except IOError as inst:
2481 2484 if inst.errno != errno.ENOENT:
2482 2485 raise
2483 2486 di = 0
2484 2487
2485 2488 return (dd, di)
2486 2489
2487 2490 def files(self):
2488 2491 res = [self.indexfile]
2489 2492 if not self._inline:
2490 2493 res.append(self.datafile)
2491 2494 return res
2492 2495
2493 2496 def emitrevisions(
2494 2497 self,
2495 2498 nodes,
2496 2499 nodesorder=None,
2497 2500 revisiondata=False,
2498 2501 assumehaveparentrevisions=False,
2499 2502 deltamode=repository.CG_DELTAMODE_STD,
2500 2503 ):
2501 2504 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2502 2505 raise error.ProgrammingError(
2503 2506 b'unhandled value for nodesorder: %s' % nodesorder
2504 2507 )
2505 2508
2506 2509 if nodesorder is None and not self._generaldelta:
2507 2510 nodesorder = b'storage'
2508 2511
2509 2512 if (
2510 2513 not self._storedeltachains
2511 2514 and deltamode != repository.CG_DELTAMODE_PREV
2512 2515 ):
2513 2516 deltamode = repository.CG_DELTAMODE_FULL
2514 2517
2515 2518 return storageutil.emitrevisions(
2516 2519 self,
2517 2520 nodes,
2518 2521 nodesorder,
2519 2522 revlogrevisiondelta,
2520 2523 deltaparentfn=self.deltaparent,
2521 2524 candeltafn=self.candelta,
2522 2525 rawsizefn=self.rawsize,
2523 2526 revdifffn=self.revdiff,
2524 2527 flagsfn=self.flags,
2525 2528 deltamode=deltamode,
2526 2529 revisiondata=revisiondata,
2527 2530 assumehaveparentrevisions=assumehaveparentrevisions,
2528 2531 )
2529 2532
2530 2533 DELTAREUSEALWAYS = b'always'
2531 2534 DELTAREUSESAMEREVS = b'samerevs'
2532 2535 DELTAREUSENEVER = b'never'
2533 2536
2534 2537 DELTAREUSEFULLADD = b'fulladd'
2535 2538
2536 2539 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2537 2540
2538 2541 def clone(
2539 2542 self,
2540 2543 tr,
2541 2544 destrevlog,
2542 2545 addrevisioncb=None,
2543 2546 deltareuse=DELTAREUSESAMEREVS,
2544 2547 forcedeltabothparents=None,
2545 2548 sidedatacompanion=None,
2546 2549 ):
2547 2550 """Copy this revlog to another, possibly with format changes.
2548 2551
2549 2552 The destination revlog will contain the same revisions and nodes.
2550 2553 However, it may not be bit-for-bit identical due to e.g. delta encoding
2551 2554 differences.
2552 2555
2553 2556 The ``deltareuse`` argument control how deltas from the existing revlog
2554 2557 are preserved in the destination revlog. The argument can have the
2555 2558 following values:
2556 2559
2557 2560 DELTAREUSEALWAYS
2558 2561 Deltas will always be reused (if possible), even if the destination
2559 2562 revlog would not select the same revisions for the delta. This is the
2560 2563 fastest mode of operation.
2561 2564 DELTAREUSESAMEREVS
2562 2565 Deltas will be reused if the destination revlog would pick the same
2563 2566 revisions for the delta. This mode strikes a balance between speed
2564 2567 and optimization.
2565 2568 DELTAREUSENEVER
2566 2569 Deltas will never be reused. This is the slowest mode of execution.
2567 2570 This mode can be used to recompute deltas (e.g. if the diff/delta
2568 2571 algorithm changes).
2569 2572 DELTAREUSEFULLADD
2570 2573 Revision will be re-added as if their were new content. This is
2571 2574 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2572 2575 eg: large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.
        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. If None, the destination revlog's current
        setting is used.

        If not None, `sidedatacompanion` is a callable that accepts two
        arguments:

            (srcrevlog, rev)

        and returns a triplet that controls changes to sidedata content from
        the old revision to the new clone result:

            (dropall, filterout, update)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

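    # A hedged usage sketch for ``clone`` (``srcrl``, ``destrl`` and ``tr``
    # are illustrative names): given an empty destination revlog opened on
    # the same transaction, deltas can be recomputed from scratch with:
    #
    #   srcrl.clone(tr, destrl, deltareuse=srcrl.DELTAREUSENEVER)
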
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {})
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall, filterout, update = sidedataactions
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None
                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state[b'skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
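
    # A hedged usage sketch for ``storageinfo`` (``rl`` is an illustrative
    # name): each keyword opts into one field of the returned dict, e.g.:
    #
    #   info = rl.storageinfo(revisionscount=True, storedsize=True)
    #   # -> {b'revisionscount': <int>, b'storedsize': <bytes on disk>}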