revlog: speed up isancestor...
Valentin Gatien-Baron
r42639:055c3e2c default
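A minimal sketch of the optimization, assuming an index object that exposes reachableroots2(minroot, heads, roots, includepath) as the removed changelog wrapper below does; the helper name isancestorrev and its body are illustrative, not the committed code:

# Sketch: an ancestry test built on the C index method reachableroots2.
# Hypothetical helper, shown only to make the speedup concrete.
def isancestorrev(index, a, b):
    """Return True if revision a is an ancestor of revision b (a == b counts)."""
    if a == b:
        return True
    if a > b:
        # parents always have smaller revision numbers than their children,
        # so a larger rev can never be an ancestor of a smaller one
        return False
    # ask for the roots {a} reachable from the head {b}; pruning the walk
    # at minroot=a is what avoids scanning all ancestors of b
    return bool(index.reachableroots2(a, [b], [a], False))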
@@ -1,668 +1,665
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import (
17 17 attr,
18 18 )
19 19
20 20 from . import (
21 21 encoding,
22 22 error,
23 23 pycompat,
24 24 revlog,
25 25 util,
26 26 )
27 27 from .utils import (
28 28 dateutil,
29 29 stringutil,
30 30 )
31 31
32 32 _defaultextra = {'branch': 'default'}
33 33
34 34 def _string_escape(text):
35 35 """
36 36 >>> from .pycompat import bytechr as chr
37 37 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
38 38 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
39 39 >>> s
40 40 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
41 41 >>> res = _string_escape(s)
42 42 >>> s == _string_unescape(res)
43 43 True
44 44 """
45 45 # subset of the string_escape codec
46 46 text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
47 47 return text.replace('\0', '\\0')
48 48
49 49 def _string_unescape(text):
50 50 if '\\0' in text:
51 51 # fix up \0 without getting into trouble with \\0
52 52 text = text.replace('\\\\', '\\\\\n')
53 53 text = text.replace('\\0', '\0')
54 54 text = text.replace('\n', '')
55 55 return stringutil.unescapestr(text)
56 56
57 57 def decodeextra(text):
58 58 """
59 59 >>> from .pycompat import bytechr as chr
60 60 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
61 61 ... ).items())
62 62 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
63 63 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
64 64 ... b'baz': chr(92) + chr(0) + b'2'})
65 65 ... ).items())
66 66 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
67 67 """
68 68 extra = _defaultextra.copy()
69 69 for l in text.split('\0'):
70 70 if l:
71 71 k, v = _string_unescape(l).split(':', 1)
72 72 extra[k] = v
73 73 return extra
74 74
75 75 def encodeextra(d):
76 76 # keys must be sorted to produce a deterministic changelog entry
77 77 items = [
78 78 _string_escape('%s:%s' % (k, pycompat.bytestr(d[k])))
79 79 for k in sorted(d)
80 80 ]
81 81 return "\0".join(items)
82 82
83 83 def encodecopies(files, copies):
84 84 items = []
85 85 for i, dst in enumerate(files):
86 86 if dst in copies:
87 87 items.append('%d\0%s' % (i, copies[dst]))
88 88 if len(items) != len(copies):
89 89 raise error.ProgrammingError('some copy targets missing from file list')
90 90 return "\n".join(items)
91 91
92 92 def decodecopies(files, data):
93 93 try:
94 94 copies = {}
95 95 for l in data.split('\n'):
96 96 strindex, src = l.split('\0')
97 97 i = int(strindex)
98 98 dst = files[i]
99 99 copies[dst] = src
100 100 return copies
101 101 except (ValueError, IndexError):
102 102 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
103 103 # used different syntax for the value.
104 104 return None
105 105
106 106 def encodefileindices(files, subset):
107 107 subset = set(subset)
108 108 indices = []
109 109 for i, f in enumerate(files):
110 110 if f in subset:
111 111 indices.append('%d' % i)
112 112 return '\n'.join(indices)
113 113
114 114 def decodefileindices(files, data):
115 115 try:
116 116 subset = []
117 117 for strindex in data.split('\n'):
118 118 i = int(strindex)
119 119 if i < 0 or i >= len(files):
120 120 return None
121 121 subset.append(files[i])
122 122 return subset
123 123 except (ValueError, IndexError):
124 124 # Perhaps someone had chosen the same key name (e.g. "added") and
125 125 # used different syntax for the value.
126 126 return None
127 127
128 128 def stripdesc(desc):
129 129 """strip trailing whitespace and leading and trailing empty lines"""
130 130 return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')
131 131
132 132 class appender(object):
133 133 '''the changelog index must be updated last on disk, so we use this class
134 134 to delay writes to it'''
135 135 def __init__(self, vfs, name, mode, buf):
136 136 self.data = buf
137 137 fp = vfs(name, mode)
138 138 self.fp = fp
139 139 self.offset = fp.tell()
140 140 self.size = vfs.fstat(fp).st_size
141 141 self._end = self.size
142 142
143 143 def end(self):
144 144 return self._end
145 145 def tell(self):
146 146 return self.offset
147 147 def flush(self):
148 148 pass
149 149
150 150 @property
151 151 def closed(self):
152 152 return self.fp.closed
153 153
154 154 def close(self):
155 155 self.fp.close()
156 156
157 157 def seek(self, offset, whence=0):
158 158 '''virtual file offset spans real file and data'''
159 159 if whence == 0:
160 160 self.offset = offset
161 161 elif whence == 1:
162 162 self.offset += offset
163 163 elif whence == 2:
164 164 self.offset = self.end() + offset
165 165 if self.offset < self.size:
166 166 self.fp.seek(self.offset)
167 167
168 168 def read(self, count=-1):
169 169 '''only trick here is reads that span real file and data'''
170 170 ret = ""
171 171 if self.offset < self.size:
172 172 s = self.fp.read(count)
173 173 ret = s
174 174 self.offset += len(s)
175 175 if count > 0:
176 176 count -= len(s)
177 177 if count != 0:
178 178 doff = self.offset - self.size
179 179 self.data.insert(0, "".join(self.data))
180 180 del self.data[1:]
181 181 s = self.data[0][doff:doff + count]
182 182 self.offset += len(s)
183 183 ret += s
184 184 return ret
185 185
186 186 def write(self, s):
187 187 self.data.append(bytes(s))
188 188 self.offset += len(s)
189 189 self._end += len(s)
190 190
191 191 def __enter__(self):
192 192 self.fp.__enter__()
193 193 return self
194 194
195 195 def __exit__(self, *args):
196 196 return self.fp.__exit__(*args)
197 197
198 198 def _divertopener(opener, target):
199 199 """build an opener that writes in 'target.a' instead of 'target'"""
200 200 def _divert(name, mode='r', checkambig=False):
201 201 if name != target:
202 202 return opener(name, mode)
203 203 return opener(name + ".a", mode)
204 204 return _divert
205 205
206 206 def _delayopener(opener, target, buf):
207 207 """build an opener that stores chunks in 'buf' instead of 'target'"""
208 208 def _delay(name, mode='r', checkambig=False):
209 209 if name != target:
210 210 return opener(name, mode)
211 211 return appender(opener, name, mode, buf)
212 212 return _delay
213 213
214 214 @attr.s
215 215 class _changelogrevision(object):
216 216 # Extensions might modify _defaultextra, so let the constructor below pass
217 217 # it in
218 218 extra = attr.ib()
219 219 manifest = attr.ib(default=nullid)
220 220 user = attr.ib(default='')
221 221 date = attr.ib(default=(0, 0))
222 222 files = attr.ib(default=attr.Factory(list))
223 223 filesadded = attr.ib(default=None)
224 224 filesremoved = attr.ib(default=None)
225 225 p1copies = attr.ib(default=None)
226 226 p2copies = attr.ib(default=None)
227 227 description = attr.ib(default='')
228 228
229 229 class changelogrevision(object):
230 230 """Holds results of a parsed changelog revision.
231 231
232 232 Changelog revisions consist of multiple pieces of data, including
233 233 the manifest node, user, and date. This object exposes a view into
234 234 the parsed object.
235 235 """
236 236
237 237 __slots__ = (
238 238 r'_offsets',
239 239 r'_text',
240 240 )
241 241
242 242 def __new__(cls, text):
243 243 if not text:
244 244 return _changelogrevision(extra=_defaultextra)
245 245
246 246 self = super(changelogrevision, cls).__new__(cls)
247 247 # We could return here and implement the following as an __init__.
248 248 # But doing it here is equivalent and saves an extra function call.
249 249
250 250 # format used:
251 251 # nodeid\n : manifest node in ascii
252 252 # user\n : user, no \n or \r allowed
253 253 # time tz extra\n : date (time is int or float, timezone is int)
254 254 # : extra is metadata, encoded and separated by '\0'
255 255 # : older versions ignore it
256 256 # files\n\n : files modified by the cset, no \n or \r allowed
257 257 # (.*) : comment (free text, ideally utf-8)
258 258 #
259 259 # changelog v0 doesn't use extra
260 260
261 261 nl1 = text.index('\n')
262 262 nl2 = text.index('\n', nl1 + 1)
263 263 nl3 = text.index('\n', nl2 + 1)
264 264
265 265 # The list of files may be empty. Which means nl3 is the first of the
266 266 # double newline that precedes the description.
267 267 if text[nl3 + 1:nl3 + 2] == '\n':
268 268 doublenl = nl3
269 269 else:
270 270 doublenl = text.index('\n\n', nl3 + 1)
271 271
272 272 self._offsets = (nl1, nl2, nl3, doublenl)
273 273 self._text = text
274 274
275 275 return self
276 276
277 277 @property
278 278 def manifest(self):
279 279 return bin(self._text[0:self._offsets[0]])
280 280
281 281 @property
282 282 def user(self):
283 283 off = self._offsets
284 284 return encoding.tolocal(self._text[off[0] + 1:off[1]])
285 285
286 286 @property
287 287 def _rawdate(self):
288 288 off = self._offsets
289 289 dateextra = self._text[off[1] + 1:off[2]]
290 290 return dateextra.split(' ', 2)[0:2]
291 291
292 292 @property
293 293 def _rawextra(self):
294 294 off = self._offsets
295 295 dateextra = self._text[off[1] + 1:off[2]]
296 296 fields = dateextra.split(' ', 2)
297 297 if len(fields) != 3:
298 298 return None
299 299
300 300 return fields[2]
301 301
302 302 @property
303 303 def date(self):
304 304 raw = self._rawdate
305 305 time = float(raw[0])
306 306 # Various tools did silly things with the timezone.
307 307 try:
308 308 timezone = int(raw[1])
309 309 except ValueError:
310 310 timezone = 0
311 311
312 312 return time, timezone
313 313
314 314 @property
315 315 def extra(self):
316 316 raw = self._rawextra
317 317 if raw is None:
318 318 return _defaultextra
319 319
320 320 return decodeextra(raw)
321 321
322 322 @property
323 323 def files(self):
324 324 off = self._offsets
325 325 if off[2] == off[3]:
326 326 return []
327 327
328 328 return self._text[off[2] + 1:off[3]].split('\n')
329 329
330 330 @property
331 331 def filesadded(self):
332 332 rawindices = self.extra.get('filesadded')
333 333 return rawindices and decodefileindices(self.files, rawindices)
334 334
335 335 @property
336 336 def filesremoved(self):
337 337 rawindices = self.extra.get('filesremoved')
338 338 return rawindices and decodefileindices(self.files, rawindices)
339 339
340 340 @property
341 341 def p1copies(self):
342 342 rawcopies = self.extra.get('p1copies')
343 343 return rawcopies and decodecopies(self.files, rawcopies)
344 344
345 345 @property
346 346 def p2copies(self):
347 347 rawcopies = self.extra.get('p2copies')
348 348 return rawcopies and decodecopies(self.files, rawcopies)
349 349
350 350 @property
351 351 def description(self):
352 352 return encoding.tolocal(self._text[self._offsets[3] + 2:])
353 353
354 354 class changelog(revlog.revlog):
355 355 def __init__(self, opener, trypending=False):
356 356 """Load a changelog revlog using an opener.
357 357
358 358 If ``trypending`` is true, we attempt to load the index from a
359 359 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
360 360 The ``00changelog.i.a`` file contains index (and possibly inline
361 361 revision) data for a transaction that hasn't been finalized yet.
362 362 It exists in a separate file to facilitate readers (such as
363 363 hooks processes) accessing data before a transaction is finalized.
364 364 """
365 365 if trypending and opener.exists('00changelog.i.a'):
366 366 indexfile = '00changelog.i.a'
367 367 else:
368 368 indexfile = '00changelog.i'
369 369
370 370 datafile = '00changelog.d'
371 371 revlog.revlog.__init__(self, opener, indexfile, datafile=datafile,
372 372 checkambig=True, mmaplargeindex=True)
373 373
374 374 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
375 375 # changelogs don't benefit from generaldelta.
376 376
377 377 self.version &= ~revlog.FLAG_GENERALDELTA
378 378 self._generaldelta = False
379 379
380 380 # Delta chains for changelogs tend to be very small because entries
381 381 # tend to be small and don't delta well with each other. So disable delta
382 382 # chains.
383 383 self._storedeltachains = False
384 384
385 385 self._realopener = opener
386 386 self._delayed = False
387 387 self._delaybuf = None
388 388 self._divert = False
389 389 self.filteredrevs = frozenset()
390 390
391 391 def tiprev(self):
392 392 for i in pycompat.xrange(len(self) - 1, -2, -1):
393 393 if i not in self.filteredrevs:
394 394 return i
395 395
396 396 def tip(self):
397 397 """filtered version of revlog.tip"""
398 398 return self.node(self.tiprev())
399 399
400 400 def __contains__(self, rev):
401 401 """filtered version of revlog.__contains__"""
402 402 return (0 <= rev < len(self)
403 403 and rev not in self.filteredrevs)
404 404
405 405 def __iter__(self):
406 406 """filtered version of revlog.__iter__"""
407 407 if len(self.filteredrevs) == 0:
408 408 return revlog.revlog.__iter__(self)
409 409
410 410 def filterediter():
411 411 for i in pycompat.xrange(len(self)):
412 412 if i not in self.filteredrevs:
413 413 yield i
414 414
415 415 return filterediter()
416 416
417 417 def revs(self, start=0, stop=None):
418 418 """filtered version of revlog.revs"""
419 419 for i in super(changelog, self).revs(start, stop):
420 420 if i not in self.filteredrevs:
421 421 yield i
422 422
423 def reachableroots(self, minroot, heads, roots, includepath=False):
424 return self.index.reachableroots2(minroot, heads, roots, includepath)
425
426 423 def _checknofilteredinrevs(self, revs):
427 424 """raise the appropriate error if 'revs' contains a filtered revision
428 425
429 426 This returns a version of 'revs' to be used thereafter by the caller.
430 427 In particular, if revs is an iterator, it is converted into a set.
431 428 """
432 429 safehasattr = util.safehasattr
433 430 if safehasattr(revs, '__next__'):
434 431 # Note that inspect.isgenerator() is not true for iterators, hence this check
435 432 revs = set(revs)
436 433
437 434 filteredrevs = self.filteredrevs
438 435 if safehasattr(revs, 'first'): # smartset
439 436 offenders = revs & filteredrevs
440 437 else:
441 438 offenders = filteredrevs.intersection(revs)
442 439
443 440 for rev in offenders:
444 441 raise error.FilteredIndexError(rev)
445 442 return revs
446 443
447 444 def headrevs(self, revs=None):
448 445 if revs is None and self.filteredrevs:
449 446 try:
450 447 return self.index.headrevsfiltered(self.filteredrevs)
451 448 # AttributeError covers non-c-extension environments and
452 449 # old c extensions without filter handling.
453 450 except AttributeError:
454 451 return self._headrevs()
455 452
456 453 if self.filteredrevs:
457 454 revs = self._checknofilteredinrevs(revs)
458 455 return super(changelog, self).headrevs(revs)
459 456
460 457 def strip(self, *args, **kwargs):
461 458 # XXX make something better than assert
462 459 # We can't expect proper strip behavior if we are filtered.
463 460 assert not self.filteredrevs
464 461 super(changelog, self).strip(*args, **kwargs)
465 462
466 463 def rev(self, node):
467 464 """filtered version of revlog.rev"""
468 465 r = super(changelog, self).rev(node)
469 466 if r in self.filteredrevs:
470 467 raise error.FilteredLookupError(hex(node), self.indexfile,
471 468 _('filtered node'))
472 469 return r
473 470
474 471 def node(self, rev):
475 472 """filtered version of revlog.node"""
476 473 if rev in self.filteredrevs:
477 474 raise error.FilteredIndexError(rev)
478 475 return super(changelog, self).node(rev)
479 476
480 477 def linkrev(self, rev):
481 478 """filtered version of revlog.linkrev"""
482 479 if rev in self.filteredrevs:
483 480 raise error.FilteredIndexError(rev)
484 481 return super(changelog, self).linkrev(rev)
485 482
486 483 def parentrevs(self, rev):
487 484 """filtered version of revlog.parentrevs"""
488 485 if rev in self.filteredrevs:
489 486 raise error.FilteredIndexError(rev)
490 487 return super(changelog, self).parentrevs(rev)
491 488
492 489 def flags(self, rev):
493 490 """filtered version of revlog.flags"""
494 491 if rev in self.filteredrevs:
495 492 raise error.FilteredIndexError(rev)
496 493 return super(changelog, self).flags(rev)
497 494
498 495 def delayupdate(self, tr):
499 496 "delay visibility of index updates to other readers"
500 497
501 498 if not self._delayed:
502 499 if len(self) == 0:
503 500 self._divert = True
504 501 if self._realopener.exists(self.indexfile + '.a'):
505 502 self._realopener.unlink(self.indexfile + '.a')
506 503 self.opener = _divertopener(self._realopener, self.indexfile)
507 504 else:
508 505 self._delaybuf = []
509 506 self.opener = _delayopener(self._realopener, self.indexfile,
510 507 self._delaybuf)
511 508 self._delayed = True
512 509 tr.addpending('cl-%i' % id(self), self._writepending)
513 510 tr.addfinalize('cl-%i' % id(self), self._finalize)
514 511
515 512 def _finalize(self, tr):
516 513 "finalize index updates"
517 514 self._delayed = False
518 515 self.opener = self._realopener
519 516 # move redirected index data back into place
520 517 if self._divert:
521 518 assert not self._delaybuf
522 519 tmpname = self.indexfile + ".a"
523 520 nfile = self.opener.open(tmpname)
524 521 nfile.close()
525 522 self.opener.rename(tmpname, self.indexfile, checkambig=True)
526 523 elif self._delaybuf:
527 524 fp = self.opener(self.indexfile, 'a', checkambig=True)
528 525 fp.write("".join(self._delaybuf))
529 526 fp.close()
530 527 self._delaybuf = None
531 528 self._divert = False
532 529 # split when we're done
533 530 self._enforceinlinesize(tr)
534 531
535 532 def _writepending(self, tr):
536 533 "create a file containing the unfinalized state for pretxnchangegroup"
537 534 if self._delaybuf:
538 535 # make a temporary copy of the index
539 536 fp1 = self._realopener(self.indexfile)
540 537 pendingfilename = self.indexfile + ".a"
541 538 # register as a temp file to ensure cleanup on failure
542 539 tr.registertmp(pendingfilename)
543 540 # write existing data
544 541 fp2 = self._realopener(pendingfilename, "w")
545 542 fp2.write(fp1.read())
546 543 # add pending data
547 544 fp2.write("".join(self._delaybuf))
548 545 fp2.close()
549 546 # switch modes so finalize can simply rename
550 547 self._delaybuf = None
551 548 self._divert = True
552 549 self.opener = _divertopener(self._realopener, self.indexfile)
553 550
554 551 if self._divert:
555 552 return True
556 553
557 554 return False
558 555
559 556 def _enforceinlinesize(self, tr, fp=None):
560 557 if not self._delayed:
561 558 revlog.revlog._enforceinlinesize(self, tr, fp)
562 559
563 560 def read(self, node):
564 561 """Obtain data from a parsed changelog revision.
565 562
566 563 Returns a 6-tuple of:
567 564
568 565 - manifest node in binary
569 566 - author/user as a localstr
570 567 - date as a 2-tuple of (time, timezone)
571 568 - list of files
572 569 - commit message as a localstr
573 570 - dict of extra metadata
574 571
575 572 Unless you need to access all fields, consider calling
576 573 ``changelogrevision`` instead, as it is faster for partial object
577 574 access.
578 575 """
579 576 c = changelogrevision(self.revision(node))
580 577 return (
581 578 c.manifest,
582 579 c.user,
583 580 c.date,
584 581 c.files,
585 582 c.description,
586 583 c.extra
587 584 )
588 585
589 586 def changelogrevision(self, nodeorrev):
590 587 """Obtain a ``changelogrevision`` for a node or revision."""
591 588 return changelogrevision(self.revision(nodeorrev))
592 589
593 590 def readfiles(self, node):
594 591 """
595 592 short version of read that only returns the files modified by the cset
596 593 """
597 594 text = self.revision(node)
598 595 if not text:
599 596 return []
600 597 last = text.index("\n\n")
601 598 l = text[:last].split('\n')
602 599 return l[3:]
603 600
604 601 def add(self, manifest, files, desc, transaction, p1, p2,
605 602 user, date=None, extra=None, p1copies=None, p2copies=None,
606 603 filesadded=None, filesremoved=None):
607 604 # Convert to UTF-8 encoded bytestrings as the very first
608 605 # thing: calling any method on a localstr object will turn it
609 606 # into a str object and the cached UTF-8 string is thus lost.
610 607 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
611 608
612 609 user = user.strip()
613 610 # An empty username or a username with a "\n" will make the
614 611 # revision text contain two "\n\n" sequences -> corrupt
615 612 # repository since read cannot unpack the revision.
616 613 if not user:
617 614 raise error.StorageError(_("empty username"))
618 615 if "\n" in user:
619 616 raise error.StorageError(_("username %r contains a newline")
620 617 % pycompat.bytestr(user))
621 618
622 619 desc = stripdesc(desc)
623 620
624 621 if date:
625 622 parseddate = "%d %d" % dateutil.parsedate(date)
626 623 else:
627 624 parseddate = "%d %d" % dateutil.makedate()
628 625 if extra:
629 626 branch = extra.get("branch")
630 627 if branch in ("default", ""):
631 628 del extra["branch"]
632 629 elif branch in (".", "null", "tip"):
633 630 raise error.StorageError(_('the name \'%s\' is reserved')
634 631 % branch)
635 632 extrasentries = p1copies, p2copies, filesadded, filesremoved
636 633 if extra is None and any(x is not None for x in extrasentries):
637 634 extra = {}
638 635 sortedfiles = sorted(files)
639 636 if p1copies is not None:
640 637 extra['p1copies'] = encodecopies(sortedfiles, p1copies)
641 638 if p2copies is not None:
642 639 extra['p2copies'] = encodecopies(sortedfiles, p2copies)
643 640 if filesadded is not None:
644 641 extra['filesadded'] = encodefileindices(sortedfiles, filesadded)
645 642 if filesremoved is not None:
646 643 extra['filesremoved'] = encodefileindices(sortedfiles, filesremoved)
647 644
648 645 if extra:
649 646 extra = encodeextra(extra)
650 647 parseddate = "%s %s" % (parseddate, extra)
651 648 l = [hex(manifest), user, parseddate] + sortedfiles + ["", desc]
652 649 text = "\n".join(l)
653 650 return self.addrevision(text, transaction, len(self), p1, p2)
654 651
655 652 def branchinfo(self, rev):
656 653 """return the branch name and open/close state of a revision
657 654
658 655 This function exists because creating a changectx object
659 656 just to access this is costly."""
660 657 extra = self.read(rev)[5]
661 658 return encoding.tolocal(extra.get("branch")), 'close' in extra
662 659
663 660 def _nodeduplicatecallback(self, transaction, node):
664 661 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
665 662 #
666 663 # We track them in a list to preserve their order from the source bundle
667 664 duplicates = transaction.changes.setdefault('revduplicates', [])
668 665 duplicates.append(self.rev(node))
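The changelog.py hunk above also documents the on-disk entry layout (the "format used" comment in changelogrevision.__new__). A round-trip sketch of that layout; buildentry is a hypothetical helper, real entries are produced by changelog.add():

# Hypothetical helper mirroring the documented layout:
#   manifest-hex \n user \n "time tz [extra]" \n files... \n \n description
def buildentry(manifesthex, user, time, tz, files, desc):
    date = '%d %d' % (time, tz)
    return '\n'.join([manifesthex, user, date] + sorted(files) + ['', desc])

entry = buildentry('ab' * 20, 'alice <alice@example.com>',
                   1546300800, 0, ['b.txt', 'a.txt'], 'add a and b')
# the first line is the manifest node in ascii, as the parser above expects
assert entry[:entry.index('\n')] == 'ab' * 20
# a double newline separates the sorted file list from the description
assert entry.split('\n\n', 1) == [
    'ab' * 20 + '\nalice <alice@example.com>\n1546300800 0\na.txt\nb.txt',
    'add a and b']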
@@ -1,844 +1,838
1 1 # dagop.py - graph ancestry and topology algorithm for revset
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11
12 12 from .node import (
13 13 nullrev,
14 14 )
15 15 from .thirdparty import (
16 16 attr,
17 17 )
18 18 from . import (
19 19 error,
20 20 mdiff,
21 21 node,
22 22 patch,
23 23 pycompat,
24 24 smartset,
25 25 )
26 26
27 27 baseset = smartset.baseset
28 28 generatorset = smartset.generatorset
29 29
30 30 # possible maximum depth between null and wdir()
31 31 maxlogdepth = 0x80000000
32 32
33 33 def _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse):
34 34 """Walk DAG using 'pfunc' from the given 'revs' nodes
35 35
36 36 'pfunc(rev)' should return the parent/child revisions of the given 'rev'
37 37 if 'reverse' is True/False respectively.
38 38
39 39 Scan ends at the stopdepth (exclusive) if specified. Revisions found
40 40 earlier than the startdepth are omitted.
41 41 """
42 42 if startdepth is None:
43 43 startdepth = 0
44 44 if stopdepth is None:
45 45 stopdepth = maxlogdepth
46 46 if stopdepth == 0:
47 47 return
48 48 if stopdepth < 0:
49 49 raise error.ProgrammingError('negative stopdepth')
50 50 if reverse:
51 51 heapsign = -1 # max heap
52 52 else:
53 53 heapsign = +1 # min heap
54 54
55 55 # load input revs lazily to heap so earlier revisions can be yielded
56 56 # without fully computing the input revs
57 57 revs.sort(reverse)
58 58 irevs = iter(revs)
59 59 pendingheap = [] # [(heapsign * rev, depth), ...] (i.e. lower depth first)
60 60
61 61 inputrev = next(irevs, None)
62 62 if inputrev is not None:
63 63 heapq.heappush(pendingheap, (heapsign * inputrev, 0))
64 64
65 65 lastrev = None
66 66 while pendingheap:
67 67 currev, curdepth = heapq.heappop(pendingheap)
68 68 currev = heapsign * currev
69 69 if currev == inputrev:
70 70 inputrev = next(irevs, None)
71 71 if inputrev is not None:
72 72 heapq.heappush(pendingheap, (heapsign * inputrev, 0))
73 73 # rescan parents until curdepth >= startdepth because queued entries
74 74 # of the same revision are iterated from the lowest depth
75 75 foundnew = (currev != lastrev)
76 76 if foundnew and curdepth >= startdepth:
77 77 lastrev = currev
78 78 yield currev
79 79 pdepth = curdepth + 1
80 80 if foundnew and pdepth < stopdepth:
81 81 for prev in pfunc(currev):
82 82 if prev != node.nullrev:
83 83 heapq.heappush(pendingheap, (heapsign * prev, pdepth))
84 84
85 85 def filectxancestors(fctxs, followfirst=False):
86 86 """Like filectx.ancestors(), but can walk from multiple files/revisions,
87 87 and includes the given fctxs themselves
88 88
89 89 Yields (rev, {fctx, ...}) pairs in descending order.
90 90 """
91 91 visit = {}
92 92 visitheap = []
93 93 def addvisit(fctx):
94 94 rev = fctx.rev()
95 95 if rev not in visit:
96 96 visit[rev] = set()
97 97 heapq.heappush(visitheap, -rev) # max heap
98 98 visit[rev].add(fctx)
99 99
100 100 if followfirst:
101 101 cut = 1
102 102 else:
103 103 cut = None
104 104
105 105 for c in fctxs:
106 106 addvisit(c)
107 107 while visit:
108 108 currev = -heapq.heappop(visitheap)
109 109 curfctxs = visit.pop(currev)
110 110 yield currev, curfctxs
111 111 for c in curfctxs:
112 112 for parent in c.parents()[:cut]:
113 113 addvisit(parent)
114 114 assert not visitheap
115 115
116 116 def filerevancestors(fctxs, followfirst=False):
117 117 """Like filectx.ancestors(), but can walk from multiple files/revisions,
118 118 and includes the given fctxs themselves
119 119
120 120 Returns a smartset.
121 121 """
122 122 gen = (rev for rev, _cs in filectxancestors(fctxs, followfirst))
123 123 return generatorset(gen, iterasc=False)
124 124
125 125 def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth, cutfunc):
126 126 if followfirst:
127 127 cut = 1
128 128 else:
129 129 cut = None
130 130 cl = repo.changelog
131 131 def plainpfunc(rev):
132 132 try:
133 133 return cl.parentrevs(rev)[:cut]
134 134 except error.WdirUnsupported:
135 135 return (pctx.rev() for pctx in repo[rev].parents()[:cut])
136 136 if cutfunc is None:
137 137 pfunc = plainpfunc
138 138 else:
139 139 pfunc = lambda rev: [r for r in plainpfunc(rev) if not cutfunc(r)]
140 140 revs = revs.filter(lambda rev: not cutfunc(rev))
141 141 return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=True)
142 142
143 143 def revancestors(repo, revs, followfirst=False, startdepth=None,
144 144 stopdepth=None, cutfunc=None):
145 145 r"""Like revlog.ancestors(), but supports additional options, includes
146 146 the given revs themselves, and returns a smartset
147 147
148 148 Scan ends at the stopdepth (exclusive) if specified. Revisions found
149 149 earlier than the startdepth are omitted.
150 150
151 151 If cutfunc is provided, it will be used to cut the traversal of the DAG.
152 152 When cutfunc(X) returns True, the DAG traversal stops - revision X and
153 153 X's ancestors in the traversal path will be skipped. This could be an
154 154 optimization sometimes.
155 155
156 156 Note: if Y is an ancestor of X, cutfunc(X) returning True does not
157 157 necessarily mean Y will also be cut. Usually cutfunc(Y) also wants to
158 158 return True in this case. For example,
159 159
160 160 D # revancestors(repo, D, cutfunc=lambda rev: rev == B)
161 161 |\ # will include "A", because the path D -> C -> A was not cut.
162 162 B C # If "B" gets cut, "A" might want to be cut too.
163 163 |/
164 164 A
165 165 """
166 166 gen = _genrevancestors(repo, revs, followfirst, startdepth, stopdepth,
167 167 cutfunc)
168 168 return generatorset(gen, iterasc=False)
169 169
170 170 def _genrevdescendants(repo, revs, followfirst):
171 171 if followfirst:
172 172 cut = 1
173 173 else:
174 174 cut = None
175 175
176 176 cl = repo.changelog
177 177 first = revs.min()
178 178 nullrev = node.nullrev
179 179 if first == nullrev:
180 180 # Are there nodes with a null first parent and a non-null
181 181 # second one? Maybe. Do we care? Probably not.
182 182 yield first
183 183 for i in cl:
184 184 yield i
185 185 else:
186 186 seen = set(revs)
187 187 for i in cl.revs(first):
188 188 if i in seen:
189 189 yield i
190 190 continue
191 191 for x in cl.parentrevs(i)[:cut]:
192 192 if x != nullrev and x in seen:
193 193 seen.add(i)
194 194 yield i
195 195 break
196 196
197 197 def _builddescendantsmap(repo, startrev, followfirst):
198 198 """Build map of 'rev -> child revs', offset from startrev"""
199 199 cl = repo.changelog
200 200 nullrev = node.nullrev
201 201 descmap = [[] for _rev in pycompat.xrange(startrev, len(cl))]
202 202 for currev in cl.revs(startrev + 1):
203 203 p1rev, p2rev = cl.parentrevs(currev)
204 204 if p1rev >= startrev:
205 205 descmap[p1rev - startrev].append(currev)
206 206 if not followfirst and p2rev != nullrev and p2rev >= startrev:
207 207 descmap[p2rev - startrev].append(currev)
208 208 return descmap
209 209
210 210 def _genrevdescendantsofdepth(repo, revs, followfirst, startdepth, stopdepth):
211 211 startrev = revs.min()
212 212 descmap = _builddescendantsmap(repo, startrev, followfirst)
213 213 def pfunc(rev):
214 214 return descmap[rev - startrev]
215 215 return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=False)
216 216
217 217 def revdescendants(repo, revs, followfirst, startdepth=None, stopdepth=None):
218 218 """Like revlog.descendants() but supports additional options, includes
219 219 the given revs themselves, and returns a smartset
220 220
221 221 Scan ends at the stopdepth (exclusive) if specified. Revisions found
222 222 earlier than the startdepth are omitted.
223 223 """
224 224 if startdepth is None and (stopdepth is None or stopdepth >= maxlogdepth):
225 225 gen = _genrevdescendants(repo, revs, followfirst)
226 226 else:
227 227 gen = _genrevdescendantsofdepth(repo, revs, followfirst,
228 228 startdepth, stopdepth)
229 229 return generatorset(gen, iterasc=True)
230 230
231 231 def descendantrevs(revs, revsfn, parentrevsfn):
232 232 """Generate revision number descendants in revision order.
233 233
234 234 Yields revision numbers starting with a child of some rev in
235 235 ``revs``. Results are ordered by revision number and are
236 236 therefore topological. Each revision is not considered a descendant
237 237 of itself.
238 238
239 239 ``revsfn`` is a callable that with no argument iterates over all
240 240 revision numbers and with a ``start`` argument iterates over revision
241 241 numbers beginning with that value.
242 242
243 243 ``parentrevsfn`` is a callable that receives a revision number and
244 244 returns an iterable of parent revision numbers, whose values may include
245 245 nullrev.
246 246 """
247 247 first = min(revs)
248 248
249 249 if first == nullrev:
250 250 for rev in revsfn():
251 251 yield rev
252 252 return
253 253
254 254 seen = set(revs)
255 255 for rev in revsfn(start=first + 1):
256 256 for prev in parentrevsfn(rev):
257 257 if prev != nullrev and prev in seen:
258 258 seen.add(rev)
259 259 yield rev
260 260 break
261 261
262 def _reachablerootspure(repo, minroot, roots, heads, includepath):
263 """See reachableroots"""
262 def _reachablerootspure(pfunc, minroot, roots, heads, includepath):
263 """See revlog.reachableroots"""
264 264 if not roots:
265 265 return []
266 parentrevs = repo.changelog.parentrevs
267 266 roots = set(roots)
268 267 visit = list(heads)
269 268 reachable = set()
270 269 seen = {}
271 270 # prefetch all the things! (because python is slow)
272 271 reached = reachable.add
273 272 dovisit = visit.append
274 273 nextvisit = visit.pop
275 274 # open-code the post-order traversal due to the tiny size of
276 275 # sys.getrecursionlimit()
277 276 while visit:
278 277 rev = nextvisit()
279 278 if rev in roots:
280 279 reached(rev)
281 280 if not includepath:
282 281 continue
283 parents = parentrevs(rev)
282 parents = pfunc(rev)
284 283 seen[rev] = parents
285 284 for parent in parents:
286 285 if parent >= minroot and parent not in seen:
287 286 dovisit(parent)
288 287 if not reachable:
289 288 return baseset()
290 289 if not includepath:
291 290 return reachable
292 291 for rev in sorted(seen):
293 292 for parent in seen[rev]:
294 293 if parent in reachable:
295 294 reached(rev)
296 295 return reachable
297 296
298 297 def reachableroots(repo, roots, heads, includepath=False):
299 """return (heads(::<roots> and <roots>::<heads>))
300
301 If includepath is True, return (<roots>::<heads>)."""
298 """See revlog.reachableroots"""
302 299 if not roots:
303 300 return baseset()
304 301 minroot = roots.min()
305 302 roots = list(roots)
306 303 heads = list(heads)
307 try:
308 304 revs = repo.changelog.reachableroots(minroot, heads, roots, includepath)
309 except AttributeError:
310 revs = _reachablerootspure(repo, minroot, roots, heads, includepath)
311 305 revs = baseset(revs)
312 306 revs.sort()
313 307 return revs
314 308
315 309 def _changesrange(fctx1, fctx2, linerange2, diffopts):
316 310 """Return `(diffinrange, linerange1)` where `diffinrange` is True
317 311 if diff from fctx2 to fctx1 has changes in linerange2 and
318 312 `linerange1` is the new line range for fctx1.
319 313 """
320 314 blocks = mdiff.allblocks(fctx1.data(), fctx2.data(), diffopts)
321 315 filteredblocks, linerange1 = mdiff.blocksinrange(blocks, linerange2)
322 316 diffinrange = any(stype == '!' for _, stype in filteredblocks)
323 317 return diffinrange, linerange1
324 318
325 319 def blockancestors(fctx, fromline, toline, followfirst=False):
326 320 """Yield ancestors of `fctx` with respect to the block of lines within
327 321 `fromline`-`toline` range.
328 322 """
329 323 diffopts = patch.diffopts(fctx._repo.ui)
330 324 fctx = fctx.introfilectx()
331 325 visit = {(fctx.linkrev(), fctx.filenode()): (fctx, (fromline, toline))}
332 326 while visit:
333 327 c, linerange2 = visit.pop(max(visit))
334 328 pl = c.parents()
335 329 if followfirst:
336 330 pl = pl[:1]
337 331 if not pl:
338 332 # The block originates from the initial revision.
339 333 yield c, linerange2
340 334 continue
341 335 inrange = False
342 336 for p in pl:
343 337 inrangep, linerange1 = _changesrange(p, c, linerange2, diffopts)
344 338 inrange = inrange or inrangep
345 339 if linerange1[0] == linerange1[1]:
346 340 # Parent's linerange is empty, meaning that the block got
347 341 # introduced in this revision; no need to go further in this
348 342 # branch.
349 343 continue
350 344 # Set _descendantrev with 'c' (a known descendant) so that, when
351 345 # _adjustlinkrev is called for 'p', it receives this descendant
352 346 # (as srcrev) instead of the possibly topmost introrev.
353 347 p._descendantrev = c.rev()
354 348 visit[p.linkrev(), p.filenode()] = p, linerange1
355 349 if inrange:
356 350 yield c, linerange2
357 351
358 352 def blockdescendants(fctx, fromline, toline):
359 353 """Yield descendants of `fctx` with respect to the block of lines within
360 354 `fromline`-`toline` range.
361 355 """
362 356 # First possibly yield 'fctx' if it has changes in range with respect to
363 357 # its parents.
364 358 try:
365 359 c, linerange1 = next(blockancestors(fctx, fromline, toline))
366 360 except StopIteration:
367 361 pass
368 362 else:
369 363 if c == fctx:
370 364 yield c, linerange1
371 365
372 366 diffopts = patch.diffopts(fctx._repo.ui)
373 367 fl = fctx.filelog()
374 368 seen = {fctx.filerev(): (fctx, (fromline, toline))}
375 369 for i in fl.descendants([fctx.filerev()]):
376 370 c = fctx.filectx(i)
377 371 inrange = False
378 372 for x in fl.parentrevs(i):
379 373 try:
380 374 p, linerange2 = seen[x]
381 375 except KeyError:
382 376 # nullrev or other branch
383 377 continue
384 378 inrangep, linerange1 = _changesrange(c, p, linerange2, diffopts)
385 379 inrange = inrange or inrangep
386 380 # If revision 'i' has been seen (it's a merge) and the line range
387 381 # previously computed differs from the one we just got, we take the
388 382 # surrounding interval. This is conservative but avoids losing
389 383 # information.
390 384 if i in seen and seen[i][1] != linerange1:
391 385 lbs, ubs = zip(linerange1, seen[i][1])
392 386 linerange1 = min(lbs), max(ubs)
393 387 seen[i] = c, linerange1
394 388 if inrange:
395 389 yield c, linerange1
396 390
397 391 @attr.s(slots=True, frozen=True)
398 392 class annotateline(object):
399 393 fctx = attr.ib()
400 394 lineno = attr.ib()
401 395 # Whether this annotation was the result of a skip-annotate.
402 396 skip = attr.ib(default=False)
403 397 text = attr.ib(default=None)
404 398
405 399 @attr.s(slots=True, frozen=True)
406 400 class _annotatedfile(object):
407 401 # list indexed by lineno - 1
408 402 fctxs = attr.ib()
409 403 linenos = attr.ib()
410 404 skips = attr.ib()
411 405 # full file content
412 406 text = attr.ib()
413 407
414 408 def _countlines(text):
415 409 if text.endswith("\n"):
416 410 return text.count("\n")
417 411 return text.count("\n") + int(bool(text))
418 412
419 413 def _decoratelines(text, fctx):
420 414 n = _countlines(text)
421 415 linenos = pycompat.rangelist(1, n + 1)
422 416 return _annotatedfile([fctx] * n, linenos, [False] * n, text)
423 417
424 418 def _annotatepair(parents, childfctx, child, skipchild, diffopts):
425 419 r'''
426 420 Given parent and child fctxes and annotate data for parents, for all lines
427 421 in either parent that match the child, annotate the child with the parent's
428 422 data.
429 423
430 424 Additionally, if `skipchild` is True, replace all other lines with parent
431 425 annotate data as well such that child is never blamed for any lines.
432 426
433 427 See test-annotate.py for unit tests.
434 428 '''
435 429 pblocks = [(parent, mdiff.allblocks(parent.text, child.text, opts=diffopts))
436 430 for parent in parents]
437 431
438 432 if skipchild:
439 433 # Need to iterate over the blocks twice -- make it a list
440 434 pblocks = [(p, list(blocks)) for (p, blocks) in pblocks]
441 435 # Mercurial currently prefers p2 over p1 for annotate.
442 436 # TODO: change this?
443 437 for parent, blocks in pblocks:
444 438 for (a1, a2, b1, b2), t in blocks:
445 439 # Changed blocks ('!') or blocks made only of blank lines ('~')
446 440 # belong to the child.
447 441 if t == '=':
448 442 child.fctxs[b1:b2] = parent.fctxs[a1:a2]
449 443 child.linenos[b1:b2] = parent.linenos[a1:a2]
450 444 child.skips[b1:b2] = parent.skips[a1:a2]
451 445
452 446 if skipchild:
453 447 # Now try and match up anything that couldn't be matched.
454 448 # Reversing pblocks maintains bias towards p2, matching above
455 449 # behavior.
456 450 pblocks.reverse()
457 451
458 452 # The heuristics are:
459 453 # * Work on blocks of changed lines (effectively diff hunks with -U0).
460 454 # This could potentially be smarter but works well enough.
461 455 # * For a non-matching section, do a best-effort fit. Match lines in
462 456 # diff hunks 1:1, dropping lines as necessary.
463 457 # * Repeat the last line as a last resort.
464 458
465 459 # First, replace as much as possible without repeating the last line.
466 460 remaining = [(parent, []) for parent, _blocks in pblocks]
467 461 for idx, (parent, blocks) in enumerate(pblocks):
468 462 for (a1, a2, b1, b2), _t in blocks:
469 463 if a2 - a1 >= b2 - b1:
470 464 for bk in pycompat.xrange(b1, b2):
471 465 if child.fctxs[bk] == childfctx:
472 466 ak = min(a1 + (bk - b1), a2 - 1)
473 467 child.fctxs[bk] = parent.fctxs[ak]
474 468 child.linenos[bk] = parent.linenos[ak]
475 469 child.skips[bk] = True
476 470 else:
477 471 remaining[idx][1].append((a1, a2, b1, b2))
478 472
479 473 # Then, look at anything left, which might involve repeating the last
480 474 # line.
481 475 for parent, blocks in remaining:
482 476 for a1, a2, b1, b2 in blocks:
483 477 for bk in pycompat.xrange(b1, b2):
484 478 if child.fctxs[bk] == childfctx:
485 479 ak = min(a1 + (bk - b1), a2 - 1)
486 480 child.fctxs[bk] = parent.fctxs[ak]
487 481 child.linenos[bk] = parent.linenos[ak]
488 482 child.skips[bk] = True
489 483 return child
490 484
491 485 def annotate(base, parents, skiprevs=None, diffopts=None):
492 486 """Core algorithm for filectx.annotate()
493 487
494 488 `parents(fctx)` is a function returning a list of parent filectxs.
495 489 """
496 490
497 491 # This algorithm would prefer to be recursive, but Python is a
498 492 # bit recursion-hostile. Instead we do an iterative
499 493 # depth-first search.
500 494
501 495 # 1st DFS pre-calculates pcache and needed
502 496 visit = [base]
503 497 pcache = {}
504 498 needed = {base: 1}
505 499 while visit:
506 500 f = visit.pop()
507 501 if f in pcache:
508 502 continue
509 503 pl = parents(f)
510 504 pcache[f] = pl
511 505 for p in pl:
512 506 needed[p] = needed.get(p, 0) + 1
513 507 if p not in pcache:
514 508 visit.append(p)
515 509
516 510 # 2nd DFS does the actual annotate
517 511 visit[:] = [base]
518 512 hist = {}
519 513 while visit:
520 514 f = visit[-1]
521 515 if f in hist:
522 516 visit.pop()
523 517 continue
524 518
525 519 ready = True
526 520 pl = pcache[f]
527 521 for p in pl:
528 522 if p not in hist:
529 523 ready = False
530 524 visit.append(p)
531 525 if ready:
532 526 visit.pop()
533 527 curr = _decoratelines(f.data(), f)
534 528 skipchild = False
535 529 if skiprevs is not None:
536 530 skipchild = f._changeid in skiprevs
537 531 curr = _annotatepair([hist[p] for p in pl], f, curr, skipchild,
538 532 diffopts)
539 533 for p in pl:
540 534 if needed[p] == 1:
541 535 del hist[p]
542 536 del needed[p]
543 537 else:
544 538 needed[p] -= 1
545 539
546 540 hist[f] = curr
547 541 del pcache[f]
548 542
549 543 a = hist[base]
550 544 return [annotateline(*r) for r in zip(a.fctxs, a.linenos, a.skips,
551 545 mdiff.splitnewlines(a.text))]
552 546
553 547 def toposort(revs, parentsfunc, firstbranch=()):
554 548 """Yield revisions from heads to roots one (topo) branch at a time.
555 549
556 550 This function aims to be used by a graph generator that wishes to minimize
557 551 the number of parallel branches and their interleaving.
558 552
559 553 Example iteration order (numbers show the "true" order in a changelog):
560 554
561 555 o 4
562 556 |
563 557 o 1
564 558 |
565 559 | o 3
566 560 | |
567 561 | o 2
568 562 |/
569 563 o 0
570 564
571 565 Note that the ancestors of merges are understood by the current
572 566 algorithm to be on the same branch. This means no reordering will
573 567 occur behind a merge.
574 568 """
575 569
576 570 ### Quick summary of the algorithm
577 571 #
578 572 # This function is based around a "retention" principle. We keep revisions
579 573 # in memory until we are ready to emit a whole branch that immediately
580 574 # "merges" into an existing one. This reduces the number of parallel
581 575 # branches with interleaved revisions.
582 576 #
583 577 # During iteration revs are split into two groups:
584 578 # A) revisions already emitted
585 579 # B) revisions in "retention". They are stored as different subgroups.
586 580 #
587 581 # for each REV, we do the following logic:
588 582 #
589 583 # 1) if REV is a parent of (A), we will emit it. If there is a
590 584 # retention group ((B) above) that is blocked on REV being
591 585 # available, we emit all the revisions out of that retention
592 586 # group first.
593 587 #
594 588 # 2) else, we'll search for a subgroup in (B) awaiting for REV to be
595 589 # available; if such a subgroup exists, we add REV to it and the subgroup is
596 590 # now awaiting for REV.parents() to be available.
597 591 #
598 592 # 3) finally if no such group existed in (B), we create a new subgroup.
599 593 #
600 594 #
601 595 # To bootstrap the algorithm, we emit the tipmost revision (which
602 596 # puts it in group (A) from above).
603 597
604 598 revs.sort(reverse=True)
605 599
606 600 # Set of parents of revisions that have been emitted. They can be considered
607 601 # unblocked as the graph generator is already aware of them so there is no
608 602 # need to delay the revisions that reference them.
609 603 #
610 604 # If someone wants to prioritize a branch over the others, pre-filling this
611 605 # set will force all other branches to wait until this branch is ready to be
612 606 # emitted.
613 607 unblocked = set(firstbranch)
614 608
615 609 # list of groups waiting to be displayed, each group is defined by:
616 610 #
617 611 # (revs: lists of revs waiting to be displayed,
618 612 # blocked: set of revs that cannot be displayed before those in 'revs')
619 613 #
620 614 # The second value ('blocked') corresponds to parents of any revision in the
621 615 # group ('revs') that is not itself contained in the group. The main idea
622 616 # of this algorithm is to delay as much as possible the emission of any
623 617 # revision. This means waiting for the moment we are about to display
624 618 # these parents to display the revs in a group.
625 619 #
626 620 # This first implementation is smart until it encounters a merge: it will
627 621 # emit revs as soon as any parent is about to be emitted and can grow an
628 622 # arbitrary number of revs in 'blocked'. In practice this means we properly
629 623 # retain new branches but give up on any special ordering for ancestors
630 624 # of merges. The implementation can be improved to handle this better.
631 625 #
632 626 # The first subgroup is special. It corresponds to all the revision that
633 627 # were already emitted. The 'revs' list is expected to be empty and the
634 628 # 'blocked' set contains the parents revisions of already emitted revision.
635 629 #
636 630 # You could pre-seed the <parents> set of groups[0] with specific
637 631 # changesets to select what the first emitted branch should be.
638 632 groups = [([], unblocked)]
639 633 pendingheap = []
640 634 pendingset = set()
641 635
642 636 heapq.heapify(pendingheap)
643 637 heappop = heapq.heappop
644 638 heappush = heapq.heappush
645 639 for currentrev in revs:
646 640 # Heap works with the smallest element; we want the highest, so we invert
647 641 if currentrev not in pendingset:
648 642 heappush(pendingheap, -currentrev)
649 643 pendingset.add(currentrev)
650 644 # iterate on pending revs until the current rev has been
651 645 # processed.
652 646 rev = None
653 647 while rev != currentrev:
654 648 rev = -heappop(pendingheap)
655 649 pendingset.remove(rev)
656 650
657 651 # Look for a subgroup blocked, waiting for the current revision.
658 652 matching = [i for i, g in enumerate(groups) if rev in g[1]]
659 653
660 654 if matching:
661 655 # The main idea is to gather together all sets that are blocked
662 656 # on the same revision.
663 657 #
664 658 # Groups are merged when a common blocking ancestor is
665 659 # observed. For example, given two groups:
666 660 #
667 661 # revs [5, 4] waiting for 1
668 662 # revs [3, 2] waiting for 1
669 663 #
670 664 # These two groups will be merged when we process
671 665 # 1. In theory, we could have merged the groups when
672 666 # we added 2 to the group it is now in (we could have
673 667 # noticed the groups were both blocked on 1 then), but
674 668 # the way it works now makes the algorithm simpler.
675 669 #
676 670 # We also always keep the oldest subgroup first. We can
677 671 # probably improve the behavior by having the longest set
678 672 # first. That way, graph algorithms could minimise the length
679 673 # of parallel lines in their drawing. This is currently not done.
680 674 targetidx = matching.pop(0)
681 675 trevs, tparents = groups[targetidx]
682 676 for i in matching:
683 677 gr = groups[i]
684 678 trevs.extend(gr[0])
685 679 tparents |= gr[1]
686 680 # delete all merged subgroups (except the one we kept)
687 681 # (starting from the last subgroup for performance and
688 682 # sanity reasons)
689 683 for i in reversed(matching):
690 684 del groups[i]
691 685 else:
692 686 # This is a new head. We create a new subgroup for it.
693 687 targetidx = len(groups)
694 688 groups.append(([], {rev}))
695 689
696 690 gr = groups[targetidx]
697 691
698 692 # We now add the current nodes to this subgroup. This is done
699 693 # after the subgroup merging because all elements from a subgroup
700 694 # that relied on this rev must precede it.
701 695 #
702 696 # we also update the <parents> set to include the parents of the
703 697 # new nodes.
704 698 if rev == currentrev: # only display stuff in rev
705 699 gr[0].append(rev)
706 700 gr[1].remove(rev)
707 701 parents = [p for p in parentsfunc(rev) if p > node.nullrev]
708 702 gr[1].update(parents)
709 703 for p in parents:
710 704 if p not in pendingset:
711 705 pendingset.add(p)
712 706 heappush(pendingheap, -p)
713 707
714 708 # Look for a subgroup to display
715 709 #
716 710 # When unblocked is empty (if clause), we were not waiting for any
717 711 # revisions during the first iteration (if no priority was given) or
718 712 # if we emitted a whole disconnected set of the graph (reached a
719 713 # root). In that case we arbitrarily take the oldest known
720 714 # subgroup. The heuristic could probably be better.
721 715 #
722 716 # Otherwise (elif clause) if the subgroup is blocked on
723 717 # a revision we just emitted, we can safely emit it as
724 718 # well.
725 719 if not unblocked:
726 720 if len(groups) > 1: # display other subset
727 721 targetidx = 1
728 722 gr = groups[1]
729 723 elif not gr[1] & unblocked:
730 724 gr = None
731 725
732 726 if gr is not None:
733 727 # update the set of awaited revisions with the one from the
734 728 # subgroup
735 729 unblocked |= gr[1]
736 730 # output all revisions in the subgroup
737 731 for r in gr[0]:
738 732 yield r
739 733 # delete the subgroup that you just output
740 734 # unless it is groups[0] in which case you just empty it.
741 735 if targetidx:
742 736 del groups[targetidx]
743 737 else:
744 738 gr[0][:] = []
745 739 # Check if we have some subgroup waiting for revisions we are not going to
746 740 # iterate over
747 741 for g in groups:
748 742 for r in g[0]:
749 743 yield r
750 744
751 745 def headrevs(revs, parentsfn):
752 746 """Resolve the set of heads from a set of revisions.
753 747
754 748 Receives an iterable of revision numbers and a callable that receives a
755 749 revision number and returns an iterable of parent revision numbers, possibly
756 750 including nullrev.
757 751
758 752 Returns a set of revision numbers that are DAG heads within the passed
759 753 subset.
760 754
761 755 ``nullrev`` is never included in the returned set, even if it is provided in
762 756 the input set.
763 757 """
764 758 headrevs = set(revs)
765 759 parents = {node.nullrev}
766 760 up = parents.update
767 761
768 762 for rev in revs:
769 763 up(parentsfn(rev))
770 764 headrevs.difference_update(parents)
771 765 return headrevs
772 766
773 767 def headrevssubset(revsfn, parentrevsfn, startrev=None, stoprevs=None):
774 768 """Returns the set of all revs that have no children, with control over the scanned range.
775 769
776 770 ``revsfn`` is a callable that with no arguments returns an iterator over
777 771 all revision numbers in topological order. With a ``start`` argument, it
778 772 returns revision numbers starting at that number.
779 773
780 774 ``parentrevsfn`` is a callable receiving a revision number and returns an
781 775 iterable of parent revision numbers, where values can include nullrev.
782 776
783 777 ``startrev`` is a revision number at which to start the search.
784 778
785 779 ``stoprevs`` is an iterable of revision numbers that, when encountered,
786 780 will stop DAG traversal beyond them. Parents of revisions in this
787 781 collection will be heads.
788 782 """
789 783 if startrev is None:
790 784 startrev = nullrev
791 785
792 786 stoprevs = set(stoprevs or [])
793 787
794 788 reachable = {startrev}
795 789 heads = {startrev}
796 790
797 791 for rev in revsfn(start=startrev + 1):
798 792 for prev in parentrevsfn(rev):
799 793 if prev in reachable:
800 794 if rev not in stoprevs:
801 795 reachable.add(rev)
802 796 heads.add(rev)
803 797
804 798 if prev in heads and prev not in stoprevs:
805 799 heads.remove(prev)
806 800
807 801 return heads
808 802
809 803 def linearize(revs, parentsfn):
810 804 """Linearize and topologically sort a list of revisions.
811 805
812 806 The linearization process tries to create long runs of revs where a child
813 807 rev comes immediately after its first parent. This is done by visiting the
814 808 heads of the revs in inverse topological order, and for each visited rev,
815 809 visiting its second parent, then its first parent, then adding the rev
816 810 itself to the output list.
817 811
818 812 Returns a list of revision numbers.
819 813 """
820 814 visit = list(sorted(headrevs(revs, parentsfn), reverse=True))
821 815 finished = set()
822 816 result = []
823 817
824 818 while visit:
825 819 rev = visit.pop()
826 820 if rev < 0:
827 821 rev = -rev - 1
828 822
829 823 if rev not in finished:
830 824 result.append(rev)
831 825 finished.add(rev)
832 826
833 827 else:
834 828 visit.append(-rev - 1)
835 829
836 830 for prev in parentsfn(rev):
837 831 if prev == node.nullrev or prev not in revs or prev in finished:
838 832 continue
839 833
840 834 visit.append(prev)
841 835
842 836 assert len(result) == len(revs)
843 837
844 838 return result
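A toy run of the pfunc-based _reachablerootspure above, on a hand-built DAG; the dict-backed pfunc stands in for changelog.parentrevs and is purely illustrative:

# DAG: 0 <- 1 <- 2 <- 3, with a sibling branch 1 <- 4
parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, -1), 4: (1, -1)}
pfunc = lambda rev: parents[rev]

# roots {1} reachable from head 3: only the root itself without includepath
assert _reachablerootspure(pfunc, 1, {1}, [3], False) == {1}
# includepath=True also returns the revisions on the path 1::3
assert _reachablerootspure(pfunc, 1, {1}, [3], True) == {1, 2, 3}
# rev 4 is not behind head 3, so nothing is reachable
assert not _reachablerootspure(pfunc, 1, {4}, [3], False)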
@@ -1,2676 +1,2687
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanked usage of all the names to prevent pyflakes warnings
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100 try:
101 101 from . import rustext
102 102 rustext.__name__ # force actual import (see hgdemandimport)
103 103 except ImportError:
104 104 rustext = None
105 105
106 106 # Aliased for performance.
107 107 _zlibdecompress = zlib.decompress
108 108
109 109 # max size of revlog with inline data
110 110 _maxinline = 131072
111 111 _chunksize = 1048576
112 112
113 113 # Store flag processors (cf. 'addflagprocessor()' to register)
114 114 _flagprocessors = {
115 115 REVIDX_ISCENSORED: None,
116 116 }
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False
121 121
122 122 def ellipsiswriteprocessor(rl, text):
123 123 return text, False
124 124
125 125 def ellipsisrawprocessor(rl, text):
126 126 return False
127 127
128 128 ellipsisprocessor = (
129 129 ellipsisreadprocessor,
130 130 ellipsiswriteprocessor,
131 131 ellipsisrawprocessor,
132 132 )
133 133
134 134 def addflagprocessor(flag, processor):
135 135 """Register a flag processor on a revision data flag.
136 136
137 137 Invariant:
138 138 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
139 139 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
140 140 - Only one flag processor can be registered on a specific flag.
141 141 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
142 142 following signatures:
143 143 - (read) f(self, rawtext) -> text, bool
144 144 - (write) f(self, text) -> rawtext, bool
145 145 - (raw) f(self, rawtext) -> bool
146 146 "text" is presented to the user. "rawtext" is stored in revlog data, not
147 147 directly visible to the user.
148 148 The boolean returned by these transforms is used to determine whether
149 149 the returned text can be used for hash integrity checking. For example,
150 150 if "write" returns False, then "text" is used to generate hash. If
151 151 "write" returns True, that basically means "rawtext" returned by "write"
152 152 should be used to generate hash. Usually, "write" and "read" return
153 153 different booleans. And "raw" returns a same boolean as "write".
154 154
155 155 Note: The 'raw' transform is used for changegroup generation and in some
156 156 debug commands. In this case the transform only indicates whether the
157 157 contents can be used for hash integrity checks.
158 158 """
159 159 _insertflagprocessor(flag, processor, _flagprocessors)
160 160
161 161 def _insertflagprocessor(flag, processor, flagprocessors):
162 162 if not flag & REVIDX_KNOWN_FLAGS:
163 163 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
164 164 raise error.ProgrammingError(msg)
165 165 if flag not in REVIDX_FLAGS_ORDER:
166 166 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
167 167 raise error.ProgrammingError(msg)
168 168 if flag in flagprocessors:
169 169 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
170 170 raise error.Abort(msg)
171 171 flagprocessors[flag] = processor
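
[Editor's note] A concrete illustration of the (read, write, raw) contract documented above: a hypothetical no-op processor registered against a private mapping. This is a sketch only; the function bodies and the choice of REVIDX_ELLIPSIS as the example flag are placeholders, not anything revlog ships.

    def sketchread(rl, rawtext):
        return rawtext, True    # stored form is directly usable as text

    def sketchwrite(rl, text):
        return text, True       # hash over the returned rawtext

    def sketchraw(rl, rawtext):
        return True             # rawtext is valid for hash checking

    sketchprocessors = {}       # private mapping, as each revlog keeps
    _insertflagprocessor(REVIDX_ELLIPSIS,
                         (sketchread, sketchwrite, sketchraw),
                         sketchprocessors)
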
172 172
173 173 def getoffset(q):
174 174 return int(q >> 16)
175 175
176 176 def gettype(q):
177 177 return int(q & 0xFFFF)
178 178
179 179 def offset_type(offset, type):
180 180 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
181 181 raise ValueError('unknown revlog index flags')
182 182 return int(int(offset) << 16 | type)
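
[Editor's note] A worked example of the packing: the byte offset lives in the high bits and the 16 flag bits in the low word, so the two accessors above simply undo the shift and the mask.

    packed = offset_type(4096, REVIDX_ISCENSORED)
    assert getoffset(packed) == 4096
    assert gettype(packed) == REVIDX_ISCENSORED
    # Any bit outside REVIDX_KNOWN_FLAGS makes offset_type raise ValueError.
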
183 183
184 184 @attr.s(slots=True, frozen=True)
185 185 class _revisioninfo(object):
186 186 """Information about a revision that allows building its fulltext
187 187 node: expected hash of the revision
188 188 p1, p2: parent revs of the revision
189 189 btext: built text cache consisting of a one-element list
190 190 cachedelta: (baserev, uncompressed_delta) or None
191 191 flags: flags associated to the revision storage
192 192
193 193 One of btext[0] or cachedelta must be set.
194 194 """
195 195 node = attr.ib()
196 196 p1 = attr.ib()
197 197 p2 = attr.ib()
198 198 btext = attr.ib()
199 199 textlen = attr.ib()
200 200 cachedelta = attr.ib()
201 201 flags = attr.ib()
202 202
203 203 @interfaceutil.implementer(repository.irevisiondelta)
204 204 @attr.s(slots=True)
205 205 class revlogrevisiondelta(object):
206 206 node = attr.ib()
207 207 p1node = attr.ib()
208 208 p2node = attr.ib()
209 209 basenode = attr.ib()
210 210 flags = attr.ib()
211 211 baserevisionsize = attr.ib()
212 212 revision = attr.ib()
213 213 delta = attr.ib()
214 214 linknode = attr.ib(default=None)
215 215
216 216 @interfaceutil.implementer(repository.iverifyproblem)
217 217 @attr.s(frozen=True)
218 218 class revlogproblem(object):
219 219 warning = attr.ib(default=None)
220 220 error = attr.ib(default=None)
221 221 node = attr.ib(default=None)
222 222
223 223 # index v0:
224 224 # 4 bytes: offset
225 225 # 4 bytes: compressed length
226 226 # 4 bytes: base rev
227 227 # 4 bytes: link rev
228 228 # 20 bytes: parent 1 nodeid
229 229 # 20 bytes: parent 2 nodeid
230 230 # 20 bytes: nodeid
231 231 indexformatv0 = struct.Struct(">4l20s20s20s")
232 232 indexformatv0_pack = indexformatv0.pack
233 233 indexformatv0_unpack = indexformatv0.unpack
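
[Editor's note] A sketch of one v0 entry round-trip, with placeholder values: the four ">4l" integers carry offset, compressed length, base rev and link rev, followed by the three 20-byte binary nodeids.

    e = indexformatv0_pack(0, 30, 0, 0,
                           b'\x11' * 20, b'\x22' * 20, b'\x33' * 20)
    assert len(e) == indexformatv0.size == 76
    assert indexformatv0_unpack(e)[1] == 30   # compressed length
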
234 234
235 235 class revlogoldindex(list):
236 236 def __getitem__(self, i):
237 237 if i == -1:
238 238 return (0, 0, 0, -1, -1, -1, -1, nullid)
239 239 return list.__getitem__(self, i)
240 240
241 241 class revlogoldio(object):
242 242 def __init__(self):
243 243 self.size = indexformatv0.size
244 244
245 245 def parseindex(self, data, inline):
246 246 s = self.size
247 247 index = []
248 248 nodemap = {nullid: nullrev}
249 249 n = off = 0
250 250 l = len(data)
251 251 while off + s <= l:
252 252 cur = data[off:off + s]
253 253 off += s
254 254 e = indexformatv0_unpack(cur)
255 255 # transform to revlogv1 format
256 256 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
257 257 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
258 258 index.append(e2)
259 259 nodemap[e[6]] = n
260 260 n += 1
261 261
262 262 return revlogoldindex(index), nodemap, None
263 263
264 264 def packentry(self, entry, node, version, rev):
265 265 if gettype(entry[0]):
266 266 raise error.RevlogError(_('index entry flags need revlog '
267 267 'version 1'))
268 268 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
269 269 node(entry[5]), node(entry[6]), entry[7])
270 270 return indexformatv0_pack(*e2)
271 271
272 272 # index ng:
273 273 # 6 bytes: offset
274 274 # 2 bytes: flags
275 275 # 4 bytes: compressed length
276 276 # 4 bytes: uncompressed length
277 277 # 4 bytes: base rev
278 278 # 4 bytes: link rev
279 279 # 4 bytes: parent 1 rev
280 280 # 4 bytes: parent 2 rev
281 281 # 32 bytes: nodeid
282 282 indexformatng = struct.Struct(">Qiiiiii20s12x")
283 283 indexformatng_pack = indexformatng.pack
284 284 versionformat = struct.Struct(">I")
285 285 versionformat_pack = versionformat.pack
286 286 versionformat_unpack = versionformat.unpack
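
[Editor's note] The same exercise for the v1 ("ng") layout, again with placeholder values: one 64-bit offset+flags word, six 32-bit integers, and a 20-byte nodeid padded to 32 bytes, for 64 bytes per entry. Note that packentry() below overwrites the first 4 bytes of entry 0 with the version header.

    e = indexformatng_pack(offset_type(0, 0), 30, 60, 0, 0, -1, -1,
                           b'\x11' * 20)
    assert len(e) == indexformatng.size == 64
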
287 287
288 288 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
289 289 # signed integer)
290 290 _maxentrysize = 0x7fffffff
291 291
292 292 class revlogio(object):
293 293 def __init__(self):
294 294 self.size = indexformatng.size
295 295
296 296 def parseindex(self, data, inline):
297 297 # call the C implementation to parse the index data
298 298 index, cache = parsers.parse_index2(data, inline)
299 299 return index, getattr(index, 'nodemap', None), cache
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 p = indexformatng_pack(*entry)
303 303 if rev == 0:
304 304 p = versionformat_pack(version) + p[4:]
305 305 return p
306 306
307 307 class revlog(object):
308 308 """
309 309 the underlying revision storage object
310 310
311 311 A revlog consists of two parts, an index and the revision data.
312 312
313 313 The index is a file with a fixed record size containing
314 314 information on each revision, including its nodeid (hash), the
315 315 nodeids of its parents, the position and offset of its data within
316 316 the data file, and the revision it's based on. Finally, each entry
317 317 contains a linkrev entry that can serve as a pointer to external
318 318 data.
319 319
320 320 The revision data itself is a linear collection of data chunks.
321 321 Each chunk represents a revision and is usually represented as a
322 322 delta against the previous chunk. To bound lookup time, runs of
323 323 deltas are limited to about 2 times the length of the original
324 324 version data. This makes retrieval of a version proportional to
325 325 its size, or O(1) relative to the number of revisions.
326 326
327 327 Both pieces of the revlog are written to in an append-only
328 328 fashion, which means we never need to rewrite a file to insert or
329 329 remove data, and can use some simple techniques to avoid the need
330 330 for locking while reading.
331 331
332 332 If checkambig, indexfile is opened with checkambig=True at
333 333 writing, to avoid file stat ambiguity.
334 334
335 335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
336 336 index will be mmapped rather than read if it is larger than the
337 337 configured threshold.
338 338
339 339 If censorable is True, the revlog can have censored revisions.
340 340 """
341 341 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
342 342 mmaplargeindex=False, censorable=False):
343 343 """
344 344 create a revlog object
345 345
346 346 opener is a function that abstracts the file opening operation
347 347 and can be used to implement COW semantics or the like.
348 348 """
349 349 self.indexfile = indexfile
350 350 self.datafile = datafile or (indexfile[:-2] + ".d")
351 351 self.opener = opener
352 352 # When True, indexfile is opened with checkambig=True at writing, to
353 353 # avoid file stat ambiguity.
354 354 self._checkambig = checkambig
355 355 self._mmaplargeindex = mmaplargeindex
356 356 self._censorable = censorable
357 357 # 3-tuple of (node, rev, text) for a raw revision.
358 358 self._revisioncache = None
359 359 # Maps rev to chain base rev.
360 360 self._chainbasecache = util.lrucachedict(100)
361 361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
362 362 self._chunkcache = (0, '')
363 363 # How much data to read and cache into the raw revlog data cache.
364 364 self._chunkcachesize = 65536
365 365 self._maxchainlen = None
366 366 self._deltabothparents = True
367 367 self.index = []
368 368 # Mapping of partial identifiers to full nodes.
369 369 self._pcache = {}
370 370 # Mapping of full node to revision integer.
371 371 self._nodecache = {nullid: nullrev}
372 372 self._nodepos = None
373 373 self._compengine = 'zlib'
374 374 self._compengineopts = {}
375 375 self._maxdeltachainspan = -1
376 376 self._withsparseread = False
377 377 self._sparserevlog = False
378 378 self._srdensitythreshold = 0.50
379 379 self._srmingapsize = 262144
380 380
381 381 # Make copy of flag processors so each revlog instance can support
382 382 # custom flags.
383 383 self._flagprocessors = dict(_flagprocessors)
384 384
385 385 # 2-tuple of file handles being used for active writing.
386 386 self._writinghandles = None
387 387
388 388 self._loadindex()
389 389
390 390 def _loadindex(self):
391 391 mmapindexthreshold = None
392 392 opts = getattr(self.opener, 'options', {}) or {}
393 393
394 394 if 'revlogv2' in opts:
395 395 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
396 396 elif 'revlogv1' in opts:
397 397 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
398 398 if 'generaldelta' in opts:
399 399 newversionflags |= FLAG_GENERALDELTA
400 400 elif getattr(self.opener, 'options', None) is not None:
401 401 # If options provided but no 'revlog*' found, the repository
402 402 # would have no 'requires' file in it, which means we have to
403 403 # stick to the old format.
404 404 newversionflags = REVLOGV0
405 405 else:
406 406 newversionflags = REVLOG_DEFAULT_VERSION
407 407
408 408 if 'chunkcachesize' in opts:
409 409 self._chunkcachesize = opts['chunkcachesize']
410 410 if 'maxchainlen' in opts:
411 411 self._maxchainlen = opts['maxchainlen']
412 412 if 'deltabothparents' in opts:
413 413 self._deltabothparents = opts['deltabothparents']
414 414 self._lazydelta = bool(opts.get('lazydelta', True))
415 415 self._lazydeltabase = False
416 416 if self._lazydelta:
417 417 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
418 418 if 'compengine' in opts:
419 419 self._compengine = opts['compengine']
420 420 if 'zlib.level' in opts:
421 421 self._compengineopts['zlib.level'] = opts['zlib.level']
422 422 if 'zstd.level' in opts:
423 423 self._compengineopts['zstd.level'] = opts['zstd.level']
424 424 if 'maxdeltachainspan' in opts:
425 425 self._maxdeltachainspan = opts['maxdeltachainspan']
426 426 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
427 427 mmapindexthreshold = opts['mmapindexthreshold']
428 428 self._sparserevlog = bool(opts.get('sparse-revlog', False))
429 429 withsparseread = bool(opts.get('with-sparse-read', False))
430 430 # sparse-revlog forces sparse-read
431 431 self._withsparseread = self._sparserevlog or withsparseread
432 432 if 'sparse-read-density-threshold' in opts:
433 433 self._srdensitythreshold = opts['sparse-read-density-threshold']
434 434 if 'sparse-read-min-gap-size' in opts:
435 435 self._srmingapsize = opts['sparse-read-min-gap-size']
436 436 if opts.get('enableellipsis'):
437 437 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
438 438
439 439 # revlog v0 doesn't have flag processors
440 440 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
441 441 _insertflagprocessor(flag, processor, self._flagprocessors)
442 442
443 443 if self._chunkcachesize <= 0:
444 444 raise error.RevlogError(_('revlog chunk cache size %r is not '
445 445 'greater than 0') % self._chunkcachesize)
446 446 elif self._chunkcachesize & (self._chunkcachesize - 1):
447 447 raise error.RevlogError(_('revlog chunk cache size %r is not a '
448 448 'power of 2') % self._chunkcachesize)
449 449
450 450 indexdata = ''
451 451 self._initempty = True
452 452 try:
453 453 with self._indexfp() as f:
454 454 if (mmapindexthreshold is not None and
455 455 self.opener.fstat(f).st_size >= mmapindexthreshold):
456 456 # TODO: should .close() to release resources without
457 457 # relying on Python GC
458 458 indexdata = util.buffer(util.mmapread(f))
459 459 else:
460 460 indexdata = f.read()
461 461 if len(indexdata) > 0:
462 462 versionflags = versionformat_unpack(indexdata[:4])[0]
463 463 self._initempty = False
464 464 else:
465 465 versionflags = newversionflags
466 466 except IOError as inst:
467 467 if inst.errno != errno.ENOENT:
468 468 raise
469 469
470 470 versionflags = newversionflags
471 471
472 472 self.version = versionflags
473 473
474 474 flags = versionflags & ~0xFFFF
475 475 fmt = versionflags & 0xFFFF
476 476
477 477 if fmt == REVLOGV0:
478 478 if flags:
479 479 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
480 480 'revlog %s') %
481 481 (flags >> 16, fmt, self.indexfile))
482 482
483 483 self._inline = False
484 484 self._generaldelta = False
485 485
486 486 elif fmt == REVLOGV1:
487 487 if flags & ~REVLOGV1_FLAGS:
488 488 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
489 489 'revlog %s') %
490 490 (flags >> 16, fmt, self.indexfile))
491 491
492 492 self._inline = versionflags & FLAG_INLINE_DATA
493 493 self._generaldelta = versionflags & FLAG_GENERALDELTA
494 494
495 495 elif fmt == REVLOGV2:
496 496 if flags & ~REVLOGV2_FLAGS:
497 497 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
498 498 'revlog %s') %
499 499 (flags >> 16, fmt, self.indexfile))
500 500
501 501 self._inline = versionflags & FLAG_INLINE_DATA
502 502 # generaldelta implied by version 2 revlogs.
503 503 self._generaldelta = True
504 504
505 505 else:
506 506 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
507 507 (fmt, self.indexfile))
508 508 # sparse-revlog can't be on without general-delta (issue6056)
509 509 if not self._generaldelta:
510 510 self._sparserevlog = False
511 511
512 512 self._storedeltachains = True
513 513
514 514 self._io = revlogio()
515 515 if self.version == REVLOGV0:
516 516 self._io = revlogoldio()
517 517 try:
518 518 d = self._io.parseindex(indexdata, self._inline)
519 519 except (ValueError, IndexError):
520 520 raise error.RevlogError(_("index %s is corrupted") %
521 521 self.indexfile)
522 522 self.index, nodemap, self._chunkcache = d
523 523 if nodemap is not None:
524 524 self.nodemap = self._nodecache = nodemap
525 525 if not self._chunkcache:
526 526 self._chunkclear()
527 527 # revnum -> (chain-length, sum-delta-length)
528 528 self._chaininfocache = {}
529 529 # revlog header -> revlog compressor
530 530 self._decompressors = {}
531 531
532 532 @util.propertycache
533 533 def _compressor(self):
534 534 engine = util.compengines[self._compengine]
535 535 return engine.revlogcompressor(self._compengineopts)
536 536
537 537 def _indexfp(self, mode='r'):
538 538 """file object for the revlog's index file"""
539 539 args = {r'mode': mode}
540 540 if mode != 'r':
541 541 args[r'checkambig'] = self._checkambig
542 542 if mode == 'w':
543 543 args[r'atomictemp'] = True
544 544 return self.opener(self.indexfile, **args)
545 545
546 546 def _datafp(self, mode='r'):
547 547 """file object for the revlog's data file"""
548 548 return self.opener(self.datafile, mode=mode)
549 549
550 550 @contextlib.contextmanager
551 551 def _datareadfp(self, existingfp=None):
552 552 """file object suitable to read data"""
553 553 # Use explicit file handle, if given.
554 554 if existingfp is not None:
555 555 yield existingfp
556 556
557 557 # Use a file handle being actively used for writes, if available.
558 558 # There is some danger to doing this because reads will seek the
559 559 # file. However, _writeentry() performs a SEEK_END before all writes,
560 560 # so we should be safe.
561 561 elif self._writinghandles:
562 562 if self._inline:
563 563 yield self._writinghandles[0]
564 564 else:
565 565 yield self._writinghandles[1]
566 566
567 567 # Otherwise open a new file handle.
568 568 else:
569 569 if self._inline:
570 570 func = self._indexfp
571 571 else:
572 572 func = self._datafp
573 573 with func() as fp:
574 574 yield fp
575 575
576 576 def tip(self):
577 577 return self.node(len(self.index) - 1)
578 578 def __contains__(self, rev):
579 579 return 0 <= rev < len(self)
580 580 def __len__(self):
581 581 return len(self.index)
582 582 def __iter__(self):
583 583 return iter(pycompat.xrange(len(self)))
584 584 def revs(self, start=0, stop=None):
585 585 """iterate over all rev in this revlog (from start to stop)"""
586 586 return storageutil.iterrevs(len(self), start=start, stop=stop)
587 587
588 588 @util.propertycache
589 589 def nodemap(self):
590 590 if self.index:
591 591 # populate mapping down to the initial node
592 592 node0 = self.index[0][7] # get around changelog filtering
593 593 self.rev(node0)
594 594 return self._nodecache
595 595
596 596 def hasnode(self, node):
597 597 try:
598 598 self.rev(node)
599 599 return True
600 600 except KeyError:
601 601 return False
602 602
603 603 def candelta(self, baserev, rev):
604 604 """whether two revisions (baserev, rev) can be delta-ed or not"""
605 605 # Disable delta if either rev requires a content-changing flag
606 606 # processor (ex. LFS). This is because such a flag processor can alter
607 607 # the rawtext content that the delta will be based on, and two clients
608 608 # could have the same revlog node with different flags (i.e. different
609 609 # rawtext contents) and the delta could be incompatible.
610 610 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
611 611 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
612 612 return False
613 613 return True
614 614
615 615 def clearcaches(self):
616 616 self._revisioncache = None
617 617 self._chainbasecache.clear()
618 618 self._chunkcache = (0, '')
619 619 self._pcache = {}
620 620
621 621 try:
622 622 # If we are using the native C version, we are in a fun case
623 623 # where self.index, self.nodemap and self._nodecache are the same
624 624 # object.
625 625 self._nodecache.clearcaches()
626 626 except AttributeError:
627 627 self._nodecache = {nullid: nullrev}
628 628 self._nodepos = None
629 629
630 630 def rev(self, node):
631 631 try:
632 632 return self._nodecache[node]
633 633 except TypeError:
634 634 raise
635 635 except error.RevlogError:
636 636 # parsers.c radix tree lookup failed
637 637 if node == wdirid or node in wdirfilenodeids:
638 638 raise error.WdirUnsupported
639 639 raise error.LookupError(node, self.indexfile, _('no node'))
640 640 except KeyError:
641 641 # pure python cache lookup failed
642 642 n = self._nodecache
643 643 i = self.index
644 644 p = self._nodepos
645 645 if p is None:
646 646 p = len(i) - 1
647 647 else:
648 648 assert p < len(i)
649 649 for r in pycompat.xrange(p, -1, -1):
650 650 v = i[r][7]
651 651 n[v] = r
652 652 if v == node:
653 653 self._nodepos = r - 1
654 654 return r
655 655 if node == wdirid or node in wdirfilenodeids:
656 656 raise error.WdirUnsupported
657 657 raise error.LookupError(node, self.indexfile, _('no node'))
658 658
659 659 # Accessors for index entries.
660 660
661 661 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
662 662 # are flags.
663 663 def start(self, rev):
664 664 return int(self.index[rev][0] >> 16)
665 665
666 666 def flags(self, rev):
667 667 return self.index[rev][0] & 0xFFFF
668 668
669 669 def length(self, rev):
670 670 return self.index[rev][1]
671 671
672 672 def rawsize(self, rev):
673 673 """return the length of the uncompressed text for a given revision"""
674 674 l = self.index[rev][2]
675 675 if l >= 0:
676 676 return l
677 677
678 678 t = self.revision(rev, raw=True)
679 679 return len(t)
680 680
681 681 def size(self, rev):
682 682 """length of non-raw text (processed by a "read" flag processor)"""
683 683 # fast path: if no "read" flag processor could change the content,
684 684 # size is rawsize. note: ELLIPSIS is known to not change the content.
685 685 flags = self.flags(rev)
686 686 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
687 687 return self.rawsize(rev)
688 688
689 689 return len(self.revision(rev, raw=False))
690 690
691 691 def chainbase(self, rev):
692 692 base = self._chainbasecache.get(rev)
693 693 if base is not None:
694 694 return base
695 695
696 696 index = self.index
697 697 iterrev = rev
698 698 base = index[iterrev][3]
699 699 while base != iterrev:
700 700 iterrev = base
701 701 base = index[iterrev][3]
702 702
703 703 self._chainbasecache[rev] = base
704 704 return base
705 705
706 706 def linkrev(self, rev):
707 707 return self.index[rev][4]
708 708
709 709 def parentrevs(self, rev):
710 710 try:
711 711 entry = self.index[rev]
712 712 except IndexError:
713 713 if rev == wdirrev:
714 714 raise error.WdirUnsupported
715 715 raise
716 716
717 717 return entry[5], entry[6]
718 718
719 719 # fast parentrevs(rev) where rev isn't filtered
720 720 _uncheckedparentrevs = parentrevs
721 721
722 722 def node(self, rev):
723 723 try:
724 724 return self.index[rev][7]
725 725 except IndexError:
726 726 if rev == wdirrev:
727 727 raise error.WdirUnsupported
728 728 raise
729 729
730 730 # Derived from index values.
731 731
732 732 def end(self, rev):
733 733 return self.start(rev) + self.length(rev)
734 734
735 735 def parents(self, node):
736 736 i = self.index
737 737 d = i[self.rev(node)]
738 738 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
739 739
740 740 def chainlen(self, rev):
741 741 return self._chaininfo(rev)[0]
742 742
743 743 def _chaininfo(self, rev):
744 744 chaininfocache = self._chaininfocache
745 745 if rev in chaininfocache:
746 746 return chaininfocache[rev]
747 747 index = self.index
748 748 generaldelta = self._generaldelta
749 749 iterrev = rev
750 750 e = index[iterrev]
751 751 clen = 0
752 752 compresseddeltalen = 0
753 753 while iterrev != e[3]:
754 754 clen += 1
755 755 compresseddeltalen += e[1]
756 756 if generaldelta:
757 757 iterrev = e[3]
758 758 else:
759 759 iterrev -= 1
760 760 if iterrev in chaininfocache:
761 761 t = chaininfocache[iterrev]
762 762 clen += t[0]
763 763 compresseddeltalen += t[1]
764 764 break
765 765 e = index[iterrev]
766 766 else:
767 767 # Add text length of base since decompressing that also takes
768 768 # work. For cache hits the length is already included.
769 769 compresseddeltalen += e[1]
770 770 r = (clen, compresseddeltalen)
771 771 chaininfocache[rev] = r
772 772 return r
773 773
774 774 def _deltachain(self, rev, stoprev=None):
775 775 """Obtain the delta chain for a revision.
776 776
777 777 ``stoprev`` specifies a revision to stop at. If not specified, we
778 778 stop at the base of the chain.
779 779
780 780 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
781 781 revs in ascending order and ``stopped`` is a bool indicating whether
782 782 ``stoprev`` was hit.
783 783 """
784 784 # Try C implementation.
785 785 try:
786 786 return self.index.deltachain(rev, stoprev, self._generaldelta)
787 787 except AttributeError:
788 788 pass
789 789
790 790 chain = []
791 791
792 792 # Alias to prevent attribute lookup in tight loop.
793 793 index = self.index
794 794 generaldelta = self._generaldelta
795 795
796 796 iterrev = rev
797 797 e = index[iterrev]
798 798 while iterrev != e[3] and iterrev != stoprev:
799 799 chain.append(iterrev)
800 800 if generaldelta:
801 801 iterrev = e[3]
802 802 else:
803 803 iterrev -= 1
804 804 e = index[iterrev]
805 805
806 806 if iterrev == stoprev:
807 807 stopped = True
808 808 else:
809 809 chain.append(iterrev)
810 810 stopped = False
811 811
812 812 chain.reverse()
813 813 return chain, stopped
814 814
815 815 def ancestors(self, revs, stoprev=0, inclusive=False):
816 816 """Generate the ancestors of 'revs' in reverse revision order.
817 817 Does not generate revs lower than stoprev.
818 818
819 819 See the documentation for ancestor.lazyancestors for more details."""
820 820
821 821 # first, make sure start revisions aren't filtered
822 822 revs = list(revs)
823 823 checkrev = self.node
824 824 for r in revs:
825 825 checkrev(r)
826 826 # and we're sure ancestors aren't filtered either
827 827
828 828 if rustext is not None:
829 829 lazyancestors = rustext.ancestor.LazyAncestors
830 830 arg = self.index
831 831 elif util.safehasattr(parsers, 'rustlazyancestors'):
832 832 lazyancestors = ancestor.rustlazyancestors
833 833 arg = self.index
834 834 else:
835 835 lazyancestors = ancestor.lazyancestors
836 836 arg = self._uncheckedparentrevs
837 837 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
838 838
839 839 def descendants(self, revs):
840 840 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
841 841
842 842 def findcommonmissing(self, common=None, heads=None):
843 843 """Return a tuple of the ancestors of common and the ancestors of heads
844 844 that are not ancestors of common. In revset terminology, we return the
845 845 tuple:
846 846
847 847 ::common, (::heads) - (::common)
848 848
849 849 The list is sorted by revision number, meaning it is
850 850 topologically sorted.
851 851
852 852 'heads' and 'common' are both lists of node IDs. If heads is
853 853 not supplied, uses all of the revlog's heads. If common is not
854 854 supplied, uses nullid."""
855 855 if common is None:
856 856 common = [nullid]
857 857 if heads is None:
858 858 heads = self.heads()
859 859
860 860 common = [self.rev(n) for n in common]
861 861 heads = [self.rev(n) for n in heads]
862 862
863 863 # we want the ancestors, but inclusive
864 864 class lazyset(object):
865 865 def __init__(self, lazyvalues):
866 866 self.addedvalues = set()
867 867 self.lazyvalues = lazyvalues
868 868
869 869 def __contains__(self, value):
870 870 return value in self.addedvalues or value in self.lazyvalues
871 871
872 872 def __iter__(self):
873 873 added = self.addedvalues
874 874 for r in added:
875 875 yield r
876 876 for r in self.lazyvalues:
877 877 if r not in added:
878 878 yield r
879 879
880 880 def add(self, value):
881 881 self.addedvalues.add(value)
882 882
883 883 def update(self, values):
884 884 self.addedvalues.update(values)
885 885
886 886 has = lazyset(self.ancestors(common))
887 887 has.add(nullrev)
888 888 has.update(common)
889 889
890 890 # take all ancestors from heads that aren't in has
891 891 missing = set()
892 892 visit = collections.deque(r for r in heads if r not in has)
893 893 while visit:
894 894 r = visit.popleft()
895 895 if r in missing:
896 896 continue
897 897 else:
898 898 missing.add(r)
899 899 for p in self.parentrevs(r):
900 900 if p not in has:
901 901 visit.append(p)
902 902 missing = list(missing)
903 903 missing.sort()
904 904 return has, [self.node(miss) for miss in missing]
905 905
906 906 def incrementalmissingrevs(self, common=None):
907 907 """Return an object that can be used to incrementally compute the
908 908 revision numbers of the ancestors of arbitrary sets that are not
909 909 ancestors of common. This is an ancestor.incrementalmissingancestors
910 910 object.
911 911
912 912 'common' is a list of revision numbers. If common is not supplied, uses
913 913 nullrev.
914 914 """
915 915 if common is None:
916 916 common = [nullrev]
917 917
918 918 if rustext is not None:
919 919 return rustext.ancestor.MissingAncestors(self.index, common)
920 920 return ancestor.incrementalmissingancestors(self.parentrevs, common)
921 921
922 922 def findmissingrevs(self, common=None, heads=None):
923 923 """Return the revision numbers of the ancestors of heads that
924 924 are not ancestors of common.
925 925
926 926 More specifically, return a list of revision numbers corresponding to
927 927 nodes N such that every N satisfies the following constraints:
928 928
929 929 1. N is an ancestor of some node in 'heads'
930 930 2. N is not an ancestor of any node in 'common'
931 931
932 932 The list is sorted by revision number, meaning it is
933 933 topologically sorted.
934 934
935 935 'heads' and 'common' are both lists of revision numbers. If heads is
936 936 not supplied, uses all of the revlog's heads. If common is not
937 937 supplied, uses nullrev."""
938 938 if common is None:
939 939 common = [nullrev]
940 940 if heads is None:
941 941 heads = self.headrevs()
942 942
943 943 inc = self.incrementalmissingrevs(common=common)
944 944 return inc.missingancestors(heads)
945 945
946 946 def findmissing(self, common=None, heads=None):
947 947 """Return the ancestors of heads that are not ancestors of common.
948 948
949 949 More specifically, return a list of nodes N such that every N
950 950 satisfies the following constraints:
951 951
952 952 1. N is an ancestor of some node in 'heads'
953 953 2. N is not an ancestor of any node in 'common'
954 954
955 955 The list is sorted by revision number, meaning it is
956 956 topologically sorted.
957 957
958 958 'heads' and 'common' are both lists of node IDs. If heads is
959 959 not supplied, uses all of the revlog's heads. If common is not
960 960 supplied, uses nullid."""
961 961 if common is None:
962 962 common = [nullid]
963 963 if heads is None:
964 964 heads = self.heads()
965 965
966 966 common = [self.rev(n) for n in common]
967 967 heads = [self.rev(n) for n in heads]
968 968
969 969 inc = self.incrementalmissingrevs(common=common)
970 970 return [self.node(r) for r in inc.missingancestors(heads)]
971 971
972 972 def nodesbetween(self, roots=None, heads=None):
973 973 """Return a topological path from 'roots' to 'heads'.
974 974
975 975 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
976 976 topologically sorted list of all nodes N that satisfy both of
977 977 these constraints:
978 978
979 979 1. N is a descendant of some node in 'roots'
980 980 2. N is an ancestor of some node in 'heads'
981 981
982 982 Every node is considered to be both a descendant and an ancestor
983 983 of itself, so every reachable node in 'roots' and 'heads' will be
984 984 included in 'nodes'.
985 985
986 986 'outroots' is the list of reachable nodes in 'roots', i.e., the
987 987 subset of 'roots' that is returned in 'nodes'. Likewise,
988 988 'outheads' is the subset of 'heads' that is also in 'nodes'.
989 989
990 990 'roots' and 'heads' are both lists of node IDs. If 'roots' is
991 991 unspecified, uses nullid as the only root. If 'heads' is
992 992 unspecified, uses list of all of the revlog's heads."""
993 993 nonodes = ([], [], [])
994 994 if roots is not None:
995 995 roots = list(roots)
996 996 if not roots:
997 997 return nonodes
998 998 lowestrev = min([self.rev(n) for n in roots])
999 999 else:
1000 1000 roots = [nullid] # Everybody's a descendant of nullid
1001 1001 lowestrev = nullrev
1002 1002 if (lowestrev == nullrev) and (heads is None):
1003 1003 # We want _all_ the nodes!
1004 1004 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1005 1005 if heads is None:
1006 1006 # All nodes are ancestors, so the latest ancestor is the last
1007 1007 # node.
1008 1008 highestrev = len(self) - 1
1009 1009 # Set ancestors to None to signal that every node is an ancestor.
1010 1010 ancestors = None
1011 1011 # Set heads to an empty dictionary for later discovery of heads
1012 1012 heads = {}
1013 1013 else:
1014 1014 heads = list(heads)
1015 1015 if not heads:
1016 1016 return nonodes
1017 1017 ancestors = set()
1018 1018 # Turn heads into a dictionary so we can remove 'fake' heads.
1019 1019 # Also, later we will be using it to filter out the heads we can't
1020 1020 # find from roots.
1021 1021 heads = dict.fromkeys(heads, False)
1022 1022 # Start at the top and keep marking parents until we're done.
1023 1023 nodestotag = set(heads)
1024 1024 # Remember where the top was so we can use it as a limit later.
1025 1025 highestrev = max([self.rev(n) for n in nodestotag])
1026 1026 while nodestotag:
1027 1027 # grab a node to tag
1028 1028 n = nodestotag.pop()
1029 1029 # Never tag nullid
1030 1030 if n == nullid:
1031 1031 continue
1032 1032 # A node's revision number represents its place in a
1033 1033 # topologically sorted list of nodes.
1034 1034 r = self.rev(n)
1035 1035 if r >= lowestrev:
1036 1036 if n not in ancestors:
1037 1037 # If we are possibly a descendant of one of the roots
1038 1038 # and we haven't already been marked as an ancestor
1039 1039 ancestors.add(n) # Mark as ancestor
1040 1040 # Add non-nullid parents to list of nodes to tag.
1041 1041 nodestotag.update([p for p in self.parents(n) if
1042 1042 p != nullid])
1043 1043 elif n in heads: # We've seen it before, is it a fake head?
1044 1044 # So it is, real heads should not be the ancestors of
1045 1045 # any other heads.
1046 1046 heads.pop(n)
1047 1047 if not ancestors:
1048 1048 return nonodes
1049 1049 # Now that we have our set of ancestors, we want to remove any
1050 1050 # roots that are not ancestors.
1051 1051
1052 1052 # If one of the roots was nullid, everything is included anyway.
1053 1053 if lowestrev > nullrev:
1054 1054 # But, since we weren't, let's recompute the lowest rev to not
1055 1055 # include roots that aren't ancestors.
1056 1056
1057 1057 # Filter out roots that aren't ancestors of heads
1058 1058 roots = [root for root in roots if root in ancestors]
1059 1059 # Recompute the lowest revision
1060 1060 if roots:
1061 1061 lowestrev = min([self.rev(root) for root in roots])
1062 1062 else:
1063 1063 # No more roots? Return empty list
1064 1064 return nonodes
1065 1065 else:
1066 1066 # We are descending from nullid, and don't need to care about
1067 1067 # any other roots.
1068 1068 lowestrev = nullrev
1069 1069 roots = [nullid]
1070 1070 # Transform our roots list into a set.
1071 1071 descendants = set(roots)
1072 1072 # Also, keep the original roots so we can filter out roots that aren't
1073 1073 # 'real' roots (i.e. are descended from other roots).
1074 1074 roots = descendants.copy()
1075 1075 # Our topologically sorted list of output nodes.
1076 1076 orderedout = []
1077 1077 # Don't start at nullid since we don't want nullid in our output list,
1078 1078 # and if nullid shows up in descendants, empty parents will look like
1079 1079 # they're descendants.
1080 1080 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1081 1081 n = self.node(r)
1082 1082 isdescendant = False
1083 1083 if lowestrev == nullrev: # Everybody is a descendant of nullid
1084 1084 isdescendant = True
1085 1085 elif n in descendants:
1086 1086 # n is already a descendant
1087 1087 isdescendant = True
1088 1088 # This check only needs to be done here because all the roots
1089 1089 # will start being marked as descendants before the loop.
1090 1090 if n in roots:
1091 1091 # If n was a root, check if it's a 'real' root.
1092 1092 p = tuple(self.parents(n))
1093 1093 # If any of its parents are descendants, it's not a root.
1094 1094 if (p[0] in descendants) or (p[1] in descendants):
1095 1095 roots.remove(n)
1096 1096 else:
1097 1097 p = tuple(self.parents(n))
1098 1098 # A node is a descendant if either of its parents is a
1099 1099 # descendant. (We seeded the descendants set with the roots
1100 1100 # up there, remember?)
1101 1101 if (p[0] in descendants) or (p[1] in descendants):
1102 1102 descendants.add(n)
1103 1103 isdescendant = True
1104 1104 if isdescendant and ((ancestors is None) or (n in ancestors)):
1105 1105 # Only include nodes that are both descendants and ancestors.
1106 1106 orderedout.append(n)
1107 1107 if (ancestors is not None) and (n in heads):
1108 1108 # We're trying to figure out which heads are reachable
1109 1109 # from roots.
1110 1110 # Mark this head as having been reached
1111 1111 heads[n] = True
1112 1112 elif ancestors is None:
1113 1113 # Otherwise, we're trying to discover the heads.
1114 1114 # Assume this is a head because if it isn't, the next step
1115 1115 # will eventually remove it.
1116 1116 heads[n] = True
1117 1117 # But, obviously its parents aren't.
1118 1118 for p in self.parents(n):
1119 1119 heads.pop(p, None)
1120 1120 heads = [head for head, flag in heads.iteritems() if flag]
1121 1121 roots = list(roots)
1122 1122 assert orderedout
1123 1123 assert roots
1124 1124 assert heads
1125 1125 return (orderedout, roots, heads)
1126 1126
1127 1127 def headrevs(self, revs=None):
1128 1128 if revs is None:
1129 1129 try:
1130 1130 return self.index.headrevs()
1131 1131 except AttributeError:
1132 1132 return self._headrevs()
1133 1133 if rustext is not None:
1134 1134 return rustext.dagop.headrevs(self.index, revs)
1135 1135 return dagop.headrevs(revs, self._uncheckedparentrevs)
1136 1136
1137 1137 def computephases(self, roots):
1138 1138 return self.index.computephasesmapsets(roots)
1139 1139
1140 1140 def _headrevs(self):
1141 1141 count = len(self)
1142 1142 if not count:
1143 1143 return [nullrev]
1144 1144 # we won't iterate over filtered revs, so nobody is a head at the start
1145 1145 ishead = [0] * (count + 1)
1146 1146 index = self.index
1147 1147 for r in self:
1148 1148 ishead[r] = 1 # I may be a head
1149 1149 e = index[r]
1150 1150 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1151 1151 return [r for r, val in enumerate(ishead) if val]
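
[Editor's note] A standalone sketch of the marking scheme above, with parents as a toy {rev: (p1, p2)} dict and -1 for "no parent". The revlog version sizes the array count + 1 so that writes for nullrev (-1) land in a throwaway last slot instead of needing a branch:

    def headrevs(parents):
        ishead = [True] * (len(parents) + 1)   # extra slot absorbs rev -1
        for r, ps in parents.items():
            for p in ps:
                ishead[p] = False              # a parent is never a head
        return [r for r in parents if ishead[r]]

    assert headrevs({0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (2, 1)}) == [3]
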
1152 1152
1153 1153 def heads(self, start=None, stop=None):
1154 1154 """return the list of all nodes that have no children
1155 1155
1156 1156 if start is specified, only heads that are descendants of
1157 1157 start will be returned
1158 1158 if stop is specified, it will consider all the revs from stop
1159 1159 as if they had no children
1160 1160 """
1161 1161 if start is None and stop is None:
1162 1162 if not len(self):
1163 1163 return [nullid]
1164 1164 return [self.node(r) for r in self.headrevs()]
1165 1165
1166 1166 if start is None:
1167 1167 start = nullrev
1168 1168 else:
1169 1169 start = self.rev(start)
1170 1170
1171 1171 stoprevs = set(self.rev(n) for n in stop or [])
1172 1172
1173 1173 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1174 1174 stoprevs=stoprevs)
1175 1175
1176 1176 return [self.node(rev) for rev in revs]
1177 1177
1178 1178 def children(self, node):
1179 1179 """find the children of a given node"""
1180 1180 c = []
1181 1181 p = self.rev(node)
1182 1182 for r in self.revs(start=p + 1):
1183 1183 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1184 1184 if prevs:
1185 1185 for pr in prevs:
1186 1186 if pr == p:
1187 1187 c.append(self.node(r))
1188 1188 elif p == nullrev:
1189 1189 c.append(self.node(r))
1190 1190 return c
1191 1191
1192 1192 def commonancestorsheads(self, a, b):
1193 1193 """calculate all the heads of the common ancestors of nodes a and b"""
1194 1194 a, b = self.rev(a), self.rev(b)
1195 1195 ancs = self._commonancestorsheads(a, b)
1196 1196 return pycompat.maplist(self.node, ancs)
1197 1197
1198 1198 def _commonancestorsheads(self, *revs):
1199 1199 """calculate all the heads of the common ancestors of revs"""
1200 1200 try:
1201 1201 ancs = self.index.commonancestorsheads(*revs)
1202 1202 except (AttributeError, OverflowError): # C implementation failed
1203 1203 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1204 1204 return ancs
1205 1205
1206 1206 def isancestor(self, a, b):
1207 1207 """return True if node a is an ancestor of node b
1208 1208
1209 1209 A revision is considered an ancestor of itself."""
1210 1210 a, b = self.rev(a), self.rev(b)
1211 1211 return self.isancestorrev(a, b)
1212 1212
1213 1213 def isancestorrev(self, a, b):
1214 1214 """return True if revision a is an ancestor of revision b
1215 1215
1216 1216 A revision is considered an ancestor of itself.
1217 1217
1218 1218 The implementation of this is trivial but the use of
1219 commonancestorsheads is not."""
1219 reachableroots is not."""
1220 1220 if a == nullrev:
1221 1221 return True
1222 1222 elif a == b:
1223 1223 return True
1224 1224 elif a > b:
1225 1225 return False
1226 return a in self._commonancestorsheads(a, b)
1226 return bool(self.reachableroots(a, [b], [a], includepath=False))
1227
1228 def reachableroots(self, minroot, heads, roots, includepath=False):
1229 """return (heads(::<roots> and <roots>::<heads>))
1230
1231 If includepath is True, return (<roots>::<heads>)."""
1232 try:
1233 return self.index.reachableroots2(minroot, heads, roots,
1234 includepath)
1235 except AttributeError:
1236 return dagop._reachablerootspure(self.parentrevs,
1237 minroot, roots, heads, includepath)
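
[Editor's note] This substitution is the speedup the changeset advertises: with includepath=False, reachableroots returns the members of roots that are ancestors of heads, so a single-root query answers the ancestry test directly, and passing a as minroot lets the index implementation stop scanning below the only candidate root. A sketch of the equivalence, assuming rl is an open revlog and a, b are unfiltered revision numbers:

    assert rl.isancestorrev(a, b) == bool(
        rl.reachableroots(a, [b], [a], includepath=False))
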
1227 1238
1228 1239 def ancestor(self, a, b):
1229 1240 """calculate the "best" common ancestor of nodes a and b"""
1230 1241
1231 1242 a, b = self.rev(a), self.rev(b)
1232 1243 try:
1233 1244 ancs = self.index.ancestors(a, b)
1234 1245 except (AttributeError, OverflowError):
1235 1246 ancs = ancestor.ancestors(self.parentrevs, a, b)
1236 1247 if ancs:
1237 1248 # choose a consistent winner when there's a tie
1238 1249 return min(map(self.node, ancs))
1239 1250 return nullid
1240 1251
1241 1252 def _match(self, id):
1242 1253 if isinstance(id, int):
1243 1254 # rev
1244 1255 return self.node(id)
1245 1256 if len(id) == 20:
1246 1257 # possibly a binary node
1247 1258 # odds of a binary node being all hex in ASCII are 1 in 10**25
1248 1259 try:
1249 1260 node = id
1250 1261 self.rev(node) # quick search the index
1251 1262 return node
1252 1263 except error.LookupError:
1253 1264 pass # may be partial hex id
1254 1265 try:
1255 1266 # str(rev)
1256 1267 rev = int(id)
1257 1268 if "%d" % rev != id:
1258 1269 raise ValueError
1259 1270 if rev < 0:
1260 1271 rev = len(self) + rev
1261 1272 if rev < 0 or rev >= len(self):
1262 1273 raise ValueError
1263 1274 return self.node(rev)
1264 1275 except (ValueError, OverflowError):
1265 1276 pass
1266 1277 if len(id) == 40:
1267 1278 try:
1268 1279 # a full hex nodeid?
1269 1280 node = bin(id)
1270 1281 self.rev(node)
1271 1282 return node
1272 1283 except (TypeError, error.LookupError):
1273 1284 pass
1274 1285
1275 1286 def _partialmatch(self, id):
1276 1287 # we don't care about wdirfilenodeids as they should always be full hashes
1277 1288 maybewdir = wdirhex.startswith(id)
1278 1289 try:
1279 1290 partial = self.index.partialmatch(id)
1280 1291 if partial and self.hasnode(partial):
1281 1292 if maybewdir:
1282 1293 # single 'ff...' match in radix tree, ambiguous with wdir
1283 1294 raise error.RevlogError
1284 1295 return partial
1285 1296 if maybewdir:
1286 1297 # no 'ff...' match in radix tree, wdir identified
1287 1298 raise error.WdirUnsupported
1288 1299 return None
1289 1300 except error.RevlogError:
1290 1301 # parsers.c radix tree lookup gave multiple matches
1291 1302 # fast path: for unfiltered changelog, radix tree is accurate
1292 1303 if not getattr(self, 'filteredrevs', None):
1293 1304 raise error.AmbiguousPrefixLookupError(
1294 1305 id, self.indexfile, _('ambiguous identifier'))
1295 1306 # fall through to slow path that filters hidden revisions
1296 1307 except (AttributeError, ValueError):
1297 1308 # we are pure python, or key was too short to search radix tree
1298 1309 pass
1299 1310
1300 1311 if id in self._pcache:
1301 1312 return self._pcache[id]
1302 1313
1303 1314 if len(id) <= 40:
1304 1315 try:
1305 1316 # hex(node)[:...]
1306 1317 l = len(id) // 2 # grab an even number of digits
1307 1318 prefix = bin(id[:l * 2])
1308 1319 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1309 1320 nl = [n for n in nl if hex(n).startswith(id) and
1310 1321 self.hasnode(n)]
1311 1322 if nullhex.startswith(id):
1312 1323 nl.append(nullid)
1313 1324 if len(nl) > 0:
1314 1325 if len(nl) == 1 and not maybewdir:
1315 1326 self._pcache[id] = nl[0]
1316 1327 return nl[0]
1317 1328 raise error.AmbiguousPrefixLookupError(
1318 1329 id, self.indexfile, _('ambiguous identifier'))
1319 1330 if maybewdir:
1320 1331 raise error.WdirUnsupported
1321 1332 return None
1322 1333 except TypeError:
1323 1334 pass
1324 1335
1325 1336 def lookup(self, id):
1326 1337 """locate a node based on:
1327 1338 - revision number or str(revision number)
1328 1339 - nodeid or subset of hex nodeid
1329 1340 """
1330 1341 n = self._match(id)
1331 1342 if n is not None:
1332 1343 return n
1333 1344 n = self._partialmatch(id)
1334 1345 if n:
1335 1346 return n
1336 1347
1337 1348 raise error.LookupError(id, self.indexfile, _('no match found'))
1338 1349
1339 1350 def shortest(self, node, minlength=1):
1340 1351 """Find the shortest unambiguous prefix that matches node."""
1341 1352 def isvalid(prefix):
1342 1353 try:
1343 1354 node = self._partialmatch(prefix)
1344 1355 except error.AmbiguousPrefixLookupError:
1345 1356 return False
1346 1357 except error.WdirUnsupported:
1347 1358 # single 'ff...' match
1348 1359 return True
1349 1360 if node is None:
1350 1361 raise error.LookupError(node, self.indexfile, _('no node'))
1351 1362 return True
1352 1363
1353 1364 def maybewdir(prefix):
1354 1365 return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1355 1366
1356 1367 hexnode = hex(node)
1357 1368
1358 1369 def disambiguate(hexnode, minlength):
1359 1370 """Disambiguate against wdirid."""
1360 1371 for length in range(minlength, 41):
1361 1372 prefix = hexnode[:length]
1362 1373 if not maybewdir(prefix):
1363 1374 return prefix
1364 1375
1365 1376 if not getattr(self, 'filteredrevs', None):
1366 1377 try:
1367 1378 length = max(self.index.shortest(node), minlength)
1368 1379 return disambiguate(hexnode, length)
1369 1380 except error.RevlogError:
1370 1381 if node != wdirid:
1371 1382 raise error.LookupError(node, self.indexfile, _('no node'))
1372 1383 except AttributeError:
1373 1384 # Fall through to pure code
1374 1385 pass
1375 1386
1376 1387 if node == wdirid:
1377 1388 for length in range(minlength, 41):
1378 1389 prefix = hexnode[:length]
1379 1390 if isvalid(prefix):
1380 1391 return prefix
1381 1392
1382 1393 for length in range(minlength, 41):
1383 1394 prefix = hexnode[:length]
1384 1395 if isvalid(prefix):
1385 1396 return disambiguate(hexnode, length)
1386 1397
1387 1398 def cmp(self, node, text):
1388 1399 """compare text with a given file revision
1389 1400
1390 1401 returns True if text is different than what is stored.
1391 1402 """
1392 1403 p1, p2 = self.parents(node)
1393 1404 return storageutil.hashrevisionsha1(text, p1, p2) != node
1394 1405
1395 1406 def _cachesegment(self, offset, data):
1396 1407 """Add a segment to the revlog cache.
1397 1408
1398 1409 Accepts an absolute offset and the data that is at that location.
1399 1410 """
1400 1411 o, d = self._chunkcache
1401 1412 # try to add to existing cache
1402 1413 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1403 1414 self._chunkcache = o, d + data
1404 1415 else:
1405 1416 self._chunkcache = offset, data
1406 1417
1407 1418 def _readsegment(self, offset, length, df=None):
1408 1419 """Load a segment of raw data from the revlog.
1409 1420
1410 1421 Accepts an absolute offset, length to read, and an optional existing
1411 1422 file handle to read from.
1412 1423
1413 1424 If an existing file handle is passed, it will be repositioned and its
1414 1425 original seek position will NOT be restored.
1415 1426
1416 1427 Returns a str or buffer of raw byte data.
1417 1428
1418 1429 Raises if the requested number of bytes could not be read.
1419 1430 """
1420 1431 # Cache data both forward and backward around the requested
1421 1432 # data, in a fixed size window. This helps speed up operations
1422 1433 # involving reading the revlog backwards.
1423 1434 cachesize = self._chunkcachesize
1424 1435 realoffset = offset & ~(cachesize - 1)
1425 1436 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1426 1437 - realoffset)
1427 1438 with self._datareadfp(df) as df:
1428 1439 df.seek(realoffset)
1429 1440 d = df.read(reallength)
1430 1441
1431 1442 self._cachesegment(realoffset, d)
1432 1443 if offset != realoffset or reallength != length:
1433 1444 startoffset = offset - realoffset
1434 1445 if len(d) - startoffset < length:
1435 1446 raise error.RevlogError(
1436 1447 _('partial read of revlog %s; expected %d bytes from '
1437 1448 'offset %d, got %d') %
1438 1449 (self.indexfile if self._inline else self.datafile,
1439 1450 length, realoffset, len(d) - startoffset))
1440 1451
1441 1452 return util.buffer(d, startoffset, length)
1442 1453
1443 1454 if len(d) < length:
1444 1455 raise error.RevlogError(
1445 1456 _('partial read of revlog %s; expected %d bytes from offset '
1446 1457 '%d, got %d') %
1447 1458 (self.indexfile if self._inline else self.datafile,
1448 1459 length, offset, len(d)))
1449 1460
1450 1461 return d
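
[Editor's note] A worked example of the window arithmetic above, with the default 64 KiB cache size: a 1000-byte request at offset 65000 crosses a window boundary, so it is widened to a single aligned 128 KiB read starting at offset 0.

    cachesize = 65536
    offset, length = 65000, 1000
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    assert (realoffset, reallength) == (0, 131072)
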
1451 1462
1452 1463 def _getsegment(self, offset, length, df=None):
1453 1464 """Obtain a segment of raw data from the revlog.
1454 1465
1455 1466 Accepts an absolute offset, length of bytes to obtain, and an
1456 1467 optional file handle to the already-opened revlog. If the file
1457 1468 handle is used, its original seek position will not be preserved.
1458 1469
1459 1470 Requests for data may be returned from a cache.
1460 1471
1461 1472 Returns a str or a buffer instance of raw byte data.
1462 1473 """
1463 1474 o, d = self._chunkcache
1464 1475 l = len(d)
1465 1476
1466 1477 # is it in the cache?
1467 1478 cachestart = offset - o
1468 1479 cacheend = cachestart + length
1469 1480 if cachestart >= 0 and cacheend <= l:
1470 1481 if cachestart == 0 and cacheend == l:
1471 1482 return d # avoid a copy
1472 1483 return util.buffer(d, cachestart, cacheend - cachestart)
1473 1484
1474 1485 return self._readsegment(offset, length, df=df)
1475 1486
1476 1487 def _getsegmentforrevs(self, startrev, endrev, df=None):
1477 1488 """Obtain a segment of raw data corresponding to a range of revisions.
1478 1489
1479 1490 Accepts the start and end revisions and an optional already-open
1480 1491 file handle to be used for reading. If the file handle is used, its
1481 1492 seek position will not be preserved.
1482 1493
1483 1494 Requests for data may be satisfied by a cache.
1484 1495
1485 1496 Returns a 2-tuple of (offset, data) for the requested range of
1486 1497 revisions. Offset is the integer offset from the beginning of the
1487 1498 revlog and data is a str or buffer of the raw byte data.
1488 1499
1489 1500 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1490 1501 to determine where each revision's data begins and ends.
1491 1502 """
1492 1503 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1493 1504 # (functions are expensive).
1494 1505 index = self.index
1495 1506 istart = index[startrev]
1496 1507 start = int(istart[0] >> 16)
1497 1508 if startrev == endrev:
1498 1509 end = start + istart[1]
1499 1510 else:
1500 1511 iend = index[endrev]
1501 1512 end = int(iend[0] >> 16) + iend[1]
1502 1513
1503 1514 if self._inline:
1504 1515 start += (startrev + 1) * self._io.size
1505 1516 end += (endrev + 1) * self._io.size
1506 1517 length = end - start
1507 1518
1508 1519 return start, self._getsegment(start, length, df=df)
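
[Editor's note] Worked numbers for the inline adjustment above: in an inline revlog the data shares the index file, with each revision's chunk stored immediately after its own 64-byte index entry, so a recorded (logical) data offset is shifted by (rev + 1) entries to get the physical file offset.

    iosize = 64                        # indexformatng.size
    start_logical = 0                  # recorded data offset of rev 0
    start_physical = start_logical + (0 + 1) * iosize
    assert start_physical == 64        # rev 0's chunk follows its entry
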
1509 1520
1510 1521 def _chunk(self, rev, df=None):
1511 1522 """Obtain a single decompressed chunk for a revision.
1512 1523
1513 1524 Accepts an integer revision and an optional already-open file handle
1514 1525 to be used for reading. If used, the seek position of the file will not
1515 1526 be preserved.
1516 1527
1517 1528 Returns a str holding uncompressed data for the requested revision.
1518 1529 """
1519 1530 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1520 1531
1521 1532 def _chunks(self, revs, df=None, targetsize=None):
1522 1533 """Obtain decompressed chunks for the specified revisions.
1523 1534
1524 1535 Accepts an iterable of numeric revisions that are assumed to be in
1525 1536 ascending order. Also accepts an optional already-open file handle
1526 1537 to be used for reading. If used, the seek position of the file will
1527 1538 not be preserved.
1528 1539
1529 1540 This function is similar to calling ``self._chunk()`` multiple times,
1530 1541 but is faster.
1531 1542
1532 1543 Returns a list with decompressed data for each requested revision.
1533 1544 """
1534 1545 if not revs:
1535 1546 return []
1536 1547 start = self.start
1537 1548 length = self.length
1538 1549 inline = self._inline
1539 1550 iosize = self._io.size
1540 1551 buffer = util.buffer
1541 1552
1542 1553 l = []
1543 1554 ladd = l.append
1544 1555
1545 1556 if not self._withsparseread:
1546 1557 slicedchunks = (revs,)
1547 1558 else:
1548 1559 slicedchunks = deltautil.slicechunk(self, revs,
1549 1560 targetsize=targetsize)
1550 1561
1551 1562 for revschunk in slicedchunks:
1552 1563 firstrev = revschunk[0]
1553 1564 # Skip trailing revisions with empty diff
1554 1565 for lastrev in revschunk[::-1]:
1555 1566 if length(lastrev) != 0:
1556 1567 break
1557 1568
1558 1569 try:
1559 1570 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1560 1571 except OverflowError:
1561 1572 # issue4215 - we can't cache a run of chunks greater than
1562 1573 # 2G on Windows
1563 1574 return [self._chunk(rev, df=df) for rev in revschunk]
1564 1575
1565 1576 decomp = self.decompress
1566 1577 for rev in revschunk:
1567 1578 chunkstart = start(rev)
1568 1579 if inline:
1569 1580 chunkstart += (rev + 1) * iosize
1570 1581 chunklength = length(rev)
1571 1582 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1572 1583
1573 1584 return l
1574 1585
1575 1586 def _chunkclear(self):
1576 1587 """Clear the raw chunk cache."""
1577 1588 self._chunkcache = (0, '')
1578 1589
1579 1590 def deltaparent(self, rev):
1580 1591 """return deltaparent of the given revision"""
1581 1592 base = self.index[rev][3]
1582 1593 if base == rev:
1583 1594 return nullrev
1584 1595 elif self._generaldelta:
1585 1596 return base
1586 1597 else:
1587 1598 return rev - 1
1588 1599
1589 1600 def issnapshot(self, rev):
1590 1601 """tells whether rev is a snapshot
1591 1602 """
1592 1603 if not self._sparserevlog:
1593 1604 return self.deltaparent(rev) == nullrev
1594 1605 elif util.safehasattr(self.index, 'issnapshot'):
1595 1606 # directly assign the method to cache the testing and access
1596 1607 self.issnapshot = self.index.issnapshot
1597 1608 return self.issnapshot(rev)
1598 1609 if rev == nullrev:
1599 1610 return True
1600 1611 entry = self.index[rev]
1601 1612 base = entry[3]
1602 1613 if base == rev:
1603 1614 return True
1604 1615 if base == nullrev:
1605 1616 return True
1606 1617 p1 = entry[5]
1607 1618 p2 = entry[6]
1608 1619 if base == p1 or base == p2:
1609 1620 return False
1610 1621 return self.issnapshot(base)
1611 1622
1612 1623 def snapshotdepth(self, rev):
1613 1624 """number of snapshot in the chain before this one"""
1614 1625 if not self.issnapshot(rev):
1615 1626 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1616 1627 return len(self._deltachain(rev)[0]) - 1
1617 1628
1618 1629 def revdiff(self, rev1, rev2):
1619 1630 """return or calculate a delta between two revisions
1620 1631
1621 1632 The delta calculated is in binary form and is intended to be written to
1622 1633 revlog data directly. So this function needs raw revision data.
1623 1634 """
1624 1635 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1625 1636 return bytes(self._chunk(rev2))
1626 1637
1627 1638 return mdiff.textdiff(self.revision(rev1, raw=True),
1628 1639 self.revision(rev2, raw=True))
1629 1640
1630 1641 def revision(self, nodeorrev, _df=None, raw=False):
1631 1642 """return an uncompressed revision of a given node or revision
1632 1643 number.
1633 1644
1634 1645 _df - an existing file handle to read from. (internal-only)
1635 1646 raw - an optional argument specifying if the revision data is to be
1636 1647 treated as raw data when applying flag transforms. 'raw' should be set
1637 1648 to True when generating changegroups or in debug commands.
1638 1649 """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

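            # Bound how much data a sliced read may span at once. The 4x
            # multiplier below is a heuristic bound (assumption: it trades
            # a little read amplification for fewer, larger reads).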
            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order defined in
        REVIDX_FLAGS_ORDER must stay stable so that non-commutative transforms
        applied on read can be undone in the reverse order on write.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                validatehash = validatehash and vhash

        return text, validatehash

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
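        # Illustrative header bytes (partial list; anything else is
        # resolved through the compression engine registry below):
        # 'x' marks zlib data, 'u' marks data stored uncompressed, and
        # '\0' marks raw data that never needed a marker.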
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new, going through the
          full ``addrevision()`` code path rather than the lower-level
          ``_addrevision()`` path.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        the two extremes: deltas are reused when appropriate, but recomputed
        when the destination revlog would pick a better base revision. This
        means that when converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a parent
        of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether deltas are computed against both parents
        for merges. If None, the destination revlog's existing setting is
        kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d