revlog: move the `trypending` logic from the `changelog` to the `revlog`...
marmoute
r48014:4f38ada3 default
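The gist of the change, condensed from the two hunks below (all names are taken from the diff, nothing is invented): the changelog stops resolving the `00changelog.i.a` postfix itself and just forwards the flag, while the revlog now picks the pending index file when it loads its index.

    # sketch only: how the pieces fit together after this change
    cl = changelog(opener, trypending=True)   # changelog.__init__ forwards trypending
    # revlog.__init__ stores it as self._trypending, and _loadindex() then does:
    #     if self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
    #         entry_point = b'%s.i.a' % self.radix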
mercurial/changelog.py
@@ -1,630 +1,625 @@
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 )
15 15 from .thirdparty import attr
16 16
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 metadata,
21 21 pycompat,
22 22 revlog,
23 23 )
24 24 from .utils import (
25 25 dateutil,
26 26 stringutil,
27 27 )
28 28 from .revlogutils import (
29 29 constants as revlog_constants,
30 30 flagutil,
31 31 )
32 32
33 33 _defaultextra = {b'branch': b'default'}
34 34
35 35
36 36 def _string_escape(text):
37 37 """
38 38 >>> from .pycompat import bytechr as chr
39 39 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
40 40 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
41 41 >>> s
42 42 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
43 43 >>> res = _string_escape(s)
44 44 >>> s == _string_unescape(res)
45 45 True
46 46 """
47 47 # subset of the string_escape codec
48 48 text = (
49 49 text.replace(b'\\', b'\\\\')
50 50 .replace(b'\n', b'\\n')
51 51 .replace(b'\r', b'\\r')
52 52 )
53 53 return text.replace(b'\0', b'\\0')
54 54
55 55
56 56 def _string_unescape(text):
57 57 if b'\\0' in text:
58 58 # fix up \0 without getting into trouble with \\0
59 59 text = text.replace(b'\\\\', b'\\\\\n')
60 60 text = text.replace(b'\\0', b'\0')
61 61 text = text.replace(b'\n', b'')
62 62 return stringutil.unescapestr(text)
63 63
64 64
65 65 def decodeextra(text):
66 66 """
67 67 >>> from .pycompat import bytechr as chr
68 68 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
69 69 ... ).items())
70 70 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
71 71 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
72 72 ... b'baz': chr(92) + chr(0) + b'2'})
73 73 ... ).items())
74 74 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
75 75 """
76 76 extra = _defaultextra.copy()
77 77 for l in text.split(b'\0'):
78 78 if l:
79 79 k, v = _string_unescape(l).split(b':', 1)
80 80 extra[k] = v
81 81 return extra
82 82
83 83
84 84 def encodeextra(d):
85 85 # keys must be sorted to produce a deterministic changelog entry
86 86 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
87 87 return b"\0".join(items)
88 88
89 89
90 90 def stripdesc(desc):
91 91 """strip trailing whitespace and leading and trailing empty lines"""
92 92 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
93 93
94 94
95 95 class appender(object):
96 96 """the changelog index must be updated last on disk, so we use this class
97 97 to delay writes to it"""
98 98
99 99 def __init__(self, vfs, name, mode, buf):
100 100 self.data = buf
101 101 fp = vfs(name, mode)
102 102 self.fp = fp
103 103 self.offset = fp.tell()
104 104 self.size = vfs.fstat(fp).st_size
105 105 self._end = self.size
106 106
107 107 def end(self):
108 108 return self._end
109 109
110 110 def tell(self):
111 111 return self.offset
112 112
113 113 def flush(self):
114 114 pass
115 115
116 116 @property
117 117 def closed(self):
118 118 return self.fp.closed
119 119
120 120 def close(self):
121 121 self.fp.close()
122 122
123 123 def seek(self, offset, whence=0):
124 124 '''virtual file offset spans real file and data'''
125 125 if whence == 0:
126 126 self.offset = offset
127 127 elif whence == 1:
128 128 self.offset += offset
129 129 elif whence == 2:
130 130 self.offset = self.end() + offset
131 131 if self.offset < self.size:
132 132 self.fp.seek(self.offset)
133 133
134 134 def read(self, count=-1):
135 135 '''only trick here is reads that span real file and data'''
136 136 ret = b""
137 137 if self.offset < self.size:
138 138 s = self.fp.read(count)
139 139 ret = s
140 140 self.offset += len(s)
141 141 if count > 0:
142 142 count -= len(s)
143 143 if count != 0:
144 144 doff = self.offset - self.size
145 145 self.data.insert(0, b"".join(self.data))
146 146 del self.data[1:]
147 147 s = self.data[0][doff : doff + count]
148 148 self.offset += len(s)
149 149 ret += s
150 150 return ret
151 151
152 152 def write(self, s):
153 153 self.data.append(bytes(s))
154 154 self.offset += len(s)
155 155 self._end += len(s)
156 156
157 157 def __enter__(self):
158 158 self.fp.__enter__()
159 159 return self
160 160
161 161 def __exit__(self, *args):
162 162 return self.fp.__exit__(*args)
163 163
164 164
165 165 class _divertopener(object):
166 166 def __init__(self, opener, target):
167 167 self._opener = opener
168 168 self._target = target
169 169
170 170 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
171 171 if name != self._target:
172 172 return self._opener(name, mode, **kwargs)
173 173 return self._opener(name + b".a", mode, **kwargs)
174 174
175 175 def __getattr__(self, attr):
176 176 return getattr(self._opener, attr)
177 177
178 178
179 179 def _delayopener(opener, target, buf):
180 180 """build an opener that stores chunks in 'buf' instead of 'target'"""
181 181
182 182 def _delay(name, mode=b'r', checkambig=False, **kwargs):
183 183 if name != target:
184 184 return opener(name, mode, **kwargs)
185 185 assert not kwargs
186 186 return appender(opener, name, mode, buf)
187 187
188 188 return _delay
189 189
190 190
191 191 @attr.s
192 192 class _changelogrevision(object):
193 193 # Extensions might modify _defaultextra, so let the constructor below pass
194 194 # it in
195 195 extra = attr.ib()
196 196 manifest = attr.ib()
197 197 user = attr.ib(default=b'')
198 198 date = attr.ib(default=(0, 0))
199 199 files = attr.ib(default=attr.Factory(list))
200 200 filesadded = attr.ib(default=None)
201 201 filesremoved = attr.ib(default=None)
202 202 p1copies = attr.ib(default=None)
203 203 p2copies = attr.ib(default=None)
204 204 description = attr.ib(default=b'')
205 205 branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))
206 206
207 207
208 208 class changelogrevision(object):
209 209 """Holds results of a parsed changelog revision.
210 210
211 211 Changelog revisions consist of multiple pieces of data, including
212 212 the manifest node, user, and date. This object exposes a view into
213 213 the parsed object.
214 214 """
215 215
216 216 __slots__ = (
217 217 '_offsets',
218 218 '_text',
219 219 '_sidedata',
220 220 '_cpsd',
221 221 '_changes',
222 222 )
223 223
224 224 def __new__(cls, cl, text, sidedata, cpsd):
225 225 if not text:
226 226 return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)
227 227
228 228 self = super(changelogrevision, cls).__new__(cls)
229 229 # We could return here and implement the following as an __init__.
230 230 # But doing it here is equivalent and saves an extra function call.
231 231
232 232 # format used:
233 233 # nodeid\n : manifest node in ascii
234 234 # user\n : user, no \n or \r allowed
235 235 # time tz extra\n : date (time is int or float, timezone is int)
236 236 # : extra is metadata, encoded and separated by '\0'
237 237 # : older versions ignore it
238 238 # files\n\n : files modified by the cset, no \n or \r allowed
239 239 # (.*) : comment (free text, ideally utf-8)
240 240 #
241 241 # changelog v0 doesn't use extra
242 242
243 243 nl1 = text.index(b'\n')
244 244 nl2 = text.index(b'\n', nl1 + 1)
245 245 nl3 = text.index(b'\n', nl2 + 1)
246 246
247 247 # The list of files may be empty, in which case nl3 is the first of the
248 248 # double newline that precedes the description.
249 249 if text[nl3 + 1 : nl3 + 2] == b'\n':
250 250 doublenl = nl3
251 251 else:
252 252 doublenl = text.index(b'\n\n', nl3 + 1)
253 253
254 254 self._offsets = (nl1, nl2, nl3, doublenl)
255 255 self._text = text
256 256 self._sidedata = sidedata
257 257 self._cpsd = cpsd
258 258 self._changes = None
259 259
260 260 return self
261 261
262 262 @property
263 263 def manifest(self):
264 264 return bin(self._text[0 : self._offsets[0]])
265 265
266 266 @property
267 267 def user(self):
268 268 off = self._offsets
269 269 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
270 270
271 271 @property
272 272 def _rawdate(self):
273 273 off = self._offsets
274 274 dateextra = self._text[off[1] + 1 : off[2]]
275 275 return dateextra.split(b' ', 2)[0:2]
276 276
277 277 @property
278 278 def _rawextra(self):
279 279 off = self._offsets
280 280 dateextra = self._text[off[1] + 1 : off[2]]
281 281 fields = dateextra.split(b' ', 2)
282 282 if len(fields) != 3:
283 283 return None
284 284
285 285 return fields[2]
286 286
287 287 @property
288 288 def date(self):
289 289 raw = self._rawdate
290 290 time = float(raw[0])
291 291 # Various tools did silly things with the timezone.
292 292 try:
293 293 timezone = int(raw[1])
294 294 except ValueError:
295 295 timezone = 0
296 296
297 297 return time, timezone
298 298
299 299 @property
300 300 def extra(self):
301 301 raw = self._rawextra
302 302 if raw is None:
303 303 return _defaultextra
304 304
305 305 return decodeextra(raw)
306 306
307 307 @property
308 308 def changes(self):
309 309 if self._changes is not None:
310 310 return self._changes
311 311 if self._cpsd:
312 312 changes = metadata.decode_files_sidedata(self._sidedata)
313 313 else:
314 314 changes = metadata.ChangingFiles(
315 315 touched=self.files or (),
316 316 added=self.filesadded or (),
317 317 removed=self.filesremoved or (),
318 318 p1_copies=self.p1copies or {},
319 319 p2_copies=self.p2copies or {},
320 320 )
321 321 self._changes = changes
322 322 return changes
323 323
324 324 @property
325 325 def files(self):
326 326 if self._cpsd:
327 327 return sorted(self.changes.touched)
328 328 off = self._offsets
329 329 if off[2] == off[3]:
330 330 return []
331 331
332 332 return self._text[off[2] + 1 : off[3]].split(b'\n')
333 333
334 334 @property
335 335 def filesadded(self):
336 336 if self._cpsd:
337 337 return self.changes.added
338 338 else:
339 339 rawindices = self.extra.get(b'filesadded')
340 340 if rawindices is None:
341 341 return None
342 342 return metadata.decodefileindices(self.files, rawindices)
343 343
344 344 @property
345 345 def filesremoved(self):
346 346 if self._cpsd:
347 347 return self.changes.removed
348 348 else:
349 349 rawindices = self.extra.get(b'filesremoved')
350 350 if rawindices is None:
351 351 return None
352 352 return metadata.decodefileindices(self.files, rawindices)
353 353
354 354 @property
355 355 def p1copies(self):
356 356 if self._cpsd:
357 357 return self.changes.copied_from_p1
358 358 else:
359 359 rawcopies = self.extra.get(b'p1copies')
360 360 if rawcopies is None:
361 361 return None
362 362 return metadata.decodecopies(self.files, rawcopies)
363 363
364 364 @property
365 365 def p2copies(self):
366 366 if self._cpsd:
367 367 return self.changes.copied_from_p2
368 368 else:
369 369 rawcopies = self.extra.get(b'p2copies')
370 370 if rawcopies is None:
371 371 return None
372 372 return metadata.decodecopies(self.files, rawcopies)
373 373
374 374 @property
375 375 def description(self):
376 376 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
377 377
378 378 @property
379 379 def branchinfo(self):
380 380 extra = self.extra
381 381 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
382 382
383 383
384 384 class changelog(revlog.revlog):
385 385 def __init__(self, opener, trypending=False, concurrencychecker=None):
386 386 """Load a changelog revlog using an opener.
387 387
388 388 If ``trypending`` is true, we attempt to load the index from a
389 389 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
390 390 The ``00changelog.i.a`` file contains index (and possibly inline
391 391 revision) data for a transaction that hasn't been finalized yet.
392 392 It exists in a separate file to facilitate readers (such as
393 393 hooks processes) accessing data before a transaction is finalized.
394 394
395 395 ``concurrencychecker`` will be passed to the revlog init function, see
396 396 the documentation there.
397 397 """
398 398
399 if trypending and opener.exists(b'00changelog.i.a'):
400 postfix = b'a'
401 else:
402 postfix = None
403
404 399 revlog.revlog.__init__(
405 400 self,
406 401 opener,
407 402 target=(revlog_constants.KIND_CHANGELOG, None),
408 403 radix=b'00changelog',
409 postfix=postfix,
410 404 checkambig=True,
411 405 mmaplargeindex=True,
412 406 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
413 407 concurrencychecker=concurrencychecker,
408 trypending=trypending,
414 409 )
415 410
416 411 if self._initempty and (self._format_version == revlog.REVLOGV1):
417 412 # changelogs don't benefit from generaldelta.
418 413
419 414 self._format_flags &= ~revlog.FLAG_GENERALDELTA
420 415 self._generaldelta = False
421 416
422 417 # Delta chains for changelogs tend to be very small because entries
423 418 # tend to be small and don't delta well with each other. So disable delta
424 419 # chains.
425 420 self._storedeltachains = False
426 421
427 422 self._realopener = opener
428 423 self._delayed = False
429 424 self._delaybuf = None
430 425 self._divert = False
431 426 self._filteredrevs = frozenset()
432 427 self._filteredrevs_hashcache = {}
433 428 self._copiesstorage = opener.options.get(b'copies-storage')
434 429
435 430 @property
436 431 def filteredrevs(self):
437 432 return self._filteredrevs
438 433
439 434 @filteredrevs.setter
440 435 def filteredrevs(self, val):
441 436 # Ensure all updates go through this function
442 437 assert isinstance(val, frozenset)
443 438 self._filteredrevs = val
444 439 self._filteredrevs_hashcache = {}
445 440
446 441 def _write_docket(self, tr):
447 442 if not self._delayed:
448 443 super(changelog, self)._write_docket(tr)
449 444
450 445 def delayupdate(self, tr):
451 446 """delay visibility of index updates to other readers"""
452 447 if self._docket is None and not self._delayed:
453 448 if len(self) == 0:
454 449 self._divert = True
455 450 if self._realopener.exists(self._indexfile + b'.a'):
456 451 self._realopener.unlink(self._indexfile + b'.a')
457 452 self.opener = _divertopener(self._realopener, self._indexfile)
458 453 else:
459 454 self._delaybuf = []
460 455 self.opener = _delayopener(
461 456 self._realopener, self._indexfile, self._delaybuf
462 457 )
463 458 self._delayed = True
464 459 tr.addpending(b'cl-%i' % id(self), self._writepending)
465 460 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
466 461
467 462 def _finalize(self, tr):
468 463 """finalize index updates"""
469 464 self._delayed = False
470 465 self.opener = self._realopener
471 466 # move redirected index data back into place
472 467 if self._docket is not None:
473 468 self._write_docket(tr)
474 469 elif self._divert:
475 470 assert not self._delaybuf
476 471 tmpname = self._indexfile + b".a"
477 472 nfile = self.opener.open(tmpname)
478 473 nfile.close()
479 474 self.opener.rename(tmpname, self._indexfile, checkambig=True)
480 475 elif self._delaybuf:
481 476 fp = self.opener(self._indexfile, b'a', checkambig=True)
482 477 fp.write(b"".join(self._delaybuf))
483 478 fp.close()
484 479 self._delaybuf = None
485 480 self._divert = False
486 481 # split when we're done
487 482 self._enforceinlinesize(tr)
488 483
489 484 def _writepending(self, tr):
490 485 """create a file containing the unfinalized state for
491 486 pretxnchangegroup"""
492 487 if self._delaybuf:
493 488 # make a temporary copy of the index
494 489 fp1 = self._realopener(self._indexfile)
495 490 pendingfilename = self._indexfile + b".a"
496 491 # register as a temp file to ensure cleanup on failure
497 492 tr.registertmp(pendingfilename)
498 493 # write existing data
499 494 fp2 = self._realopener(pendingfilename, b"w")
500 495 fp2.write(fp1.read())
501 496 # add pending data
502 497 fp2.write(b"".join(self._delaybuf))
503 498 fp2.close()
504 499 # switch modes so finalize can simply rename
505 500 self._delaybuf = None
506 501 self._divert = True
507 502 self.opener = _divertopener(self._realopener, self._indexfile)
508 503
509 504 if self._divert:
510 505 return True
511 506
512 507 return False
513 508
514 509 def _enforceinlinesize(self, tr):
515 510 if not self._delayed:
516 511 revlog.revlog._enforceinlinesize(self, tr)
517 512
518 513 def read(self, nodeorrev):
519 514 """Obtain data from a parsed changelog revision.
520 515
521 516 Returns a 6-tuple of:
522 517
523 518 - manifest node in binary
524 519 - author/user as a localstr
525 520 - date as a 2-tuple of (time, timezone)
526 521 - list of files
527 522 - commit message as a localstr
528 523 - dict of extra metadata
529 524
530 525 Unless you need to access all fields, consider calling
531 526 ``changelogrevision`` instead, as it is faster for partial object
532 527 access.
533 528 """
534 529 d, s = self._revisiondata(nodeorrev)
535 530 c = changelogrevision(
536 531 self, d, s, self._copiesstorage == b'changeset-sidedata'
537 532 )
538 533 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
539 534
540 535 def changelogrevision(self, nodeorrev):
541 536 """Obtain a ``changelogrevision`` for a node or revision."""
542 537 text, sidedata = self._revisiondata(nodeorrev)
543 538 return changelogrevision(
544 539 self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
545 540 )
546 541
547 542 def readfiles(self, nodeorrev):
548 543 """
549 544 short version of read that only returns the files modified by the cset
550 545 """
551 546 text = self.revision(nodeorrev)
552 547 if not text:
553 548 return []
554 549 last = text.index(b"\n\n")
555 550 l = text[:last].split(b'\n')
556 551 return l[3:]
557 552
558 553 def add(
559 554 self,
560 555 manifest,
561 556 files,
562 557 desc,
563 558 transaction,
564 559 p1,
565 560 p2,
566 561 user,
567 562 date=None,
568 563 extra=None,
569 564 ):
570 565 # Convert to UTF-8 encoded bytestrings as the very first
571 566 # thing: calling any method on a localstr object will turn it
572 567 # into a str object and the cached UTF-8 string is thus lost.
573 568 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
574 569
575 570 user = user.strip()
576 571 # An empty username or a username with a "\n" will make the
577 572 # revision text contain two "\n\n" sequences -> corrupt
578 573 # repository since read cannot unpack the revision.
579 574 if not user:
580 575 raise error.StorageError(_(b"empty username"))
581 576 if b"\n" in user:
582 577 raise error.StorageError(
583 578 _(b"username %r contains a newline") % pycompat.bytestr(user)
584 579 )
585 580
586 581 desc = stripdesc(desc)
587 582
588 583 if date:
589 584 parseddate = b"%d %d" % dateutil.parsedate(date)
590 585 else:
591 586 parseddate = b"%d %d" % dateutil.makedate()
592 587 if extra:
593 588 branch = extra.get(b"branch")
594 589 if branch in (b"default", b""):
595 590 del extra[b"branch"]
596 591 elif branch in (b".", b"null", b"tip"):
597 592 raise error.StorageError(
598 593 _(b'the name \'%s\' is reserved') % branch
599 594 )
600 595 sortedfiles = sorted(files.touched)
601 596 flags = 0
602 597 sidedata = None
603 598 if self._copiesstorage == b'changeset-sidedata':
604 599 if files.has_copies_info:
605 600 flags |= flagutil.REVIDX_HASCOPIESINFO
606 601 sidedata = metadata.encode_files_sidedata(files)
607 602
608 603 if extra:
609 604 extra = encodeextra(extra)
610 605 parseddate = b"%s %s" % (parseddate, extra)
611 606 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
612 607 text = b"\n".join(l)
613 608 rev = self.addrevision(
614 609 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
615 610 )
616 611 return self.node(rev)
617 612
618 613 def branchinfo(self, rev):
619 614 """return the branch name and open/close state of a revision
620 615
621 616 This function exists because creating a changectx object
622 617 just to access this is costly."""
623 618 return self.changelogrevision(rev).branchinfo
624 619
625 620 def _nodeduplicatecallback(self, transaction, rev):
626 621 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
627 622 #
628 623 # We track them in a list to preserve their order from the source bundle
629 624 duplicates = transaction.changes.setdefault(b'revduplicates', [])
630 625 duplicates.append(rev)
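Below is the revlog side of the move. For orientation, the new entry-point selection in `_loadindex` boils down to the following order (a condensed restatement of the hunk, not a verbatim excerpt):

    # condensed sketch of revlog._loadindex entry-point selection
    if self.postfix is not None:                    # explicit postfix (e.g. tmpcensored)
        entry_point = b'%s.i.%s' % (self.radix, self.postfix)
    elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
        entry_point = b'%s.i.a' % self.radix        # pending data from an open transaction
    else:
        entry_point = b'%s.i' % self.radix          # regular index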
mercurial/revlog.py
@@ -1,3246 +1,3254 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 FEATURES_BY_VERSION,
39 39 FLAG_GENERALDELTA,
40 40 FLAG_INLINE_DATA,
41 41 INDEX_HEADER,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 SUPPORTED_FLAGS,
51 51 )
52 52 from .revlogutils.flagutil import (
53 53 REVIDX_DEFAULT_FLAGS,
54 54 REVIDX_ELLIPSIS,
55 55 REVIDX_EXTSTORED,
56 56 REVIDX_FLAGS_ORDER,
57 57 REVIDX_HASCOPIESINFO,
58 58 REVIDX_ISCENSORED,
59 59 REVIDX_RAWTEXT_CHANGING_FLAGS,
60 60 )
61 61 from .thirdparty import attr
62 62 from . import (
63 63 ancestor,
64 64 dagop,
65 65 error,
66 66 mdiff,
67 67 policy,
68 68 pycompat,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 docket as docketutil,
79 79 flagutil,
80 80 nodemap as nodemaputil,
81 81 revlogv0,
82 82 sidedata as sidedatautil,
83 83 )
84 84 from .utils import (
85 85 storageutil,
86 86 stringutil,
87 87 )
88 88
89 89 # blank usage of all the names to prevent pyflakes complaints
90 90 # We need these names available in the module for extensions.
91 91
92 92 REVLOGV0
93 93 REVLOGV1
94 94 REVLOGV2
95 95 FLAG_INLINE_DATA
96 96 FLAG_GENERALDELTA
97 97 REVLOG_DEFAULT_FLAGS
98 98 REVLOG_DEFAULT_FORMAT
99 99 REVLOG_DEFAULT_VERSION
100 100 REVLOGV1_FLAGS
101 101 REVLOGV2_FLAGS
102 102 REVIDX_ISCENSORED
103 103 REVIDX_ELLIPSIS
104 104 REVIDX_HASCOPIESINFO
105 105 REVIDX_EXTSTORED
106 106 REVIDX_DEFAULT_FLAGS
107 107 REVIDX_FLAGS_ORDER
108 108 REVIDX_RAWTEXT_CHANGING_FLAGS
109 109
110 110 parsers = policy.importmod('parsers')
111 111 rustancestor = policy.importrust('ancestor')
112 112 rustdagop = policy.importrust('dagop')
113 113 rustrevlog = policy.importrust('revlog')
114 114
115 115 # Aliased for performance.
116 116 _zlibdecompress = zlib.decompress
117 117
118 118 # max size of revlog with inline data
119 119 _maxinline = 131072
120 120 _chunksize = 1048576
121 121
122 122 # Flag processors for REVIDX_ELLIPSIS.
123 123 def ellipsisreadprocessor(rl, text):
124 124 return text, False
125 125
126 126
127 127 def ellipsiswriteprocessor(rl, text):
128 128 return text, False
129 129
130 130
131 131 def ellipsisrawprocessor(rl, text):
132 132 return False
133 133
134 134
135 135 ellipsisprocessor = (
136 136 ellipsisreadprocessor,
137 137 ellipsiswriteprocessor,
138 138 ellipsisrawprocessor,
139 139 )
140 140
141 141
142 142 def offset_type(offset, type):
143 143 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
144 144 raise ValueError(b'unknown revlog index flags')
145 145 return int(int(offset) << 16 | type)
146 146
147 147
148 148 def _verify_revision(rl, skipflags, state, node):
149 149 """Verify the integrity of the given revlog ``node`` while providing a hook
150 150 point for extensions to influence the operation."""
151 151 if skipflags:
152 152 state[b'skipread'].add(node)
153 153 else:
154 154 # Side-effect: read content and verify hash.
155 155 rl.revision(node)
156 156
157 157
158 158 # True if a fast implementation for persistent-nodemap is available
159 159 #
160 160 # We also consider we have a "fast" implementation in "pure" python because
161 161 # people using pure don't really have performance considerations (and a
162 162 # wheelbarrow of other slowness sources)
163 163 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
164 164 parsers, 'BaseIndexObject'
165 165 )
166 166
167 167
168 168 @attr.s(slots=True, frozen=True)
169 169 class _revisioninfo(object):
170 170 """Information about a revision that allows building its fulltext
171 171 node: expected hash of the revision
172 172 p1, p2: parent revs of the revision
173 173 btext: built text cache consisting of a one-element list
174 174 cachedelta: (baserev, uncompressed_delta) or None
175 175 flags: flags associated to the revision storage
176 176
177 177 One of btext[0] or cachedelta must be set.
178 178 """
179 179
180 180 node = attr.ib()
181 181 p1 = attr.ib()
182 182 p2 = attr.ib()
183 183 btext = attr.ib()
184 184 textlen = attr.ib()
185 185 cachedelta = attr.ib()
186 186 flags = attr.ib()
187 187
188 188
189 189 @interfaceutil.implementer(repository.irevisiondelta)
190 190 @attr.s(slots=True)
191 191 class revlogrevisiondelta(object):
192 192 node = attr.ib()
193 193 p1node = attr.ib()
194 194 p2node = attr.ib()
195 195 basenode = attr.ib()
196 196 flags = attr.ib()
197 197 baserevisionsize = attr.ib()
198 198 revision = attr.ib()
199 199 delta = attr.ib()
200 200 sidedata = attr.ib()
201 201 protocol_flags = attr.ib()
202 202 linknode = attr.ib(default=None)
203 203
204 204
205 205 @interfaceutil.implementer(repository.iverifyproblem)
206 206 @attr.s(frozen=True)
207 207 class revlogproblem(object):
208 208 warning = attr.ib(default=None)
209 209 error = attr.ib(default=None)
210 210 node = attr.ib(default=None)
211 211
212 212
213 213 def parse_index_v1(data, inline):
214 214 # call the C implementation to parse the index data
215 215 index, cache = parsers.parse_index2(data, inline)
216 216 return index, cache
217 217
218 218
219 219 def parse_index_v2(data, inline):
220 220 # call the C implementation to parse the index data
221 221 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
222 222 return index, cache
223 223
224 224
225 225 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
226 226
227 227 def parse_index_v1_nodemap(data, inline):
228 228 index, cache = parsers.parse_index_devel_nodemap(data, inline)
229 229 return index, cache
230 230
231 231
232 232 else:
233 233 parse_index_v1_nodemap = None
234 234
235 235
236 236 def parse_index_v1_mixed(data, inline):
237 237 index, cache = parse_index_v1(data, inline)
238 238 return rustrevlog.MixedIndex(index), cache
239 239
240 240
241 241 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
242 242 # signed integer)
243 243 _maxentrysize = 0x7FFFFFFF
244 244
245 245
246 246 class revlog(object):
247 247 """
248 248 the underlying revision storage object
249 249
250 250 A revlog consists of two parts, an index and the revision data.
251 251
252 252 The index is a file with a fixed record size containing
253 253 information on each revision, including its nodeid (hash), the
254 254 nodeids of its parents, the position and offset of its data within
255 255 the data file, and the revision it's based on. Finally, each entry
256 256 contains a linkrev entry that can serve as a pointer to external
257 257 data.
258 258
259 259 The revision data itself is a linear collection of data chunks.
260 260 Each chunk represents a revision and is usually represented as a
261 261 delta against the previous chunk. To bound lookup time, runs of
262 262 deltas are limited to about 2 times the length of the original
263 263 version data. This makes retrieval of a version proportional to
264 264 its size, or O(1) relative to the number of revisions.
265 265
266 266 Both pieces of the revlog are written to in an append-only
267 267 fashion, which means we never need to rewrite a file to insert or
268 268 remove data, and can use some simple techniques to avoid the need
269 269 for locking while reading.
270 270
271 271 If checkambig, indexfile is opened with checkambig=True at
272 272 writing, to avoid file stat ambiguity.
273 273
274 274 If mmaplargeindex is True, and an mmapindexthreshold is set, the
275 275 index will be mmapped rather than read if it is larger than the
276 276 configured threshold.
277 277
278 278 If censorable is True, the revlog can have censored revisions.
279 279
280 280 If `upperboundcomp` is not None, this is the expected maximal gain from
281 281 compression for the data content.
282 282
283 283 `concurrencychecker` is an optional function that receives 3 arguments: a
284 284 file handle, a filename, and an expected position. It should check whether
285 285 the current position in the file handle is valid, and log/warn/fail (by
286 286 raising).
287 287 """
288 288
289 289 _flagserrorclass = error.RevlogError
290 290
291 291 def __init__(
292 292 self,
293 293 opener,
294 294 target,
295 295 radix,
296 postfix=None,
296 postfix=None, # only exists for `tmpcensored` now
297 297 checkambig=False,
298 298 mmaplargeindex=False,
299 299 censorable=False,
300 300 upperboundcomp=None,
301 301 persistentnodemap=False,
302 302 concurrencychecker=None,
303 trypending=False,
303 304 ):
304 305 """
305 306 create a revlog object
306 307
307 308 opener is a function that abstracts the file opening operation
308 309 and can be used to implement COW semantics or the like.
309 310
310 311 `target`: a (KIND, ID) tuple that identifies the content stored in
311 312 this revlog. It helps the rest of the code to understand what the revlog
312 313 is about without having to resort to heuristics and index filename
313 314 analysis. Note that this must reliably be set by normal code, but
314 315 that test, debug, or performance measurement code might not set this to
315 316 an accurate value.
316 317 """
317 318 self.upperboundcomp = upperboundcomp
318 319
319 320 self.radix = radix
320 321
321 322 self._docket_file = None
322 323 self._indexfile = None
323 324 self._datafile = None
324 325 self._nodemap_file = None
325 326 self.postfix = postfix
327 self._trypending = trypending
326 328 self.opener = opener
327 329 if persistentnodemap:
328 330 self._nodemap_file = nodemaputil.get_nodemap_file(self)
329 331
330 332 assert target[0] in ALL_KINDS
331 333 assert len(target) == 2
332 334 self.target = target
333 335 # When True, indexfile is opened with checkambig=True at writing, to
334 336 # avoid file stat ambiguity.
335 337 self._checkambig = checkambig
336 338 self._mmaplargeindex = mmaplargeindex
337 339 self._censorable = censorable
338 340 # 3-tuple of (node, rev, text) for a raw revision.
339 341 self._revisioncache = None
340 342 # Maps rev to chain base rev.
341 343 self._chainbasecache = util.lrucachedict(100)
342 344 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
343 345 self._chunkcache = (0, b'')
344 346 # How much data to read and cache into the raw revlog data cache.
345 347 self._chunkcachesize = 65536
346 348 self._maxchainlen = None
347 349 self._deltabothparents = True
348 350 self.index = None
349 351 self._docket = None
350 352 self._nodemap_docket = None
351 353 # Mapping of partial identifiers to full nodes.
352 354 self._pcache = {}
353 355 # Mapping of revision integer to full node.
354 356 self._compengine = b'zlib'
355 357 self._compengineopts = {}
356 358 self._maxdeltachainspan = -1
357 359 self._withsparseread = False
358 360 self._sparserevlog = False
359 361 self.hassidedata = False
360 362 self._srdensitythreshold = 0.50
361 363 self._srmingapsize = 262144
362 364
363 365 # Make copy of flag processors so each revlog instance can support
364 366 # custom flags.
365 367 self._flagprocessors = dict(flagutil.flagprocessors)
366 368
367 369 # 2-tuple of file handles being used for active writing.
368 370 self._writinghandles = None
369 371 # prevent nesting of addgroup
370 372 self._adding_group = None
371 373
372 374 self._loadindex()
373 375
374 376 self._concurrencychecker = concurrencychecker
375 377
376 378 def _init_opts(self):
377 379 """process options (from above/config) to setup associated default revlog mode
378 380
379 381 These values might be affected when actually reading on disk information.
380 382
381 383 The relevant values are returned for use in _loadindex().
382 384
383 385 * newversionflags:
384 386 version header to use if we need to create a new revlog
385 387
386 388 * mmapindexthreshold:
387 389 minimal index size for start to use mmap
388 390
389 391 * force_nodemap:
390 392 force the usage of a "development" version of the nodemap code
391 393 """
392 394 mmapindexthreshold = None
393 395 opts = self.opener.options
394 396
395 397 if b'revlogv2' in opts:
396 398 new_header = REVLOGV2 | FLAG_INLINE_DATA
397 399 elif b'revlogv1' in opts:
398 400 new_header = REVLOGV1 | FLAG_INLINE_DATA
399 401 if b'generaldelta' in opts:
400 402 new_header |= FLAG_GENERALDELTA
401 403 elif b'revlogv0' in self.opener.options:
402 404 new_header = REVLOGV0
403 405 else:
404 406 new_header = REVLOG_DEFAULT_VERSION
405 407
406 408 if b'chunkcachesize' in opts:
407 409 self._chunkcachesize = opts[b'chunkcachesize']
408 410 if b'maxchainlen' in opts:
409 411 self._maxchainlen = opts[b'maxchainlen']
410 412 if b'deltabothparents' in opts:
411 413 self._deltabothparents = opts[b'deltabothparents']
412 414 self._lazydelta = bool(opts.get(b'lazydelta', True))
413 415 self._lazydeltabase = False
414 416 if self._lazydelta:
415 417 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
416 418 if b'compengine' in opts:
417 419 self._compengine = opts[b'compengine']
418 420 if b'zlib.level' in opts:
419 421 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
420 422 if b'zstd.level' in opts:
421 423 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
422 424 if b'maxdeltachainspan' in opts:
423 425 self._maxdeltachainspan = opts[b'maxdeltachainspan']
424 426 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
425 427 mmapindexthreshold = opts[b'mmapindexthreshold']
426 428 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
427 429 withsparseread = bool(opts.get(b'with-sparse-read', False))
428 430 # sparse-revlog forces sparse-read
429 431 self._withsparseread = self._sparserevlog or withsparseread
430 432 if b'sparse-read-density-threshold' in opts:
431 433 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
432 434 if b'sparse-read-min-gap-size' in opts:
433 435 self._srmingapsize = opts[b'sparse-read-min-gap-size']
434 436 if opts.get(b'enableellipsis'):
435 437 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
436 438
437 439 # revlog v0 doesn't have flag processors
438 440 for flag, processor in pycompat.iteritems(
439 441 opts.get(b'flagprocessors', {})
440 442 ):
441 443 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
442 444
443 445 if self._chunkcachesize <= 0:
444 446 raise error.RevlogError(
445 447 _(b'revlog chunk cache size %r is not greater than 0')
446 448 % self._chunkcachesize
447 449 )
448 450 elif self._chunkcachesize & (self._chunkcachesize - 1):
449 451 raise error.RevlogError(
450 452 _(b'revlog chunk cache size %r is not a power of 2')
451 453 % self._chunkcachesize
452 454 )
453 455 force_nodemap = opts.get(b'devel-force-nodemap', False)
454 456 return new_header, mmapindexthreshold, force_nodemap
455 457
456 458 def _get_data(self, filepath, mmap_threshold, size=None):
457 459 """return a file content with or without mmap
458 460
459 461 If the file is missing return the empty string"""
460 462 try:
461 463 with self.opener(filepath) as fp:
462 464 if mmap_threshold is not None:
463 465 file_size = self.opener.fstat(fp).st_size
464 466 if file_size >= mmap_threshold:
465 467 if size is not None:
466 468 # avoid potential mmap crash
467 469 size = min(file_size, size)
468 470 # TODO: should .close() to release resources without
469 471 # relying on Python GC
470 472 if size is None:
471 473 return util.buffer(util.mmapread(fp))
472 474 else:
473 475 return util.buffer(util.mmapread(fp, size))
474 476 if size is None:
475 477 return fp.read()
476 478 else:
477 479 return fp.read(size)
478 480 except IOError as inst:
479 481 if inst.errno != errno.ENOENT:
480 482 raise
481 483 return b''
482 484
483 485 def _loadindex(self):
484 486
485 487 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
486 488
487 if self.postfix is None:
489 if self.postfix is not None:
490 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
491 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
492 entry_point = b'%s.i.a' % self.radix
493 else:
488 494 entry_point = b'%s.i' % self.radix
489 else:
490 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
491 495
492 496 entry_data = b''
493 497 self._initempty = True
494 498 entry_data = self._get_data(entry_point, mmapindexthreshold)
495 499 if len(entry_data) > 0:
496 500 header = INDEX_HEADER.unpack(entry_data[:4])[0]
497 501 self._initempty = False
498 502 else:
499 503 header = new_header
500 504
501 505 self._format_flags = header & ~0xFFFF
502 506 self._format_version = header & 0xFFFF
503 507
504 508 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
505 509 if supported_flags is None:
506 510 msg = _(b'unknown version (%d) in revlog %s')
507 511 msg %= (self._format_version, self.display_id)
508 512 raise error.RevlogError(msg)
509 513 elif self._format_flags & ~supported_flags:
510 514 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
511 515 display_flag = self._format_flags >> 16
512 516 msg %= (display_flag, self._format_version, self.display_id)
513 517 raise error.RevlogError(msg)
514 518
515 519 features = FEATURES_BY_VERSION[self._format_version]
516 520 self._inline = features[b'inline'](self._format_flags)
517 521 self._generaldelta = features[b'generaldelta'](self._format_flags)
518 522 self.hassidedata = features[b'sidedata']
519 523
520 524 if not features[b'docket']:
521 525 self._indexfile = entry_point
522 526 index_data = entry_data
523 527 else:
524 528 self._docket_file = entry_point
525 529 if self._initempty:
526 530 self._docket = docketutil.default_docket(self, header)
527 531 else:
528 532 self._docket = docketutil.parse_docket(self, entry_data)
529 533 self._indexfile = self._docket.index_filepath()
530 534 index_data = b''
531 535 index_size = self._docket.index_end
532 536 if index_size > 0:
533 537 index_data = self._get_data(
534 538 self._indexfile, mmapindexthreshold, size=index_size
535 539 )
536 540 if len(index_data) < index_size:
537 541 msg = _(b'too few index data for %s: got %d, expected %d')
538 542 msg %= (self.display_id, len(index_data), index_size)
539 543 raise error.RevlogError(msg)
540 544
541 545 self._inline = False
542 546 # generaldelta implied by version 2 revlogs.
543 547 self._generaldelta = True
544 548 # the logic for persistent nodemap will be dealt with within the
545 549 # main docket, so disable it for now.
546 550 self._nodemap_file = None
547 551
548 if self.postfix is None or self.postfix == b'a':
552 if self.postfix is None:
549 553 self._datafile = b'%s.d' % self.radix
550 554 else:
551 555 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
552 556
553 557 self.nodeconstants = sha1nodeconstants
554 558 self.nullid = self.nodeconstants.nullid
555 559
556 560 # sparse-revlog can't be on without general-delta (issue6056)
557 561 if not self._generaldelta:
558 562 self._sparserevlog = False
559 563
560 564 self._storedeltachains = True
561 565
562 566 devel_nodemap = (
563 567 self._nodemap_file
564 568 and force_nodemap
565 569 and parse_index_v1_nodemap is not None
566 570 )
567 571
568 572 use_rust_index = False
569 573 if rustrevlog is not None:
570 574 if self._nodemap_file is not None:
571 575 use_rust_index = True
572 576 else:
573 577 use_rust_index = self.opener.options.get(b'rust.index')
574 578
575 579 self._parse_index = parse_index_v1
576 580 if self._format_version == REVLOGV0:
577 581 self._parse_index = revlogv0.parse_index_v0
578 582 elif self._format_version == REVLOGV2:
579 583 self._parse_index = parse_index_v2
580 584 elif devel_nodemap:
581 585 self._parse_index = parse_index_v1_nodemap
582 586 elif use_rust_index:
583 587 self._parse_index = parse_index_v1_mixed
584 588 try:
585 589 d = self._parse_index(index_data, self._inline)
586 590 index, _chunkcache = d
587 591 use_nodemap = (
588 592 not self._inline
589 593 and self._nodemap_file is not None
590 594 and util.safehasattr(index, 'update_nodemap_data')
591 595 )
592 596 if use_nodemap:
593 597 nodemap_data = nodemaputil.persisted_data(self)
594 598 if nodemap_data is not None:
595 599 docket = nodemap_data[0]
596 600 if (
597 601 len(d[0]) > docket.tip_rev
598 602 and d[0][docket.tip_rev][7] == docket.tip_node
599 603 ):
600 604 # no changelog tampering
601 605 self._nodemap_docket = docket
602 606 index.update_nodemap_data(*nodemap_data)
603 607 except (ValueError, IndexError):
604 608 raise error.RevlogError(
605 609 _(b"index %s is corrupted") % self.display_id
606 610 )
607 611 self.index, self._chunkcache = d
608 612 if not self._chunkcache:
609 613 self._chunkclear()
610 614 # revnum -> (chain-length, sum-delta-length)
611 615 self._chaininfocache = util.lrucachedict(500)
612 616 # revlog header -> revlog compressor
613 617 self._decompressors = {}
614 618
615 619 @util.propertycache
616 620 def revlog_kind(self):
617 621 return self.target[0]
618 622
619 623 @util.propertycache
620 624 def display_id(self):
621 625 """The public facing "ID" of the revlog that we use in message"""
622 626 # Maybe we should build a user facing representation of
623 627 # revlog.target instead of using `self.radix`
624 628 return self.radix
625 629
626 630 @util.propertycache
627 631 def _compressor(self):
628 632 engine = util.compengines[self._compengine]
629 633 return engine.revlogcompressor(self._compengineopts)
630 634
631 635 def _indexfp(self):
632 636 """file object for the revlog's index file"""
633 637 return self.opener(self._indexfile, mode=b"r")
634 638
635 639 def __index_write_fp(self):
636 640 # You should not use this directly and use `_writing` instead
637 641 try:
638 642 f = self.opener(
639 643 self._indexfile, mode=b"r+", checkambig=self._checkambig
640 644 )
641 645 if self._docket is None:
642 646 f.seek(0, os.SEEK_END)
643 647 else:
644 648 f.seek(self._docket.index_end, os.SEEK_SET)
645 649 return f
646 650 except IOError as inst:
647 651 if inst.errno != errno.ENOENT:
648 652 raise
649 653 return self.opener(
650 654 self._indexfile, mode=b"w+", checkambig=self._checkambig
651 655 )
652 656
653 657 def __index_new_fp(self):
654 658 # You should not use this unless you are upgrading from inline revlog
655 659 return self.opener(
656 660 self._indexfile,
657 661 mode=b"w",
658 662 checkambig=self._checkambig,
659 663 atomictemp=True,
660 664 )
661 665
662 666 def _datafp(self, mode=b'r'):
663 667 """file object for the revlog's data file"""
664 668 return self.opener(self._datafile, mode=mode)
665 669
666 670 @contextlib.contextmanager
667 671 def _datareadfp(self, existingfp=None):
668 672 """file object suitable to read data"""
669 673 # Use explicit file handle, if given.
670 674 if existingfp is not None:
671 675 yield existingfp
672 676
673 677 # Use a file handle being actively used for writes, if available.
674 678 # There is some danger to doing this because reads will seek the
675 679 # file. However, _writeentry() performs a SEEK_END before all writes,
676 680 # so we should be safe.
677 681 elif self._writinghandles:
678 682 if self._inline:
679 683 yield self._writinghandles[0]
680 684 else:
681 685 yield self._writinghandles[1]
682 686
683 687 # Otherwise open a new file handle.
684 688 else:
685 689 if self._inline:
686 690 func = self._indexfp
687 691 else:
688 692 func = self._datafp
689 693 with func() as fp:
690 694 yield fp
691 695
692 696 def tiprev(self):
693 697 return len(self.index) - 1
694 698
695 699 def tip(self):
696 700 return self.node(self.tiprev())
697 701
698 702 def __contains__(self, rev):
699 703 return 0 <= rev < len(self)
700 704
701 705 def __len__(self):
702 706 return len(self.index)
703 707
704 708 def __iter__(self):
705 709 return iter(pycompat.xrange(len(self)))
706 710
707 711 def revs(self, start=0, stop=None):
708 712 """iterate over all rev in this revlog (from start to stop)"""
709 713 return storageutil.iterrevs(len(self), start=start, stop=stop)
710 714
711 715 @property
712 716 def nodemap(self):
713 717 msg = (
714 718 b"revlog.nodemap is deprecated, "
715 719 b"use revlog.index.[has_node|rev|get_rev]"
716 720 )
717 721 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
718 722 return self.index.nodemap
719 723
720 724 @property
721 725 def _nodecache(self):
722 726 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
723 727 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
724 728 return self.index.nodemap
725 729
726 730 def hasnode(self, node):
727 731 try:
728 732 self.rev(node)
729 733 return True
730 734 except KeyError:
731 735 return False
732 736
733 737 def candelta(self, baserev, rev):
734 738 """whether two revisions (baserev, rev) can be delta-ed or not"""
735 739 # Disable delta if either rev requires a content-changing flag
736 740 # processor (ex. LFS). This is because such flag processor can alter
737 741 # the rawtext content that the delta will be based on, and two clients
738 742 # could have a same revlog node with different flags (i.e. different
739 743 # rawtext contents) and the delta could be incompatible.
740 744 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
741 745 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
742 746 ):
743 747 return False
744 748 return True
745 749
746 750 def update_caches(self, transaction):
747 751 if self._nodemap_file is not None:
748 752 if transaction is None:
749 753 nodemaputil.update_persistent_nodemap(self)
750 754 else:
751 755 nodemaputil.setup_persistent_nodemap(transaction, self)
752 756
753 757 def clearcaches(self):
754 758 self._revisioncache = None
755 759 self._chainbasecache.clear()
756 760 self._chunkcache = (0, b'')
757 761 self._pcache = {}
758 762 self._nodemap_docket = None
759 763 self.index.clearcaches()
760 764 # The python code is the one responsible for validating the docket, so we
761 765 # end up having to refresh it here.
762 766 use_nodemap = (
763 767 not self._inline
764 768 and self._nodemap_file is not None
765 769 and util.safehasattr(self.index, 'update_nodemap_data')
766 770 )
767 771 if use_nodemap:
768 772 nodemap_data = nodemaputil.persisted_data(self)
769 773 if nodemap_data is not None:
770 774 self._nodemap_docket = nodemap_data[0]
771 775 self.index.update_nodemap_data(*nodemap_data)
772 776
773 777 def rev(self, node):
774 778 try:
775 779 return self.index.rev(node)
776 780 except TypeError:
777 781 raise
778 782 except error.RevlogError:
779 783 # parsers.c radix tree lookup failed
780 784 if (
781 785 node == self.nodeconstants.wdirid
782 786 or node in self.nodeconstants.wdirfilenodeids
783 787 ):
784 788 raise error.WdirUnsupported
785 789 raise error.LookupError(node, self.display_id, _(b'no node'))
786 790
787 791 # Accessors for index entries.
788 792
789 793 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
790 794 # are flags.
791 795 def start(self, rev):
792 796 return int(self.index[rev][0] >> 16)
793 797
794 798 def flags(self, rev):
795 799 return self.index[rev][0] & 0xFFFF
796 800
797 801 def length(self, rev):
798 802 return self.index[rev][1]
799 803
800 804 def sidedata_length(self, rev):
801 805 if not self.hassidedata:
802 806 return 0
803 807 return self.index[rev][9]
804 808
805 809 def rawsize(self, rev):
806 810 """return the length of the uncompressed text for a given revision"""
807 811 l = self.index[rev][2]
808 812 if l >= 0:
809 813 return l
810 814
811 815 t = self.rawdata(rev)
812 816 return len(t)
813 817
814 818 def size(self, rev):
815 819 """length of non-raw text (processed by a "read" flag processor)"""
816 820 # fast path: if no "read" flag processor could change the content,
817 821 # size is rawsize. note: ELLIPSIS is known to not change the content.
818 822 flags = self.flags(rev)
819 823 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
820 824 return self.rawsize(rev)
821 825
822 826 return len(self.revision(rev, raw=False))
823 827
824 828 def chainbase(self, rev):
825 829 base = self._chainbasecache.get(rev)
826 830 if base is not None:
827 831 return base
828 832
829 833 index = self.index
830 834 iterrev = rev
831 835 base = index[iterrev][3]
832 836 while base != iterrev:
833 837 iterrev = base
834 838 base = index[iterrev][3]
835 839
836 840 self._chainbasecache[rev] = base
837 841 return base
838 842
839 843 def linkrev(self, rev):
840 844 return self.index[rev][4]
841 845
842 846 def parentrevs(self, rev):
843 847 try:
844 848 entry = self.index[rev]
845 849 except IndexError:
846 850 if rev == wdirrev:
847 851 raise error.WdirUnsupported
848 852 raise
849 853 if entry[5] == nullrev:
850 854 return entry[6], entry[5]
851 855 else:
852 856 return entry[5], entry[6]
853 857
854 858 # fast parentrevs(rev) where rev isn't filtered
855 859 _uncheckedparentrevs = parentrevs
856 860
857 861 def node(self, rev):
858 862 try:
859 863 return self.index[rev][7]
860 864 except IndexError:
861 865 if rev == wdirrev:
862 866 raise error.WdirUnsupported
863 867 raise
864 868
865 869 # Derived from index values.
866 870
867 871 def end(self, rev):
868 872 return self.start(rev) + self.length(rev)
869 873
870 874 def parents(self, node):
871 875 i = self.index
872 876 d = i[self.rev(node)]
873 877 # inline node() to avoid function call overhead
874 878 if d[5] == self.nullid:
875 879 return i[d[6]][7], i[d[5]][7]
876 880 else:
877 881 return i[d[5]][7], i[d[6]][7]
878 882
879 883 def chainlen(self, rev):
880 884 return self._chaininfo(rev)[0]
881 885
882 886 def _chaininfo(self, rev):
883 887 chaininfocache = self._chaininfocache
884 888 if rev in chaininfocache:
885 889 return chaininfocache[rev]
886 890 index = self.index
887 891 generaldelta = self._generaldelta
888 892 iterrev = rev
889 893 e = index[iterrev]
890 894 clen = 0
891 895 compresseddeltalen = 0
892 896 while iterrev != e[3]:
893 897 clen += 1
894 898 compresseddeltalen += e[1]
895 899 if generaldelta:
896 900 iterrev = e[3]
897 901 else:
898 902 iterrev -= 1
899 903 if iterrev in chaininfocache:
900 904 t = chaininfocache[iterrev]
901 905 clen += t[0]
902 906 compresseddeltalen += t[1]
903 907 break
904 908 e = index[iterrev]
905 909 else:
906 910 # Add text length of base since decompressing that also takes
907 911 # work. For cache hits the length is already included.
908 912 compresseddeltalen += e[1]
909 913 r = (clen, compresseddeltalen)
910 914 chaininfocache[rev] = r
911 915 return r
912 916
913 917 def _deltachain(self, rev, stoprev=None):
914 918 """Obtain the delta chain for a revision.
915 919
916 920 ``stoprev`` specifies a revision to stop at. If not specified, we
917 921 stop at the base of the chain.
918 922
919 923 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
920 924 revs in ascending order and ``stopped`` is a bool indicating whether
921 925 ``stoprev`` was hit.
922 926 """
923 927 # Try C implementation.
924 928 try:
925 929 return self.index.deltachain(rev, stoprev, self._generaldelta)
926 930 except AttributeError:
927 931 pass
928 932
929 933 chain = []
930 934
931 935 # Alias to prevent attribute lookup in tight loop.
932 936 index = self.index
933 937 generaldelta = self._generaldelta
934 938
935 939 iterrev = rev
936 940 e = index[iterrev]
937 941 while iterrev != e[3] and iterrev != stoprev:
938 942 chain.append(iterrev)
939 943 if generaldelta:
940 944 iterrev = e[3]
941 945 else:
942 946 iterrev -= 1
943 947 e = index[iterrev]
944 948
945 949 if iterrev == stoprev:
946 950 stopped = True
947 951 else:
948 952 chain.append(iterrev)
949 953 stopped = False
950 954
951 955 chain.reverse()
952 956 return chain, stopped
953 957
954 958 def ancestors(self, revs, stoprev=0, inclusive=False):
955 959 """Generate the ancestors of 'revs' in reverse revision order.
956 960 Does not generate revs lower than stoprev.
957 961
958 962 See the documentation for ancestor.lazyancestors for more details."""
959 963
960 964 # first, make sure start revisions aren't filtered
961 965 revs = list(revs)
962 966 checkrev = self.node
963 967 for r in revs:
964 968 checkrev(r)
965 969 # and we're sure ancestors aren't filtered as well
966 970
967 971 if rustancestor is not None:
968 972 lazyancestors = rustancestor.LazyAncestors
969 973 arg = self.index
970 974 else:
971 975 lazyancestors = ancestor.lazyancestors
972 976 arg = self._uncheckedparentrevs
973 977 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
974 978
975 979 def descendants(self, revs):
976 980 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
977 981
978 982 def findcommonmissing(self, common=None, heads=None):
979 983 """Return a tuple of the ancestors of common and the ancestors of heads
980 984 that are not ancestors of common. In revset terminology, we return the
981 985 tuple:
982 986
983 987 ::common, (::heads) - (::common)
984 988
985 989 The list is sorted by revision number, meaning it is
986 990 topologically sorted.
987 991
988 992 'heads' and 'common' are both lists of node IDs. If heads is
989 993 not supplied, uses all of the revlog's heads. If common is not
990 994 supplied, uses nullid."""
991 995 if common is None:
992 996 common = [self.nullid]
993 997 if heads is None:
994 998 heads = self.heads()
995 999
996 1000 common = [self.rev(n) for n in common]
997 1001 heads = [self.rev(n) for n in heads]
998 1002
999 1003 # we want the ancestors, but inclusive
1000 1004 class lazyset(object):
1001 1005 def __init__(self, lazyvalues):
1002 1006 self.addedvalues = set()
1003 1007 self.lazyvalues = lazyvalues
1004 1008
1005 1009 def __contains__(self, value):
1006 1010 return value in self.addedvalues or value in self.lazyvalues
1007 1011
1008 1012 def __iter__(self):
1009 1013 added = self.addedvalues
1010 1014 for r in added:
1011 1015 yield r
1012 1016 for r in self.lazyvalues:
1013 1017 if not r in added:
1014 1018 yield r
1015 1019
1016 1020 def add(self, value):
1017 1021 self.addedvalues.add(value)
1018 1022
1019 1023 def update(self, values):
1020 1024 self.addedvalues.update(values)
1021 1025
1022 1026 has = lazyset(self.ancestors(common))
1023 1027 has.add(nullrev)
1024 1028 has.update(common)
1025 1029
1026 1030 # take all ancestors from heads that aren't in has
1027 1031 missing = set()
1028 1032 visit = collections.deque(r for r in heads if r not in has)
1029 1033 while visit:
1030 1034 r = visit.popleft()
1031 1035 if r in missing:
1032 1036 continue
1033 1037 else:
1034 1038 missing.add(r)
1035 1039 for p in self.parentrevs(r):
1036 1040 if p not in has:
1037 1041 visit.append(p)
1038 1042 missing = list(missing)
1039 1043 missing.sort()
1040 1044 return has, [self.node(miss) for miss in missing]
1041 1045
1042 1046 def incrementalmissingrevs(self, common=None):
1043 1047 """Return an object that can be used to incrementally compute the
1044 1048 revision numbers of the ancestors of arbitrary sets that are not
1045 1049 ancestors of common. This is an ancestor.incrementalmissingancestors
1046 1050 object.
1047 1051
1048 1052 'common' is a list of revision numbers. If common is not supplied, uses
1049 1053 nullrev.
1050 1054 """
1051 1055 if common is None:
1052 1056 common = [nullrev]
1053 1057
1054 1058 if rustancestor is not None:
1055 1059 return rustancestor.MissingAncestors(self.index, common)
1056 1060 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1057 1061
1058 1062 def findmissingrevs(self, common=None, heads=None):
1059 1063 """Return the revision numbers of the ancestors of heads that
1060 1064 are not ancestors of common.
1061 1065
1062 1066 More specifically, return a list of revision numbers corresponding to
1063 1067 nodes N such that every N satisfies the following constraints:
1064 1068
1065 1069 1. N is an ancestor of some node in 'heads'
1066 1070 2. N is not an ancestor of any node in 'common'
1067 1071
1068 1072 The list is sorted by revision number, meaning it is
1069 1073 topologically sorted.
1070 1074
1071 1075 'heads' and 'common' are both lists of revision numbers. If heads is
1072 1076 not supplied, uses all of the revlog's heads. If common is not
1073 1077 supplied, uses nullid."""
1074 1078 if common is None:
1075 1079 common = [nullrev]
1076 1080 if heads is None:
1077 1081 heads = self.headrevs()
1078 1082
1079 1083 inc = self.incrementalmissingrevs(common=common)
1080 1084 return inc.missingancestors(heads)
1081 1085
1082 1086 def findmissing(self, common=None, heads=None):
1083 1087 """Return the ancestors of heads that are not ancestors of common.
1084 1088
1085 1089 More specifically, return a list of nodes N such that every N
1086 1090 satisfies the following constraints:
1087 1091
1088 1092 1. N is an ancestor of some node in 'heads'
1089 1093 2. N is not an ancestor of any node in 'common'
1090 1094
1091 1095 The list is sorted by revision number, meaning it is
1092 1096 topologically sorted.
1093 1097
1094 1098 'heads' and 'common' are both lists of node IDs. If heads is
1095 1099 not supplied, uses all of the revlog's heads. If common is not
1096 1100 supplied, uses nullid."""
1097 1101 if common is None:
1098 1102 common = [self.nullid]
1099 1103 if heads is None:
1100 1104 heads = self.heads()
1101 1105
1102 1106 common = [self.rev(n) for n in common]
1103 1107 heads = [self.rev(n) for n in heads]
1104 1108
1105 1109 inc = self.incrementalmissingrevs(common=common)
1106 1110 return [self.node(r) for r in inc.missingancestors(heads)]
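# Illustrative usage sketch (editor's addition, not part of the original
# source): assuming a revlog instance ``rl`` and node ids ``c`` and ``h``,
# the call below returns, in revision order, the nodes that are ancestors
# of ``h`` but not ancestors of ``c``:
#
#   missing = rl.findmissing(common=[c], heads=[h])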
1107 1111
1108 1112 def nodesbetween(self, roots=None, heads=None):
1109 1113 """Return a topological path from 'roots' to 'heads'.
1110 1114
1111 1115 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1112 1116 topologically sorted list of all nodes N that satisfy both of
1113 1117 these constraints:
1114 1118
1115 1119 1. N is a descendant of some node in 'roots'
1116 1120 2. N is an ancestor of some node in 'heads'
1117 1121
1118 1122 Every node is considered to be both a descendant and an ancestor
1119 1123 of itself, so every reachable node in 'roots' and 'heads' will be
1120 1124 included in 'nodes'.
1121 1125
1122 1126 'outroots' is the list of reachable nodes in 'roots', i.e., the
1123 1127 subset of 'roots' that is returned in 'nodes'. Likewise,
1124 1128 'outheads' is the subset of 'heads' that is also in 'nodes'.
1125 1129
1126 1130 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1127 1131 unspecified, uses nullid as the only root. If 'heads' is
1128 1132 unspecified, uses list of all of the revlog's heads."""
1129 1133 nonodes = ([], [], [])
1130 1134 if roots is not None:
1131 1135 roots = list(roots)
1132 1136 if not roots:
1133 1137 return nonodes
1134 1138 lowestrev = min([self.rev(n) for n in roots])
1135 1139 else:
1136 1140 roots = [self.nullid] # Everybody's a descendant of nullid
1137 1141 lowestrev = nullrev
1138 1142 if (lowestrev == nullrev) and (heads is None):
1139 1143 # We want _all_ the nodes!
1140 1144 return (
1141 1145 [self.node(r) for r in self],
1142 1146 [self.nullid],
1143 1147 list(self.heads()),
1144 1148 )
1145 1149 if heads is None:
1146 1150 # All nodes are ancestors, so the latest ancestor is the last
1147 1151 # node.
1148 1152 highestrev = len(self) - 1
1149 1153 # Set ancestors to None to signal that every node is an ancestor.
1150 1154 ancestors = None
1151 1155 # Set heads to an empty dictionary for later discovery of heads
1152 1156 heads = {}
1153 1157 else:
1154 1158 heads = list(heads)
1155 1159 if not heads:
1156 1160 return nonodes
1157 1161 ancestors = set()
1158 1162 # Turn heads into a dictionary so we can remove 'fake' heads.
1159 1163 # Also, later we will be using it to filter out the heads we can't
1160 1164 # find from roots.
1161 1165 heads = dict.fromkeys(heads, False)
1162 1166 # Start at the top and keep marking parents until we're done.
1163 1167 nodestotag = set(heads)
1164 1168 # Remember where the top was so we can use it as a limit later.
1165 1169 highestrev = max([self.rev(n) for n in nodestotag])
1166 1170 while nodestotag:
1167 1171 # grab a node to tag
1168 1172 n = nodestotag.pop()
1169 1173 # Never tag nullid
1170 1174 if n == self.nullid:
1171 1175 continue
1172 1176 # A node's revision number represents its place in a
1173 1177 # topologically sorted list of nodes.
1174 1178 r = self.rev(n)
1175 1179 if r >= lowestrev:
1176 1180 if n not in ancestors:
1177 1181 # If we are possibly a descendant of one of the roots
1178 1182 # and we haven't already been marked as an ancestor
1179 1183 ancestors.add(n) # Mark as ancestor
1180 1184 # Add non-nullid parents to list of nodes to tag.
1181 1185 nodestotag.update(
1182 1186 [p for p in self.parents(n) if p != self.nullid]
1183 1187 )
1184 1188 elif n in heads: # We've seen it before, is it a fake head?
1185 1189 # So it is, real heads should not be the ancestors of
1186 1190 # any other heads.
1187 1191 heads.pop(n)
1188 1192 if not ancestors:
1189 1193 return nonodes
1190 1194 # Now that we have our set of ancestors, we want to remove any
1191 1195 # roots that are not ancestors.
1192 1196
1193 1197 # If one of the roots was nullid, everything is included anyway.
1194 1198 if lowestrev > nullrev:
1195 1199 # But, since we weren't, let's recompute the lowest rev to not
1196 1200 # include roots that aren't ancestors.
1197 1201
1198 1202 # Filter out roots that aren't ancestors of heads
1199 1203 roots = [root for root in roots if root in ancestors]
1200 1204 # Recompute the lowest revision
1201 1205 if roots:
1202 1206 lowestrev = min([self.rev(root) for root in roots])
1203 1207 else:
1204 1208 # No more roots? Return empty list
1205 1209 return nonodes
1206 1210 else:
1207 1211 # We are descending from nullid, and don't need to care about
1208 1212 # any other roots.
1209 1213 lowestrev = nullrev
1210 1214 roots = [self.nullid]
1211 1215 # Transform our roots list into a set.
1212 1216 descendants = set(roots)
1213 1217 # Also, keep the original roots so we can filter out roots that aren't
1214 1218 # 'real' roots (i.e. are descended from other roots).
1215 1219 roots = descendants.copy()
1216 1220 # Our topologically sorted list of output nodes.
1217 1221 orderedout = []
1218 1222 # Don't start at nullid since we don't want nullid in our output list,
1219 1223 # and if nullid shows up in descendants, empty parents will look like
1220 1224 # they're descendants.
1221 1225 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1222 1226 n = self.node(r)
1223 1227 isdescendant = False
1224 1228 if lowestrev == nullrev: # Everybody is a descendant of nullid
1225 1229 isdescendant = True
1226 1230 elif n in descendants:
1227 1231 # n is already a descendant
1228 1232 isdescendant = True
1229 1233 # This check only needs to be done here because all the roots
1230 1234 # will start being marked as descendants before the loop.
1231 1235 if n in roots:
1232 1236 # If n was a root, check if it's a 'real' root.
1233 1237 p = tuple(self.parents(n))
1234 1238 # If any of its parents are descendants, it's not a root.
1235 1239 if (p[0] in descendants) or (p[1] in descendants):
1236 1240 roots.remove(n)
1237 1241 else:
1238 1242 p = tuple(self.parents(n))
1239 1243 # A node is a descendant if either of its parents is a
1240 1244 # descendant. (We seeded the descendants set with the roots
1241 1245 # up there, remember?)
1242 1246 if (p[0] in descendants) or (p[1] in descendants):
1243 1247 descendants.add(n)
1244 1248 isdescendant = True
1245 1249 if isdescendant and ((ancestors is None) or (n in ancestors)):
1246 1250 # Only include nodes that are both descendants and ancestors.
1247 1251 orderedout.append(n)
1248 1252 if (ancestors is not None) and (n in heads):
1249 1253 # We're trying to figure out which heads are reachable
1250 1254 # from roots.
1251 1255 # Mark this head as having been reached
1252 1256 heads[n] = True
1253 1257 elif ancestors is None:
1254 1258 # Otherwise, we're trying to discover the heads.
1255 1259 # Assume this is a head because if it isn't, the next step
1256 1260 # will eventually remove it.
1257 1261 heads[n] = True
1258 1262 # But, obviously its parents aren't.
1259 1263 for p in self.parents(n):
1260 1264 heads.pop(p, None)
1261 1265 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1262 1266 roots = list(roots)
1263 1267 assert orderedout
1264 1268 assert roots
1265 1269 assert heads
1266 1270 return (orderedout, roots, heads)
1267 1271
1268 1272 def headrevs(self, revs=None):
1269 1273 if revs is None:
1270 1274 try:
1271 1275 return self.index.headrevs()
1272 1276 except AttributeError:
1273 1277 return self._headrevs()
1274 1278 if rustdagop is not None:
1275 1279 return rustdagop.headrevs(self.index, revs)
1276 1280 return dagop.headrevs(revs, self._uncheckedparentrevs)
1277 1281
1278 1282 def computephases(self, roots):
1279 1283 return self.index.computephasesmapsets(roots)
1280 1284
1281 1285 def _headrevs(self):
1282 1286 count = len(self)
1283 1287 if not count:
1284 1288 return [nullrev]
1285 1289 # we won't iter over filtered rev so nobody is a head at start
1286 1290 ishead = [0] * (count + 1)
1287 1291 index = self.index
1288 1292 for r in self:
1289 1293 ishead[r] = 1 # I may be a head
1290 1294 e = index[r]
1291 1295 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1292 1296 return [r for r, val in enumerate(ishead) if val]
1293 1297
1294 1298 def heads(self, start=None, stop=None):
1295 1299 """return the list of all nodes that have no children
1296 1300
1297 1301 if start is specified, only heads that are descendants of
1298 1302 start will be returned
1299 1303 if stop is specified, it will consider all the revs from stop
1300 1304 as if they had no children
1301 1305 """
1302 1306 if start is None and stop is None:
1303 1307 if not len(self):
1304 1308 return [self.nullid]
1305 1309 return [self.node(r) for r in self.headrevs()]
1306 1310
1307 1311 if start is None:
1308 1312 start = nullrev
1309 1313 else:
1310 1314 start = self.rev(start)
1311 1315
1312 1316 stoprevs = {self.rev(n) for n in stop or []}
1313 1317
1314 1318 revs = dagop.headrevssubset(
1315 1319 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1316 1320 )
1317 1321
1318 1322 return [self.node(rev) for rev in revs]
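# Illustrative usage sketch (editor's addition, not part of the original
# source): ``rl.heads()`` lists every node without children, while
# ``rl.heads(start=somenode)`` keeps only the heads that descend from the
# hypothetical node ``somenode``, as described in the docstring above.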
1319 1323
1320 1324 def children(self, node):
1321 1325 """find the children of a given node"""
1322 1326 c = []
1323 1327 p = self.rev(node)
1324 1328 for r in self.revs(start=p + 1):
1325 1329 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1326 1330 if prevs:
1327 1331 for pr in prevs:
1328 1332 if pr == p:
1329 1333 c.append(self.node(r))
1330 1334 elif p == nullrev:
1331 1335 c.append(self.node(r))
1332 1336 return c
1333 1337
1334 1338 def commonancestorsheads(self, a, b):
1335 1339 """calculate all the heads of the common ancestors of nodes a and b"""
1336 1340 a, b = self.rev(a), self.rev(b)
1337 1341 ancs = self._commonancestorsheads(a, b)
1338 1342 return pycompat.maplist(self.node, ancs)
1339 1343
1340 1344 def _commonancestorsheads(self, *revs):
1341 1345 """calculate all the heads of the common ancestors of revs"""
1342 1346 try:
1343 1347 ancs = self.index.commonancestorsheads(*revs)
1344 1348 except (AttributeError, OverflowError): # C implementation failed
1345 1349 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1346 1350 return ancs
1347 1351
1348 1352 def isancestor(self, a, b):
1349 1353 """return True if node a is an ancestor of node b
1350 1354
1351 1355 A revision is considered an ancestor of itself."""
1352 1356 a, b = self.rev(a), self.rev(b)
1353 1357 return self.isancestorrev(a, b)
1354 1358
1355 1359 def isancestorrev(self, a, b):
1356 1360 """return True if revision a is an ancestor of revision b
1357 1361
1358 1362 A revision is considered an ancestor of itself.
1359 1363
1360 1364 The implementation of this is trivial but the use of
1361 1365 reachableroots is not."""
1362 1366 if a == nullrev:
1363 1367 return True
1364 1368 elif a == b:
1365 1369 return True
1366 1370 elif a > b:
1367 1371 return False
1368 1372 return bool(self.reachableroots(a, [b], [a], includepath=False))
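# Illustrative note (editor's addition, not part of the original source):
# because revision numbers grow topologically, the fast paths above mean
# that for revisions ``a`` and ``b``:
#
#   rl.isancestorrev(a, a)   # True: a revision is its own ancestor
#   rl.isancestorrev(b, a)   # False whenever b > a, without walking the graph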
1369 1373
1370 1374 def reachableroots(self, minroot, heads, roots, includepath=False):
1371 1375 """return (heads(::(<roots> and <roots>::<heads>)))
1372 1376
1373 1377 If includepath is True, return (<roots>::<heads>)."""
1374 1378 try:
1375 1379 return self.index.reachableroots2(
1376 1380 minroot, heads, roots, includepath
1377 1381 )
1378 1382 except AttributeError:
1379 1383 return dagop._reachablerootspure(
1380 1384 self.parentrevs, minroot, roots, heads, includepath
1381 1385 )
1382 1386
1383 1387 def ancestor(self, a, b):
1384 1388 """calculate the "best" common ancestor of nodes a and b"""
1385 1389
1386 1390 a, b = self.rev(a), self.rev(b)
1387 1391 try:
1388 1392 ancs = self.index.ancestors(a, b)
1389 1393 except (AttributeError, OverflowError):
1390 1394 ancs = ancestor.ancestors(self.parentrevs, a, b)
1391 1395 if ancs:
1392 1396 # choose a consistent winner when there's a tie
1393 1397 return min(map(self.node, ancs))
1394 1398 return self.nullid
1395 1399
1396 1400 def _match(self, id):
1397 1401 if isinstance(id, int):
1398 1402 # rev
1399 1403 return self.node(id)
1400 1404 if len(id) == self.nodeconstants.nodelen:
1401 1405 # possibly a binary node
1402 1406 # odds of a binary node being all hex in ASCII are 1 in 10**25
1403 1407 try:
1404 1408 node = id
1405 1409 self.rev(node) # quick search the index
1406 1410 return node
1407 1411 except error.LookupError:
1408 1412 pass # may be partial hex id
1409 1413 try:
1410 1414 # str(rev)
1411 1415 rev = int(id)
1412 1416 if b"%d" % rev != id:
1413 1417 raise ValueError
1414 1418 if rev < 0:
1415 1419 rev = len(self) + rev
1416 1420 if rev < 0 or rev >= len(self):
1417 1421 raise ValueError
1418 1422 return self.node(rev)
1419 1423 except (ValueError, OverflowError):
1420 1424 pass
1421 1425 if len(id) == 2 * self.nodeconstants.nodelen:
1422 1426 try:
1423 1427 # a full hex nodeid?
1424 1428 node = bin(id)
1425 1429 self.rev(node)
1426 1430 return node
1427 1431 except (TypeError, error.LookupError):
1428 1432 pass
1429 1433
1430 1434 def _partialmatch(self, id):
1431 1435 # we don't care about wdirfilenodeids as they should always be full hashes
1432 1436 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1433 1437 try:
1434 1438 partial = self.index.partialmatch(id)
1435 1439 if partial and self.hasnode(partial):
1436 1440 if maybewdir:
1437 1441 # single 'ff...' match in radix tree, ambiguous with wdir
1438 1442 raise error.RevlogError
1439 1443 return partial
1440 1444 if maybewdir:
1441 1445 # no 'ff...' match in radix tree, wdir identified
1442 1446 raise error.WdirUnsupported
1443 1447 return None
1444 1448 except error.RevlogError:
1445 1449 # parsers.c radix tree lookup gave multiple matches
1446 1450 # fast path: for unfiltered changelog, radix tree is accurate
1447 1451 if not getattr(self, 'filteredrevs', None):
1448 1452 raise error.AmbiguousPrefixLookupError(
1449 1453 id, self.display_id, _(b'ambiguous identifier')
1450 1454 )
1451 1455 # fall through to slow path that filters hidden revisions
1452 1456 except (AttributeError, ValueError):
1453 1457 # we are pure python, or key was too short to search radix tree
1454 1458 pass
1455 1459
1456 1460 if id in self._pcache:
1457 1461 return self._pcache[id]
1458 1462
1459 1463 if len(id) <= 40:
1460 1464 try:
1461 1465 # hex(node)[:...]
1462 1466 l = len(id) // 2 # grab an even number of digits
1463 1467 prefix = bin(id[: l * 2])
1464 1468 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1465 1469 nl = [
1466 1470 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1467 1471 ]
1468 1472 if self.nodeconstants.nullhex.startswith(id):
1469 1473 nl.append(self.nullid)
1470 1474 if len(nl) > 0:
1471 1475 if len(nl) == 1 and not maybewdir:
1472 1476 self._pcache[id] = nl[0]
1473 1477 return nl[0]
1474 1478 raise error.AmbiguousPrefixLookupError(
1475 1479 id, self.display_id, _(b'ambiguous identifier')
1476 1480 )
1477 1481 if maybewdir:
1478 1482 raise error.WdirUnsupported
1479 1483 return None
1480 1484 except TypeError:
1481 1485 pass
1482 1486
1483 1487 def lookup(self, id):
1484 1488 """locate a node based on:
1485 1489 - revision number or str(revision number)
1486 1490 - nodeid or subset of hex nodeid
1487 1491 """
1488 1492 n = self._match(id)
1489 1493 if n is not None:
1490 1494 return n
1491 1495 n = self._partialmatch(id)
1492 1496 if n:
1493 1497 return n
1494 1498
1495 1499 raise error.LookupError(id, self.display_id, _(b'no match found'))
1496 1500
1497 1501 def shortest(self, node, minlength=1):
1498 1502 """Find the shortest unambiguous prefix that matches node."""
1499 1503
1500 1504 def isvalid(prefix):
1501 1505 try:
1502 1506 matchednode = self._partialmatch(prefix)
1503 1507 except error.AmbiguousPrefixLookupError:
1504 1508 return False
1505 1509 except error.WdirUnsupported:
1506 1510 # single 'ff...' match
1507 1511 return True
1508 1512 if matchednode is None:
1509 1513 raise error.LookupError(node, self.display_id, _(b'no node'))
1510 1514 return True
1511 1515
1512 1516 def maybewdir(prefix):
1513 1517 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1514 1518
1515 1519 hexnode = hex(node)
1516 1520
1517 1521 def disambiguate(hexnode, minlength):
1518 1522 """Disambiguate against wdirid."""
1519 1523 for length in range(minlength, len(hexnode) + 1):
1520 1524 prefix = hexnode[:length]
1521 1525 if not maybewdir(prefix):
1522 1526 return prefix
1523 1527
1524 1528 if not getattr(self, 'filteredrevs', None):
1525 1529 try:
1526 1530 length = max(self.index.shortest(node), minlength)
1527 1531 return disambiguate(hexnode, length)
1528 1532 except error.RevlogError:
1529 1533 if node != self.nodeconstants.wdirid:
1530 1534 raise error.LookupError(
1531 1535 node, self.display_id, _(b'no node')
1532 1536 )
1533 1537 except AttributeError:
1534 1538 # Fall through to pure code
1535 1539 pass
1536 1540
1537 1541 if node == self.nodeconstants.wdirid:
1538 1542 for length in range(minlength, len(hexnode) + 1):
1539 1543 prefix = hexnode[:length]
1540 1544 if isvalid(prefix):
1541 1545 return prefix
1542 1546
1543 1547 for length in range(minlength, len(hexnode) + 1):
1544 1548 prefix = hexnode[:length]
1545 1549 if isvalid(prefix):
1546 1550 return disambiguate(hexnode, length)
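# Illustrative usage sketch (editor's addition, not part of the original
# source): assuming a revlog instance ``rl``,
#
#   prefix = rl.shortest(node, minlength=1)
#
# yields the shortest hex prefix (at least ``minlength`` characters) that
# ``_partialmatch`` resolves back to ``node`` without ambiguity.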
1547 1551
1548 1552 def cmp(self, node, text):
1549 1553 """compare text with a given file revision
1550 1554
1551 1555 returns True if text is different from what is stored.
1552 1556 """
1553 1557 p1, p2 = self.parents(node)
1554 1558 return storageutil.hashrevisionsha1(text, p1, p2) != node
1555 1559
1556 1560 def _cachesegment(self, offset, data):
1557 1561 """Add a segment to the revlog cache.
1558 1562
1559 1563 Accepts an absolute offset and the data that is at that location.
1560 1564 """
1561 1565 o, d = self._chunkcache
1562 1566 # try to add to existing cache
1563 1567 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1564 1568 self._chunkcache = o, d + data
1565 1569 else:
1566 1570 self._chunkcache = offset, data
1567 1571
1568 1572 def _readsegment(self, offset, length, df=None):
1569 1573 """Load a segment of raw data from the revlog.
1570 1574
1571 1575 Accepts an absolute offset, length to read, and an optional existing
1572 1576 file handle to read from.
1573 1577
1574 1578 If an existing file handle is passed, it will be seeked and the
1575 1579 original seek position will NOT be restored.
1576 1580
1577 1581 Returns a str or buffer of raw byte data.
1578 1582
1579 1583 Raises if the requested number of bytes could not be read.
1580 1584 """
1581 1585 # Cache data both forward and backward around the requested
1582 1586 # data, in a fixed size window. This helps speed up operations
1583 1587 # involving reading the revlog backwards.
1584 1588 cachesize = self._chunkcachesize
1585 1589 realoffset = offset & ~(cachesize - 1)
1586 1590 reallength = (
1587 1591 (offset + length + cachesize) & ~(cachesize - 1)
1588 1592 ) - realoffset
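# Worked example (editor's addition, not part of the original source):
# with a chunk cache size of 65536, a request for offset=70000 and
# length=100 gives realoffset=65536 and reallength=65536, so the read
# below covers bytes 65536..131071 and the surrounding bytes are cached
# for neighbouring requests.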
1589 1593 with self._datareadfp(df) as df:
1590 1594 df.seek(realoffset)
1591 1595 d = df.read(reallength)
1592 1596
1593 1597 self._cachesegment(realoffset, d)
1594 1598 if offset != realoffset or reallength != length:
1595 1599 startoffset = offset - realoffset
1596 1600 if len(d) - startoffset < length:
1597 1601 raise error.RevlogError(
1598 1602 _(
1599 1603 b'partial read of revlog %s; expected %d bytes from '
1600 1604 b'offset %d, got %d'
1601 1605 )
1602 1606 % (
1603 1607 self._indexfile if self._inline else self._datafile,
1604 1608 length,
1605 1609 offset,
1606 1610 len(d) - startoffset,
1607 1611 )
1608 1612 )
1609 1613
1610 1614 return util.buffer(d, startoffset, length)
1611 1615
1612 1616 if len(d) < length:
1613 1617 raise error.RevlogError(
1614 1618 _(
1615 1619 b'partial read of revlog %s; expected %d bytes from offset '
1616 1620 b'%d, got %d'
1617 1621 )
1618 1622 % (
1619 1623 self._indexfile if self._inline else self._datafile,
1620 1624 length,
1621 1625 offset,
1622 1626 len(d),
1623 1627 )
1624 1628 )
1625 1629
1626 1630 return d
1627 1631
1628 1632 def _getsegment(self, offset, length, df=None):
1629 1633 """Obtain a segment of raw data from the revlog.
1630 1634
1631 1635 Accepts an absolute offset, length of bytes to obtain, and an
1632 1636 optional file handle to the already-opened revlog. If the file
1633 1637 handle is used, its original seek position will not be preserved.
1634 1638
1635 1639 Requests for data may be returned from a cache.
1636 1640
1637 1641 Returns a str or a buffer instance of raw byte data.
1638 1642 """
1639 1643 o, d = self._chunkcache
1640 1644 l = len(d)
1641 1645
1642 1646 # is it in the cache?
1643 1647 cachestart = offset - o
1644 1648 cacheend = cachestart + length
1645 1649 if cachestart >= 0 and cacheend <= l:
1646 1650 if cachestart == 0 and cacheend == l:
1647 1651 return d # avoid a copy
1648 1652 return util.buffer(d, cachestart, cacheend - cachestart)
1649 1653
1650 1654 return self._readsegment(offset, length, df=df)
1651 1655
1652 1656 def _getsegmentforrevs(self, startrev, endrev, df=None):
1653 1657 """Obtain a segment of raw data corresponding to a range of revisions.
1654 1658
1655 1659 Accepts the start and end revisions and an optional already-open
1656 1660 file handle to be used for reading. If the file handle is read, its
1657 1661 seek position will not be preserved.
1658 1662
1659 1663 Requests for data may be satisfied by a cache.
1660 1664
1661 1665 Returns a 2-tuple of (offset, data) for the requested range of
1662 1666 revisions. Offset is the integer offset from the beginning of the
1663 1667 revlog and data is a str or buffer of the raw byte data.
1664 1668
1665 1669 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1666 1670 to determine where each revision's data begins and ends.
1667 1671 """
1668 1672 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1669 1673 # (functions are expensive).
1670 1674 index = self.index
1671 1675 istart = index[startrev]
1672 1676 start = int(istart[0] >> 16)
1673 1677 if startrev == endrev:
1674 1678 end = start + istart[1]
1675 1679 else:
1676 1680 iend = index[endrev]
1677 1681 end = int(iend[0] >> 16) + iend[1]
1678 1682
1679 1683 if self._inline:
1680 1684 start += (startrev + 1) * self.index.entry_size
1681 1685 end += (endrev + 1) * self.index.entry_size
1682 1686 length = end - start
1683 1687
1684 1688 return start, self._getsegment(start, length, df=df)
1685 1689
1686 1690 def _chunk(self, rev, df=None):
1687 1691 """Obtain a single decompressed chunk for a revision.
1688 1692
1689 1693 Accepts an integer revision and an optional already-open file handle
1690 1694 to be used for reading. If used, the seek position of the file will not
1691 1695 be preserved.
1692 1696
1693 1697 Returns a str holding uncompressed data for the requested revision.
1694 1698 """
1695 1699 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1696 1700
1697 1701 def _chunks(self, revs, df=None, targetsize=None):
1698 1702 """Obtain decompressed chunks for the specified revisions.
1699 1703
1700 1704 Accepts an iterable of numeric revisions that are assumed to be in
1701 1705 ascending order. Also accepts an optional already-open file handle
1702 1706 to be used for reading. If used, the seek position of the file will
1703 1707 not be preserved.
1704 1708
1705 1709 This function is similar to calling ``self._chunk()`` multiple times,
1706 1710 but is faster.
1707 1711
1708 1712 Returns a list with decompressed data for each requested revision.
1709 1713 """
1710 1714 if not revs:
1711 1715 return []
1712 1716 start = self.start
1713 1717 length = self.length
1714 1718 inline = self._inline
1715 1719 iosize = self.index.entry_size
1716 1720 buffer = util.buffer
1717 1721
1718 1722 l = []
1719 1723 ladd = l.append
1720 1724
1721 1725 if not self._withsparseread:
1722 1726 slicedchunks = (revs,)
1723 1727 else:
1724 1728 slicedchunks = deltautil.slicechunk(
1725 1729 self, revs, targetsize=targetsize
1726 1730 )
1727 1731
1728 1732 for revschunk in slicedchunks:
1729 1733 firstrev = revschunk[0]
1730 1734 # Skip trailing revisions with empty diff
1731 1735 for lastrev in revschunk[::-1]:
1732 1736 if length(lastrev) != 0:
1733 1737 break
1734 1738
1735 1739 try:
1736 1740 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1737 1741 except OverflowError:
1738 1742 # issue4215 - we can't cache a run of chunks greater than
1739 1743 # 2G on Windows
1740 1744 return [self._chunk(rev, df=df) for rev in revschunk]
1741 1745
1742 1746 decomp = self.decompress
1743 1747 for rev in revschunk:
1744 1748 chunkstart = start(rev)
1745 1749 if inline:
1746 1750 chunkstart += (rev + 1) * iosize
1747 1751 chunklength = length(rev)
1748 1752 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1749 1753
1750 1754 return l
1751 1755
1752 1756 def _chunkclear(self):
1753 1757 """Clear the raw chunk cache."""
1754 1758 self._chunkcache = (0, b'')
1755 1759
1756 1760 def deltaparent(self, rev):
1757 1761 """return deltaparent of the given revision"""
1758 1762 base = self.index[rev][3]
1759 1763 if base == rev:
1760 1764 return nullrev
1761 1765 elif self._generaldelta:
1762 1766 return base
1763 1767 else:
1764 1768 return rev - 1
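# Illustrative note (editor's addition, not part of the original source):
# with generaldelta the base recorded in the index entry is the actual
# delta parent; without it deltas are always against ``rev - 1``; a base
# equal to ``rev`` itself marks a full text, hence the nullrev answer.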
1765 1769
1766 1770 def issnapshot(self, rev):
1767 1771 """tells whether rev is a snapshot"""
1768 1772 if not self._sparserevlog:
1769 1773 return self.deltaparent(rev) == nullrev
1770 1774 elif util.safehasattr(self.index, b'issnapshot'):
1771 1775 # directly assign the method to cache the testing and access
1772 1776 self.issnapshot = self.index.issnapshot
1773 1777 return self.issnapshot(rev)
1774 1778 if rev == nullrev:
1775 1779 return True
1776 1780 entry = self.index[rev]
1777 1781 base = entry[3]
1778 1782 if base == rev:
1779 1783 return True
1780 1784 if base == nullrev:
1781 1785 return True
1782 1786 p1 = entry[5]
1783 1787 p2 = entry[6]
1784 1788 if base == p1 or base == p2:
1785 1789 return False
1786 1790 return self.issnapshot(base)
1787 1791
1788 1792 def snapshotdepth(self, rev):
1789 1793 """number of snapshots in the chain before this one"""
1790 1794 if not self.issnapshot(rev):
1791 1795 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1792 1796 return len(self._deltachain(rev)[0]) - 1
1793 1797
1794 1798 def revdiff(self, rev1, rev2):
1795 1799 """return or calculate a delta between two revisions
1796 1800
1797 1801 The delta calculated is in binary form and is intended to be written to
1798 1802 revlog data directly. So this function needs raw revision data.
1799 1803 """
1800 1804 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1801 1805 return bytes(self._chunk(rev2))
1802 1806
1803 1807 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1804 1808
1805 1809 def _processflags(self, text, flags, operation, raw=False):
1806 1810 """deprecated entry point to access flag processors"""
1807 1811 msg = b'_processflag(...) use the specialized variant'
1808 1812 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1809 1813 if raw:
1810 1814 return text, flagutil.processflagsraw(self, text, flags)
1811 1815 elif operation == b'read':
1812 1816 return flagutil.processflagsread(self, text, flags)
1813 1817 else: # write operation
1814 1818 return flagutil.processflagswrite(self, text, flags)
1815 1819
1816 1820 def revision(self, nodeorrev, _df=None, raw=False):
1817 1821 """return an uncompressed revision of a given node or revision
1818 1822 number.
1819 1823
1820 1824 _df - an existing file handle to read from. (internal-only)
1821 1825 raw - an optional argument specifying if the revision data is to be
1822 1826 treated as raw data when applying flag transforms. 'raw' should be set
1823 1827 to True when generating changegroups or in debug commands.
1824 1828 """
1825 1829 if raw:
1826 1830 msg = (
1827 1831 b'revlog.revision(..., raw=True) is deprecated, '
1828 1832 b'use revlog.rawdata(...)'
1829 1833 )
1830 1834 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1831 1835 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1832 1836
1833 1837 def sidedata(self, nodeorrev, _df=None):
1834 1838 """a map of extra data related to the changeset but not part of the hash
1835 1839
1836 1840 This function currently returns a dictionary. However, a more advanced
1837 1841 mapping object will likely be used in the future for more
1838 1842 efficient/lazy code.
1839 1843 """
1840 1844 return self._revisiondata(nodeorrev, _df)[1]
1841 1845
1842 1846 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1843 1847 # deal with <nodeorrev> argument type
1844 1848 if isinstance(nodeorrev, int):
1845 1849 rev = nodeorrev
1846 1850 node = self.node(rev)
1847 1851 else:
1848 1852 node = nodeorrev
1849 1853 rev = None
1850 1854
1851 1855 # fast path the special `nullid` rev
1852 1856 if node == self.nullid:
1853 1857 return b"", {}
1854 1858
1855 1859 # ``rawtext`` is the text as stored inside the revlog. Might be the
1856 1860 # revision or might need to be processed to retrieve the revision.
1857 1861 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1858 1862
1859 1863 if self.hassidedata:
1860 1864 if rev is None:
1861 1865 rev = self.rev(node)
1862 1866 sidedata = self._sidedata(rev)
1863 1867 else:
1864 1868 sidedata = {}
1865 1869
1866 1870 if raw and validated:
1867 1871 # if we don't want to process the raw text and that raw
1868 1872 # text is cached, we can exit early.
1869 1873 return rawtext, sidedata
1870 1874 if rev is None:
1871 1875 rev = self.rev(node)
1872 1876 # the revlog's flag for this revision
1873 1877 # (usually alter its state or content)
1874 1878 flags = self.flags(rev)
1875 1879
1876 1880 if validated and flags == REVIDX_DEFAULT_FLAGS:
1877 1881 # no extra flags set, no flag processor runs, text = rawtext
1878 1882 return rawtext, sidedata
1879 1883
1880 1884 if raw:
1881 1885 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1882 1886 text = rawtext
1883 1887 else:
1884 1888 r = flagutil.processflagsread(self, rawtext, flags)
1885 1889 text, validatehash = r
1886 1890 if validatehash:
1887 1891 self.checkhash(text, node, rev=rev)
1888 1892 if not validated:
1889 1893 self._revisioncache = (node, rev, rawtext)
1890 1894
1891 1895 return text, sidedata
1892 1896
1893 1897 def _rawtext(self, node, rev, _df=None):
1894 1898 """return the possibly unvalidated rawtext for a revision
1895 1899
1896 1900 returns (rev, rawtext, validated)
1897 1901 """
1898 1902
1899 1903 # revision in the cache (could be useful to apply delta)
1900 1904 cachedrev = None
1901 1905 # An intermediate text to apply deltas to
1902 1906 basetext = None
1903 1907
1904 1908 # Check if we have the entry in cache
1905 1909 # The cache entry looks like (node, rev, rawtext)
1906 1910 if self._revisioncache:
1907 1911 if self._revisioncache[0] == node:
1908 1912 return (rev, self._revisioncache[2], True)
1909 1913 cachedrev = self._revisioncache[1]
1910 1914
1911 1915 if rev is None:
1912 1916 rev = self.rev(node)
1913 1917
1914 1918 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1915 1919 if stopped:
1916 1920 basetext = self._revisioncache[2]
1917 1921
1918 1922 # drop cache to save memory, the caller is expected to
1919 1923 # update self._revisioncache after validating the text
1920 1924 self._revisioncache = None
1921 1925
1922 1926 targetsize = None
1923 1927 rawsize = self.index[rev][2]
1924 1928 if 0 <= rawsize:
1925 1929 targetsize = 4 * rawsize
1926 1930
1927 1931 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1928 1932 if basetext is None:
1929 1933 basetext = bytes(bins[0])
1930 1934 bins = bins[1:]
1931 1935
1932 1936 rawtext = mdiff.patches(basetext, bins)
1933 1937 del basetext # let us have a chance to free memory early
1934 1938 return (rev, rawtext, False)
1935 1939
1936 1940 def _sidedata(self, rev):
1937 1941 """Return the sidedata for a given revision number."""
1938 1942 index_entry = self.index[rev]
1939 1943 sidedata_offset = index_entry[8]
1940 1944 sidedata_size = index_entry[9]
1941 1945
1942 1946 if self._inline:
1943 1947 sidedata_offset += self.index.entry_size * (1 + rev)
1944 1948 if sidedata_size == 0:
1945 1949 return {}
1946 1950
1947 1951 segment = self._getsegment(sidedata_offset, sidedata_size)
1948 1952 sidedata = sidedatautil.deserialize_sidedata(segment)
1949 1953 return sidedata
1950 1954
1951 1955 def rawdata(self, nodeorrev, _df=None):
1952 1956 """return an uncompressed raw data of a given node or revision number.
1953 1957 """return the uncompressed raw data of a given node or revision number.
1954 1958 _df - an existing file handle to read from. (internal-only)
1955 1959 """
1956 1960 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1957 1961
1958 1962 def hash(self, text, p1, p2):
1959 1963 """Compute a node hash.
1960 1964
1961 1965 Available as a function so that subclasses can replace the hash
1962 1966 as needed.
1963 1967 """
1964 1968 return storageutil.hashrevisionsha1(text, p1, p2)
1965 1969
1966 1970 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1967 1971 """Check node hash integrity.
1968 1972
1969 1973 Available as a function so that subclasses can extend hash mismatch
1970 1974 behaviors as needed.
1971 1975 """
1972 1976 try:
1973 1977 if p1 is None and p2 is None:
1974 1978 p1, p2 = self.parents(node)
1975 1979 if node != self.hash(text, p1, p2):
1976 1980 # Clear the revision cache on hash failure. The revision cache
1977 1981 # only stores the raw revision and clearing the cache does have
1978 1982 # the side-effect that we won't have a cache hit when the raw
1979 1983 # revision data is accessed. But this case should be rare and
1980 1984 # it is extra work to teach the cache about the hash
1981 1985 # verification state.
1982 1986 if self._revisioncache and self._revisioncache[0] == node:
1983 1987 self._revisioncache = None
1984 1988
1985 1989 revornode = rev
1986 1990 if revornode is None:
1987 1991 revornode = templatefilters.short(hex(node))
1988 1992 raise error.RevlogError(
1989 1993 _(b"integrity check failed on %s:%s")
1990 1994 % (self.display_id, pycompat.bytestr(revornode))
1991 1995 )
1992 1996 except error.RevlogError:
1993 1997 if self._censorable and storageutil.iscensoredtext(text):
1994 1998 raise error.CensoredNodeError(self.display_id, node, text)
1995 1999 raise
1996 2000
1997 2001 def _enforceinlinesize(self, tr):
1998 2002 """Check if the revlog is too big for inline and convert if so.
1999 2003
2000 2004 This should be called after revisions are added to the revlog. If the
2001 2005 revlog has grown too large to be an inline revlog, it will convert it
2002 2006 to use multiple index and data files.
2003 2007 """
2004 2008 tiprev = len(self) - 1
2005 2009 total_size = self.start(tiprev) + self.length(tiprev)
2006 2010 if not self._inline or total_size < _maxinline:
2007 2011 return
2008 2012
2009 2013 troffset = tr.findoffset(self._indexfile)
2010 2014 if troffset is None:
2011 2015 raise error.RevlogError(
2012 2016 _(b"%s not found in the transaction") % self._indexfile
2013 2017 )
2014 2018 trindex = 0
2015 2019 tr.add(self._datafile, 0)
2016 2020
2017 2021 existing_handles = False
2018 2022 if self._writinghandles is not None:
2019 2023 existing_handles = True
2020 2024 fp = self._writinghandles[0]
2021 2025 fp.flush()
2022 2026 fp.close()
2023 2027 # We can't use the cached file handle after close(). So prevent
2024 2028 # its usage.
2025 2029 self._writinghandles = None
2026 2030
2027 2031 new_dfh = self._datafp(b'w+')
2028 2032 new_dfh.truncate(0) # drop any potentially existing data
2029 2033 try:
2030 2034 with self._indexfp() as read_ifh:
2031 2035 for r in self:
2032 2036 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2033 2037 if troffset <= self.start(r):
2034 2038 trindex = r
2035 2039 new_dfh.flush()
2036 2040
2037 2041 with self.__index_new_fp() as fp:
2038 2042 self._format_flags &= ~FLAG_INLINE_DATA
2039 2043 self._inline = False
2040 2044 for i in self:
2041 2045 e = self.index.entry_binary(i)
2042 2046 if i == 0 and self._docket is None:
2043 2047 header = self._format_flags | self._format_version
2044 2048 header = self.index.pack_header(header)
2045 2049 e = header + e
2046 2050 fp.write(e)
2047 2051 if self._docket is not None:
2048 2052 self._docket.index_end = fp.tell()
2049 2053 # the temp file replaces the real index when we exit the context
2050 2054 # manager
2051 2055
2052 2056 tr.replace(self._indexfile, trindex * self.index.entry_size)
2053 2057 nodemaputil.setup_persistent_nodemap(tr, self)
2054 2058 self._chunkclear()
2055 2059
2056 2060 if existing_handles:
2057 2061 # switched from inline to conventional reopen the index
2058 2062 ifh = self.__index_write_fp()
2059 2063 self._writinghandles = (ifh, new_dfh)
2060 2064 new_dfh = None
2061 2065 finally:
2062 2066 if new_dfh is not None:
2063 2067 new_dfh.close()
2064 2068
2065 2069 def _nodeduplicatecallback(self, transaction, node):
2066 2070 """called when trying to add a node already stored."""
2067 2071
2068 2072 @contextlib.contextmanager
2069 2073 def _writing(self, transaction):
2074 if self._trypending:
2075 msg = b'trying to write in a `trypending` revlog: %s'
2076 msg %= self.display_id
2077 raise error.ProgrammingError(msg)
2070 2078 if self._writinghandles is not None:
2071 2079 yield
2072 2080 else:
2073 2081 r = len(self)
2074 2082 dsize = 0
2075 2083 if r:
2076 2084 dsize = self.end(r - 1)
2077 2085 dfh = None
2078 2086 if not self._inline:
2079 2087 try:
2080 2088 dfh = self._datafp(b"r+")
2081 2089 dfh.seek(0, os.SEEK_END)
2082 2090 except IOError as inst:
2083 2091 if inst.errno != errno.ENOENT:
2084 2092 raise
2085 2093 dfh = self._datafp(b"w+")
2086 2094 transaction.add(self._datafile, dsize)
2087 2095 try:
2088 2096 isize = r * self.index.entry_size
2089 2097 ifh = self.__index_write_fp()
2090 2098 if self._inline:
2091 2099 transaction.add(self._indexfile, dsize + isize)
2092 2100 else:
2093 2101 transaction.add(self._indexfile, isize)
2094 2102 try:
2095 2103 self._writinghandles = (ifh, dfh)
2096 2104 try:
2097 2105 yield
2098 2106 if self._docket is not None:
2099 2107 self._write_docket(transaction)
2100 2108 finally:
2101 2109 self._writinghandles = None
2102 2110 finally:
2103 2111 ifh.close()
2104 2112 finally:
2105 2113 if dfh is not None:
2106 2114 dfh.close()
2107 2115
2108 2116 def _write_docket(self, transaction):
2109 2117 """write the current docket on disk
2110 2118
2111 2119 Exists as a method to help the changelog implement transaction logic
2112 2120
2113 2121 We could also imagine using the same transaction logic for all revlogs
2114 2122 since dockets are cheap.
2115 2123 self._docket.write(transaction)
2116 2124
2117 2125 def addrevision(
2118 2126 self,
2119 2127 text,
2120 2128 transaction,
2121 2129 link,
2122 2130 p1,
2123 2131 p2,
2124 2132 cachedelta=None,
2125 2133 node=None,
2126 2134 flags=REVIDX_DEFAULT_FLAGS,
2127 2135 deltacomputer=None,
2128 2136 sidedata=None,
2129 2137 ):
2130 2138 """add a revision to the log
2131 2139
2132 2140 text - the revision data to add
2133 2141 transaction - the transaction object used for rollback
2134 2142 link - the linkrev data to add
2135 2143 p1, p2 - the parent nodeids of the revision
2136 2144 cachedelta - an optional precomputed delta
2137 2145 node - nodeid of revision; typically node is not specified, and it is
2138 2146 computed by default as hash(text, p1, p2), however subclasses might
2139 2147 use a different hashing method (and override checkhash() in such a case)
2140 2148 flags - the known flags to set on the revision
2141 2149 deltacomputer - an optional deltacomputer instance shared between
2142 2150 multiple calls
2143 2151 """
2144 2152 if link == nullrev:
2145 2153 raise error.RevlogError(
2146 2154 _(b"attempted to add linkrev -1 to %s") % self.display_id
2147 2155 )
2148 2156
2149 2157 if sidedata is None:
2150 2158 sidedata = {}
2151 2159 elif sidedata and not self.hassidedata:
2152 2160 raise error.ProgrammingError(
2153 2161 _(b"trying to add sidedata to a revlog that does not support them")
2154 2162 )
2155 2163
2156 2164 if flags:
2157 2165 node = node or self.hash(text, p1, p2)
2158 2166
2159 2167 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2160 2168
2161 2169 # If the flag processor modifies the revision data, ignore any provided
2162 2170 # cachedelta.
2163 2171 if rawtext != text:
2164 2172 cachedelta = None
2165 2173
2166 2174 if len(rawtext) > _maxentrysize:
2167 2175 raise error.RevlogError(
2168 2176 _(
2169 2177 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2170 2178 )
2171 2179 % (self.display_id, len(rawtext))
2172 2180 )
2173 2181
2174 2182 node = node or self.hash(rawtext, p1, p2)
2175 2183 rev = self.index.get_rev(node)
2176 2184 if rev is not None:
2177 2185 return rev
2178 2186
2179 2187 if validatehash:
2180 2188 self.checkhash(rawtext, node, p1=p1, p2=p2)
2181 2189
2182 2190 return self.addrawrevision(
2183 2191 rawtext,
2184 2192 transaction,
2185 2193 link,
2186 2194 p1,
2187 2195 p2,
2188 2196 node,
2189 2197 flags,
2190 2198 cachedelta=cachedelta,
2191 2199 deltacomputer=deltacomputer,
2192 2200 sidedata=sidedata,
2193 2201 )
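# Illustrative usage sketch (editor's addition, not part of the original
# source): a minimal call, inside an open transaction ``tr`` and with a
# changelog revision number ``linkrev`` (both hypothetical names):
#
#   rev = rl.addrevision(text, tr, linkrev, p1, p2)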
2194 2202
2195 2203 def addrawrevision(
2196 2204 self,
2197 2205 rawtext,
2198 2206 transaction,
2199 2207 link,
2200 2208 p1,
2201 2209 p2,
2202 2210 node,
2203 2211 flags,
2204 2212 cachedelta=None,
2205 2213 deltacomputer=None,
2206 2214 sidedata=None,
2207 2215 ):
2208 2216 """add a raw revision with known flags, node and parents
2209 2217 useful when reusing a revision not stored in this revlog (e.g. received
2210 2218 over the wire, or read from an external bundle).
2211 2219 """
2212 2220 with self._writing(transaction):
2213 2221 return self._addrevision(
2214 2222 node,
2215 2223 rawtext,
2216 2224 transaction,
2217 2225 link,
2218 2226 p1,
2219 2227 p2,
2220 2228 flags,
2221 2229 cachedelta,
2222 2230 deltacomputer=deltacomputer,
2223 2231 sidedata=sidedata,
2224 2232 )
2225 2233
2226 2234 def compress(self, data):
2227 2235 """Generate a possibly-compressed representation of data."""
2228 2236 if not data:
2229 2237 return b'', data
2230 2238
2231 2239 compressed = self._compressor.compress(data)
2232 2240
2233 2241 if compressed:
2234 2242 # The revlog compressor added the header in the returned data.
2235 2243 return b'', compressed
2236 2244
2237 2245 if data[0:1] == b'\0':
2238 2246 return b'', data
2239 2247 return b'u', data
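# Illustrative note (editor's addition, not part of the original source):
# the first element of the returned tuple is the storage header that
# ``decompress`` below dispatches on: empty when the compressor embeds its
# own header (e.g. zlib data starting with 'x') or when the chunk already
# starts with '\0', and b'u' when the data is stored uncompressed.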
2240 2248
2241 2249 def decompress(self, data):
2242 2250 """Decompress a revlog chunk.
2243 2251
2244 2252 The chunk is expected to begin with a header identifying the
2245 2253 format type so it can be routed to an appropriate decompressor.
2246 2254 """
2247 2255 if not data:
2248 2256 return data
2249 2257
2250 2258 # Revlogs are read much more frequently than they are written and many
2251 2259 # chunks only take microseconds to decompress, so performance is
2252 2260 # important here.
2253 2261 #
2254 2262 # We can make a few assumptions about revlogs:
2255 2263 #
2256 2264 # 1) the majority of chunks will be compressed (as opposed to inline
2257 2265 # raw data).
2258 2266 # 2) decompressing *any* data will likely be at least 10x slower than
2259 2267 # returning raw inline data.
2260 2268 # 3) we want to prioritize common and officially supported compression
2261 2269 # engines
2262 2270 #
2263 2271 # It follows that we want to optimize for "decompress compressed data
2264 2272 # when encoded with common and officially supported compression engines"
2265 2273 # case over "raw data" and "data encoded by less common or non-official
2266 2274 # compression engines." That is why we have the inline lookup first
2267 2275 # followed by the compengines lookup.
2268 2276 #
2269 2277 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2270 2278 # compressed chunks. And this matters for changelog and manifest reads.
2271 2279 t = data[0:1]
2272 2280
2273 2281 if t == b'x':
2274 2282 try:
2275 2283 return _zlibdecompress(data)
2276 2284 except zlib.error as e:
2277 2285 raise error.RevlogError(
2278 2286 _(b'revlog decompress error: %s')
2279 2287 % stringutil.forcebytestr(e)
2280 2288 )
2281 2289 # '\0' is more common than 'u' so it goes first.
2282 2290 elif t == b'\0':
2283 2291 return data
2284 2292 elif t == b'u':
2285 2293 return util.buffer(data, 1)
2286 2294
2287 2295 try:
2288 2296 compressor = self._decompressors[t]
2289 2297 except KeyError:
2290 2298 try:
2291 2299 engine = util.compengines.forrevlogheader(t)
2292 2300 compressor = engine.revlogcompressor(self._compengineopts)
2293 2301 self._decompressors[t] = compressor
2294 2302 except KeyError:
2295 2303 raise error.RevlogError(
2296 2304 _(b'unknown compression type %s') % binascii.hexlify(t)
2297 2305 )
2298 2306
2299 2307 return compressor.decompress(data)
2300 2308
2301 2309 def _addrevision(
2302 2310 self,
2303 2311 node,
2304 2312 rawtext,
2305 2313 transaction,
2306 2314 link,
2307 2315 p1,
2308 2316 p2,
2309 2317 flags,
2310 2318 cachedelta,
2311 2319 alwayscache=False,
2312 2320 deltacomputer=None,
2313 2321 sidedata=None,
2314 2322 ):
2315 2323 """internal function to add revisions to the log
2316 2324
2317 2325 see addrevision for argument descriptions.
2318 2326
2319 2327 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2320 2328
2321 2329 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2322 2330 be used.
2323 2331
2324 2332 invariants:
2325 2333 - rawtext is optional (can be None); if not set, cachedelta must be set.
2326 2334 if both are set, they must correspond to each other.
2327 2335 """
2328 2336 if node == self.nullid:
2329 2337 raise error.RevlogError(
2330 2338 _(b"%s: attempt to add null revision") % self.display_id
2331 2339 )
2332 2340 if (
2333 2341 node == self.nodeconstants.wdirid
2334 2342 or node in self.nodeconstants.wdirfilenodeids
2335 2343 ):
2336 2344 raise error.RevlogError(
2337 2345 _(b"%s: attempt to add wdir revision") % self.display_id
2338 2346 )
2339 2347 if self._writinghandles is None:
2340 2348 msg = b'adding revision outside `revlog._writing` context'
2341 2349 raise error.ProgrammingError(msg)
2342 2350
2343 2351 if self._inline:
2344 2352 fh = self._writinghandles[0]
2345 2353 else:
2346 2354 fh = self._writinghandles[1]
2347 2355
2348 2356 btext = [rawtext]
2349 2357
2350 2358 curr = len(self)
2351 2359 prev = curr - 1
2352 2360
2353 2361 offset = self._get_data_offset(prev)
2354 2362
2355 2363 if self._concurrencychecker:
2356 2364 ifh, dfh = self._writinghandles
2357 2365 if self._inline:
2358 2366 # offset is "as if" it were in the .d file, so we need to add on
2359 2367 # the size of the entry metadata.
2360 2368 self._concurrencychecker(
2361 2369 ifh, self._indexfile, offset + curr * self.index.entry_size
2362 2370 )
2363 2371 else:
2364 2372 # Entries in the .i are a consistent size.
2365 2373 self._concurrencychecker(
2366 2374 ifh, self._indexfile, curr * self.index.entry_size
2367 2375 )
2368 2376 self._concurrencychecker(dfh, self._datafile, offset)
2369 2377
2370 2378 p1r, p2r = self.rev(p1), self.rev(p2)
2371 2379
2372 2380 # full versions are inserted when the needed deltas
2373 2381 # become comparable to the uncompressed text
2374 2382 if rawtext is None:
2375 2383 # need rawtext size, before changed by flag processors, which is
2376 2384 # the non-raw size. use revlog explicitly to avoid filelog's extra
2377 2385 # logic that might remove metadata size.
2378 2386 textlen = mdiff.patchedsize(
2379 2387 revlog.size(self, cachedelta[0]), cachedelta[1]
2380 2388 )
2381 2389 else:
2382 2390 textlen = len(rawtext)
2383 2391
2384 2392 if deltacomputer is None:
2385 2393 deltacomputer = deltautil.deltacomputer(self)
2386 2394
2387 2395 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2388 2396
2389 2397 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2390 2398
2391 2399 if sidedata and self.hassidedata:
2392 2400 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2393 2401 sidedata_offset = offset + deltainfo.deltalen
2394 2402 else:
2395 2403 serialized_sidedata = b""
2396 2404 # Don't store the offset if the sidedata is empty; that way
2397 2405 # we can easily detect empty sidedata, and it will be no different
2398 2406 # from the ones we add manually.
2399 2407 sidedata_offset = 0
2400 2408
2401 2409 e = (
2402 2410 offset_type(offset, flags),
2403 2411 deltainfo.deltalen,
2404 2412 textlen,
2405 2413 deltainfo.base,
2406 2414 link,
2407 2415 p1r,
2408 2416 p2r,
2409 2417 node,
2410 2418 sidedata_offset,
2411 2419 len(serialized_sidedata),
2412 2420 )
2413 2421
2414 2422 self.index.append(e)
2415 2423 entry = self.index.entry_binary(curr)
2416 2424 if curr == 0 and self._docket is None:
2417 2425 header = self._format_flags | self._format_version
2418 2426 header = self.index.pack_header(header)
2419 2427 entry = header + entry
2420 2428 self._writeentry(
2421 2429 transaction,
2422 2430 entry,
2423 2431 deltainfo.data,
2424 2432 link,
2425 2433 offset,
2426 2434 serialized_sidedata,
2427 2435 )
2428 2436
2429 2437 rawtext = btext[0]
2430 2438
2431 2439 if alwayscache and rawtext is None:
2432 2440 rawtext = deltacomputer.buildtext(revinfo, fh)
2433 2441
2434 2442 if type(rawtext) == bytes: # only accept immutable objects
2435 2443 self._revisioncache = (node, curr, rawtext)
2436 2444 self._chainbasecache[curr] = deltainfo.chainbase
2437 2445 return curr
2438 2446
2439 2447 def _get_data_offset(self, prev):
2440 2448 """Returns the current offset in the (in-transaction) data file.
2441 2449 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2442 2450 file to store that information: since sidedata can be rewritten to the
2443 2451 end of the data file within a transaction, you can have cases where, for
2444 2452 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2445 2453 to `n - 1`'s sidedata being written after `n`'s data.
2446 2454
2447 2455 TODO cache this in a docket file before getting out of experimental."""
2448 2456 if self._format_version != REVLOGV2:
2449 2457 return self.end(prev)
2450 2458
2451 2459 offset = 0
2452 2460 for rev, entry in enumerate(self.index):
2453 2461 sidedata_end = entry[8] + entry[9]
2454 2462 # Sidedata for a previous rev has potentially been written after
2455 2463 # this rev's end, so take the max.
2456 2464 offset = max(self.end(rev), offset, sidedata_end)
2457 2465 return offset
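# Illustrative note (editor's addition, not part of the original source):
# the revlog v2 branch above scans every index entry because sidedata can
# be rewritten past the last revision's data, so the next write position
# is the maximum of all data ends and sidedata ends seen so far.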
2458 2466
2459 2467 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2460 2468 # Files opened in a+ mode have inconsistent behavior on various
2461 2469 # platforms. Windows requires that a file positioning call be made
2462 2470 # when the file handle transitions between reads and writes. See
2463 2471 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2464 2472 # platforms, Python or the platform itself can be buggy. Some versions
2465 2473 # of Solaris have been observed to not append at the end of the file
2466 2474 # if the file was seeked to before the end. See issue4943 for more.
2467 2475 #
2468 2476 # We work around this issue by inserting a seek() before writing.
2469 2477 # Note: This is likely not necessary on Python 3. However, because
2470 2478 # the file handle is reused for reads and may be seeked there, we need
2471 2479 # to be careful before changing this.
2472 2480 if self._writinghandles is None:
2473 2481 msg = b'adding revision outside `revlog._writing` context'
2474 2482 raise error.ProgrammingError(msg)
2475 2483 ifh, dfh = self._writinghandles
2476 2484 if self._docket is None:
2477 2485 ifh.seek(0, os.SEEK_END)
2478 2486 else:
2479 2487 ifh.seek(self._docket.index_end, os.SEEK_SET)
2480 2488 if dfh:
2481 2489 dfh.seek(0, os.SEEK_END)
2482 2490
2483 2491 curr = len(self) - 1
2484 2492 if not self._inline:
2485 2493 transaction.add(self._datafile, offset)
2486 2494 transaction.add(self._indexfile, curr * len(entry))
2487 2495 if data[0]:
2488 2496 dfh.write(data[0])
2489 2497 dfh.write(data[1])
2490 2498 if sidedata:
2491 2499 dfh.write(sidedata)
2492 2500 ifh.write(entry)
2493 2501 else:
2494 2502 offset += curr * self.index.entry_size
2495 2503 transaction.add(self._indexfile, offset)
2496 2504 ifh.write(entry)
2497 2505 ifh.write(data[0])
2498 2506 ifh.write(data[1])
2499 2507 if sidedata:
2500 2508 ifh.write(sidedata)
2501 2509 self._enforceinlinesize(transaction)
2502 2510 if self._docket is not None:
2503 2511 self._docket.index_end = self._writinghandles[0].tell()
2504 2512
2505 2513 nodemaputil.setup_persistent_nodemap(transaction, self)
2506 2514
2507 2515 def addgroup(
2508 2516 self,
2509 2517 deltas,
2510 2518 linkmapper,
2511 2519 transaction,
2512 2520 alwayscache=False,
2513 2521 addrevisioncb=None,
2514 2522 duplicaterevisioncb=None,
2515 2523 ):
2516 2524 """
2517 2525 add a delta group
2518 2526
2519 2527 given a set of deltas, add them to the revision log. the
2520 2528 first delta is against its parent, which should be in our
2521 2529 log, the rest are against the previous delta.
2522 2530
2523 2531 If ``addrevisioncb`` is defined, it will be called with arguments of
2524 2532 this revlog and the node that was added.
2525 2533 """
2526 2534
2527 2535 if self._adding_group:
2528 2536 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2529 2537
2530 2538 self._adding_group = True
2531 2539 empty = True
2532 2540 try:
2533 2541 with self._writing(transaction):
2534 2542 deltacomputer = deltautil.deltacomputer(self)
2535 2543 # loop through our set of deltas
2536 2544 for data in deltas:
2537 2545 (
2538 2546 node,
2539 2547 p1,
2540 2548 p2,
2541 2549 linknode,
2542 2550 deltabase,
2543 2551 delta,
2544 2552 flags,
2545 2553 sidedata,
2546 2554 ) = data
2547 2555 link = linkmapper(linknode)
2548 2556 flags = flags or REVIDX_DEFAULT_FLAGS
2549 2557
2550 2558 rev = self.index.get_rev(node)
2551 2559 if rev is not None:
2552 2560 # this can happen if two branches make the same change
2553 2561 self._nodeduplicatecallback(transaction, rev)
2554 2562 if duplicaterevisioncb:
2555 2563 duplicaterevisioncb(self, rev)
2556 2564 empty = False
2557 2565 continue
2558 2566
2559 2567 for p in (p1, p2):
2560 2568 if not self.index.has_node(p):
2561 2569 raise error.LookupError(
2562 2570 p, self.radix, _(b'unknown parent')
2563 2571 )
2564 2572
2565 2573 if not self.index.has_node(deltabase):
2566 2574 raise error.LookupError(
2567 2575 deltabase, self.display_id, _(b'unknown delta base')
2568 2576 )
2569 2577
2570 2578 baserev = self.rev(deltabase)
2571 2579
2572 2580 if baserev != nullrev and self.iscensored(baserev):
2573 2581 # if base is censored, delta must be full replacement in a
2574 2582 # single patch operation
2575 2583 hlen = struct.calcsize(b">lll")
2576 2584 oldlen = self.rawsize(baserev)
2577 2585 newlen = len(delta) - hlen
2578 2586 if delta[:hlen] != mdiff.replacediffheader(
2579 2587 oldlen, newlen
2580 2588 ):
2581 2589 raise error.CensoredBaseError(
2582 2590 self.display_id, self.node(baserev)
2583 2591 )
2584 2592
2585 2593 if not flags and self._peek_iscensored(baserev, delta):
2586 2594 flags |= REVIDX_ISCENSORED
2587 2595
2588 2596 # We assume consumers of addrevisioncb will want to retrieve
2589 2597 # the added revision, which will require a call to
2590 2598 # revision(). revision() will fast path if there is a cache
2591 2599 # hit. So, we tell _addrevision() to always cache in this case.
2592 2600 # We're only using addgroup() in the context of changegroup
2593 2601 # generation so the revision data can always be handled as raw
2594 2602 # by the flagprocessor.
2595 2603 rev = self._addrevision(
2596 2604 node,
2597 2605 None,
2598 2606 transaction,
2599 2607 link,
2600 2608 p1,
2601 2609 p2,
2602 2610 flags,
2603 2611 (baserev, delta),
2604 2612 alwayscache=alwayscache,
2605 2613 deltacomputer=deltacomputer,
2606 2614 sidedata=sidedata,
2607 2615 )
2608 2616
2609 2617 if addrevisioncb:
2610 2618 addrevisioncb(self, rev)
2611 2619 empty = False
2612 2620 finally:
2613 2621 self._adding_group = False
2614 2622 return not empty
2615 2623
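For illustration, a minimal sketch of the 8-tuple shape consumed by ``addgroup``; every value here is a placeholder (real nodes are binary hashes and ``delta`` is a binary patch), not data from any actual repository:

    # Hypothetical delta tuple, for illustration only.
    node = b'\x01' * 20
    p1 = b'\x02' * 20
    p2 = b'\x00' * 20        # null parent
    linknode = b'\x03' * 20  # changelog node this revision is linked to
    deltabase = p1           # the delta applies on top of this known node
    delta = b''              # placeholder binary delta
    flags = 0                # addgroup() substitutes REVIDX_DEFAULT_FLAGS for 0
    sidedata = {}
    one_delta = (node, p1, p2, linknode, deltabase, delta, flags, sidedata)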
2616 2624 def iscensored(self, rev):
2617 2625 """Check if a file revision is censored."""
2618 2626 if not self._censorable:
2619 2627 return False
2620 2628
2621 2629 return self.flags(rev) & REVIDX_ISCENSORED
2622 2630
2623 2631 def _peek_iscensored(self, baserev, delta):
2624 2632 """Quickly check if a delta produces a censored revision."""
2625 2633 if not self._censorable:
2626 2634 return False
2627 2635
2628 2636 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2629 2637
2630 2638 def getstrippoint(self, minlink):
2631 2639 """find the minimum rev that must be stripped to strip the linkrev
2632 2640
2633 2641 Returns a tuple containing the minimum rev and a set of all revs that
2634 2642 have linkrevs that will be broken by this strip.
2635 2643 """
2636 2644 return storageutil.resolvestripinfo(
2637 2645 minlink,
2638 2646 len(self) - 1,
2639 2647 self.headrevs(),
2640 2648 self.linkrev,
2641 2649 self.parentrevs,
2642 2650 )
2643 2651
2644 2652 def strip(self, minlink, transaction):
2645 2653 """truncate the revlog on the first revision with a linkrev >= minlink
2646 2654
2647 2655 This function is called when we're stripping revision minlink and
2648 2656 its descendants from the repository.
2649 2657
2650 2658 We have to remove all revisions with linkrev >= minlink, because
2651 2659 the equivalent changelog revisions will be renumbered after the
2652 2660 strip.
2653 2661
2654 2662 So we truncate the revlog on the first of these revisions, and
2655 2663 trust that the caller has saved the revisions that shouldn't be
2656 2664 removed and that it'll re-add them after this truncation.
2657 2665 """
2658 2666 if len(self) == 0:
2659 2667 return
2660 2668
2661 2669 rev, _ = self.getstrippoint(minlink)
2662 2670 if rev == len(self):
2663 2671 return
2664 2672
2665 2673 # first truncate the files on disk
2666 2674 end = self.start(rev)
2667 2675 if not self._inline:
2668 2676 transaction.add(self._datafile, end)
2669 2677 end = rev * self.index.entry_size
2670 2678 else:
2671 2679 end += rev * self.index.entry_size
2672 2680
2673 2681 transaction.add(self._indexfile, end)
2674 2682 if self._docket is not None:
2675 2683 # XXX we could leverage the docket while stripping. However it is
2676 2684 # not powerful enough at the time of this comment
2677 2685 self._docket.index_end = end
2678 2686 self._docket.write(transaction, stripping=True)
2679 2687
2680 2688 # then reset internal state in memory to forget those revisions
2681 2689 self._revisioncache = None
2682 2690 self._chaininfocache = util.lrucachedict(500)
2683 2691 self._chunkclear()
2684 2692
2685 2693 del self.index[rev:-1]
2686 2694
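A hedged caller-side sketch of pairing ``getstrippoint`` with ``strip``; ``rl``, ``tr`` and ``minlink`` are assumed to already exist:

    # Hypothetical usage: inspect what a strip would break before truncating.
    striprev, brokenrevs = rl.getstrippoint(minlink)
    # `brokenrevs` holds revisions whose linkrevs the strip will break; the
    # caller is expected to save them and re-add them after the truncation.
    rl.strip(minlink, tr)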
2687 2695 def checksize(self):
2688 2696 """Check size of index and data files
2689 2697
2690 2698 return a (dd, di) tuple.
2691 2699 - dd: extra bytes for the "data" file
2692 2700 - di: extra bytes for the "index" file
2693 2701
2694 2702 A healthy revlog will return (0, 0).
2695 2703 """
2696 2704 expected = 0
2697 2705 if len(self):
2698 2706 expected = max(0, self.end(len(self) - 1))
2699 2707
2700 2708 try:
2701 2709 with self._datafp() as f:
2702 2710 f.seek(0, io.SEEK_END)
2703 2711 actual = f.tell()
2704 2712 dd = actual - expected
2705 2713 except IOError as inst:
2706 2714 if inst.errno != errno.ENOENT:
2707 2715 raise
2708 2716 dd = 0
2709 2717
2710 2718 try:
2711 2719 f = self.opener(self._indexfile)
2712 2720 f.seek(0, io.SEEK_END)
2713 2721 actual = f.tell()
2714 2722 f.close()
2715 2723 s = self.index.entry_size
2716 2724 i = max(0, actual // s)
2717 2725 di = actual - (i * s)
2718 2726 if self._inline:
2719 2727 databytes = 0
2720 2728 for r in self:
2721 2729 databytes += max(0, self.length(r))
2722 2730 dd = 0
2723 2731 di = actual - len(self) * s - databytes
2724 2732 except IOError as inst:
2725 2733 if inst.errno != errno.ENOENT:
2726 2734 raise
2727 2735 di = 0
2728 2736
2729 2737 return (dd, di)
2730 2738
2731 2739 def files(self):
2732 2740 res = [self._indexfile]
2733 2741 if not self._inline:
2734 2742 res.append(self._datafile)
2735 2743 return res
2736 2744
2737 2745 def emitrevisions(
2738 2746 self,
2739 2747 nodes,
2740 2748 nodesorder=None,
2741 2749 revisiondata=False,
2742 2750 assumehaveparentrevisions=False,
2743 2751 deltamode=repository.CG_DELTAMODE_STD,
2744 2752 sidedata_helpers=None,
2745 2753 ):
2746 2754 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2747 2755 raise error.ProgrammingError(
2748 2756 b'unhandled value for nodesorder: %s' % nodesorder
2749 2757 )
2750 2758
2751 2759 if nodesorder is None and not self._generaldelta:
2752 2760 nodesorder = b'storage'
2753 2761
2754 2762 if (
2755 2763 not self._storedeltachains
2756 2764 and deltamode != repository.CG_DELTAMODE_PREV
2757 2765 ):
2758 2766 deltamode = repository.CG_DELTAMODE_FULL
2759 2767
2760 2768 return storageutil.emitrevisions(
2761 2769 self,
2762 2770 nodes,
2763 2771 nodesorder,
2764 2772 revlogrevisiondelta,
2765 2773 deltaparentfn=self.deltaparent,
2766 2774 candeltafn=self.candelta,
2767 2775 rawsizefn=self.rawsize,
2768 2776 revdifffn=self.revdiff,
2769 2777 flagsfn=self.flags,
2770 2778 deltamode=deltamode,
2771 2779 revisiondata=revisiondata,
2772 2780 assumehaveparentrevisions=assumehaveparentrevisions,
2773 2781 sidedata_helpers=sidedata_helpers,
2774 2782 )
2775 2783
2776 2784 DELTAREUSEALWAYS = b'always'
2777 2785 DELTAREUSESAMEREVS = b'samerevs'
2778 2786 DELTAREUSENEVER = b'never'
2779 2787
2780 2788 DELTAREUSEFULLADD = b'fulladd'
2781 2789
2782 2790 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2783 2791
2784 2792 def clone(
2785 2793 self,
2786 2794 tr,
2787 2795 destrevlog,
2788 2796 addrevisioncb=None,
2789 2797 deltareuse=DELTAREUSESAMEREVS,
2790 2798 forcedeltabothparents=None,
2791 2799 sidedata_helpers=None,
2792 2800 ):
2793 2801 """Copy this revlog to another, possibly with format changes.
2794 2802
2795 2803 The destination revlog will contain the same revisions and nodes.
2796 2804 However, it may not be bit-for-bit identical due to e.g. delta encoding
2797 2805 differences.
2798 2806
2799 2807 The ``deltareuse`` argument controls how deltas from the existing revlog
2800 2808 are preserved in the destination revlog. The argument can have the
2801 2809 following values:
2802 2810
2803 2811 DELTAREUSEALWAYS
2804 2812 Deltas will always be reused (if possible), even if the destination
2805 2813 revlog would not select the same revisions for the delta. This is the
2806 2814 fastest mode of operation.
2807 2815 DELTAREUSESAMEREVS
2808 2816 Deltas will be reused if the destination revlog would pick the same
2809 2817 revisions for the delta. This mode strikes a balance between speed
2810 2818 and optimization.
2811 2819 DELTAREUSENEVER
2812 2820 Deltas will never be reused. This is the slowest mode of execution.
2813 2821 This mode can be used to recompute deltas (e.g. if the diff/delta
2814 2822 algorithm changes).
2815 2823 DELTAREUSEFULLADD
2816 2824 Revisions will be re-added as if they were new content. This is
2817 2825 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2818 2826 e.g. large file detection and handling.
2819 2827
2820 2828 Delta computation can be slow, so the choice of delta reuse policy can
2821 2829 significantly affect run time.
2822 2830
2823 2831 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2824 2832 two extremes. Deltas will be reused if they are appropriate. But if the
2825 2833 delta could choose a better revision, it will do so. This means if you
2826 2834 are converting a non-generaldelta revlog to a generaldelta revlog,
2827 2835 deltas will be recomputed if the delta's parent isn't a parent of the
2828 2836 revision.
2829 2837
2830 2838 In addition to the delta policy, the ``forcedeltabothparents``
2831 2839 argument controls whether to force computing deltas against both parents
2832 2840 for merges. When left unset, the destination revlog's current setting is kept (a usage sketch follows this method).
2833 2841
2834 2842 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2835 2843 `sidedata_helpers`.
2836 2844 """
2837 2845 if deltareuse not in self.DELTAREUSEALL:
2838 2846 raise ValueError(
2839 2847 _(b'value for deltareuse invalid: %s') % deltareuse
2840 2848 )
2841 2849
2842 2850 if len(destrevlog):
2843 2851 raise ValueError(_(b'destination revlog is not empty'))
2844 2852
2845 2853 if getattr(self, 'filteredrevs', None):
2846 2854 raise ValueError(_(b'source revlog has filtered revisions'))
2847 2855 if getattr(destrevlog, 'filteredrevs', None):
2848 2856 raise ValueError(_(b'destination revlog has filtered revisions'))
2849 2857
2850 2858 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2851 2859 # if possible.
2852 2860 oldlazydelta = destrevlog._lazydelta
2853 2861 oldlazydeltabase = destrevlog._lazydeltabase
2854 2862 oldamd = destrevlog._deltabothparents
2855 2863
2856 2864 try:
2857 2865 if deltareuse == self.DELTAREUSEALWAYS:
2858 2866 destrevlog._lazydeltabase = True
2859 2867 destrevlog._lazydelta = True
2860 2868 elif deltareuse == self.DELTAREUSESAMEREVS:
2861 2869 destrevlog._lazydeltabase = False
2862 2870 destrevlog._lazydelta = True
2863 2871 elif deltareuse == self.DELTAREUSENEVER:
2864 2872 destrevlog._lazydeltabase = False
2865 2873 destrevlog._lazydelta = False
2866 2874
2867 2875 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2868 2876
2869 2877 self._clone(
2870 2878 tr,
2871 2879 destrevlog,
2872 2880 addrevisioncb,
2873 2881 deltareuse,
2874 2882 forcedeltabothparents,
2875 2883 sidedata_helpers,
2876 2884 )
2877 2885
2878 2886 finally:
2879 2887 destrevlog._lazydelta = oldlazydelta
2880 2888 destrevlog._lazydeltabase = oldlazydeltabase
2881 2889 destrevlog._deltabothparents = oldamd
2882 2890
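A hedged usage sketch of ``clone`` with an explicit delta-reuse policy; ``tr`` is an open transaction and ``src_rl``/``dst_rl`` are assumed revlog instances, ``dst_rl`` being empty:

    # Hypothetical usage: copy src_rl into an empty dst_rl, recomputing deltas.
    src_rl.clone(
        tr,
        dst_rl,
        deltareuse=src_rl.DELTAREUSENEVER,  # never reuse the source deltas
        forcedeltabothparents=None,         # keep the destination's own setting
    )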
2883 2891 def _clone(
2884 2892 self,
2885 2893 tr,
2886 2894 destrevlog,
2887 2895 addrevisioncb,
2888 2896 deltareuse,
2889 2897 forcedeltabothparents,
2890 2898 sidedata_helpers,
2891 2899 ):
2892 2900 """perform the core duty of `revlog.clone` after parameter processing"""
2893 2901 deltacomputer = deltautil.deltacomputer(destrevlog)
2894 2902 index = self.index
2895 2903 for rev in self:
2896 2904 entry = index[rev]
2897 2905
2898 2906 # Some classes override linkrev to take filtered revs into
2899 2907 # account. Use raw entry from index.
2900 2908 flags = entry[0] & 0xFFFF
2901 2909 linkrev = entry[4]
2902 2910 p1 = index[entry[5]][7]
2903 2911 p2 = index[entry[6]][7]
2904 2912 node = entry[7]
2905 2913
2906 2914 # (Possibly) reuse the delta from the revlog if allowed and
2907 2915 # the revlog chunk is a delta.
2908 2916 cachedelta = None
2909 2917 rawtext = None
2910 2918 if deltareuse == self.DELTAREUSEFULLADD:
2911 2919 text, sidedata = self._revisiondata(rev)
2912 2920
2913 2921 if sidedata_helpers is not None:
2914 2922 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2915 2923 self, sidedata_helpers, sidedata, rev
2916 2924 )
2917 2925 flags = flags | new_flags[0] & ~new_flags[1]
2918 2926
2919 2927 destrevlog.addrevision(
2920 2928 text,
2921 2929 tr,
2922 2930 linkrev,
2923 2931 p1,
2924 2932 p2,
2925 2933 cachedelta=cachedelta,
2926 2934 node=node,
2927 2935 flags=flags,
2928 2936 deltacomputer=deltacomputer,
2929 2937 sidedata=sidedata,
2930 2938 )
2931 2939 else:
2932 2940 if destrevlog._lazydelta:
2933 2941 dp = self.deltaparent(rev)
2934 2942 if dp != nullrev:
2935 2943 cachedelta = (dp, bytes(self._chunk(rev)))
2936 2944
2937 2945 sidedata = None
2938 2946 if not cachedelta:
2939 2947 rawtext, sidedata = self._revisiondata(rev)
2940 2948 if sidedata is None:
2941 2949 sidedata = self.sidedata(rev)
2942 2950
2943 2951 if sidedata_helpers is not None:
2944 2952 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2945 2953 self, sidedata_helpers, sidedata, rev
2946 2954 )
2947 2955 flags = flags | new_flags[0] & ~new_flags[1]
2948 2956
2949 2957 with destrevlog._writing(tr):
2950 2958 destrevlog._addrevision(
2951 2959 node,
2952 2960 rawtext,
2953 2961 tr,
2954 2962 linkrev,
2955 2963 p1,
2956 2964 p2,
2957 2965 flags,
2958 2966 cachedelta,
2959 2967 deltacomputer=deltacomputer,
2960 2968 sidedata=sidedata,
2961 2969 )
2962 2970
2963 2971 if addrevisioncb:
2964 2972 addrevisioncb(self, rev, node)
2965 2973
2966 2974 def censorrevision(self, tr, censornode, tombstone=b''):
2967 2975 if self._format_version == REVLOGV0:
2968 2976 raise error.RevlogError(
2969 2977 _(b'cannot censor with version %d revlogs')
2970 2978 % self._format_version
2971 2979 )
2972 2980
2973 2981 censorrev = self.rev(censornode)
2974 2982 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2975 2983
2976 2984 if len(tombstone) > self.rawsize(censorrev):
2977 2985 raise error.Abort(
2978 2986 _(b'censor tombstone must be no longer than censored data')
2979 2987 )
2980 2988
2981 2989 # Rewriting the revlog in place is hard. Our strategy for censoring is
2982 2990 # to create a new revlog, copy all revisions to it, then replace the
2983 2991 # revlogs on transaction close.
2984 2992 #
2985 2993 # This is a bit dangerous. We could easily have a mismatch of state.
2986 2994 newrl = revlog(
2987 2995 self.opener,
2988 2996 target=self.target,
2989 2997 radix=self.radix,
2990 2998 postfix=b'tmpcensored',
2991 2999 censorable=True,
2992 3000 )
2993 3001 newrl._format_version = self._format_version
2994 3002 newrl._format_flags = self._format_flags
2995 3003 newrl._generaldelta = self._generaldelta
2996 3004 newrl._parse_index = self._parse_index
2997 3005
2998 3006 for rev in self.revs():
2999 3007 node = self.node(rev)
3000 3008 p1, p2 = self.parents(node)
3001 3009
3002 3010 if rev == censorrev:
3003 3011 newrl.addrawrevision(
3004 3012 tombstone,
3005 3013 tr,
3006 3014 self.linkrev(censorrev),
3007 3015 p1,
3008 3016 p2,
3009 3017 censornode,
3010 3018 REVIDX_ISCENSORED,
3011 3019 )
3012 3020
3013 3021 if newrl.deltaparent(rev) != nullrev:
3014 3022 raise error.Abort(
3015 3023 _(
3016 3024 b'censored revision stored as delta; '
3017 3025 b'cannot censor'
3018 3026 ),
3019 3027 hint=_(
3020 3028 b'censoring of revlogs is not '
3021 3029 b'fully implemented; please report '
3022 3030 b'this bug'
3023 3031 ),
3024 3032 )
3025 3033 continue
3026 3034
3027 3035 if self.iscensored(rev):
3028 3036 if self.deltaparent(rev) != nullrev:
3029 3037 raise error.Abort(
3030 3038 _(
3031 3039 b'cannot censor due to censored '
3032 3040 b'revision having delta stored'
3033 3041 )
3034 3042 )
3035 3043 rawtext = self._chunk(rev)
3036 3044 else:
3037 3045 rawtext = self.rawdata(rev)
3038 3046
3039 3047 newrl.addrawrevision(
3040 3048 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3041 3049 )
3042 3050
3043 3051 tr.addbackup(self._indexfile, location=b'store')
3044 3052 if not self._inline:
3045 3053 tr.addbackup(self._datafile, location=b'store')
3046 3054
3047 3055 self.opener.rename(newrl._indexfile, self._indexfile)
3048 3056 if not self._inline:
3049 3057 self.opener.rename(newrl._datafile, self._datafile)
3050 3058
3051 3059 self.clearcaches()
3052 3060 self._loadindex()
3053 3061
3054 3062 def verifyintegrity(self, state):
3055 3063 """Verifies the integrity of the revlog.
3056 3064
3057 3065 Yields ``revlogproblem`` instances describing problems that are
3058 3066 found.
3059 3067 """
3060 3068 dd, di = self.checksize()
3061 3069 if dd:
3062 3070 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3063 3071 if di:
3064 3072 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3065 3073
3066 3074 version = self._format_version
3067 3075
3068 3076 # The verifier tells us what version revlog we should be.
3069 3077 if version != state[b'expectedversion']:
3070 3078 yield revlogproblem(
3071 3079 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3072 3080 % (self.display_id, version, state[b'expectedversion'])
3073 3081 )
3074 3082
3075 3083 state[b'skipread'] = set()
3076 3084 state[b'safe_renamed'] = set()
3077 3085
3078 3086 for rev in self:
3079 3087 node = self.node(rev)
3080 3088
3081 3089 # Verify contents. 4 cases to care about:
3082 3090 #
3083 3091 # common: the most common case
3084 3092 # rename: with a rename
3085 3093 # meta: file content starts with b'\1\n', the metadata
3086 3094 # header defined in filelog.py, but without a rename
3087 3095 # ext: content stored externally
3088 3096 #
3089 3097 # More formally, their differences are shown below:
3090 3098 #
3091 3099 # | common | rename | meta | ext
3092 3100 # -------------------------------------------------------
3093 3101 # flags() | 0 | 0 | 0 | not 0
3094 3102 # renamed() | False | True | False | ?
3095 3103 # rawtext[0:2]=='\1\n'| False | True | True | ?
3096 3104 #
3097 3105 # "rawtext" means the raw text stored in revlog data, which
3098 3106 # could be retrieved by "rawdata(rev)". "text"
3099 3107 # mentioned below is "revision(rev)".
3100 3108 #
3101 3109 # There are 3 different lengths stored physically:
3102 3110 # 1. L1: rawsize, stored in revlog index
3103 3111 # 2. L2: len(rawtext), stored in revlog data
3104 3112 # 3. L3: len(text), stored in revlog data if flags==0, or
3105 3113 # possibly somewhere else if flags!=0
3106 3114 #
3107 3115 # L1 should be equal to L2. L3 could be different from them.
3108 3116 # "text" may or may not affect commit hash depending on flag
3109 3117 # processors (see flagutil.addflagprocessor).
3110 3118 #
3111 3119 # | common | rename | meta | ext
3112 3120 # -------------------------------------------------
3113 3121 # rawsize() | L1 | L1 | L1 | L1
3114 3122 # size() | L1 | L2-LM | L1(*) | L1 (?)
3115 3123 # len(rawtext) | L2 | L2 | L2 | L2
3116 3124 # len(text) | L2 | L2 | L2 | L3
3117 3125 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3118 3126 #
3119 3127 # LM: length of metadata, depending on rawtext
3120 3128 # (*): not ideal, see comment in filelog.size
3121 3129 # (?): could be "- len(meta)" if the resolved content has
3122 3130 # rename metadata
3123 3131 #
3124 3132 # Checks needed to be done:
3125 3133 # 1. length check: L1 == L2, in all cases.
3126 3134 # 2. hash check: depending on flag processor, we may need to
3127 3135 # use either "text" (external), or "rawtext" (in revlog).
3128 3136
3129 3137 try:
3130 3138 skipflags = state.get(b'skipflags', 0)
3131 3139 if skipflags:
3132 3140 skipflags &= self.flags(rev)
3133 3141
3134 3142 _verify_revision(self, skipflags, state, node)
3135 3143
3136 3144 l1 = self.rawsize(rev)
3137 3145 l2 = len(self.rawdata(node))
3138 3146
3139 3147 if l1 != l2:
3140 3148 yield revlogproblem(
3141 3149 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3142 3150 node=node,
3143 3151 )
3144 3152
3145 3153 except error.CensoredNodeError:
3146 3154 if state[b'erroroncensored']:
3147 3155 yield revlogproblem(
3148 3156 error=_(b'censored file data'), node=node
3149 3157 )
3150 3158 state[b'skipread'].add(node)
3151 3159 except Exception as e:
3152 3160 yield revlogproblem(
3153 3161 error=_(b'unpacking %s: %s')
3154 3162 % (short(node), stringutil.forcebytestr(e)),
3155 3163 node=node,
3156 3164 )
3157 3165 state[b'skipread'].add(node)
3158 3166
3159 3167 def storageinfo(
3160 3168 self,
3161 3169 exclusivefiles=False,
3162 3170 sharedfiles=False,
3163 3171 revisionscount=False,
3164 3172 trackedsize=False,
3165 3173 storedsize=False,
3166 3174 ):
3167 3175 d = {}
3168 3176
3169 3177 if exclusivefiles:
3170 3178 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3171 3179 if not self._inline:
3172 3180 d[b'exclusivefiles'].append((self.opener, self._datafile))
3173 3181
3174 3182 if sharedfiles:
3175 3183 d[b'sharedfiles'] = []
3176 3184
3177 3185 if revisionscount:
3178 3186 d[b'revisionscount'] = len(self)
3179 3187
3180 3188 if trackedsize:
3181 3189 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3182 3190
3183 3191 if storedsize:
3184 3192 d[b'storedsize'] = sum(
3185 3193 self.opener.stat(path).st_size for path in self.files()
3186 3194 )
3187 3195
3188 3196 return d
3189 3197
3190 3198 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3191 3199 if not self.hassidedata:
3192 3200 return
3193 3201 # revlog formats with sidedata support do not support inline revlogs
3194 3202 assert not self._inline
3195 3203 if not helpers[1] and not helpers[2]:
3196 3204 # Nothing to generate or remove
3197 3205 return
3198 3206
3199 3207 new_entries = []
3200 3208 # append the new sidedata
3201 3209 with self._writing(transaction):
3202 3210 ifh, dfh = self._writinghandles
3203 3211 dfh.seek(0, os.SEEK_END)
3204 3212 current_offset = dfh.tell()
3205 3213 for rev in range(startrev, endrev + 1):
3206 3214 entry = self.index[rev]
3207 3215 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3208 3216 store=self,
3209 3217 sidedata_helpers=helpers,
3210 3218 sidedata={},
3211 3219 rev=rev,
3212 3220 )
3213 3221
3214 3222 serialized_sidedata = sidedatautil.serialize_sidedata(
3215 3223 new_sidedata
3216 3224 )
3217 3225 if entry[8] != 0 or entry[9] != 0:
3218 3226 # rewriting entries that already have sidedata is not
3219 3227 # supported yet, because it introduces garbage data in the
3220 3228 # revlog.
3221 3229 msg = b"rewriting existing sidedata is not supported yet"
3222 3230 raise error.Abort(msg)
3223 3231
3224 3232 # Apply (potential) flags to add and to remove after running
3225 3233 # the sidedata helpers
3226 3234 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3227 3235 entry = (new_offset_flags,) + entry[1:8]
3228 3236 entry += (current_offset, len(serialized_sidedata))
3229 3237
3230 3238 # the sidedata computation might have moved the file cursors around
3231 3239 dfh.seek(current_offset, os.SEEK_SET)
3232 3240 dfh.write(serialized_sidedata)
3233 3241 new_entries.append(entry)
3234 3242 current_offset += len(serialized_sidedata)
3235 3243
3236 3244 # rewrite the new index entries
3237 3245 ifh.seek(startrev * self.index.entry_size)
3238 3246 for i, e in enumerate(new_entries):
3239 3247 rev = startrev + i
3240 3248 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3241 3249 packed = self.index.entry_binary(rev)
3242 3250 if rev == 0 and self._docket is None:
3243 3251 header = self._format_flags | self._format_version
3244 3252 header = self.index.pack_header(header)
3245 3253 packed = header + packed
3246 3254 ifh.write(packed)
@@ -1,656 +1,656
1 1 # nodemap.py - nodemap related code and utilities
2 2 #
3 3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import os
13 13 import re
14 14 import struct
15 15
16 16 from ..node import hex
17 17
18 18 from .. import (
19 19 error,
20 20 util,
21 21 )
22 22
23 23
24 24 class NodeMap(dict):
25 25 def __missing__(self, x):
26 26 raise error.RevlogError(b'unknown node: %s' % x)
27 27
28 28
29 29 def persisted_data(revlog):
30 30 """read the nodemap for a revlog from disk"""
31 31 if revlog._nodemap_file is None:
32 32 return None
33 33 pdata = revlog.opener.tryread(revlog._nodemap_file)
34 34 if not pdata:
35 35 return None
36 36 offset = 0
37 37 (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
38 38 if version != ONDISK_VERSION:
39 39 return None
40 40 offset += S_VERSION.size
41 41 headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
42 42 uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
43 43 offset += S_HEADER.size
44 44 docket = NodeMapDocket(pdata[offset : offset + uid_size])
45 45 offset += uid_size
46 46 docket.tip_rev = tip_rev
47 47 docket.tip_node = pdata[offset : offset + tip_node_size]
48 48 docket.data_length = data_length
49 49 docket.data_unused = data_unused
50 50
51 51 filename = _rawdata_filepath(revlog, docket)
52 52 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
53 53 try:
54 54 with revlog.opener(filename) as fd:
55 55 if use_mmap:
56 56 try:
57 57 data = util.buffer(util.mmapread(fd, data_length))
58 58 except ValueError:
59 59 # raised when the read file is too small
60 60 data = b''
61 61 else:
62 62 data = fd.read(data_length)
63 63 except (IOError, OSError) as e:
64 64 if e.errno == errno.ENOENT:
65 65 return None
66 66 else:
67 67 raise
68 68 if len(data) < data_length:
69 69 return None
70 70 return docket, data
71 71
72 72
73 73 def setup_persistent_nodemap(tr, revlog):
74 74 """Install whatever is needed transaction side to persist a nodemap on disk
75 75
76 76 (only actually persist the nodemap if this is relevant for this revlog)
77 77 """
78 78 if revlog._inline:
79 79 return # inline revlogs are too small for this to be relevant
80 80 if revlog._nodemap_file is None:
81 81 return # we do not use persistent_nodemap on this revlog
82 82
83 83 # this must run after the changelog finalization, whose callbacks use a "cl-" prefixed id
84 84 callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog._nodemap_file
85 85 if tr.hasfinalize(callback_id):
86 86 return # no need to register again
87 87 tr.addpending(
88 88 callback_id, lambda tr: persist_nodemap(tr, revlog, pending=True)
89 89 )
90 90 tr.addfinalize(callback_id, lambda tr: persist_nodemap(tr, revlog))
91 91
92 92
93 93 class _NoTransaction(object):
94 94 """transaction like object to update the nodemap outside a transaction"""
95 95
96 96 def __init__(self):
97 97 self._postclose = {}
98 98
99 99 def addpostclose(self, callback_id, callback_func):
100 100 self._postclose[callback_id] = callback_func
101 101
102 102 def registertmp(self, *args, **kwargs):
103 103 pass
104 104
105 105 def addbackup(self, *args, **kwargs):
106 106 pass
107 107
108 108 def add(self, *args, **kwargs):
109 109 pass
110 110
111 111 def addabort(self, *args, **kwargs):
112 112 pass
113 113
114 114 def _report(self, *args):
115 115 pass
116 116
117 117
118 118 def update_persistent_nodemap(revlog):
119 119 """update the persistent nodemap right now
120 120
121 121 To be used for updating the nodemap on disk outside of a normal transaction
122 122 setup (eg, `debugupdatecache`).
123 123 """
124 124 if revlog._inline:
125 125 return # inline revlogs are too small for this to be relevant
126 126 if revlog._nodemap_file is None:
127 127 return # we do not use persistent_nodemap on this revlog
128 128
129 129 notr = _NoTransaction()
130 130 persist_nodemap(notr, revlog)
131 131 for k in sorted(notr._postclose):
132 132 notr._postclose[k](None)
133 133
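For example, a cache-warming debug command could refresh the on-disk nodemap outside any transaction; ``rl`` is an assumed revlog instance and the import path assumes the mercurial package is available:

    # Hypothetical usage sketch; this is a no-op for inline revlogs or for
    # revlogs without a persistent nodemap file.
    from mercurial.revlogutils import nodemap

    nodemap.update_persistent_nodemap(rl)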
134 134
135 135 def delete_nodemap(tr, repo, revlog):
136 136 """ Delete nodemap data on disk for a given revlog"""
137 137 if revlog._nodemap_file is None:
138 138 msg = "calling persist nodemap on a revlog without the feature enabled"
139 139 raise error.ProgrammingError(msg)
140 140 repo.svfs.unlink(revlog._nodemap_file)
141 141
142 142
143 143 def persist_nodemap(tr, revlog, pending=False, force=False):
144 144 """Write nodemap data on disk for a given revlog"""
145 145 if getattr(revlog, 'filteredrevs', ()):
146 146 raise error.ProgrammingError(
147 147 "cannot persist nodemap of a filtered changelog"
148 148 )
149 149 if revlog._nodemap_file is None:
150 150 if force:
151 151 revlog._nodemap_file = get_nodemap_file(revlog)
152 152 else:
153 153 msg = "calling persist nodemap on a revlog without the feature enabled"
154 154 raise error.ProgrammingError(msg)
155 155
156 156 can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
157 157 ondisk_docket = revlog._nodemap_docket
158 158 feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
159 159 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
160 160
161 161 data = None
162 162 # first attempt an incremental update of the data
163 163 if can_incremental and ondisk_docket is not None:
164 164 target_docket = revlog._nodemap_docket.copy()
165 165 (
166 166 src_docket,
167 167 data_changed_count,
168 168 data,
169 169 ) = revlog.index.nodemap_data_incremental()
170 170 new_length = target_docket.data_length + len(data)
171 171 new_unused = target_docket.data_unused + data_changed_count
172 172 if src_docket != target_docket:
173 173 data = None
174 174 elif new_length <= (new_unused * 10): # under 10% of unused data
175 175 data = None
176 176 else:
177 177 datafile = _rawdata_filepath(revlog, target_docket)
178 178 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
179 179 # store vfs
180 180 tr.add(datafile, target_docket.data_length)
181 181 with revlog.opener(datafile, b'r+') as fd:
182 182 fd.seek(target_docket.data_length)
183 183 fd.write(data)
184 184 if feed_data:
185 185 if use_mmap:
186 186 fd.seek(0)
187 187 new_data = fd.read(new_length)
188 188 else:
189 189 fd.flush()
190 190 new_data = util.buffer(util.mmapread(fd, new_length))
191 191 target_docket.data_length = new_length
192 192 target_docket.data_unused = new_unused
193 193
194 194 if data is None:
195 195 # otherwise fall back to a full new export
196 196 target_docket = NodeMapDocket()
197 197 datafile = _rawdata_filepath(revlog, target_docket)
198 198 if util.safehasattr(revlog.index, "nodemap_data_all"):
199 199 data = revlog.index.nodemap_data_all()
200 200 else:
201 201 data = persistent_data(revlog.index)
202 202 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
203 203 # store vfs
204 204
205 205 tryunlink = revlog.opener.tryunlink
206 206
207 207 def abortck(tr):
208 208 tryunlink(datafile)
209 209
210 210 callback_id = b"delete-%s" % datafile
211 211
212 212 # some flavors of transaction abort do not clean up new files, they
213 213 # simply empty them.
214 214 tr.addabort(callback_id, abortck)
215 215 with revlog.opener(datafile, b'w+') as fd:
216 216 fd.write(data)
217 217 if feed_data:
218 218 if use_mmap:
219 219 new_data = data
220 220 else:
221 221 fd.flush()
222 222 new_data = util.buffer(util.mmapread(fd, len(data)))
223 223 target_docket.data_length = len(data)
224 224 target_docket.tip_rev = revlog.tiprev()
225 225 target_docket.tip_node = revlog.node(target_docket.tip_rev)
226 226 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
227 227 # store vfs
228 228 file_path = revlog._nodemap_file
229 229 if pending:
230 230 file_path += b'.a'
231 231 tr.registertmp(file_path)
232 232 else:
233 233 tr.addbackup(file_path)
234 234
235 235 with revlog.opener(file_path, b'w', atomictemp=True) as fp:
236 236 fp.write(target_docket.serialize())
237 237 revlog._nodemap_docket = target_docket
238 238 if feed_data:
239 239 revlog.index.update_nodemap_data(target_docket, new_data)
240 240
241 241 # search for old data files in all cases; some older process might have
242 242 # left one behind.
243 243 olds = _other_rawdata_filepath(revlog, target_docket)
244 244 if olds:
245 245 realvfs = getattr(revlog, '_realopener', revlog.opener)
246 246
247 247 def cleanup(tr):
248 248 for oldfile in olds:
249 249 realvfs.tryunlink(oldfile)
250 250
251 251 callback_id = b"revlog-cleanup-nodemap-%s" % revlog._nodemap_file
252 252 tr.addpostclose(callback_id, cleanup)
253 253
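A hedged sketch of forcing a full nodemap write for a revlog whose nodemap file name is not configured yet; ``tr`` and ``rl`` are assumed to exist:

    # Hypothetical usage, e.g. from a debug command: `force=True` derives the
    # nodemap file name on the fly instead of raising a ProgrammingError.
    persist_nodemap(tr, rl, pending=False, force=True)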
254 254
255 255 ### Nodemap docket file
256 256 #
257 257 # The nodemap data are stored on disk using 2 files:
258 258 #
259 259 # * a raw data file containing a persistent nodemap
260 260 # (see `Nodemap Trie` section)
261 261 #
262 262 # * a small "docket" file containing metadata
263 263 #
264 264 # While the nodemap data can be multiple tens of megabytes, the "docket" is
265 265 # small; it is easy to update automatically or to duplicate its content
266 266 # during a transaction.
267 267 #
268 268 # Multiple raw data files can exist at the same time (the currently valid one
269 269 # and a new one being used by an in-progress transaction). To accommodate this,
270 270 # the filename hosting the raw data has a variable part. The exact filename is
271 271 # specified inside the "docket" file.
272 272 #
273 273 # The docket file contains information to find, qualify and validate the raw
274 274 # data. Its content is currently very light, but it will expand as the on disk
275 275 # nodemap gains the necessary features to be used in production.
276 276
277 277 ONDISK_VERSION = 1
278 278 S_VERSION = struct.Struct(">B")
279 279 S_HEADER = struct.Struct(">BQQQQ")
280 280
281 281 ID_SIZE = 8
282 282
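A minimal, self-contained sketch of packing and re-reading a version-1 docket header with the structs defined above (the structs are redefined locally so the snippet runs on its own; all values are made up for illustration):

    import struct

    S_VERSION = struct.Struct(">B")     # 1 byte: on-disk format version
    S_HEADER = struct.Struct(">BQQQQ")  # uid_size, tip_rev, data_length,
                                        # data_unused, tip_node_size

    uid = b"0123456789abcdef"           # hex uid, 2 * ID_SIZE ascii bytes
    tip_node = b"\x00" * 20             # binary node of the tip revision
    blob = (
        S_VERSION.pack(1)
        + S_HEADER.pack(len(uid), 5, 64, 0, len(tip_node))
        + uid
        + tip_node
    )
    (version,) = S_VERSION.unpack(blob[: S_VERSION.size])
    headers = S_HEADER.unpack(blob[S_VERSION.size : S_VERSION.size + S_HEADER.size])
    assert version == 1
    assert headers == (len(uid), 5, 64, 0, len(tip_node))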
283 283
284 284 def _make_uid():
285 285 """return a new unique identifier.
286 286
287 287 The identifier is random and composed of ascii characters."""
288 288 return hex(os.urandom(ID_SIZE))
289 289
290 290
291 291 class NodeMapDocket(object):
292 292 """metadata associated with persistent nodemap data
293 293
294 294 The persistent data may come from disk or be on their way to disk.
295 295 """
296 296
297 297 def __init__(self, uid=None):
298 298 if uid is None:
299 299 uid = _make_uid()
300 300 # a unique identifier for the data file:
301 301 # - When new data are appended, it is preserved.
302 302 # - When a new data file is created, a new identifier is generated.
303 303 self.uid = uid
304 304 # the tipmost revision stored in the data file. This revision and all
305 305 # revisions before it are expected to be encoded in the data file.
306 306 self.tip_rev = None
307 307 # the node of that tipmost revision; if it mismatches the current index
308 308 # data, the docket is not valid for the current index and should be
309 309 # discarded.
310 310 #
311 311 # note: this check is not perfect, as some destructive operations could
312 312 # preserve the same tip_rev + tip_node while altering lower revisions.
313 313 # However, multiple other caches have the same vulnerability (eg: the
314 314 # branchmap cache).
315 315 self.tip_node = None
316 316 # the size (in bytes) of the persisted data to encode the nodemap valid
317 317 # for `tip_rev`.
318 318 # - data files shorter than this are corrupted,
319 319 # - any extra data should be ignored.
320 320 self.data_length = None
321 321 # the amount (in bytes) of "dead" data, still in the data file but no
322 322 # longer used for the nodemap.
323 323 self.data_unused = 0
324 324
325 325 def copy(self):
326 326 new = NodeMapDocket(uid=self.uid)
327 327 new.tip_rev = self.tip_rev
328 328 new.tip_node = self.tip_node
329 329 new.data_length = self.data_length
330 330 new.data_unused = self.data_unused
331 331 return new
332 332
333 333 def __cmp__(self, other):
334 334 if self.uid < other.uid:
335 335 return -1
336 336 if self.uid > other.uid:
337 337 return 1
338 338 elif self.data_length < other.data_length:
339 339 return -1
340 340 elif self.data_length > other.data_length:
341 341 return 1
342 342 return 0
343 343
344 344 def __eq__(self, other):
345 345 return self.uid == other.uid and self.data_length == other.data_length
346 346
347 347 def serialize(self):
348 348 """return serialized bytes for a docket using the passed uid"""
349 349 data = []
350 350 data.append(S_VERSION.pack(ONDISK_VERSION))
351 351 headers = (
352 352 len(self.uid),
353 353 self.tip_rev,
354 354 self.data_length,
355 355 self.data_unused,
356 356 len(self.tip_node),
357 357 )
358 358 data.append(S_HEADER.pack(*headers))
359 359 data.append(self.uid)
360 360 data.append(self.tip_node)
361 361 return b''.join(data)
362 362
363 363
364 364 def _rawdata_filepath(revlog, docket):
365 365 """The (vfs relative) nodemap's rawdata file for a given uid"""
366 366 prefix = revlog.radix
367 367 return b"%s-%s.nd" % (prefix, docket.uid)
368 368
369 369
370 370 def _other_rawdata_filepath(revlog, docket):
371 371 prefix = revlog.radix
372 372 pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
373 373 new_file_path = _rawdata_filepath(revlog, docket)
374 374 new_file_name = revlog.opener.basename(new_file_path)
375 375 dirpath = revlog.opener.dirname(new_file_path)
376 376 others = []
377 377 for f in revlog.opener.listdir(dirpath):
378 378 if pattern.match(f) and f != new_file_name:
379 379 others.append(f)
380 380 return others
381 381
382 382
383 383 ### Nodemap Trie
384 384 #
385 385 # This is a simple reference implementation to compute and persist a nodemap
386 386 # trie. This reference implementation is write-only. The Python version of it
387 387 # is not expected to be actually used, since it won't provide a performance
388 388 # improvement over the existing non-persistent C implementation.
389 389 #
390 390 # The nodemap is persisted as a trie using 4-bit addresses and 16-entry blocks;
391 391 # each revision can be addressed using its node's shortest unique prefix.
392 392 #
393 393 # The trie is stored as a sequence of blocks. Each block contains 16 entries
394 394 # (signed 64bit integer, big endian). Each entry can be one of the following:
395 395 #
396 396 # * value >= 0 -> index of sub-block
397 397 # * value == -1 -> no value
398 398 # * value < -1 -> encoded revision: rev = -(value+2)
399 399 #
400 400 # See REV_OFFSET and _transform_rev below.
401 401 #
402 402 # The implementation focuses on simplicity, not on performance. A Rust
403 403 # implementation should provide an efficient version of the same binary
404 404 # persistence. This reference Python implementation is never meant to be
405 405 # used extensively in production.
406 406
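As a concrete, self-contained illustration of the entry encoding listed above (sub-block indices, empty slots, and encoded revisions):

    # Decode a single trie-block entry following the rules described above.
    def decode_entry(value):
        if value == -1:
            return ('empty', None)
        if value >= 0:
            return ('sub-block', value)
        return ('revision', -(value + 2))

    assert decode_entry(-2) == ('revision', 0)   # rev 0 is stored as -2
    assert decode_entry(-7) == ('revision', 5)
    assert decode_entry(3) == ('sub-block', 3)
    assert decode_entry(-1) == ('empty', None)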
407 407
408 408 def persistent_data(index):
409 409 """return the persistent binary form for a nodemap for a given index"""
410 410 trie = _build_trie(index)
411 411 return _persist_trie(trie)
412 412
413 413
414 414 def update_persistent_data(index, root, max_idx, last_rev):
415 415 """return the incremental update for persistent nodemap from a given index"""
416 416 changed_block, trie = _update_trie(index, root, last_rev)
417 417 return (
418 418 changed_block * S_BLOCK.size,
419 419 _persist_trie(trie, existing_idx=max_idx),
420 420 )
421 421
422 422
423 423 S_BLOCK = struct.Struct(">" + ("l" * 16))
424 424
425 425 NO_ENTRY = -1
426 426 # rev 0 needs to be -2 because 0 is used by blocks and -1 is a special value.
427 427 REV_OFFSET = 2
428 428
429 429
430 430 def _transform_rev(rev):
431 431 """Return the number used to represent the rev in the tree.
432 432
433 433 (or retrieve a rev number from such representation)
434 434
435 435 Note that this is an involution, a function equal to its inverse (i.e.
436 436 which gives the identity when applied to itself).
437 437 """
438 438 return -(rev + REV_OFFSET)
439 439
440 440
441 441 def _to_int(hex_digit):
442 442 """turn an hexadecimal digit into a proper integer"""
443 443 return int(hex_digit, 16)
444 444
445 445
446 446 class Block(dict):
447 447 """represent a block of the Trie
448 448
449 449 contains up to 16 entries indexed from 0 to 15"""
450 450
451 451 def __init__(self):
452 452 super(Block, self).__init__()
453 453 # If this block exists on disk, here is its ID
454 454 self.ondisk_id = None
455 455
456 456 def __iter__(self):
457 457 return iter(self.get(i) for i in range(16))
458 458
459 459
460 460 def _build_trie(index):
461 461 """build a nodemap trie
462 462
463 463 The nodemap stores revision number for each unique prefix.
464 464
465 465 Each block is a dictionary with keys in `[0, 15]`. Values are either
466 466 another block or a revision number.
467 467 """
468 468 root = Block()
469 469 for rev in range(len(index)):
470 470 current_hex = hex(index[rev][7])
471 471 _insert_into_block(index, 0, root, rev, current_hex)
472 472 return root
473 473
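A hedged, self-contained illustration of the in-memory shape produced for a two-revision index whose nodes share their first hex digit (it assumes the mercurial package is importable; only index[rev][7], the node, is consulted by _build_trie, so the other index fields are padding):

    from mercurial.revlogutils.nodemap import Block, _build_trie

    # Two fake nodes whose hex forms start with 'a1' and 'a9'.
    nodes = [b'\xa1' + b'\x00' * 19, b'\xa9' + b'\x00' * 19]
    fake_index = [(0,) * 7 + (node,) for node in nodes]

    root = _build_trie(fake_index)
    sub = root[0xA]                 # both nodes share the leading digit 'a'
    assert isinstance(sub, Block)
    assert sub[0x1] == 0            # rev 0 sits under its second digit '1'
    assert sub[0x9] == 1            # rev 1 sits under its second digit '9'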
474 474
475 475 def _update_trie(index, root, last_rev):
476 476 """consume"""
477 477 changed = 0
478 478 for rev in range(last_rev + 1, len(index)):
479 479 current_hex = hex(index[rev][7])
480 480 changed += _insert_into_block(index, 0, root, rev, current_hex)
481 481 return changed, root
482 482
483 483
484 484 def _insert_into_block(index, level, block, current_rev, current_hex):
485 485 """insert a new revision in a block
486 486
487 487 index: the index we are adding revision for
488 488 level: the depth of the current block in the trie
489 489 block: the block currently being considered
490 490 current_rev: the revision number we are adding
491 491 current_hex: the hexadecimal representation of the of that revision
492 492 """
493 493 changed = 1
494 494 if block.ondisk_id is not None:
495 495 block.ondisk_id = None
496 496 hex_digit = _to_int(current_hex[level : level + 1])
497 497 entry = block.get(hex_digit)
498 498 if entry is None:
499 499 # no entry, simply store the revision number
500 500 block[hex_digit] = current_rev
501 501 elif isinstance(entry, dict):
502 502 # need to recurse to an underlying block
503 503 changed += _insert_into_block(
504 504 index, level + 1, entry, current_rev, current_hex
505 505 )
506 506 else:
507 507 # collision with a previously unique prefix, inserting new
508 508 # vertices to fit both entries.
509 509 other_hex = hex(index[entry][7])
510 510 other_rev = entry
511 511 new = Block()
512 512 block[hex_digit] = new
513 513 _insert_into_block(index, level + 1, new, other_rev, other_hex)
514 514 _insert_into_block(index, level + 1, new, current_rev, current_hex)
515 515 return changed
516 516
517 517
518 518 def _persist_trie(root, existing_idx=None):
519 519 """turn a nodemap trie into persistent binary data
520 520
521 521 See `_build_trie` for nodemap trie structure"""
522 522 block_map = {}
523 523 if existing_idx is not None:
524 524 base_idx = existing_idx + 1
525 525 else:
526 526 base_idx = 0
527 527 chunks = []
528 528 for tn in _walk_trie(root):
529 529 if tn.ondisk_id is not None:
530 530 block_map[id(tn)] = tn.ondisk_id
531 531 else:
532 532 block_map[id(tn)] = len(chunks) + base_idx
533 533 chunks.append(_persist_block(tn, block_map))
534 534 return b''.join(chunks)
535 535
536 536
537 537 def _walk_trie(block):
538 538 """yield all the block in a trie
539 539
540 540 Children blocks are always yield before their parent block.
541 541 """
542 542 for (__, item) in sorted(block.items()):
543 543 if isinstance(item, dict):
544 544 for sub_block in _walk_trie(item):
545 545 yield sub_block
546 546 yield block
547 547
548 548
549 549 def _persist_block(block_node, block_map):
550 550 """produce persistent binary data for a single block
551 551
552 552 Child blocks are assumed to be already persisted and present in
553 553 block_map.
554 554 """
555 555 data = tuple(_to_value(v, block_map) for v in block_node)
556 556 return S_BLOCK.pack(*data)
557 557
558 558
559 559 def _to_value(item, block_map):
560 560 """persist any value as an integer"""
561 561 if item is None:
562 562 return NO_ENTRY
563 563 elif isinstance(item, dict):
564 564 return block_map[id(item)]
565 565 else:
566 566 return _transform_rev(item)
567 567
568 568
569 569 def parse_data(data):
570 570 """parse parse nodemap data into a nodemap Trie"""
571 571 if (len(data) % S_BLOCK.size) != 0:
572 572 msg = b"nodemap data size is not a multiple of block size (%d): %d"
573 573 raise error.Abort(msg % (S_BLOCK.size, len(data)))
574 574 if not data:
575 575 return Block(), None
576 576 block_map = {}
577 577 new_blocks = []
578 578 for i in range(0, len(data), S_BLOCK.size):
579 579 block = Block()
580 580 block.ondisk_id = len(block_map)
581 581 block_map[block.ondisk_id] = block
582 582 block_data = data[i : i + S_BLOCK.size]
583 583 values = S_BLOCK.unpack(block_data)
584 584 new_blocks.append((block, values))
585 585 for b, values in new_blocks:
586 586 for idx, v in enumerate(values):
587 587 if v == NO_ENTRY:
588 588 continue
589 589 elif v >= 0:
590 590 b[idx] = block_map[v]
591 591 else:
592 592 b[idx] = _transform_rev(v)
593 593 return block, i // S_BLOCK.size
594 594
595 595
596 596 # debug utility
597 597
598 598
599 599 def check_data(ui, index, data):
600 600 """verify that the provided nodemap data are valid for the given idex"""
601 601 ret = 0
602 602 ui.status((b"revision in index: %d\n") % len(index))
603 603 root, __ = parse_data(data)
604 604 all_revs = set(_all_revisions(root))
605 605 ui.status((b"revision in nodemap: %d\n") % len(all_revs))
606 606 for r in range(len(index)):
607 607 if r not in all_revs:
608 608 msg = b" revision missing from nodemap: %d\n" % r
609 609 ui.write_err(msg)
610 610 ret = 1
611 611 else:
612 612 all_revs.remove(r)
613 613 nm_rev = _find_node(root, hex(index[r][7]))
614 614 if nm_rev is None:
615 615 msg = b" revision node does not match any entries: %d\n" % r
616 616 ui.write_err(msg)
617 617 ret = 1
618 618 elif nm_rev != r:
619 619 msg = (
620 620 b" revision node does not match the expected revision: "
621 621 b"%d != %d\n" % (r, nm_rev)
622 622 )
623 623 ui.write_err(msg)
624 624 ret = 1
625 625
626 626 if all_revs:
627 627 for r in sorted(all_revs):
628 628 msg = b" extra revision in nodemap: %d\n" % r
629 629 ui.write_err(msg)
630 630 ret = 1
631 631 return ret
632 632
633 633
634 634 def _all_revisions(root):
635 635 """return all revisions stored in a Trie"""
636 636 for block in _walk_trie(root):
637 637 for v in block:
638 638 if v is None or isinstance(v, Block):
639 639 continue
640 640 yield v
641 641
642 642
643 643 def _find_node(block, node):
644 644 """find the revision associated with a given node"""
645 645 entry = block.get(_to_int(node[0:1]))
646 646 if isinstance(entry, dict):
647 647 return _find_node(entry, node[1:])
648 648 return entry
649 649
650 650
651 651 def get_nodemap_file(revlog):
652 if revlog.postfix == b'a':
652 if revlog._trypending:
653 653 pending_path = revlog.radix + b".n.a"
654 654 if revlog.opener.exists(pending_path):
655 655 return pending_path
656 656 return revlog.radix + b".n"