revlog: use file read caching for sidedata...
Simon Sapin - r48219:cac0e062 default
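This changeset extends the existing file read caching to sidedata: _loadindex() now wraps the sidedata file in the same randomaccessfile abstraction already used for the index/data segments, clearcaches() drops that cache, and the changelog's delayed-write machinery keeps the new wrapper's opener in sync with the diverted/delayed openers. A minimal sketch of the resulting pattern follows; it only restates what the hunks below add, and the read_chunk() accessor name is an assumption borrowed from the existing segment file wrapper rather than something verified in this diff:

    # sketch only, not the actual revlog code
    from mercurial.revlogutils import randomaccessfile

    class sidedata_cache_sketch(object):
        def _loadindex(self, opener, sidedatafile, chunkcachesize):
            # cache raw reads from the sidedata file, like the data file
            self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
                opener, sidedatafile, chunkcachesize
            )

        def _sidedata_chunk(self, offset, length):
            # assumed accessor name; cached read instead of reopening the file
            return self._segmentfile_sidedata.read_chunk(offset, length)

        def clearcaches(self):
            self._segmentfile_sidedata.clear_cache()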
@@ -1,630 +1,633 @@ mercurial/changelog.py
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 )
15 15 from .thirdparty import attr
16 16
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 metadata,
21 21 pycompat,
22 22 revlog,
23 23 )
24 24 from .utils import (
25 25 dateutil,
26 26 stringutil,
27 27 )
28 28 from .revlogutils import (
29 29 constants as revlog_constants,
30 30 flagutil,
31 31 )
32 32
33 33 _defaultextra = {b'branch': b'default'}
34 34
35 35
36 36 def _string_escape(text):
37 37 """
38 38 >>> from .pycompat import bytechr as chr
39 39 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
40 40 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
41 41 >>> s
42 42 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
43 43 >>> res = _string_escape(s)
44 44 >>> s == _string_unescape(res)
45 45 True
46 46 """
47 47 # subset of the string_escape codec
48 48 text = (
49 49 text.replace(b'\\', b'\\\\')
50 50 .replace(b'\n', b'\\n')
51 51 .replace(b'\r', b'\\r')
52 52 )
53 53 return text.replace(b'\0', b'\\0')
54 54
55 55
56 56 def _string_unescape(text):
57 57 if b'\\0' in text:
58 58 # fix up \0 without getting into trouble with \\0
59 59 text = text.replace(b'\\\\', b'\\\\\n')
60 60 text = text.replace(b'\\0', b'\0')
61 61 text = text.replace(b'\n', b'')
62 62 return stringutil.unescapestr(text)
63 63
64 64
65 65 def decodeextra(text):
66 66 """
67 67 >>> from .pycompat import bytechr as chr
68 68 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
69 69 ... ).items())
70 70 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
71 71 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
72 72 ... b'baz': chr(92) + chr(0) + b'2'})
73 73 ... ).items())
74 74 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
75 75 """
76 76 extra = _defaultextra.copy()
77 77 for l in text.split(b'\0'):
78 78 if l:
79 79 k, v = _string_unescape(l).split(b':', 1)
80 80 extra[k] = v
81 81 return extra
82 82
83 83
84 84 def encodeextra(d):
85 85 # keys must be sorted to produce a deterministic changelog entry
86 86 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
87 87 return b"\0".join(items)
88 88
89 89
90 90 def stripdesc(desc):
91 91 """strip trailing whitespace and leading and trailing empty lines"""
92 92 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
93 93
94 94
95 95 class appender(object):
96 96 """the changelog index must be updated last on disk, so we use this class
97 97 to delay writes to it"""
98 98
99 99 def __init__(self, vfs, name, mode, buf):
100 100 self.data = buf
101 101 fp = vfs(name, mode)
102 102 self.fp = fp
103 103 self.offset = fp.tell()
104 104 self.size = vfs.fstat(fp).st_size
105 105 self._end = self.size
106 106
107 107 def end(self):
108 108 return self._end
109 109
110 110 def tell(self):
111 111 return self.offset
112 112
113 113 def flush(self):
114 114 pass
115 115
116 116 @property
117 117 def closed(self):
118 118 return self.fp.closed
119 119
120 120 def close(self):
121 121 self.fp.close()
122 122
123 123 def seek(self, offset, whence=0):
124 124 '''virtual file offset spans real file and data'''
125 125 if whence == 0:
126 126 self.offset = offset
127 127 elif whence == 1:
128 128 self.offset += offset
129 129 elif whence == 2:
130 130 self.offset = self.end() + offset
131 131 if self.offset < self.size:
132 132 self.fp.seek(self.offset)
133 133
134 134 def read(self, count=-1):
135 135 '''only trick here is reads that span real file and data'''
136 136 ret = b""
137 137 if self.offset < self.size:
138 138 s = self.fp.read(count)
139 139 ret = s
140 140 self.offset += len(s)
141 141 if count > 0:
142 142 count -= len(s)
143 143 if count != 0:
144 144 doff = self.offset - self.size
145 145 self.data.insert(0, b"".join(self.data))
146 146 del self.data[1:]
147 147 s = self.data[0][doff : doff + count]
148 148 self.offset += len(s)
149 149 ret += s
150 150 return ret
151 151
152 152 def write(self, s):
153 153 self.data.append(bytes(s))
154 154 self.offset += len(s)
155 155 self._end += len(s)
156 156
157 157 def __enter__(self):
158 158 self.fp.__enter__()
159 159 return self
160 160
161 161 def __exit__(self, *args):
162 162 return self.fp.__exit__(*args)
163 163
164 164
165 165 class _divertopener(object):
166 166 def __init__(self, opener, target):
167 167 self._opener = opener
168 168 self._target = target
169 169
170 170 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
171 171 if name != self._target:
172 172 return self._opener(name, mode, **kwargs)
173 173 return self._opener(name + b".a", mode, **kwargs)
174 174
175 175 def __getattr__(self, attr):
176 176 return getattr(self._opener, attr)
177 177
178 178
179 179 def _delayopener(opener, target, buf):
180 180 """build an opener that stores chunks in 'buf' instead of 'target'"""
181 181
182 182 def _delay(name, mode=b'r', checkambig=False, **kwargs):
183 183 if name != target:
184 184 return opener(name, mode, **kwargs)
185 185 assert not kwargs
186 186 return appender(opener, name, mode, buf)
187 187
188 188 return _delay
189 189
190 190
191 191 @attr.s
192 192 class _changelogrevision(object):
193 193 # Extensions might modify _defaultextra, so let the constructor below pass
194 194 # it in
195 195 extra = attr.ib()
196 196 manifest = attr.ib()
197 197 user = attr.ib(default=b'')
198 198 date = attr.ib(default=(0, 0))
199 199 files = attr.ib(default=attr.Factory(list))
200 200 filesadded = attr.ib(default=None)
201 201 filesremoved = attr.ib(default=None)
202 202 p1copies = attr.ib(default=None)
203 203 p2copies = attr.ib(default=None)
204 204 description = attr.ib(default=b'')
205 205 branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))
206 206
207 207
208 208 class changelogrevision(object):
209 209 """Holds results of a parsed changelog revision.
210 210
211 211 Changelog revisions consist of multiple pieces of data, including
212 212 the manifest node, user, and date. This object exposes a view into
213 213 the parsed object.
214 214 """
215 215
216 216 __slots__ = (
217 217 '_offsets',
218 218 '_text',
219 219 '_sidedata',
220 220 '_cpsd',
221 221 '_changes',
222 222 )
223 223
224 224 def __new__(cls, cl, text, sidedata, cpsd):
225 225 if not text:
226 226 return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)
227 227
228 228 self = super(changelogrevision, cls).__new__(cls)
229 229 # We could return here and implement the following as an __init__.
230 230 # But doing it here is equivalent and saves an extra function call.
231 231
232 232 # format used:
233 233 # nodeid\n : manifest node in ascii
234 234 # user\n : user, no \n or \r allowed
235 235 # time tz extra\n : date (time is int or float, timezone is int)
236 236 # : extra is metadata, encoded and separated by '\0'
237 237 # : older versions ignore it
238 238 # files\n\n : files modified by the cset, no \n or \r allowed
239 239 # (.*) : comment (free text, ideally utf-8)
240 240 #
241 241 # changelog v0 doesn't use extra
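#
# (editorial example, hypothetical values only) a complete entry in that
# format could look like:
#
#   0123456789abcdef0123456789abcdef01234567\n
#   Alice Example <alice@example.org>\n
#   1500000000 0 branch:stable\n
#   path/to/file_a\n
#   path/to/file_b\n
#   \n
#   commit message, free text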
242 242
243 243 nl1 = text.index(b'\n')
244 244 nl2 = text.index(b'\n', nl1 + 1)
245 245 nl3 = text.index(b'\n', nl2 + 1)
246 246
247 247 # The list of files may be empty. Which means nl3 is the first of the
248 248 # double newline that precedes the description.
249 249 if text[nl3 + 1 : nl3 + 2] == b'\n':
250 250 doublenl = nl3
251 251 else:
252 252 doublenl = text.index(b'\n\n', nl3 + 1)
253 253
254 254 self._offsets = (nl1, nl2, nl3, doublenl)
255 255 self._text = text
256 256 self._sidedata = sidedata
257 257 self._cpsd = cpsd
258 258 self._changes = None
259 259
260 260 return self
261 261
262 262 @property
263 263 def manifest(self):
264 264 return bin(self._text[0 : self._offsets[0]])
265 265
266 266 @property
267 267 def user(self):
268 268 off = self._offsets
269 269 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
270 270
271 271 @property
272 272 def _rawdate(self):
273 273 off = self._offsets
274 274 dateextra = self._text[off[1] + 1 : off[2]]
275 275 return dateextra.split(b' ', 2)[0:2]
276 276
277 277 @property
278 278 def _rawextra(self):
279 279 off = self._offsets
280 280 dateextra = self._text[off[1] + 1 : off[2]]
281 281 fields = dateextra.split(b' ', 2)
282 282 if len(fields) != 3:
283 283 return None
284 284
285 285 return fields[2]
286 286
287 287 @property
288 288 def date(self):
289 289 raw = self._rawdate
290 290 time = float(raw[0])
291 291 # Various tools did silly things with the timezone.
292 292 try:
293 293 timezone = int(raw[1])
294 294 except ValueError:
295 295 timezone = 0
296 296
297 297 return time, timezone
298 298
299 299 @property
300 300 def extra(self):
301 301 raw = self._rawextra
302 302 if raw is None:
303 303 return _defaultextra
304 304
305 305 return decodeextra(raw)
306 306
307 307 @property
308 308 def changes(self):
309 309 if self._changes is not None:
310 310 return self._changes
311 311 if self._cpsd:
312 312 changes = metadata.decode_files_sidedata(self._sidedata)
313 313 else:
314 314 changes = metadata.ChangingFiles(
315 315 touched=self.files or (),
316 316 added=self.filesadded or (),
317 317 removed=self.filesremoved or (),
318 318 p1_copies=self.p1copies or {},
319 319 p2_copies=self.p2copies or {},
320 320 )
321 321 self._changes = changes
322 322 return changes
323 323
324 324 @property
325 325 def files(self):
326 326 if self._cpsd:
327 327 return sorted(self.changes.touched)
328 328 off = self._offsets
329 329 if off[2] == off[3]:
330 330 return []
331 331
332 332 return self._text[off[2] + 1 : off[3]].split(b'\n')
333 333
334 334 @property
335 335 def filesadded(self):
336 336 if self._cpsd:
337 337 return self.changes.added
338 338 else:
339 339 rawindices = self.extra.get(b'filesadded')
340 340 if rawindices is None:
341 341 return None
342 342 return metadata.decodefileindices(self.files, rawindices)
343 343
344 344 @property
345 345 def filesremoved(self):
346 346 if self._cpsd:
347 347 return self.changes.removed
348 348 else:
349 349 rawindices = self.extra.get(b'filesremoved')
350 350 if rawindices is None:
351 351 return None
352 352 return metadata.decodefileindices(self.files, rawindices)
353 353
354 354 @property
355 355 def p1copies(self):
356 356 if self._cpsd:
357 357 return self.changes.copied_from_p1
358 358 else:
359 359 rawcopies = self.extra.get(b'p1copies')
360 360 if rawcopies is None:
361 361 return None
362 362 return metadata.decodecopies(self.files, rawcopies)
363 363
364 364 @property
365 365 def p2copies(self):
366 366 if self._cpsd:
367 367 return self.changes.copied_from_p2
368 368 else:
369 369 rawcopies = self.extra.get(b'p2copies')
370 370 if rawcopies is None:
371 371 return None
372 372 return metadata.decodecopies(self.files, rawcopies)
373 373
374 374 @property
375 375 def description(self):
376 376 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
377 377
378 378 @property
379 379 def branchinfo(self):
380 380 extra = self.extra
381 381 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
382 382
383 383
384 384 class changelog(revlog.revlog):
385 385 def __init__(self, opener, trypending=False, concurrencychecker=None):
386 386 """Load a changelog revlog using an opener.
387 387
388 388 If ``trypending`` is true, we attempt to load the index from a
389 389 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
390 390 The ``00changelog.i.a`` file contains index (and possibly inline
391 391 revision) data for a transaction that hasn't been finalized yet.
392 392 It exists in a separate file to facilitate readers (such as
393 393 hooks processes) accessing data before a transaction is finalized.
394 394
395 395 ``concurrencychecker`` will be passed to the revlog init function, see
396 396 the documentation there.
397 397 """
398 398 revlog.revlog.__init__(
399 399 self,
400 400 opener,
401 401 target=(revlog_constants.KIND_CHANGELOG, None),
402 402 radix=b'00changelog',
403 403 checkambig=True,
404 404 mmaplargeindex=True,
405 405 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
406 406 concurrencychecker=concurrencychecker,
407 407 trypending=trypending,
408 408 )
409 409
410 410 if self._initempty and (self._format_version == revlog.REVLOGV1):
411 411 # changelogs don't benefit from generaldelta.
412 412
413 413 self._format_flags &= ~revlog.FLAG_GENERALDELTA
414 414 self._generaldelta = False
415 415
416 416 # Delta chains for changelogs tend to be very small because entries
417 417 # tend to be small and don't delta well with each other. So disable delta
418 418 # chains.
419 419 self._storedeltachains = False
420 420
421 421 self._realopener = opener
422 422 self._delayed = False
423 423 self._delaybuf = None
424 424 self._divert = False
425 425 self._filteredrevs = frozenset()
426 426 self._filteredrevs_hashcache = {}
427 427 self._copiesstorage = opener.options.get(b'copies-storage')
428 428
429 429 @property
430 430 def filteredrevs(self):
431 431 return self._filteredrevs
432 432
433 433 @filteredrevs.setter
434 434 def filteredrevs(self, val):
435 435 # Ensure all updates go through this function
436 436 assert isinstance(val, frozenset)
437 437 self._filteredrevs = val
438 438 self._filteredrevs_hashcache = {}
439 439
440 440 def _write_docket(self, tr):
441 441 if not self._delayed:
442 442 super(changelog, self)._write_docket(tr)
443 443
444 444 def delayupdate(self, tr):
445 445 """delay visibility of index updates to other readers"""
446 446 if self._docket is None and not self._delayed:
447 447 if len(self) == 0:
448 448 self._divert = True
449 449 if self._realopener.exists(self._indexfile + b'.a'):
450 450 self._realopener.unlink(self._indexfile + b'.a')
451 451 self.opener = _divertopener(self._realopener, self._indexfile)
452 452 else:
453 453 self._delaybuf = []
454 454 self.opener = _delayopener(
455 455 self._realopener, self._indexfile, self._delaybuf
456 456 )
457 457 self._segmentfile.opener = self.opener
458 self._segmentfile_sidedata.opener = self.opener
458 459 self._delayed = True
459 460 tr.addpending(b'cl-%i' % id(self), self._writepending)
460 461 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
461 462
462 463 def _finalize(self, tr):
463 464 """finalize index updates"""
464 465 self._delayed = False
465 466 self.opener = self._realopener
466 467 self._segmentfile.opener = self.opener
468 self._segmentfile_sidedata.opener = self.opener
467 469 # move redirected index data back into place
468 470 if self._docket is not None:
469 471 self._write_docket(tr)
470 472 elif self._divert:
471 473 assert not self._delaybuf
472 474 tmpname = self._indexfile + b".a"
473 475 nfile = self.opener.open(tmpname)
474 476 nfile.close()
475 477 self.opener.rename(tmpname, self._indexfile, checkambig=True)
476 478 elif self._delaybuf:
477 479 fp = self.opener(self._indexfile, b'a', checkambig=True)
478 480 fp.write(b"".join(self._delaybuf))
479 481 fp.close()
480 482 self._delaybuf = None
481 483 self._divert = False
482 484 # split when we're done
483 485 self._enforceinlinesize(tr)
484 486
485 487 def _writepending(self, tr):
486 488 """create a file containing the unfinalized state for
487 489 pretxnchangegroup"""
488 490 if self._docket:
489 491 return self._docket.write(tr, pending=True)
490 492 if self._delaybuf:
491 493 # make a temporary copy of the index
492 494 fp1 = self._realopener(self._indexfile)
493 495 pendingfilename = self._indexfile + b".a"
494 496 # register as a temp file to ensure cleanup on failure
495 497 tr.registertmp(pendingfilename)
496 498 # write existing data
497 499 fp2 = self._realopener(pendingfilename, b"w")
498 500 fp2.write(fp1.read())
499 501 # add pending data
500 502 fp2.write(b"".join(self._delaybuf))
501 503 fp2.close()
502 504 # switch modes so finalize can simply rename
503 505 self._delaybuf = None
504 506 self._divert = True
505 507 self.opener = _divertopener(self._realopener, self._indexfile)
506 508 self._segmentfile.opener = self.opener
509 self._segmentfile_sidedata.opener = self.opener
507 510
508 511 if self._divert:
509 512 return True
510 513
511 514 return False
512 515
513 516 def _enforceinlinesize(self, tr):
514 517 if not self._delayed:
515 518 revlog.revlog._enforceinlinesize(self, tr)
516 519
517 520 def read(self, nodeorrev):
518 521 """Obtain data from a parsed changelog revision.
519 522
520 523 Returns a 6-tuple of:
521 524
522 525 - manifest node in binary
523 526 - author/user as a localstr
524 527 - date as a 2-tuple of (time, timezone)
525 528 - list of files
526 529 - commit message as a localstr
527 530 - dict of extra metadata
528 531
529 532 Unless you need to access all fields, consider calling
530 533 ``changelogrevision`` instead, as it is faster for partial object
531 534 access.
532 535 """
533 536 d = self._revisiondata(nodeorrev)
534 537 sidedata = self.sidedata(nodeorrev)
535 538 copy_sd = self._copiesstorage == b'changeset-sidedata'
536 539 c = changelogrevision(self, d, sidedata, copy_sd)
537 540 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
538 541
539 542 def changelogrevision(self, nodeorrev):
540 543 """Obtain a ``changelogrevision`` for a node or revision."""
541 544 text = self._revisiondata(nodeorrev)
542 545 sidedata = self.sidedata(nodeorrev)
543 546 return changelogrevision(
544 547 self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
545 548 )
546 549
547 550 def readfiles(self, nodeorrev):
548 551 """
549 552 short version of read that only returns the files modified by the cset
550 553 """
551 554 text = self.revision(nodeorrev)
552 555 if not text:
553 556 return []
554 557 last = text.index(b"\n\n")
555 558 l = text[:last].split(b'\n')
556 559 return l[3:]
557 560
558 561 def add(
559 562 self,
560 563 manifest,
561 564 files,
562 565 desc,
563 566 transaction,
564 567 p1,
565 568 p2,
566 569 user,
567 570 date=None,
568 571 extra=None,
569 572 ):
570 573 # Convert to UTF-8 encoded bytestrings as the very first
571 574 # thing: calling any method on a localstr object will turn it
572 575 # into a str object and the cached UTF-8 string is thus lost.
573 576 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
574 577
575 578 user = user.strip()
576 579 # An empty username or a username with a "\n" will make the
577 580 # revision text contain two "\n\n" sequences -> corrupt
578 581 # repository since read cannot unpack the revision.
579 582 if not user:
580 583 raise error.StorageError(_(b"empty username"))
581 584 if b"\n" in user:
582 585 raise error.StorageError(
583 586 _(b"username %r contains a newline") % pycompat.bytestr(user)
584 587 )
585 588
586 589 desc = stripdesc(desc)
587 590
588 591 if date:
589 592 parseddate = b"%d %d" % dateutil.parsedate(date)
590 593 else:
591 594 parseddate = b"%d %d" % dateutil.makedate()
592 595 if extra:
593 596 branch = extra.get(b"branch")
594 597 if branch in (b"default", b""):
595 598 del extra[b"branch"]
596 599 elif branch in (b".", b"null", b"tip"):
597 600 raise error.StorageError(
598 601 _(b'the name \'%s\' is reserved') % branch
599 602 )
600 603 sortedfiles = sorted(files.touched)
601 604 flags = 0
602 605 sidedata = None
603 606 if self._copiesstorage == b'changeset-sidedata':
604 607 if files.has_copies_info:
605 608 flags |= flagutil.REVIDX_HASCOPIESINFO
606 609 sidedata = metadata.encode_files_sidedata(files)
607 610
608 611 if extra:
609 612 extra = encodeextra(extra)
610 613 parseddate = b"%s %s" % (parseddate, extra)
611 614 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
612 615 text = b"\n".join(l)
613 616 rev = self.addrevision(
614 617 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
615 618 )
616 619 return self.node(rev)
617 620
618 621 def branchinfo(self, rev):
619 622 """return the branch name and open/close state of a revision
620 623
621 624 This function exists because creating a changectx object
622 625 just to access this is costly."""
623 626 return self.changelogrevision(rev).branchinfo
624 627
625 628 def _nodeduplicatecallback(self, transaction, rev):
626 629 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
627 630 #
628 631 # We track them in a list to preserve their order from the source bundle
629 632 duplicates = transaction.changes.setdefault(b'revduplicates', [])
630 633 duplicates.append(rev)
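The changelog.py hunk above only has to keep the new sidedata segment file pointed at whatever opener the changelog is currently using; delayupdate(), _finalize() and _writepending() each gain one line next to the existing _segmentfile assignment. Schematically (a sketch with a hypothetical helper name, not code from the diff):

    # keep both cached segment files on the active opener
    # (real opener, delay buffer, or diverted ".a" file)
    def _sync_segment_openers(cl):
        cl._segmentfile.opener = cl.opener
        cl._segmentfile_sidedata.opener = cl.opener  # the line added here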
@@ -1,3298 +1,3299 @@ mercurial/revlog.py
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15 from __future__ import absolute_import
16 16
17 17 import binascii
18 18 import collections
19 19 import contextlib
20 20 import errno
21 21 import io
22 22 import os
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullrev,
31 31 sha1nodeconstants,
32 32 short,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from .pycompat import getattr
37 37 from .revlogutils.constants import (
38 38 ALL_KINDS,
39 39 CHANGELOGV2,
40 40 COMP_MODE_DEFAULT,
41 41 COMP_MODE_INLINE,
42 42 COMP_MODE_PLAIN,
43 43 FEATURES_BY_VERSION,
44 44 FLAG_GENERALDELTA,
45 45 FLAG_INLINE_DATA,
46 46 INDEX_HEADER,
47 47 KIND_CHANGELOG,
48 48 REVLOGV0,
49 49 REVLOGV1,
50 50 REVLOGV1_FLAGS,
51 51 REVLOGV2,
52 52 REVLOGV2_FLAGS,
53 53 REVLOG_DEFAULT_FLAGS,
54 54 REVLOG_DEFAULT_FORMAT,
55 55 REVLOG_DEFAULT_VERSION,
56 56 SUPPORTED_FLAGS,
57 57 )
58 58 from .revlogutils.flagutil import (
59 59 REVIDX_DEFAULT_FLAGS,
60 60 REVIDX_ELLIPSIS,
61 61 REVIDX_EXTSTORED,
62 62 REVIDX_FLAGS_ORDER,
63 63 REVIDX_HASCOPIESINFO,
64 64 REVIDX_ISCENSORED,
65 65 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 66 )
67 67 from .thirdparty import attr
68 68 from . import (
69 69 ancestor,
70 70 dagop,
71 71 error,
72 72 mdiff,
73 73 policy,
74 74 pycompat,
75 75 revlogutils,
76 76 templatefilters,
77 77 util,
78 78 )
79 79 from .interfaces import (
80 80 repository,
81 81 util as interfaceutil,
82 82 )
83 83 from .revlogutils import (
84 84 censor,
85 85 deltas as deltautil,
86 86 docket as docketutil,
87 87 flagutil,
88 88 nodemap as nodemaputil,
89 89 randomaccessfile,
90 90 revlogv0,
91 91 sidedata as sidedatautil,
92 92 )
93 93 from .utils import (
94 94 storageutil,
95 95 stringutil,
96 96 )
97 97
98 98 # blanket usage of all the names to prevent pyflakes constraints
99 99 # We need these names available in the module for extensions.
100 100
101 101 REVLOGV0
102 102 REVLOGV1
103 103 REVLOGV2
104 104 FLAG_INLINE_DATA
105 105 FLAG_GENERALDELTA
106 106 REVLOG_DEFAULT_FLAGS
107 107 REVLOG_DEFAULT_FORMAT
108 108 REVLOG_DEFAULT_VERSION
109 109 REVLOGV1_FLAGS
110 110 REVLOGV2_FLAGS
111 111 REVIDX_ISCENSORED
112 112 REVIDX_ELLIPSIS
113 113 REVIDX_HASCOPIESINFO
114 114 REVIDX_EXTSTORED
115 115 REVIDX_DEFAULT_FLAGS
116 116 REVIDX_FLAGS_ORDER
117 117 REVIDX_RAWTEXT_CHANGING_FLAGS
118 118
119 119 parsers = policy.importmod('parsers')
120 120 rustancestor = policy.importrust('ancestor')
121 121 rustdagop = policy.importrust('dagop')
122 122 rustrevlog = policy.importrust('revlog')
123 123
124 124 # Aliased for performance.
125 125 _zlibdecompress = zlib.decompress
126 126
127 127 # max size of revlog with inline data
128 128 _maxinline = 131072
129 129
130 130 # Flag processors for REVIDX_ELLIPSIS.
131 131 def ellipsisreadprocessor(rl, text):
132 132 return text, False
133 133
134 134
135 135 def ellipsiswriteprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsisrawprocessor(rl, text):
140 140 return False
141 141
142 142
143 143 ellipsisprocessor = (
144 144 ellipsisreadprocessor,
145 145 ellipsiswriteprocessor,
146 146 ellipsisrawprocessor,
147 147 )
148 148
149 149
150 150 def _verify_revision(rl, skipflags, state, node):
151 151 """Verify the integrity of the given revlog ``node`` while providing a hook
152 152 point for extensions to influence the operation."""
153 153 if skipflags:
154 154 state[b'skipread'].add(node)
155 155 else:
156 156 # Side-effect: read content and verify hash.
157 157 rl.revision(node)
158 158
159 159
160 160 # True if a fast implementation for persistent-nodemap is available
161 161 #
162 162 # We also consider we have a "fast" implementation in "pure" python because
163 163 # people using pure don't really have performance considerations (and a
164 164 # wheelbarrow of other slowness sources)
165 165 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
166 166 parsers, 'BaseIndexObject'
167 167 )
168 168
169 169
170 170 @interfaceutil.implementer(repository.irevisiondelta)
171 171 @attr.s(slots=True)
172 172 class revlogrevisiondelta(object):
173 173 node = attr.ib()
174 174 p1node = attr.ib()
175 175 p2node = attr.ib()
176 176 basenode = attr.ib()
177 177 flags = attr.ib()
178 178 baserevisionsize = attr.ib()
179 179 revision = attr.ib()
180 180 delta = attr.ib()
181 181 sidedata = attr.ib()
182 182 protocol_flags = attr.ib()
183 183 linknode = attr.ib(default=None)
184 184
185 185
186 186 @interfaceutil.implementer(repository.iverifyproblem)
187 187 @attr.s(frozen=True)
188 188 class revlogproblem(object):
189 189 warning = attr.ib(default=None)
190 190 error = attr.ib(default=None)
191 191 node = attr.ib(default=None)
192 192
193 193
194 194 def parse_index_v1(data, inline):
195 195 # call the C implementation to parse the index data
196 196 index, cache = parsers.parse_index2(data, inline)
197 197 return index, cache
198 198
199 199
200 200 def parse_index_v2(data, inline):
201 201 # call the C implementation to parse the index data
202 202 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
203 203 return index, cache
204 204
205 205
206 206 def parse_index_cl_v2(data, inline):
207 207 # call the C implementation to parse the index data
208 208 assert not inline
209 209 from .pure.parsers import parse_index_cl_v2
210 210
211 211 index, cache = parse_index_cl_v2(data)
212 212 return index, cache
213 213
214 214
215 215 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
216 216
217 217 def parse_index_v1_nodemap(data, inline):
218 218 index, cache = parsers.parse_index_devel_nodemap(data, inline)
219 219 return index, cache
220 220
221 221
222 222 else:
223 223 parse_index_v1_nodemap = None
224 224
225 225
226 226 def parse_index_v1_mixed(data, inline):
227 227 index, cache = parse_index_v1(data, inline)
228 228 return rustrevlog.MixedIndex(index), cache
229 229
230 230
231 231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
232 232 # signed integer)
233 233 _maxentrysize = 0x7FFFFFFF
234 234
235 235 FILE_TOO_SHORT_MSG = _(
236 236 b'cannot read from revlog %s;'
237 237 b' expected %d bytes from offset %d, data size is %d'
238 238 )
239 239
240 240
241 241 class revlog(object):
242 242 """
243 243 the underlying revision storage object
244 244
245 245 A revlog consists of two parts, an index and the revision data.
246 246
247 247 The index is a file with a fixed record size containing
248 248 information on each revision, including its nodeid (hash), the
249 249 nodeids of its parents, the position and offset of its data within
250 250 the data file, and the revision it's based on. Finally, each entry
251 251 contains a linkrev entry that can serve as a pointer to external
252 252 data.
253 253
254 254 The revision data itself is a linear collection of data chunks.
255 255 Each chunk represents a revision and is usually represented as a
256 256 delta against the previous chunk. To bound lookup time, runs of
257 257 deltas are limited to about 2 times the length of the original
258 258 version data. This makes retrieval of a version proportional to
259 259 its size, or O(1) relative to the number of revisions.
260 260
261 261 Both pieces of the revlog are written to in an append-only
262 262 fashion, which means we never need to rewrite a file to insert or
263 263 remove data, and can use some simple techniques to avoid the need
264 264 for locking while reading.
265 265
266 266 If checkambig, indexfile is opened with checkambig=True at
267 267 writing, to avoid file stat ambiguity.
268 268
269 269 If mmaplargeindex is True, and an mmapindexthreshold is set, the
270 270 index will be mmapped rather than read if it is larger than the
271 271 configured threshold.
272 272
273 273 If censorable is True, the revlog can have censored revisions.
274 274
275 275 If `upperboundcomp` is not None, this is the expected maximal gain from
276 276 compression for the data content.
277 277
278 278 `concurrencychecker` is an optional function that receives 3 arguments: a
279 279 file handle, a filename, and an expected position. It should check whether
280 280 the current position in the file handle is valid, and log/warn/fail (by
281 281 raising).
282 282
283 283 See mercurial/revlogutils/constants.py for details about the content of an
284 284 index entry.
285 285 """
286 286
287 287 _flagserrorclass = error.RevlogError
288 288
289 289 def __init__(
290 290 self,
291 291 opener,
292 292 target,
293 293 radix,
294 294 postfix=None, # only exist for `tmpcensored` now
295 295 checkambig=False,
296 296 mmaplargeindex=False,
297 297 censorable=False,
298 298 upperboundcomp=None,
299 299 persistentnodemap=False,
300 300 concurrencychecker=None,
301 301 trypending=False,
302 302 ):
303 303 """
304 304 create a revlog object
305 305
306 306 opener is a function that abstracts the file opening operation
307 307 and can be used to implement COW semantics or the like.
308 308
309 309 `target`: a (KIND, ID) tuple that identifies the content stored in
310 310 this revlog. It helps the rest of the code to understand what the revlog
311 311 is about without having to resort to heuristics and index filename
312 312 analysis. Note that this must reliably be set by normal code, but
313 313 that test, debug, or performance measurement code might not set this to
314 314 an accurate value.
315 315 """
316 316 self.upperboundcomp = upperboundcomp
317 317
318 318 self.radix = radix
319 319
320 320 self._docket_file = None
321 321 self._indexfile = None
322 322 self._datafile = None
323 323 self._sidedatafile = None
324 324 self._nodemap_file = None
325 325 self.postfix = postfix
326 326 self._trypending = trypending
327 327 self.opener = opener
328 328 if persistentnodemap:
329 329 self._nodemap_file = nodemaputil.get_nodemap_file(self)
330 330
331 331 assert target[0] in ALL_KINDS
332 332 assert len(target) == 2
333 333 self.target = target
334 334 # When True, indexfile is opened with checkambig=True at writing, to
335 335 # avoid file stat ambiguity.
336 336 self._checkambig = checkambig
337 337 self._mmaplargeindex = mmaplargeindex
338 338 self._censorable = censorable
339 339 # 3-tuple of (node, rev, text) for a raw revision.
340 340 self._revisioncache = None
341 341 # Maps rev to chain base rev.
342 342 self._chainbasecache = util.lrucachedict(100)
343 343 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
344 344 self._chunkcache = (0, b'')
345 345 # How much data to read and cache into the raw revlog data cache.
346 346 self._chunkcachesize = 65536
347 347 self._maxchainlen = None
348 348 self._deltabothparents = True
349 349 self.index = None
350 350 self._docket = None
351 351 self._nodemap_docket = None
352 352 # Mapping of partial identifiers to full nodes.
353 353 self._pcache = {}
354 354 # Mapping of revision integer to full node.
355 355 self._compengine = b'zlib'
356 356 self._compengineopts = {}
357 357 self._maxdeltachainspan = -1
358 358 self._withsparseread = False
359 359 self._sparserevlog = False
360 360 self.hassidedata = False
361 361 self._srdensitythreshold = 0.50
362 362 self._srmingapsize = 262144
363 363
364 364 # Make copy of flag processors so each revlog instance can support
365 365 # custom flags.
366 366 self._flagprocessors = dict(flagutil.flagprocessors)
367 367
368 368 # 3-tuple of file handles being used for active writing.
369 369 self._writinghandles = None
370 370 # prevent nesting of addgroup
371 371 self._adding_group = None
372 372
373 373 self._loadindex()
374 374
375 375 self._concurrencychecker = concurrencychecker
376 376
377 377 def _init_opts(self):
378 378 """process options (from above/config) to setup associated default revlog mode
379 379
380 380 These values might be affected when actually reading on disk information.
381 381
382 382 The relevant values are returned for use in _loadindex().
383 383
384 384 * newversionflags:
385 385 version header to use if we need to create a new revlog
386 386
387 387 * mmapindexthreshold:
388 388 minimal index size at which to start using mmap
389 389
390 390 * force_nodemap:
391 391 force the usage of a "development" version of the nodemap code
392 392 """
393 393 mmapindexthreshold = None
394 394 opts = self.opener.options
395 395
396 396 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
397 397 new_header = CHANGELOGV2
398 398 elif b'revlogv2' in opts:
399 399 new_header = REVLOGV2
400 400 elif b'revlogv1' in opts:
401 401 new_header = REVLOGV1 | FLAG_INLINE_DATA
402 402 if b'generaldelta' in opts:
403 403 new_header |= FLAG_GENERALDELTA
404 404 elif b'revlogv0' in self.opener.options:
405 405 new_header = REVLOGV0
406 406 else:
407 407 new_header = REVLOG_DEFAULT_VERSION
408 408
409 409 if b'chunkcachesize' in opts:
410 410 self._chunkcachesize = opts[b'chunkcachesize']
411 411 if b'maxchainlen' in opts:
412 412 self._maxchainlen = opts[b'maxchainlen']
413 413 if b'deltabothparents' in opts:
414 414 self._deltabothparents = opts[b'deltabothparents']
415 415 self._lazydelta = bool(opts.get(b'lazydelta', True))
416 416 self._lazydeltabase = False
417 417 if self._lazydelta:
418 418 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
419 419 if b'compengine' in opts:
420 420 self._compengine = opts[b'compengine']
421 421 if b'zlib.level' in opts:
422 422 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
423 423 if b'zstd.level' in opts:
424 424 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
425 425 if b'maxdeltachainspan' in opts:
426 426 self._maxdeltachainspan = opts[b'maxdeltachainspan']
427 427 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
428 428 mmapindexthreshold = opts[b'mmapindexthreshold']
429 429 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
430 430 withsparseread = bool(opts.get(b'with-sparse-read', False))
431 431 # sparse-revlog forces sparse-read
432 432 self._withsparseread = self._sparserevlog or withsparseread
433 433 if b'sparse-read-density-threshold' in opts:
434 434 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
435 435 if b'sparse-read-min-gap-size' in opts:
436 436 self._srmingapsize = opts[b'sparse-read-min-gap-size']
437 437 if opts.get(b'enableellipsis'):
438 438 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
439 439
440 440 # revlog v0 doesn't have flag processors
441 441 for flag, processor in pycompat.iteritems(
442 442 opts.get(b'flagprocessors', {})
443 443 ):
444 444 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
445 445
446 446 if self._chunkcachesize <= 0:
447 447 raise error.RevlogError(
448 448 _(b'revlog chunk cache size %r is not greater than 0')
449 449 % self._chunkcachesize
450 450 )
451 451 elif self._chunkcachesize & (self._chunkcachesize - 1):
452 452 raise error.RevlogError(
453 453 _(b'revlog chunk cache size %r is not a power of 2')
454 454 % self._chunkcachesize
455 455 )
456 456 force_nodemap = opts.get(b'devel-force-nodemap', False)
457 457 return new_header, mmapindexthreshold, force_nodemap
458 458
459 459 def _get_data(self, filepath, mmap_threshold, size=None):
460 460 """return a file content with or without mmap
461 461
462 462 If the file is missing return the empty string"""
463 463 try:
464 464 with self.opener(filepath) as fp:
465 465 if mmap_threshold is not None:
466 466 file_size = self.opener.fstat(fp).st_size
467 467 if file_size >= mmap_threshold:
468 468 if size is not None:
469 469 # avoid potential mmap crash
470 470 size = min(file_size, size)
471 471 # TODO: should .close() to release resources without
472 472 # relying on Python GC
473 473 if size is None:
474 474 return util.buffer(util.mmapread(fp))
475 475 else:
476 476 return util.buffer(util.mmapread(fp, size))
477 477 if size is None:
478 478 return fp.read()
479 479 else:
480 480 return fp.read(size)
481 481 except IOError as inst:
482 482 if inst.errno != errno.ENOENT:
483 483 raise
484 484 return b''
485 485
486 486 def _loadindex(self, docket=None):
487 487
488 488 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
489 489
490 490 if self.postfix is not None:
491 491 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
492 492 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
493 493 entry_point = b'%s.i.a' % self.radix
494 494 else:
495 495 entry_point = b'%s.i' % self.radix
496 496
497 497 if docket is not None:
498 498 self._docket = docket
499 499 self._docket_file = entry_point
500 500 else:
501 501 entry_data = b''
502 502 self._initempty = True
503 503 entry_data = self._get_data(entry_point, mmapindexthreshold)
504 504 if len(entry_data) > 0:
505 505 header = INDEX_HEADER.unpack(entry_data[:4])[0]
506 506 self._initempty = False
507 507 else:
508 508 header = new_header
509 509
510 510 self._format_flags = header & ~0xFFFF
511 511 self._format_version = header & 0xFFFF
512 512
513 513 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
514 514 if supported_flags is None:
515 515 msg = _(b'unknown version (%d) in revlog %s')
516 516 msg %= (self._format_version, self.display_id)
517 517 raise error.RevlogError(msg)
518 518 elif self._format_flags & ~supported_flags:
519 519 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
520 520 display_flag = self._format_flags >> 16
521 521 msg %= (display_flag, self._format_version, self.display_id)
522 522 raise error.RevlogError(msg)
523 523
524 524 features = FEATURES_BY_VERSION[self._format_version]
525 525 self._inline = features[b'inline'](self._format_flags)
526 526 self._generaldelta = features[b'generaldelta'](self._format_flags)
527 527 self.hassidedata = features[b'sidedata']
528 528
529 529 if not features[b'docket']:
530 530 self._indexfile = entry_point
531 531 index_data = entry_data
532 532 else:
533 533 self._docket_file = entry_point
534 534 if self._initempty:
535 535 self._docket = docketutil.default_docket(self, header)
536 536 else:
537 537 self._docket = docketutil.parse_docket(
538 538 self, entry_data, use_pending=self._trypending
539 539 )
540 540
541 541 if self._docket is not None:
542 542 self._indexfile = self._docket.index_filepath()
543 543 index_data = b''
544 544 index_size = self._docket.index_end
545 545 if index_size > 0:
546 546 index_data = self._get_data(
547 547 self._indexfile, mmapindexthreshold, size=index_size
548 548 )
549 549 if len(index_data) < index_size:
550 550 msg = _(b'too few index data for %s: got %d, expected %d')
551 551 msg %= (self.display_id, len(index_data), index_size)
552 552 raise error.RevlogError(msg)
553 553
554 554 self._inline = False
555 555 # generaldelta implied by version 2 revlogs.
556 556 self._generaldelta = True
557 557 # the logic for persistent nodemap will be dealt with within the
558 558 # main docket, so disable it for now.
559 559 self._nodemap_file = None
560 560
561 561 if self._docket is not None:
562 562 self._datafile = self._docket.data_filepath()
563 563 self._sidedatafile = self._docket.sidedata_filepath()
564 564 elif self.postfix is None:
565 565 self._datafile = b'%s.d' % self.radix
566 566 else:
567 567 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
568 568
569 569 self.nodeconstants = sha1nodeconstants
570 570 self.nullid = self.nodeconstants.nullid
571 571
572 572 # sparse-revlog can't be on without general-delta (issue6056)
573 573 if not self._generaldelta:
574 574 self._sparserevlog = False
575 575
576 576 self._storedeltachains = True
577 577
578 578 devel_nodemap = (
579 579 self._nodemap_file
580 580 and force_nodemap
581 581 and parse_index_v1_nodemap is not None
582 582 )
583 583
584 584 use_rust_index = False
585 585 if rustrevlog is not None:
586 586 if self._nodemap_file is not None:
587 587 use_rust_index = True
588 588 else:
589 589 use_rust_index = self.opener.options.get(b'rust.index')
590 590
591 591 self._parse_index = parse_index_v1
592 592 if self._format_version == REVLOGV0:
593 593 self._parse_index = revlogv0.parse_index_v0
594 594 elif self._format_version == REVLOGV2:
595 595 self._parse_index = parse_index_v2
596 596 elif self._format_version == CHANGELOGV2:
597 597 self._parse_index = parse_index_cl_v2
598 598 elif devel_nodemap:
599 599 self._parse_index = parse_index_v1_nodemap
600 600 elif use_rust_index:
601 601 self._parse_index = parse_index_v1_mixed
602 602 try:
603 603 d = self._parse_index(index_data, self._inline)
604 604 index, chunkcache = d
605 605 use_nodemap = (
606 606 not self._inline
607 607 and self._nodemap_file is not None
608 608 and util.safehasattr(index, 'update_nodemap_data')
609 609 )
610 610 if use_nodemap:
611 611 nodemap_data = nodemaputil.persisted_data(self)
612 612 if nodemap_data is not None:
613 613 docket = nodemap_data[0]
614 614 if (
615 615 len(d[0]) > docket.tip_rev
616 616 and d[0][docket.tip_rev][7] == docket.tip_node
617 617 ):
618 618 # no changelog tampering
619 619 self._nodemap_docket = docket
620 620 index.update_nodemap_data(*nodemap_data)
621 621 except (ValueError, IndexError):
622 622 raise error.RevlogError(
623 623 _(b"index %s is corrupted") % self.display_id
624 624 )
625 625 self.index = index
626 626 self._segmentfile = randomaccessfile.randomaccessfile(
627 627 self.opener,
628 628 (self._indexfile if self._inline else self._datafile),
629 629 self._chunkcachesize,
630 630 chunkcache,
631 631 )
632 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
633 self.opener,
634 self._sidedatafile,
635 self._chunkcachesize,
636 )
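# (editorial note) the wrapper built just above is the core of this change:
# sidedata reads now go through the same cached random-access file
# abstraction as index/data reads instead of hitting the sidedata file
# directly each time. No initial chunk cache is passed, so it presumably
# starts empty and warms up as sidedata is read.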
632 637 # revnum -> (chain-length, sum-delta-length)
633 638 self._chaininfocache = util.lrucachedict(500)
634 639 # revlog header -> revlog compressor
635 640 self._decompressors = {}
636 641
637 642 @util.propertycache
638 643 def revlog_kind(self):
639 644 return self.target[0]
640 645
641 646 @util.propertycache
642 647 def display_id(self):
643 648 """The public facing "ID" of the revlog that we use in message"""
644 649 # Maybe we should build a user facing representation of
645 650 # revlog.target instead of using `self.radix`
646 651 return self.radix
647 652
648 653 def _get_decompressor(self, t):
649 654 try:
650 655 compressor = self._decompressors[t]
651 656 except KeyError:
652 657 try:
653 658 engine = util.compengines.forrevlogheader(t)
654 659 compressor = engine.revlogcompressor(self._compengineopts)
655 660 self._decompressors[t] = compressor
656 661 except KeyError:
657 662 raise error.RevlogError(
658 663 _(b'unknown compression type %s') % binascii.hexlify(t)
659 664 )
660 665 return compressor
661 666
662 667 @util.propertycache
663 668 def _compressor(self):
664 669 engine = util.compengines[self._compengine]
665 670 return engine.revlogcompressor(self._compengineopts)
666 671
667 672 @util.propertycache
668 673 def _decompressor(self):
669 674 """the default decompressor"""
670 675 if self._docket is None:
671 676 return None
672 677 t = self._docket.default_compression_header
673 678 c = self._get_decompressor(t)
674 679 return c.decompress
675 680
676 681 def _indexfp(self):
677 682 """file object for the revlog's index file"""
678 683 return self.opener(self._indexfile, mode=b"r")
679 684
680 685 def __index_write_fp(self):
681 686 # You should not use this directly and use `_writing` instead
682 687 try:
683 688 f = self.opener(
684 689 self._indexfile, mode=b"r+", checkambig=self._checkambig
685 690 )
686 691 if self._docket is None:
687 692 f.seek(0, os.SEEK_END)
688 693 else:
689 694 f.seek(self._docket.index_end, os.SEEK_SET)
690 695 return f
691 696 except IOError as inst:
692 697 if inst.errno != errno.ENOENT:
693 698 raise
694 699 return self.opener(
695 700 self._indexfile, mode=b"w+", checkambig=self._checkambig
696 701 )
697 702
698 703 def __index_new_fp(self):
699 704 # You should not use this unless you are upgrading from inline revlog
700 705 return self.opener(
701 706 self._indexfile,
702 707 mode=b"w",
703 708 checkambig=self._checkambig,
704 709 atomictemp=True,
705 710 )
706 711
707 712 def _datafp(self, mode=b'r'):
708 713 """file object for the revlog's data file"""
709 714 return self.opener(self._datafile, mode=mode)
710 715
711 716 @contextlib.contextmanager
712 717 def _sidedatareadfp(self):
713 718 """file object suitable to read sidedata"""
714 719 if self._writinghandles:
715 720 yield self._writinghandles[2]
716 721 else:
717 722 with self.opener(self._sidedatafile) as fp:
718 723 yield fp
719 724
720 725 def tiprev(self):
721 726 return len(self.index) - 1
722 727
723 728 def tip(self):
724 729 return self.node(self.tiprev())
725 730
726 731 def __contains__(self, rev):
727 732 return 0 <= rev < len(self)
728 733
729 734 def __len__(self):
730 735 return len(self.index)
731 736
732 737 def __iter__(self):
733 738 return iter(pycompat.xrange(len(self)))
734 739
735 740 def revs(self, start=0, stop=None):
736 741 """iterate over all rev in this revlog (from start to stop)"""
737 742 return storageutil.iterrevs(len(self), start=start, stop=stop)
738 743
739 744 @property
740 745 def nodemap(self):
741 746 msg = (
742 747 b"revlog.nodemap is deprecated, "
743 748 b"use revlog.index.[has_node|rev|get_rev]"
744 749 )
745 750 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
746 751 return self.index.nodemap
747 752
748 753 @property
749 754 def _nodecache(self):
750 755 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
751 756 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
752 757 return self.index.nodemap
753 758
754 759 def hasnode(self, node):
755 760 try:
756 761 self.rev(node)
757 762 return True
758 763 except KeyError:
759 764 return False
760 765
761 766 def candelta(self, baserev, rev):
762 767 """whether two revisions (baserev, rev) can be delta-ed or not"""
763 768 # Disable delta if either rev requires a content-changing flag
764 769 # processor (ex. LFS). This is because such flag processor can alter
765 770 # the rawtext content that the delta will be based on, and two clients
766 771 # could have a same revlog node with different flags (i.e. different
767 772 # rawtext contents) and the delta could be incompatible.
768 773 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
769 774 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
770 775 ):
771 776 return False
772 777 return True
773 778
774 779 def update_caches(self, transaction):
775 780 if self._nodemap_file is not None:
776 781 if transaction is None:
777 782 nodemaputil.update_persistent_nodemap(self)
778 783 else:
779 784 nodemaputil.setup_persistent_nodemap(transaction, self)
780 785
781 786 def clearcaches(self):
782 787 self._revisioncache = None
783 788 self._chainbasecache.clear()
784 789 self._segmentfile.clear_cache()
790 self._segmentfile_sidedata.clear_cache()
785 791 self._pcache = {}
786 792 self._nodemap_docket = None
787 793 self.index.clearcaches()
788 794 # The python code is the one responsible for validating the docket, we
789 795 # end up having to refresh it here.
790 796 use_nodemap = (
791 797 not self._inline
792 798 and self._nodemap_file is not None
793 799 and util.safehasattr(self.index, 'update_nodemap_data')
794 800 )
795 801 if use_nodemap:
796 802 nodemap_data = nodemaputil.persisted_data(self)
797 803 if nodemap_data is not None:
798 804 self._nodemap_docket = nodemap_data[0]
799 805 self.index.update_nodemap_data(*nodemap_data)
800 806
801 807 def rev(self, node):
802 808 try:
803 809 return self.index.rev(node)
804 810 except TypeError:
805 811 raise
806 812 except error.RevlogError:
807 813 # parsers.c radix tree lookup failed
808 814 if (
809 815 node == self.nodeconstants.wdirid
810 816 or node in self.nodeconstants.wdirfilenodeids
811 817 ):
812 818 raise error.WdirUnsupported
813 819 raise error.LookupError(node, self.display_id, _(b'no node'))
814 820
815 821 # Accessors for index entries.
816 822
817 823 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
818 824 # are flags.
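# (editorial summary, derived from the accessors below) the remaining
# entry fields are used positionally in this file roughly as:
#   [1] compressed length    [2] uncompressed length   [3] delta base rev
#   [4] linkrev              [5]/[6] parent revs       [7] node
#   [8] sidedata offset      [9] sidedata length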
819 825 def start(self, rev):
820 826 return int(self.index[rev][0] >> 16)
821 827
822 828 def sidedata_cut_off(self, rev):
823 829 sd_cut_off = self.index[rev][8]
824 830 if sd_cut_off != 0:
825 831 return sd_cut_off
826 832 # This is some annoying dance, because entries without sidedata
827 833 # currently use 0 as their offset (instead of previous-offset +
828 834 # previous-size)
829 835 #
830 836 # We should reconsider this sidedata → 0 sidedata_offset policy.
831 837 # In the meantime, we need this.
832 838 while 0 <= rev:
833 839 e = self.index[rev]
834 840 if e[9] != 0:
835 841 return e[8] + e[9]
836 842 rev -= 1
837 843 return 0
838 844
839 845 def flags(self, rev):
840 846 return self.index[rev][0] & 0xFFFF
841 847
842 848 def length(self, rev):
843 849 return self.index[rev][1]
844 850
845 851 def sidedata_length(self, rev):
846 852 if not self.hassidedata:
847 853 return 0
848 854 return self.index[rev][9]
849 855
850 856 def rawsize(self, rev):
851 857 """return the length of the uncompressed text for a given revision"""
852 858 l = self.index[rev][2]
853 859 if l >= 0:
854 860 return l
855 861
856 862 t = self.rawdata(rev)
857 863 return len(t)
858 864
859 865 def size(self, rev):
860 866 """length of non-raw text (processed by a "read" flag processor)"""
861 867 # fast path: if no "read" flag processor could change the content,
862 868 # size is rawsize. note: ELLIPSIS is known to not change the content.
863 869 flags = self.flags(rev)
864 870 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
865 871 return self.rawsize(rev)
866 872
867 873 return len(self.revision(rev, raw=False))
868 874
869 875 def chainbase(self, rev):
870 876 base = self._chainbasecache.get(rev)
871 877 if base is not None:
872 878 return base
873 879
874 880 index = self.index
875 881 iterrev = rev
876 882 base = index[iterrev][3]
877 883 while base != iterrev:
878 884 iterrev = base
879 885 base = index[iterrev][3]
880 886
881 887 self._chainbasecache[rev] = base
882 888 return base
883 889
884 890 def linkrev(self, rev):
885 891 return self.index[rev][4]
886 892
887 893 def parentrevs(self, rev):
888 894 try:
889 895 entry = self.index[rev]
890 896 except IndexError:
891 897 if rev == wdirrev:
892 898 raise error.WdirUnsupported
893 899 raise
894 900 if entry[5] == nullrev:
895 901 return entry[6], entry[5]
896 902 else:
897 903 return entry[5], entry[6]
898 904
899 905 # fast parentrevs(rev) where rev isn't filtered
900 906 _uncheckedparentrevs = parentrevs
901 907
902 908 def node(self, rev):
903 909 try:
904 910 return self.index[rev][7]
905 911 except IndexError:
906 912 if rev == wdirrev:
907 913 raise error.WdirUnsupported
908 914 raise
909 915
910 916 # Derived from index values.
911 917
912 918 def end(self, rev):
913 919 return self.start(rev) + self.length(rev)
914 920
915 921 def parents(self, node):
916 922 i = self.index
917 923 d = i[self.rev(node)]
918 924 # inline node() to avoid function call overhead
919 925 if d[5] == self.nullid:
920 926 return i[d[6]][7], i[d[5]][7]
921 927 else:
922 928 return i[d[5]][7], i[d[6]][7]
923 929
924 930 def chainlen(self, rev):
925 931 return self._chaininfo(rev)[0]
926 932
927 933 def _chaininfo(self, rev):
928 934 chaininfocache = self._chaininfocache
929 935 if rev in chaininfocache:
930 936 return chaininfocache[rev]
931 937 index = self.index
932 938 generaldelta = self._generaldelta
933 939 iterrev = rev
934 940 e = index[iterrev]
935 941 clen = 0
936 942 compresseddeltalen = 0
937 943 while iterrev != e[3]:
938 944 clen += 1
939 945 compresseddeltalen += e[1]
940 946 if generaldelta:
941 947 iterrev = e[3]
942 948 else:
943 949 iterrev -= 1
944 950 if iterrev in chaininfocache:
945 951 t = chaininfocache[iterrev]
946 952 clen += t[0]
947 953 compresseddeltalen += t[1]
948 954 break
949 955 e = index[iterrev]
950 956 else:
951 957 # Add text length of base since decompressing that also takes
952 958 # work. For cache hits the length is already included.
953 959 compresseddeltalen += e[1]
954 960 r = (clen, compresseddeltalen)
955 961 chaininfocache[rev] = r
956 962 return r
957 963
958 964 def _deltachain(self, rev, stoprev=None):
959 965 """Obtain the delta chain for a revision.
960 966
961 967 ``stoprev`` specifies a revision to stop at. If not specified, we
962 968 stop at the base of the chain.
963 969
964 970 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
965 971 revs in ascending order and ``stopped`` is a bool indicating whether
966 972 ``stoprev`` was hit.
967 973 """
968 974 # Try C implementation.
969 975 try:
970 976 return self.index.deltachain(rev, stoprev, self._generaldelta)
971 977 except AttributeError:
972 978 pass
973 979
974 980 chain = []
975 981
976 982 # Alias to prevent attribute lookup in tight loop.
977 983 index = self.index
978 984 generaldelta = self._generaldelta
979 985
980 986 iterrev = rev
981 987 e = index[iterrev]
982 988 while iterrev != e[3] and iterrev != stoprev:
983 989 chain.append(iterrev)
984 990 if generaldelta:
985 991 iterrev = e[3]
986 992 else:
987 993 iterrev -= 1
988 994 e = index[iterrev]
989 995
990 996 if iterrev == stoprev:
991 997 stopped = True
992 998 else:
993 999 chain.append(iterrev)
994 1000 stopped = False
995 1001
996 1002 chain.reverse()
997 1003 return chain, stopped
998 1004
999 1005 def ancestors(self, revs, stoprev=0, inclusive=False):
1000 1006 """Generate the ancestors of 'revs' in reverse revision order.
1001 1007 Does not generate revs lower than stoprev.
1002 1008
1003 1009 See the documentation for ancestor.lazyancestors for more details."""
1004 1010
1005 1011 # first, make sure start revisions aren't filtered
1006 1012 revs = list(revs)
1007 1013 checkrev = self.node
1008 1014 for r in revs:
1009 1015 checkrev(r)
1010 1016 # and we're sure ancestors aren't filtered as well
1011 1017
1012 1018 if rustancestor is not None and self.index.rust_ext_compat:
1013 1019 lazyancestors = rustancestor.LazyAncestors
1014 1020 arg = self.index
1015 1021 else:
1016 1022 lazyancestors = ancestor.lazyancestors
1017 1023 arg = self._uncheckedparentrevs
1018 1024 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1019 1025
1020 1026 def descendants(self, revs):
1021 1027 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1022 1028
1023 1029 def findcommonmissing(self, common=None, heads=None):
1024 1030 """Return a tuple of the ancestors of common and the ancestors of heads
1025 1031 that are not ancestors of common. In revset terminology, we return the
1026 1032 tuple:
1027 1033
1028 1034 ::common, (::heads) - (::common)
1029 1035
1030 1036 The list is sorted by revision number, meaning it is
1031 1037 topologically sorted.
1032 1038
1033 1039 'heads' and 'common' are both lists of node IDs. If heads is
1034 1040 not supplied, uses all of the revlog's heads. If common is not
1035 1041 supplied, uses nullid."""
1036 1042 if common is None:
1037 1043 common = [self.nullid]
1038 1044 if heads is None:
1039 1045 heads = self.heads()
1040 1046
1041 1047 common = [self.rev(n) for n in common]
1042 1048 heads = [self.rev(n) for n in heads]
1043 1049
1044 1050 # we want the ancestors, but inclusive
1045 1051 class lazyset(object):
1046 1052 def __init__(self, lazyvalues):
1047 1053 self.addedvalues = set()
1048 1054 self.lazyvalues = lazyvalues
1049 1055
1050 1056 def __contains__(self, value):
1051 1057 return value in self.addedvalues or value in self.lazyvalues
1052 1058
1053 1059 def __iter__(self):
1054 1060 added = self.addedvalues
1055 1061 for r in added:
1056 1062 yield r
1057 1063 for r in self.lazyvalues:
1058 1064 if not r in added:
1059 1065 yield r
1060 1066
1061 1067 def add(self, value):
1062 1068 self.addedvalues.add(value)
1063 1069
1064 1070 def update(self, values):
1065 1071 self.addedvalues.update(values)
1066 1072
1067 1073 has = lazyset(self.ancestors(common))
1068 1074 has.add(nullrev)
1069 1075 has.update(common)
1070 1076
1071 1077 # take all ancestors from heads that aren't in has
1072 1078 missing = set()
1073 1079 visit = collections.deque(r for r in heads if r not in has)
1074 1080 while visit:
1075 1081 r = visit.popleft()
1076 1082 if r in missing:
1077 1083 continue
1078 1084 else:
1079 1085 missing.add(r)
1080 1086 for p in self.parentrevs(r):
1081 1087 if p not in has:
1082 1088 visit.append(p)
1083 1089 missing = list(missing)
1084 1090 missing.sort()
1085 1091 return has, [self.node(miss) for miss in missing]
1086 1092
1087 1093 def incrementalmissingrevs(self, common=None):
1088 1094 """Return an object that can be used to incrementally compute the
1089 1095 revision numbers of the ancestors of arbitrary sets that are not
1090 1096 ancestors of common. This is an ancestor.incrementalmissingancestors
1091 1097 object.
1092 1098
1093 1099 'common' is a list of revision numbers. If common is not supplied, uses
1094 1100 nullrev.
1095 1101 """
1096 1102 if common is None:
1097 1103 common = [nullrev]
1098 1104
1099 1105 if rustancestor is not None and self.index.rust_ext_compat:
1100 1106 return rustancestor.MissingAncestors(self.index, common)
1101 1107 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1102 1108
1103 1109 def findmissingrevs(self, common=None, heads=None):
1104 1110 """Return the revision numbers of the ancestors of heads that
1105 1111 are not ancestors of common.
1106 1112
1107 1113 More specifically, return a list of revision numbers corresponding to
1108 1114 nodes N such that every N satisfies the following constraints:
1109 1115
1110 1116 1. N is an ancestor of some node in 'heads'
1111 1117 2. N is not an ancestor of any node in 'common'
1112 1118
1113 1119 The list is sorted by revision number, meaning it is
1114 1120 topologically sorted.
1115 1121
1116 1122 'heads' and 'common' are both lists of revision numbers. If heads is
1117 1123 not supplied, uses all of the revlog's heads. If common is not
1118 1124 supplied, uses nullrev."""
1119 1125 if common is None:
1120 1126 common = [nullrev]
1121 1127 if heads is None:
1122 1128 heads = self.headrevs()
1123 1129
1124 1130 inc = self.incrementalmissingrevs(common=common)
1125 1131 return inc.missingancestors(heads)
1126 1132
1127 1133 def findmissing(self, common=None, heads=None):
1128 1134 """Return the ancestors of heads that are not ancestors of common.
1129 1135
1130 1136 More specifically, return a list of nodes N such that every N
1131 1137 satisfies the following constraints:
1132 1138
1133 1139 1. N is an ancestor of some node in 'heads'
1134 1140 2. N is not an ancestor of any node in 'common'
1135 1141
1136 1142 The list is sorted by revision number, meaning it is
1137 1143 topologically sorted.
1138 1144
1139 1145 'heads' and 'common' are both lists of node IDs. If heads is
1140 1146 not supplied, uses all of the revlog's heads. If common is not
1141 1147 supplied, uses nullid."""
1142 1148 if common is None:
1143 1149 common = [self.nullid]
1144 1150 if heads is None:
1145 1151 heads = self.heads()
1146 1152
1147 1153 common = [self.rev(n) for n in common]
1148 1154 heads = [self.rev(n) for n in heads]
1149 1155
1150 1156 inc = self.incrementalmissingrevs(common=common)
1151 1157 return [self.node(r) for r in inc.missingancestors(heads)]
1152 1158
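# A hedged usage sketch, not part of the original source; ``cl``,
# ``localhead`` and ``remotehead`` are hypothetical names:
#
#     nodes = cl.findmissing(common=[localhead], heads=[remotehead])
#
# would return, in topological order, the changeset nodes reachable from
# ``remotehead`` but not from ``localhead``, which is essentially what
# needs to be transferred during an exchange.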
1153 1159 def nodesbetween(self, roots=None, heads=None):
1154 1160 """Return a topological path from 'roots' to 'heads'.
1155 1161
1156 1162 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1157 1163 topologically sorted list of all nodes N that satisfy both of
1158 1164 these constraints:
1159 1165
1160 1166 1. N is a descendant of some node in 'roots'
1161 1167 2. N is an ancestor of some node in 'heads'
1162 1168
1163 1169 Every node is considered to be both a descendant and an ancestor
1164 1170 of itself, so every reachable node in 'roots' and 'heads' will be
1165 1171 included in 'nodes'.
1166 1172
1167 1173 'outroots' is the list of reachable nodes in 'roots', i.e., the
1168 1174 subset of 'roots' that is returned in 'nodes'. Likewise,
1169 1175 'outheads' is the subset of 'heads' that is also in 'nodes'.
1170 1176
1171 1177 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1172 1178 unspecified, uses nullid as the only root. If 'heads' is
1173 1179 unspecified, uses list of all of the revlog's heads."""
1174 1180 nonodes = ([], [], [])
1175 1181 if roots is not None:
1176 1182 roots = list(roots)
1177 1183 if not roots:
1178 1184 return nonodes
1179 1185 lowestrev = min([self.rev(n) for n in roots])
1180 1186 else:
1181 1187 roots = [self.nullid] # Everybody's a descendant of nullid
1182 1188 lowestrev = nullrev
1183 1189 if (lowestrev == nullrev) and (heads is None):
1184 1190 # We want _all_ the nodes!
1185 1191 return (
1186 1192 [self.node(r) for r in self],
1187 1193 [self.nullid],
1188 1194 list(self.heads()),
1189 1195 )
1190 1196 if heads is None:
1191 1197 # All nodes are ancestors, so the latest ancestor is the last
1192 1198 # node.
1193 1199 highestrev = len(self) - 1
1194 1200 # Set ancestors to None to signal that every node is an ancestor.
1195 1201 ancestors = None
1196 1202 # Set heads to an empty dictionary for later discovery of heads
1197 1203 heads = {}
1198 1204 else:
1199 1205 heads = list(heads)
1200 1206 if not heads:
1201 1207 return nonodes
1202 1208 ancestors = set()
1203 1209 # Turn heads into a dictionary so we can remove 'fake' heads.
1204 1210 # Also, later we will be using it to filter out the heads we can't
1205 1211 # find from roots.
1206 1212 heads = dict.fromkeys(heads, False)
1207 1213 # Start at the top and keep marking parents until we're done.
1208 1214 nodestotag = set(heads)
1209 1215 # Remember where the top was so we can use it as a limit later.
1210 1216 highestrev = max([self.rev(n) for n in nodestotag])
1211 1217 while nodestotag:
1212 1218 # grab a node to tag
1213 1219 n = nodestotag.pop()
1214 1220 # Never tag nullid
1215 1221 if n == self.nullid:
1216 1222 continue
1217 1223 # A node's revision number represents its place in a
1218 1224 # topologically sorted list of nodes.
1219 1225 r = self.rev(n)
1220 1226 if r >= lowestrev:
1221 1227 if n not in ancestors:
1222 1228 # If we are possibly a descendant of one of the roots
1223 1229 # and we haven't already been marked as an ancestor
1224 1230 ancestors.add(n) # Mark as ancestor
1225 1231 # Add non-nullid parents to list of nodes to tag.
1226 1232 nodestotag.update(
1227 1233 [p for p in self.parents(n) if p != self.nullid]
1228 1234 )
1229 1235 elif n in heads: # We've seen it before, is it a fake head?
1230 1236 # So it is, real heads should not be the ancestors of
1231 1237 # any other heads.
1232 1238 heads.pop(n)
1233 1239 if not ancestors:
1234 1240 return nonodes
1235 1241 # Now that we have our set of ancestors, we want to remove any
1236 1242 # roots that are not ancestors.
1237 1243
1238 1244 # If one of the roots was nullid, everything is included anyway.
1239 1245 if lowestrev > nullrev:
1240 1246 # But, since we weren't, let's recompute the lowest rev to not
1241 1247 # include roots that aren't ancestors.
1242 1248
1243 1249 # Filter out roots that aren't ancestors of heads
1244 1250 roots = [root for root in roots if root in ancestors]
1245 1251 # Recompute the lowest revision
1246 1252 if roots:
1247 1253 lowestrev = min([self.rev(root) for root in roots])
1248 1254 else:
1249 1255 # No more roots? Return empty list
1250 1256 return nonodes
1251 1257 else:
1252 1258 # We are descending from nullid, and don't need to care about
1253 1259 # any other roots.
1254 1260 lowestrev = nullrev
1255 1261 roots = [self.nullid]
1256 1262 # Transform our roots list into a set.
1257 1263 descendants = set(roots)
1258 1264 # Also, keep the original roots so we can filter out roots that aren't
1259 1265 # 'real' roots (i.e. are descended from other roots).
1260 1266 roots = descendants.copy()
1261 1267 # Our topologically sorted list of output nodes.
1262 1268 orderedout = []
1263 1269 # Don't start at nullid since we don't want nullid in our output list,
1264 1270 # and if nullid shows up in descendants, empty parents will look like
1265 1271 # they're descendants.
1266 1272 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1267 1273 n = self.node(r)
1268 1274 isdescendant = False
1269 1275 if lowestrev == nullrev: # Everybody is a descendant of nullid
1270 1276 isdescendant = True
1271 1277 elif n in descendants:
1272 1278 # n is already a descendant
1273 1279 isdescendant = True
1274 1280 # This check only needs to be done here because all the roots
1275 1281 # are already marked as descendants before the loop starts.
1276 1282 if n in roots:
1277 1283 # If n was a root, check if it's a 'real' root.
1278 1284 p = tuple(self.parents(n))
1279 1285 # If any of its parents are descendants, it's not a root.
1280 1286 if (p[0] in descendants) or (p[1] in descendants):
1281 1287 roots.remove(n)
1282 1288 else:
1283 1289 p = tuple(self.parents(n))
1284 1290 # A node is a descendant if either of its parents are
1285 1291 # descendants. (We seeded the descendants set with the roots
1286 1292 # up there, remember?)
1287 1293 if (p[0] in descendants) or (p[1] in descendants):
1288 1294 descendants.add(n)
1289 1295 isdescendant = True
1290 1296 if isdescendant and ((ancestors is None) or (n in ancestors)):
1291 1297 # Only include nodes that are both descendants and ancestors.
1292 1298 orderedout.append(n)
1293 1299 if (ancestors is not None) and (n in heads):
1294 1300 # We're trying to figure out which heads are reachable
1295 1301 # from roots.
1296 1302 # Mark this head as having been reached
1297 1303 heads[n] = True
1298 1304 elif ancestors is None:
1299 1305 # Otherwise, we're trying to discover the heads.
1300 1306 # Assume this is a head because if it isn't, the next step
1301 1307 # will eventually remove it.
1302 1308 heads[n] = True
1303 1309 # But, obviously its parents aren't.
1304 1310 for p in self.parents(n):
1305 1311 heads.pop(p, None)
1306 1312 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1307 1313 roots = list(roots)
1308 1314 assert orderedout
1309 1315 assert roots
1310 1316 assert heads
1311 1317 return (orderedout, roots, heads)
1312 1318
1313 1319 def headrevs(self, revs=None):
1314 1320 if revs is None:
1315 1321 try:
1316 1322 return self.index.headrevs()
1317 1323 except AttributeError:
1318 1324 return self._headrevs()
1319 1325 if rustdagop is not None and self.index.rust_ext_compat:
1320 1326 return rustdagop.headrevs(self.index, revs)
1321 1327 return dagop.headrevs(revs, self._uncheckedparentrevs)
1322 1328
1323 1329 def computephases(self, roots):
1324 1330 return self.index.computephasesmapsets(roots)
1325 1331
1326 1332 def _headrevs(self):
1327 1333 count = len(self)
1328 1334 if not count:
1329 1335 return [nullrev]
1330 1336 # we won't iterate over filtered revs, so nobody is a head at the start
1331 1337 ishead = [0] * (count + 1)
1332 1338 index = self.index
1333 1339 for r in self:
1334 1340 ishead[r] = 1 # I may be a head
1335 1341 e = index[r]
1336 1342 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1337 1343 return [r for r, val in enumerate(ishead) if val]
1338 1344
1339 1345 def heads(self, start=None, stop=None):
1340 1346 """return the list of all nodes that have no children
1341 1347
1342 1348 if start is specified, only heads that are descendants of
1343 1349 start will be returned
1344 1350 if stop is specified, it will consider all the revs from stop
1345 1351 as if they had no children
1346 1352 """
1347 1353 if start is None and stop is None:
1348 1354 if not len(self):
1349 1355 return [self.nullid]
1350 1356 return [self.node(r) for r in self.headrevs()]
1351 1357
1352 1358 if start is None:
1353 1359 start = nullrev
1354 1360 else:
1355 1361 start = self.rev(start)
1356 1362
1357 1363 stoprevs = {self.rev(n) for n in stop or []}
1358 1364
1359 1365 revs = dagop.headrevssubset(
1360 1366 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1361 1367 )
1362 1368
1363 1369 return [self.node(rev) for rev in revs]
1364 1370
1365 1371 def children(self, node):
1366 1372 """find the children of a given node"""
1367 1373 c = []
1368 1374 p = self.rev(node)
1369 1375 for r in self.revs(start=p + 1):
1370 1376 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1371 1377 if prevs:
1372 1378 for pr in prevs:
1373 1379 if pr == p:
1374 1380 c.append(self.node(r))
1375 1381 elif p == nullrev:
1376 1382 c.append(self.node(r))
1377 1383 return c
1378 1384
1379 1385 def commonancestorsheads(self, a, b):
1380 1386 """calculate all the heads of the common ancestors of nodes a and b"""
1381 1387 a, b = self.rev(a), self.rev(b)
1382 1388 ancs = self._commonancestorsheads(a, b)
1383 1389 return pycompat.maplist(self.node, ancs)
1384 1390
1385 1391 def _commonancestorsheads(self, *revs):
1386 1392 """calculate all the heads of the common ancestors of revs"""
1387 1393 try:
1388 1394 ancs = self.index.commonancestorsheads(*revs)
1389 1395 except (AttributeError, OverflowError): # C implementation failed
1390 1396 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1391 1397 return ancs
1392 1398
1393 1399 def isancestor(self, a, b):
1394 1400 """return True if node a is an ancestor of node b
1395 1401
1396 1402 A revision is considered an ancestor of itself."""
1397 1403 a, b = self.rev(a), self.rev(b)
1398 1404 return self.isancestorrev(a, b)
1399 1405
1400 1406 def isancestorrev(self, a, b):
1401 1407 """return True if revision a is an ancestor of revision b
1402 1408
1403 1409 A revision is considered an ancestor of itself.
1404 1410
1405 1411 The implementation of this is trivial but the use of
1406 1412 reachableroots is not."""
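# Revision numbers are topologically sorted: an ancestor always has a
# revision number lower than (or equal to, for itself) its descendant,
# which is what makes the quick ``a > b`` rejection below correct.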
1407 1413 if a == nullrev:
1408 1414 return True
1409 1415 elif a == b:
1410 1416 return True
1411 1417 elif a > b:
1412 1418 return False
1413 1419 return bool(self.reachableroots(a, [b], [a], includepath=False))
1414 1420
1415 1421 def reachableroots(self, minroot, heads, roots, includepath=False):
1416 1422 """return (heads(::(<roots> and <roots>::<heads>)))
1417 1423
1418 1424 If includepath is True, return (<roots>::<heads>)."""
1419 1425 try:
1420 1426 return self.index.reachableroots2(
1421 1427 minroot, heads, roots, includepath
1422 1428 )
1423 1429 except AttributeError:
1424 1430 return dagop._reachablerootspure(
1425 1431 self.parentrevs, minroot, roots, heads, includepath
1426 1432 )
1427 1433
1428 1434 def ancestor(self, a, b):
1429 1435 """calculate the "best" common ancestor of nodes a and b"""
1430 1436
1431 1437 a, b = self.rev(a), self.rev(b)
1432 1438 try:
1433 1439 ancs = self.index.ancestors(a, b)
1434 1440 except (AttributeError, OverflowError):
1435 1441 ancs = ancestor.ancestors(self.parentrevs, a, b)
1436 1442 if ancs:
1437 1443 # choose a consistent winner when there's a tie
1438 1444 return min(map(self.node, ancs))
1439 1445 return self.nullid
1440 1446
1441 1447 def _match(self, id):
1442 1448 if isinstance(id, int):
1443 1449 # rev
1444 1450 return self.node(id)
1445 1451 if len(id) == self.nodeconstants.nodelen:
1446 1452 # possibly a binary node
1447 1453 # odds of a binary node being all hex in ASCII are 1 in 10**25
1448 1454 try:
1449 1455 node = id
1450 1456 self.rev(node) # quick search the index
1451 1457 return node
1452 1458 except error.LookupError:
1453 1459 pass # may be partial hex id
1454 1460 try:
1455 1461 # str(rev)
1456 1462 rev = int(id)
1457 1463 if b"%d" % rev != id:
1458 1464 raise ValueError
1459 1465 if rev < 0:
1460 1466 rev = len(self) + rev
1461 1467 if rev < 0 or rev >= len(self):
1462 1468 raise ValueError
1463 1469 return self.node(rev)
1464 1470 except (ValueError, OverflowError):
1465 1471 pass
1466 1472 if len(id) == 2 * self.nodeconstants.nodelen:
1467 1473 try:
1468 1474 # a full hex nodeid?
1469 1475 node = bin(id)
1470 1476 self.rev(node)
1471 1477 return node
1472 1478 except (TypeError, error.LookupError):
1473 1479 pass
1474 1480
1475 1481 def _partialmatch(self, id):
1476 1482 # we don't care about wdirfilenodeids as they should always be full hashes
1477 1483 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1478 1484 ambiguous = False
1479 1485 try:
1480 1486 partial = self.index.partialmatch(id)
1481 1487 if partial and self.hasnode(partial):
1482 1488 if maybewdir:
1483 1489 # single 'ff...' match in radix tree, ambiguous with wdir
1484 1490 ambiguous = True
1485 1491 else:
1486 1492 return partial
1487 1493 elif maybewdir:
1488 1494 # no 'ff...' match in radix tree, wdir identified
1489 1495 raise error.WdirUnsupported
1490 1496 else:
1491 1497 return None
1492 1498 except error.RevlogError:
1493 1499 # parsers.c radix tree lookup gave multiple matches
1494 1500 # fast path: for unfiltered changelog, radix tree is accurate
1495 1501 if not getattr(self, 'filteredrevs', None):
1496 1502 ambiguous = True
1497 1503 # fall through to slow path that filters hidden revisions
1498 1504 except (AttributeError, ValueError):
1499 1505 # we are pure python, or key was too short to search radix tree
1500 1506 pass
1501 1507 if ambiguous:
1502 1508 raise error.AmbiguousPrefixLookupError(
1503 1509 id, self.display_id, _(b'ambiguous identifier')
1504 1510 )
1505 1511
1506 1512 if id in self._pcache:
1507 1513 return self._pcache[id]
1508 1514
1509 1515 if len(id) <= 40:
1510 1516 try:
1511 1517 # hex(node)[:...]
1512 1518 l = len(id) // 2 # grab an even number of digits
1513 1519 prefix = bin(id[: l * 2])
1514 1520 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1515 1521 nl = [
1516 1522 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1517 1523 ]
1518 1524 if self.nodeconstants.nullhex.startswith(id):
1519 1525 nl.append(self.nullid)
1520 1526 if len(nl) > 0:
1521 1527 if len(nl) == 1 and not maybewdir:
1522 1528 self._pcache[id] = nl[0]
1523 1529 return nl[0]
1524 1530 raise error.AmbiguousPrefixLookupError(
1525 1531 id, self.display_id, _(b'ambiguous identifier')
1526 1532 )
1527 1533 if maybewdir:
1528 1534 raise error.WdirUnsupported
1529 1535 return None
1530 1536 except TypeError:
1531 1537 pass
1532 1538
1533 1539 def lookup(self, id):
1534 1540 """locate a node based on:
1535 1541 - revision number or str(revision number)
1536 1542 - nodeid or prefix of hex nodeid
1537 1543 """
1538 1544 n = self._match(id)
1539 1545 if n is not None:
1540 1546 return n
1541 1547 n = self._partialmatch(id)
1542 1548 if n:
1543 1549 return n
1544 1550
1545 1551 raise error.LookupError(id, self.display_id, _(b'no match found'))
1546 1552
1547 1553 def shortest(self, node, minlength=1):
1548 1554 """Find the shortest unambiguous prefix that matches node."""
1549 1555
1550 1556 def isvalid(prefix):
1551 1557 try:
1552 1558 matchednode = self._partialmatch(prefix)
1553 1559 except error.AmbiguousPrefixLookupError:
1554 1560 return False
1555 1561 except error.WdirUnsupported:
1556 1562 # single 'ff...' match
1557 1563 return True
1558 1564 if matchednode is None:
1559 1565 raise error.LookupError(node, self.display_id, _(b'no node'))
1560 1566 return True
1561 1567
1562 1568 def maybewdir(prefix):
1563 1569 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1564 1570
1565 1571 hexnode = hex(node)
1566 1572
1567 1573 def disambiguate(hexnode, minlength):
1568 1574 """Disambiguate against wdirid."""
1569 1575 for length in range(minlength, len(hexnode) + 1):
1570 1576 prefix = hexnode[:length]
1571 1577 if not maybewdir(prefix):
1572 1578 return prefix
1573 1579
1574 1580 if not getattr(self, 'filteredrevs', None):
1575 1581 try:
1576 1582 length = max(self.index.shortest(node), minlength)
1577 1583 return disambiguate(hexnode, length)
1578 1584 except error.RevlogError:
1579 1585 if node != self.nodeconstants.wdirid:
1580 1586 raise error.LookupError(
1581 1587 node, self.display_id, _(b'no node')
1582 1588 )
1583 1589 except AttributeError:
1584 1590 # Fall through to pure code
1585 1591 pass
1586 1592
1587 1593 if node == self.nodeconstants.wdirid:
1588 1594 for length in range(minlength, len(hexnode) + 1):
1589 1595 prefix = hexnode[:length]
1590 1596 if isvalid(prefix):
1591 1597 return prefix
1592 1598
1593 1599 for length in range(minlength, len(hexnode) + 1):
1594 1600 prefix = hexnode[:length]
1595 1601 if isvalid(prefix):
1596 1602 return disambiguate(hexnode, length)
1597 1603
1598 1604 def cmp(self, node, text):
1599 1605 """compare text with a given file revision
1600 1606
1601 1607 returns True if text is different than what is stored.
1602 1608 """
1603 1609 p1, p2 = self.parents(node)
1604 1610 return storageutil.hashrevisionsha1(text, p1, p2) != node
1605 1611
1606 1612 def _getsegmentforrevs(self, startrev, endrev, df=None):
1607 1613 """Obtain a segment of raw data corresponding to a range of revisions.
1608 1614
1609 1615 Accepts the start and end revisions and an optional already-open
1610 1616 file handle to be used for reading. If the file handle is used, its
1611 1617 seek position will not be preserved.
1612 1618
1613 1619 Requests for data may be satisfied by a cache.
1614 1620
1615 1621 Returns a 2-tuple of (offset, data) for the requested range of
1616 1622 revisions. Offset is the integer offset from the beginning of the
1617 1623 revlog and data is a str or buffer of the raw byte data.
1618 1624
1619 1625 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1620 1626 to determine where each revision's data begins and ends.
1621 1627 """
1622 1628 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1623 1629 # (functions are expensive).
1624 1630 index = self.index
1625 1631 istart = index[startrev]
1626 1632 start = int(istart[0] >> 16)
1627 1633 if startrev == endrev:
1628 1634 end = start + istart[1]
1629 1635 else:
1630 1636 iend = index[endrev]
1631 1637 end = int(iend[0] >> 16) + iend[1]
1632 1638
1633 1639 if self._inline:
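# In an inline revlog the index entries and the data chunks are
# interleaved in a single file, so each revision's data is shifted by
# the size of all index entries written before it.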
1634 1640 start += (startrev + 1) * self.index.entry_size
1635 1641 end += (endrev + 1) * self.index.entry_size
1636 1642 length = end - start
1637 1643
1638 1644 return start, self._segmentfile.read_chunk(start, length, df)
1639 1645
1640 1646 def _chunk(self, rev, df=None):
1641 1647 """Obtain a single decompressed chunk for a revision.
1642 1648
1643 1649 Accepts an integer revision and an optional already-open file handle
1644 1650 to be used for reading. If used, the seek position of the file will not
1645 1651 be preserved.
1646 1652
1647 1653 Returns a str holding uncompressed data for the requested revision.
1648 1654 """
1649 1655 compression_mode = self.index[rev][10]
1650 1656 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1651 1657 if compression_mode == COMP_MODE_PLAIN:
1652 1658 return data
1653 1659 elif compression_mode == COMP_MODE_DEFAULT:
1654 1660 return self._decompressor(data)
1655 1661 elif compression_mode == COMP_MODE_INLINE:
1656 1662 return self.decompress(data)
1657 1663 else:
1658 1664 msg = b'unknown compression mode %d'
1659 1665 msg %= compression_mode
1660 1666 raise error.RevlogError(msg)
1661 1667
1662 1668 def _chunks(self, revs, df=None, targetsize=None):
1663 1669 """Obtain decompressed chunks for the specified revisions.
1664 1670
1665 1671 Accepts an iterable of numeric revisions that are assumed to be in
1666 1672 ascending order. Also accepts an optional already-open file handle
1667 1673 to be used for reading. If used, the seek position of the file will
1668 1674 not be preserved.
1669 1675
1670 1676 This function is similar to calling ``self._chunk()`` multiple times,
1671 1677 but is faster.
1672 1678
1673 1679 Returns a list with decompressed data for each requested revision.
1674 1680 """
1675 1681 if not revs:
1676 1682 return []
1677 1683 start = self.start
1678 1684 length = self.length
1679 1685 inline = self._inline
1680 1686 iosize = self.index.entry_size
1681 1687 buffer = util.buffer
1682 1688
1683 1689 l = []
1684 1690 ladd = l.append
1685 1691
1686 1692 if not self._withsparseread:
1687 1693 slicedchunks = (revs,)
1688 1694 else:
1689 1695 slicedchunks = deltautil.slicechunk(
1690 1696 self, revs, targetsize=targetsize
1691 1697 )
1692 1698
1693 1699 for revschunk in slicedchunks:
1694 1700 firstrev = revschunk[0]
1695 1701 # Skip trailing revisions with empty diff
1696 1702 for lastrev in revschunk[::-1]:
1697 1703 if length(lastrev) != 0:
1698 1704 break
1699 1705
1700 1706 try:
1701 1707 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1702 1708 except OverflowError:
1703 1709 # issue4215 - we can't cache a run of chunks greater than
1704 1710 # 2G on Windows
1705 1711 return [self._chunk(rev, df=df) for rev in revschunk]
1706 1712
1707 1713 decomp = self.decompress
1708 1714 # self._decompressor might be None, but will not be used in that case
1709 1715 def_decomp = self._decompressor
1710 1716 for rev in revschunk:
1711 1717 chunkstart = start(rev)
1712 1718 if inline:
1713 1719 chunkstart += (rev + 1) * iosize
1714 1720 chunklength = length(rev)
1715 1721 comp_mode = self.index[rev][10]
1716 1722 c = buffer(data, chunkstart - offset, chunklength)
1717 1723 if comp_mode == COMP_MODE_PLAIN:
1718 1724 ladd(c)
1719 1725 elif comp_mode == COMP_MODE_INLINE:
1720 1726 ladd(decomp(c))
1721 1727 elif comp_mode == COMP_MODE_DEFAULT:
1722 1728 ladd(def_decomp(c))
1723 1729 else:
1724 1730 msg = b'unknown compression mode %d'
1725 1731 msg %= comp_mode
1726 1732 raise error.RevlogError(msg)
1727 1733
1728 1734 return l
1729 1735
1730 1736 def deltaparent(self, rev):
1731 1737 """return deltaparent of the given revision"""
1732 1738 base = self.index[rev][3]
1733 1739 if base == rev:
1734 1740 return nullrev
1735 1741 elif self._generaldelta:
1736 1742 return base
1737 1743 else:
1738 1744 return rev - 1
1739 1745
1740 1746 def issnapshot(self, rev):
1741 1747 """tells whether rev is a snapshot"""
1742 1748 if not self._sparserevlog:
1743 1749 return self.deltaparent(rev) == nullrev
1744 1750 elif util.safehasattr(self.index, b'issnapshot'):
1745 1751 # directly assign the method to cache both the hasattr test and the access
1746 1752 self.issnapshot = self.index.issnapshot
1747 1753 return self.issnapshot(rev)
1748 1754 if rev == nullrev:
1749 1755 return True
1750 1756 entry = self.index[rev]
1751 1757 base = entry[3]
1752 1758 if base == rev:
1753 1759 return True
1754 1760 if base == nullrev:
1755 1761 return True
1756 1762 p1 = entry[5]
1757 1763 p2 = entry[6]
1758 1764 if base == p1 or base == p2:
1759 1765 return False
1760 1766 return self.issnapshot(base)
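# In other words: under sparse-revlog a revision is a snapshot when its
# delta chain is anchored on nullrev or on another snapshot rather than
# on one of its parents; a delta against a parent is a regular delta.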
1761 1767
1762 1768 def snapshotdepth(self, rev):
1763 1769 """number of snapshot in the chain before this one"""
1764 1770 if not self.issnapshot(rev):
1765 1771 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1766 1772 return len(self._deltachain(rev)[0]) - 1
1767 1773
1768 1774 def revdiff(self, rev1, rev2):
1769 1775 """return or calculate a delta between two revisions
1770 1776
1771 1777 The delta calculated is in binary form and is intended to be written to
1772 1778 revlog data directly. So this function needs raw revision data.
1773 1779 """
1774 1780 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1775 1781 return bytes(self._chunk(rev2))
1776 1782
1777 1783 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1778 1784
1779 1785 def _processflags(self, text, flags, operation, raw=False):
1780 1786 """deprecated entry point to access flag processors"""
1781 1787 msg = b'_processflags(...) use the specialized variant'
1782 1788 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1783 1789 if raw:
1784 1790 return text, flagutil.processflagsraw(self, text, flags)
1785 1791 elif operation == b'read':
1786 1792 return flagutil.processflagsread(self, text, flags)
1787 1793 else: # write operation
1788 1794 return flagutil.processflagswrite(self, text, flags)
1789 1795
1790 1796 def revision(self, nodeorrev, _df=None, raw=False):
1791 1797 """return an uncompressed revision of a given node or revision
1792 1798 number.
1793 1799
1794 1800 _df - an existing file handle to read from. (internal-only)
1795 1801 raw - an optional argument specifying if the revision data is to be
1796 1802 treated as raw data when applying flag transforms. 'raw' should be set
1797 1803 to True when generating changegroups or in debug commands.
1798 1804 """
1799 1805 if raw:
1800 1806 msg = (
1801 1807 b'revlog.revision(..., raw=True) is deprecated, '
1802 1808 b'use revlog.rawdata(...)'
1803 1809 )
1804 1810 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1805 1811 return self._revisiondata(nodeorrev, _df, raw=raw)
1806 1812
1807 1813 def sidedata(self, nodeorrev, _df=None):
1808 1814 """a map of extra data related to the changeset but not part of the hash
1809 1815
1810 1816 This function currently returns a dictionary. However, a more advanced
1811 1817 mapping object will likely be used in the future for more
1812 1818 efficient/lazy code.
1813 1819 """
1814 1820 # deal with <nodeorrev> argument type
1815 1821 if isinstance(nodeorrev, int):
1816 1822 rev = nodeorrev
1817 1823 else:
1818 1824 rev = self.rev(nodeorrev)
1819 1825 return self._sidedata(rev)
1820 1826
1821 1827 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1822 1828 # deal with <nodeorrev> argument type
1823 1829 if isinstance(nodeorrev, int):
1824 1830 rev = nodeorrev
1825 1831 node = self.node(rev)
1826 1832 else:
1827 1833 node = nodeorrev
1828 1834 rev = None
1829 1835
1830 1836 # fast path the special `nullid` rev
1831 1837 if node == self.nullid:
1832 1838 return b""
1833 1839
1834 1840 # ``rawtext`` is the text as stored inside the revlog. Might be the
1835 1841 # revision or might need to be processed to retrieve the revision.
1836 1842 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1837 1843
1838 1844 if raw and validated:
1839 1845 # if we don't want to process the raw text and that raw
1840 1846 # text is cached, we can exit early.
1841 1847 return rawtext
1842 1848 if rev is None:
1843 1849 rev = self.rev(node)
1844 1850 # the revlog's flags for this revision
1845 1851 # (they usually alter its state or content)
1846 1852 flags = self.flags(rev)
1847 1853
1848 1854 if validated and flags == REVIDX_DEFAULT_FLAGS:
1849 1855 # no extra flags set, no flag processor runs, text = rawtext
1850 1856 return rawtext
1851 1857
1852 1858 if raw:
1853 1859 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1854 1860 text = rawtext
1855 1861 else:
1856 1862 r = flagutil.processflagsread(self, rawtext, flags)
1857 1863 text, validatehash = r
1858 1864 if validatehash:
1859 1865 self.checkhash(text, node, rev=rev)
1860 1866 if not validated:
1861 1867 self._revisioncache = (node, rev, rawtext)
1862 1868
1863 1869 return text
1864 1870
1865 1871 def _rawtext(self, node, rev, _df=None):
1866 1872 """return the possibly unvalidated rawtext for a revision
1867 1873
1868 1874 returns (rev, rawtext, validated)
1869 1875 """
1870 1876
1871 1877 # revision in the cache (could be useful to apply delta)
1872 1878 cachedrev = None
1873 1879 # An intermediate text to apply deltas to
1874 1880 basetext = None
1875 1881
1876 1882 # Check if we have the entry in cache
1877 1883 # The cache entry looks like (node, rev, rawtext)
1878 1884 if self._revisioncache:
1879 1885 if self._revisioncache[0] == node:
1880 1886 return (rev, self._revisioncache[2], True)
1881 1887 cachedrev = self._revisioncache[1]
1882 1888
1883 1889 if rev is None:
1884 1890 rev = self.rev(node)
1885 1891
1886 1892 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1887 1893 if stopped:
1888 1894 basetext = self._revisioncache[2]
1889 1895
1890 1896 # drop cache to save memory, the caller is expected to
1891 1897 # update self._revisioncache after validating the text
1892 1898 self._revisioncache = None
1893 1899
1894 1900 targetsize = None
1895 1901 rawsize = self.index[rev][2]
1896 1902 if 0 <= rawsize:
1897 1903 targetsize = 4 * rawsize
1898 1904
1899 1905 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1900 1906 if basetext is None:
1901 1907 basetext = bytes(bins[0])
1902 1908 bins = bins[1:]
1903 1909
1904 1910 rawtext = mdiff.patches(basetext, bins)
1905 1911 del basetext # let us have a chance to free memory early
1906 1912 return (rev, rawtext, False)
1907 1913
1908 1914 def _sidedata(self, rev):
1909 1915 """Return the sidedata for a given revision number."""
1910 1916 index_entry = self.index[rev]
1911 1917 sidedata_offset = index_entry[8]
1912 1918 sidedata_size = index_entry[9]
1913 1919
1914 1920 if self._inline:
1915 1921 sidedata_offset += self.index.entry_size * (1 + rev)
1916 1922 if sidedata_size == 0:
1917 1923 return {}
1918 1924
1919 # XXX this need caching, as we do for data
1920 with self._sidedatareadfp() as sdf:
1921 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1922 filename = self._sidedatafile
1923 end = self._docket.sidedata_end
1924 offset = sidedata_offset
1925 length = sidedata_size
1926 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1927 raise error.RevlogError(m)
1928
1929 sdf.seek(sidedata_offset, os.SEEK_SET)
1930 comp_segment = sdf.read(sidedata_size)
1931
1932 if len(comp_segment) < sidedata_size:
1933 filename = self._sidedatafile
1934 length = sidedata_size
1935 offset = sidedata_offset
1936 got = len(comp_segment)
1937 m = randomaccessfile.PARTIAL_READ_MSG % (
1938 filename,
1939 length,
1940 offset,
1941 got,
1942 )
1943 raise error.RevlogError(m)
1925 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1926 filename = self._sidedatafile
1927 end = self._docket.sidedata_end
1928 offset = sidedata_offset
1929 length = sidedata_size
1930 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1931 raise error.RevlogError(m)
1932
1933 comp_segment = self._segmentfile_sidedata.read_chunk(
1934 sidedata_offset, sidedata_size
1935 )
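# The read above goes through the cached random-access file wrapper for
# the sidedata file, mirroring how revision data is read via
# self._segmentfile.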
1944 1936
1945 1937 comp = self.index[rev][11]
1946 1938 if comp == COMP_MODE_PLAIN:
1947 1939 segment = comp_segment
1948 1940 elif comp == COMP_MODE_DEFAULT:
1949 1941 segment = self._decompressor(comp_segment)
1950 1942 elif comp == COMP_MODE_INLINE:
1951 1943 segment = self.decompress(comp_segment)
1952 1944 else:
1953 1945 msg = b'unknown compression mode %d'
1954 1946 msg %= comp
1955 1947 raise error.RevlogError(msg)
1956 1948
1957 1949 sidedata = sidedatautil.deserialize_sidedata(segment)
1958 1950 return sidedata
1959 1951
1960 1952 def rawdata(self, nodeorrev, _df=None):
1961 1953 """return an uncompressed raw data of a given node or revision number.
1962 1954
1963 1955 _df - an existing file handle to read from. (internal-only)
1964 1956 """
1965 1957 return self._revisiondata(nodeorrev, _df, raw=True)
1966 1958
1967 1959 def hash(self, text, p1, p2):
1968 1960 """Compute a node hash.
1969 1961
1970 1962 Available as a function so that subclasses can replace the hash
1971 1963 as needed.
1972 1964 """
1973 1965 return storageutil.hashrevisionsha1(text, p1, p2)
1974 1966
1975 1967 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1976 1968 """Check node hash integrity.
1977 1969
1978 1970 Available as a function so that subclasses can extend hash mismatch
1979 1971 behaviors as needed.
1980 1972 """
1981 1973 try:
1982 1974 if p1 is None and p2 is None:
1983 1975 p1, p2 = self.parents(node)
1984 1976 if node != self.hash(text, p1, p2):
1985 1977 # Clear the revision cache on hash failure. The revision cache
1986 1978 # only stores the raw revision and clearing the cache does have
1987 1979 # the side-effect that we won't have a cache hit when the raw
1988 1980 # revision data is accessed. But this case should be rare and
1989 1981 # it is extra work to teach the cache about the hash
1990 1982 # verification state.
1991 1983 if self._revisioncache and self._revisioncache[0] == node:
1992 1984 self._revisioncache = None
1993 1985
1994 1986 revornode = rev
1995 1987 if revornode is None:
1996 1988 revornode = templatefilters.short(hex(node))
1997 1989 raise error.RevlogError(
1998 1990 _(b"integrity check failed on %s:%s")
1999 1991 % (self.display_id, pycompat.bytestr(revornode))
2000 1992 )
2001 1993 except error.RevlogError:
2002 1994 if self._censorable and storageutil.iscensoredtext(text):
2003 1995 raise error.CensoredNodeError(self.display_id, node, text)
2004 1996 raise
2005 1997
2006 1998 def _enforceinlinesize(self, tr):
2007 1999 """Check if the revlog is too big for inline and convert if so.
2008 2000
2009 2001 This should be called after revisions are added to the revlog. If the
2010 2002 revlog has grown too large to be an inline revlog, it will convert it
2011 2003 to use multiple index and data files.
2012 2004 """
2013 2005 tiprev = len(self) - 1
2014 2006 total_size = self.start(tiprev) + self.length(tiprev)
2015 2007 if not self._inline or total_size < _maxinline:
2016 2008 return
2017 2009
2018 2010 troffset = tr.findoffset(self._indexfile)
2019 2011 if troffset is None:
2020 2012 raise error.RevlogError(
2021 2013 _(b"%s not found in the transaction") % self._indexfile
2022 2014 )
2023 2015 trindex = 0
2024 2016 tr.add(self._datafile, 0)
2025 2017
2026 2018 existing_handles = False
2027 2019 if self._writinghandles is not None:
2028 2020 existing_handles = True
2029 2021 fp = self._writinghandles[0]
2030 2022 fp.flush()
2031 2023 fp.close()
2032 2024 # We can't use the cached file handle after close(). So prevent
2033 2025 # its usage.
2034 2026 self._writinghandles = None
2035 2027 self._segmentfile.writing_handle = None
2028 # No need to deal with the sidedata writing handle as it is only
2029 # relevant for revlog-v2, which is never inline, so this code is
2030 # never reached for it
2036 2031
2037 2032 new_dfh = self._datafp(b'w+')
2038 2033 new_dfh.truncate(0) # drop any potentially existing data
2039 2034 try:
2040 2035 with self._indexfp() as read_ifh:
2041 2036 for r in self:
2042 2037 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2043 2038 if troffset <= self.start(r) + r * self.index.entry_size:
2044 2039 trindex = r
2045 2040 new_dfh.flush()
2046 2041
2047 2042 with self.__index_new_fp() as fp:
2048 2043 self._format_flags &= ~FLAG_INLINE_DATA
2049 2044 self._inline = False
2050 2045 for i in self:
2051 2046 e = self.index.entry_binary(i)
2052 2047 if i == 0 and self._docket is None:
2053 2048 header = self._format_flags | self._format_version
2054 2049 header = self.index.pack_header(header)
2055 2050 e = header + e
2056 2051 fp.write(e)
2057 2052 if self._docket is not None:
2058 2053 self._docket.index_end = fp.tell()
2059 2054
2060 2055 # There is a small transactional race here. If the rename of
2061 2056 # the index fails, we should remove the datafile. It is more
2062 2057 # important to ensure that the data file is not truncated
2063 2058 # when the index is replaced as otherwise data is lost.
2064 2059 tr.replace(self._datafile, self.start(trindex))
2065 2060
2066 2061 # the temp file replace the real index when we exit the context
2067 2062 # manager
2068 2063
2069 2064 tr.replace(self._indexfile, trindex * self.index.entry_size)
2070 2065 nodemaputil.setup_persistent_nodemap(tr, self)
2071 2066 self._segmentfile = randomaccessfile.randomaccessfile(
2072 2067 self.opener,
2073 2068 self._datafile,
2074 2069 self._chunkcachesize,
2075 2070 )
2076 2071
2077 2072 if existing_handles:
2078 2073 # switched from inline to conventional reopen the index
2079 2074 ifh = self.__index_write_fp()
2080 2075 self._writinghandles = (ifh, new_dfh, None)
2081 2076 self._segmentfile.writing_handle = new_dfh
2082 2077 new_dfh = None
2078 # No need to deal with the sidedata writing handle as it is only
2079 # relevant for revlog-v2, which is never inline, so this code is
2080 # never reached for it
2083 2081 finally:
2084 2082 if new_dfh is not None:
2085 2083 new_dfh.close()
2086 2084
2087 2085 def _nodeduplicatecallback(self, transaction, node):
2088 2086 """called when trying to add a node already stored."""
2089 2087
2090 2088 @contextlib.contextmanager
2091 2089 def _writing(self, transaction):
2092 2090 if self._trypending:
2093 2091 msg = b'try to write in a `trypending` revlog: %s'
2094 2092 msg %= self.display_id
2095 2093 raise error.ProgrammingError(msg)
2096 2094 if self._writinghandles is not None:
2097 2095 yield
2098 2096 else:
2099 2097 ifh = dfh = sdfh = None
2100 2098 try:
2101 2099 r = len(self)
2102 2100 # opening the data file.
2103 2101 dsize = 0
2104 2102 if r:
2105 2103 dsize = self.end(r - 1)
2106 2104 dfh = None
2107 2105 if not self._inline:
2108 2106 try:
2109 2107 dfh = self._datafp(b"r+")
2110 2108 if self._docket is None:
2111 2109 dfh.seek(0, os.SEEK_END)
2112 2110 else:
2113 2111 dfh.seek(self._docket.data_end, os.SEEK_SET)
2114 2112 except IOError as inst:
2115 2113 if inst.errno != errno.ENOENT:
2116 2114 raise
2117 2115 dfh = self._datafp(b"w+")
2118 2116 transaction.add(self._datafile, dsize)
2119 2117 if self._sidedatafile is not None:
2120 2118 try:
2121 2119 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2122 2120 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2123 2121 except IOError as inst:
2124 2122 if inst.errno != errno.ENOENT:
2125 2123 raise
2126 2124 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2127 2125 transaction.add(
2128 2126 self._sidedatafile, self._docket.sidedata_end
2129 2127 )
2130 2128
2131 2129 # opening the index file.
2132 2130 isize = r * self.index.entry_size
2133 2131 ifh = self.__index_write_fp()
2134 2132 if self._inline:
2135 2133 transaction.add(self._indexfile, dsize + isize)
2136 2134 else:
2137 2135 transaction.add(self._indexfile, isize)
2138 2136 # exposing all file handle for writing.
2139 2137 self._writinghandles = (ifh, dfh, sdfh)
2140 2138 self._segmentfile.writing_handle = ifh if self._inline else dfh
2139 self._segmentfile_sidedata.writing_handle = sdfh
2141 2140 yield
2142 2141 if self._docket is not None:
2143 2142 self._write_docket(transaction)
2144 2143 finally:
2145 2144 self._writinghandles = None
2146 2145 self._segmentfile.writing_handle = None
2146 self._segmentfile_sidedata.writing_handle = None
2147 2147 if dfh is not None:
2148 2148 dfh.close()
2149 2149 if sdfh is not None:
2150 2150 sdfh.close()
2151 2151 # closing the index file last to avoid exposing referent to
2152 2152 # potential unflushed data content.
2153 2153 if ifh is not None:
2154 2154 ifh.close()
2155 2155
2156 2156 def _write_docket(self, transaction):
2157 2157 """write the current docket on disk
2158 2158
2159 2159 Exists as a method to help the changelog implement transaction logic.
2160 2160 
2161 2161 We could also imagine using the same transaction logic for all revlogs
2162 2162 since dockets are cheap."""
2163 2163 self._docket.write(transaction)
2164 2164
2165 2165 def addrevision(
2166 2166 self,
2167 2167 text,
2168 2168 transaction,
2169 2169 link,
2170 2170 p1,
2171 2171 p2,
2172 2172 cachedelta=None,
2173 2173 node=None,
2174 2174 flags=REVIDX_DEFAULT_FLAGS,
2175 2175 deltacomputer=None,
2176 2176 sidedata=None,
2177 2177 ):
2178 2178 """add a revision to the log
2179 2179
2180 2180 text - the revision data to add
2181 2181 transaction - the transaction object used for rollback
2182 2182 link - the linkrev data to add
2183 2183 p1, p2 - the parent nodeids of the revision
2184 2184 cachedelta - an optional precomputed delta
2185 2185 node - nodeid of revision; typically node is not specified, and it is
2186 2186 computed by default as hash(text, p1, p2), however subclasses might
2187 2187 use a different hashing method (and override checkhash() in that case)
2188 2188 flags - the known flags to set on the revision
2189 2189 deltacomputer - an optional deltacomputer instance shared between
2190 2190 multiple calls
2191 2191 """
2192 2192 if link == nullrev:
2193 2193 raise error.RevlogError(
2194 2194 _(b"attempted to add linkrev -1 to %s") % self.display_id
2195 2195 )
2196 2196
2197 2197 if sidedata is None:
2198 2198 sidedata = {}
2199 2199 elif sidedata and not self.hassidedata:
2200 2200 raise error.ProgrammingError(
2201 2201 _(b"trying to add sidedata to a revlog who don't support them")
2202 2202 )
2203 2203
2204 2204 if flags:
2205 2205 node = node or self.hash(text, p1, p2)
2206 2206
2207 2207 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2208 2208
2209 2209 # If the flag processor modifies the revision data, ignore any provided
2210 2210 # cachedelta.
2211 2211 if rawtext != text:
2212 2212 cachedelta = None
2213 2213
2214 2214 if len(rawtext) > _maxentrysize:
2215 2215 raise error.RevlogError(
2216 2216 _(
2217 2217 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2218 2218 )
2219 2219 % (self.display_id, len(rawtext))
2220 2220 )
2221 2221
2222 2222 node = node or self.hash(rawtext, p1, p2)
2223 2223 rev = self.index.get_rev(node)
2224 2224 if rev is not None:
2225 2225 return rev
2226 2226
2227 2227 if validatehash:
2228 2228 self.checkhash(rawtext, node, p1=p1, p2=p2)
2229 2229
2230 2230 return self.addrawrevision(
2231 2231 rawtext,
2232 2232 transaction,
2233 2233 link,
2234 2234 p1,
2235 2235 p2,
2236 2236 node,
2237 2237 flags,
2238 2238 cachedelta=cachedelta,
2239 2239 deltacomputer=deltacomputer,
2240 2240 sidedata=sidedata,
2241 2241 )
2242 2242
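# A hedged usage sketch, not part of the original source; ``repo``,
# ``rl``, ``text``, ``linkrev``, ``p1node`` and ``p2node`` are
# hypothetical names:
#
#     with repo.transaction(b'example') as tr:
#         rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
#
# addrevision() hashes the text with its parents, returns the existing
# revision if the node is already stored, and otherwise writes a new
# delta through addrawrevision().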
2243 2243 def addrawrevision(
2244 2244 self,
2245 2245 rawtext,
2246 2246 transaction,
2247 2247 link,
2248 2248 p1,
2249 2249 p2,
2250 2250 node,
2251 2251 flags,
2252 2252 cachedelta=None,
2253 2253 deltacomputer=None,
2254 2254 sidedata=None,
2255 2255 ):
2256 2256 """add a raw revision with known flags, node and parents
2257 2257 useful when reusing a revision not stored in this revlog (ex: received
2258 2258 over wire, or read from an external bundle).
2259 2259 """
2260 2260 with self._writing(transaction):
2261 2261 return self._addrevision(
2262 2262 node,
2263 2263 rawtext,
2264 2264 transaction,
2265 2265 link,
2266 2266 p1,
2267 2267 p2,
2268 2268 flags,
2269 2269 cachedelta,
2270 2270 deltacomputer=deltacomputer,
2271 2271 sidedata=sidedata,
2272 2272 )
2273 2273
2274 2274 def compress(self, data):
2275 2275 """Generate a possibly-compressed representation of data."""
2276 2276 if not data:
2277 2277 return b'', data
2278 2278
2279 2279 compressed = self._compressor.compress(data)
2280 2280
2281 2281 if compressed:
2282 2282 # The revlog compressor added the header in the returned data.
2283 2283 return b'', compressed
2284 2284
2285 2285 if data[0:1] == b'\0':
2286 2286 return b'', data
2287 2287 return b'u', data
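# A hedged illustration of the (header, data) convention above, not part
# of the original source; ``some_text`` and ``rand_data`` are
# hypothetical values:
#
#     self.compress(b'')         # -> (b'', b'')        nothing to store
#     self.compress(some_text)   # -> (b'', b'x...')    zlib embeds its header
#     self.compress(rand_data)   # -> (b'u', rand_data) incompressible, literal
#
# decompress() below routes on the first byte of the stored chunk:
# b'x' (zlib), b'\0' (plain data), b'u' (explicit literal marker), or an
# engine-specific header resolved via _get_decompressor().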
2288 2288
2289 2289 def decompress(self, data):
2290 2290 """Decompress a revlog chunk.
2291 2291
2292 2292 The chunk is expected to begin with a header identifying the
2293 2293 format type so it can be routed to an appropriate decompressor.
2294 2294 """
2295 2295 if not data:
2296 2296 return data
2297 2297
2298 2298 # Revlogs are read much more frequently than they are written and many
2299 2299 # chunks only take microseconds to decompress, so performance is
2300 2300 # important here.
2301 2301 #
2302 2302 # We can make a few assumptions about revlogs:
2303 2303 #
2304 2304 # 1) the majority of chunks will be compressed (as opposed to inline
2305 2305 # raw data).
2306 2306 # 2) decompressing *any* data will likely be at least 10x slower than
2307 2307 # returning raw inline data.
2308 2308 # 3) we want to prioritize common and officially supported compression
2309 2309 # engines
2310 2310 #
2311 2311 # It follows that we want to optimize for "decompress compressed data
2312 2312 # when encoded with common and officially supported compression engines"
2313 2313 # case over "raw data" and "data encoded by less common or non-official
2314 2314 # compression engines." That is why we have the inline lookup first
2315 2315 # followed by the compengines lookup.
2316 2316 #
2317 2317 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2318 2318 # compressed chunks. And this matters for changelog and manifest reads.
2319 2319 t = data[0:1]
2320 2320
2321 2321 if t == b'x':
2322 2322 try:
2323 2323 return _zlibdecompress(data)
2324 2324 except zlib.error as e:
2325 2325 raise error.RevlogError(
2326 2326 _(b'revlog decompress error: %s')
2327 2327 % stringutil.forcebytestr(e)
2328 2328 )
2329 2329 # '\0' is more common than 'u' so it goes first.
2330 2330 elif t == b'\0':
2331 2331 return data
2332 2332 elif t == b'u':
2333 2333 return util.buffer(data, 1)
2334 2334
2335 2335 compressor = self._get_decompressor(t)
2336 2336
2337 2337 return compressor.decompress(data)
2338 2338
2339 2339 def _addrevision(
2340 2340 self,
2341 2341 node,
2342 2342 rawtext,
2343 2343 transaction,
2344 2344 link,
2345 2345 p1,
2346 2346 p2,
2347 2347 flags,
2348 2348 cachedelta,
2349 2349 alwayscache=False,
2350 2350 deltacomputer=None,
2351 2351 sidedata=None,
2352 2352 ):
2353 2353 """internal function to add revisions to the log
2354 2354
2355 2355 see addrevision for argument descriptions.
2356 2356
2357 2357 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2358 2358
2359 2359 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2360 2360 be used.
2361 2361
2362 2362 invariants:
2363 2363 - rawtext is optional (can be None); if not set, cachedelta must be set.
2364 2364 if both are set, they must correspond to each other.
2365 2365 """
2366 2366 if node == self.nullid:
2367 2367 raise error.RevlogError(
2368 2368 _(b"%s: attempt to add null revision") % self.display_id
2369 2369 )
2370 2370 if (
2371 2371 node == self.nodeconstants.wdirid
2372 2372 or node in self.nodeconstants.wdirfilenodeids
2373 2373 ):
2374 2374 raise error.RevlogError(
2375 2375 _(b"%s: attempt to add wdir revision") % self.display_id
2376 2376 )
2377 2377 if self._writinghandles is None:
2378 2378 msg = b'adding revision outside `revlog._writing` context'
2379 2379 raise error.ProgrammingError(msg)
2380 2380
2381 2381 if self._inline:
2382 2382 fh = self._writinghandles[0]
2383 2383 else:
2384 2384 fh = self._writinghandles[1]
2385 2385
2386 2386 btext = [rawtext]
2387 2387
2388 2388 curr = len(self)
2389 2389 prev = curr - 1
2390 2390
2391 2391 offset = self._get_data_offset(prev)
2392 2392
2393 2393 if self._concurrencychecker:
2394 2394 ifh, dfh, sdfh = self._writinghandles
2395 2395 # XXX no checking for the sidedata file
2396 2396 if self._inline:
2397 2397 # offset is "as if" it were in the .d file, so we need to add on
2398 2398 # the size of the entry metadata.
2399 2399 self._concurrencychecker(
2400 2400 ifh, self._indexfile, offset + curr * self.index.entry_size
2401 2401 )
2402 2402 else:
2403 2403 # Entries in the .i are a consistent size.
2404 2404 self._concurrencychecker(
2405 2405 ifh, self._indexfile, curr * self.index.entry_size
2406 2406 )
2407 2407 self._concurrencychecker(dfh, self._datafile, offset)
2408 2408
2409 2409 p1r, p2r = self.rev(p1), self.rev(p2)
2410 2410
2411 2411 # full versions are inserted when the needed deltas
2412 2412 # become comparable to the uncompressed text
2413 2413 if rawtext is None:
2414 2414 # need rawtext size, before changed by flag processors, which is
2415 2415 # the non-raw size. use revlog explicitly to avoid filelog's extra
2416 2416 # logic that might remove metadata size.
2417 2417 textlen = mdiff.patchedsize(
2418 2418 revlog.size(self, cachedelta[0]), cachedelta[1]
2419 2419 )
2420 2420 else:
2421 2421 textlen = len(rawtext)
2422 2422
2423 2423 if deltacomputer is None:
2424 2424 deltacomputer = deltautil.deltacomputer(self)
2425 2425
2426 2426 revinfo = revlogutils.revisioninfo(
2427 2427 node,
2428 2428 p1,
2429 2429 p2,
2430 2430 btext,
2431 2431 textlen,
2432 2432 cachedelta,
2433 2433 flags,
2434 2434 )
2435 2435
2436 2436 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2437 2437
2438 2438 compression_mode = COMP_MODE_INLINE
2439 2439 if self._docket is not None:
2440 2440 h, d = deltainfo.data
2441 2441 if not h and not d:
2442 2442 # no data to store at all... declare them uncompressed
2443 2443 compression_mode = COMP_MODE_PLAIN
2444 2444 elif not h:
2445 2445 t = d[0:1]
2446 2446 if t == b'\0':
2447 2447 compression_mode = COMP_MODE_PLAIN
2448 2448 elif t == self._docket.default_compression_header:
2449 2449 compression_mode = COMP_MODE_DEFAULT
2450 2450 elif h == b'u':
2451 2451 # we have a more efficient way to declare uncompressed
2452 2452 h = b''
2453 2453 compression_mode = COMP_MODE_PLAIN
2454 2454 deltainfo = deltautil.drop_u_compression(deltainfo)
2455 2455
2456 2456 sidedata_compression_mode = COMP_MODE_INLINE
2457 2457 if sidedata and self.hassidedata:
2458 2458 sidedata_compression_mode = COMP_MODE_PLAIN
2459 2459 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2460 2460 sidedata_offset = self._docket.sidedata_end
2461 2461 h, comp_sidedata = self.compress(serialized_sidedata)
2462 2462 if (
2463 2463 h != b'u'
2464 2464 and comp_sidedata[0:1] != b'\0'
2465 2465 and len(comp_sidedata) < len(serialized_sidedata)
2466 2466 ):
2467 2467 assert not h
2468 2468 if (
2469 2469 comp_sidedata[0:1]
2470 2470 == self._docket.default_compression_header
2471 2471 ):
2472 2472 sidedata_compression_mode = COMP_MODE_DEFAULT
2473 2473 serialized_sidedata = comp_sidedata
2474 2474 else:
2475 2475 sidedata_compression_mode = COMP_MODE_INLINE
2476 2476 serialized_sidedata = comp_sidedata
2477 2477 else:
2478 2478 serialized_sidedata = b""
2479 2479 # Don't store the offset if the sidedata is empty, that way
2480 2480 # we can easily detect empty sidedata and they will be no different
2481 2481 # from the ones we add manually.
2482 2482 sidedata_offset = 0
2483 2483
2484 2484 e = revlogutils.entry(
2485 2485 flags=flags,
2486 2486 data_offset=offset,
2487 2487 data_compressed_length=deltainfo.deltalen,
2488 2488 data_uncompressed_length=textlen,
2489 2489 data_compression_mode=compression_mode,
2490 2490 data_delta_base=deltainfo.base,
2491 2491 link_rev=link,
2492 2492 parent_rev_1=p1r,
2493 2493 parent_rev_2=p2r,
2494 2494 node_id=node,
2495 2495 sidedata_offset=sidedata_offset,
2496 2496 sidedata_compressed_length=len(serialized_sidedata),
2497 2497 sidedata_compression_mode=sidedata_compression_mode,
2498 2498 )
2499 2499
2500 2500 self.index.append(e)
2501 2501 entry = self.index.entry_binary(curr)
2502 2502 if curr == 0 and self._docket is None:
2503 2503 header = self._format_flags | self._format_version
2504 2504 header = self.index.pack_header(header)
2505 2505 entry = header + entry
2506 2506 self._writeentry(
2507 2507 transaction,
2508 2508 entry,
2509 2509 deltainfo.data,
2510 2510 link,
2511 2511 offset,
2512 2512 serialized_sidedata,
2513 2513 sidedata_offset,
2514 2514 )
2515 2515
2516 2516 rawtext = btext[0]
2517 2517
2518 2518 if alwayscache and rawtext is None:
2519 2519 rawtext = deltacomputer.buildtext(revinfo, fh)
2520 2520
2521 2521 if type(rawtext) == bytes: # only accept immutable objects
2522 2522 self._revisioncache = (node, curr, rawtext)
2523 2523 self._chainbasecache[curr] = deltainfo.chainbase
2524 2524 return curr
2525 2525
2526 2526 def _get_data_offset(self, prev):
2527 2527 """Returns the current offset in the (in-transaction) data file.
2528 2528 Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
2529 2529 file to store that information: since sidedata can be rewritten to the
2530 2530 end of the data file within a transaction, you can have cases where, for
2531 2531 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2532 2532 to `n - 1`'s sidedata being written after `n`'s data.
2533 2533
2534 2534 TODO cache this in a docket file before getting out of experimental."""
2535 2535 if self._docket is None:
2536 2536 return self.end(prev)
2537 2537 else:
2538 2538 return self._docket.data_end
2539 2539
2540 2540 def _writeentry(
2541 2541 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2542 2542 ):
2543 2543 # Files opened in a+ mode have inconsistent behavior on various
2544 2544 # platforms. Windows requires that a file positioning call be made
2545 2545 # when the file handle transitions between reads and writes. See
2546 2546 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2547 2547 # platforms, Python or the platform itself can be buggy. Some versions
2548 2548 # of Solaris have been observed to not append at the end of the file
2549 2549 # if the file was seeked to before the end. See issue4943 for more.
2550 2550 #
2551 2551 # We work around this issue by inserting a seek() before writing.
2552 2552 # Note: This is likely not necessary on Python 3. However, because
2553 2553 # the file handle is reused for reads and may be seeked there, we need
2554 2554 # to be careful before changing this.
2555 2555 if self._writinghandles is None:
2556 2556 msg = b'adding revision outside `revlog._writing` context'
2557 2557 raise error.ProgrammingError(msg)
2558 2558 ifh, dfh, sdfh = self._writinghandles
2559 2559 if self._docket is None:
2560 2560 ifh.seek(0, os.SEEK_END)
2561 2561 else:
2562 2562 ifh.seek(self._docket.index_end, os.SEEK_SET)
2563 2563 if dfh:
2564 2564 if self._docket is None:
2565 2565 dfh.seek(0, os.SEEK_END)
2566 2566 else:
2567 2567 dfh.seek(self._docket.data_end, os.SEEK_SET)
2568 2568 if sdfh:
2569 2569 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2570 2570
2571 2571 curr = len(self) - 1
2572 2572 if not self._inline:
2573 2573 transaction.add(self._datafile, offset)
2574 2574 if self._sidedatafile:
2575 2575 transaction.add(self._sidedatafile, sidedata_offset)
2576 2576 transaction.add(self._indexfile, curr * len(entry))
2577 2577 if data[0]:
2578 2578 dfh.write(data[0])
2579 2579 dfh.write(data[1])
2580 2580 if sidedata:
2581 2581 sdfh.write(sidedata)
2582 2582 ifh.write(entry)
2583 2583 else:
2584 2584 offset += curr * self.index.entry_size
2585 2585 transaction.add(self._indexfile, offset)
2586 2586 ifh.write(entry)
2587 2587 ifh.write(data[0])
2588 2588 ifh.write(data[1])
2589 2589 assert not sidedata
2590 2590 self._enforceinlinesize(transaction)
2591 2591 if self._docket is not None:
2592 2592 self._docket.index_end = self._writinghandles[0].tell()
2593 2593 self._docket.data_end = self._writinghandles[1].tell()
2594 2594 self._docket.sidedata_end = self._writinghandles[2].tell()
2595 2595
2596 2596 nodemaputil.setup_persistent_nodemap(transaction, self)
2597 2597
2598 2598 def addgroup(
2599 2599 self,
2600 2600 deltas,
2601 2601 linkmapper,
2602 2602 transaction,
2603 2603 alwayscache=False,
2604 2604 addrevisioncb=None,
2605 2605 duplicaterevisioncb=None,
2606 2606 ):
2607 2607 """
2608 2608 add a delta group
2609 2609
2610 2610         Given a set of deltas, add them to the revision log. The
2611 2611         first delta is against its parent, which should be in our
2612 2612         log; the rest are against the previous delta.
2613 2613
2614 2614 If ``addrevisioncb`` is defined, it will be called with arguments of
2615 2615 this revlog and the node that was added.
2616 2616 """
2617 2617
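        # Illustrative note (a sketch mirroring the unpacking in the loop
        # below): each item yielded by ``deltas`` is an 8-tuple of the form
        #
        #     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
        #
        # and ``linkmapper(linknode)`` translates the linked changelog node
        # into the link revision recorded in the index entry.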
2618 2618 if self._adding_group:
2619 2619 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2620 2620
2621 2621 self._adding_group = True
2622 2622 empty = True
2623 2623 try:
2624 2624 with self._writing(transaction):
2625 2625 deltacomputer = deltautil.deltacomputer(self)
2626 2626 # loop through our set of deltas
2627 2627 for data in deltas:
2628 2628 (
2629 2629 node,
2630 2630 p1,
2631 2631 p2,
2632 2632 linknode,
2633 2633 deltabase,
2634 2634 delta,
2635 2635 flags,
2636 2636 sidedata,
2637 2637 ) = data
2638 2638 link = linkmapper(linknode)
2639 2639 flags = flags or REVIDX_DEFAULT_FLAGS
2640 2640
2641 2641 rev = self.index.get_rev(node)
2642 2642 if rev is not None:
2643 2643 # this can happen if two branches make the same change
2644 2644 self._nodeduplicatecallback(transaction, rev)
2645 2645 if duplicaterevisioncb:
2646 2646 duplicaterevisioncb(self, rev)
2647 2647 empty = False
2648 2648 continue
2649 2649
2650 2650 for p in (p1, p2):
2651 2651 if not self.index.has_node(p):
2652 2652 raise error.LookupError(
2653 2653 p, self.radix, _(b'unknown parent')
2654 2654 )
2655 2655
2656 2656 if not self.index.has_node(deltabase):
2657 2657 raise error.LookupError(
2658 2658 deltabase, self.display_id, _(b'unknown delta base')
2659 2659 )
2660 2660
2661 2661 baserev = self.rev(deltabase)
2662 2662
2663 2663 if baserev != nullrev and self.iscensored(baserev):
2664 2664 # if base is censored, delta must be full replacement in a
2665 2665 # single patch operation
2666 2666 hlen = struct.calcsize(b">lll")
2667 2667 oldlen = self.rawsize(baserev)
2668 2668 newlen = len(delta) - hlen
2669 2669 if delta[:hlen] != mdiff.replacediffheader(
2670 2670 oldlen, newlen
2671 2671 ):
2672 2672 raise error.CensoredBaseError(
2673 2673 self.display_id, self.node(baserev)
2674 2674 )
2675 2675
2676 2676 if not flags and self._peek_iscensored(baserev, delta):
2677 2677 flags |= REVIDX_ISCENSORED
2678 2678
2679 2679 # We assume consumers of addrevisioncb will want to retrieve
2680 2680 # the added revision, which will require a call to
2681 2681 # revision(). revision() will fast path if there is a cache
2682 2682 # hit. So, we tell _addrevision() to always cache in this case.
2683 2683 # We're only using addgroup() in the context of changegroup
2684 2684 # generation so the revision data can always be handled as raw
2685 2685 # by the flagprocessor.
2686 2686 rev = self._addrevision(
2687 2687 node,
2688 2688 None,
2689 2689 transaction,
2690 2690 link,
2691 2691 p1,
2692 2692 p2,
2693 2693 flags,
2694 2694 (baserev, delta),
2695 2695 alwayscache=alwayscache,
2696 2696 deltacomputer=deltacomputer,
2697 2697 sidedata=sidedata,
2698 2698 )
2699 2699
2700 2700 if addrevisioncb:
2701 2701 addrevisioncb(self, rev)
2702 2702 empty = False
2703 2703 finally:
2704 2704 self._adding_group = False
2705 2705 return not empty
2706 2706
2707 2707 def iscensored(self, rev):
2708 2708 """Check if a file revision is censored."""
2709 2709 if not self._censorable:
2710 2710 return False
2711 2711
2712 2712 return self.flags(rev) & REVIDX_ISCENSORED
2713 2713
2714 2714 def _peek_iscensored(self, baserev, delta):
2715 2715 """Quickly check if a delta produces a censored revision."""
2716 2716 if not self._censorable:
2717 2717 return False
2718 2718
2719 2719 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2720 2720
2721 2721 def getstrippoint(self, minlink):
2722 2722 """find the minimum rev that must be stripped to strip the linkrev
2723 2723
2724 2724 Returns a tuple containing the minimum rev and a set of all revs that
2725 2725 have linkrevs that will be broken by this strip.
2726 2726 """
2727 2727 return storageutil.resolvestripinfo(
2728 2728 minlink,
2729 2729 len(self) - 1,
2730 2730 self.headrevs(),
2731 2731 self.linkrev,
2732 2732 self.parentrevs,
2733 2733 )
2734 2734
2735 2735 def strip(self, minlink, transaction):
2736 2736 """truncate the revlog on the first revision with a linkrev >= minlink
2737 2737
2738 2738 This function is called when we're stripping revision minlink and
2739 2739 its descendants from the repository.
2740 2740
2741 2741 We have to remove all revisions with linkrev >= minlink, because
2742 2742 the equivalent changelog revisions will be renumbered after the
2743 2743 strip.
2744 2744
2745 2745 So we truncate the revlog on the first of these revisions, and
2746 2746 trust that the caller has saved the revisions that shouldn't be
2747 2747 removed and that it'll re-add them after this truncation.
2748 2748 """
2749 2749 if len(self) == 0:
2750 2750 return
2751 2751
2752 2752 rev, _ = self.getstrippoint(minlink)
2753 2753 if rev == len(self):
2754 2754 return
2755 2755
2756 2756 # first truncate the files on disk
2757 2757 data_end = self.start(rev)
2758 2758 if not self._inline:
2759 2759 transaction.add(self._datafile, data_end)
2760 2760 end = rev * self.index.entry_size
2761 2761 else:
2762 2762 end = data_end + (rev * self.index.entry_size)
2763 2763
2764 2764 if self._sidedatafile:
2765 2765 sidedata_end = self.sidedata_cut_off(rev)
2766 2766 transaction.add(self._sidedatafile, sidedata_end)
2767 2767
2768 2768 transaction.add(self._indexfile, end)
2769 2769 if self._docket is not None:
2770 2770             # XXX we could leverage the docket while stripping. However, it is
2771 2771             # not powerful enough at the time of this comment
2772 2772 self._docket.index_end = end
2773 2773 self._docket.data_end = data_end
2774 2774 self._docket.sidedata_end = sidedata_end
2775 2775 self._docket.write(transaction, stripping=True)
2776 2776
2777 2777 # then reset internal state in memory to forget those revisions
2778 2778 self._revisioncache = None
2779 2779 self._chaininfocache = util.lrucachedict(500)
2780 2780 self._segmentfile.clear_cache()
2781 self._segmentfile_sidedata.clear_cache()
2781 2782
2782 2783 del self.index[rev:-1]
2783 2784
2784 2785 def checksize(self):
2785 2786 """Check size of index and data files
2786 2787
2787 2788 return a (dd, di) tuple.
2788 2789 - dd: extra bytes for the "data" file
2789 2790 - di: extra bytes for the "index" file
2790 2791
2791 2792 A healthy revlog will return (0, 0).
2792 2793 """
2793 2794 expected = 0
2794 2795 if len(self):
2795 2796 expected = max(0, self.end(len(self) - 1))
2796 2797
2797 2798 try:
2798 2799 with self._datafp() as f:
2799 2800 f.seek(0, io.SEEK_END)
2800 2801 actual = f.tell()
2801 2802 dd = actual - expected
2802 2803 except IOError as inst:
2803 2804 if inst.errno != errno.ENOENT:
2804 2805 raise
2805 2806 dd = 0
2806 2807
2807 2808 try:
2808 2809 f = self.opener(self._indexfile)
2809 2810 f.seek(0, io.SEEK_END)
2810 2811 actual = f.tell()
2811 2812 f.close()
2812 2813 s = self.index.entry_size
2813 2814 i = max(0, actual // s)
2814 2815 di = actual - (i * s)
2815 2816 if self._inline:
2816 2817 databytes = 0
2817 2818 for r in self:
2818 2819 databytes += max(0, self.length(r))
2819 2820 dd = 0
2820 2821 di = actual - len(self) * s - databytes
2821 2822 except IOError as inst:
2822 2823 if inst.errno != errno.ENOENT:
2823 2824 raise
2824 2825 di = 0
2825 2826
2826 2827 return (dd, di)
2827 2828
2828 2829 def files(self):
2829 2830 res = [self._indexfile]
2830 2831 if not self._inline:
2831 2832 res.append(self._datafile)
2832 2833 return res
2833 2834
2834 2835 def emitrevisions(
2835 2836 self,
2836 2837 nodes,
2837 2838 nodesorder=None,
2838 2839 revisiondata=False,
2839 2840 assumehaveparentrevisions=False,
2840 2841 deltamode=repository.CG_DELTAMODE_STD,
2841 2842 sidedata_helpers=None,
2842 2843 ):
2843 2844 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2844 2845 raise error.ProgrammingError(
2845 2846 b'unhandled value for nodesorder: %s' % nodesorder
2846 2847 )
2847 2848
2848 2849 if nodesorder is None and not self._generaldelta:
2849 2850 nodesorder = b'storage'
2850 2851
2851 2852 if (
2852 2853 not self._storedeltachains
2853 2854 and deltamode != repository.CG_DELTAMODE_PREV
2854 2855 ):
2855 2856 deltamode = repository.CG_DELTAMODE_FULL
2856 2857
2857 2858 return storageutil.emitrevisions(
2858 2859 self,
2859 2860 nodes,
2860 2861 nodesorder,
2861 2862 revlogrevisiondelta,
2862 2863 deltaparentfn=self.deltaparent,
2863 2864 candeltafn=self.candelta,
2864 2865 rawsizefn=self.rawsize,
2865 2866 revdifffn=self.revdiff,
2866 2867 flagsfn=self.flags,
2867 2868 deltamode=deltamode,
2868 2869 revisiondata=revisiondata,
2869 2870 assumehaveparentrevisions=assumehaveparentrevisions,
2870 2871 sidedata_helpers=sidedata_helpers,
2871 2872 )
2872 2873
2873 2874 DELTAREUSEALWAYS = b'always'
2874 2875 DELTAREUSESAMEREVS = b'samerevs'
2875 2876 DELTAREUSENEVER = b'never'
2876 2877
2877 2878 DELTAREUSEFULLADD = b'fulladd'
2878 2879
2879 2880 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2880 2881
2881 2882 def clone(
2882 2883 self,
2883 2884 tr,
2884 2885 destrevlog,
2885 2886 addrevisioncb=None,
2886 2887 deltareuse=DELTAREUSESAMEREVS,
2887 2888 forcedeltabothparents=None,
2888 2889 sidedata_helpers=None,
2889 2890 ):
2890 2891 """Copy this revlog to another, possibly with format changes.
2891 2892
2892 2893 The destination revlog will contain the same revisions and nodes.
2893 2894 However, it may not be bit-for-bit identical due to e.g. delta encoding
2894 2895 differences.
2895 2896
2896 2897         The ``deltareuse`` argument controls how deltas from the existing revlog
2897 2898 are preserved in the destination revlog. The argument can have the
2898 2899 following values:
2899 2900
2900 2901 DELTAREUSEALWAYS
2901 2902 Deltas will always be reused (if possible), even if the destination
2902 2903 revlog would not select the same revisions for the delta. This is the
2903 2904 fastest mode of operation.
2904 2905 DELTAREUSESAMEREVS
2905 2906 Deltas will be reused if the destination revlog would pick the same
2906 2907 revisions for the delta. This mode strikes a balance between speed
2907 2908 and optimization.
2908 2909 DELTAREUSENEVER
2909 2910 Deltas will never be reused. This is the slowest mode of execution.
2910 2911 This mode can be used to recompute deltas (e.g. if the diff/delta
2911 2912 algorithm changes).
2912 2913 DELTAREUSEFULLADD
2913 2914           Revisions will be re-added as if they were new content. This is
2914 2915           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2915 2916           e.g. large file detection and handling.
2916 2917
2917 2918 Delta computation can be slow, so the choice of delta reuse policy can
2918 2919 significantly affect run time.
2919 2920
2920 2921 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2921 2922 two extremes. Deltas will be reused if they are appropriate. But if the
2922 2923 delta could choose a better revision, it will do so. This means if you
2923 2924 are converting a non-generaldelta revlog to a generaldelta revlog,
2924 2925 deltas will be recomputed if the delta's parent isn't a parent of the
2925 2926 revision.
2926 2927
2927 2928 In addition to the delta policy, the ``forcedeltabothparents``
2928 2929         argument controls whether to force computing deltas against both parents
2929 2930         for merges. When unset, the destination revlog's existing setting is used.
2930 2931
2931 2932 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2932 2933 `sidedata_helpers`.
2933 2934 """
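        # Minimal usage sketch (``src``, ``dst`` and ``tr`` are hypothetical
        # names for a source revlog, an empty destination revlog and an open
        # transaction; they are not defined in this file):
        #
        #     src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
        #
        # would copy every revision into ``dst`` while recomputing all deltas.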
2934 2935 if deltareuse not in self.DELTAREUSEALL:
2935 2936 raise ValueError(
2936 2937 _(b'value for deltareuse invalid: %s') % deltareuse
2937 2938 )
2938 2939
2939 2940 if len(destrevlog):
2940 2941 raise ValueError(_(b'destination revlog is not empty'))
2941 2942
2942 2943 if getattr(self, 'filteredrevs', None):
2943 2944 raise ValueError(_(b'source revlog has filtered revisions'))
2944 2945 if getattr(destrevlog, 'filteredrevs', None):
2945 2946 raise ValueError(_(b'destination revlog has filtered revisions'))
2946 2947
2947 2948         # lazydelta and lazydeltabase control whether to reuse a cached delta,
2948 2949 # if possible.
2949 2950 oldlazydelta = destrevlog._lazydelta
2950 2951 oldlazydeltabase = destrevlog._lazydeltabase
2951 2952 oldamd = destrevlog._deltabothparents
2952 2953
2953 2954 try:
2954 2955 if deltareuse == self.DELTAREUSEALWAYS:
2955 2956 destrevlog._lazydeltabase = True
2956 2957 destrevlog._lazydelta = True
2957 2958 elif deltareuse == self.DELTAREUSESAMEREVS:
2958 2959 destrevlog._lazydeltabase = False
2959 2960 destrevlog._lazydelta = True
2960 2961 elif deltareuse == self.DELTAREUSENEVER:
2961 2962 destrevlog._lazydeltabase = False
2962 2963 destrevlog._lazydelta = False
2963 2964
2964 2965 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2965 2966
2966 2967 self._clone(
2967 2968 tr,
2968 2969 destrevlog,
2969 2970 addrevisioncb,
2970 2971 deltareuse,
2971 2972 forcedeltabothparents,
2972 2973 sidedata_helpers,
2973 2974 )
2974 2975
2975 2976 finally:
2976 2977 destrevlog._lazydelta = oldlazydelta
2977 2978 destrevlog._lazydeltabase = oldlazydeltabase
2978 2979 destrevlog._deltabothparents = oldamd
2979 2980
2980 2981 def _clone(
2981 2982 self,
2982 2983 tr,
2983 2984 destrevlog,
2984 2985 addrevisioncb,
2985 2986 deltareuse,
2986 2987 forcedeltabothparents,
2987 2988 sidedata_helpers,
2988 2989 ):
2989 2990 """perform the core duty of `revlog.clone` after parameter processing"""
2990 2991 deltacomputer = deltautil.deltacomputer(destrevlog)
2991 2992 index = self.index
2992 2993 for rev in self:
2993 2994 entry = index[rev]
2994 2995
2995 2996 # Some classes override linkrev to take filtered revs into
2996 2997 # account. Use raw entry from index.
2997 2998 flags = entry[0] & 0xFFFF
2998 2999 linkrev = entry[4]
2999 3000 p1 = index[entry[5]][7]
3000 3001 p2 = index[entry[6]][7]
3001 3002 node = entry[7]
3002 3003
3003 3004 # (Possibly) reuse the delta from the revlog if allowed and
3004 3005 # the revlog chunk is a delta.
3005 3006 cachedelta = None
3006 3007 rawtext = None
3007 3008 if deltareuse == self.DELTAREUSEFULLADD:
3008 3009 text = self._revisiondata(rev)
3009 3010 sidedata = self.sidedata(rev)
3010 3011
3011 3012 if sidedata_helpers is not None:
3012 3013 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3013 3014 self, sidedata_helpers, sidedata, rev
3014 3015 )
3015 3016 flags = flags | new_flags[0] & ~new_flags[1]
3016 3017
3017 3018 destrevlog.addrevision(
3018 3019 text,
3019 3020 tr,
3020 3021 linkrev,
3021 3022 p1,
3022 3023 p2,
3023 3024 cachedelta=cachedelta,
3024 3025 node=node,
3025 3026 flags=flags,
3026 3027 deltacomputer=deltacomputer,
3027 3028 sidedata=sidedata,
3028 3029 )
3029 3030 else:
3030 3031 if destrevlog._lazydelta:
3031 3032 dp = self.deltaparent(rev)
3032 3033 if dp != nullrev:
3033 3034 cachedelta = (dp, bytes(self._chunk(rev)))
3034 3035
3035 3036 sidedata = None
3036 3037 if not cachedelta:
3037 3038 rawtext = self._revisiondata(rev)
3038 3039 sidedata = self.sidedata(rev)
3039 3040 if sidedata is None:
3040 3041 sidedata = self.sidedata(rev)
3041 3042
3042 3043 if sidedata_helpers is not None:
3043 3044 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3044 3045 self, sidedata_helpers, sidedata, rev
3045 3046 )
3046 3047 flags = flags | new_flags[0] & ~new_flags[1]
3047 3048
3048 3049 with destrevlog._writing(tr):
3049 3050 destrevlog._addrevision(
3050 3051 node,
3051 3052 rawtext,
3052 3053 tr,
3053 3054 linkrev,
3054 3055 p1,
3055 3056 p2,
3056 3057 flags,
3057 3058 cachedelta,
3058 3059 deltacomputer=deltacomputer,
3059 3060 sidedata=sidedata,
3060 3061 )
3061 3062
3062 3063 if addrevisioncb:
3063 3064 addrevisioncb(self, rev, node)
3064 3065
3065 3066 def censorrevision(self, tr, censornode, tombstone=b''):
3066 3067 if self._format_version == REVLOGV0:
3067 3068 raise error.RevlogError(
3068 3069 _(b'cannot censor with version %d revlogs')
3069 3070 % self._format_version
3070 3071 )
3071 3072 elif self._format_version == REVLOGV1:
3072 3073 censor.v1_censor(self, tr, censornode, tombstone)
3073 3074 else:
3074 3075 # revlog v2
3075 3076 raise error.RevlogError(
3076 3077 _(b'cannot censor with version %d revlogs')
3077 3078 % self._format_version
3078 3079 )
3079 3080
3080 3081 def verifyintegrity(self, state):
3081 3082 """Verifies the integrity of the revlog.
3082 3083
3083 3084 Yields ``revlogproblem`` instances describing problems that are
3084 3085 found.
3085 3086 """
3086 3087 dd, di = self.checksize()
3087 3088 if dd:
3088 3089 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3089 3090 if di:
3090 3091 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3091 3092
3092 3093 version = self._format_version
3093 3094
3094 3095 # The verifier tells us what version revlog we should be.
3095 3096 if version != state[b'expectedversion']:
3096 3097 yield revlogproblem(
3097 3098 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3098 3099 % (self.display_id, version, state[b'expectedversion'])
3099 3100 )
3100 3101
3101 3102 state[b'skipread'] = set()
3102 3103 state[b'safe_renamed'] = set()
3103 3104
3104 3105 for rev in self:
3105 3106 node = self.node(rev)
3106 3107
3107 3108 # Verify contents. 4 cases to care about:
3108 3109 #
3109 3110 # common: the most common case
3110 3111 # rename: with a rename
3111 3112 # meta: file content starts with b'\1\n', the metadata
3112 3113 # header defined in filelog.py, but without a rename
3113 3114 # ext: content stored externally
3114 3115 #
3115 3116 # More formally, their differences are shown below:
3116 3117 #
3117 3118 # | common | rename | meta | ext
3118 3119 # -------------------------------------------------------
3119 3120 # flags() | 0 | 0 | 0 | not 0
3120 3121 # renamed() | False | True | False | ?
3121 3122 # rawtext[0:2]=='\1\n'| False | True | True | ?
3122 3123 #
3123 3124 # "rawtext" means the raw text stored in revlog data, which
3124 3125 # could be retrieved by "rawdata(rev)". "text"
3125 3126 # mentioned below is "revision(rev)".
3126 3127 #
3127 3128 # There are 3 different lengths stored physically:
3128 3129 # 1. L1: rawsize, stored in revlog index
3129 3130 # 2. L2: len(rawtext), stored in revlog data
3130 3131 # 3. L3: len(text), stored in revlog data if flags==0, or
3131 3132 # possibly somewhere else if flags!=0
3132 3133 #
3133 3134 # L1 should be equal to L2. L3 could be different from them.
3134 3135 # "text" may or may not affect commit hash depending on flag
3135 3136 # processors (see flagutil.addflagprocessor).
3136 3137 #
3137 3138 # | common | rename | meta | ext
3138 3139 # -------------------------------------------------
3139 3140 # rawsize() | L1 | L1 | L1 | L1
3140 3141 # size() | L1 | L2-LM | L1(*) | L1 (?)
3141 3142 # len(rawtext) | L2 | L2 | L2 | L2
3142 3143 # len(text) | L2 | L2 | L2 | L3
3143 3144 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3144 3145 #
3145 3146 # LM: length of metadata, depending on rawtext
3146 3147 # (*): not ideal, see comment in filelog.size
3147 3148 # (?): could be "- len(meta)" if the resolved content has
3148 3149 # rename metadata
3149 3150 #
3150 3151 # Checks needed to be done:
3151 3152 # 1. length check: L1 == L2, in all cases.
3152 3153 # 2. hash check: depending on flag processor, we may need to
3153 3154 # use either "text" (external), or "rawtext" (in revlog).
3154 3155
3155 3156 try:
3156 3157 skipflags = state.get(b'skipflags', 0)
3157 3158 if skipflags:
3158 3159 skipflags &= self.flags(rev)
3159 3160
3160 3161 _verify_revision(self, skipflags, state, node)
3161 3162
3162 3163 l1 = self.rawsize(rev)
3163 3164 l2 = len(self.rawdata(node))
3164 3165
3165 3166 if l1 != l2:
3166 3167 yield revlogproblem(
3167 3168 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3168 3169 node=node,
3169 3170 )
3170 3171
3171 3172 except error.CensoredNodeError:
3172 3173 if state[b'erroroncensored']:
3173 3174 yield revlogproblem(
3174 3175 error=_(b'censored file data'), node=node
3175 3176 )
3176 3177 state[b'skipread'].add(node)
3177 3178 except Exception as e:
3178 3179 yield revlogproblem(
3179 3180 error=_(b'unpacking %s: %s')
3180 3181 % (short(node), stringutil.forcebytestr(e)),
3181 3182 node=node,
3182 3183 )
3183 3184 state[b'skipread'].add(node)
3184 3185
3185 3186 def storageinfo(
3186 3187 self,
3187 3188 exclusivefiles=False,
3188 3189 sharedfiles=False,
3189 3190 revisionscount=False,
3190 3191 trackedsize=False,
3191 3192 storedsize=False,
3192 3193 ):
3193 3194 d = {}
3194 3195
3195 3196 if exclusivefiles:
3196 3197 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3197 3198 if not self._inline:
3198 3199 d[b'exclusivefiles'].append((self.opener, self._datafile))
3199 3200
3200 3201 if sharedfiles:
3201 3202 d[b'sharedfiles'] = []
3202 3203
3203 3204 if revisionscount:
3204 3205 d[b'revisionscount'] = len(self)
3205 3206
3206 3207 if trackedsize:
3207 3208 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3208 3209
3209 3210 if storedsize:
3210 3211 d[b'storedsize'] = sum(
3211 3212 self.opener.stat(path).st_size for path in self.files()
3212 3213 )
3213 3214
3214 3215 return d
3215 3216
3216 3217 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3217 3218 if not self.hassidedata:
3218 3219 return
3219 3220         # revlog formats with sidedata support do not support inline data
3220 3221 assert not self._inline
3221 3222 if not helpers[1] and not helpers[2]:
3222 3223 # Nothing to generate or remove
3223 3224 return
3224 3225
3225 3226 new_entries = []
3226 3227 # append the new sidedata
3227 3228 with self._writing(transaction):
3228 3229 ifh, dfh, sdfh = self._writinghandles
3229 3230 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3230 3231
3231 3232 current_offset = sdfh.tell()
3232 3233 for rev in range(startrev, endrev + 1):
3233 3234 entry = self.index[rev]
3234 3235 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3235 3236 store=self,
3236 3237 sidedata_helpers=helpers,
3237 3238 sidedata={},
3238 3239 rev=rev,
3239 3240 )
3240 3241
3241 3242 serialized_sidedata = sidedatautil.serialize_sidedata(
3242 3243 new_sidedata
3243 3244 )
3244 3245
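                # Descriptive note on the mode selection below (based on the
                # branches that follow): sidedata is stored uncompressed
                # (PLAIN) unless compressing it actually shrinks it; when the
                # compressed blob starts with the docket's default compression
                # header, that header can be implied (DEFAULT), otherwise the
                # blob keeps its own header (INLINE).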
3245 3246 sidedata_compression_mode = COMP_MODE_INLINE
3246 3247 if serialized_sidedata and self.hassidedata:
3247 3248 sidedata_compression_mode = COMP_MODE_PLAIN
3248 3249 h, comp_sidedata = self.compress(serialized_sidedata)
3249 3250 if (
3250 3251 h != b'u'
3251 3252 and comp_sidedata[0] != b'\0'
3252 3253 and len(comp_sidedata) < len(serialized_sidedata)
3253 3254 ):
3254 3255 assert not h
3255 3256 if (
3256 3257 comp_sidedata[0]
3257 3258 == self._docket.default_compression_header
3258 3259 ):
3259 3260 sidedata_compression_mode = COMP_MODE_DEFAULT
3260 3261 serialized_sidedata = comp_sidedata
3261 3262 else:
3262 3263 sidedata_compression_mode = COMP_MODE_INLINE
3263 3264 serialized_sidedata = comp_sidedata
3264 3265 if entry[8] != 0 or entry[9] != 0:
3265 3266 # rewriting entries that already have sidedata is not
3266 3267 # supported yet, because it introduces garbage data in the
3267 3268 # revlog.
3268 3269 msg = b"rewriting existing sidedata is not supported yet"
3269 3270 raise error.Abort(msg)
3270 3271
3271 3272 # Apply (potential) flags to add and to remove after running
3272 3273 # the sidedata helpers
3273 3274 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3274 3275 entry_update = (
3275 3276 current_offset,
3276 3277 len(serialized_sidedata),
3277 3278 new_offset_flags,
3278 3279 sidedata_compression_mode,
3279 3280 )
3280 3281
3281 3282                 # the sidedata computation might have moved the file cursors around
3282 3283 sdfh.seek(current_offset, os.SEEK_SET)
3283 3284 sdfh.write(serialized_sidedata)
3284 3285 new_entries.append(entry_update)
3285 3286 current_offset += len(serialized_sidedata)
3286 3287 self._docket.sidedata_end = sdfh.tell()
3287 3288
3288 3289 # rewrite the new index entries
3289 3290 ifh.seek(startrev * self.index.entry_size)
3290 3291 for i, e in enumerate(new_entries):
3291 3292 rev = startrev + i
3292 3293 self.index.replace_sidedata_info(rev, *e)
3293 3294 packed = self.index.entry_binary(rev)
3294 3295 if rev == 0 and self._docket is None:
3295 3296 header = self._format_flags | self._format_version
3296 3297 header = self.index.pack_header(header)
3297 3298 packed = header + packed
3298 3299 ifh.write(packed)