changelog-delay: adds some check around delaying and diverting write...
marmoute
r51995:594f9128 default
@@ -1,641 +1,644 b''
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 from .i18n import _
10 10 from .node import (
11 11 bin,
12 12 hex,
13 13 )
14 14 from .thirdparty import attr
15 15
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 metadata,
20 20 pycompat,
21 21 revlog,
22 22 )
23 23 from .utils import (
24 24 dateutil,
25 25 stringutil,
26 26 )
27 27 from .revlogutils import (
28 28 constants as revlog_constants,
29 29 flagutil,
30 30 )
31 31
32 32 _defaultextra = {b'branch': b'default'}
33 33
34 34
35 35 def _string_escape(text):
36 36 """
37 37 >>> from .pycompat import bytechr as chr
38 38 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
39 39 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
40 40 >>> s
41 41 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
42 42 >>> res = _string_escape(s)
43 43 >>> s == _string_unescape(res)
44 44 True
45 45 """
46 46 # subset of the string_escape codec
47 47 text = (
48 48 text.replace(b'\\', b'\\\\')
49 49 .replace(b'\n', b'\\n')
50 50 .replace(b'\r', b'\\r')
51 51 )
52 52 return text.replace(b'\0', b'\\0')
53 53
54 54
55 55 def _string_unescape(text):
56 56 if b'\\0' in text:
57 57 # fix up \0 without getting into trouble with \\0
58 58 text = text.replace(b'\\\\', b'\\\\\n')
59 59 text = text.replace(b'\\0', b'\0')
60 60 text = text.replace(b'\n', b'')
61 61 return stringutil.unescapestr(text)
62 62
63 63
64 64 def decodeextra(text):
65 65 """
66 66 >>> from .pycompat import bytechr as chr
67 67 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
68 68 ... ).items())
69 69 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
70 70 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
71 71 ... b'baz': chr(92) + chr(0) + b'2'})
72 72 ... ).items())
73 73 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
74 74 """
75 75 extra = _defaultextra.copy()
76 76 for l in text.split(b'\0'):
77 77 if l:
78 78 k, v = _string_unescape(l).split(b':', 1)
79 79 extra[k] = v
80 80 return extra
81 81
82 82
83 83 def encodeextra(d):
84 84 # keys must be sorted to produce a deterministic changelog entry
85 85 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
86 86 return b"\0".join(items)
87 87
88 88
89 89 def stripdesc(desc):
90 90 """strip trailing whitespace and leading and trailing empty lines"""
91 91 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
92 92
93 93
94 94 class appender:
95 95 """the changelog index must be updated last on disk, so we use this class
96 96 to delay writes to it"""
97 97
98 98 def __init__(self, vfs, name, mode, buf):
99 99 self.data = buf
100 100 fp = vfs(name, mode)
101 101 self.fp = fp
102 102 self.offset = fp.tell()
103 103 self.size = vfs.fstat(fp).st_size
104 104 self._end = self.size
105 105
106 106 def end(self):
107 107 return self._end
108 108
109 109 def tell(self):
110 110 return self.offset
111 111
112 112 def flush(self):
113 113 pass
114 114
115 115 @property
116 116 def closed(self):
117 117 return self.fp.closed
118 118
119 119 def close(self):
120 120 self.fp.close()
121 121
122 122 def seek(self, offset, whence=0):
123 123 '''virtual file offset spans real file and data'''
124 124 if whence == 0:
125 125 self.offset = offset
126 126 elif whence == 1:
127 127 self.offset += offset
128 128 elif whence == 2:
129 129 self.offset = self.end() + offset
130 130 if self.offset < self.size:
131 131 self.fp.seek(self.offset)
132 132
133 133 def read(self, count=-1):
134 134 '''only trick here is reads that span real file and data'''
135 135 ret = b""
136 136 if self.offset < self.size:
137 137 s = self.fp.read(count)
138 138 ret = s
139 139 self.offset += len(s)
140 140 if count > 0:
141 141 count -= len(s)
142 142 if count != 0:
143 143 doff = self.offset - self.size
144 144 self.data.insert(0, b"".join(self.data))
145 145 del self.data[1:]
146 146 s = self.data[0][doff : doff + count]
147 147 self.offset += len(s)
148 148 ret += s
149 149 return ret
150 150
151 151 def write(self, s):
152 152 self.data.append(bytes(s))
153 153 self.offset += len(s)
154 154 self._end += len(s)
155 155
156 156 def __enter__(self):
157 157 self.fp.__enter__()
158 158 return self
159 159
160 160 def __exit__(self, *args):
161 161 return self.fp.__exit__(*args)
162 162
163 163
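The virtual-file behaviour of `appender` is easiest to see in a small sketch. The snippet below is illustrative only: `fakevfs` is a hypothetical stand-in for a Mercurial vfs, and the path is made up. Reads are served from the real file first and then from the in-memory buffer, while writes only ever grow the buffer.

    import os

    class fakevfs:
        """Hypothetical minimal vfs stand-in (illustration only)."""
        def __init__(self, base):
            self._base = base
        def __call__(self, name, mode):
            return open(os.path.join(self._base, os.fsdecode(name)),
                        os.fsdecode(mode))
        def fstat(self, fp):
            return os.fstat(fp.fileno())

    buf = []
    fp = appender(fakevfs('/tmp/store'), b'00changelog.i', b'ab+', buf)
    fp.write(b'pending entry')   # buffered in `buf`, never written to disk
    fp.seek(0)
    data = fp.read(fp.end())     # on-disk bytes followed by the buffer
    assert data.endswith(b'pending entry')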
164 164 class _divertopener:
165 165 def __init__(self, opener, target):
166 166 self._opener = opener
167 167 self._target = target
168 168
169 169 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
170 170 if name != self._target:
171 171 return self._opener(name, mode, **kwargs)
172 172 return self._opener(name + b".a", mode, **kwargs)
173 173
174 174 def __getattr__(self, attr):
175 175 return getattr(self._opener, attr)
176 176
177 177
178 178 class _delayopener:
179 179 """build an opener that stores chunks in 'buf' instead of 'target'"""
180 180
181 181 def __init__(self, opener, target, buf):
182 182 self._opener = opener
183 183 self._target = target
184 184 self._buf = buf
185 185
186 186 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
187 187 if name != self._target:
188 188 return self._opener(name, mode, **kwargs)
189 189 assert not kwargs
190 190 return appender(self._opener, name, mode, self._buf)
191 191
192 192 def __getattr__(self, attr):
193 193 return getattr(self._opener, attr)
194 194
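The two wrappers differ only in where the diverted bytes end up. A short sketch (assuming an existing `vfs` opener object) contrasts them: `_divertopener` redirects the target file to a ".a" sibling on disk, while `_delayopener` swaps in an in-memory `appender` so nothing hits the index file at all.

    buf = []
    divert = _divertopener(vfs, b'00changelog.i')
    delay = _delayopener(vfs, b'00changelog.i', buf)

    divert(b'00changelog.d')          # other files pass through untouched
    divert(b'00changelog.i', b'a+b')  # silently opens '00changelog.i.a'
    delay(b'00changelog.i', b'a+b')   # returns an appender writing into buf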
195 195
196 196 @attr.s
197 197 class _changelogrevision:
198 198 # Extensions might modify _defaultextra, so let the constructor below pass
199 199 # it in
200 200 extra = attr.ib()
201 201 manifest = attr.ib()
202 202 user = attr.ib(default=b'')
203 203 date = attr.ib(default=(0, 0))
204 204 files = attr.ib(default=attr.Factory(list))
205 205 filesadded = attr.ib(default=None)
206 206 filesremoved = attr.ib(default=None)
207 207 p1copies = attr.ib(default=None)
208 208 p2copies = attr.ib(default=None)
209 209 description = attr.ib(default=b'')
210 210 branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))
211 211
212 212
213 213 class changelogrevision:
214 214 """Holds results of a parsed changelog revision.
215 215
216 216 Changelog revisions consist of multiple pieces of data, including
217 217 the manifest node, user, and date. This object exposes a view into
218 218 the parsed object.
219 219 """
220 220
221 221 __slots__ = (
222 222 '_offsets',
223 223 '_text',
224 224 '_sidedata',
225 225 '_cpsd',
226 226 '_changes',
227 227 )
228 228
229 229 def __new__(cls, cl, text, sidedata, cpsd):
230 230 if not text:
231 231 return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)
232 232
233 233 self = super(changelogrevision, cls).__new__(cls)
234 234 # We could return here and implement the following as an __init__.
235 235 # But doing it here is equivalent and saves an extra function call.
236 236
237 237 # format used:
238 238 # nodeid\n : manifest node in ascii
239 239 # user\n : user, no \n or \r allowed
240 240 # time tz extra\n : date (time is int or float, timezone is int)
241 241 # : extra is metadata, encoded and separated by '\0'
242 242 # : older versions ignore it
243 243 # files\n\n : files modified by the cset, no \n or \r allowed
244 244 # (.*) : comment (free text, ideally utf-8)
245 245 #
246 246 # changelog v0 doesn't use extra
247 247
248 248 nl1 = text.index(b'\n')
249 249 nl2 = text.index(b'\n', nl1 + 1)
250 250 nl3 = text.index(b'\n', nl2 + 1)
251 251
252 252 # The list of files may be empty, in which case nl3 is the first of the
253 253 # double newline that precedes the description.
254 254 if text[nl3 + 1 : nl3 + 2] == b'\n':
255 255 doublenl = nl3
256 256 else:
257 257 doublenl = text.index(b'\n\n', nl3 + 1)
258 258
259 259 self._offsets = (nl1, nl2, nl3, doublenl)
260 260 self._text = text
261 261 self._sidedata = sidedata
262 262 self._cpsd = cpsd
263 263 self._changes = None
264 264
265 265 return self
266 266
267 267 @property
268 268 def manifest(self):
269 269 return bin(self._text[0 : self._offsets[0]])
270 270
271 271 @property
272 272 def user(self):
273 273 off = self._offsets
274 274 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
275 275
276 276 @property
277 277 def _rawdate(self):
278 278 off = self._offsets
279 279 dateextra = self._text[off[1] + 1 : off[2]]
280 280 return dateextra.split(b' ', 2)[0:2]
281 281
282 282 @property
283 283 def _rawextra(self):
284 284 off = self._offsets
285 285 dateextra = self._text[off[1] + 1 : off[2]]
286 286 fields = dateextra.split(b' ', 2)
287 287 if len(fields) != 3:
288 288 return None
289 289
290 290 return fields[2]
291 291
292 292 @property
293 293 def date(self):
294 294 raw = self._rawdate
295 295 time = float(raw[0])
296 296 # Various tools did silly things with the timezone.
297 297 try:
298 298 timezone = int(raw[1])
299 299 except ValueError:
300 300 timezone = 0
301 301
302 302 return time, timezone
303 303
304 304 @property
305 305 def extra(self):
306 306 raw = self._rawextra
307 307 if raw is None:
308 308 return _defaultextra
309 309
310 310 return decodeextra(raw)
311 311
312 312 @property
313 313 def changes(self):
314 314 if self._changes is not None:
315 315 return self._changes
316 316 if self._cpsd:
317 317 changes = metadata.decode_files_sidedata(self._sidedata)
318 318 else:
319 319 changes = metadata.ChangingFiles(
320 320 touched=self.files or (),
321 321 added=self.filesadded or (),
322 322 removed=self.filesremoved or (),
323 323 p1_copies=self.p1copies or {},
324 324 p2_copies=self.p2copies or {},
325 325 )
326 326 self._changes = changes
327 327 return changes
328 328
329 329 @property
330 330 def files(self):
331 331 if self._cpsd:
332 332 return sorted(self.changes.touched)
333 333 off = self._offsets
334 334 if off[2] == off[3]:
335 335 return []
336 336
337 337 return self._text[off[2] + 1 : off[3]].split(b'\n')
338 338
339 339 @property
340 340 def filesadded(self):
341 341 if self._cpsd:
342 342 return self.changes.added
343 343 else:
344 344 rawindices = self.extra.get(b'filesadded')
345 345 if rawindices is None:
346 346 return None
347 347 return metadata.decodefileindices(self.files, rawindices)
348 348
349 349 @property
350 350 def filesremoved(self):
351 351 if self._cpsd:
352 352 return self.changes.removed
353 353 else:
354 354 rawindices = self.extra.get(b'filesremoved')
355 355 if rawindices is None:
356 356 return None
357 357 return metadata.decodefileindices(self.files, rawindices)
358 358
359 359 @property
360 360 def p1copies(self):
361 361 if self._cpsd:
362 362 return self.changes.copied_from_p1
363 363 else:
364 364 rawcopies = self.extra.get(b'p1copies')
365 365 if rawcopies is None:
366 366 return None
367 367 return metadata.decodecopies(self.files, rawcopies)
368 368
369 369 @property
370 370 def p2copies(self):
371 371 if self._cpsd:
372 372 return self.changes.copied_from_p2
373 373 else:
374 374 rawcopies = self.extra.get(b'p2copies')
375 375 if rawcopies is None:
376 376 return None
377 377 return metadata.decodecopies(self.files, rawcopies)
378 378
379 379 @property
380 380 def description(self):
381 381 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
382 382
383 383 @property
384 384 def branchinfo(self):
385 385 extra = self.extra
386 386 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
387 387
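The text format parsed above can be reproduced by hand. The entry below is synthetic (made-up manifest node, user, and date) and shows how the header and description split on the double newline:

    raw = (
        b"1111111111111111111111111111111111111111\n"  # manifest node (hex)
        b"Alice <alice@example.com>\n"                  # user
        b"1700000000 0\n"                               # time tz [extra]
        b"a/file.txt\n"
        b"b/other.txt\n"
        b"\n"
        b"commit message body"
    )
    header, message = raw.split(b"\n\n", 1)
    manifest_hex, user, date, *files = header.split(b"\n")
    assert files == [b"a/file.txt", b"b/other.txt"]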
388 388
389 389 class changelog(revlog.revlog):
390 390 def __init__(self, opener, trypending=False, concurrencychecker=None):
391 391 """Load a changelog revlog using an opener.
392 392
393 393 If ``trypending`` is true, we attempt to load the index from a
394 394 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
395 395 The ``00changelog.i.a`` file contains index (and possibly inline
396 396 revision) data for a transaction that hasn't been finalized yet.
397 397 It exists in a separate file to facilitate readers (such as
398 398 hooks processes) accessing data before a transaction is finalized.
399 399
400 400 ``concurrencychecker`` will be passed to the revlog init function, see
401 401 the documentation there.
402 402 """
403 403 revlog.revlog.__init__(
404 404 self,
405 405 opener,
406 406 target=(revlog_constants.KIND_CHANGELOG, None),
407 407 radix=b'00changelog',
408 408 checkambig=True,
409 409 mmaplargeindex=True,
410 410 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
411 411 concurrencychecker=concurrencychecker,
412 412 trypending=trypending,
413 413 )
414 414
415 415 if self._initempty and (self._format_version == revlog.REVLOGV1):
416 416 # changelogs don't benefit from generaldelta.
417 417
418 418 self._format_flags &= ~revlog.FLAG_GENERALDELTA
419 419 self.delta_config.general_delta = False
420 420
421 421 # Delta chains for changelogs tend to be very small because entries
422 422 # tend to be small and don't delta well with each other. So disable delta
423 423 # chains.
424 424 self._storedeltachains = False
425 425
426 426 self._realopener = opener
427 427 self._delayed = False
428 428 self._delaybuf = None
429 429 self._divert = False
430 430 self._filteredrevs = frozenset()
431 431 self._filteredrevs_hashcache = {}
432 432 self._copiesstorage = opener.options.get(b'copies-storage')
433 433
434 434 @property
435 435 def filteredrevs(self):
436 436 return self._filteredrevs
437 437
438 438 @filteredrevs.setter
439 439 def filteredrevs(self, val):
440 440 # Ensure all updates go through this function
441 441 assert isinstance(val, frozenset)
442 442 self._filteredrevs = val
443 443 self._filteredrevs_hashcache = {}
444 444
445 445 def _write_docket(self, tr):
446 446 if not self._delayed:
447 447 super(changelog, self)._write_docket(tr)
448 448
449 449 def delayupdate(self, tr):
450 450 """delay visibility of index updates to other readers"""
451 assert not self._inner.is_open
451 452 if self._docket is None and not self._delayed:
452 453 if len(self) == 0:
453 454 self._divert = True
454 455 if self._realopener.exists(self._indexfile + b'.a'):
455 456 self._realopener.unlink(self._indexfile + b'.a')
456 457 self.opener = _divertopener(self._realopener, self._indexfile)
457 458 else:
458 459 self._delaybuf = []
459 460 self.opener = _delayopener(
460 461 self._realopener, self._indexfile, self._delaybuf
461 462 )
462 463 self._inner.opener = self.opener
463 464 self._inner._segmentfile.opener = self.opener
464 465 self._inner._segmentfile_sidedata.opener = self.opener
465 466 self._delayed = True
466 467 tr.addpending(b'cl-%i' % id(self), self._writepending)
467 468 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
468 469
469 470 def _finalize(self, tr):
470 471 """finalize index updates"""
472 assert not self._inner.is_open
471 473 self._delayed = False
472 474 self.opener = self._realopener
473 475 self._inner.opener = self.opener
474 476 self._inner._segmentfile.opener = self.opener
475 477 self._inner._segmentfile_sidedata.opener = self.opener
476 478 # move redirected index data back into place
477 479 if self._docket is not None:
478 480 self._write_docket(tr)
479 481 elif self._divert:
480 482 assert not self._delaybuf
481 483 tmpname = self._indexfile + b".a"
482 484 nfile = self.opener.open(tmpname)
483 485 nfile.close()
484 486 self.opener.rename(tmpname, self._indexfile, checkambig=True)
485 487 elif self._delaybuf:
486 488 fp = self.opener(self._indexfile, b'a', checkambig=True)
487 489 fp.write(b"".join(self._delaybuf))
488 490 fp.close()
489 491 self._delaybuf = None
490 492 self._divert = False
491 493 # split when we're done
492 494 self._enforceinlinesize(tr, side_write=False)
493 495
494 496 def _writepending(self, tr):
495 497 """create a file containing the unfinalized state for
496 498 pretxnchangegroup"""
499 assert not self._inner.is_open
497 500 if self._docket:
498 501 return self._docket.write(tr, pending=True)
499 502 if self._delaybuf:
500 503 # make a temporary copy of the index
501 504 fp1 = self._realopener(self._indexfile)
502 505 pendingfilename = self._indexfile + b".a"
503 506 # register as a temp file to ensure cleanup on failure
504 507 tr.registertmp(pendingfilename)
505 508 # write existing data
506 509 fp2 = self._realopener(pendingfilename, b"w")
507 510 fp2.write(fp1.read())
508 511 # add pending data
509 512 fp2.write(b"".join(self._delaybuf))
510 513 fp2.close()
511 514 # switch modes so finalize can simply rename
512 515 self._delaybuf = None
513 516 self._divert = True
514 517 self.opener = _divertopener(self._realopener, self._indexfile)
515 518 self._inner.opener = self.opener
516 519 self._inner._segmentfile.opener = self.opener
517 520 self._inner._segmentfile_sidedata.opener = self.opener
518 521
519 522 if self._divert:
520 523 return True
521 524
522 525 return False
523 526
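Taken together, `delayupdate`, `_writepending` and `_finalize` implement a three-phase dance around the transaction. The sketch below is purely illustrative; the last two methods are not called directly but are triggered through the hooks registered in `delayupdate` (`cl` is a changelog, `tr` an open transaction):

    cl.delayupdate(tr)   # empty repo: divert to 00changelog.i.a;
                         # otherwise: buffer index writes in memory
    # ... revisions are appended through the substituted opener ...
    # pending hook  -> cl._writepending(tr): copy index + buffer into
    #                  00changelog.i.a so hooks can read the pending state
    # finalize hook -> cl._finalize(tr): rename the .a file over
    #                  00changelog.i (or append the buffer) and restore
    #                  the real opener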
524 527 def _enforceinlinesize(self, tr, side_write=True):
525 528 if not self._delayed:
526 529 revlog.revlog._enforceinlinesize(self, tr, side_write=side_write)
527 530
528 531 def read(self, nodeorrev):
529 532 """Obtain data from a parsed changelog revision.
530 533
531 534 Returns a 6-tuple of:
532 535
533 536 - manifest node in binary
534 537 - author/user as a localstr
535 538 - date as a 2-tuple of (time, timezone)
536 539 - list of files
537 540 - commit message as a localstr
538 541 - dict of extra metadata
539 542
540 543 Unless you need to access all fields, consider calling
541 544 ``changelogrevision`` instead, as it is faster for partial object
542 545 access.
543 546 """
544 547 d = self._revisiondata(nodeorrev)
545 548 sidedata = self.sidedata(nodeorrev)
546 549 copy_sd = self._copiesstorage == b'changeset-sidedata'
547 550 c = changelogrevision(self, d, sidedata, copy_sd)
548 551 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
549 552
550 553 def changelogrevision(self, nodeorrev):
551 554 """Obtain a ``changelogrevision`` for a node or revision."""
552 555 text = self._revisiondata(nodeorrev)
553 556 sidedata = self.sidedata(nodeorrev)
554 557 return changelogrevision(
555 558 self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
556 559 )
557 560
558 561 def readfiles(self, nodeorrev):
559 562 """
560 563 short version of read that only returns the files modified by the cset
561 564 """
562 565 text = self.revision(nodeorrev)
563 566 if not text:
564 567 return []
565 568 last = text.index(b"\n\n")
566 569 l = text[:last].split(b'\n')
567 570 return l[3:]
568 571
569 572 def add(
570 573 self,
571 574 manifest,
572 575 files,
573 576 desc,
574 577 transaction,
575 578 p1,
576 579 p2,
577 580 user,
578 581 date=None,
579 582 extra=None,
580 583 ):
581 584 # Convert to UTF-8 encoded bytestrings as the very first
582 585 # thing: calling any method on a localstr object will turn it
583 586 # into a str object and the cached UTF-8 string is thus lost.
584 587 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
585 588
586 589 user = user.strip()
587 590 # An empty username or a username with a "\n" will make the
588 591 # revision text contain two "\n\n" sequences -> corrupt
589 592 # repository since read cannot unpack the revision.
590 593 if not user:
591 594 raise error.StorageError(_(b"empty username"))
592 595 if b"\n" in user:
593 596 raise error.StorageError(
594 597 _(b"username %r contains a newline") % pycompat.bytestr(user)
595 598 )
596 599
597 600 desc = stripdesc(desc)
598 601
599 602 if date:
600 603 parseddate = b"%d %d" % dateutil.parsedate(date)
601 604 else:
602 605 parseddate = b"%d %d" % dateutil.makedate()
603 606 if extra:
604 607 branch = extra.get(b"branch")
605 608 if branch in (b"default", b""):
606 609 del extra[b"branch"]
607 610 elif branch in (b".", b"null", b"tip"):
608 611 raise error.StorageError(
609 612 _(b'the name \'%s\' is reserved') % branch
610 613 )
611 614 sortedfiles = sorted(files.touched)
612 615 flags = 0
613 616 sidedata = None
614 617 if self._copiesstorage == b'changeset-sidedata':
615 618 if files.has_copies_info:
616 619 flags |= flagutil.REVIDX_HASCOPIESINFO
617 620 sidedata = metadata.encode_files_sidedata(files)
618 621
619 622 if extra:
620 623 extra = encodeextra(extra)
621 624 parseddate = b"%s %s" % (parseddate, extra)
622 625 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
623 626 text = b"\n".join(l)
624 627 rev = self.addrevision(
625 628 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
626 629 )
627 630 return self.node(rev)
628 631
629 632 def branchinfo(self, rev):
630 633 """return the branch name and open/close state of a revision
631 634
632 635 This function exists because creating a changectx object
633 636 just to access this is costly."""
634 637 return self.changelogrevision(rev).branchinfo
635 638
636 639 def _nodeduplicatecallback(self, transaction, rev):
637 640 # keep track of revisions that got "re-added", e.g. unbundle of known revs.
638 641 #
639 642 # We track them in a list to preserve their order from the source bundle
640 643 duplicates = transaction.changes.setdefault(b'revduplicates', [])
641 644 duplicates.append(rev)
@@ -1,4042 +1,4049 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanket usage of all the names to prevent pyflakes complaints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider we have a "fast" implementation in "pure" python because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally re-sort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commits be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are deltas encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help havign each object
314 314 self contained.
315 315 """
316 316
317 317 # can deltas be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate group by chunk of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class _InnerRevlog:
341 341 """An inner layer of the revlog object
342 342
343 343 This layer exists so that some operations can be delegated to Rust; its
344 344 boundaries are arbitrary, based on what we can delegate to Rust.
345 345 """
346 346
347 347 def __init__(
348 348 self,
349 349 opener,
350 350 index,
351 351 index_file,
352 352 data_file,
353 353 sidedata_file,
354 354 inline,
355 355 data_config,
356 356 delta_config,
357 357 feature_config,
358 358 chunk_cache,
359 359 default_compression_header,
360 360 ):
361 361 self.opener = opener
362 362 self.index = index
363 363
364 364 self.__index_file = index_file
365 365 self.data_file = data_file
366 366 self.sidedata_file = sidedata_file
367 367 self.inline = inline
368 368 self.data_config = data_config
369 369 self.delta_config = delta_config
370 370 self.feature_config = feature_config
371 371
372 372 self._default_compression_header = default_compression_header
373 373
374 374 # index
375 375
376 376 # 3-tuple of file handles being used for active writing.
377 377 self._writinghandles = None
378 378
379 379 self._segmentfile = randomaccessfile.randomaccessfile(
380 380 self.opener,
381 381 (self.index_file if self.inline else self.data_file),
382 382 self.data_config.chunk_cache_size,
383 383 chunk_cache,
384 384 )
385 385 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
386 386 self.opener,
387 387 self.sidedata_file,
388 388 self.data_config.chunk_cache_size,
389 389 )
390 390
391 391 # revlog header -> revlog compressor
392 392 self._decompressors = {}
393 393 # 3-tuple of (node, rev, text) for a raw revision.
394 394 self._revisioncache = None
395 395
396 396 @property
397 397 def index_file(self):
398 398 return self.__index_file
399 399
400 400 @index_file.setter
401 401 def index_file(self, new_index_file):
402 402 self.__index_file = new_index_file
403 403 if self.inline:
404 404 self._segmentfile.filename = new_index_file
405 405
406 406 def __len__(self):
407 407 return len(self.index)
408 408
409 409 def clear_cache(self):
410 410 self._revisioncache = None
411 411 self._segmentfile.clear_cache()
412 412 self._segmentfile_sidedata.clear_cache()
413 413
414 414 # Derived from index values.
415 415
416 416 def start(self, rev):
417 417 """the offset of the data chunk for this revision"""
418 418 return int(self.index[rev][0] >> 16)
419 419
420 420 def length(self, rev):
421 421 """the length of the data chunk for this revision"""
422 422 return self.index[rev][1]
423 423
424 424 def end(self, rev):
425 425 """the end of the data chunk for this revision"""
426 426 return self.start(rev) + self.length(rev)
427 427
428 428 def deltaparent(self, rev):
429 429 """return deltaparent of the given revision"""
430 430 base = self.index[rev][3]
431 431 if base == rev:
432 432 return nullrev
433 433 elif self.delta_config.general_delta:
434 434 return base
435 435 else:
436 436 return rev - 1
437 437
438 438 def issnapshot(self, rev):
439 439 """tells whether rev is a snapshot"""
440 440 if not self.delta_config.sparse_revlog:
441 441 return self.deltaparent(rev) == nullrev
442 442 elif hasattr(self.index, 'issnapshot'):
443 443 # directly assign the method to cache the testing and access
444 444 self.issnapshot = self.index.issnapshot
445 445 return self.issnapshot(rev)
446 446 if rev == nullrev:
447 447 return True
448 448 entry = self.index[rev]
449 449 base = entry[3]
450 450 if base == rev:
451 451 return True
452 452 if base == nullrev:
453 453 return True
454 454 p1 = entry[5]
455 455 while self.length(p1) == 0:
456 456 b = self.deltaparent(p1)
457 457 if b == p1:
458 458 break
459 459 p1 = b
460 460 p2 = entry[6]
461 461 while self.length(p2) == 0:
462 462 b = self.deltaparent(p2)
463 463 if b == p2:
464 464 break
465 465 p2 = b
466 466 if base == p1 or base == p2:
467 467 return False
468 468 return self.issnapshot(base)
469 469
470 470 def _deltachain(self, rev, stoprev=None):
471 471 """Obtain the delta chain for a revision.
472 472
473 473 ``stoprev`` specifies a revision to stop at. If not specified, we
474 474 stop at the base of the chain.
475 475
476 476 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
477 477 revs in ascending order and ``stopped`` is a bool indicating whether
478 478 ``stoprev`` was hit.
479 479 """
480 480 generaldelta = self.delta_config.general_delta
481 481 # Try C implementation.
482 482 try:
483 483 return self.index.deltachain(rev, stoprev, generaldelta)
484 484 except AttributeError:
485 485 pass
486 486
487 487 chain = []
488 488
489 489 # Alias to prevent attribute lookup in tight loop.
490 490 index = self.index
491 491
492 492 iterrev = rev
493 493 e = index[iterrev]
494 494 while iterrev != e[3] and iterrev != stoprev:
495 495 chain.append(iterrev)
496 496 if generaldelta:
497 497 iterrev = e[3]
498 498 else:
499 499 iterrev -= 1
500 500 e = index[iterrev]
501 501
502 502 if iterrev == stoprev:
503 503 stopped = True
504 504 else:
505 505 chain.append(iterrev)
506 506 stopped = False
507 507
508 508 chain.reverse()
509 509 return chain, stopped
510 510
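The pure-python fallback above can be illustrated on a toy index where only the delta-base field (`e[3]`) matters; with general delta, a chain ends at a revision that is its own base. The mapping below is made up:

    toy_base = {0: 0, 1: 0, 2: 1, 3: 2}  # rev -> delta base (synthetic)

    def toy_chain(rev):
        chain = []
        while toy_base[rev] != rev:
            chain.append(rev)
            rev = toy_base[rev]
        chain.append(rev)      # the base revision terminates the chain
        chain.reverse()        # return in ascending order, like _deltachain
        return chain

    assert toy_chain(3) == [0, 1, 2, 3]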
511 511 @util.propertycache
512 512 def _compressor(self):
513 513 engine = util.compengines[self.feature_config.compression_engine]
514 514 return engine.revlogcompressor(
515 515 self.feature_config.compression_engine_options
516 516 )
517 517
518 518 @util.propertycache
519 519 def _decompressor(self):
520 520 """the default decompressor"""
521 521 if self._default_compression_header is None:
522 522 return None
523 523 t = self._default_compression_header
524 524 c = self._get_decompressor(t)
525 525 return c.decompress
526 526
527 527 def _get_decompressor(self, t):
528 528 try:
529 529 compressor = self._decompressors[t]
530 530 except KeyError:
531 531 try:
532 532 engine = util.compengines.forrevlogheader(t)
533 533 compressor = engine.revlogcompressor(
534 534 self.feature_config.compression_engine_options
535 535 )
536 536 self._decompressors[t] = compressor
537 537 except KeyError:
538 538 raise error.RevlogError(
539 539 _(b'unknown compression type %s') % binascii.hexlify(t)
540 540 )
541 541 return compressor
542 542
543 543 def compress(self, data):
544 544 """Generate a possibly-compressed representation of data."""
545 545 if not data:
546 546 return b'', data
547 547
548 548 compressed = self._compressor.compress(data)
549 549
550 550 if compressed:
551 551 # The revlog compressor added the header in the returned data.
552 552 return b'', compressed
553 553
554 554 if data[0:1] == b'\0':
555 555 return b'', data
556 556 return b'u', data
557 557
558 558 def decompress(self, data):
559 559 """Decompress a revlog chunk.
560 560
561 561 The chunk is expected to begin with a header identifying the
562 562 format type so it can be routed to an appropriate decompressor.
563 563 """
564 564 if not data:
565 565 return data
566 566
567 567 # Revlogs are read much more frequently than they are written and many
568 568 # chunks only take microseconds to decompress, so performance is
569 569 # important here.
570 570 #
571 571 # We can make a few assumptions about revlogs:
572 572 #
573 573 # 1) the majority of chunks will be compressed (as opposed to inline
574 574 # raw data).
575 575 # 2) decompressing *any* data will likely be at least 10x slower than
576 576 # returning raw inline data.
577 577 # 3) we want to prioritize common and officially supported compression
578 578 # engines
579 579 #
580 580 # It follows that we want to optimize for "decompress compressed data
581 581 # when encoded with common and officially supported compression engines"
582 582 # case over "raw data" and "data encoded by less common or non-official
583 583 # compression engines." That is why we have the inline lookup first
584 584 # followed by the compengines lookup.
585 585 #
586 586 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
587 587 # compressed chunks. And this matters for changelog and manifest reads.
588 588 t = data[0:1]
589 589
590 590 if t == b'x':
591 591 try:
592 592 return _zlibdecompress(data)
593 593 except zlib.error as e:
594 594 raise error.RevlogError(
595 595 _(b'revlog decompress error: %s')
596 596 % stringutil.forcebytestr(e)
597 597 )
598 598 # '\0' is more common than 'u' so it goes first.
599 599 elif t == b'\0':
600 600 return data
601 601 elif t == b'u':
602 602 return util.buffer(data, 1)
603 603
604 604 compressor = self._get_decompressor(t)
605 605
606 606 return compressor.decompress(data)
607 607
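The one-byte routing header that `compress` emits and `decompress` consumes can be restated as a standalone sketch (zlib only; the real method also routes unknown headers to other engines via `_get_decompressor`):

    import zlib

    def toy_decompress(chunk):
        t = chunk[0:1]
        if t == b'x':     # start of a zlib stream; no extra marker needed
            return zlib.decompress(chunk)
        elif t == b'\0':  # raw data that happens to start with NUL
            return chunk
        elif t == b'u':   # raw data stored behind an explicit 'u' marker
            return chunk[1:]
        raise ValueError('unknown compression header')

    assert toy_decompress(zlib.compress(b'some data')) == b'some data'
    assert toy_decompress(b'uplain text') == b'plain text'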
608 608 @contextlib.contextmanager
609 609 def reading(self):
610 610 """Context manager that keeps data and sidedata files open for reading"""
611 611 if len(self.index) == 0:
612 612 yield # nothing to be read
613 613 else:
614 614 with self._segmentfile.reading():
615 615 with self._segmentfile_sidedata.reading():
616 616 yield
617 617
618 618 @property
619 619 def is_writing(self):
620 620 """True is a writing context is open"""
621 621 return self._writinghandles is not None
622 622
623 @property
624 def is_open(self):
625 """True if any file handle is being held
626
627 Used for asserts and debugging in the python code"""
628 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
629
623 630 @contextlib.contextmanager
624 631 def writing(self, transaction, data_end=None, sidedata_end=None):
625 632 """Open the revlog files for writing
626 633
627 634 Adding content to a revlog should be done within such a context.
628 635 """
629 636 if self.is_writing:
630 637 yield
631 638 else:
632 639 ifh = dfh = sdfh = None
633 640 try:
634 641 r = len(self.index)
635 642 # opening the data file.
636 643 dsize = 0
637 644 if r:
638 645 dsize = self.end(r - 1)
639 646 dfh = None
640 647 if not self.inline:
641 648 try:
642 649 dfh = self.opener(self.data_file, mode=b"r+")
643 650 if data_end is None:
644 651 dfh.seek(0, os.SEEK_END)
645 652 else:
646 653 dfh.seek(data_end, os.SEEK_SET)
647 654 except FileNotFoundError:
648 655 dfh = self.opener(self.data_file, mode=b"w+")
649 656 transaction.add(self.data_file, dsize)
650 657 if self.sidedata_file is not None:
651 658 assert sidedata_end is not None
652 659 # revlog-v2 does not inline, help Pytype
653 660 assert dfh is not None
654 661 try:
655 662 sdfh = self.opener(self.sidedata_file, mode=b"r+")
656 663 dfh.seek(sidedata_end, os.SEEK_SET)
657 664 except FileNotFoundError:
658 665 sdfh = self.opener(self.sidedata_file, mode=b"w+")
659 666 transaction.add(self.sidedata_file, sidedata_end)
660 667
661 668 # opening the index file.
662 669 isize = r * self.index.entry_size
663 670 ifh = self.__index_write_fp()
664 671 if self.inline:
665 672 transaction.add(self.index_file, dsize + isize)
666 673 else:
667 674 transaction.add(self.index_file, isize)
668 675 # exposing all file handle for writing.
669 676 self._writinghandles = (ifh, dfh, sdfh)
670 677 self._segmentfile.writing_handle = ifh if self.inline else dfh
671 678 self._segmentfile_sidedata.writing_handle = sdfh
672 679 yield
673 680 finally:
674 681 self._writinghandles = None
675 682 self._segmentfile.writing_handle = None
676 683 self._segmentfile_sidedata.writing_handle = None
677 684 if dfh is not None:
678 685 dfh.close()
679 686 if sdfh is not None:
680 687 sdfh.close()
681 688 # closing the index file last to avoid exposing references to
682 689 # potentially unflushed data content.
683 690 if ifh is not None:
684 691 ifh.close()
685 692
686 693 def __index_write_fp(self, index_end=None):
687 694 """internal method to open the index file for writing
688 695
689 696 You should not use this directly and use `_writing` instead
690 697 """
691 698 try:
692 699 f = self.opener(
693 700 self.index_file,
694 701 mode=b"r+",
695 702 checkambig=self.data_config.check_ambig,
696 703 )
697 704 if index_end is None:
698 705 f.seek(0, os.SEEK_END)
699 706 else:
700 707 f.seek(index_end, os.SEEK_SET)
701 708 return f
702 709 except FileNotFoundError:
703 710 return self.opener(
704 711 self.index_file,
705 712 mode=b"w+",
706 713 checkambig=self.data_config.check_ambig,
707 714 )
708 715
709 716 def __index_new_fp(self):
710 717 """internal method to create a new index file for writing
711 718
712 719 You should not use this unless you are upgrading from inline revlog
713 720 """
714 721 return self.opener(
715 722 self.index_file,
716 723 mode=b"w",
717 724 checkambig=self.data_config.check_ambig,
718 725 atomictemp=True,
719 726 )
720 727
721 728 def split_inline(self, tr, header, new_index_file_path=None):
722 729 """split the data of an inline revlog into an index and a data file"""
723 730 existing_handles = False
724 731 if self._writinghandles is not None:
725 732 existing_handles = True
726 733 fp = self._writinghandles[0]
727 734 fp.flush()
728 735 fp.close()
729 736 # We can't use the cached file handle after close(). So prevent
730 737 # its usage.
731 738 self._writinghandles = None
732 739 self._segmentfile.writing_handle = None
733 740 # No need to deal with the sidedata writing handle as it is only
734 741 # relevant for revlog-v2, which is never inline and thus never
735 742 # reaches this code
736 743
737 744 new_dfh = self.opener(self.data_file, mode=b"w+")
738 745 new_dfh.truncate(0) # drop any potentially existing data
739 746 try:
740 747 with self.reading():
741 748 for r in range(len(self.index)):
742 749 new_dfh.write(self.get_segment_for_revs(r, r)[1])
743 750 new_dfh.flush()
744 751
745 752 if new_index_file_path is not None:
746 753 self.index_file = new_index_file_path
747 754 with self.__index_new_fp() as fp:
748 755 self.inline = False
749 756 for i in range(len(self.index)):
750 757 e = self.index.entry_binary(i)
751 758 if i == 0:
752 759 packed_header = self.index.pack_header(header)
753 760 e = packed_header + e
754 761 fp.write(e)
755 762
756 763 # If we don't use side-write, the temp file replaces the real
757 764 # index when we exit the context manager
758 765
759 766 self._segmentfile = randomaccessfile.randomaccessfile(
760 767 self.opener,
761 768 self.data_file,
762 769 self.data_config.chunk_cache_size,
763 770 )
764 771
765 772 if existing_handles:
766 773 # switched from inline to conventional reopen the index
767 774 ifh = self.__index_write_fp()
768 775 self._writinghandles = (ifh, new_dfh, None)
769 776 self._segmentfile.writing_handle = new_dfh
770 777 new_dfh = None
771 778 # No need to deal with the sidedata writing handle as it is only
772 779 # relevant for revlog-v2, which is never inline and thus never
773 780 # reaches this code
774 781 finally:
775 782 if new_dfh is not None:
776 783 new_dfh.close()
777 784 return self.index_file
778 785
779 786 def get_segment_for_revs(self, startrev, endrev):
780 787 """Obtain a segment of raw data corresponding to a range of revisions.
781 788
782 789 Accepts the start and end revisions.
785 792
786 793 Requests for data may be satisfied by a cache.
787 794
788 795 Returns a 2-tuple of (offset, data) for the requested range of
789 796 revisions. Offset is the integer offset from the beginning of the
790 797 revlog and data is a str or buffer of the raw byte data.
791 798
792 799 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
793 800 to determine where each revision's data begins and ends.
794 801
795 802 API: we should consider making this a private part of the InnerRevlog
796 803 at some point.
797 804 """
798 805 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
799 806 # (functions are expensive).
800 807 index = self.index
801 808 istart = index[startrev]
802 809 start = int(istart[0] >> 16)
803 810 if startrev == endrev:
804 811 end = start + istart[1]
805 812 else:
806 813 iend = index[endrev]
807 814 end = int(iend[0] >> 16) + iend[1]
808 815
809 816 if self.inline:
810 817 start += (startrev + 1) * self.index.entry_size
811 818 end += (endrev + 1) * self.index.entry_size
812 819 length = end - start
813 820
814 821 return start, self._segmentfile.read_chunk(start, length)
815 822
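The inline adjustment above deserves a standalone illustration: in an inline revlog, index entries and data chunks are interleaved, so the data of revision `r` sits after `r + 1` index entries. All numbers below are made up:

    entry_size = 64        # size of one v1 index entry, for illustration
    rev = 2
    data_offset = 120      # offset recorded in the index for this rev
    physical = data_offset + (rev + 1) * entry_size
    assert physical == 120 + 3 * 64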
816 823 def _chunk(self, rev):
817 824 """Obtain a single decompressed chunk for a revision.
818 825
819 826 Accepts an integer revision.
822 829
823 830 Returns a str holding uncompressed data for the requested revision.
824 831 """
825 832 compression_mode = self.index[rev][10]
826 833 data = self.get_segment_for_revs(rev, rev)[1]
827 834 if compression_mode == COMP_MODE_PLAIN:
828 835 return data
829 836 elif compression_mode == COMP_MODE_DEFAULT:
830 837 return self._decompressor(data)
831 838 elif compression_mode == COMP_MODE_INLINE:
832 839 return self.decompress(data)
833 840 else:
834 841 msg = b'unknown compression mode %d'
835 842 msg %= compression_mode
836 843 raise error.RevlogError(msg)
837 844
838 845 def _chunks(self, revs, targetsize=None):
839 846 """Obtain decompressed chunks for the specified revisions.
840 847
841 848 Accepts an iterable of numeric revisions that are assumed to be in
842 849 ascending order.
845 852
846 853 This function is similar to calling ``self._chunk()`` multiple times,
847 854 but is faster.
848 855
849 856 Returns a list with decompressed data for each requested revision.
850 857 """
851 858 if not revs:
852 859 return []
853 860 start = self.start
854 861 length = self.length
855 862 inline = self.inline
856 863 iosize = self.index.entry_size
857 864 buffer = util.buffer
858 865
859 866 l = []
860 867 ladd = l.append
861 868
862 869 if not self.data_config.with_sparse_read:
863 870 slicedchunks = (revs,)
864 871 else:
865 872 slicedchunks = deltautil.slicechunk(
866 873 self,
867 874 revs,
868 875 targetsize=targetsize,
869 876 )
870 877
871 878 for revschunk in slicedchunks:
872 879 firstrev = revschunk[0]
873 880 # Skip trailing revisions with empty diff
874 881 for lastrev in revschunk[::-1]:
875 882 if length(lastrev) != 0:
876 883 break
877 884
878 885 try:
879 886 offset, data = self.get_segment_for_revs(firstrev, lastrev)
880 887 except OverflowError:
881 888 # issue4215 - we can't cache a run of chunks greater than
882 889 # 2G on Windows
883 890 return [self._chunk(rev) for rev in revschunk]
884 891
885 892 decomp = self.decompress
886 893 # self._decompressor might be None, but will not be used in that case
887 894 def_decomp = self._decompressor
888 895 for rev in revschunk:
889 896 chunkstart = start(rev)
890 897 if inline:
891 898 chunkstart += (rev + 1) * iosize
892 899 chunklength = length(rev)
893 900 comp_mode = self.index[rev][10]
894 901 c = buffer(data, chunkstart - offset, chunklength)
895 902 if comp_mode == COMP_MODE_PLAIN:
896 903 ladd(c)
897 904 elif comp_mode == COMP_MODE_INLINE:
898 905 ladd(decomp(c))
899 906 elif comp_mode == COMP_MODE_DEFAULT:
900 907 ladd(def_decomp(c))
901 908 else:
902 909 msg = b'unknown compression mode %d'
903 910 msg %= comp_mode
904 911 raise error.RevlogError(msg)
905 912
906 913 return l
907 914
908 915 def raw_text(self, node, rev):
909 916 """return the possibly unvalidated rawtext for a revision
910 917
911 918 returns (rev, rawtext, validated)
912 919 """
913 920
914 921 # revision in the cache (could be useful to apply delta)
915 922 cachedrev = None
916 923 # An intermediate text to apply deltas to
917 924 basetext = None
918 925
919 926 # Check if we have the entry in cache
920 927 # The cache entry looks like (node, rev, rawtext)
921 928 if self._revisioncache:
922 929 cachedrev = self._revisioncache[1]
923 930
924 931 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
925 932 if stopped:
926 933 basetext = self._revisioncache[2]
927 934
928 935 # drop cache to save memory, the caller is expected to
929 936 # update self._inner._revisioncache after validating the text
930 937 self._revisioncache = None
931 938
932 939 targetsize = None
933 940 rawsize = self.index[rev][2]
934 941 if 0 <= rawsize:
935 942 targetsize = 4 * rawsize
936 943
937 944 bins = self._chunks(chain, targetsize=targetsize)
938 945 if basetext is None:
939 946 basetext = bytes(bins[0])
940 947 bins = bins[1:]
941 948
942 949 rawtext = mdiff.patches(basetext, bins)
943 950 del basetext # let us have a chance to free memory early
944 951 return (rev, rawtext, False)
945 952
946 953 def sidedata(self, rev, sidedata_end):
947 954 """Return the sidedata for a given revision number."""
948 955 index_entry = self.index[rev]
949 956 sidedata_offset = index_entry[8]
950 957 sidedata_size = index_entry[9]
951 958
952 959 if self.inline:
953 960 sidedata_offset += self.index.entry_size * (1 + rev)
954 961 if sidedata_size == 0:
955 962 return {}
956 963
957 964 if sidedata_end < sidedata_offset + sidedata_size:
958 965 filename = self.sidedata_file
959 966 end = sidedata_end
960 967 offset = sidedata_offset
961 968 length = sidedata_size
962 969 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
963 970 raise error.RevlogError(m)
964 971
965 972 comp_segment = self._segmentfile_sidedata.read_chunk(
966 973 sidedata_offset, sidedata_size
967 974 )
968 975
969 976 comp = self.index[rev][11]
970 977 if comp == COMP_MODE_PLAIN:
971 978 segment = comp_segment
972 979 elif comp == COMP_MODE_DEFAULT:
973 980 segment = self._decompressor(comp_segment)
974 981 elif comp == COMP_MODE_INLINE:
975 982 segment = self.decompress(comp_segment)
976 983 else:
977 984 msg = b'unknown compression mode %d'
978 985 msg %= comp
979 986 raise error.RevlogError(msg)
980 987
981 988 sidedata = sidedatautil.deserialize_sidedata(segment)
982 989 return sidedata
983 990
984 991 def write_entry(
985 992 self,
986 993 transaction,
987 994 entry,
988 995 data,
989 996 link,
990 997 offset,
991 998 sidedata,
992 999 sidedata_offset,
993 1000 index_end,
994 1001 data_end,
995 1002 sidedata_end,
996 1003 ):
997 1004 # Files opened in a+ mode have inconsistent behavior on various
998 1005 # platforms. Windows requires that a file positioning call be made
999 1006 # when the file handle transitions between reads and writes. See
1000 1007 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1001 1008 # platforms, Python or the platform itself can be buggy. Some versions
1002 1009 # of Solaris have been observed to not append at the end of the file
1003 1010 # if the file was seeked to before the end. See issue4943 for more.
1004 1011 #
1005 1012 # We work around this issue by inserting a seek() before writing.
1006 1013 # Note: This is likely not necessary on Python 3. However, because
1007 1014 # the file handle is reused for reads and may be seeked there, we need
1008 1015 # to be careful before changing this.
1009 1016 if self._writinghandles is None:
1010 1017 msg = b'adding revision outside `revlog._writing` context'
1011 1018 raise error.ProgrammingError(msg)
1012 1019 ifh, dfh, sdfh = self._writinghandles
1013 1020 if index_end is None:
1014 1021 ifh.seek(0, os.SEEK_END)
1015 1022 else:
1016 1023 ifh.seek(index_end, os.SEEK_SET)
1017 1024 if dfh:
1018 1025 if data_end is None:
1019 1026 dfh.seek(0, os.SEEK_END)
1020 1027 else:
1021 1028 dfh.seek(data_end, os.SEEK_SET)
1022 1029 if sdfh:
1023 1030 sdfh.seek(sidedata_end, os.SEEK_SET)
1024 1031
1025 1032 curr = len(self.index) - 1
1026 1033 if not self.inline:
1027 1034 transaction.add(self.data_file, offset)
1028 1035 if self.sidedata_file:
1029 1036 transaction.add(self.sidedata_file, sidedata_offset)
1030 1037 transaction.add(self.index_file, curr * len(entry))
1031 1038 if data[0]:
1032 1039 dfh.write(data[0])
1033 1040 dfh.write(data[1])
1034 1041 if sidedata:
1035 1042 sdfh.write(sidedata)
1036 1043 ifh.write(entry)
1037 1044 else:
1038 1045 offset += curr * self.index.entry_size
1039 1046 transaction.add(self.index_file, offset)
1040 1047 ifh.write(entry)
1041 1048 ifh.write(data[0])
1042 1049 ifh.write(data[1])
1043 1050 assert not sidedata
1044 1051 return (
1045 1052 ifh.tell(),
1046 1053 dfh.tell() if dfh else None,
1047 1054 sdfh.tell() if sdfh else None,
1048 1055 )
1049 1056
1050 1057
1051 1058 class revlog:
1052 1059 """
1053 1060 the underlying revision storage object
1054 1061
1055 1062 A revlog consists of two parts, an index and the revision data.
1056 1063
1057 1064 The index is a file with a fixed record size containing
1058 1065 information on each revision, including its nodeid (hash), the
1059 1066 nodeids of its parents, the position and offset of its data within
1060 1067 the data file, and the revision it's based on. Finally, each entry
1061 1068 contains a linkrev entry that can serve as a pointer to external
1062 1069 data.
1063 1070
1064 1071 The revision data itself is a linear collection of data chunks.
1065 1072 Each chunk represents a revision and is usually represented as a
1066 1073 delta against the previous chunk. To bound lookup time, runs of
1067 1074 deltas are limited to about 2 times the length of the original
1068 1075 version data. This makes retrieval of a version proportional to
1069 1076 its size, or O(1) relative to the number of revisions.
1070 1077
1071 1078 Both pieces of the revlog are written to in an append-only
1072 1079 fashion, which means we never need to rewrite a file to insert or
1073 1080 remove data, and can use some simple techniques to avoid the need
1074 1081 for locking while reading.
1075 1082
1076 1083 If checkambig, indexfile is opened with checkambig=True at
1077 1084 writing, to avoid file stat ambiguity.
1078 1085
1079 1086 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1080 1087 index will be mmapped rather than read if it is larger than the
1081 1088 configured threshold.
1082 1089
1083 1090 If censorable is True, the revlog can have censored revisions.
1084 1091
1085 1092 If `upperboundcomp` is not None, this is the expected maximal gain from
1086 1093 compression for the data content.
1087 1094
1088 1095 `concurrencychecker` is an optional function that receives 3 arguments: a
1089 1096 file handle, a filename, and an expected position. It should check whether
1090 1097 the current position in the file handle is valid, and log/warn/fail (by
1091 1098 raising).
1092 1099
1093 1100 See mercurial/revlogutils/constants.py for details about the content of an
1094 1101 index entry.
1095 1102 """
1096 1103
1097 1104 _flagserrorclass = error.RevlogError
1098 1105
1099 1106 @staticmethod
1100 1107 def is_inline_index(header_bytes):
1101 1108 """Determine if a revlog is inline from the initial bytes of the index"""
1102 1109 header = INDEX_HEADER.unpack(header_bytes)[0]
1103 1110
1104 1111 _format_flags = header & ~0xFFFF
1105 1112 _format_version = header & 0xFFFF
1106 1113
1107 1114 features = FEATURES_BY_VERSION[_format_version]
1108 1115 return features[b'inline'](_format_flags)
1109 1116
1110 1117 def __init__(
1111 1118 self,
1112 1119 opener,
1113 1120 target,
1114 1121 radix,
1115 1122 postfix=None, # only exists for `tmpcensored` now
1116 1123 checkambig=False,
1117 1124 mmaplargeindex=False,
1118 1125 censorable=False,
1119 1126 upperboundcomp=None,
1120 1127 persistentnodemap=False,
1121 1128 concurrencychecker=None,
1122 1129 trypending=False,
1123 1130 try_split=False,
1124 1131 canonical_parent_order=True,
1125 1132 ):
1126 1133 """
1127 1134 create a revlog object
1128 1135
1129 1136 opener is a function that abstracts the file opening operation
1130 1137 and can be used to implement COW semantics or the like.
1131 1138
1132 1139 `target`: a (KIND, ID) tuple that identifies the content stored in
1133 1140 this revlog. It helps the rest of the code understand what the revlog
1134 1141 is about without having to resort to heuristics and index filename
1135 1142 analysis. Note that this must reliably be set by normal code, but
1136 1143 test, debug, or performance-measurement code might not set it to an
1137 1144 accurate value.
1138 1145 """
1139 1146
1140 1147 self.radix = radix
1141 1148
1142 1149 self._docket_file = None
1143 1150 self._indexfile = None
1144 1151 self._datafile = None
1145 1152 self._sidedatafile = None
1146 1153 self._nodemap_file = None
1147 1154 self.postfix = postfix
1148 1155 self._trypending = trypending
1149 1156 self._try_split = try_split
1150 1157 self.opener = opener
1151 1158 if persistentnodemap:
1152 1159 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1153 1160
1154 1161 assert target[0] in ALL_KINDS
1155 1162 assert len(target) == 2
1156 1163 self.target = target
1157 1164 if b'feature-config' in self.opener.options:
1158 1165 self.feature_config = self.opener.options[b'feature-config'].copy()
1159 1166 else:
1160 1167 self.feature_config = FeatureConfig()
1161 1168 self.feature_config.censorable = censorable
1162 1169 self.feature_config.canonical_parent_order = canonical_parent_order
1163 1170 if b'data-config' in self.opener.options:
1164 1171 self.data_config = self.opener.options[b'data-config'].copy()
1165 1172 else:
1166 1173 self.data_config = DataConfig()
1167 1174 self.data_config.check_ambig = checkambig
1168 1175 self.data_config.mmap_large_index = mmaplargeindex
1169 1176 if b'delta-config' in self.opener.options:
1170 1177 self.delta_config = self.opener.options[b'delta-config'].copy()
1171 1178 else:
1172 1179 self.delta_config = DeltaConfig()
1173 1180 self.delta_config.upper_bound_comp = upperboundcomp
1174 1181
1175 1182 # Maps rev to chain base rev.
1176 1183 self._chainbasecache = util.lrucachedict(100)
1177 1184
1178 1185 self.index = None
1179 1186 self._docket = None
1180 1187 self._nodemap_docket = None
1181 1188 # Mapping of partial identifiers to full nodes.
1182 1189 self._pcache = {}
1183 1190
1184 1191 # other optional features
1185 1192
1186 1193 # Make copy of flag processors so each revlog instance can support
1187 1194 # custom flags.
1188 1195 self._flagprocessors = dict(flagutil.flagprocessors)
1189 1196 # prevent nesting of addgroup
1190 1197 self._adding_group = None
1191 1198
1192 1199 chunk_cache = self._loadindex()
1193 1200 self._load_inner(chunk_cache)
1194 1201 self._concurrencychecker = concurrencychecker
1195 1202
1196 1203 @property
1197 1204 def _generaldelta(self):
1198 1205 """temporary compatibility proxy"""
1199 1206 util.nouideprecwarn(
1200 1207 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
1201 1208 )
1202 1209 return self.delta_config.general_delta
1203 1210
1204 1211 @property
1205 1212 def _checkambig(self):
1206 1213 """temporary compatibility proxy"""
1207 1214 util.nouideprecwarn(
1208 1215 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
1209 1216 )
1210 1217 return self.data_config.check_ambig
1211 1218
1212 1219 @property
1213 1220 def _mmaplargeindex(self):
1214 1221 """temporary compatibility proxy"""
1215 1222 util.nouideprecwarn(
1216 1223 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
1217 1224 )
1218 1225 return self.data_config.mmap_large_index
1219 1226
1220 1227 @property
1221 1228 def _censorable(self):
1222 1229 """temporary compatibility proxy"""
1223 1230 util.nouideprecwarn(
1224 1231 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
1225 1232 )
1226 1233 return self.feature_config.censorable
1227 1234
1228 1235 @property
1229 1236 def _chunkcachesize(self):
1230 1237 """temporary compatibility proxy"""
1231 1238 util.nouideprecwarn(
1232 1239 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
1233 1240 )
1234 1241 return self.data_config.chunk_cache_size
1235 1242
1236 1243 @property
1237 1244 def _maxchainlen(self):
1238 1245 """temporary compatibility proxy"""
1239 1246 util.nouideprecwarn(
1240 1247 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
1241 1248 )
1242 1249 return self.delta_config.max_chain_len
1243 1250
1244 1251 @property
1245 1252 def _deltabothparents(self):
1246 1253 """temporary compatibility proxy"""
1247 1254 util.nouideprecwarn(
1248 1255 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
1249 1256 )
1250 1257 return self.delta_config.delta_both_parents
1251 1258
1252 1259 @property
1253 1260 def _candidate_group_chunk_size(self):
1254 1261 """temporary compatibility proxy"""
1255 1262 util.nouideprecwarn(
1256 1263 b"use revlog.delta_config.candidate_group_chunk_size",
1257 1264 b"6.6",
1258 1265 stacklevel=2,
1259 1266 )
1260 1267 return self.delta_config.candidate_group_chunk_size
1261 1268
1262 1269 @property
1263 1270 def _debug_delta(self):
1264 1271 """temporary compatibility proxy"""
1265 1272 util.nouideprecwarn(
1266 1273 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
1267 1274 )
1268 1275 return self.delta_config.debug_delta
1269 1276
1270 1277 @property
1271 1278 def _compengine(self):
1272 1279 """temporary compatibility proxy"""
1273 1280 util.nouideprecwarn(
1274 1281 b"use revlog.feature_config.compression_engine",
1275 1282 b"6.6",
1276 1283 stacklevel=2,
1277 1284 )
1278 1285 return self.feature_config.compression_engine
1279 1286
1280 1287 @property
1281 1288 def upperboundcomp(self):
1282 1289 """temporary compatibility proxy"""
1283 1290 util.nouideprecwarn(
1284 1291 b"use revlog.delta_config.upper_bound_comp",
1285 1292 b"6.6",
1286 1293 stacklevel=2,
1287 1294 )
1288 1295 return self.delta_config.upper_bound_comp
1289 1296
1290 1297 @property
1291 1298 def _compengineopts(self):
1292 1299 """temporary compatibility proxy"""
1293 1300 util.nouideprecwarn(
1294 1301 b"use revlog.feature_config.compression_engine_options",
1295 1302 b"6.6",
1296 1303 stacklevel=2,
1297 1304 )
1298 1305 return self.feature_config.compression_engine_options
1299 1306
1300 1307 @property
1301 1308 def _maxdeltachainspan(self):
1302 1309 """temporary compatibility proxy"""
1303 1310 util.nouideprecwarn(
1304 1311 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
1305 1312 )
1306 1313 return self.delta_config.max_deltachain_span
1307 1314
1308 1315 @property
1309 1316 def _withsparseread(self):
1310 1317 """temporary compatibility proxy"""
1311 1318 util.nouideprecwarn(
1312 1319 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
1313 1320 )
1314 1321 return self.data_config.with_sparse_read
1315 1322
1316 1323 @property
1317 1324 def _sparserevlog(self):
1318 1325 """temporary compatibility proxy"""
1319 1326 util.nouideprecwarn(
1320 1327 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
1321 1328 )
1322 1329 return self.delta_config.sparse_revlog
1323 1330
1324 1331 @property
1325 1332 def hassidedata(self):
1326 1333 """temporary compatibility proxy"""
1327 1334 util.nouideprecwarn(
1328 1335 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1329 1336 )
1330 1337 return self.feature_config.has_side_data
1331 1338
1332 1339 @property
1333 1340 def _srdensitythreshold(self):
1334 1341 """temporary compatibility proxy"""
1335 1342 util.nouideprecwarn(
1336 1343 b"use revlog.data_config.sr_density_threshold",
1337 1344 b"6.6",
1338 1345 stacklevel=2,
1339 1346 )
1340 1347 return self.data_config.sr_density_threshold
1341 1348
1342 1349 @property
1343 1350 def _srmingapsize(self):
1344 1351 """temporary compatibility proxy"""
1345 1352 util.nouideprecwarn(
1346 1353 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1347 1354 )
1348 1355 return self.data_config.sr_min_gap_size
1349 1356
1350 1357 @property
1351 1358 def _compute_rank(self):
1352 1359 """temporary compatibility proxy"""
1353 1360 util.nouideprecwarn(
1354 1361 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1355 1362 )
1356 1363 return self.feature_config.compute_rank
1357 1364
1358 1365 @property
1359 1366 def canonical_parent_order(self):
1360 1367 """temporary compatibility proxy"""
1361 1368 util.nouideprecwarn(
1362 1369 b"use revlog.feature_config.canonical_parent_order",
1363 1370 b"6.6",
1364 1371 stacklevel=2,
1365 1372 )
1366 1373 return self.feature_config.canonical_parent_order
1367 1374
1368 1375 @property
1369 1376 def _lazydelta(self):
1370 1377 """temporary compatibility proxy"""
1371 1378 util.nouideprecwarn(
1372 1379 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1373 1380 )
1374 1381 return self.delta_config.lazy_delta
1375 1382
1376 1383 @property
1377 1384 def _lazydeltabase(self):
1378 1385 """temporary compatibility proxy"""
1379 1386 util.nouideprecwarn(
1380 1387 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1381 1388 )
1382 1389 return self.delta_config.lazy_delta_base
1383 1390
1384 1391 def _init_opts(self):
1385 1392 """process options (from above/config) to setup associated default revlog mode
1386 1393
1387 1394 These values might be affected when actually reading on disk information.
1388 1395
1389 1396 The relevant values are returned for use in _loadindex().
1390 1397
1391 1398 * newversionflags:
1392 1399 version header to use if we need to create a new revlog
1393 1400
1394 1401 * mmapindexthreshold:
1395 1402 minimal index size at which to start using mmap
1396 1403
1397 1404 * force_nodemap:
1398 1405 force the usage of a "development" version of the nodemap code
1399 1406 """
1400 1407 opts = self.opener.options
1401 1408
1402 1409 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1403 1410 new_header = CHANGELOGV2
1404 1411 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1405 1412 self.feature_config.compute_rank = compute_rank
1406 1413 elif b'revlogv2' in opts:
1407 1414 new_header = REVLOGV2
1408 1415 elif b'revlogv1' in opts:
1409 1416 new_header = REVLOGV1 | FLAG_INLINE_DATA
1410 1417 if b'generaldelta' in opts:
1411 1418 new_header |= FLAG_GENERALDELTA
1412 1419 elif b'revlogv0' in self.opener.options:
1413 1420 new_header = REVLOGV0
1414 1421 else:
1415 1422 new_header = REVLOG_DEFAULT_VERSION
1416 1423
1417 1424 mmapindexthreshold = None
1418 1425 if self.data_config.mmap_large_index:
1419 1426 mmapindexthreshold = self.data_config.mmap_index_threshold
1420 1427 if self.feature_config.enable_ellipsis:
1421 1428 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1422 1429
1423 1430 # revlog v0 doesn't have flag processors
1424 1431 for flag, processor in opts.get(b'flagprocessors', {}).items():
1425 1432 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1426 1433
1427 1434 chunk_cache_size = self.data_config.chunk_cache_size
1428 1435 if chunk_cache_size <= 0:
1429 1436 raise error.RevlogError(
1430 1437 _(b'revlog chunk cache size %r is not greater than 0')
1431 1438 % chunk_cache_size
1432 1439 )
1433 1440 elif chunk_cache_size & (chunk_cache_size - 1):
1434 1441 raise error.RevlogError(
1435 1442 _(b'revlog chunk cache size %r is not a power of 2')
1436 1443 % chunk_cache_size
1437 1444 )
1438 1445 force_nodemap = opts.get(b'devel-force-nodemap', False)
1439 1446 return new_header, mmapindexthreshold, force_nodemap
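
The chunk-cache validation above relies on a standard bit trick worth spelling out: for n > 0, n & (n - 1) clears the lowest set bit, so the expression is zero exactly when n has a single bit set, i.e. is a power of two. A minimal illustration:

def is_power_of_two(n):
    return n > 0 and (n & (n - 1)) == 0

assert is_power_of_two(65536)
assert not is_power_of_two(65535)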
1440 1447
1441 1448 def _get_data(self, filepath, mmap_threshold, size=None):
1442 1449 """return a file content with or without mmap
1443 1450
1444 1451 If the file is missing, return the empty string"""
1445 1452 try:
1446 1453 with self.opener(filepath) as fp:
1447 1454 if mmap_threshold is not None:
1448 1455 file_size = self.opener.fstat(fp).st_size
1449 1456 if file_size >= mmap_threshold:
1450 1457 if size is not None:
1451 1458 # avoid potential mmap crash
1452 1459 size = min(file_size, size)
1453 1460 # TODO: we should .close() the mmap to release resources without
1454 1461 # relying on Python GC
1455 1462 if size is None:
1456 1463 return util.buffer(util.mmapread(fp))
1457 1464 else:
1458 1465 return util.buffer(util.mmapread(fp, size))
1459 1466 if size is None:
1460 1467 return fp.read()
1461 1468 else:
1462 1469 return fp.read(size)
1463 1470 except FileNotFoundError:
1464 1471 return b''
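
A rough standalone sketch of the same size-threshold pattern, assuming plain files and a positive threshold rather than a Mercurial vfs (names are illustrative):

import mmap
import os

def read_possibly_mmapped(path, threshold):
    with open(path, 'rb') as fp:
        if threshold is not None and os.fstat(fp.fileno()).st_size >= threshold:
            # the mapping stays valid after fp is closed; it holds its
            # own reference to the underlying file
            return mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
        return fp.read()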
1465 1472
1466 1473 def get_streams(self, max_linkrev, force_inline=False):
1467 1474 """return a list of streams that represent this revlog
1468 1475
1469 1476 This is used by stream-clone to do byte-for-byte copies of a repository.
1470 1477
1471 1478 This streams data for all revisions that refer to a changelog revision up
1472 1479 to `max_linkrev`.
1473 1480
1474 1481 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1475 1482
1476 1483 It returns a list of three-tuples:
1477 1484
1478 1485 [
1479 1486 (filename, bytes_stream, stream_size),
1480 1487 …
1481 1488 ]
1482 1489 """
1483 1490 n = len(self)
1484 1491 index = self.index
1485 1492 while n > 0:
1486 1493 linkrev = index[n - 1][4]
1487 1494 if linkrev < max_linkrev:
1488 1495 break
1489 1496 # note: this loop will rarely go through multiple iterations, since
1490 1497 # it only traverses commits created during the current streaming
1491 1498 # pull operation.
1492 1499 #
1493 1500 # If this becomes a problem, using a binary search should cap the
1494 1501 # runtime of this.
1495 1502 n = n - 1
1496 1503 if n == 0:
1497 1504 # no data to send
1498 1505 return []
1499 1506 index_size = n * index.entry_size
1500 1507 data_size = self.end(n - 1)
1501 1508
1502 1509 # XXX we might have been split (or stripped) since the object was
1503 1510 # initialized. We need to close this race too, by having a way to
1504 1511 # pre-open the files we feed to the revlog and never closing them
1505 1512 # before we are done streaming.
1506 1513
1507 1514 if self._inline:
1508 1515
1509 1516 def get_stream():
1510 1517 with self.opener(self._indexfile, mode=b"r") as fp:
1511 1518 yield None
1512 1519 size = index_size + data_size
1513 1520 if size <= 65536:
1514 1521 yield fp.read(size)
1515 1522 else:
1516 1523 yield from util.filechunkiter(fp, limit=size)
1517 1524
1518 1525 inline_stream = get_stream()
1519 1526 next(inline_stream)
1520 1527 return [
1521 1528 (self._indexfile, inline_stream, index_size + data_size),
1522 1529 ]
1523 1530 elif force_inline:
1524 1531
1525 1532 def get_stream():
1526 1533 with self.reading():
1527 1534 yield None
1528 1535
1529 1536 for rev in range(n):
1530 1537 idx = self.index.entry_binary(rev)
1531 1538 if rev == 0 and self._docket is None:
1532 1539 # re-inject the inline flag
1533 1540 header = self._format_flags
1534 1541 header |= self._format_version
1535 1542 header |= FLAG_INLINE_DATA
1536 1543 header = self.index.pack_header(header)
1537 1544 idx = header + idx
1538 1545 yield idx
1539 1546 yield self._inner.get_segment_for_revs(rev, rev)[1]
1540 1547
1541 1548 inline_stream = get_stream()
1542 1549 next(inline_stream)
1543 1550 return [
1544 1551 (self._indexfile, inline_stream, index_size + data_size),
1545 1552 ]
1546 1553 else:
1547 1554
1548 1555 def get_index_stream():
1549 1556 with self.opener(self._indexfile, mode=b"r") as fp:
1550 1557 yield None
1551 1558 if index_size <= 65536:
1552 1559 yield fp.read(index_size)
1553 1560 else:
1554 1561 yield from util.filechunkiter(fp, limit=index_size)
1555 1562
1556 1563 def get_data_stream():
1557 1564 with self._datafp() as fp:
1558 1565 yield None
1559 1566 if data_size <= 65536:
1560 1567 yield fp.read(data_size)
1561 1568 else:
1562 1569 yield from util.filechunkiter(fp, limit=data_size)
1563 1570
1564 1571 index_stream = get_index_stream()
1565 1572 next(index_stream)
1566 1573 data_stream = get_data_stream()
1567 1574 next(data_stream)
1568 1575 return [
1569 1576 (self._datafile, data_stream, data_size),
1570 1577 (self._indexfile, index_stream, index_size),
1571 1578 ]
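
All three branches above use the same "primed generator" idiom: each generator opens its file and yields None immediately, so the next(...) call forces the open to happen right away while the actual bytes are still streamed lazily. A minimal sketch of the idiom, under the assumption of a plain file path:

def make_stream(path, size):
    def gen():
        with open(path, 'rb') as fp:
            yield None                # the file is held open from here on
            remaining = size
            while remaining > 0:
                chunk = fp.read(min(65536, remaining))
                if not chunk:
                    break
                yield chunk
                remaining -= len(chunk)
    stream = gen()
    next(stream)                      # prime it: open the file eagerly
    return stream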
1572 1579
1573 1580 def _loadindex(self, docket=None):
1574 1581
1575 1582 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1576 1583
1577 1584 if self.postfix is not None:
1578 1585 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1579 1586 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1580 1587 entry_point = b'%s.i.a' % self.radix
1581 1588 elif self._try_split and self.opener.exists(self._split_index_file):
1582 1589 entry_point = self._split_index_file
1583 1590 else:
1584 1591 entry_point = b'%s.i' % self.radix
1585 1592
1586 1593 if docket is not None:
1587 1594 self._docket = docket
1588 1595 self._docket_file = entry_point
1589 1596 else:
1590 1597 self._initempty = True
1591 1598 entry_data = self._get_data(entry_point, mmapindexthreshold)
1592 1599 if len(entry_data) > 0:
1593 1600 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1594 1601 self._initempty = False
1595 1602 else:
1596 1603 header = new_header
1597 1604
1598 1605 self._format_flags = header & ~0xFFFF
1599 1606 self._format_version = header & 0xFFFF
1600 1607
1601 1608 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1602 1609 if supported_flags is None:
1603 1610 msg = _(b'unknown version (%d) in revlog %s')
1604 1611 msg %= (self._format_version, self.display_id)
1605 1612 raise error.RevlogError(msg)
1606 1613 elif self._format_flags & ~supported_flags:
1607 1614 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1608 1615 display_flag = self._format_flags >> 16
1609 1616 msg %= (display_flag, self._format_version, self.display_id)
1610 1617 raise error.RevlogError(msg)
1611 1618
1612 1619 features = FEATURES_BY_VERSION[self._format_version]
1613 1620 self._inline = features[b'inline'](self._format_flags)
1614 1621 self.delta_config.general_delta = features[b'generaldelta'](
1615 1622 self._format_flags
1616 1623 )
1617 1624 self.feature_config.has_side_data = features[b'sidedata']
1618 1625
1619 1626 if not features[b'docket']:
1620 1627 self._indexfile = entry_point
1621 1628 index_data = entry_data
1622 1629 else:
1623 1630 self._docket_file = entry_point
1624 1631 if self._initempty:
1625 1632 self._docket = docketutil.default_docket(self, header)
1626 1633 else:
1627 1634 self._docket = docketutil.parse_docket(
1628 1635 self, entry_data, use_pending=self._trypending
1629 1636 )
1630 1637
1631 1638 if self._docket is not None:
1632 1639 self._indexfile = self._docket.index_filepath()
1633 1640 index_data = b''
1634 1641 index_size = self._docket.index_end
1635 1642 if index_size > 0:
1636 1643 index_data = self._get_data(
1637 1644 self._indexfile, mmapindexthreshold, size=index_size
1638 1645 )
1639 1646 if len(index_data) < index_size:
1640 1647 msg = _(b'too few index data for %s: got %d, expected %d')
1641 1648 msg %= (self.display_id, len(index_data), index_size)
1642 1649 raise error.RevlogError(msg)
1643 1650
1644 1651 self._inline = False
1645 1652 # generaldelta is implied by version 2 revlogs.
1646 1653 self.delta_config.general_delta = True
1647 1654 # the logic for persistent nodemap will be dealt with within the
1648 1655 # main docket, so disable it for now.
1649 1656 self._nodemap_file = None
1650 1657
1651 1658 if self._docket is not None:
1652 1659 self._datafile = self._docket.data_filepath()
1653 1660 self._sidedatafile = self._docket.sidedata_filepath()
1654 1661 elif self.postfix is None:
1655 1662 self._datafile = b'%s.d' % self.radix
1656 1663 else:
1657 1664 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1658 1665
1659 1666 self.nodeconstants = sha1nodeconstants
1660 1667 self.nullid = self.nodeconstants.nullid
1661 1668
1662 1669 # sparse-revlog can't be on without general-delta (issue6056)
1663 1670 if not self.delta_config.general_delta:
1664 1671 self.delta_config.sparse_revlog = False
1665 1672
1666 1673 self._storedeltachains = True
1667 1674
1668 1675 devel_nodemap = (
1669 1676 self._nodemap_file
1670 1677 and force_nodemap
1671 1678 and parse_index_v1_nodemap is not None
1672 1679 )
1673 1680
1674 1681 use_rust_index = False
1675 1682 if rustrevlog is not None:
1676 1683 if self._nodemap_file is not None:
1677 1684 use_rust_index = True
1678 1685 else:
1679 1686 use_rust_index = self.opener.options.get(b'rust.index')
1680 1687
1681 1688 self._parse_index = parse_index_v1
1682 1689 if self._format_version == REVLOGV0:
1683 1690 self._parse_index = revlogv0.parse_index_v0
1684 1691 elif self._format_version == REVLOGV2:
1685 1692 self._parse_index = parse_index_v2
1686 1693 elif self._format_version == CHANGELOGV2:
1687 1694 self._parse_index = parse_index_cl_v2
1688 1695 elif devel_nodemap:
1689 1696 self._parse_index = parse_index_v1_nodemap
1690 1697 elif use_rust_index:
1691 1698 self._parse_index = parse_index_v1_mixed
1692 1699 try:
1693 1700 d = self._parse_index(index_data, self._inline)
1694 1701 index, chunkcache = d
1695 1702 use_nodemap = (
1696 1703 not self._inline
1697 1704 and self._nodemap_file is not None
1698 1705 and hasattr(index, 'update_nodemap_data')
1699 1706 )
1700 1707 if use_nodemap:
1701 1708 nodemap_data = nodemaputil.persisted_data(self)
1702 1709 if nodemap_data is not None:
1703 1710 docket = nodemap_data[0]
1704 1711 if (
1705 1712 len(d[0]) > docket.tip_rev
1706 1713 and d[0][docket.tip_rev][7] == docket.tip_node
1707 1714 ):
1708 1715 # no changelog tampering
1709 1716 self._nodemap_docket = docket
1710 1717 index.update_nodemap_data(*nodemap_data)
1711 1718 except (ValueError, IndexError):
1712 1719 raise error.RevlogError(
1713 1720 _(b"index %s is corrupted") % self.display_id
1714 1721 )
1715 1722 self.index = index
1716 1723 # revnum -> (chain-length, sum-delta-length)
1717 1724 self._chaininfocache = util.lrucachedict(500)
1718 1725
1719 1726 return chunkcache
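
A small sketch of the header split performed near the top of _loadindex: the first four bytes of the index hold one big-endian word whose low 16 bits are the format version and whose remaining high bits carry per-version feature flags (the example flag value assumes the conventional inline-data bit at 1 << 16):

import struct

def split_header(first_four_bytes):
    header = struct.unpack('>I', first_four_bytes)[0]
    return header & ~0xFFFF, header & 0xFFFF  # (format_flags, format_version)

flags, version = split_header(struct.pack('>I', (1 << 16) | 1))
assert version == 1 and flags == (1 << 16)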
1720 1727
1721 1728 def _load_inner(self, chunk_cache):
1722 1729 if self._docket is None:
1723 1730 default_compression_header = None
1724 1731 else:
1725 1732 default_compression_header = self._docket.default_compression_header
1726 1733
1727 1734 self._inner = _InnerRevlog(
1728 1735 opener=self.opener,
1729 1736 index=self.index,
1730 1737 index_file=self._indexfile,
1731 1738 data_file=self._datafile,
1732 1739 sidedata_file=self._sidedatafile,
1733 1740 inline=self._inline,
1734 1741 data_config=self.data_config,
1735 1742 delta_config=self.delta_config,
1736 1743 feature_config=self.feature_config,
1737 1744 chunk_cache=chunk_cache,
1738 1745 default_compression_header=default_compression_header,
1739 1746 )
1740 1747
1741 1748 def get_revlog(self):
1742 1749 """simple function to mirror API of other not-really-revlog API"""
1743 1750 return self
1744 1751
1745 1752 @util.propertycache
1746 1753 def revlog_kind(self):
1747 1754 return self.target[0]
1748 1755
1749 1756 @util.propertycache
1750 1757 def display_id(self):
1751 1758 """The public facing "ID" of the revlog that we use in message"""
1752 1759 if self.revlog_kind == KIND_FILELOG:
1753 1760 # Reference the file without the "data/" prefix, so it is familiar
1754 1761 # to the user.
1755 1762 return self.target[1]
1756 1763 else:
1757 1764 return self.radix
1758 1765
1759 1766 def _datafp(self, mode=b'r'):
1760 1767 """file object for the revlog's data file"""
1761 1768 return self.opener(self._datafile, mode=mode)
1762 1769
1763 1770 def tiprev(self):
1764 1771 return len(self.index) - 1
1765 1772
1766 1773 def tip(self):
1767 1774 return self.node(self.tiprev())
1768 1775
1769 1776 def __contains__(self, rev):
1770 1777 return 0 <= rev < len(self)
1771 1778
1772 1779 def __len__(self):
1773 1780 return len(self.index)
1774 1781
1775 1782 def __iter__(self):
1776 1783 return iter(range(len(self)))
1777 1784
1778 1785 def revs(self, start=0, stop=None):
1779 1786 """iterate over all rev in this revlog (from start to stop)"""
1780 1787 return storageutil.iterrevs(len(self), start=start, stop=stop)
1781 1788
1782 1789 def hasnode(self, node):
1783 1790 try:
1784 1791 self.rev(node)
1785 1792 return True
1786 1793 except KeyError:
1787 1794 return False
1788 1795
1789 1796 def _candelta(self, baserev, rev):
1790 1797 """whether two revisions (baserev, rev) can be delta-ed or not"""
1791 1798 # Disable delta if either rev requires a content-changing flag
1792 1799 # processor (ex. LFS). This is because such flag processor can alter
1793 1800 # the rawtext content that the delta will be based on, and two clients
1794 1801 # could have the same revlog node with different flags (i.e. different
1795 1802 # rawtext contents) and the delta could be incompatible.
1796 1803 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1797 1804 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1798 1805 ):
1799 1806 return False
1800 1807 return True
1801 1808
1802 1809 def update_caches(self, transaction):
1803 1810 """update on disk cache
1804 1811
1805 1812 If a transaction is passed, the update may be delayed to transaction
1806 1813 commit."""
1807 1814 if self._nodemap_file is not None:
1808 1815 if transaction is None:
1809 1816 nodemaputil.update_persistent_nodemap(self)
1810 1817 else:
1811 1818 nodemaputil.setup_persistent_nodemap(transaction, self)
1812 1819
1813 1820 def clearcaches(self):
1814 1821 """Clear in-memory caches"""
1815 1822 self._chainbasecache.clear()
1816 1823 self._inner.clear_cache()
1817 1824 self._pcache = {}
1818 1825 self._nodemap_docket = None
1819 1826 self.index.clearcaches()
1820 1827 # The python code is the one responsible for validating the docket, so
1821 1828 # we end up having to refresh it here.
1822 1829 use_nodemap = (
1823 1830 not self._inline
1824 1831 and self._nodemap_file is not None
1825 1832 and hasattr(self.index, 'update_nodemap_data')
1826 1833 )
1827 1834 if use_nodemap:
1828 1835 nodemap_data = nodemaputil.persisted_data(self)
1829 1836 if nodemap_data is not None:
1830 1837 self._nodemap_docket = nodemap_data[0]
1831 1838 self.index.update_nodemap_data(*nodemap_data)
1832 1839
1833 1840 def rev(self, node):
1834 1841 """return the revision number associated with a <nodeid>"""
1835 1842 try:
1836 1843 return self.index.rev(node)
1837 1844 except TypeError:
1838 1845 raise
1839 1846 except error.RevlogError:
1840 1847 # parsers.c radix tree lookup failed
1841 1848 if (
1842 1849 node == self.nodeconstants.wdirid
1843 1850 or node in self.nodeconstants.wdirfilenodeids
1844 1851 ):
1845 1852 raise error.WdirUnsupported
1846 1853 raise error.LookupError(node, self.display_id, _(b'no node'))
1847 1854
1848 1855 # Accessors for index entries.
1849 1856
1850 1857 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1851 1858 # are flags.
1852 1859 def start(self, rev):
1853 1860 return int(self.index[rev][0] >> 16)
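
The packing that start() and flags() undo is easy to state in isolation: the first index field keeps the data offset in the high bits and the flag bits in the low 16 bits of one integer. A hypothetical round-trip:

def pack_offset_flags(offset, flags):
    assert 0 <= flags <= 0xFFFF
    return (offset << 16) | flags

def unpack_offset_flags(field):
    return field >> 16, field & 0xFFFF

assert unpack_offset_flags(pack_offset_flags(1024, 0x1)) == (1024, 0x1)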
1854 1861
1855 1862 def sidedata_cut_off(self, rev):
1856 1863 sd_cut_off = self.index[rev][8]
1857 1864 if sd_cut_off != 0:
1858 1865 return sd_cut_off
1859 1866 # This is some annoying dance, because entries without sidedata
1860 1867 # currently use 0 as their offset (instead of previous-offset +
1861 1868 # previous-size).
1862 1869 #
1863 1870 # We should reconsider this "no sidedata -> 0 sidedata_offset" policy.
1864 1871 # In the meantime, we need this.
1865 1872 while 0 <= rev:
1866 1873 e = self.index[rev]
1867 1874 if e[9] != 0:
1868 1875 return e[8] + e[9]
1869 1876 rev -= 1
1870 1877 return 0
1871 1878
1872 1879 def flags(self, rev):
1873 1880 return self.index[rev][0] & 0xFFFF
1874 1881
1875 1882 def length(self, rev):
1876 1883 return self.index[rev][1]
1877 1884
1878 1885 def sidedata_length(self, rev):
1879 1886 if not self.feature_config.has_side_data:
1880 1887 return 0
1881 1888 return self.index[rev][9]
1882 1889
1883 1890 def rawsize(self, rev):
1884 1891 """return the length of the uncompressed text for a given revision"""
1885 1892 l = self.index[rev][2]
1886 1893 if l >= 0:
1887 1894 return l
1888 1895
1889 1896 t = self.rawdata(rev)
1890 1897 return len(t)
1891 1898
1892 1899 def size(self, rev):
1893 1900 """length of non-raw text (processed by a "read" flag processor)"""
1894 1901 # fast path: if no "read" flag processor could change the content,
1895 1902 # size is rawsize. note: ELLIPSIS is known to not change the content.
1896 1903 flags = self.flags(rev)
1897 1904 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1898 1905 return self.rawsize(rev)
1899 1906
1900 1907 return len(self.revision(rev))
1901 1908
1902 1909 def fast_rank(self, rev):
1903 1910 """Return the rank of a revision if already known, or None otherwise.
1904 1911
1905 1912 The rank of a revision is the size of the sub-graph it defines as a
1906 1913 head. Equivalently, the rank of a revision `r` is the size of the set
1907 1914 `ancestors(r)`, `r` included.
1908 1915
1909 1916 This method returns the rank retrieved from the revlog in constant
1910 1917 time. It makes no attempt at computing unknown values for versions of
1911 1918 the revlog which do not persist the rank.
1912 1919 """
1913 1920 rank = self.index[rev][ENTRY_RANK]
1914 1921 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1915 1922 return None
1916 1923 if rev == nullrev:
1917 1924 return 0 # convention
1918 1925 return rank
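
For contrast with the constant-time lookup above, a naive reference computation of the same quantity, walking parent revisions (an editor's sketch; parentrevs is assumed to behave like revlog.parentrevs):

def naive_rank(parentrevs, rev, nullrev=-1):
    """size of ancestors(rev), rev included"""
    seen = set()
    stack = [rev]
    while stack:
        r = stack.pop()
        if r == nullrev or r in seen:
            continue
        seen.add(r)
        stack.extend(parentrevs(r))
    return len(seen)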
1919 1926
1920 1927 def chainbase(self, rev):
1921 1928 base = self._chainbasecache.get(rev)
1922 1929 if base is not None:
1923 1930 return base
1924 1931
1925 1932 index = self.index
1926 1933 iterrev = rev
1927 1934 base = index[iterrev][3]
1928 1935 while base != iterrev:
1929 1936 iterrev = base
1930 1937 base = index[iterrev][3]
1931 1938
1932 1939 self._chainbasecache[rev] = base
1933 1940 return base
1934 1941
1935 1942 def linkrev(self, rev):
1936 1943 return self.index[rev][4]
1937 1944
1938 1945 def parentrevs(self, rev):
1939 1946 try:
1940 1947 entry = self.index[rev]
1941 1948 except IndexError:
1942 1949 if rev == wdirrev:
1943 1950 raise error.WdirUnsupported
1944 1951 raise
1945 1952
1946 1953 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1947 1954 return entry[6], entry[5]
1948 1955 else:
1949 1956 return entry[5], entry[6]
1950 1957
1951 1958 # fast parentrevs(rev) where rev isn't filtered
1952 1959 _uncheckedparentrevs = parentrevs
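
The canonical-parent-order branch above reduces to a tiny rule: when the first stored parent is null, report the parents swapped so any real parent comes first. A standalone restatement:

def canonical_parents(p1, p2, nullrev=-1):
    if p1 == nullrev:
        return p2, p1
    return p1, p2

assert canonical_parents(-1, 5) == (5, -1)
assert canonical_parents(3, -1) == (3, -1)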
1953 1960
1954 1961 def node(self, rev):
1955 1962 try:
1956 1963 return self.index[rev][7]
1957 1964 except IndexError:
1958 1965 if rev == wdirrev:
1959 1966 raise error.WdirUnsupported
1960 1967 raise
1961 1968
1962 1969 # Derived from index values.
1963 1970
1964 1971 def end(self, rev):
1965 1972 return self.start(rev) + self.length(rev)
1966 1973
1967 1974 def parents(self, node):
1968 1975 i = self.index
1969 1976 d = i[self.rev(node)]
1970 1977 # inline node() to avoid function call overhead
1971 1978 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1972 1979 return i[d[6]][7], i[d[5]][7]
1973 1980 else:
1974 1981 return i[d[5]][7], i[d[6]][7]
1975 1982
1976 1983 def chainlen(self, rev):
1977 1984 return self._chaininfo(rev)[0]
1978 1985
1979 1986 def _chaininfo(self, rev):
1980 1987 chaininfocache = self._chaininfocache
1981 1988 if rev in chaininfocache:
1982 1989 return chaininfocache[rev]
1983 1990 index = self.index
1984 1991 generaldelta = self.delta_config.general_delta
1985 1992 iterrev = rev
1986 1993 e = index[iterrev]
1987 1994 clen = 0
1988 1995 compresseddeltalen = 0
1989 1996 while iterrev != e[3]:
1990 1997 clen += 1
1991 1998 compresseddeltalen += e[1]
1992 1999 if generaldelta:
1993 2000 iterrev = e[3]
1994 2001 else:
1995 2002 iterrev -= 1
1996 2003 if iterrev in chaininfocache:
1997 2004 t = chaininfocache[iterrev]
1998 2005 clen += t[0]
1999 2006 compresseddeltalen += t[1]
2000 2007 break
2001 2008 e = index[iterrev]
2002 2009 else:
2003 2010 # Add text length of base since decompressing that also takes
2004 2011 # work. For cache hits the length is already included.
2005 2012 compresseddeltalen += e[1]
2006 2013 r = (clen, compresseddeltalen)
2007 2014 chaininfocache[rev] = r
2008 2015 return r
2009 2016
2010 2017 def _deltachain(self, rev, stoprev=None):
2011 2018 return self._inner._deltachain(rev, stoprev=stoprev)
2012 2019
2013 2020 def ancestors(self, revs, stoprev=0, inclusive=False):
2014 2021 """Generate the ancestors of 'revs' in reverse revision order.
2015 2022 Does not generate revs lower than stoprev.
2016 2023
2017 2024 See the documentation for ancestor.lazyancestors for more details."""
2018 2025
2019 2026 # first, make sure start revisions aren't filtered
2020 2027 revs = list(revs)
2021 2028 checkrev = self.node
2022 2029 for r in revs:
2023 2030 checkrev(r)
2024 2031 # and we're sure ancestors aren't filtered as well
2025 2032
2026 2033 if rustancestor is not None and self.index.rust_ext_compat:
2027 2034 lazyancestors = rustancestor.LazyAncestors
2028 2035 arg = self.index
2029 2036 else:
2030 2037 lazyancestors = ancestor.lazyancestors
2031 2038 arg = self._uncheckedparentrevs
2032 2039 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2033 2040
2034 2041 def descendants(self, revs):
2035 2042 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2036 2043
2037 2044 def findcommonmissing(self, common=None, heads=None):
2038 2045 """Return a tuple of the ancestors of common and the ancestors of heads
2039 2046 that are not ancestors of common. In revset terminology, we return the
2040 2047 tuple:
2041 2048
2042 2049 ::common, (::heads) - (::common)
2043 2050
2044 2051 The list is sorted by revision number, meaning it is
2045 2052 topologically sorted.
2046 2053
2047 2054 'heads' and 'common' are both lists of node IDs. If heads is
2048 2055 not supplied, uses all of the revlog's heads. If common is not
2049 2056 supplied, uses nullid."""
2050 2057 if common is None:
2051 2058 common = [self.nullid]
2052 2059 if heads is None:
2053 2060 heads = self.heads()
2054 2061
2055 2062 common = [self.rev(n) for n in common]
2056 2063 heads = [self.rev(n) for n in heads]
2057 2064
2058 2065 # we want the ancestors, but inclusive
2059 2066 class lazyset:
2060 2067 def __init__(self, lazyvalues):
2061 2068 self.addedvalues = set()
2062 2069 self.lazyvalues = lazyvalues
2063 2070
2064 2071 def __contains__(self, value):
2065 2072 return value in self.addedvalues or value in self.lazyvalues
2066 2073
2067 2074 def __iter__(self):
2068 2075 added = self.addedvalues
2069 2076 for r in added:
2070 2077 yield r
2071 2078 for r in self.lazyvalues:
2072 2079 if r not in added:
2073 2080 yield r
2074 2081
2075 2082 def add(self, value):
2076 2083 self.addedvalues.add(value)
2077 2084
2078 2085 def update(self, values):
2079 2086 self.addedvalues.update(values)
2080 2087
2081 2088 has = lazyset(self.ancestors(common))
2082 2089 has.add(nullrev)
2083 2090 has.update(common)
2084 2091
2085 2092 # take all ancestors from heads that aren't in has
2086 2093 missing = set()
2087 2094 visit = collections.deque(r for r in heads if r not in has)
2088 2095 while visit:
2089 2096 r = visit.popleft()
2090 2097 if r in missing:
2091 2098 continue
2092 2099 else:
2093 2100 missing.add(r)
2094 2101 for p in self.parentrevs(r):
2095 2102 if p not in has:
2096 2103 visit.append(p)
2097 2104 missing = list(missing)
2098 2105 missing.sort()
2099 2106 return has, [self.node(miss) for miss in missing]
2100 2107
2101 2108 def incrementalmissingrevs(self, common=None):
2102 2109 """Return an object that can be used to incrementally compute the
2103 2110 revision numbers of the ancestors of arbitrary sets that are not
2104 2111 ancestors of common. This is an ancestor.incrementalmissingancestors
2105 2112 object.
2106 2113
2107 2114 'common' is a list of revision numbers. If common is not supplied, uses
2108 2115 nullrev.
2109 2116 """
2110 2117 if common is None:
2111 2118 common = [nullrev]
2112 2119
2113 2120 if rustancestor is not None and self.index.rust_ext_compat:
2114 2121 return rustancestor.MissingAncestors(self.index, common)
2115 2122 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2116 2123
2117 2124 def findmissingrevs(self, common=None, heads=None):
2118 2125 """Return the revision numbers of the ancestors of heads that
2119 2126 are not ancestors of common.
2120 2127
2121 2128 More specifically, return a list of revision numbers corresponding to
2122 2129 nodes N such that every N satisfies the following constraints:
2123 2130
2124 2131 1. N is an ancestor of some node in 'heads'
2125 2132 2. N is not an ancestor of any node in 'common'
2126 2133
2127 2134 The list is sorted by revision number, meaning it is
2128 2135 topologically sorted.
2129 2136
2130 2137 'heads' and 'common' are both lists of revision numbers. If heads is
2131 2138 not supplied, uses all of the revlog's heads. If common is not
2132 2139 supplied, uses nullid."""
2133 2140 if common is None:
2134 2141 common = [nullrev]
2135 2142 if heads is None:
2136 2143 heads = self.headrevs()
2137 2144
2138 2145 inc = self.incrementalmissingrevs(common=common)
2139 2146 return inc.missingancestors(heads)
2140 2147
2141 2148 def findmissing(self, common=None, heads=None):
2142 2149 """Return the ancestors of heads that are not ancestors of common.
2143 2150
2144 2151 More specifically, return a list of nodes N such that every N
2145 2152 satisfies the following constraints:
2146 2153
2147 2154 1. N is an ancestor of some node in 'heads'
2148 2155 2. N is not an ancestor of any node in 'common'
2149 2156
2150 2157 The list is sorted by revision number, meaning it is
2151 2158 topologically sorted.
2152 2159
2153 2160 'heads' and 'common' are both lists of node IDs. If heads is
2154 2161 not supplied, uses all of the revlog's heads. If common is not
2155 2162 supplied, uses nullid."""
2156 2163 if common is None:
2157 2164 common = [self.nullid]
2158 2165 if heads is None:
2159 2166 heads = self.heads()
2160 2167
2161 2168 common = [self.rev(n) for n in common]
2162 2169 heads = [self.rev(n) for n in heads]
2163 2170
2164 2171 inc = self.incrementalmissingrevs(common=common)
2165 2172 return [self.node(r) for r in inc.missingancestors(heads)]
2166 2173
2167 2174 def nodesbetween(self, roots=None, heads=None):
2168 2175 """Return a topological path from 'roots' to 'heads'.
2169 2176
2170 2177 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2171 2178 topologically sorted list of all nodes N that satisfy both of
2172 2179 these constraints:
2173 2180
2174 2181 1. N is a descendant of some node in 'roots'
2175 2182 2. N is an ancestor of some node in 'heads'
2176 2183
2177 2184 Every node is considered to be both a descendant and an ancestor
2178 2185 of itself, so every reachable node in 'roots' and 'heads' will be
2179 2186 included in 'nodes'.
2180 2187
2181 2188 'outroots' is the list of reachable nodes in 'roots', i.e., the
2182 2189 subset of 'roots' that is returned in 'nodes'. Likewise,
2183 2190 'outheads' is the subset of 'heads' that is also in 'nodes'.
2184 2191
2185 2192 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2186 2193 unspecified, uses nullid as the only root. If 'heads' is
2187 2194 unspecified, uses list of all of the revlog's heads."""
2188 2195 nonodes = ([], [], [])
2189 2196 if roots is not None:
2190 2197 roots = list(roots)
2191 2198 if not roots:
2192 2199 return nonodes
2193 2200 lowestrev = min([self.rev(n) for n in roots])
2194 2201 else:
2195 2202 roots = [self.nullid] # Everybody's a descendant of nullid
2196 2203 lowestrev = nullrev
2197 2204 if (lowestrev == nullrev) and (heads is None):
2198 2205 # We want _all_ the nodes!
2199 2206 return (
2200 2207 [self.node(r) for r in self],
2201 2208 [self.nullid],
2202 2209 list(self.heads()),
2203 2210 )
2204 2211 if heads is None:
2205 2212 # All nodes are ancestors, so the latest ancestor is the last
2206 2213 # node.
2207 2214 highestrev = len(self) - 1
2208 2215 # Set ancestors to None to signal that every node is an ancestor.
2209 2216 ancestors = None
2210 2217 # Set heads to an empty dictionary for later discovery of heads
2211 2218 heads = {}
2212 2219 else:
2213 2220 heads = list(heads)
2214 2221 if not heads:
2215 2222 return nonodes
2216 2223 ancestors = set()
2217 2224 # Turn heads into a dictionary so we can remove 'fake' heads.
2218 2225 # Also, later we will be using it to filter out the heads we can't
2219 2226 # find from roots.
2220 2227 heads = dict.fromkeys(heads, False)
2221 2228 # Start at the top and keep marking parents until we're done.
2222 2229 nodestotag = set(heads)
2223 2230 # Remember where the top was so we can use it as a limit later.
2224 2231 highestrev = max([self.rev(n) for n in nodestotag])
2225 2232 while nodestotag:
2226 2233 # grab a node to tag
2227 2234 n = nodestotag.pop()
2228 2235 # Never tag nullid
2229 2236 if n == self.nullid:
2230 2237 continue
2231 2238 # A node's revision number represents its place in a
2232 2239 # topologically sorted list of nodes.
2233 2240 r = self.rev(n)
2234 2241 if r >= lowestrev:
2235 2242 if n not in ancestors:
2236 2243 # If we are possibly a descendant of one of the roots
2237 2244 # and we haven't already been marked as an ancestor
2238 2245 ancestors.add(n) # Mark as ancestor
2239 2246 # Add non-nullid parents to list of nodes to tag.
2240 2247 nodestotag.update(
2241 2248 [p for p in self.parents(n) if p != self.nullid]
2242 2249 )
2243 2250 elif n in heads: # We've seen it before, is it a fake head?
2244 2251 # So it is, real heads should not be the ancestors of
2245 2252 # any other heads.
2246 2253 heads.pop(n)
2247 2254 if not ancestors:
2248 2255 return nonodes
2249 2256 # Now that we have our set of ancestors, we want to remove any
2250 2257 # roots that are not ancestors.
2251 2258
2252 2259 # If one of the roots was nullid, everything is included anyway.
2253 2260 if lowestrev > nullrev:
2254 2261 # But, since we weren't, let's recompute the lowest rev to not
2255 2262 # include roots that aren't ancestors.
2256 2263
2257 2264 # Filter out roots that aren't ancestors of heads
2258 2265 roots = [root for root in roots if root in ancestors]
2259 2266 # Recompute the lowest revision
2260 2267 if roots:
2261 2268 lowestrev = min([self.rev(root) for root in roots])
2262 2269 else:
2263 2270 # No more roots? Return empty list
2264 2271 return nonodes
2265 2272 else:
2266 2273 # We are descending from nullid, and don't need to care about
2267 2274 # any other roots.
2268 2275 lowestrev = nullrev
2269 2276 roots = [self.nullid]
2270 2277 # Transform our roots list into a set.
2271 2278 descendants = set(roots)
2272 2279 # Also, keep the original roots so we can filter out roots that aren't
2273 2280 # 'real' roots (i.e. are descended from other roots).
2274 2281 roots = descendants.copy()
2275 2282 # Our topologically sorted list of output nodes.
2276 2283 orderedout = []
2277 2284 # Don't start at nullid since we don't want nullid in our output list,
2278 2285 # and if nullid shows up in descendants, empty parents will look like
2279 2286 # they're descendants.
2280 2287 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2281 2288 n = self.node(r)
2282 2289 isdescendant = False
2283 2290 if lowestrev == nullrev: # Everybody is a descendant of nullid
2284 2291 isdescendant = True
2285 2292 elif n in descendants:
2286 2293 # n is already a descendant
2287 2294 isdescendant = True
2288 2295 # This check only needs to be done here because all the roots
2289 2296 # will start being marked as descendants before the loop.
2290 2297 if n in roots:
2291 2298 # If n was a root, check if it's a 'real' root.
2292 2299 p = tuple(self.parents(n))
2293 2300 # If any of its parents are descendants, it's not a root.
2294 2301 if (p[0] in descendants) or (p[1] in descendants):
2295 2302 roots.remove(n)
2296 2303 else:
2297 2304 p = tuple(self.parents(n))
2298 2305 # A node is a descendant if either of its parents is a
2299 2306 # descendant. (We seeded the descendants set with the roots
2300 2307 # up there, remember?)
2301 2308 if (p[0] in descendants) or (p[1] in descendants):
2302 2309 descendants.add(n)
2303 2310 isdescendant = True
2304 2311 if isdescendant and ((ancestors is None) or (n in ancestors)):
2305 2312 # Only include nodes that are both descendants and ancestors.
2306 2313 orderedout.append(n)
2307 2314 if (ancestors is not None) and (n in heads):
2308 2315 # We're trying to figure out which heads are reachable
2309 2316 # from roots.
2310 2317 # Mark this head as having been reached
2311 2318 heads[n] = True
2312 2319 elif ancestors is None:
2313 2320 # Otherwise, we're trying to discover the heads.
2314 2321 # Assume this is a head because if it isn't, the next step
2315 2322 # will eventually remove it.
2316 2323 heads[n] = True
2317 2324 # But, obviously its parents aren't.
2318 2325 for p in self.parents(n):
2319 2326 heads.pop(p, None)
2320 2327 heads = [head for head, flag in heads.items() if flag]
2321 2328 roots = list(roots)
2322 2329 assert orderedout
2323 2330 assert roots
2324 2331 assert heads
2325 2332 return (orderedout, roots, heads)
2326 2333
2327 2334 def headrevs(self, revs=None):
2328 2335 if revs is None:
2329 2336 try:
2330 2337 return self.index.headrevs()
2331 2338 except AttributeError:
2332 2339 return self._headrevs()
2333 2340 if rustdagop is not None and self.index.rust_ext_compat:
2334 2341 return rustdagop.headrevs(self.index, revs)
2335 2342 return dagop.headrevs(revs, self._uncheckedparentrevs)
2336 2343
2337 2344 def computephases(self, roots):
2338 2345 return self.index.computephasesmapsets(roots)
2339 2346
2340 2347 def _headrevs(self):
2341 2348 count = len(self)
2342 2349 if not count:
2343 2350 return [nullrev]
2344 2351 # we won't iterate over filtered revs, so nobody is a head at the start
2345 2352 ishead = [0] * (count + 1)
2346 2353 index = self.index
2347 2354 for r in self:
2348 2355 ishead[r] = 1 # I may be a head
2349 2356 e = index[r]
2350 2357 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2351 2358 return [r for r, val in enumerate(ishead) if val]
2352 2359
2353 2360 def heads(self, start=None, stop=None):
2354 2361 """return the list of all nodes that have no children
2355 2362
2356 2363 if start is specified, only heads that are descendants of
2357 2364 start will be returned
2358 2365 if stop is specified, it will consider all the revs from stop
2359 2366 as if they had no children
2360 2367 """
2361 2368 if start is None and stop is None:
2362 2369 if not len(self):
2363 2370 return [self.nullid]
2364 2371 return [self.node(r) for r in self.headrevs()]
2365 2372
2366 2373 if start is None:
2367 2374 start = nullrev
2368 2375 else:
2369 2376 start = self.rev(start)
2370 2377
2371 2378 stoprevs = {self.rev(n) for n in stop or []}
2372 2379
2373 2380 revs = dagop.headrevssubset(
2374 2381 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2375 2382 )
2376 2383
2377 2384 return [self.node(rev) for rev in revs]
2378 2385
2379 2386 def children(self, node):
2380 2387 """find the children of a given node"""
2381 2388 c = []
2382 2389 p = self.rev(node)
2383 2390 for r in self.revs(start=p + 1):
2384 2391 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2385 2392 if prevs:
2386 2393 for pr in prevs:
2387 2394 if pr == p:
2388 2395 c.append(self.node(r))
2389 2396 elif p == nullrev:
2390 2397 c.append(self.node(r))
2391 2398 return c
2392 2399
2393 2400 def commonancestorsheads(self, a, b):
2394 2401 """calculate all the heads of the common ancestors of nodes a and b"""
2395 2402 a, b = self.rev(a), self.rev(b)
2396 2403 ancs = self._commonancestorsheads(a, b)
2397 2404 return pycompat.maplist(self.node, ancs)
2398 2405
2399 2406 def _commonancestorsheads(self, *revs):
2400 2407 """calculate all the heads of the common ancestors of revs"""
2401 2408 try:
2402 2409 ancs = self.index.commonancestorsheads(*revs)
2403 2410 except (AttributeError, OverflowError): # C implementation failed
2404 2411 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2405 2412 return ancs
2406 2413
2407 2414 def isancestor(self, a, b):
2408 2415 """return True if node a is an ancestor of node b
2409 2416
2410 2417 A revision is considered an ancestor of itself."""
2411 2418 a, b = self.rev(a), self.rev(b)
2412 2419 return self.isancestorrev(a, b)
2413 2420
2414 2421 def isancestorrev(self, a, b):
2415 2422 """return True if revision a is an ancestor of revision b
2416 2423
2417 2424 A revision is considered an ancestor of itself.
2418 2425
2419 2426 The implementation of this is trivial but the use of
2420 2427 reachableroots is not."""
2421 2428 if a == nullrev:
2422 2429 return True
2423 2430 elif a == b:
2424 2431 return True
2425 2432 elif a > b:
2426 2433 return False
2427 2434 return bool(self.reachableroots(a, [b], [a], includepath=False))
2428 2435
2429 2436 def reachableroots(self, minroot, heads, roots, includepath=False):
2430 2437 """return (heads(::(<roots> and <roots>::<heads>)))
2431 2438
2432 2439 If includepath is True, return (<roots>::<heads>)."""
2433 2440 try:
2434 2441 return self.index.reachableroots2(
2435 2442 minroot, heads, roots, includepath
2436 2443 )
2437 2444 except AttributeError:
2438 2445 return dagop._reachablerootspure(
2439 2446 self.parentrevs, minroot, roots, heads, includepath
2440 2447 )
2441 2448
2442 2449 def ancestor(self, a, b):
2443 2450 """calculate the "best" common ancestor of nodes a and b"""
2444 2451
2445 2452 a, b = self.rev(a), self.rev(b)
2446 2453 try:
2447 2454 ancs = self.index.ancestors(a, b)
2448 2455 except (AttributeError, OverflowError):
2449 2456 ancs = ancestor.ancestors(self.parentrevs, a, b)
2450 2457 if ancs:
2451 2458 # choose a consistent winner when there's a tie
2452 2459 return min(map(self.node, ancs))
2453 2460 return self.nullid
2454 2461
2455 2462 def _match(self, id):
2456 2463 if isinstance(id, int):
2457 2464 # rev
2458 2465 return self.node(id)
2459 2466 if len(id) == self.nodeconstants.nodelen:
2460 2467 # possibly a binary node
2461 2468 # odds of a binary node being all hex in ASCII are 1 in 10**25
2462 2469 try:
2463 2470 node = id
2464 2471 self.rev(node) # quick search the index
2465 2472 return node
2466 2473 except error.LookupError:
2467 2474 pass # may be partial hex id
2468 2475 try:
2469 2476 # str(rev)
2470 2477 rev = int(id)
2471 2478 if b"%d" % rev != id:
2472 2479 raise ValueError
2473 2480 if rev < 0:
2474 2481 rev = len(self) + rev
2475 2482 if rev < 0 or rev >= len(self):
2476 2483 raise ValueError
2477 2484 return self.node(rev)
2478 2485 except (ValueError, OverflowError):
2479 2486 pass
2480 2487 if len(id) == 2 * self.nodeconstants.nodelen:
2481 2488 try:
2482 2489 # a full hex nodeid?
2483 2490 node = bin(id)
2484 2491 self.rev(node)
2485 2492 return node
2486 2493 except (binascii.Error, error.LookupError):
2487 2494 pass
2488 2495
2489 2496 def _partialmatch(self, id):
2490 2497 # we don't care about wdirfilenodeids as they should always be full hashes
2491 2498 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2492 2499 ambiguous = False
2493 2500 try:
2494 2501 partial = self.index.partialmatch(id)
2495 2502 if partial and self.hasnode(partial):
2496 2503 if maybewdir:
2497 2504 # single 'ff...' match in radix tree, ambiguous with wdir
2498 2505 ambiguous = True
2499 2506 else:
2500 2507 return partial
2501 2508 elif maybewdir:
2502 2509 # no 'ff...' match in radix tree, wdir identified
2503 2510 raise error.WdirUnsupported
2504 2511 else:
2505 2512 return None
2506 2513 except error.RevlogError:
2507 2514 # parsers.c radix tree lookup gave multiple matches
2508 2515 # fast path: for unfiltered changelog, radix tree is accurate
2509 2516 if not getattr(self, 'filteredrevs', None):
2510 2517 ambiguous = True
2511 2518 # fall through to slow path that filters hidden revisions
2512 2519 except (AttributeError, ValueError):
2513 2520 # we are pure python, or key is not hex
2514 2521 pass
2515 2522 if ambiguous:
2516 2523 raise error.AmbiguousPrefixLookupError(
2517 2524 id, self.display_id, _(b'ambiguous identifier')
2518 2525 )
2519 2526
2520 2527 if id in self._pcache:
2521 2528 return self._pcache[id]
2522 2529
2523 2530 if len(id) <= 40:
2524 2531 # hex(node)[:...]
2525 2532 l = len(id) // 2 * 2 # grab an even number of digits
2526 2533 try:
2527 2534 # we're dropping the last digit, so let's check that it's hex,
2528 2535 # to avoid the expensive computation below if it's not
2529 2536 if len(id) % 2 > 0:
2530 2537 if not (id[-1] in hexdigits):
2531 2538 return None
2532 2539 prefix = bin(id[:l])
2533 2540 except binascii.Error:
2534 2541 pass
2535 2542 else:
2536 2543 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2537 2544 nl = [
2538 2545 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2539 2546 ]
2540 2547 if self.nodeconstants.nullhex.startswith(id):
2541 2548 nl.append(self.nullid)
2542 2549 if len(nl) > 0:
2543 2550 if len(nl) == 1 and not maybewdir:
2544 2551 self._pcache[id] = nl[0]
2545 2552 return nl[0]
2546 2553 raise error.AmbiguousPrefixLookupError(
2547 2554 id, self.display_id, _(b'ambiguous identifier')
2548 2555 )
2549 2556 if maybewdir:
2550 2557 raise error.WdirUnsupported
2551 2558 return None
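
The even-digit truncation in the slow path above deserves a note: binascii can only decode an even number of hex digits, so an odd-length prefix is matched by decoding the even part and re-checking the candidates against the full hex string. A self-contained sketch:

import binascii

def match_prefix(hexprefix, nodes):
    even = len(hexprefix) // 2 * 2
    prefix = binascii.unhexlify(hexprefix[:even])
    return [n for n in nodes
            if n.startswith(prefix)
            and binascii.hexlify(n).decode().startswith(hexprefix)]

nodes = [bytes.fromhex('ab' * 20), bytes.fromhex('ac' * 20)]
assert match_prefix('aba', nodes) == [nodes[0]]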
2552 2559
2553 2560 def lookup(self, id):
2554 2561 """locate a node based on:
2555 2562 - revision number or str(revision number)
2556 2563 - nodeid or subset of hex nodeid
2557 2564 """
2558 2565 n = self._match(id)
2559 2566 if n is not None:
2560 2567 return n
2561 2568 n = self._partialmatch(id)
2562 2569 if n:
2563 2570 return n
2564 2571
2565 2572 raise error.LookupError(id, self.display_id, _(b'no match found'))
2566 2573
2567 2574 def shortest(self, node, minlength=1):
2568 2575 """Find the shortest unambiguous prefix that matches node."""
2569 2576
2570 2577 def isvalid(prefix):
2571 2578 try:
2572 2579 matchednode = self._partialmatch(prefix)
2573 2580 except error.AmbiguousPrefixLookupError:
2574 2581 return False
2575 2582 except error.WdirUnsupported:
2576 2583 # single 'ff...' match
2577 2584 return True
2578 2585 if matchednode is None:
2579 2586 raise error.LookupError(node, self.display_id, _(b'no node'))
2580 2587 return True
2581 2588
2582 2589 def maybewdir(prefix):
2583 2590 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2584 2591
2585 2592 hexnode = hex(node)
2586 2593
2587 2594 def disambiguate(hexnode, minlength):
2588 2595 """Disambiguate against wdirid."""
2589 2596 for length in range(minlength, len(hexnode) + 1):
2590 2597 prefix = hexnode[:length]
2591 2598 if not maybewdir(prefix):
2592 2599 return prefix
2593 2600
2594 2601 if not getattr(self, 'filteredrevs', None):
2595 2602 try:
2596 2603 length = max(self.index.shortest(node), minlength)
2597 2604 return disambiguate(hexnode, length)
2598 2605 except error.RevlogError:
2599 2606 if node != self.nodeconstants.wdirid:
2600 2607 raise error.LookupError(
2601 2608 node, self.display_id, _(b'no node')
2602 2609 )
2603 2610 except AttributeError:
2604 2611 # Fall through to pure code
2605 2612 pass
2606 2613
2607 2614 if node == self.nodeconstants.wdirid:
2608 2615 for length in range(minlength, len(hexnode) + 1):
2609 2616 prefix = hexnode[:length]
2610 2617 if isvalid(prefix):
2611 2618 return prefix
2612 2619
2613 2620 for length in range(minlength, len(hexnode) + 1):
2614 2621 prefix = hexnode[:length]
2615 2622 if isvalid(prefix):
2616 2623 return disambiguate(hexnode, length)
2617 2624
2618 2625 def cmp(self, node, text):
2619 2626 """compare text with a given file revision
2620 2627
2621 2628 returns True if text is different from what is stored.
2622 2629 """
2623 2630 p1, p2 = self.parents(node)
2624 2631 return storageutil.hashrevisionsha1(text, p1, p2) != node
2625 2632
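cmp() works by recomputing the node from the text and parents. A sketch of the SHA-1 scheme that storageutil.hashrevisionsha1 uses (parents sorted, then the text), written from memory and only for illustration:

    import hashlib

    def sketch_hashrevisionsha1(text, p1, p2):
        # sort the parents so the hash does not depend on their order
        a, b = sorted([p1, p2])
        s = hashlib.sha1()
        s.update(a)
        s.update(b)
        s.update(text)
        return s.digest()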
2626 2633 def deltaparent(self, rev):
2627 2634 """return deltaparent of the given revision"""
2628 2635 base = self.index[rev][3]
2629 2636 if base == rev:
2630 2637 return nullrev
2631 2638 elif self.delta_config.general_delta:
2632 2639 return base
2633 2640 else:
2634 2641 return rev - 1
2635 2642
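As an illustrative aside, deltaparent() is what lets a caller walk a delta chain back to its snapshot; a hedged helper sketch (the real implementation lives in _deltachain):

    from mercurial.node import nullrev

    def sketch_deltachain(rl, rev):
        # walk delta parents until a revision stored as a full snapshot
        chain = [rev]
        while rl.deltaparent(rev) != nullrev:
            rev = rl.deltaparent(rev)
            chain.append(rev)
        return chain[::-1]  # snapshot first, requested rev last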
2636 2643 def issnapshot(self, rev):
2637 2644 """tells whether rev is a snapshot"""
2638 2645 ret = self._inner.issnapshot(rev)
2639 2646 self.issnapshot = self._inner.issnapshot
2640 2647 return ret
2641 2648
2642 2649 def snapshotdepth(self, rev):
2643 2650 """number of snapshot in the chain before this one"""
2644 2651 if not self.issnapshot(rev):
2645 2652 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2646 2653 return len(self._inner._deltachain(rev)[0]) - 1
2647 2654
2648 2655 def revdiff(self, rev1, rev2):
2649 2656 """return or calculate a delta between two revisions
2650 2657
2651 2658 The delta calculated is in binary form and is intended to be written to
2652 2659 revlog data directly. So this function needs raw revision data.
2653 2660 """
2654 2661 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2655 2662 return bytes(self._inner._chunk(rev2))
2656 2663
2657 2664 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2658 2665
2659 2666 def revision(self, nodeorrev):
2660 2667 """return an uncompressed revision of a given node or revision
2661 2668 number.
2662 2669 """
2663 2670 return self._revisiondata(nodeorrev)
2664 2671
2665 2672 def sidedata(self, nodeorrev):
2666 2673 """a map of extra data related to the changeset but not part of the hash
2667 2674
2668 2675 This function currently returns a dictionary. However, a more
2669 2676 advanced mapping object will likely be used in the future for
2670 2677 more efficient/lazy code.
2671 2678 """
2672 2679 # deal with <nodeorrev> argument type
2673 2680 if isinstance(nodeorrev, int):
2674 2681 rev = nodeorrev
2675 2682 else:
2676 2683 rev = self.rev(nodeorrev)
2677 2684 return self._sidedata(rev)
2678 2685
2679 2686 def _rawtext(self, node, rev):
2680 2687 """return the possibly unvalidated rawtext for a revision
2681 2688
2682 2689 returns (rev, rawtext, validated)
2683 2690 """
2684 2691 # Check if we have the entry in cache
2685 2692 # The cache entry looks like (node, rev, rawtext)
2686 2693 if self._inner._revisioncache:
2687 2694 if self._inner._revisioncache[0] == node:
2688 2695 return (rev, self._inner._revisioncache[2], True)
2689 2696
2690 2697 if rev is None:
2691 2698 rev = self.rev(node)
2692 2699
2693 2700 return self._inner.raw_text(node, rev)
2694 2701
2695 2702 def _revisiondata(self, nodeorrev, raw=False):
2696 2703 # deal with <nodeorrev> argument type
2697 2704 if isinstance(nodeorrev, int):
2698 2705 rev = nodeorrev
2699 2706 node = self.node(rev)
2700 2707 else:
2701 2708 node = nodeorrev
2702 2709 rev = None
2703 2710
2704 2711 # fast path the special `nullid` rev
2705 2712 if node == self.nullid:
2706 2713 return b""
2707 2714
2708 2715 # ``rawtext`` is the text as stored inside the revlog. Might be the
2709 2716 # revision or might need to be processed to retrieve the revision.
2710 2717 rev, rawtext, validated = self._rawtext(node, rev)
2711 2718
2712 2719 if raw and validated:
2713 2720 # if we don't want to process the raw text and that raw
2714 2721 # text is cached, we can exit early.
2715 2722 return rawtext
2716 2723 if rev is None:
2717 2724 rev = self.rev(node)
2718 2725 # the revlog's flag for this revision
2719 2726 # (usually alter its state or content)
2720 2727 flags = self.flags(rev)
2721 2728
2722 2729 if validated and flags == REVIDX_DEFAULT_FLAGS:
2723 2730 # no extra flags set, no flag processor runs, text = rawtext
2724 2731 return rawtext
2725 2732
2726 2733 if raw:
2727 2734 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2728 2735 text = rawtext
2729 2736 else:
2730 2737 r = flagutil.processflagsread(self, rawtext, flags)
2731 2738 text, validatehash = r
2732 2739 if validatehash:
2733 2740 self.checkhash(text, node, rev=rev)
2734 2741 if not validated:
2735 2742 self._inner._revisioncache = (node, rev, rawtext)
2736 2743
2737 2744 return text
2738 2745
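Conceptually, the read-side flag processing that _revisiondata delegates to flagutil.processflagsread looks roughly like the pseudocode below; names such as `processors` and `iter_set_flags` are illustrative, not the real flagutil internals:

    text = rawtext
    validate = True
    for flag in iter_set_flags(flags):          # hypothetical iteration helper
        text, v = processors[flag].read(revlog, text)
        validate = validate and v
    if validate:
        # as in the code above, the hash is checked on the processed text
        revlog.checkhash(text, node, rev=rev)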
2739 2746 def _sidedata(self, rev):
2740 2747 """Return the sidedata for a given revision number."""
2741 2748 sidedata_end = None
2742 2749 if self._docket is not None:
2743 2750 sidedata_end = self._docket.sidedata_end
2744 2751 return self._inner.sidedata(rev, sidedata_end)
2745 2752
2746 2753 def rawdata(self, nodeorrev):
2747 2754 """return an uncompressed raw data of a given node or revision number."""
2748 2755 return self._revisiondata(nodeorrev, raw=True)
2749 2756
2750 2757 def hash(self, text, p1, p2):
2751 2758 """Compute a node hash.
2752 2759
2753 2760 Available as a function so that subclasses can replace the hash
2754 2761 as needed.
2755 2762 """
2756 2763 return storageutil.hashrevisionsha1(text, p1, p2)
2757 2764
2758 2765 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2759 2766 """Check node hash integrity.
2760 2767
2761 2768 Available as a function so that subclasses can extend hash mismatch
2762 2769 behaviors as needed.
2763 2770 """
2764 2771 try:
2765 2772 if p1 is None and p2 is None:
2766 2773 p1, p2 = self.parents(node)
2767 2774 if node != self.hash(text, p1, p2):
2768 2775 # Clear the revision cache on hash failure. The revision cache
2769 2776 # only stores the raw revision and clearing the cache does have
2770 2777 # the side-effect that we won't have a cache hit when the raw
2771 2778 # revision data is accessed. But this case should be rare and
2772 2779 # it is extra work to teach the cache about the hash
2773 2780 # verification state.
2774 2781 if (
2775 2782 self._inner._revisioncache
2776 2783 and self._inner._revisioncache[0] == node
2777 2784 ):
2778 2785 self._inner._revisioncache = None
2779 2786
2780 2787 revornode = rev
2781 2788 if revornode is None:
2782 2789 revornode = templatefilters.short(hex(node))
2783 2790 raise error.RevlogError(
2784 2791 _(b"integrity check failed on %s:%s")
2785 2792 % (self.display_id, pycompat.bytestr(revornode))
2786 2793 )
2787 2794 except error.RevlogError:
2788 2795 if self.feature_config.censorable and storageutil.iscensoredtext(
2789 2796 text
2790 2797 ):
2791 2798 raise error.CensoredNodeError(self.display_id, node, text)
2792 2799 raise
2793 2800
2794 2801 @property
2795 2802 def _split_index_file(self):
2796 2803 """the path where to expect the index of an ongoing splitting operation
2797 2804
2798 2805 The file will only exist if a splitting operation is in progress, but
2799 2806 it is always expected at the same location."""
2800 2807 parts = self.radix.split(b'/')
2801 2808 if len(parts) > 1:
2802 2809 # adds a '-s' suffix to the `data/` or `meta/` base directory
2803 2810 head = parts[0] + b'-s'
2804 2811 mids = parts[1:-1]
2805 2812 tail = parts[-1] + b'.i'
2806 2813 pieces = [head] + mids + [tail]
2807 2814 return b'/'.join(pieces)
2808 2815 else:
2809 2816 # the revlog is stored at the root of the store (changelog or
2810 2817 # manifest), no risk of collision.
2811 2818 return self.radix + b'.i.s'
2812 2819
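Worked examples of the mapping above, using illustrative radix values:

    # radix 'data/some/file.txt'  ->  'data-s/some/file.txt.i'
    # radix '00changelog'         ->  '00changelog.i.s'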
2813 2820 def _enforceinlinesize(self, tr, side_write=True):
2814 2821 """Check if the revlog is too big for inline and convert if so.
2815 2822
2816 2823 This should be called after revisions are added to the revlog. If the
2817 2824 revlog has grown too large to be an inline revlog, it will convert it
2818 2825 to use multiple index and data files.
2819 2826 """
2820 2827 tiprev = len(self) - 1
2821 2828 total_size = self.start(tiprev) + self.length(tiprev)
2822 2829 if not self._inline or total_size < _maxinline:
2823 2830 return
2824 2831
2825 2832 if self._docket is not None:
2826 2833 msg = b"inline revlog should not have a docket"
2827 2834 raise error.ProgrammingError(msg)
2828 2835
2829 2836 troffset = tr.findoffset(self._indexfile)
2830 2837 if troffset is None:
2831 2838 raise error.RevlogError(
2832 2839 _(b"%s not found in the transaction") % self._indexfile
2833 2840 )
2834 2841 if troffset:
2835 2842 tr.addbackup(self._indexfile, for_offset=True)
2836 2843 tr.add(self._datafile, 0)
2837 2844
2838 2845 new_index_file_path = None
2839 2846 if side_write:
2840 2847 old_index_file_path = self._indexfile
2841 2848 new_index_file_path = self._split_index_file
2842 2849 opener = self.opener
2843 2850 weak_self = weakref.ref(self)
2844 2851
2845 2852 # the "split" index replace the real index when the transaction is
2846 2853 # finalized
2847 2854 def finalize_callback(tr):
2848 2855 opener.rename(
2849 2856 new_index_file_path,
2850 2857 old_index_file_path,
2851 2858 checkambig=True,
2852 2859 )
2853 2860 maybe_self = weak_self()
2854 2861 if maybe_self is not None:
2855 2862 maybe_self._indexfile = old_index_file_path
2856 2863 maybe_self._inner.index_file = maybe_self._indexfile
2857 2864
2858 2865 def abort_callback(tr):
2859 2866 maybe_self = weak_self()
2860 2867 if maybe_self is not None:
2861 2868 maybe_self._indexfile = old_index_file_path
2862 2869 maybe_self._inner.inline = True
2863 2870 maybe_self._inner.index_file = old_index_file_path
2864 2871
2865 2872 tr.registertmp(new_index_file_path)
2866 2873 if self.target[1] is not None:
2867 2874 callback_id = b'000-revlog-split-%d-%s' % self.target
2868 2875 else:
2869 2876 callback_id = b'000-revlog-split-%d' % self.target[0]
2870 2877 tr.addfinalize(callback_id, finalize_callback)
2871 2878 tr.addabort(callback_id, abort_callback)
2872 2879
2873 2880 self._format_flags &= ~FLAG_INLINE_DATA
2874 2881 self._inner.split_inline(
2875 2882 tr,
2876 2883 self._format_flags | self._format_version,
2877 2884 new_index_file_path=new_index_file_path,
2878 2885 )
2879 2886
2880 2887 self._inline = False
2881 2888 if new_index_file_path is not None:
2882 2889 self._indexfile = new_index_file_path
2883 2890
2884 2891 nodemaputil.setup_persistent_nodemap(tr, self)
2885 2892
2886 2893 def _nodeduplicatecallback(self, transaction, node):
2887 2894 """called when trying to add a node already stored."""
2888 2895
2889 2896 @contextlib.contextmanager
2890 2897 def reading(self):
2891 2898 with self._inner.reading():
2892 2899 yield
2893 2900
2894 2901 @contextlib.contextmanager
2895 2902 def _writing(self, transaction):
2896 2903 if self._trypending:
2897 2904 msg = b'trying to write in a `trypending` revlog: %s'
2898 2905 msg %= self.display_id
2899 2906 raise error.ProgrammingError(msg)
2900 2907 if self._inner.is_writing:
2901 2908 yield
2902 2909 else:
2903 2910 data_end = None
2904 2911 sidedata_end = None
2905 2912 if self._docket is not None:
2906 2913 data_end = self._docket.data_end
2907 2914 sidedata_end = self._docket.sidedata_end
2908 2915 with self._inner.writing(
2909 2916 transaction,
2910 2917 data_end=data_end,
2911 2918 sidedata_end=sidedata_end,
2912 2919 ):
2913 2920 yield
2914 2921 if self._docket is not None:
2915 2922 self._write_docket(transaction)
2916 2923
2917 2924 def _write_docket(self, transaction):
2918 2925 """write the current docket on disk
2919 2926
2920 2927 Exists as a method to help the changelog implement transaction logic
2921 2928
2922 2929 We could also imagine using the same transaction logic for all revlogs
2923 2930 since dockets are cheap."""
2924 2931 self._docket.write(transaction)
2925 2932
2926 2933 def addrevision(
2927 2934 self,
2928 2935 text,
2929 2936 transaction,
2930 2937 link,
2931 2938 p1,
2932 2939 p2,
2933 2940 cachedelta=None,
2934 2941 node=None,
2935 2942 flags=REVIDX_DEFAULT_FLAGS,
2936 2943 deltacomputer=None,
2937 2944 sidedata=None,
2938 2945 ):
2939 2946 """add a revision to the log
2940 2947
2941 2948 text - the revision data to add
2942 2949 transaction - the transaction object used for rollback
2943 2950 link - the linkrev data to add
2944 2951 p1, p2 - the parent nodeids of the revision
2945 2952 cachedelta - an optional precomputed delta
2946 2953 node - nodeid of revision; typically node is not specified, and it is
2947 2954 computed by default as hash(text, p1, p2); however, subclasses might
2948 2955 use a different hashing method (and override checkhash() in that case)
2949 2956 flags - the known flags to set on the revision
2950 2957 deltacomputer - an optional deltacomputer instance shared between
2951 2958 multiple calls
2952 2959 """
2953 2960 if link == nullrev:
2954 2961 raise error.RevlogError(
2955 2962 _(b"attempted to add linkrev -1 to %s") % self.display_id
2956 2963 )
2957 2964
2958 2965 if sidedata is None:
2959 2966 sidedata = {}
2960 2967 elif sidedata and not self.feature_config.has_side_data:
2961 2968 raise error.ProgrammingError(
2962 2969 _(b"trying to add sidedata to a revlog who don't support them")
2963 2970 )
2964 2971
2965 2972 if flags:
2966 2973 node = node or self.hash(text, p1, p2)
2967 2974
2968 2975 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2969 2976
2970 2977 # If the flag processor modifies the revision data, ignore any provided
2971 2978 # cachedelta.
2972 2979 if rawtext != text:
2973 2980 cachedelta = None
2974 2981
2975 2982 if len(rawtext) > _maxentrysize:
2976 2983 raise error.RevlogError(
2977 2984 _(
2978 2985 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2979 2986 )
2980 2987 % (self.display_id, len(rawtext))
2981 2988 )
2982 2989
2983 2990 node = node or self.hash(rawtext, p1, p2)
2984 2991 rev = self.index.get_rev(node)
2985 2992 if rev is not None:
2986 2993 return rev
2987 2994
2988 2995 if validatehash:
2989 2996 self.checkhash(rawtext, node, p1=p1, p2=p2)
2990 2997
2991 2998 return self.addrawrevision(
2992 2999 rawtext,
2993 3000 transaction,
2994 3001 link,
2995 3002 p1,
2996 3003 p2,
2997 3004 node,
2998 3005 flags,
2999 3006 cachedelta=cachedelta,
3000 3007 deltacomputer=deltacomputer,
3001 3008 sidedata=sidedata,
3002 3009 )
3003 3010
3004 3011 def addrawrevision(
3005 3012 self,
3006 3013 rawtext,
3007 3014 transaction,
3008 3015 link,
3009 3016 p1,
3010 3017 p2,
3011 3018 node,
3012 3019 flags,
3013 3020 cachedelta=None,
3014 3021 deltacomputer=None,
3015 3022 sidedata=None,
3016 3023 ):
3017 3024 """add a raw revision with known flags, node and parents
3018 3025 useful when reusing a revision not stored in this revlog (ex: received
3019 3026 over wire, or read from an external bundle).
3020 3027 """
3021 3028 with self._writing(transaction):
3022 3029 return self._addrevision(
3023 3030 node,
3024 3031 rawtext,
3025 3032 transaction,
3026 3033 link,
3027 3034 p1,
3028 3035 p2,
3029 3036 flags,
3030 3037 cachedelta,
3031 3038 deltacomputer=deltacomputer,
3032 3039 sidedata=sidedata,
3033 3040 )
3034 3041
3035 3042 def compress(self, data):
3036 3043 return self._inner.compress(data)
3037 3044
3038 3045 def decompress(self, data):
3039 3046 return self._inner.decompress(data)
3040 3047
3041 3048 def _addrevision(
3042 3049 self,
3043 3050 node,
3044 3051 rawtext,
3045 3052 transaction,
3046 3053 link,
3047 3054 p1,
3048 3055 p2,
3049 3056 flags,
3050 3057 cachedelta,
3051 3058 alwayscache=False,
3052 3059 deltacomputer=None,
3053 3060 sidedata=None,
3054 3061 ):
3055 3062 """internal function to add revisions to the log
3056 3063
3057 3064 see addrevision for argument descriptions.
3058 3065
3059 3066 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3060 3067
3061 3068 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3062 3069 be used.
3063 3070
3064 3071 invariants:
3065 3072 - rawtext is optional (can be None); if not set, cachedelta must be set.
3066 3073 if both are set, they must correspond to each other.
3067 3074 """
3068 3075 if node == self.nullid:
3069 3076 raise error.RevlogError(
3070 3077 _(b"%s: attempt to add null revision") % self.display_id
3071 3078 )
3072 3079 if (
3073 3080 node == self.nodeconstants.wdirid
3074 3081 or node in self.nodeconstants.wdirfilenodeids
3075 3082 ):
3076 3083 raise error.RevlogError(
3077 3084 _(b"%s: attempt to add wdir revision") % self.display_id
3078 3085 )
3079 3086 if self._inner._writinghandles is None:
3080 3087 msg = b'adding revision outside `revlog._writing` context'
3081 3088 raise error.ProgrammingError(msg)
3082 3089
3083 3090 btext = [rawtext]
3084 3091
3085 3092 curr = len(self)
3086 3093 prev = curr - 1
3087 3094
3088 3095 offset = self._get_data_offset(prev)
3089 3096
3090 3097 if self._concurrencychecker:
3091 3098 ifh, dfh, sdfh = self._inner._writinghandles
3092 3099 # XXX no checking for the sidedata file
3093 3100 if self._inline:
3094 3101 # offset is "as if" it were in the .d file, so we need to add on
3095 3102 # the size of the entry metadata.
3096 3103 self._concurrencychecker(
3097 3104 ifh, self._indexfile, offset + curr * self.index.entry_size
3098 3105 )
3099 3106 else:
3100 3107 # Entries in the .i are a consistent size.
3101 3108 self._concurrencychecker(
3102 3109 ifh, self._indexfile, curr * self.index.entry_size
3103 3110 )
3104 3111 self._concurrencychecker(dfh, self._datafile, offset)
3105 3112
3106 3113 p1r, p2r = self.rev(p1), self.rev(p2)
3107 3114
3108 3115 # full versions are inserted when the needed deltas
3109 3116 # become comparable to the uncompressed text
3110 3117 if rawtext is None:
3111 3118 # need rawtext size, before changed by flag processors, which is
3112 3119 # the non-raw size. use revlog explicitly to avoid filelog's extra
3113 3120 # logic that might remove metadata size.
3114 3121 textlen = mdiff.patchedsize(
3115 3122 revlog.size(self, cachedelta[0]), cachedelta[1]
3116 3123 )
3117 3124 else:
3118 3125 textlen = len(rawtext)
3119 3126
3120 3127 if deltacomputer is None:
3121 3128 write_debug = None
3122 3129 if self.delta_config.debug_delta:
3123 3130 write_debug = transaction._report
3124 3131 deltacomputer = deltautil.deltacomputer(
3125 3132 self, write_debug=write_debug
3126 3133 )
3127 3134
3128 3135 if cachedelta is not None and len(cachedelta) == 2:
3129 3136 # If the cached delta has no information about how it should be
3130 3137 # reused, add the default reuse instruction according to the
3131 3138 # revlog's configuration.
3132 3139 if (
3133 3140 self.delta_config.general_delta
3134 3141 and self.delta_config.lazy_delta_base
3135 3142 ):
3136 3143 delta_base_reuse = DELTA_BASE_REUSE_TRY
3137 3144 else:
3138 3145 delta_base_reuse = DELTA_BASE_REUSE_NO
3139 3146 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3140 3147
3141 3148 revinfo = revlogutils.revisioninfo(
3142 3149 node,
3143 3150 p1,
3144 3151 p2,
3145 3152 btext,
3146 3153 textlen,
3147 3154 cachedelta,
3148 3155 flags,
3149 3156 )
3150 3157
3151 3158 deltainfo = deltacomputer.finddeltainfo(revinfo)
3152 3159
3153 3160 compression_mode = COMP_MODE_INLINE
3154 3161 if self._docket is not None:
3155 3162 default_comp = self._docket.default_compression_header
3156 3163 r = deltautil.delta_compression(default_comp, deltainfo)
3157 3164 compression_mode, deltainfo = r
3158 3165
3159 3166 sidedata_compression_mode = COMP_MODE_INLINE
3160 3167 if sidedata and self.feature_config.has_side_data:
3161 3168 sidedata_compression_mode = COMP_MODE_PLAIN
3162 3169 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3163 3170 sidedata_offset = self._docket.sidedata_end
3164 3171 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3165 3172 if (
3166 3173 h != b'u'
3167 3174 and comp_sidedata[0:1] != b'\0'
3168 3175 and len(comp_sidedata) < len(serialized_sidedata)
3169 3176 ):
3170 3177 assert not h
3171 3178 if (
3172 3179 comp_sidedata[0:1]
3173 3180 == self._docket.default_compression_header
3174 3181 ):
3175 3182 sidedata_compression_mode = COMP_MODE_DEFAULT
3176 3183 serialized_sidedata = comp_sidedata
3177 3184 else:
3178 3185 sidedata_compression_mode = COMP_MODE_INLINE
3179 3186 serialized_sidedata = comp_sidedata
3180 3187 else:
3181 3188 serialized_sidedata = b""
3182 3189 # Don't store the offset if the sidedata is empty, that way
3183 3190 # we can easily detect empty sidedata and they will be no different
3184 3191 # from ones we manually add.
3185 3192 sidedata_offset = 0
3186 3193
3187 3194 rank = RANK_UNKNOWN
3188 3195 if self.feature_config.compute_rank:
3189 3196 if (p1r, p2r) == (nullrev, nullrev):
3190 3197 rank = 1
3191 3198 elif p1r != nullrev and p2r == nullrev:
3192 3199 rank = 1 + self.fast_rank(p1r)
3193 3200 elif p1r == nullrev and p2r != nullrev:
3194 3201 rank = 1 + self.fast_rank(p2r)
3195 3202 else: # merge node
3196 3203 if rustdagop is not None and self.index.rust_ext_compat:
3197 3204 rank = rustdagop.rank(self.index, p1r, p2r)
3198 3205 else:
3199 3206 pmin, pmax = sorted((p1r, p2r))
3200 3207 rank = 1 + self.fast_rank(pmax)
3201 3208 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3202 3209
3203 3210 e = revlogutils.entry(
3204 3211 flags=flags,
3205 3212 data_offset=offset,
3206 3213 data_compressed_length=deltainfo.deltalen,
3207 3214 data_uncompressed_length=textlen,
3208 3215 data_compression_mode=compression_mode,
3209 3216 data_delta_base=deltainfo.base,
3210 3217 link_rev=link,
3211 3218 parent_rev_1=p1r,
3212 3219 parent_rev_2=p2r,
3213 3220 node_id=node,
3214 3221 sidedata_offset=sidedata_offset,
3215 3222 sidedata_compressed_length=len(serialized_sidedata),
3216 3223 sidedata_compression_mode=sidedata_compression_mode,
3217 3224 rank=rank,
3218 3225 )
3219 3226
3220 3227 self.index.append(e)
3221 3228 entry = self.index.entry_binary(curr)
3222 3229 if curr == 0 and self._docket is None:
3223 3230 header = self._format_flags | self._format_version
3224 3231 header = self.index.pack_header(header)
3225 3232 entry = header + entry
3226 3233 self._writeentry(
3227 3234 transaction,
3228 3235 entry,
3229 3236 deltainfo.data,
3230 3237 link,
3231 3238 offset,
3232 3239 serialized_sidedata,
3233 3240 sidedata_offset,
3234 3241 )
3235 3242
3236 3243 rawtext = btext[0]
3237 3244
3238 3245 if alwayscache and rawtext is None:
3239 3246 rawtext = deltacomputer.buildtext(revinfo)
3240 3247
3241 3248 if type(rawtext) == bytes: # only accept immutable objects
3242 3249 self._inner._revisioncache = (node, curr, rawtext)
3243 3250 self._chainbasecache[curr] = deltainfo.chainbase
3244 3251 return curr
3245 3252
3246 3253 def _get_data_offset(self, prev):
3247 3254 """Returns the current offset in the (in-transaction) data file.
3248 3255 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3249 3256 file to store that information: since sidedata can be rewritten to the
3250 3257 end of the data file within a transaction, you can have cases where, for
3251 3258 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3252 3259 to `n - 1`'s sidedata being written after `n`'s data.
3253 3260
3254 3261 TODO cache this in a docket file before getting out of experimental."""
3255 3262 if self._docket is None:
3256 3263 return self.end(prev)
3257 3264 else:
3258 3265 return self._docket.data_end
3259 3266
3260 3267 def _writeentry(
3261 3268 self,
3262 3269 transaction,
3263 3270 entry,
3264 3271 data,
3265 3272 link,
3266 3273 offset,
3267 3274 sidedata,
3268 3275 sidedata_offset,
3269 3276 ):
3270 3277 # Files opened in a+ mode have inconsistent behavior on various
3271 3278 # platforms. Windows requires that a file positioning call be made
3272 3279 # when the file handle transitions between reads and writes. See
3273 3280 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3274 3281 # platforms, Python or the platform itself can be buggy. Some versions
3275 3282 # of Solaris have been observed to not append at the end of the file
3276 3283 # if the file was seeked to before the end. See issue4943 for more.
3277 3284 #
3278 3285 # We work around this issue by inserting a seek() before writing.
3279 3286 # Note: This is likely not necessary on Python 3. However, because
3280 3287 # the file handle is reused for reads and may be seeked there, we need
3281 3288 # to be careful before changing this.
3282 3289 index_end = data_end = sidedata_end = None
3283 3290 if self._docket is not None:
3284 3291 index_end = self._docket.index_end
3285 3292 data_end = self._docket.data_end
3286 3293 sidedata_end = self._docket.sidedata_end
3287 3294
3288 3295 files_end = self._inner.write_entry(
3289 3296 transaction,
3290 3297 entry,
3291 3298 data,
3292 3299 link,
3293 3300 offset,
3294 3301 sidedata,
3295 3302 sidedata_offset,
3296 3303 index_end,
3297 3304 data_end,
3298 3305 sidedata_end,
3299 3306 )
3300 3307 self._enforceinlinesize(transaction)
3301 3308 if self._docket is not None:
3302 3309 self._docket.index_end = files_end[0]
3303 3310 self._docket.data_end = files_end[1]
3304 3311 self._docket.sidedata_end = files_end[2]
3305 3312
3306 3313 nodemaputil.setup_persistent_nodemap(transaction, self)
3307 3314
3308 3315 def addgroup(
3309 3316 self,
3310 3317 deltas,
3311 3318 linkmapper,
3312 3319 transaction,
3313 3320 alwayscache=False,
3314 3321 addrevisioncb=None,
3315 3322 duplicaterevisioncb=None,
3316 3323 debug_info=None,
3317 3324 delta_base_reuse_policy=None,
3318 3325 ):
3319 3326 """
3320 3327 add a delta group
3321 3328
3322 3329 given a set of deltas, add them to the revision log. the
3323 3330 first delta is against its parent, which should be in our
3324 3331 log; the rest are against the previous delta.
3325 3332
3326 3333 If ``addrevisioncb`` is defined, it will be called with arguments of
3327 3334 this revlog and the node that was added.
3328 3335 """
3329 3336
3330 3337 if self._adding_group:
3331 3338 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3332 3339
3333 3340 # read the default delta-base reuse policy from revlog config if the
3334 3341 # group did not specify one.
3335 3342 if delta_base_reuse_policy is None:
3336 3343 if (
3337 3344 self.delta_config.general_delta
3338 3345 and self.delta_config.lazy_delta_base
3339 3346 ):
3340 3347 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3341 3348 else:
3342 3349 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3343 3350
3344 3351 self._adding_group = True
3345 3352 empty = True
3346 3353 try:
3347 3354 with self._writing(transaction):
3348 3355 write_debug = None
3349 3356 if self.delta_config.debug_delta:
3350 3357 write_debug = transaction._report
3351 3358 deltacomputer = deltautil.deltacomputer(
3352 3359 self,
3353 3360 write_debug=write_debug,
3354 3361 debug_info=debug_info,
3355 3362 )
3356 3363 # loop through our set of deltas
3357 3364 for data in deltas:
3358 3365 (
3359 3366 node,
3360 3367 p1,
3361 3368 p2,
3362 3369 linknode,
3363 3370 deltabase,
3364 3371 delta,
3365 3372 flags,
3366 3373 sidedata,
3367 3374 ) = data
3368 3375 link = linkmapper(linknode)
3369 3376 flags = flags or REVIDX_DEFAULT_FLAGS
3370 3377
3371 3378 rev = self.index.get_rev(node)
3372 3379 if rev is not None:
3373 3380 # this can happen if two branches make the same change
3374 3381 self._nodeduplicatecallback(transaction, rev)
3375 3382 if duplicaterevisioncb:
3376 3383 duplicaterevisioncb(self, rev)
3377 3384 empty = False
3378 3385 continue
3379 3386
3380 3387 for p in (p1, p2):
3381 3388 if not self.index.has_node(p):
3382 3389 raise error.LookupError(
3383 3390 p, self.radix, _(b'unknown parent')
3384 3391 )
3385 3392
3386 3393 if not self.index.has_node(deltabase):
3387 3394 raise error.LookupError(
3388 3395 deltabase, self.display_id, _(b'unknown delta base')
3389 3396 )
3390 3397
3391 3398 baserev = self.rev(deltabase)
3392 3399
3393 3400 if baserev != nullrev and self.iscensored(baserev):
3394 3401 # if base is censored, delta must be full replacement in a
3395 3402 # single patch operation
3396 3403 hlen = struct.calcsize(b">lll")
3397 3404 oldlen = self.rawsize(baserev)
3398 3405 newlen = len(delta) - hlen
3399 3406 if delta[:hlen] != mdiff.replacediffheader(
3400 3407 oldlen, newlen
3401 3408 ):
3402 3409 raise error.CensoredBaseError(
3403 3410 self.display_id, self.node(baserev)
3404 3411 )
3405 3412
3406 3413 if not flags and self._peek_iscensored(baserev, delta):
3407 3414 flags |= REVIDX_ISCENSORED
3408 3415
3409 3416 # We assume consumers of addrevisioncb will want to retrieve
3410 3417 # the added revision, which will require a call to
3411 3418 # revision(). revision() will fast path if there is a cache
3412 3419 # hit. So, we tell _addrevision() to always cache in this case.
3413 3420 # We're only using addgroup() in the context of changegroup
3414 3421 # generation so the revision data can always be handled as raw
3415 3422 # by the flagprocessor.
3416 3423 rev = self._addrevision(
3417 3424 node,
3418 3425 None,
3419 3426 transaction,
3420 3427 link,
3421 3428 p1,
3422 3429 p2,
3423 3430 flags,
3424 3431 (baserev, delta, delta_base_reuse_policy),
3425 3432 alwayscache=alwayscache,
3426 3433 deltacomputer=deltacomputer,
3427 3434 sidedata=sidedata,
3428 3435 )
3429 3436
3430 3437 if addrevisioncb:
3431 3438 addrevisioncb(self, rev)
3432 3439 empty = False
3433 3440 finally:
3434 3441 self._adding_group = False
3435 3442 return not empty
3436 3443
3437 3444 def iscensored(self, rev):
3438 3445 """Check if a file revision is censored."""
3439 3446 if not self.feature_config.censorable:
3440 3447 return False
3441 3448
3442 3449 return self.flags(rev) & REVIDX_ISCENSORED
3443 3450
3444 3451 def _peek_iscensored(self, baserev, delta):
3445 3452 """Quickly check if a delta produces a censored revision."""
3446 3453 if not self.feature_config.censorable:
3447 3454 return False
3448 3455
3449 3456 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3450 3457
3451 3458 def getstrippoint(self, minlink):
3452 3459 """find the minimum rev that must be stripped to strip the linkrev
3453 3460
3454 3461 Returns a tuple containing the minimum rev and a set of all revs that
3455 3462 have linkrevs that will be broken by this strip.
3456 3463 """
3457 3464 return storageutil.resolvestripinfo(
3458 3465 minlink,
3459 3466 len(self) - 1,
3460 3467 self.headrevs(),
3461 3468 self.linkrev,
3462 3469 self.parentrevs,
3463 3470 )
3464 3471
3465 3472 def strip(self, minlink, transaction):
3466 3473 """truncate the revlog on the first revision with a linkrev >= minlink
3467 3474
3468 3475 This function is called when we're stripping revision minlink and
3469 3476 its descendants from the repository.
3470 3477
3471 3478 We have to remove all revisions with linkrev >= minlink, because
3472 3479 the equivalent changelog revisions will be renumbered after the
3473 3480 strip.
3474 3481
3475 3482 So we truncate the revlog on the first of these revisions, and
3476 3483 trust that the caller has saved the revisions that shouldn't be
3477 3484 removed and that it'll re-add them after this truncation.
3478 3485 """
3479 3486 if len(self) == 0:
3480 3487 return
3481 3488
3482 3489 rev, _ = self.getstrippoint(minlink)
3483 3490 if rev == len(self):
3484 3491 return
3485 3492
3486 3493 # first truncate the files on disk
3487 3494 data_end = self.start(rev)
3488 3495 if not self._inline:
3489 3496 transaction.add(self._datafile, data_end)
3490 3497 end = rev * self.index.entry_size
3491 3498 else:
3492 3499 end = data_end + (rev * self.index.entry_size)
3493 3500
3494 3501 if self._sidedatafile:
3495 3502 sidedata_end = self.sidedata_cut_off(rev)
3496 3503 transaction.add(self._sidedatafile, sidedata_end)
3497 3504
3498 3505 transaction.add(self._indexfile, end)
3499 3506 if self._docket is not None:
3500 3507 # XXX we could leverage the docket while stripping. However, it is
3501 3508 # not powerful enough at the time of this comment
3502 3509 self._docket.index_end = end
3503 3510 self._docket.data_end = data_end
3504 3511 self._docket.sidedata_end = sidedata_end
3505 3512 self._docket.write(transaction, stripping=True)
3506 3513
3507 3514 # then reset internal state in memory to forget those revisions
3508 3515 self._chaininfocache = util.lrucachedict(500)
3509 3516 self._inner.clear_cache()
3510 3517
3511 3518 del self.index[rev:-1]
3512 3519
3513 3520 def checksize(self):
3514 3521 """Check size of index and data files
3515 3522
3516 3523 return a (dd, di) tuple.
3517 3524 - dd: extra bytes for the "data" file
3518 3525 - di: extra bytes for the "index" file
3519 3526
3520 3527 A healthy revlog will return (0, 0).
3521 3528 """
3522 3529 expected = 0
3523 3530 if len(self):
3524 3531 expected = max(0, self.end(len(self) - 1))
3525 3532
3526 3533 try:
3527 3534 with self._datafp() as f:
3528 3535 f.seek(0, io.SEEK_END)
3529 3536 actual = f.tell()
3530 3537 dd = actual - expected
3531 3538 except FileNotFoundError:
3532 3539 dd = 0
3533 3540
3534 3541 try:
3535 3542 f = self.opener(self._indexfile)
3536 3543 f.seek(0, io.SEEK_END)
3537 3544 actual = f.tell()
3538 3545 f.close()
3539 3546 s = self.index.entry_size
3540 3547 i = max(0, actual // s)
3541 3548 di = actual - (i * s)
3542 3549 if self._inline:
3543 3550 databytes = 0
3544 3551 for r in self:
3545 3552 databytes += max(0, self.length(r))
3546 3553 dd = 0
3547 3554 di = actual - len(self) * s - databytes
3548 3555 except FileNotFoundError:
3549 3556 di = 0
3550 3557
3551 3558 return (dd, di)
3552 3559
3553 3560 def files(self):
3554 3561 """return list of files that compose this revlog"""
3555 3562 res = [self._indexfile]
3556 3563 if self._docket_file is None:
3557 3564 if not self._inline:
3558 3565 res.append(self._datafile)
3559 3566 else:
3560 3567 res.append(self._docket_file)
3561 3568 res.extend(self._docket.old_index_filepaths(include_empty=False))
3562 3569 if self._docket.data_end:
3563 3570 res.append(self._datafile)
3564 3571 res.extend(self._docket.old_data_filepaths(include_empty=False))
3565 3572 if self._docket.sidedata_end:
3566 3573 res.append(self._sidedatafile)
3567 3574 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3568 3575 return res
3569 3576
3570 3577 def emitrevisions(
3571 3578 self,
3572 3579 nodes,
3573 3580 nodesorder=None,
3574 3581 revisiondata=False,
3575 3582 assumehaveparentrevisions=False,
3576 3583 deltamode=repository.CG_DELTAMODE_STD,
3577 3584 sidedata_helpers=None,
3578 3585 debug_info=None,
3579 3586 ):
3580 3587 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3581 3588 raise error.ProgrammingError(
3582 3589 b'unhandled value for nodesorder: %s' % nodesorder
3583 3590 )
3584 3591
3585 3592 if nodesorder is None and not self.delta_config.general_delta:
3586 3593 nodesorder = b'storage'
3587 3594
3588 3595 if (
3589 3596 not self._storedeltachains
3590 3597 and deltamode != repository.CG_DELTAMODE_PREV
3591 3598 ):
3592 3599 deltamode = repository.CG_DELTAMODE_FULL
3593 3600
3594 3601 return storageutil.emitrevisions(
3595 3602 self,
3596 3603 nodes,
3597 3604 nodesorder,
3598 3605 revlogrevisiondelta,
3599 3606 deltaparentfn=self.deltaparent,
3600 3607 candeltafn=self._candelta,
3601 3608 rawsizefn=self.rawsize,
3602 3609 revdifffn=self.revdiff,
3603 3610 flagsfn=self.flags,
3604 3611 deltamode=deltamode,
3605 3612 revisiondata=revisiondata,
3606 3613 assumehaveparentrevisions=assumehaveparentrevisions,
3607 3614 sidedata_helpers=sidedata_helpers,
3608 3615 debug_info=debug_info,
3609 3616 )
3610 3617
3611 3618 DELTAREUSEALWAYS = b'always'
3612 3619 DELTAREUSESAMEREVS = b'samerevs'
3613 3620 DELTAREUSENEVER = b'never'
3614 3621
3615 3622 DELTAREUSEFULLADD = b'fulladd'
3616 3623
3617 3624 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3618 3625
3619 3626 def clone(
3620 3627 self,
3621 3628 tr,
3622 3629 destrevlog,
3623 3630 addrevisioncb=None,
3624 3631 deltareuse=DELTAREUSESAMEREVS,
3625 3632 forcedeltabothparents=None,
3626 3633 sidedata_helpers=None,
3627 3634 ):
3628 3635 """Copy this revlog to another, possibly with format changes.
3629 3636
3630 3637 The destination revlog will contain the same revisions and nodes.
3631 3638 However, it may not be bit-for-bit identical due to e.g. delta encoding
3632 3639 differences.
3633 3640
3634 3641 The ``deltareuse`` argument control how deltas from the existing revlog
3635 3642 are preserved in the destination revlog. The argument can have the
3636 3643 following values:
3637 3644
3638 3645 DELTAREUSEALWAYS
3639 3646 Deltas will always be reused (if possible), even if the destination
3640 3647 revlog would not select the same revisions for the delta. This is the
3641 3648 fastest mode of operation.
3642 3649 DELTAREUSESAMEREVS
3643 3650 Deltas will be reused if the destination revlog would pick the same
3644 3651 revisions for the delta. This mode strikes a balance between speed
3645 3652 and optimization.
3646 3653 DELTAREUSENEVER
3647 3654 Deltas will never be reused. This is the slowest mode of execution.
3648 3655 This mode can be used to recompute deltas (e.g. if the diff/delta
3649 3656 algorithm changes).
3650 3657 DELTAREUSEFULLADD
3651 3658 Revisions will be re-added as if they were new content. This is
3652 3659 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3653 3660 e.g. large file detection and handling.
3654 3661
3655 3662 Delta computation can be slow, so the choice of delta reuse policy can
3656 3663 significantly affect run time.
3657 3664
3658 3665 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3659 3666 two extremes. Deltas will be reused if they are appropriate. But if a
3660 3667 better delta base could be chosen, it will be. This means if you
3661 3668 are converting a non-generaldelta revlog to a generaldelta revlog,
3662 3669 deltas will be recomputed if the delta's parent isn't a parent of the
3663 3670 revision.
3664 3671
3665 3672 In addition to the delta policy, the ``forcedeltabothparents``
3666 3673 argument controls whether to force computing deltas against both parents
3667 3674 for merges. When unset, the destination revlog's existing setting is used.
3668 3675
3669 3676 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3670 3677 `sidedata_helpers`.
3671 3678 """
3672 3679 if deltareuse not in self.DELTAREUSEALL:
3673 3680 raise ValueError(
3674 3681 _(b'value for deltareuse invalid: %s') % deltareuse
3675 3682 )
3676 3683
3677 3684 if len(destrevlog):
3678 3685 raise ValueError(_(b'destination revlog is not empty'))
3679 3686
3680 3687 if getattr(self, 'filteredrevs', None):
3681 3688 raise ValueError(_(b'source revlog has filtered revisions'))
3682 3689 if getattr(destrevlog, 'filteredrevs', None):
3683 3690 raise ValueError(_(b'destination revlog has filtered revisions'))
3684 3691
3685 3692 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3686 3693 # if possible.
3687 3694 old_delta_config = destrevlog.delta_config
3688 3695 destrevlog.delta_config = destrevlog.delta_config.copy()
3689 3696
3690 3697 try:
3691 3698 if deltareuse == self.DELTAREUSEALWAYS:
3692 3699 destrevlog.delta_config.lazy_delta_base = True
3693 3700 destrevlog.delta_config.lazy_delta = True
3694 3701 elif deltareuse == self.DELTAREUSESAMEREVS:
3695 3702 destrevlog.delta_config.lazy_delta_base = False
3696 3703 destrevlog.delta_config.lazy_delta = True
3697 3704 elif deltareuse == self.DELTAREUSENEVER:
3698 3705 destrevlog.delta_config.lazy_delta_base = False
3699 3706 destrevlog.delta_config.lazy_delta = False
3700 3707
3701 3708 delta_both_parents = (
3702 3709 forcedeltabothparents or old_delta_config.delta_both_parents
3703 3710 )
3704 3711 destrevlog.delta_config.delta_both_parents = delta_both_parents
3705 3712
3706 3713 with self.reading(), destrevlog._writing(tr):
3707 3714 self._clone(
3708 3715 tr,
3709 3716 destrevlog,
3710 3717 addrevisioncb,
3711 3718 deltareuse,
3712 3719 forcedeltabothparents,
3713 3720 sidedata_helpers,
3714 3721 )
3715 3722
3716 3723 finally:
3717 3724 destrevlog.delta_config = old_delta_config
3718 3725
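A hedged sketch of a typical call, assuming `src` and `dst` are open revlogs and `tr` an active transaction (all three names are illustrative):

    # recompute every delta while copying, e.g. after a delta algorithm
    # change; DELTAREUSESAMEREVS remains the default middle ground
    src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)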
3719 3726 def _clone(
3720 3727 self,
3721 3728 tr,
3722 3729 destrevlog,
3723 3730 addrevisioncb,
3724 3731 deltareuse,
3725 3732 forcedeltabothparents,
3726 3733 sidedata_helpers,
3727 3734 ):
3728 3735 """perform the core duty of `revlog.clone` after parameter processing"""
3729 3736 write_debug = None
3730 3737 if self.delta_config.debug_delta:
3731 3738 write_debug = tr._report
3732 3739 deltacomputer = deltautil.deltacomputer(
3733 3740 destrevlog,
3734 3741 write_debug=write_debug,
3735 3742 )
3736 3743 index = self.index
3737 3744 for rev in self:
3738 3745 entry = index[rev]
3739 3746
3740 3747 # Some classes override linkrev to take filtered revs into
3741 3748 # account. Use raw entry from index.
3742 3749 flags = entry[0] & 0xFFFF
3743 3750 linkrev = entry[4]
3744 3751 p1 = index[entry[5]][7]
3745 3752 p2 = index[entry[6]][7]
3746 3753 node = entry[7]
3747 3754
3748 3755 # (Possibly) reuse the delta from the revlog if allowed and
3749 3756 # the revlog chunk is a delta.
3750 3757 cachedelta = None
3751 3758 rawtext = None
3752 3759 if deltareuse == self.DELTAREUSEFULLADD:
3753 3760 text = self._revisiondata(rev)
3754 3761 sidedata = self.sidedata(rev)
3755 3762
3756 3763 if sidedata_helpers is not None:
3757 3764 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3758 3765 self, sidedata_helpers, sidedata, rev
3759 3766 )
3760 3767 flags = flags | new_flags[0] & ~new_flags[1]
3761 3768
3762 3769 destrevlog.addrevision(
3763 3770 text,
3764 3771 tr,
3765 3772 linkrev,
3766 3773 p1,
3767 3774 p2,
3768 3775 cachedelta=cachedelta,
3769 3776 node=node,
3770 3777 flags=flags,
3771 3778 deltacomputer=deltacomputer,
3772 3779 sidedata=sidedata,
3773 3780 )
3774 3781 else:
3775 3782 if destrevlog.delta_config.lazy_delta:
3776 3783 dp = self.deltaparent(rev)
3777 3784 if dp != nullrev:
3778 3785 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3779 3786
3780 3787 sidedata = None
3781 3788 if not cachedelta:
3782 3789 try:
3783 3790 rawtext = self._revisiondata(rev)
3784 3791 except error.CensoredNodeError as censored:
3785 3792 assert flags & REVIDX_ISCENSORED
3786 3793 rawtext = censored.tombstone
3787 3794 sidedata = self.sidedata(rev)
3788 3795 if sidedata is None:
3789 3796 sidedata = self.sidedata(rev)
3790 3797
3791 3798 if sidedata_helpers is not None:
3792 3799 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3793 3800 self, sidedata_helpers, sidedata, rev
3794 3801 )
3795 3802 flags = flags | new_flags[0] & ~new_flags[1]
3796 3803
3797 3804 destrevlog._addrevision(
3798 3805 node,
3799 3806 rawtext,
3800 3807 tr,
3801 3808 linkrev,
3802 3809 p1,
3803 3810 p2,
3804 3811 flags,
3805 3812 cachedelta,
3806 3813 deltacomputer=deltacomputer,
3807 3814 sidedata=sidedata,
3808 3815 )
3809 3816
3810 3817 if addrevisioncb:
3811 3818 addrevisioncb(self, rev, node)
3812 3819
3813 3820 def censorrevision(self, tr, censornode, tombstone=b''):
3814 3821 if self._format_version == REVLOGV0:
3815 3822 raise error.RevlogError(
3816 3823 _(b'cannot censor with version %d revlogs')
3817 3824 % self._format_version
3818 3825 )
3819 3826 elif self._format_version == REVLOGV1:
3820 3827 rewrite.v1_censor(self, tr, censornode, tombstone)
3821 3828 else:
3822 3829 rewrite.v2_censor(self, tr, censornode, tombstone)
3823 3830
3824 3831 def verifyintegrity(self, state):
3825 3832 """Verifies the integrity of the revlog.
3826 3833
3827 3834 Yields ``revlogproblem`` instances describing problems that are
3828 3835 found.
3829 3836 """
3830 3837 dd, di = self.checksize()
3831 3838 if dd:
3832 3839 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3833 3840 if di:
3834 3841 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3835 3842
3836 3843 version = self._format_version
3837 3844
3838 3845 # The verifier tells us what version revlog we should be.
3839 3846 if version != state[b'expectedversion']:
3840 3847 yield revlogproblem(
3841 3848 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3842 3849 % (self.display_id, version, state[b'expectedversion'])
3843 3850 )
3844 3851
3845 3852 state[b'skipread'] = set()
3846 3853 state[b'safe_renamed'] = set()
3847 3854
3848 3855 for rev in self:
3849 3856 node = self.node(rev)
3850 3857
3851 3858 # Verify contents. 4 cases to care about:
3852 3859 #
3853 3860 # common: the most common case
3854 3861 # rename: with a rename
3855 3862 # meta: file content starts with b'\1\n', the metadata
3856 3863 # header defined in filelog.py, but without a rename
3857 3864 # ext: content stored externally
3858 3865 #
3859 3866 # More formally, their differences are shown below:
3860 3867 #
3861 3868 # | common | rename | meta | ext
3862 3869 # -------------------------------------------------------
3863 3870 # flags() | 0 | 0 | 0 | not 0
3864 3871 # renamed() | False | True | False | ?
3865 3872 # rawtext[0:2]=='\1\n'| False | True | True | ?
3866 3873 #
3867 3874 # "rawtext" means the raw text stored in revlog data, which
3868 3875 # could be retrieved by "rawdata(rev)". "text"
3869 3876 # mentioned below is "revision(rev)".
3870 3877 #
3871 3878 # There are 3 different lengths stored physically:
3872 3879 # 1. L1: rawsize, stored in revlog index
3873 3880 # 2. L2: len(rawtext), stored in revlog data
3874 3881 # 3. L3: len(text), stored in revlog data if flags==0, or
3875 3882 # possibly somewhere else if flags!=0
3876 3883 #
3877 3884 # L1 should be equal to L2. L3 could be different from them.
3878 3885 # "text" may or may not affect commit hash depending on flag
3879 3886 # processors (see flagutil.addflagprocessor).
3880 3887 #
3881 3888 # | common | rename | meta | ext
3882 3889 # -------------------------------------------------
3883 3890 # rawsize() | L1 | L1 | L1 | L1
3884 3891 # size() | L1 | L2-LM | L1(*) | L1 (?)
3885 3892 # len(rawtext) | L2 | L2 | L2 | L2
3886 3893 # len(text) | L2 | L2 | L2 | L3
3887 3894 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3888 3895 #
3889 3896 # LM: length of metadata, depending on rawtext
3890 3897 # (*): not ideal, see comment in filelog.size
3891 3898 # (?): could be "- len(meta)" if the resolved content has
3892 3899 # rename metadata
3893 3900 #
3894 3901 # Checks needed to be done:
3895 3902 # 1. length check: L1 == L2, in all cases.
3896 3903 # 2. hash check: depending on flag processor, we may need to
3897 3904 # use either "text" (external), or "rawtext" (in revlog).
3898 3905
3899 3906 try:
3900 3907 skipflags = state.get(b'skipflags', 0)
3901 3908 if skipflags:
3902 3909 skipflags &= self.flags(rev)
3903 3910
3904 3911 _verify_revision(self, skipflags, state, node)
3905 3912
3906 3913 l1 = self.rawsize(rev)
3907 3914 l2 = len(self.rawdata(node))
3908 3915
3909 3916 if l1 != l2:
3910 3917 yield revlogproblem(
3911 3918 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3912 3919 node=node,
3913 3920 )
3914 3921
3915 3922 except error.CensoredNodeError:
3916 3923 if state[b'erroroncensored']:
3917 3924 yield revlogproblem(
3918 3925 error=_(b'censored file data'), node=node
3919 3926 )
3920 3927 state[b'skipread'].add(node)
3921 3928 except Exception as e:
3922 3929 yield revlogproblem(
3923 3930 error=_(b'unpacking %s: %s')
3924 3931 % (short(node), stringutil.forcebytestr(e)),
3925 3932 node=node,
3926 3933 )
3927 3934 state[b'skipread'].add(node)
3928 3935
3929 3936 def storageinfo(
3930 3937 self,
3931 3938 exclusivefiles=False,
3932 3939 sharedfiles=False,
3933 3940 revisionscount=False,
3934 3941 trackedsize=False,
3935 3942 storedsize=False,
3936 3943 ):
3937 3944 d = {}
3938 3945
3939 3946 if exclusivefiles:
3940 3947 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3941 3948 if not self._inline:
3942 3949 d[b'exclusivefiles'].append((self.opener, self._datafile))
3943 3950
3944 3951 if sharedfiles:
3945 3952 d[b'sharedfiles'] = []
3946 3953
3947 3954 if revisionscount:
3948 3955 d[b'revisionscount'] = len(self)
3949 3956
3950 3957 if trackedsize:
3951 3958 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3952 3959
3953 3960 if storedsize:
3954 3961 d[b'storedsize'] = sum(
3955 3962 self.opener.stat(path).st_size for path in self.files()
3956 3963 )
3957 3964
3958 3965 return d
3959 3966
3960 3967 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3961 3968 if not self.feature_config.has_side_data:
3962 3969 return
3963 3970 # revlog formats with sidedata support do not support inline
3964 3971 assert not self._inline
3965 3972 if not helpers[1] and not helpers[2]:
3966 3973 # Nothing to generate or remove
3967 3974 return
3968 3975
3969 3976 new_entries = []
3970 3977 # append the new sidedata
3971 3978 with self._writing(transaction):
3972 3979 ifh, dfh, sdfh = self._inner._writinghandles
3973 3980 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3974 3981
3975 3982 current_offset = sdfh.tell()
3976 3983 for rev in range(startrev, endrev + 1):
3977 3984 entry = self.index[rev]
3978 3985 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3979 3986 store=self,
3980 3987 sidedata_helpers=helpers,
3981 3988 sidedata={},
3982 3989 rev=rev,
3983 3990 )
3984 3991
3985 3992 serialized_sidedata = sidedatautil.serialize_sidedata(
3986 3993 new_sidedata
3987 3994 )
3988 3995
3989 3996 sidedata_compression_mode = COMP_MODE_INLINE
3990 3997 if serialized_sidedata and self.feature_config.has_side_data:
3991 3998 sidedata_compression_mode = COMP_MODE_PLAIN
3992 3999 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3993 4000 if (
3994 4001 h != b'u'
3995 4002 and comp_sidedata[0:1] != b'\0'
3996 4003 and len(comp_sidedata) < len(serialized_sidedata)
3997 4004 ):
3998 4005 assert not h
3999 4006 if (
4000 4007 comp_sidedata[0:1]
4001 4008 == self._docket.default_compression_header
4002 4009 ):
4003 4010 sidedata_compression_mode = COMP_MODE_DEFAULT
4004 4011 serialized_sidedata = comp_sidedata
4005 4012 else:
4006 4013 sidedata_compression_mode = COMP_MODE_INLINE
4007 4014 serialized_sidedata = comp_sidedata
4008 4015 if entry[8] != 0 or entry[9] != 0:
4009 4016 # rewriting entries that already have sidedata is not
4010 4017 # supported yet, because it introduces garbage data in the
4011 4018 # revlog.
4012 4019 msg = b"rewriting existing sidedata is not supported yet"
4013 4020 raise error.Abort(msg)
4014 4021
4015 4022 # Apply (potential) flags to add and to remove after running
4016 4023 # the sidedata helpers
4017 4024 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4018 4025 entry_update = (
4019 4026 current_offset,
4020 4027 len(serialized_sidedata),
4021 4028 new_offset_flags,
4022 4029 sidedata_compression_mode,
4023 4030 )
4024 4031
4025 4032 # the sidedata computation might have moved the file cursors around
4026 4033 sdfh.seek(current_offset, os.SEEK_SET)
4027 4034 sdfh.write(serialized_sidedata)
4028 4035 new_entries.append(entry_update)
4029 4036 current_offset += len(serialized_sidedata)
4030 4037 self._docket.sidedata_end = sdfh.tell()
4031 4038
4032 4039 # rewrite the new index entries
4033 4040 ifh.seek(startrev * self.index.entry_size)
4034 4041 for i, e in enumerate(new_entries):
4035 4042 rev = startrev + i
4036 4043 self.index.replace_sidedata_info(rev, *e)
4037 4044 packed = self.index.entry_binary(rev)
4038 4045 if rev == 0 and self._docket is None:
4039 4046 header = self._format_flags | self._format_version
4040 4047 header = self.index.pack_header(header)
4041 4048 packed = header + packed
4042 4049 ifh.write(packed)
@@ -1,155 +1,164 b''
1 1 # Copyright Mercurial Contributors
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 import contextlib
7 7
8 8 from ..i18n import _
9 9 from .. import (
10 10 error,
11 11 util,
12 12 )
13 13
14 14
15 15 _MAX_CACHED_CHUNK_SIZE = 1048576 # 1 MiB
16 16
17 17 PARTIAL_READ_MSG = _(
18 18 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
19 19 )
20 20
21 21
22 22 def _is_power_of_two(n):
23 23 return (n & (n - 1) == 0) and n != 0
24 24
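For instance, the bit trick above behaves as follows (worked values only):

    _is_power_of_two(4096)   # True:  0b1000000000000 has a single bit set
    _is_power_of_two(96)     # False: 0b1100000 has two bits set
    _is_power_of_two(0)      # False: the `n != 0` guard catches this case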
25 25
26 26 class randomaccessfile:
27 27 """Accessing arbitrary chuncks of data within a file, with some caching"""
28 28
29 29 def __init__(
30 30 self,
31 31 opener,
32 32 filename,
33 33 default_cached_chunk_size,
34 34 initial_cache=None,
35 35 ):
36 36 # Required by bitwise manipulation below
37 37 assert _is_power_of_two(default_cached_chunk_size)
38 38
39 39 self.opener = opener
40 40 self.filename = filename
41 41 self.default_cached_chunk_size = default_cached_chunk_size
42 42 self.writing_handle = None # This is set from revlog.py
43 43 self.reading_handle = None
44 44 self._cached_chunk = b''
45 45 self._cached_chunk_position = 0 # Offset from the start of the file
46 46 if initial_cache:
47 47 self._cached_chunk_position, self._cached_chunk = initial_cache
48 48
49 49 def clear_cache(self):
50 50 self._cached_chunk = b''
51 51 self._cached_chunk_position = 0
52 52
53 @property
54 def is_open(self):
55 """True if any file handle is being held
56
57 Used for asserts and debugging in the Python code"""
58 return (
59 self.reading_handle is not None or self.writing_handle is not None
60 )
61
53 62 def _open(self, mode=b'r'):
54 63 """Return a file object"""
55 64 return self.opener(self.filename, mode=mode)
56 65
57 66 @contextlib.contextmanager
58 67 def _read_handle(self):
59 68 """File object suitable for reading data"""
60 69 # Use a file handle being actively used for writes, if available.
61 70 # There is some danger to doing this because reads will seek the
62 71 # file. However, revlog._writeentry performs a SEEK_END before all
63 72 # writes, so we should be safe.
64 73 if self.writing_handle:
65 74 yield self.writing_handle
66 75
67 76 elif self.reading_handle:
68 77 yield self.reading_handle
69 78
70 79 # Otherwise open a new file handle.
71 80 else:
72 81 with self._open() as fp:
73 82 yield fp
74 83
75 84 @contextlib.contextmanager
76 85 def reading(self):
77 86 """Context manager that keeps the file open for reading"""
78 87 if (
79 88 self.reading_handle is None
80 89 and self.writing_handle is None
81 90 and self.filename is not None
82 91 ):
83 92 with self._open() as fp:
84 93 self.reading_handle = fp
85 94 try:
86 95 yield
87 96 finally:
88 97 self.reading_handle = None
89 98 else:
90 99 yield
91 100
92 101 def read_chunk(self, offset, length):
93 102 """Read a chunk of bytes from the file.
94 103
95 104 Accepts an absolute offset and a length to read.
96 105
97 106 Reads go through any file handle currently held for writing or
98 107 reading; that handle will be seeked and its original seek position
99 108 will NOT be restored.
100 109
101 110 Returns bytes or a buffer of raw byte data.
102 111
103 112 Raises if the requested number of bytes could not be read.
104 113 """
105 114 end = offset + length
106 115 cache_start = self._cached_chunk_position
107 116 cache_end = cache_start + len(self._cached_chunk)
108 117 # Is the requested chunk within the cache?
109 118 if cache_start <= offset and end <= cache_end:
110 119 if cache_start == offset and end == cache_end:
111 120 return self._cached_chunk # avoid a copy
112 121 relative_start = offset - cache_start
113 122 return util.buffer(self._cached_chunk, relative_start, length)
114 123
115 124 return self._read_and_update_cache(offset, length)
116 125
117 126 def _read_and_update_cache(self, offset, length):
118 127 # Cache data both forward and backward around the requested
119 128 # data, in a fixed size window. This helps speed up operations
120 129 # involving reading the revlog backwards.
121 130 real_offset = offset & ~(self.default_cached_chunk_size - 1)
122 131 real_length = (
123 132 (offset + length + self.default_cached_chunk_size)
124 133 & ~(self.default_cached_chunk_size - 1)
125 134 ) - real_offset
126 135 with self._read_handle() as file_obj:
127 136 file_obj.seek(real_offset)
128 137 data = file_obj.read(real_length)
129 138
130 139 self._add_cached_chunk(real_offset, data)
131 140
132 141 relative_offset = offset - real_offset
133 142 got = len(data) - relative_offset
134 143 if got < length:
135 144 message = PARTIAL_READ_MSG % (self.filename, length, offset, got)
136 145 raise error.RevlogError(message)
137 146
138 147 if offset != real_offset or real_length != length:
139 148 return util.buffer(data, relative_offset, length)
140 149 return data
141 150
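To make the window alignment above concrete, a worked example with an assumed 64 KiB default chunk size:

    size = 65536                                   # power of two
    offset, length = 70000, 100
    real_offset = offset & ~(size - 1)             # 65536 (rounded down)
    real_length = ((offset + length + size)
                   & ~(size - 1)) - real_offset    # 65536
    # the cached window [65536, 131072) fully covers [70000, 70100)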
142 151 def _add_cached_chunk(self, offset, data):
143 152 """Add to or replace the cached data chunk.
144 153
145 154 Accepts an absolute offset and the data that is at that location.
146 155 """
147 156 if (
148 157 self._cached_chunk_position + len(self._cached_chunk) == offset
149 158 and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE
150 159 ):
151 160 # add to existing cache
152 161 self._cached_chunk += data
153 162 else:
154 163 self._cached_chunk = data
155 164 self._cached_chunk_position = offset