revlog: deal with special "postfix" explicitly...
marmoute
r47916:c6b8d5d9 default
@@ -1,625 +1,628
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 )
15 15 from .thirdparty import attr
16 16
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 metadata,
21 21 pycompat,
22 22 revlog,
23 23 )
24 24 from .utils import (
25 25 dateutil,
26 26 stringutil,
27 27 )
28 28 from .revlogutils import (
29 29 constants as revlog_constants,
30 30 flagutil,
31 31 )
32 32
33 33 _defaultextra = {b'branch': b'default'}
34 34
35 35
36 36 def _string_escape(text):
37 37 """
38 38 >>> from .pycompat import bytechr as chr
39 39 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
40 40 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
41 41 >>> s
42 42 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
43 43 >>> res = _string_escape(s)
44 44 >>> s == _string_unescape(res)
45 45 True
46 46 """
47 47 # subset of the string_escape codec
48 48 text = (
49 49 text.replace(b'\\', b'\\\\')
50 50 .replace(b'\n', b'\\n')
51 51 .replace(b'\r', b'\\r')
52 52 )
53 53 return text.replace(b'\0', b'\\0')
54 54
55 55
56 56 def _string_unescape(text):
57 57 if b'\\0' in text:
58 58 # fix up \0 without getting into trouble with \\0
59 59 text = text.replace(b'\\\\', b'\\\\\n')
60 60 text = text.replace(b'\\0', b'\0')
61 61 text = text.replace(b'\n', b'')
62 62 return stringutil.unescapestr(text)
63 63
64 64
65 65 def decodeextra(text):
66 66 """
67 67 >>> from .pycompat import bytechr as chr
68 68 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
69 69 ... ).items())
70 70 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
71 71 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
72 72 ... b'baz': chr(92) + chr(0) + b'2'})
73 73 ... ).items())
74 74 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
75 75 """
76 76 extra = _defaultextra.copy()
77 77 for l in text.split(b'\0'):
78 78 if l:
79 79 k, v = _string_unescape(l).split(b':', 1)
80 80 extra[k] = v
81 81 return extra
82 82
83 83
84 84 def encodeextra(d):
85 85 # keys must be sorted to produce a deterministic changelog entry
86 86 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
87 87 return b"\0".join(items)
88 88
89 89
90 90 def stripdesc(desc):
91 91 """strip trailing whitespace and leading and trailing empty lines"""
92 92 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
93 93
94 94
95 95 class appender(object):
96 96 """the changelog index must be updated last on disk, so we use this class
97 97 to delay writes to it"""
98 98
99 99 def __init__(self, vfs, name, mode, buf):
100 100 self.data = buf
101 101 fp = vfs(name, mode)
102 102 self.fp = fp
103 103 self.offset = fp.tell()
104 104 self.size = vfs.fstat(fp).st_size
105 105 self._end = self.size
106 106
107 107 def end(self):
108 108 return self._end
109 109
110 110 def tell(self):
111 111 return self.offset
112 112
113 113 def flush(self):
114 114 pass
115 115
116 116 @property
117 117 def closed(self):
118 118 return self.fp.closed
119 119
120 120 def close(self):
121 121 self.fp.close()
122 122
123 123 def seek(self, offset, whence=0):
124 124 '''virtual file offset spans real file and data'''
125 125 if whence == 0:
126 126 self.offset = offset
127 127 elif whence == 1:
128 128 self.offset += offset
129 129 elif whence == 2:
130 130 self.offset = self.end() + offset
131 131 if self.offset < self.size:
132 132 self.fp.seek(self.offset)
133 133
134 134 def read(self, count=-1):
135 135 '''only trick here is reads that span real file and data'''
136 136 ret = b""
137 137 if self.offset < self.size:
138 138 s = self.fp.read(count)
139 139 ret = s
140 140 self.offset += len(s)
141 141 if count > 0:
142 142 count -= len(s)
143 143 if count != 0:
144 144 doff = self.offset - self.size
145 145 self.data.insert(0, b"".join(self.data))
146 146 del self.data[1:]
147 147 s = self.data[0][doff : doff + count]
148 148 self.offset += len(s)
149 149 ret += s
150 150 return ret
151 151
152 152 def write(self, s):
153 153 self.data.append(bytes(s))
154 154 self.offset += len(s)
155 155 self._end += len(s)
156 156
157 157 def __enter__(self):
158 158 self.fp.__enter__()
159 159 return self
160 160
161 161 def __exit__(self, *args):
162 162 return self.fp.__exit__(*args)
163 163
164 164
165 165 class _divertopener(object):
166 166 def __init__(self, opener, target):
167 167 self._opener = opener
168 168 self._target = target
169 169
170 170 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
171 171 if name != self._target:
172 172 return self._opener(name, mode, **kwargs)
173 173 return self._opener(name + b".a", mode, **kwargs)
174 174
175 175 def __getattr__(self, attr):
176 176 return getattr(self._opener, attr)
177 177
178 178
179 179 def _delayopener(opener, target, buf):
180 180 """build an opener that stores chunks in 'buf' instead of 'target'"""
181 181
182 182 def _delay(name, mode=b'r', checkambig=False, **kwargs):
183 183 if name != target:
184 184 return opener(name, mode, **kwargs)
185 185 assert not kwargs
186 186 return appender(opener, name, mode, buf)
187 187
188 188 return _delay
189 189
190 190
191 191 @attr.s
192 192 class _changelogrevision(object):
193 193 # Extensions might modify _defaultextra, so let the constructor below pass
194 194 # it in
195 195 extra = attr.ib()
196 196 manifest = attr.ib()
197 197 user = attr.ib(default=b'')
198 198 date = attr.ib(default=(0, 0))
199 199 files = attr.ib(default=attr.Factory(list))
200 200 filesadded = attr.ib(default=None)
201 201 filesremoved = attr.ib(default=None)
202 202 p1copies = attr.ib(default=None)
203 203 p2copies = attr.ib(default=None)
204 204 description = attr.ib(default=b'')
205 205 branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))
206 206
207 207
208 208 class changelogrevision(object):
209 209 """Holds results of a parsed changelog revision.
210 210
211 211 Changelog revisions consist of multiple pieces of data, including
212 212 the manifest node, user, and date. This object exposes a view into
213 213 the parsed object.
214 214 """
215 215
216 216 __slots__ = (
217 217 '_offsets',
218 218 '_text',
219 219 '_sidedata',
220 220 '_cpsd',
221 221 '_changes',
222 222 )
223 223
224 224 def __new__(cls, cl, text, sidedata, cpsd):
225 225 if not text:
226 226 return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)
227 227
228 228 self = super(changelogrevision, cls).__new__(cls)
229 229 # We could return here and implement the following as an __init__.
230 230 # But doing it here is equivalent and saves an extra function call.
231 231
232 232 # format used:
233 233 # nodeid\n : manifest node in ascii
234 234 # user\n : user, no \n or \r allowed
235 235 # time tz extra\n : date (time is int or float, timezone is int)
236 236 # : extra is metadata, encoded and separated by '\0'
237 237 # : older versions ignore it
238 238 # files\n\n : files modified by the cset, no \n or \r allowed
239 239 # (.*) : comment (free text, ideally utf-8)
240 240 #
241 241 # changelog v0 doesn't use extra
242 242
243 243 nl1 = text.index(b'\n')
244 244 nl2 = text.index(b'\n', nl1 + 1)
245 245 nl3 = text.index(b'\n', nl2 + 1)
246 246
247 247 # The list of files may be empty, in which case nl3 is the first byte of
248 248 # the double newline that precedes the description.
249 249 if text[nl3 + 1 : nl3 + 2] == b'\n':
250 250 doublenl = nl3
251 251 else:
252 252 doublenl = text.index(b'\n\n', nl3 + 1)
253 253
254 254 self._offsets = (nl1, nl2, nl3, doublenl)
255 255 self._text = text
256 256 self._sidedata = sidedata
257 257 self._cpsd = cpsd
258 258 self._changes = None
259 259
260 260 return self
261 261
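The offset bookkeeping above is easiest to check against a concrete entry. A made-up example matching the format comment (the manifest hex and all other fields are illustrative, not a real changeset):

```python
text = (
    b"1234567890abcdef1234567890abcdef12345678\n"  # manifest node (hex)
    b"Alice <alice@example.com>\n"                 # user
    b"1625097600 0 branch:stable\n"                # time tz extra
    b"foo.txt\n"                                   # files touched ...
    b"bar/baz.txt\n"
    b"\n"
    b"commit message"                              # description
)
nl1 = text.index(b'\n')                  # end of the manifest line
nl2 = text.index(b'\n', nl1 + 1)         # end of the user line
nl3 = text.index(b'\n', nl2 + 1)         # end of the date/extra line
doublenl = text.index(b'\n\n', nl3 + 1)  # files list is non-empty here
assert text[nl3 + 1 : doublenl].split(b'\n') == [b'foo.txt', b'bar/baz.txt']
assert text[doublenl + 2 :] == b'commit message'
```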
262 262 @property
263 263 def manifest(self):
264 264 return bin(self._text[0 : self._offsets[0]])
265 265
266 266 @property
267 267 def user(self):
268 268 off = self._offsets
269 269 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
270 270
271 271 @property
272 272 def _rawdate(self):
273 273 off = self._offsets
274 274 dateextra = self._text[off[1] + 1 : off[2]]
275 275 return dateextra.split(b' ', 2)[0:2]
276 276
277 277 @property
278 278 def _rawextra(self):
279 279 off = self._offsets
280 280 dateextra = self._text[off[1] + 1 : off[2]]
281 281 fields = dateextra.split(b' ', 2)
282 282 if len(fields) != 3:
283 283 return None
284 284
285 285 return fields[2]
286 286
287 287 @property
288 288 def date(self):
289 289 raw = self._rawdate
290 290 time = float(raw[0])
291 291 # Various tools did silly things with the timezone.
292 292 try:
293 293 timezone = int(raw[1])
294 294 except ValueError:
295 295 timezone = 0
296 296
297 297 return time, timezone
298 298
299 299 @property
300 300 def extra(self):
301 301 raw = self._rawextra
302 302 if raw is None:
303 303 return _defaultextra
304 304
305 305 return decodeextra(raw)
306 306
307 307 @property
308 308 def changes(self):
309 309 if self._changes is not None:
310 310 return self._changes
311 311 if self._cpsd:
312 312 changes = metadata.decode_files_sidedata(self._sidedata)
313 313 else:
314 314 changes = metadata.ChangingFiles(
315 315 touched=self.files or (),
316 316 added=self.filesadded or (),
317 317 removed=self.filesremoved or (),
318 318 p1_copies=self.p1copies or {},
319 319 p2_copies=self.p2copies or {},
320 320 )
321 321 self._changes = changes
322 322 return changes
323 323
324 324 @property
325 325 def files(self):
326 326 if self._cpsd:
327 327 return sorted(self.changes.touched)
328 328 off = self._offsets
329 329 if off[2] == off[3]:
330 330 return []
331 331
332 332 return self._text[off[2] + 1 : off[3]].split(b'\n')
333 333
334 334 @property
335 335 def filesadded(self):
336 336 if self._cpsd:
337 337 return self.changes.added
338 338 else:
339 339 rawindices = self.extra.get(b'filesadded')
340 340 if rawindices is None:
341 341 return None
342 342 return metadata.decodefileindices(self.files, rawindices)
343 343
344 344 @property
345 345 def filesremoved(self):
346 346 if self._cpsd:
347 347 return self.changes.removed
348 348 else:
349 349 rawindices = self.extra.get(b'filesremoved')
350 350 if rawindices is None:
351 351 return None
352 352 return metadata.decodefileindices(self.files, rawindices)
353 353
354 354 @property
355 355 def p1copies(self):
356 356 if self._cpsd:
357 357 return self.changes.copied_from_p1
358 358 else:
359 359 rawcopies = self.extra.get(b'p1copies')
360 360 if rawcopies is None:
361 361 return None
362 362 return metadata.decodecopies(self.files, rawcopies)
363 363
364 364 @property
365 365 def p2copies(self):
366 366 if self._cpsd:
367 367 return self.changes.copied_from_p2
368 368 else:
369 369 rawcopies = self.extra.get(b'p2copies')
370 370 if rawcopies is None:
371 371 return None
372 372 return metadata.decodecopies(self.files, rawcopies)
373 373
374 374 @property
375 375 def description(self):
376 376 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
377 377
378 378 @property
379 379 def branchinfo(self):
380 380 extra = self.extra
381 381 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
382 382
383 383
384 384 class changelog(revlog.revlog):
385 385 def __init__(self, opener, trypending=False, concurrencychecker=None):
386 386 """Load a changelog revlog using an opener.
387 387
388 388 If ``trypending`` is true, we attempt to load the index from a
389 389 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
390 390 The ``00changelog.i.a`` file contains index (and possibly inline
391 391 revision) data for a transaction that hasn't been finalized yet.
392 392 It exists in a separate file to facilitate readers (such as
393 393 hook processes) accessing data before a transaction is finalized.
394 394
395 395 ``concurrencychecker`` will be passed to the revlog init function, see
396 396 the documentation there.
397 397 """
398
399 indexfile = b'00changelog.i'
398 400 if trypending and opener.exists(b'00changelog.i.a'):
399 indexfile = b'00changelog.i.a'
401 postfix = b'a'
400 402 else:
401 indexfile = b'00changelog.i'
403 postfix = None
402 404
403 405 datafile = b'00changelog.d'
404 406 revlog.revlog.__init__(
405 407 self,
406 408 opener,
407 409 target=(revlog_constants.KIND_CHANGELOG, None),
410 postfix=postfix,
408 411 indexfile=indexfile,
409 412 datafile=datafile,
410 413 checkambig=True,
411 414 mmaplargeindex=True,
412 415 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
413 416 concurrencychecker=concurrencychecker,
414 417 )
415 418
416 419 if self._initempty and (self._format_version == revlog.REVLOGV1):
417 420 # changelogs don't benefit from generaldelta.
418 421
419 422 self._format_flags &= ~revlog.FLAG_GENERALDELTA
420 423 self._generaldelta = False
421 424
422 425 # Delta chains for changelogs tend to be very small because entries
423 426 # tend to be small and don't delta well with each other. So disable delta
424 427 # chains.
425 428 self._storedeltachains = False
426 429
427 430 self._realopener = opener
428 431 self._delayed = False
429 432 self._delaybuf = None
430 433 self._divert = False
431 434 self._filteredrevs = frozenset()
432 435 self._filteredrevs_hashcache = {}
433 436 self._copiesstorage = opener.options.get(b'copies-storage')
434 437
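This constructor is the consumer side of the change: rather than pre-computing the `00changelog.i.a` name, it now passes `postfix=b'a'` and lets the revlog derive the pending index name. The decision, isolated into a hypothetical helper for illustration (the real code inlines it above; `opener.exists` is the only vfs call involved):

```python
def _pending_postfix(opener, trypending):
    """Hypothetical helper mirroring the branch above: b'a' selects the
    pending 00changelog.i.a index, None the regular 00changelog.i."""
    if trypending and opener.exists(b'00changelog.i.a'):
        return b'a'
    return None
```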
435 438 @property
436 439 def filteredrevs(self):
437 440 return self._filteredrevs
438 441
439 442 @filteredrevs.setter
440 443 def filteredrevs(self, val):
441 444 # Ensure all updates go through this function
442 445 assert isinstance(val, frozenset)
443 446 self._filteredrevs = val
444 447 self._filteredrevs_hashcache = {}
445 448
446 449 def delayupdate(self, tr):
447 450 """delay visibility of index updates to other readers"""
448 451
449 452 if not self._delayed:
450 453 if len(self) == 0:
451 454 self._divert = True
452 455 if self._realopener.exists(self.indexfile + b'.a'):
453 456 self._realopener.unlink(self.indexfile + b'.a')
454 457 self.opener = _divertopener(self._realopener, self.indexfile)
455 458 else:
456 459 self._delaybuf = []
457 460 self.opener = _delayopener(
458 461 self._realopener, self.indexfile, self._delaybuf
459 462 )
460 463 self._delayed = True
461 464 tr.addpending(b'cl-%i' % id(self), self._writepending)
462 465 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
463 466
464 467 def _finalize(self, tr):
465 468 """finalize index updates"""
466 469 self._delayed = False
467 470 self.opener = self._realopener
468 471 # move redirected index data back into place
469 472 if self._divert:
470 473 assert not self._delaybuf
471 474 tmpname = self.indexfile + b".a"
472 475 nfile = self.opener.open(tmpname)
473 476 nfile.close()
474 477 self.opener.rename(tmpname, self.indexfile, checkambig=True)
475 478 elif self._delaybuf:
476 479 fp = self.opener(self.indexfile, b'a', checkambig=True)
477 480 fp.write(b"".join(self._delaybuf))
478 481 fp.close()
479 482 self._delaybuf = None
480 483 self._divert = False
481 484 # split when we're done
482 485 self._enforceinlinesize(tr)
483 486
484 487 def _writepending(self, tr):
485 488 """create a file containing the unfinalized state for
486 489 pretxnchangegroup"""
487 490 if self._delaybuf:
488 491 # make a temporary copy of the index
489 492 fp1 = self._realopener(self.indexfile)
490 493 pendingfilename = self.indexfile + b".a"
491 494 # register as a temp file to ensure cleanup on failure
492 495 tr.registertmp(pendingfilename)
493 496 # write existing data
494 497 fp2 = self._realopener(pendingfilename, b"w")
495 498 fp2.write(fp1.read())
496 499 # add pending data
497 500 fp2.write(b"".join(self._delaybuf))
498 501 fp2.close()
499 502 # switch modes so finalize can simply rename
500 503 self._delaybuf = None
501 504 self._divert = True
502 505 self.opener = _divertopener(self._realopener, self.indexfile)
503 506
504 507 if self._divert:
505 508 return True
506 509
507 510 return False
508 511
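Taken together, `delayupdate`, `_writepending`, and `_finalize` form a small state machine: index writes are first diverted to `00changelog.i.a` (empty repository) or buffered in memory, `_writepending` then materializes a `.a` file that pretxn hooks can read, and `_finalize` folds everything back into `00changelog.i`. A hedged sketch with a toy transaction (illustrative only; the real transaction API has far more than these four methods):

```python
class toytransaction(object):
    """Records just the callbacks the changelog registers above."""

    def __init__(self):
        self._pending = {}
        self._finalizers = {}

    def addpending(self, name, callback):
        self._pending[name] = callback

    def addfinalize(self, name, callback):
        self._finalizers[name] = callback

    def registertmp(self, name):
        pass  # the real transaction would delete `name` on failure

    def writepending(self):
        # run every pending callback; True if any wrote a pending file
        return any([cb(self) for cb in self._pending.values()])

    def close(self):
        for cb in self._finalizers.values():
            cb(self)

# Flow: cl.delayupdate(tr); cl.add(...); tr.writepending() exposes
# 00changelog.i.a to hooks; tr.close() runs _finalize, which renames or
# appends the delayed data back into 00changelog.i.
```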
509 512 def _enforceinlinesize(self, tr, fp=None):
510 513 if not self._delayed:
511 514 revlog.revlog._enforceinlinesize(self, tr, fp)
512 515
513 516 def read(self, nodeorrev):
514 517 """Obtain data from a parsed changelog revision.
515 518
516 519 Returns a 6-tuple of:
517 520
518 521 - manifest node in binary
519 522 - author/user as a localstr
520 523 - date as a 2-tuple of (time, timezone)
521 524 - list of files
522 525 - commit message as a localstr
523 526 - dict of extra metadata
524 527
525 528 Unless you need to access all fields, consider calling
526 529 ``changelogrevision`` instead, as it is faster for partial object
527 530 access.
528 531 """
529 532 d, s = self._revisiondata(nodeorrev)
530 533 c = changelogrevision(
531 534 self, d, s, self._copiesstorage == b'changeset-sidedata'
532 535 )
533 536 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
534 537
535 538 def changelogrevision(self, nodeorrev):
536 539 """Obtain a ``changelogrevision`` for a node or revision."""
537 540 text, sidedata = self._revisiondata(nodeorrev)
538 541 return changelogrevision(
539 542 self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
540 543 )
541 544
542 545 def readfiles(self, nodeorrev):
543 546 """
544 547 short version of read that only returns the files modified by the cset
545 548 """
546 549 text = self.revision(nodeorrev)
547 550 if not text:
548 551 return []
549 552 last = text.index(b"\n\n")
550 553 l = text[:last].split(b'\n')
551 554 return l[3:]
552 555
553 556 def add(
554 557 self,
555 558 manifest,
556 559 files,
557 560 desc,
558 561 transaction,
559 562 p1,
560 563 p2,
561 564 user,
562 565 date=None,
563 566 extra=None,
564 567 ):
565 568 # Convert to UTF-8 encoded bytestrings as the very first
566 569 # thing: calling any method on a localstr object will turn it
567 570 # into a str object and the cached UTF-8 string is thus lost.
568 571 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
569 572
570 573 user = user.strip()
571 574 # An empty username or a username with a "\n" will make the
572 575 # revision text contain two "\n\n" sequences -> corrupt
573 576 # repository since read cannot unpack the revision.
574 577 if not user:
575 578 raise error.StorageError(_(b"empty username"))
576 579 if b"\n" in user:
577 580 raise error.StorageError(
578 581 _(b"username %r contains a newline") % pycompat.bytestr(user)
579 582 )
580 583
581 584 desc = stripdesc(desc)
582 585
583 586 if date:
584 587 parseddate = b"%d %d" % dateutil.parsedate(date)
585 588 else:
586 589 parseddate = b"%d %d" % dateutil.makedate()
587 590 if extra:
588 591 branch = extra.get(b"branch")
589 592 if branch in (b"default", b""):
590 593 del extra[b"branch"]
591 594 elif branch in (b".", b"null", b"tip"):
592 595 raise error.StorageError(
593 596 _(b'the name \'%s\' is reserved') % branch
594 597 )
595 598 sortedfiles = sorted(files.touched)
596 599 flags = 0
597 600 sidedata = None
598 601 if self._copiesstorage == b'changeset-sidedata':
599 602 if files.has_copies_info:
600 603 flags |= flagutil.REVIDX_HASCOPIESINFO
601 604 sidedata = metadata.encode_files_sidedata(files)
602 605
603 606 if extra:
604 607 extra = encodeextra(extra)
605 608 parseddate = b"%s %s" % (parseddate, extra)
606 609 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
607 610 text = b"\n".join(l)
608 611 rev = self.addrevision(
609 612 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
610 613 )
611 614 return self.node(rev)
612 615
613 616 def branchinfo(self, rev):
614 617 """return the branch name and open/close state of a revision
615 618
616 619 This function exists because creating a changectx object
617 620 just to access this is costly."""
618 621 return self.changelogrevision(rev).branchinfo
619 622
620 623 def _nodeduplicatecallback(self, transaction, rev):
621 624 # keep track of revisions that got "re-added", e.g. unbundle of known rev.
622 625 #
623 626 # We track them in a list to preserve their order from the source bundle
624 627 duplicates = transaction.changes.setdefault(b'revduplicates', [])
625 628 duplicates.append(rev)
@@ -1,3162 +1,3171
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 INDEX_HEADER,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_HASCOPIESINFO,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 )
59 59 from .thirdparty import attr
60 60 from . import (
61 61 ancestor,
62 62 dagop,
63 63 error,
64 64 mdiff,
65 65 policy,
66 66 pycompat,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .interfaces import (
71 71 repository,
72 72 util as interfaceutil,
73 73 )
74 74 from .revlogutils import (
75 75 deltas as deltautil,
76 76 flagutil,
77 77 nodemap as nodemaputil,
78 78 revlogv0,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanked usage of all the names to prevent pyflakes constraints
87 87 # We need these names available in the module for extensions.
88 88
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_HASCOPIESINFO
102 102 REVIDX_EXTSTORED
103 103 REVIDX_DEFAULT_FLAGS
104 104 REVIDX_FLAGS_ORDER
105 105 REVIDX_RAWTEXT_CHANGING_FLAGS
106 106
107 107 parsers = policy.importmod('parsers')
108 108 rustancestor = policy.importrust('ancestor')
109 109 rustdagop = policy.importrust('dagop')
110 110 rustrevlog = policy.importrust('revlog')
111 111
112 112 # Aliased for performance.
113 113 _zlibdecompress = zlib.decompress
114 114
115 115 # max size of revlog with inline data
116 116 _maxinline = 131072
117 117 _chunksize = 1048576
118 118
119 119 # Flag processors for REVIDX_ELLIPSIS.
120 120 def ellipsisreadprocessor(rl, text):
121 121 return text, False
122 122
123 123
124 124 def ellipsiswriteprocessor(rl, text):
125 125 return text, False
126 126
127 127
128 128 def ellipsisrawprocessor(rl, text):
129 129 return False
130 130
131 131
132 132 ellipsisprocessor = (
133 133 ellipsisreadprocessor,
134 134 ellipsiswriteprocessor,
135 135 ellipsisrawprocessor,
136 136 )
137 137
138 138
139 139 def offset_type(offset, type):
140 140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 141 raise ValueError(b'unknown revlog index flags')
142 142 return int(int(offset) << 16 | type)
143 143
144 144
145 145 def _verify_revision(rl, skipflags, state, node):
146 146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 147 point for extensions to influence the operation."""
148 148 if skipflags:
149 149 state[b'skipread'].add(node)
150 150 else:
151 151 # Side-effect: read content and verify hash.
152 152 rl.revision(node)
153 153
154 154
155 155 # True if a fast implementation for persistent-nodemap is available
156 156 #
157 157 # We also consider the "pure" python implementation "fast" because people
158 158 # using pure don't really have performance considerations (and a
159 159 # wheelbarrow of other slowness sources)
160 160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 161 parsers, 'BaseIndexObject'
162 162 )
163 163
164 164
165 165 @attr.s(slots=True, frozen=True)
166 166 class _revisioninfo(object):
167 167 """Information about a revision that allows building its fulltext
168 168 node: expected hash of the revision
169 169 p1, p2: parent revs of the revision
170 170 btext: built text cache consisting of a one-element list
171 171 cachedelta: (baserev, uncompressed_delta) or None
172 172 flags: flags associated to the revision storage
173 173
174 174 One of btext[0] or cachedelta must be set.
175 175 """
176 176
177 177 node = attr.ib()
178 178 p1 = attr.ib()
179 179 p2 = attr.ib()
180 180 btext = attr.ib()
181 181 textlen = attr.ib()
182 182 cachedelta = attr.ib()
183 183 flags = attr.ib()
184 184
185 185
186 186 @interfaceutil.implementer(repository.irevisiondelta)
187 187 @attr.s(slots=True)
188 188 class revlogrevisiondelta(object):
189 189 node = attr.ib()
190 190 p1node = attr.ib()
191 191 p2node = attr.ib()
192 192 basenode = attr.ib()
193 193 flags = attr.ib()
194 194 baserevisionsize = attr.ib()
195 195 revision = attr.ib()
196 196 delta = attr.ib()
197 197 sidedata = attr.ib()
198 198 protocol_flags = attr.ib()
199 199 linknode = attr.ib(default=None)
200 200
201 201
202 202 @interfaceutil.implementer(repository.iverifyproblem)
203 203 @attr.s(frozen=True)
204 204 class revlogproblem(object):
205 205 warning = attr.ib(default=None)
206 206 error = attr.ib(default=None)
207 207 node = attr.ib(default=None)
208 208
209 209
210 210 def parse_index_v1(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline)
213 213 return index, cache
214 214
215 215
216 216 def parse_index_v2(data, inline):
217 217 # call the C implementation to parse the index data
218 218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 219 return index, cache
220 220
221 221
222 222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223 223
224 224 def parse_index_v1_nodemap(data, inline):
225 225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 226 return index, cache
227 227
228 228
229 229 else:
230 230 parse_index_v1_nodemap = None
231 231
232 232
233 233 def parse_index_v1_mixed(data, inline):
234 234 index, cache = parse_index_v1(data, inline)
235 235 return rustrevlog.MixedIndex(index), cache
236 236
237 237
238 238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 239 # signed integer)
240 240 _maxentrysize = 0x7FFFFFFF
241 241
242 242
243 243 class revlog(object):
244 244 """
245 245 the underlying revision storage object
246 246
247 247 A revlog consists of two parts, an index and the revision data.
248 248
249 249 The index is a file with a fixed record size containing
250 250 information on each revision, including its nodeid (hash), the
251 251 nodeids of its parents, the position and offset of its data within
252 252 the data file, and the revision it's based on. Finally, each entry
253 253 contains a linkrev entry that can serve as a pointer to external
254 254 data.
255 255
256 256 The revision data itself is a linear collection of data chunks.
257 257 Each chunk represents a revision and is usually represented as a
258 258 delta against the previous chunk. To bound lookup time, runs of
259 259 deltas are limited to about 2 times the length of the original
260 260 version data. This makes retrieval of a version proportional to
261 261 its size, or O(1) relative to the number of revisions.
262 262
263 263 Both pieces of the revlog are written to in an append-only
264 264 fashion, which means we never need to rewrite a file to insert or
265 265 remove data, and can use some simple techniques to avoid the need
266 266 for locking while reading.
267 267
268 268 If checkambig, indexfile is opened with checkambig=True at
269 269 writing, to avoid file stat ambiguity.
270 270
271 271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 272 index will be mmapped rather than read if it is larger than the
273 273 configured threshold.
274 274
275 275 If censorable is True, the revlog can have censored revisions.
276 276
277 277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 278 compression for the data content.
279 279
280 280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 281 file handle, a filename, and an expected position. It should check whether
282 282 the current position in the file handle is valid, and log/warn/fail (by
283 283 raising).
284 284 """
285 285
286 286 _flagserrorclass = error.RevlogError
287 287
288 288 def __init__(
289 289 self,
290 290 opener,
291 291 target,
292 postfix=None,
292 293 indexfile=None,
293 294 datafile=None,
294 295 checkambig=False,
295 296 mmaplargeindex=False,
296 297 censorable=False,
297 298 upperboundcomp=None,
298 299 persistentnodemap=False,
299 300 concurrencychecker=None,
300 301 ):
301 302 """
302 303 create a revlog object
303 304
304 305 opener is a function that abstracts the file opening operation
305 306 and can be used to implement COW semantics or the like.
306 307
307 308 `target`: a (KIND, ID) tuple that identifies the content stored in
308 309 this revlog. It helps the rest of the code understand what the revlog
309 310 is about without having to resort to heuristics and index filename
310 311 analysis. Note that this must reliably be set by normal code, but
311 312 test, debug, or performance measurement code might not set it to an
312 313 accurate value.
313 314 """
314 315 self.upperboundcomp = upperboundcomp
316 if not indexfile.endswith(b'.i'):
317 raise error.ProgrammingError(
318 b"revlog's indexfile should end with `.i`"
319 )
320 if datafile is None:
321 datafile = indexfile[:-2] + b".d"
322 if postfix is not None:
323 datafile = b'%s.%s' % (datafile, postfix)
324 if postfix is not None:
325 indexfile = b'%s.%s' % (indexfile, postfix)
315 326 self.indexfile = indexfile
316 self.datafile = datafile or (indexfile[:-2] + b".d")
327 self.datafile = datafile
317 328 self.nodemap_file = None
329 self.postfix = postfix
318 330 if persistentnodemap:
319 331 self.nodemap_file = nodemaputil.get_nodemap_file(
320 332 opener, self.indexfile
321 333 )
322 334
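This block is the core of the change: both file names are now derived from the base `*.i` name plus an explicit `postfix`, instead of callers pre-baking names like `00changelog.i.a`. The two call patterns, traced by hand (illustrative values; note the datafile postfix only applies when the revlog derived the datafile itself, which keeps an explicitly passed datafile such as the changelog's `00changelog.d` unchanged):

```python
# 1) datafile left for the revlog to derive:
indexfile, datafile, postfix = b'data/foo.i', None, b'a'
if datafile is None:
    datafile = indexfile[:-2] + b'.d'              # b'data/foo.d'
    if postfix is not None:
        datafile = b'%s.%s' % (datafile, postfix)  # b'data/foo.d.a'
if postfix is not None:
    indexfile = b'%s.%s' % (indexfile, postfix)    # b'data/foo.i.a'

# 2) datafile passed explicitly (the changelog case): only the index
#    gets the postfix, so the pending 00changelog.i.a keeps pointing
#    into the shared 00changelog.d.
```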
323 335 self.opener = opener
324 336 assert target[0] in ALL_KINDS
325 337 assert len(target) == 2
326 338 self.target = target
327 339 # When True, indexfile is opened with checkambig=True at writing, to
328 340 # avoid file stat ambiguity.
329 341 self._checkambig = checkambig
330 342 self._mmaplargeindex = mmaplargeindex
331 343 self._censorable = censorable
332 344 # 3-tuple of (node, rev, text) for a raw revision.
333 345 self._revisioncache = None
334 346 # Maps rev to chain base rev.
335 347 self._chainbasecache = util.lrucachedict(100)
336 348 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
337 349 self._chunkcache = (0, b'')
338 350 # How much data to read and cache into the raw revlog data cache.
339 351 self._chunkcachesize = 65536
340 352 self._maxchainlen = None
341 353 self._deltabothparents = True
342 354 self.index = None
343 355 self._nodemap_docket = None
344 356 # Mapping of partial identifiers to full nodes.
345 357 self._pcache = {}
346 358 # Mapping of revision integer to full node.
347 359 self._compengine = b'zlib'
348 360 self._compengineopts = {}
349 361 self._maxdeltachainspan = -1
350 362 self._withsparseread = False
351 363 self._sparserevlog = False
352 364 self._srdensitythreshold = 0.50
353 365 self._srmingapsize = 262144
354 366
355 367 # Make copy of flag processors so each revlog instance can support
356 368 # custom flags.
357 369 self._flagprocessors = dict(flagutil.flagprocessors)
358 370
359 371 # 2-tuple of file handles being used for active writing.
360 372 self._writinghandles = None
361 373
362 374 self._loadindex()
363 375
364 376 self._concurrencychecker = concurrencychecker
365 377
366 378 def _init_opts(self):
367 379 """process options (from above/config) to setup associated default revlog mode
368 380
369 381 These values might be affected when actually reading on disk information.
370 382
371 383 The relevant values are returned for use in _loadindex().
372 384
373 385 * newversionflags:
374 386 version header to use if we need to create a new revlog
375 387
376 388 * mmapindexthreshold:
377 389 minimal index size for start to use mmap
378 390
379 391 * force_nodemap:
380 392 force the usage of a "development" version of the nodemap code
381 393 """
382 394 mmapindexthreshold = None
383 395 opts = self.opener.options
384 396
385 397 if b'revlogv2' in opts:
386 398 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
387 399 elif b'revlogv1' in opts:
388 400 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
389 401 if b'generaldelta' in opts:
390 402 newversionflags |= FLAG_GENERALDELTA
391 403 elif b'revlogv0' in self.opener.options:
392 404 newversionflags = REVLOGV0
393 405 else:
394 406 newversionflags = REVLOG_DEFAULT_VERSION
395 407
396 408 if b'chunkcachesize' in opts:
397 409 self._chunkcachesize = opts[b'chunkcachesize']
398 410 if b'maxchainlen' in opts:
399 411 self._maxchainlen = opts[b'maxchainlen']
400 412 if b'deltabothparents' in opts:
401 413 self._deltabothparents = opts[b'deltabothparents']
402 414 self._lazydelta = bool(opts.get(b'lazydelta', True))
403 415 self._lazydeltabase = False
404 416 if self._lazydelta:
405 417 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
406 418 if b'compengine' in opts:
407 419 self._compengine = opts[b'compengine']
408 420 if b'zlib.level' in opts:
409 421 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
410 422 if b'zstd.level' in opts:
411 423 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
412 424 if b'maxdeltachainspan' in opts:
413 425 self._maxdeltachainspan = opts[b'maxdeltachainspan']
414 426 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
415 427 mmapindexthreshold = opts[b'mmapindexthreshold']
416 428 self.hassidedata = bool(opts.get(b'side-data', False))
417 429 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
418 430 withsparseread = bool(opts.get(b'with-sparse-read', False))
419 431 # sparse-revlog forces sparse-read
420 432 self._withsparseread = self._sparserevlog or withsparseread
421 433 if b'sparse-read-density-threshold' in opts:
422 434 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
423 435 if b'sparse-read-min-gap-size' in opts:
424 436 self._srmingapsize = opts[b'sparse-read-min-gap-size']
425 437 if opts.get(b'enableellipsis'):
426 438 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
427 439
428 440 # revlog v0 doesn't have flag processors
429 441 for flag, processor in pycompat.iteritems(
430 442 opts.get(b'flagprocessors', {})
431 443 ):
432 444 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
433 445
434 446 if self._chunkcachesize <= 0:
435 447 raise error.RevlogError(
436 448 _(b'revlog chunk cache size %r is not greater than 0')
437 449 % self._chunkcachesize
438 450 )
439 451 elif self._chunkcachesize & (self._chunkcachesize - 1):
440 452 raise error.RevlogError(
441 453 _(b'revlog chunk cache size %r is not a power of 2')
442 454 % self._chunkcachesize
443 455 )
444 456 force_nodemap = opts.get(b'devel-force-nodemap', False)
445 457 return newversionflags, mmapindexthreshold, force_nodemap
446 458
447 459 def _loadindex(self):
448 460
449 461 newversionflags, mmapindexthreshold, force_nodemap = self._init_opts()
450 462 indexdata = b''
451 463 self._initempty = True
452 464 try:
453 465 with self._indexfp() as f:
454 466 if (
455 467 mmapindexthreshold is not None
456 468 and self.opener.fstat(f).st_size >= mmapindexthreshold
457 469 ):
458 470 # TODO: should .close() to release resources without
459 471 # relying on Python GC
460 472 indexdata = util.buffer(util.mmapread(f))
461 473 else:
462 474 indexdata = f.read()
463 475 if len(indexdata) > 0:
464 476 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
465 477 self._initempty = False
466 478 else:
467 479 versionflags = newversionflags
468 480 except IOError as inst:
469 481 if inst.errno != errno.ENOENT:
470 482 raise
471 483
472 484 versionflags = newversionflags
473 485
474 486 flags = self._format_flags = versionflags & ~0xFFFF
475 487 fmt = self._format_version = versionflags & 0xFFFF
476 488
477 489 if fmt == REVLOGV0:
478 490 if flags:
479 491 raise error.RevlogError(
480 492 _(b'unknown flags (%#04x) in version %d revlog %s')
481 493 % (flags >> 16, fmt, self.indexfile)
482 494 )
483 495
484 496 self._inline = False
485 497 self._generaldelta = False
486 498
487 499 elif fmt == REVLOGV1:
488 500 if flags & ~REVLOGV1_FLAGS:
489 501 raise error.RevlogError(
490 502 _(b'unknown flags (%#04x) in version %d revlog %s')
491 503 % (flags >> 16, fmt, self.indexfile)
492 504 )
493 505
494 506 self._inline = versionflags & FLAG_INLINE_DATA
495 507 self._generaldelta = versionflags & FLAG_GENERALDELTA
496 508
497 509 elif fmt == REVLOGV2:
498 510 if flags & ~REVLOGV2_FLAGS:
499 511 raise error.RevlogError(
500 512 _(b'unknown flags (%#04x) in version %d revlog %s')
501 513 % (flags >> 16, fmt, self.indexfile)
502 514 )
503 515
504 516 # There is a bug in the transaction handling when going from an
505 517 # inline revlog to a separate index and data file. Turn it off until
506 518 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
507 519 # See issue6485
508 520 self._inline = False
509 521 # generaldelta implied by version 2 revlogs.
510 522 self._generaldelta = True
511 523
512 524 else:
513 525 raise error.RevlogError(
514 526 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
515 527 )
516 528
517 529 self.nodeconstants = sha1nodeconstants
518 530 self.nullid = self.nodeconstants.nullid
519 531
520 532 # sparse-revlog can't be on without general-delta (issue6056)
521 533 if not self._generaldelta:
522 534 self._sparserevlog = False
523 535
524 536 self._storedeltachains = True
525 537
526 538 devel_nodemap = (
527 539 self.nodemap_file
528 540 and force_nodemap
529 541 and parse_index_v1_nodemap is not None
530 542 )
531 543
532 544 use_rust_index = False
533 545 if rustrevlog is not None:
534 546 if self.nodemap_file is not None:
535 547 use_rust_index = True
536 548 else:
537 549 use_rust_index = self.opener.options.get(b'rust.index')
538 550
539 551 self._parse_index = parse_index_v1
540 552 if self._format_version == REVLOGV0:
541 553 self._parse_index = revlogv0.parse_index_v0
542 554 elif fmt == REVLOGV2:
543 555 self._parse_index = parse_index_v2
544 556 elif devel_nodemap:
545 557 self._parse_index = parse_index_v1_nodemap
546 558 elif use_rust_index:
547 559 self._parse_index = parse_index_v1_mixed
548 560 try:
549 561 d = self._parse_index(indexdata, self._inline)
550 562 index, _chunkcache = d
551 563 use_nodemap = (
552 564 not self._inline
553 565 and self.nodemap_file is not None
554 566 and util.safehasattr(index, 'update_nodemap_data')
555 567 )
556 568 if use_nodemap:
557 569 nodemap_data = nodemaputil.persisted_data(self)
558 570 if nodemap_data is not None:
559 571 docket = nodemap_data[0]
560 572 if (
561 573 len(d[0]) > docket.tip_rev
562 574 and d[0][docket.tip_rev][7] == docket.tip_node
563 575 ):
564 576 # no changelog tampering
565 577 self._nodemap_docket = docket
566 578 index.update_nodemap_data(*nodemap_data)
567 579 except (ValueError, IndexError):
568 580 raise error.RevlogError(
569 581 _(b"index %s is corrupted") % self.indexfile
570 582 )
571 583 self.index, self._chunkcache = d
572 584 if not self._chunkcache:
573 585 self._chunkclear()
574 586 # revnum -> (chain-length, sum-delta-length)
575 587 self._chaininfocache = util.lrucachedict(500)
576 588 # revlog header -> revlog compressor
577 589 self._decompressors = {}
578 590
579 591 @util.propertycache
580 592 def revlog_kind(self):
581 593 return self.target[0]
582 594
583 595 @util.propertycache
584 596 def _compressor(self):
585 597 engine = util.compengines[self._compengine]
586 598 return engine.revlogcompressor(self._compengineopts)
587 599
588 600 def _indexfp(self, mode=b'r'):
589 601 """file object for the revlog's index file"""
590 602 args = {'mode': mode}
591 603 if mode != b'r':
592 604 args['checkambig'] = self._checkambig
593 605 if mode == b'w':
594 606 args['atomictemp'] = True
595 607 return self.opener(self.indexfile, **args)
596 608
597 609 def _datafp(self, mode=b'r'):
598 610 """file object for the revlog's data file"""
599 611 return self.opener(self.datafile, mode=mode)
600 612
601 613 @contextlib.contextmanager
602 614 def _datareadfp(self, existingfp=None):
603 615 """file object suitable to read data"""
604 616 # Use explicit file handle, if given.
605 617 if existingfp is not None:
606 618 yield existingfp
607 619
608 620 # Use a file handle being actively used for writes, if available.
609 621 # There is some danger to doing this because reads will seek the
610 622 # file. However, _writeentry() performs a SEEK_END before all writes,
611 623 # so we should be safe.
612 624 elif self._writinghandles:
613 625 if self._inline:
614 626 yield self._writinghandles[0]
615 627 else:
616 628 yield self._writinghandles[1]
617 629
618 630 # Otherwise open a new file handle.
619 631 else:
620 632 if self._inline:
621 633 func = self._indexfp
622 634 else:
623 635 func = self._datafp
624 636 with func() as fp:
625 637 yield fp
626 638
627 639 def tiprev(self):
628 640 return len(self.index) - 1
629 641
630 642 def tip(self):
631 643 return self.node(self.tiprev())
632 644
633 645 def __contains__(self, rev):
634 646 return 0 <= rev < len(self)
635 647
636 648 def __len__(self):
637 649 return len(self.index)
638 650
639 651 def __iter__(self):
640 652 return iter(pycompat.xrange(len(self)))
641 653
642 654 def revs(self, start=0, stop=None):
643 655 """iterate over all rev in this revlog (from start to stop)"""
644 656 return storageutil.iterrevs(len(self), start=start, stop=stop)
645 657
646 658 @property
647 659 def nodemap(self):
648 660 msg = (
649 661 b"revlog.nodemap is deprecated, "
650 662 b"use revlog.index.[has_node|rev|get_rev]"
651 663 )
652 664 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
653 665 return self.index.nodemap
654 666
655 667 @property
656 668 def _nodecache(self):
657 669 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
658 670 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
659 671 return self.index.nodemap
660 672
661 673 def hasnode(self, node):
662 674 try:
663 675 self.rev(node)
664 676 return True
665 677 except KeyError:
666 678 return False
667 679
668 680 def candelta(self, baserev, rev):
669 681 """whether two revisions (baserev, rev) can be delta-ed or not"""
670 682 # Disable delta if either rev requires a content-changing flag
671 683 # processor (ex. LFS). This is because such flag processor can alter
672 684 # the rawtext content that the delta will be based on, and two clients
673 685 # could have a same revlog node with different flags (i.e. different
674 686 # rawtext contents) and the delta could be incompatible.
675 687 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
676 688 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
677 689 ):
678 690 return False
679 691 return True
680 692
681 693 def update_caches(self, transaction):
682 694 if self.nodemap_file is not None:
683 695 if transaction is None:
684 696 nodemaputil.update_persistent_nodemap(self)
685 697 else:
686 698 nodemaputil.setup_persistent_nodemap(transaction, self)
687 699
688 700 def clearcaches(self):
689 701 self._revisioncache = None
690 702 self._chainbasecache.clear()
691 703 self._chunkcache = (0, b'')
692 704 self._pcache = {}
693 705 self._nodemap_docket = None
694 706 self.index.clearcaches()
695 707 # The python code is the one responsible for validating the docket, we
696 708 # end up having to refresh it here.
697 709 use_nodemap = (
698 710 not self._inline
699 711 and self.nodemap_file is not None
700 712 and util.safehasattr(self.index, 'update_nodemap_data')
701 713 )
702 714 if use_nodemap:
703 715 nodemap_data = nodemaputil.persisted_data(self)
704 716 if nodemap_data is not None:
705 717 self._nodemap_docket = nodemap_data[0]
706 718 self.index.update_nodemap_data(*nodemap_data)
707 719
708 720 def rev(self, node):
709 721 try:
710 722 return self.index.rev(node)
711 723 except TypeError:
712 724 raise
713 725 except error.RevlogError:
714 726 # parsers.c radix tree lookup failed
715 727 if (
716 728 node == self.nodeconstants.wdirid
717 729 or node in self.nodeconstants.wdirfilenodeids
718 730 ):
719 731 raise error.WdirUnsupported
720 732 raise error.LookupError(node, self.indexfile, _(b'no node'))
721 733
722 734 # Accessors for index entries.
723 735
724 736 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
725 737 # are flags.
726 738 def start(self, rev):
727 739 return int(self.index[rev][0] >> 16)
728 740
729 741 def flags(self, rev):
730 742 return self.index[rev][0] & 0xFFFF
731 743
732 744 def length(self, rev):
733 745 return self.index[rev][1]
734 746
735 747 def sidedata_length(self, rev):
736 748 if not self.hassidedata:
737 749 return 0
738 750 return self.index[rev][9]
739 751
740 752 def rawsize(self, rev):
741 753 """return the length of the uncompressed text for a given revision"""
742 754 l = self.index[rev][2]
743 755 if l >= 0:
744 756 return l
745 757
746 758 t = self.rawdata(rev)
747 759 return len(t)
748 760
749 761 def size(self, rev):
750 762 """length of non-raw text (processed by a "read" flag processor)"""
751 763 # fast path: if no "read" flag processor could change the content,
752 764 # size is rawsize. note: ELLIPSIS is known to not change the content.
753 765 flags = self.flags(rev)
754 766 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
755 767 return self.rawsize(rev)
756 768
757 769 return len(self.revision(rev, raw=False))
758 770
759 771 def chainbase(self, rev):
760 772 base = self._chainbasecache.get(rev)
761 773 if base is not None:
762 774 return base
763 775
764 776 index = self.index
765 777 iterrev = rev
766 778 base = index[iterrev][3]
767 779 while base != iterrev:
768 780 iterrev = base
769 781 base = index[iterrev][3]
770 782
771 783 self._chainbasecache[rev] = base
772 784 return base
773 785
774 786 def linkrev(self, rev):
775 787 return self.index[rev][4]
776 788
777 789 def parentrevs(self, rev):
778 790 try:
779 791 entry = self.index[rev]
780 792 except IndexError:
781 793 if rev == wdirrev:
782 794 raise error.WdirUnsupported
783 795 raise
784 796 if entry[5] == nullrev:
785 797 return entry[6], entry[5]
786 798 else:
787 799 return entry[5], entry[6]
788 800
789 801 # fast parentrevs(rev) where rev isn't filtered
790 802 _uncheckedparentrevs = parentrevs
791 803
792 804 def node(self, rev):
793 805 try:
794 806 return self.index[rev][7]
795 807 except IndexError:
796 808 if rev == wdirrev:
797 809 raise error.WdirUnsupported
798 810 raise
799 811
800 812 # Derived from index values.
801 813
802 814 def end(self, rev):
803 815 return self.start(rev) + self.length(rev)
804 816
805 817 def parents(self, node):
806 818 i = self.index
807 819 d = i[self.rev(node)]
808 820 # inline node() to avoid function call overhead
809 821 if d[5] == self.nullid:
810 822 return i[d[6]][7], i[d[5]][7]
811 823 else:
812 824 return i[d[5]][7], i[d[6]][7]
813 825
814 826 def chainlen(self, rev):
815 827 return self._chaininfo(rev)[0]
816 828
817 829 def _chaininfo(self, rev):
818 830 chaininfocache = self._chaininfocache
819 831 if rev in chaininfocache:
820 832 return chaininfocache[rev]
821 833 index = self.index
822 834 generaldelta = self._generaldelta
823 835 iterrev = rev
824 836 e = index[iterrev]
825 837 clen = 0
826 838 compresseddeltalen = 0
827 839 while iterrev != e[3]:
828 840 clen += 1
829 841 compresseddeltalen += e[1]
830 842 if generaldelta:
831 843 iterrev = e[3]
832 844 else:
833 845 iterrev -= 1
834 846 if iterrev in chaininfocache:
835 847 t = chaininfocache[iterrev]
836 848 clen += t[0]
837 849 compresseddeltalen += t[1]
838 850 break
839 851 e = index[iterrev]
840 852 else:
841 853 # Add text length of base since decompressing that also takes
842 854 # work. For cache hits the length is already included.
843 855 compresseddeltalen += e[1]
844 856 r = (clen, compresseddeltalen)
845 857 chaininfocache[rev] = r
846 858 return r
847 859
848 860 def _deltachain(self, rev, stoprev=None):
849 861 """Obtain the delta chain for a revision.
850 862
851 863 ``stoprev`` specifies a revision to stop at. If not specified, we
852 864 stop at the base of the chain.
853 865
854 866 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
855 867 revs in ascending order and ``stopped`` is a bool indicating whether
856 868 ``stoprev`` was hit.
857 869 """
858 870 # Try C implementation.
859 871 try:
860 872 return self.index.deltachain(rev, stoprev, self._generaldelta)
861 873 except AttributeError:
862 874 pass
863 875
864 876 chain = []
865 877
866 878 # Alias to prevent attribute lookup in tight loop.
867 879 index = self.index
868 880 generaldelta = self._generaldelta
869 881
870 882 iterrev = rev
871 883 e = index[iterrev]
872 884 while iterrev != e[3] and iterrev != stoprev:
873 885 chain.append(iterrev)
874 886 if generaldelta:
875 887 iterrev = e[3]
876 888 else:
877 889 iterrev -= 1
878 890 e = index[iterrev]
879 891
880 892 if iterrev == stoprev:
881 893 stopped = True
882 894 else:
883 895 chain.append(iterrev)
884 896 stopped = False
885 897
886 898 chain.reverse()
887 899 return chain, stopped
888 900
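For generaldelta revlogs, the pure-Python fallback above reduces to following `entry[3]` base pointers until a revision is its own base (a full snapshot). A self-contained toy version of that walk, using a bare rev-to-base list in place of real index entries (an illustration, not the revlog API):

```python
def toy_deltachain(bases, rev):
    """Standalone copy of the generaldelta walk above; `bases` maps each
    rev to its delta-base rev."""
    chain = []
    iterrev = rev
    base = bases[iterrev]
    while iterrev != base:
        chain.append(iterrev)
        iterrev = base
        base = bases[iterrev]
    chain.append(iterrev)  # the full-snapshot base itself
    chain.reverse()
    return chain

# rev 0 is a snapshot (its own base); 1, 2, 3 chain off each other:
assert toy_deltachain([0, 0, 1, 2], 3) == [0, 1, 2, 3]
```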
889 901 def ancestors(self, revs, stoprev=0, inclusive=False):
890 902 """Generate the ancestors of 'revs' in reverse revision order.
891 903 Does not generate revs lower than stoprev.
892 904
893 905 See the documentation for ancestor.lazyancestors for more details."""
894 906
895 907 # first, make sure start revisions aren't filtered
896 908 revs = list(revs)
897 909 checkrev = self.node
898 910 for r in revs:
899 911 checkrev(r)
900 912 # and we're sure ancestors aren't filtered as well
901 913
902 914 if rustancestor is not None:
903 915 lazyancestors = rustancestor.LazyAncestors
904 916 arg = self.index
905 917 else:
906 918 lazyancestors = ancestor.lazyancestors
907 919 arg = self._uncheckedparentrevs
908 920 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
909 921
910 922 def descendants(self, revs):
911 923 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
912 924
913 925 def findcommonmissing(self, common=None, heads=None):
914 926 """Return a tuple of the ancestors of common and the ancestors of heads
915 927 that are not ancestors of common. In revset terminology, we return the
916 928 tuple:
917 929
918 930 ::common, (::heads) - (::common)
919 931
920 932 The list is sorted by revision number, meaning it is
921 933 topologically sorted.
922 934
923 935 'heads' and 'common' are both lists of node IDs. If heads is
924 936 not supplied, uses all of the revlog's heads. If common is not
925 937 supplied, uses nullid."""
926 938 if common is None:
927 939 common = [self.nullid]
928 940 if heads is None:
929 941 heads = self.heads()
930 942
931 943 common = [self.rev(n) for n in common]
932 944 heads = [self.rev(n) for n in heads]
933 945
934 946 # we want the ancestors, but inclusive
935 947 class lazyset(object):
936 948 def __init__(self, lazyvalues):
937 949 self.addedvalues = set()
938 950 self.lazyvalues = lazyvalues
939 951
940 952 def __contains__(self, value):
941 953 return value in self.addedvalues or value in self.lazyvalues
942 954
943 955 def __iter__(self):
944 956 added = self.addedvalues
945 957 for r in added:
946 958 yield r
947 959 for r in self.lazyvalues:
948 960 if not r in added:
949 961 yield r
950 962
951 963 def add(self, value):
952 964 self.addedvalues.add(value)
953 965
954 966 def update(self, values):
955 967 self.addedvalues.update(values)
956 968
957 969 has = lazyset(self.ancestors(common))
958 970 has.add(nullrev)
959 971 has.update(common)
960 972
961 973 # take all ancestors from heads that aren't in has
962 974 missing = set()
963 975 visit = collections.deque(r for r in heads if r not in has)
964 976 while visit:
965 977 r = visit.popleft()
966 978 if r in missing:
967 979 continue
968 980 else:
969 981 missing.add(r)
970 982 for p in self.parentrevs(r):
971 983 if p not in has:
972 984 visit.append(p)
973 985 missing = list(missing)
974 986 missing.sort()
975 987 return has, [self.node(miss) for miss in missing]
976 988
977 989 def incrementalmissingrevs(self, common=None):
978 990 """Return an object that can be used to incrementally compute the
979 991 revision numbers of the ancestors of arbitrary sets that are not
980 992 ancestors of common. This is an ancestor.incrementalmissingancestors
981 993 object.
982 994
983 995 'common' is a list of revision numbers. If common is not supplied, uses
984 996 nullrev.
985 997 """
986 998 if common is None:
987 999 common = [nullrev]
988 1000
989 1001 if rustancestor is not None:
990 1002 return rustancestor.MissingAncestors(self.index, common)
991 1003 return ancestor.incrementalmissingancestors(self.parentrevs, common)
992 1004
993 1005 def findmissingrevs(self, common=None, heads=None):
994 1006 """Return the revision numbers of the ancestors of heads that
995 1007 are not ancestors of common.
996 1008
997 1009 More specifically, return a list of revision numbers corresponding to
998 1010 nodes N such that every N satisfies the following constraints:
999 1011
1000 1012 1. N is an ancestor of some node in 'heads'
1001 1013 2. N is not an ancestor of any node in 'common'
1002 1014
1003 1015 The list is sorted by revision number, meaning it is
1004 1016 topologically sorted.
1005 1017
1006 1018 'heads' and 'common' are both lists of revision numbers. If heads is
1007 1019 not supplied, uses all of the revlog's heads. If common is not
1008 1020 supplied, uses nullid."""
1009 1021 if common is None:
1010 1022 common = [nullrev]
1011 1023 if heads is None:
1012 1024 heads = self.headrevs()
1013 1025
1014 1026 inc = self.incrementalmissingrevs(common=common)
1015 1027 return inc.missingancestors(heads)
1016 1028
1017 1029 def findmissing(self, common=None, heads=None):
1018 1030 """Return the ancestors of heads that are not ancestors of common.
1019 1031
1020 1032 More specifically, return a list of nodes N such that every N
1021 1033 satisfies the following constraints:
1022 1034
1023 1035 1. N is an ancestor of some node in 'heads'
1024 1036 2. N is not an ancestor of any node in 'common'
1025 1037
1026 1038 The list is sorted by revision number, meaning it is
1027 1039 topologically sorted.
1028 1040
1029 1041 'heads' and 'common' are both lists of node IDs. If heads is
1030 1042 not supplied, uses all of the revlog's heads. If common is not
1031 1043 supplied, uses nullid."""
1032 1044 if common is None:
1033 1045 common = [self.nullid]
1034 1046 if heads is None:
1035 1047 heads = self.heads()
1036 1048
1037 1049 common = [self.rev(n) for n in common]
1038 1050 heads = [self.rev(n) for n in heads]
1039 1051
1040 1052 inc = self.incrementalmissingrevs(common=common)
1041 1053 return [self.node(r) for r in inc.missingancestors(heads)]
1042 1054
1043 1055 def nodesbetween(self, roots=None, heads=None):
1044 1056 """Return a topological path from 'roots' to 'heads'.
1045 1057
1046 1058 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1047 1059 topologically sorted list of all nodes N that satisfy both of
1048 1060 these constraints:
1049 1061
1050 1062 1. N is a descendant of some node in 'roots'
1051 1063 2. N is an ancestor of some node in 'heads'
1052 1064
1053 1065 Every node is considered to be both a descendant and an ancestor
1054 1066 of itself, so every reachable node in 'roots' and 'heads' will be
1055 1067 included in 'nodes'.
1056 1068
1057 1069 'outroots' is the list of reachable nodes in 'roots', i.e., the
1058 1070 subset of 'roots' that is returned in 'nodes'. Likewise,
1059 1071 'outheads' is the subset of 'heads' that is also in 'nodes'.
1060 1072
1061 1073 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1062 1074 unspecified, uses nullid as the only root. If 'heads' is
1063 1075 unspecified, uses list of all of the revlog's heads."""
1064 1076 nonodes = ([], [], [])
1065 1077 if roots is not None:
1066 1078 roots = list(roots)
1067 1079 if not roots:
1068 1080 return nonodes
1069 1081 lowestrev = min([self.rev(n) for n in roots])
1070 1082 else:
1071 1083 roots = [self.nullid] # Everybody's a descendant of nullid
1072 1084 lowestrev = nullrev
1073 1085 if (lowestrev == nullrev) and (heads is None):
1074 1086 # We want _all_ the nodes!
1075 1087 return (
1076 1088 [self.node(r) for r in self],
1077 1089 [self.nullid],
1078 1090 list(self.heads()),
1079 1091 )
1080 1092 if heads is None:
1081 1093 # All nodes are ancestors, so the latest ancestor is the last
1082 1094 # node.
1083 1095 highestrev = len(self) - 1
1084 1096 # Set ancestors to None to signal that every node is an ancestor.
1085 1097 ancestors = None
1086 1098 # Set heads to an empty dictionary for later discovery of heads
1087 1099 heads = {}
1088 1100 else:
1089 1101 heads = list(heads)
1090 1102 if not heads:
1091 1103 return nonodes
1092 1104 ancestors = set()
1093 1105 # Turn heads into a dictionary so we can remove 'fake' heads.
1094 1106 # Also, later we will be using it to filter out the heads we can't
1095 1107 # find from roots.
1096 1108 heads = dict.fromkeys(heads, False)
1097 1109 # Start at the top and keep marking parents until we're done.
1098 1110 nodestotag = set(heads)
1099 1111 # Remember where the top was so we can use it as a limit later.
1100 1112 highestrev = max([self.rev(n) for n in nodestotag])
1101 1113 while nodestotag:
1102 1114 # grab a node to tag
1103 1115 n = nodestotag.pop()
1104 1116 # Never tag nullid
1105 1117 if n == self.nullid:
1106 1118 continue
1107 1119 # A node's revision number represents its place in a
1108 1120 # topologically sorted list of nodes.
1109 1121 r = self.rev(n)
1110 1122 if r >= lowestrev:
1111 1123 if n not in ancestors:
1112 1124 # If we are possibly a descendant of one of the roots
1113 1125 # and we haven't already been marked as an ancestor
1114 1126 ancestors.add(n) # Mark as ancestor
1115 1127 # Add non-nullid parents to list of nodes to tag.
1116 1128 nodestotag.update(
1117 1129 [p for p in self.parents(n) if p != self.nullid]
1118 1130 )
1119 1131 elif n in heads: # We've seen it before, is it a fake head?
1120 1132 # So it is, real heads should not be the ancestors of
1121 1133 # any other heads.
1122 1134 heads.pop(n)
1123 1135 if not ancestors:
1124 1136 return nonodes
1125 1137 # Now that we have our set of ancestors, we want to remove any
1126 1138 # roots that are not ancestors.
1127 1139
1128 1140 # If one of the roots was nullid, everything is included anyway.
1129 1141 if lowestrev > nullrev:
1130 1142 # But, since we weren't, let's recompute the lowest rev to not
1131 1143 # include roots that aren't ancestors.
1132 1144
1133 1145 # Filter out roots that aren't ancestors of heads
1134 1146 roots = [root for root in roots if root in ancestors]
1135 1147 # Recompute the lowest revision
1136 1148 if roots:
1137 1149 lowestrev = min([self.rev(root) for root in roots])
1138 1150 else:
1139 1151 # No more roots? Return empty list
1140 1152 return nonodes
1141 1153 else:
1142 1154 # We are descending from nullid, and don't need to care about
1143 1155 # any other roots.
1144 1156 lowestrev = nullrev
1145 1157 roots = [self.nullid]
1146 1158 # Transform our roots list into a set.
1147 1159 descendants = set(roots)
1148 1160 # Also, keep the original roots so we can filter out roots that aren't
1149 1161 # 'real' roots (i.e. are descended from other roots).
1150 1162 roots = descendants.copy()
1151 1163 # Our topologically sorted list of output nodes.
1152 1164 orderedout = []
1153 1165 # Don't start at nullid since we don't want nullid in our output list,
1154 1166 # and if nullid shows up in descendants, empty parents will look like
1155 1167 # they're descendants.
1156 1168 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1157 1169 n = self.node(r)
1158 1170 isdescendant = False
1159 1171 if lowestrev == nullrev: # Everybody is a descendant of nullid
1160 1172 isdescendant = True
1161 1173 elif n in descendants:
1162 1174 # n is already a descendant
1163 1175 isdescendant = True
1164 1176 # This check only needs to be done here because all the roots
1165 1177 # will start being marked as descendants before the loop.
1166 1178 if n in roots:
1167 1179 # If n was a root, check if it's a 'real' root.
1168 1180 p = tuple(self.parents(n))
1169 1181 # If any of its parents are descendants, it's not a root.
1170 1182 if (p[0] in descendants) or (p[1] in descendants):
1171 1183 roots.remove(n)
1172 1184 else:
1173 1185 p = tuple(self.parents(n))
1174 1186 # A node is a descendant if either of its parents are
1175 1187 # descendants. (We seeded the descendants set with the roots
1176 1188 # up there, remember?)
1177 1189 if (p[0] in descendants) or (p[1] in descendants):
1178 1190 descendants.add(n)
1179 1191 isdescendant = True
1180 1192 if isdescendant and ((ancestors is None) or (n in ancestors)):
1181 1193 # Only include nodes that are both descendants and ancestors.
1182 1194 orderedout.append(n)
1183 1195 if (ancestors is not None) and (n in heads):
1184 1196 # We're trying to figure out which heads are reachable
1185 1197 # from roots.
1186 1198 # Mark this head as having been reached
1187 1199 heads[n] = True
1188 1200 elif ancestors is None:
1189 1201 # Otherwise, we're trying to discover the heads.
1190 1202 # Assume this is a head because if it isn't, the next step
1191 1203 # will eventually remove it.
1192 1204 heads[n] = True
1193 1205 # But, obviously its parents aren't.
1194 1206 for p in self.parents(n):
1195 1207 heads.pop(p, None)
1196 1208 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1197 1209 roots = list(roots)
1198 1210 assert orderedout
1199 1211 assert roots
1200 1212 assert heads
1201 1213 return (orderedout, roots, heads)
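# Usage sketch (illustrative names): for a root node `r` and a head node
# `h`, nodesbetween() returns the topological path between them plus the
# subsets of the inputs that were actually reachable:
#
#   nodes, outroots, outheads = rl.nodesbetween(roots=[r], heads=[h])
#   # `nodes` is every node that is both a descendant of `r` and an
#   # ancestor of `h`; `outroots`/`outheads` are the inputs kept in `nodes`.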
1202 1214
1203 1215 def headrevs(self, revs=None):
1204 1216 if revs is None:
1205 1217 try:
1206 1218 return self.index.headrevs()
1207 1219 except AttributeError:
1208 1220 return self._headrevs()
1209 1221 if rustdagop is not None:
1210 1222 return rustdagop.headrevs(self.index, revs)
1211 1223 return dagop.headrevs(revs, self._uncheckedparentrevs)
1212 1224
1213 1225 def computephases(self, roots):
1214 1226 return self.index.computephasesmapsets(roots)
1215 1227
1216 1228 def _headrevs(self):
1217 1229 count = len(self)
1218 1230 if not count:
1219 1231 return [nullrev]
1220 1232 # we won't iterate over filtered revs, so nobody is a head at the start
1221 1233 ishead = [0] * (count + 1)
1222 1234 index = self.index
1223 1235 for r in self:
1224 1236 ishead[r] = 1 # I may be a head
1225 1237 e = index[r]
1226 1238 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1227 1239 return [r for r, val in enumerate(ishead) if val]
1228 1240
1229 1241 def heads(self, start=None, stop=None):
1230 1242 """return the list of all nodes that have no children
1231 1243
1232 1244 if start is specified, only heads that are descendants of
1233 1245 start will be returned
1234 1246 if stop is specified, it will consider all the revs from stop
1235 1247 as if they had no children
1236 1248 """
1237 1249 if start is None and stop is None:
1238 1250 if not len(self):
1239 1251 return [self.nullid]
1240 1252 return [self.node(r) for r in self.headrevs()]
1241 1253
1242 1254 if start is None:
1243 1255 start = nullrev
1244 1256 else:
1245 1257 start = self.rev(start)
1246 1258
1247 1259 stoprevs = {self.rev(n) for n in stop or []}
1248 1260
1249 1261 revs = dagop.headrevssubset(
1250 1262 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1251 1263 )
1252 1264
1253 1265 return [self.node(rev) for rev in revs]
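# Usage sketch (illustrative): with no arguments, heads() lists every
# childless node; with `start`, only heads descending from that node:
#
#   all_heads = rl.heads()
#   sub_heads = rl.heads(start=some_node)  # heads that descend from it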
1254 1266
1255 1267 def children(self, node):
1256 1268 """find the children of a given node"""
1257 1269 c = []
1258 1270 p = self.rev(node)
1259 1271 for r in self.revs(start=p + 1):
1260 1272 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1261 1273 if prevs:
1262 1274 for pr in prevs:
1263 1275 if pr == p:
1264 1276 c.append(self.node(r))
1265 1277 elif p == nullrev:
1266 1278 c.append(self.node(r))
1267 1279 return c
1268 1280
1269 1281 def commonancestorsheads(self, a, b):
1270 1282 """calculate all the heads of the common ancestors of nodes a and b"""
1271 1283 a, b = self.rev(a), self.rev(b)
1272 1284 ancs = self._commonancestorsheads(a, b)
1273 1285 return pycompat.maplist(self.node, ancs)
1274 1286
1275 1287 def _commonancestorsheads(self, *revs):
1276 1288 """calculate all the heads of the common ancestors of revs"""
1277 1289 try:
1278 1290 ancs = self.index.commonancestorsheads(*revs)
1279 1291 except (AttributeError, OverflowError): # C implementation failed
1280 1292 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1281 1293 return ancs
1282 1294
1283 1295 def isancestor(self, a, b):
1284 1296 """return True if node a is an ancestor of node b
1285 1297
1286 1298 A revision is considered an ancestor of itself."""
1287 1299 a, b = self.rev(a), self.rev(b)
1288 1300 return self.isancestorrev(a, b)
1289 1301
1290 1302 def isancestorrev(self, a, b):
1291 1303 """return True if revision a is an ancestor of revision b
1292 1304
1293 1305 A revision is considered an ancestor of itself.
1294 1306
1295 1307 The implementation of this is trivial but the use of
1296 1308 reachableroots is not."""
1297 1309 if a == nullrev:
1298 1310 return True
1299 1311 elif a == b:
1300 1312 return True
1301 1313 elif a > b:
1302 1314 return False
1303 1315 return bool(self.reachableroots(a, [b], [a], includepath=False))
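# Usage sketch (illustrative): both entry points treat a revision as an
# ancestor of itself, so for any node `n` in the revlog:
#
#   assert rl.isancestor(n, n)
#   rl.isancestorrev(rl.rev(a), rl.rev(b))  # same test on rev numbers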
1304 1316
1305 1317 def reachableroots(self, minroot, heads, roots, includepath=False):
1306 1318 """return (heads(::(<roots> and <roots>::<heads>)))
1307 1319
1308 1320 If includepath is True, return (<roots>::<heads>)."""
1309 1321 try:
1310 1322 return self.index.reachableroots2(
1311 1323 minroot, heads, roots, includepath
1312 1324 )
1313 1325 except AttributeError:
1314 1326 return dagop._reachablerootspure(
1315 1327 self.parentrevs, minroot, roots, heads, includepath
1316 1328 )
1317 1329
1318 1330 def ancestor(self, a, b):
1319 1331 """calculate the "best" common ancestor of nodes a and b"""
1320 1332
1321 1333 a, b = self.rev(a), self.rev(b)
1322 1334 try:
1323 1335 ancs = self.index.ancestors(a, b)
1324 1336 except (AttributeError, OverflowError):
1325 1337 ancs = ancestor.ancestors(self.parentrevs, a, b)
1326 1338 if ancs:
1327 1339 # choose a consistent winner when there's a tie
1328 1340 return min(map(self.node, ancs))
1329 1341 return self.nullid
1330 1342
1331 1343 def _match(self, id):
1332 1344 if isinstance(id, int):
1333 1345 # rev
1334 1346 return self.node(id)
1335 1347 if len(id) == self.nodeconstants.nodelen:
1336 1348 # possibly a binary node
1337 1349 # odds of a binary node being all hex in ASCII are 1 in 10**25
1338 1350 try:
1339 1351 node = id
1340 1352 self.rev(node) # quick search the index
1341 1353 return node
1342 1354 except error.LookupError:
1343 1355 pass # may be partial hex id
1344 1356 try:
1345 1357 # str(rev)
1346 1358 rev = int(id)
1347 1359 if b"%d" % rev != id:
1348 1360 raise ValueError
1349 1361 if rev < 0:
1350 1362 rev = len(self) + rev
1351 1363 if rev < 0 or rev >= len(self):
1352 1364 raise ValueError
1353 1365 return self.node(rev)
1354 1366 except (ValueError, OverflowError):
1355 1367 pass
1356 1368 if len(id) == 2 * self.nodeconstants.nodelen:
1357 1369 try:
1358 1370 # a full hex nodeid?
1359 1371 node = bin(id)
1360 1372 self.rev(node)
1361 1373 return node
1362 1374 except (TypeError, error.LookupError):
1363 1375 pass
1364 1376
1365 1377 def _partialmatch(self, id):
1366 1378 # we don't care about wdirfilenodeids as they should always be full hashes
1367 1379 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1368 1380 try:
1369 1381 partial = self.index.partialmatch(id)
1370 1382 if partial and self.hasnode(partial):
1371 1383 if maybewdir:
1372 1384 # single 'ff...' match in radix tree, ambiguous with wdir
1373 1385 raise error.RevlogError
1374 1386 return partial
1375 1387 if maybewdir:
1376 1388 # no 'ff...' match in radix tree, wdir identified
1377 1389 raise error.WdirUnsupported
1378 1390 return None
1379 1391 except error.RevlogError:
1380 1392 # parsers.c radix tree lookup gave multiple matches
1381 1393 # fast path: for unfiltered changelog, radix tree is accurate
1382 1394 if not getattr(self, 'filteredrevs', None):
1383 1395 raise error.AmbiguousPrefixLookupError(
1384 1396 id, self.indexfile, _(b'ambiguous identifier')
1385 1397 )
1386 1398 # fall through to slow path that filters hidden revisions
1387 1399 except (AttributeError, ValueError):
1388 1400 # we are pure python, or key was too short to search radix tree
1389 1401 pass
1390 1402
1391 1403 if id in self._pcache:
1392 1404 return self._pcache[id]
1393 1405
1394 1406 if len(id) <= 40:
1395 1407 try:
1396 1408 # hex(node)[:...]
1397 1409 l = len(id) // 2 # grab an even number of digits
1398 1410 prefix = bin(id[: l * 2])
1399 1411 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1400 1412 nl = [
1401 1413 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1402 1414 ]
1403 1415 if self.nodeconstants.nullhex.startswith(id):
1404 1416 nl.append(self.nullid)
1405 1417 if len(nl) > 0:
1406 1418 if len(nl) == 1 and not maybewdir:
1407 1419 self._pcache[id] = nl[0]
1408 1420 return nl[0]
1409 1421 raise error.AmbiguousPrefixLookupError(
1410 1422 id, self.indexfile, _(b'ambiguous identifier')
1411 1423 )
1412 1424 if maybewdir:
1413 1425 raise error.WdirUnsupported
1414 1426 return None
1415 1427 except TypeError:
1416 1428 pass
1417 1429
1418 1430 def lookup(self, id):
1419 1431 """locate a node based on:
1420 1432 - revision number or str(revision number)
1421 1433 - nodeid or subset of hex nodeid
1422 1434 """
1423 1435 n = self._match(id)
1424 1436 if n is not None:
1425 1437 return n
1426 1438 n = self._partialmatch(id)
1427 1439 if n:
1428 1440 return n
1429 1441
1430 1442 raise error.LookupError(id, self.indexfile, _(b'no match found'))
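# Usage sketch (illustrative): lookup() accepts several spellings of the
# same revision; assuming `rl` has at least 8 revisions and the prefix is
# unambiguous, all of these resolve to a node:
#
#   rl.lookup(7)                # revision number
#   rl.lookup(b'7')             # str(revision number)
#   rl.lookup(full_hex_id)      # full hex nodeid
#   rl.lookup(full_hex_id[:6])  # unambiguous hex prefix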
1431 1443
1432 1444 def shortest(self, node, minlength=1):
1433 1445 """Find the shortest unambiguous prefix that matches node."""
1434 1446
1435 1447 def isvalid(prefix):
1436 1448 try:
1437 1449 matchednode = self._partialmatch(prefix)
1438 1450 except error.AmbiguousPrefixLookupError:
1439 1451 return False
1440 1452 except error.WdirUnsupported:
1441 1453 # single 'ff...' match
1442 1454 return True
1443 1455 if matchednode is None:
1444 1456 raise error.LookupError(node, self.indexfile, _(b'no node'))
1445 1457 return True
1446 1458
1447 1459 def maybewdir(prefix):
1448 1460 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1449 1461
1450 1462 hexnode = hex(node)
1451 1463
1452 1464 def disambiguate(hexnode, minlength):
1453 1465 """Disambiguate against wdirid."""
1454 1466 for length in range(minlength, len(hexnode) + 1):
1455 1467 prefix = hexnode[:length]
1456 1468 if not maybewdir(prefix):
1457 1469 return prefix
1458 1470
1459 1471 if not getattr(self, 'filteredrevs', None):
1460 1472 try:
1461 1473 length = max(self.index.shortest(node), minlength)
1462 1474 return disambiguate(hexnode, length)
1463 1475 except error.RevlogError:
1464 1476 if node != self.nodeconstants.wdirid:
1465 1477 raise error.LookupError(node, self.indexfile, _(b'no node'))
1466 1478 except AttributeError:
1467 1479 # Fall through to pure code
1468 1480 pass
1469 1481
1470 1482 if node == self.nodeconstants.wdirid:
1471 1483 for length in range(minlength, len(hexnode) + 1):
1472 1484 prefix = hexnode[:length]
1473 1485 if isvalid(prefix):
1474 1486 return prefix
1475 1487
1476 1488 for length in range(minlength, len(hexnode) + 1):
1477 1489 prefix = hexnode[:length]
1478 1490 if isvalid(prefix):
1479 1491 return disambiguate(hexnode, length)
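# Usage sketch (illustrative): shortest() and lookup() round-trip, i.e.
# the returned prefix is long enough to resolve back to the same node:
#
#   prefix = rl.shortest(node)
#   assert rl.lookup(prefix) == node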
1480 1492
1481 1493 def cmp(self, node, text):
1482 1494 """compare text with a given file revision
1483 1495
1484 1496 returns True if text is different from what is stored.
1485 1497 """
1486 1498 p1, p2 = self.parents(node)
1487 1499 return storageutil.hashrevisionsha1(text, p1, p2) != node
1488 1500
1489 1501 def _cachesegment(self, offset, data):
1490 1502 """Add a segment to the revlog cache.
1491 1503
1492 1504 Accepts an absolute offset and the data that is at that location.
1493 1505 """
1494 1506 o, d = self._chunkcache
1495 1507 # try to add to existing cache
1496 1508 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1497 1509 self._chunkcache = o, d + data
1498 1510 else:
1499 1511 self._chunkcache = offset, data
1500 1512
1501 1513 def _readsegment(self, offset, length, df=None):
1502 1514 """Load a segment of raw data from the revlog.
1503 1515
1504 1516 Accepts an absolute offset, length to read, and an optional existing
1505 1517 file handle to read from.
1506 1518
1507 1519 If an existing file handle is passed, it will be seeked and the
1508 1520 original seek position will NOT be restored.
1509 1521
1510 1522 Returns a str or buffer of raw byte data.
1511 1523
1512 1524 Raises if the requested number of bytes could not be read.
1513 1525 """
1514 1526 # Cache data both forward and backward around the requested
1515 1527 # data, in a fixed size window. This helps speed up operations
1516 1528 # involving reading the revlog backwards.
1517 1529 cachesize = self._chunkcachesize
1518 1530 realoffset = offset & ~(cachesize - 1)
1519 1531 reallength = (
1520 1532 (offset + length + cachesize) & ~(cachesize - 1)
1521 1533 ) - realoffset
1522 1534 with self._datareadfp(df) as df:
1523 1535 df.seek(realoffset)
1524 1536 d = df.read(reallength)
1525 1537
1526 1538 self._cachesegment(realoffset, d)
1527 1539 if offset != realoffset or reallength != length:
1528 1540 startoffset = offset - realoffset
1529 1541 if len(d) - startoffset < length:
1530 1542 raise error.RevlogError(
1531 1543 _(
1532 1544 b'partial read of revlog %s; expected %d bytes from '
1533 1545 b'offset %d, got %d'
1534 1546 )
1535 1547 % (
1536 1548 self.indexfile if self._inline else self.datafile,
1537 1549 length,
1538 1550 realoffset,
1539 1551 len(d) - startoffset,
1540 1552 )
1541 1553 )
1542 1554
1543 1555 return util.buffer(d, startoffset, length)
1544 1556
1545 1557 if len(d) < length:
1546 1558 raise error.RevlogError(
1547 1559 _(
1548 1560 b'partial read of revlog %s; expected %d bytes from offset '
1549 1561 b'%d, got %d'
1550 1562 )
1551 1563 % (
1552 1564 self.indexfile if self._inline else self.datafile,
1553 1565 length,
1554 1566 offset,
1555 1567 len(d),
1556 1568 )
1557 1569 )
1558 1570
1559 1571 return d
1560 1572
1561 1573 def _getsegment(self, offset, length, df=None):
1562 1574 """Obtain a segment of raw data from the revlog.
1563 1575
1564 1576 Accepts an absolute offset, length of bytes to obtain, and an
1565 1577 optional file handle to the already-opened revlog. If the file
1566 1578 handle is used, its original seek position will not be preserved.
1567 1579
1568 1580 Requests for data may be returned from a cache.
1569 1581
1570 1582 Returns a str or a buffer instance of raw byte data.
1571 1583 """
1572 1584 o, d = self._chunkcache
1573 1585 l = len(d)
1574 1586
1575 1587 # is it in the cache?
1576 1588 cachestart = offset - o
1577 1589 cacheend = cachestart + length
1578 1590 if cachestart >= 0 and cacheend <= l:
1579 1591 if cachestart == 0 and cacheend == l:
1580 1592 return d # avoid a copy
1581 1593 return util.buffer(d, cachestart, cacheend - cachestart)
1582 1594
1583 1595 return self._readsegment(offset, length, df=df)
1584 1596
1585 1597 def _getsegmentforrevs(self, startrev, endrev, df=None):
1586 1598 """Obtain a segment of raw data corresponding to a range of revisions.
1587 1599
1588 1600 Accepts the start and end revisions and an optional already-open
1589 1601 file handle to be used for reading. If the file handle is read, its
1590 1602 seek position will not be preserved.
1591 1603
1592 1604 Requests for data may be satisfied by a cache.
1593 1605
1594 1606 Returns a 2-tuple of (offset, data) for the requested range of
1595 1607 revisions. Offset is the integer offset from the beginning of the
1596 1608 revlog and data is a str or buffer of the raw byte data.
1597 1609
1598 1610 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1599 1611 to determine where each revision's data begins and ends.
1600 1612 """
1601 1613 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1602 1614 # (functions are expensive).
1603 1615 index = self.index
1604 1616 istart = index[startrev]
1605 1617 start = int(istart[0] >> 16)
1606 1618 if startrev == endrev:
1607 1619 end = start + istart[1]
1608 1620 else:
1609 1621 iend = index[endrev]
1610 1622 end = int(iend[0] >> 16) + iend[1]
1611 1623
1612 1624 if self._inline:
1613 1625 start += (startrev + 1) * self.index.entry_size
1614 1626 end += (endrev + 1) * self.index.entry_size
1615 1627 length = end - start
1616 1628
1617 1629 return start, self._getsegment(start, length, df=df)
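# Usage sketch (internal API; the non-inline case is shown for
# simplicity): callers slice out each revision with start()/length():
#
#   offset, data = self._getsegmentforrevs(lo, hi)
#   for rev in range(lo, hi + 1):
#       base = self.start(rev) - offset
#       chunk = data[base : base + self.length(rev)]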
1618 1630
1619 1631 def _chunk(self, rev, df=None):
1620 1632 """Obtain a single decompressed chunk for a revision.
1621 1633
1622 1634 Accepts an integer revision and an optional already-open file handle
1623 1635 to be used for reading. If used, the seek position of the file will not
1624 1636 be preserved.
1625 1637
1626 1638 Returns a str holding uncompressed data for the requested revision.
1627 1639 """
1628 1640 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1629 1641
1630 1642 def _chunks(self, revs, df=None, targetsize=None):
1631 1643 """Obtain decompressed chunks for the specified revisions.
1632 1644
1633 1645 Accepts an iterable of numeric revisions that are assumed to be in
1634 1646 ascending order. Also accepts an optional already-open file handle
1635 1647 to be used for reading. If used, the seek position of the file will
1636 1648 not be preserved.
1637 1649
1638 1650 This function is similar to calling ``self._chunk()`` multiple times,
1639 1651 but is faster.
1640 1652
1641 1653 Returns a list with decompressed data for each requested revision.
1642 1654 """
1643 1655 if not revs:
1644 1656 return []
1645 1657 start = self.start
1646 1658 length = self.length
1647 1659 inline = self._inline
1648 1660 iosize = self.index.entry_size
1649 1661 buffer = util.buffer
1650 1662
1651 1663 l = []
1652 1664 ladd = l.append
1653 1665
1654 1666 if not self._withsparseread:
1655 1667 slicedchunks = (revs,)
1656 1668 else:
1657 1669 slicedchunks = deltautil.slicechunk(
1658 1670 self, revs, targetsize=targetsize
1659 1671 )
1660 1672
1661 1673 for revschunk in slicedchunks:
1662 1674 firstrev = revschunk[0]
1663 1675 # Skip trailing revisions with empty diff
1664 1676 for lastrev in revschunk[::-1]:
1665 1677 if length(lastrev) != 0:
1666 1678 break
1667 1679
1668 1680 try:
1669 1681 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1670 1682 except OverflowError:
1671 1683 # issue4215 - we can't cache a run of chunks greater than
1672 1684 # 2G on Windows
1673 1685 return [self._chunk(rev, df=df) for rev in revschunk]
1674 1686
1675 1687 decomp = self.decompress
1676 1688 for rev in revschunk:
1677 1689 chunkstart = start(rev)
1678 1690 if inline:
1679 1691 chunkstart += (rev + 1) * iosize
1680 1692 chunklength = length(rev)
1681 1693 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1682 1694
1683 1695 return l
1684 1696
1685 1697 def _chunkclear(self):
1686 1698 """Clear the raw chunk cache."""
1687 1699 self._chunkcache = (0, b'')
1688 1700
1689 1701 def deltaparent(self, rev):
1690 1702 """return deltaparent of the given revision"""
1691 1703 base = self.index[rev][3]
1692 1704 if base == rev:
1693 1705 return nullrev
1694 1706 elif self._generaldelta:
1695 1707 return base
1696 1708 else:
1697 1709 return rev - 1
1698 1710
1699 1711 def issnapshot(self, rev):
1700 1712 """tells whether rev is a snapshot"""
1701 1713 if not self._sparserevlog:
1702 1714 return self.deltaparent(rev) == nullrev
1703 1715 elif util.safehasattr(self.index, b'issnapshot'):
1704 1716 # directly assign the method to cache the testing and access
1705 1717 self.issnapshot = self.index.issnapshot
1706 1718 return self.issnapshot(rev)
1707 1719 if rev == nullrev:
1708 1720 return True
1709 1721 entry = self.index[rev]
1710 1722 base = entry[3]
1711 1723 if base == rev:
1712 1724 return True
1713 1725 if base == nullrev:
1714 1726 return True
1715 1727 p1 = entry[5]
1716 1728 p2 = entry[6]
1717 1729 if base == p1 or base == p2:
1718 1730 return False
1719 1731 return self.issnapshot(base)
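# Worked example (illustrative): for a delta chain full -> s1 -> s2 where
# each delta base is an earlier snapshot rather than a parent, issnapshot()
# is True for all three, and snapshotdepth(s2) below would be 2 (two
# snapshots precede s2 in its delta chain).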
1720 1732
1721 1733 def snapshotdepth(self, rev):
1722 1734 """number of snapshots in the chain before this one"""
1723 1735 if not self.issnapshot(rev):
1724 1736 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1725 1737 return len(self._deltachain(rev)[0]) - 1
1726 1738
1727 1739 def revdiff(self, rev1, rev2):
1728 1740 """return or calculate a delta between two revisions
1729 1741
1730 1742 The delta calculated is in binary form and is intended to be written to
1731 1743 revlog data directly. So this function needs raw revision data.
1732 1744 """
1733 1745 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1734 1746 return bytes(self._chunk(rev2))
1735 1747
1736 1748 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1737 1749
1738 1750 def _processflags(self, text, flags, operation, raw=False):
1739 1751 """deprecated entry point to access flag processors"""
1740 1752 msg = b'_processflag(...) use the specialized variant'
1741 1753 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1742 1754 if raw:
1743 1755 return text, flagutil.processflagsraw(self, text, flags)
1744 1756 elif operation == b'read':
1745 1757 return flagutil.processflagsread(self, text, flags)
1746 1758 else: # write operation
1747 1759 return flagutil.processflagswrite(self, text, flags)
1748 1760
1749 1761 def revision(self, nodeorrev, _df=None, raw=False):
1750 1762 """return an uncompressed revision of a given node or revision
1751 1763 number.
1752 1764
1753 1765 _df - an existing file handle to read from. (internal-only)
1754 1766 raw - an optional argument specifying if the revision data is to be
1755 1767 treated as raw data when applying flag transforms. 'raw' should be set
1756 1768 to True when generating changegroups or in debug commands.
1757 1769 """
1758 1770 if raw:
1759 1771 msg = (
1760 1772 b'revlog.revision(..., raw=True) is deprecated, '
1761 1773 b'use revlog.rawdata(...)'
1762 1774 )
1763 1775 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1764 1776 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1765 1777
1766 1778 def sidedata(self, nodeorrev, _df=None):
1767 1779 """a map of extra data related to the changeset but not part of the hash
1768 1780
1769 1781 This function currently returns a dictionary. However, a more advanced
1770 1782 mapping object will likely be used in the future for more
1771 1783 efficient/lazy code.
1772 1784 """
1773 1785 return self._revisiondata(nodeorrev, _df)[1]
1774 1786
1775 1787 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1776 1788 # deal with <nodeorrev> argument type
1777 1789 if isinstance(nodeorrev, int):
1778 1790 rev = nodeorrev
1779 1791 node = self.node(rev)
1780 1792 else:
1781 1793 node = nodeorrev
1782 1794 rev = None
1783 1795
1784 1796 # fast path the special `nullid` rev
1785 1797 if node == self.nullid:
1786 1798 return b"", {}
1787 1799
1788 1800 # ``rawtext`` is the text as stored inside the revlog. Might be the
1789 1801 # revision or might need to be processed to retrieve the revision.
1790 1802 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1791 1803
1792 1804 if self.hassidedata:
1793 1805 if rev is None:
1794 1806 rev = self.rev(node)
1795 1807 sidedata = self._sidedata(rev)
1796 1808 else:
1797 1809 sidedata = {}
1798 1810
1799 1811 if raw and validated:
1800 1812 # if we don't want to process the raw text and that raw
1801 1813 # text is cached, we can exit early.
1802 1814 return rawtext, sidedata
1803 1815 if rev is None:
1804 1816 rev = self.rev(node)
1805 1817 # the revlog's flags for this revision
1806 1818 # (they usually alter its state or content)
1807 1819 flags = self.flags(rev)
1808 1820
1809 1821 if validated and flags == REVIDX_DEFAULT_FLAGS:
1810 1822 # no extra flags set, no flag processor runs, text = rawtext
1811 1823 return rawtext, sidedata
1812 1824
1813 1825 if raw:
1814 1826 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1815 1827 text = rawtext
1816 1828 else:
1817 1829 r = flagutil.processflagsread(self, rawtext, flags)
1818 1830 text, validatehash = r
1819 1831 if validatehash:
1820 1832 self.checkhash(text, node, rev=rev)
1821 1833 if not validated:
1822 1834 self._revisioncache = (node, rev, rawtext)
1823 1835
1824 1836 return text, sidedata
1825 1837
1826 1838 def _rawtext(self, node, rev, _df=None):
1827 1839 """return the possibly unvalidated rawtext for a revision
1828 1840
1829 1841 returns (rev, rawtext, validated)
1830 1842 """
1831 1843
1832 1844 # revision in the cache (could be useful to apply delta)
1833 1845 cachedrev = None
1834 1846 # An intermediate text to apply deltas to
1835 1847 basetext = None
1836 1848
1837 1849 # Check if we have the entry in cache
1838 1850 # The cache entry looks like (node, rev, rawtext)
1839 1851 if self._revisioncache:
1840 1852 if self._revisioncache[0] == node:
1841 1853 return (rev, self._revisioncache[2], True)
1842 1854 cachedrev = self._revisioncache[1]
1843 1855
1844 1856 if rev is None:
1845 1857 rev = self.rev(node)
1846 1858
1847 1859 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1848 1860 if stopped:
1849 1861 basetext = self._revisioncache[2]
1850 1862
1851 1863 # drop cache to save memory, the caller is expected to
1852 1864 # update self._revisioncache after validating the text
1853 1865 self._revisioncache = None
1854 1866
1855 1867 targetsize = None
1856 1868 rawsize = self.index[rev][2]
1857 1869 if 0 <= rawsize:
1858 1870 targetsize = 4 * rawsize
1859 1871
1860 1872 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1861 1873 if basetext is None:
1862 1874 basetext = bytes(bins[0])
1863 1875 bins = bins[1:]
1864 1876
1865 1877 rawtext = mdiff.patches(basetext, bins)
1866 1878 del basetext # let us have a chance to free memory early
1867 1879 return (rev, rawtext, False)
1868 1880
1869 1881 def _sidedata(self, rev):
1870 1882 """Return the sidedata for a given revision number."""
1871 1883 index_entry = self.index[rev]
1872 1884 sidedata_offset = index_entry[8]
1873 1885 sidedata_size = index_entry[9]
1874 1886
1875 1887 if self._inline:
1876 1888 sidedata_offset += self.index.entry_size * (1 + rev)
1877 1889 if sidedata_size == 0:
1878 1890 return {}
1879 1891
1880 1892 segment = self._getsegment(sidedata_offset, sidedata_size)
1881 1893 sidedata = sidedatautil.deserialize_sidedata(segment)
1882 1894 return sidedata
1883 1895
1884 1896 def rawdata(self, nodeorrev, _df=None):
1885 1897 """return an uncompressed raw data of a given node or revision number.
1886 1898
1887 1899 _df - an existing file handle to read from. (internal-only)
1888 1900 """
1889 1901 return self._revisiondata(nodeorrev, _df, raw=True)[0]
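# Usage sketch (illustrative): revision() returns the fully processed
# text while rawdata() returns the bytes as stored, before any flag
# processor (e.g. censoring) is applied:
#
#   text = rl.revision(node)
#   raw = rl.rawdata(node)  # equal to `text` when no flags are set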
1890 1902
1891 1903 def hash(self, text, p1, p2):
1892 1904 """Compute a node hash.
1893 1905
1894 1906 Available as a function so that subclasses can replace the hash
1895 1907 as needed.
1896 1908 """
1897 1909 return storageutil.hashrevisionsha1(text, p1, p2)
1898 1910
1899 1911 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1900 1912 """Check node hash integrity.
1901 1913
1902 1914 Available as a function so that subclasses can extend hash mismatch
1903 1915 behaviors as needed.
1904 1916 """
1905 1917 try:
1906 1918 if p1 is None and p2 is None:
1907 1919 p1, p2 = self.parents(node)
1908 1920 if node != self.hash(text, p1, p2):
1909 1921 # Clear the revision cache on hash failure. The revision cache
1910 1922 # only stores the raw revision and clearing the cache does have
1911 1923 # the side-effect that we won't have a cache hit when the raw
1912 1924 # revision data is accessed. But this case should be rare and
1913 1925 # it is extra work to teach the cache about the hash
1914 1926 # verification state.
1915 1927 if self._revisioncache and self._revisioncache[0] == node:
1916 1928 self._revisioncache = None
1917 1929
1918 1930 revornode = rev
1919 1931 if revornode is None:
1920 1932 revornode = templatefilters.short(hex(node))
1921 1933 raise error.RevlogError(
1922 1934 _(b"integrity check failed on %s:%s")
1923 1935 % (self.indexfile, pycompat.bytestr(revornode))
1924 1936 )
1925 1937 except error.RevlogError:
1926 1938 if self._censorable and storageutil.iscensoredtext(text):
1927 1939 raise error.CensoredNodeError(self.indexfile, node, text)
1928 1940 raise
1929 1941
1930 1942 def _enforceinlinesize(self, tr, fp=None):
1931 1943 """Check if the revlog is too big for inline and convert if so.
1932 1944
1933 1945 This should be called after revisions are added to the revlog. If the
1934 1946 revlog has grown too large to be an inline revlog, it will convert it
1935 1947 to use multiple index and data files.
1936 1948 """
1937 1949 tiprev = len(self) - 1
1938 1950 if (
1939 1951 not self._inline
1940 1952 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1941 1953 ):
1942 1954 return
1943 1955
1944 1956 troffset = tr.findoffset(self.indexfile)
1945 1957 if troffset is None:
1946 1958 raise error.RevlogError(
1947 1959 _(b"%s not found in the transaction") % self.indexfile
1948 1960 )
1949 1961 trindex = 0
1950 1962 tr.add(self.datafile, 0)
1951 1963
1952 1964 if fp:
1953 1965 fp.flush()
1954 1966 fp.close()
1955 1967 # We can't use the cached file handle after close(). So prevent
1956 1968 # its usage.
1957 1969 self._writinghandles = None
1958 1970
1959 1971 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1960 1972 for r in self:
1961 1973 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1962 1974 if troffset <= self.start(r):
1963 1975 trindex = r
1964 1976
1965 1977 with self._indexfp(b'w') as fp:
1966 1978 self._format_flags &= ~FLAG_INLINE_DATA
1967 1979 self._inline = False
1968 1980 for i in self:
1969 1981 e = self.index.entry_binary(i)
1970 1982 if i == 0:
1971 1983 header = self._format_flags | self._format_version
1972 1984 header = self.index.pack_header(header)
1973 1985 e = header + e
1974 1986 fp.write(e)
1975 1987
1976 1988 # the temp file replaces the real index when we exit the context
1977 1989 # manager
1978 1990
1979 1991 tr.replace(self.indexfile, trindex * self.index.entry_size)
1980 1992 nodemaputil.setup_persistent_nodemap(tr, self)
1981 1993 self._chunkclear()
1982 1994
1983 1995 def _nodeduplicatecallback(self, transaction, node):
1984 1996 """called when trying to add a node already stored."""
1985 1997
1986 1998 def addrevision(
1987 1999 self,
1988 2000 text,
1989 2001 transaction,
1990 2002 link,
1991 2003 p1,
1992 2004 p2,
1993 2005 cachedelta=None,
1994 2006 node=None,
1995 2007 flags=REVIDX_DEFAULT_FLAGS,
1996 2008 deltacomputer=None,
1997 2009 sidedata=None,
1998 2010 ):
1999 2011 """add a revision to the log
2000 2012
2001 2013 text - the revision data to add
2002 2014 transaction - the transaction object used for rollback
2003 2015 link - the linkrev data to add
2004 2016 p1, p2 - the parent nodeids of the revision
2005 2017 cachedelta - an optional precomputed delta
2006 2018 node - nodeid of revision; typically node is not specified, and it is
2007 2019 computed by default as hash(text, p1, p2), however subclasses might
2008 2020 use different hashing method (and override checkhash() in such case)
2009 2021 flags - the known flags to set on the revision
2010 2022 deltacomputer - an optional deltacomputer instance shared between
2011 2023 multiple calls
2012 2024 """
2013 2025 if link == nullrev:
2014 2026 raise error.RevlogError(
2015 2027 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2016 2028 )
2017 2029
2018 2030 if sidedata is None:
2019 2031 sidedata = {}
2020 2032 elif sidedata and not self.hassidedata:
2021 2033 raise error.ProgrammingError(
2022 2034 _(b"trying to add sidedata to a revlog that does not support them")
2023 2035 )
2024 2036
2025 2037 if flags:
2026 2038 node = node or self.hash(text, p1, p2)
2027 2039
2028 2040 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2029 2041
2030 2042 # If the flag processor modifies the revision data, ignore any provided
2031 2043 # cachedelta.
2032 2044 if rawtext != text:
2033 2045 cachedelta = None
2034 2046
2035 2047 if len(rawtext) > _maxentrysize:
2036 2048 raise error.RevlogError(
2037 2049 _(
2038 2050 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2039 2051 )
2040 2052 % (self.indexfile, len(rawtext))
2041 2053 )
2042 2054
2043 2055 node = node or self.hash(rawtext, p1, p2)
2044 2056 rev = self.index.get_rev(node)
2045 2057 if rev is not None:
2046 2058 return rev
2047 2059
2048 2060 if validatehash:
2049 2061 self.checkhash(rawtext, node, p1=p1, p2=p2)
2050 2062
2051 2063 return self.addrawrevision(
2052 2064 rawtext,
2053 2065 transaction,
2054 2066 link,
2055 2067 p1,
2056 2068 p2,
2057 2069 node,
2058 2070 flags,
2059 2071 cachedelta=cachedelta,
2060 2072 deltacomputer=deltacomputer,
2061 2073 sidedata=sidedata,
2062 2074 )
2063 2075
2064 2076 def addrawrevision(
2065 2077 self,
2066 2078 rawtext,
2067 2079 transaction,
2068 2080 link,
2069 2081 p1,
2070 2082 p2,
2071 2083 node,
2072 2084 flags,
2073 2085 cachedelta=None,
2074 2086 deltacomputer=None,
2075 2087 sidedata=None,
2076 2088 ):
2077 2089 """add a raw revision with known flags, node and parents
2078 2090 useful when reusing a revision not stored in this revlog (ex: received
2079 2091 over wire, or read from an external bundle).
2080 2092 """
2081 2093 dfh = None
2082 2094 if not self._inline:
2083 2095 dfh = self._datafp(b"a+")
2084 2096 ifh = self._indexfp(b"a+")
2085 2097 try:
2086 2098 return self._addrevision(
2087 2099 node,
2088 2100 rawtext,
2089 2101 transaction,
2090 2102 link,
2091 2103 p1,
2092 2104 p2,
2093 2105 flags,
2094 2106 cachedelta,
2095 2107 ifh,
2096 2108 dfh,
2097 2109 deltacomputer=deltacomputer,
2098 2110 sidedata=sidedata,
2099 2111 )
2100 2112 finally:
2101 2113 if dfh:
2102 2114 dfh.close()
2103 2115 ifh.close()
2104 2116
2105 2117 def compress(self, data):
2106 2118 """Generate a possibly-compressed representation of data."""
2107 2119 if not data:
2108 2120 return b'', data
2109 2121
2110 2122 compressed = self._compressor.compress(data)
2111 2123
2112 2124 if compressed:
2113 2125 # The revlog compressor added the header in the returned data.
2114 2126 return b'', compressed
2115 2127
2116 2128 if data[0:1] == b'\0':
2117 2129 return b'', data
2118 2130 return b'u', data
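# Sketch of the header convention implemented above (illustrative):
#
#   header, packed = self.compress(sometext)
#   # header == b''  -> `packed` starts with a byte decompress() knows how
#   #                   to route (e.g. b'x' for zlib, b'\0' for raw data)
#   # header == b'u' -> `packed` is the text itself, stored uncompressed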
2119 2131
2120 2132 def decompress(self, data):
2121 2133 """Decompress a revlog chunk.
2122 2134
2123 2135 The chunk is expected to begin with a header identifying the
2124 2136 format type so it can be routed to an appropriate decompressor.
2125 2137 """
2126 2138 if not data:
2127 2139 return data
2128 2140
2129 2141 # Revlogs are read much more frequently than they are written and many
2130 2142 # chunks only take microseconds to decompress, so performance is
2131 2143 # important here.
2132 2144 #
2133 2145 # We can make a few assumptions about revlogs:
2134 2146 #
2135 2147 # 1) the majority of chunks will be compressed (as opposed to inline
2136 2148 # raw data).
2137 2149 # 2) decompressing *any* data will likely be at least 10x slower than
2138 2150 # returning raw inline data.
2139 2151 # 3) we want to prioritize common and officially supported compression
2140 2152 # engines
2141 2153 #
2142 2154 # It follows that we want to optimize for "decompress compressed data
2143 2155 # when encoded with common and officially supported compression engines"
2144 2156 # case over "raw data" and "data encoded by less common or non-official
2145 2157 # compression engines." That is why we have the inline lookup first
2146 2158 # followed by the compengines lookup.
2147 2159 #
2148 2160 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2149 2161 # compressed chunks. And this matters for changelog and manifest reads.
2150 2162 t = data[0:1]
2151 2163
2152 2164 if t == b'x':
2153 2165 try:
2154 2166 return _zlibdecompress(data)
2155 2167 except zlib.error as e:
2156 2168 raise error.RevlogError(
2157 2169 _(b'revlog decompress error: %s')
2158 2170 % stringutil.forcebytestr(e)
2159 2171 )
2160 2172 # '\0' is more common than 'u' so it goes first.
2161 2173 elif t == b'\0':
2162 2174 return data
2163 2175 elif t == b'u':
2164 2176 return util.buffer(data, 1)
2165 2177
2166 2178 try:
2167 2179 compressor = self._decompressors[t]
2168 2180 except KeyError:
2169 2181 try:
2170 2182 engine = util.compengines.forrevlogheader(t)
2171 2183 compressor = engine.revlogcompressor(self._compengineopts)
2172 2184 self._decompressors[t] = compressor
2173 2185 except KeyError:
2174 2186 raise error.RevlogError(
2175 2187 _(b'unknown compression type %s') % binascii.hexlify(t)
2176 2188 )
2177 2189
2178 2190 return compressor.decompress(data)
2179 2191
2180 2192 def _addrevision(
2181 2193 self,
2182 2194 node,
2183 2195 rawtext,
2184 2196 transaction,
2185 2197 link,
2186 2198 p1,
2187 2199 p2,
2188 2200 flags,
2189 2201 cachedelta,
2190 2202 ifh,
2191 2203 dfh,
2192 2204 alwayscache=False,
2193 2205 deltacomputer=None,
2194 2206 sidedata=None,
2195 2207 ):
2196 2208 """internal function to add revisions to the log
2197 2209
2198 2210 see addrevision for argument descriptions.
2199 2211
2200 2212 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2201 2213
2202 2214 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2203 2215 be used.
2204 2216
2205 2217 invariants:
2206 2218 - rawtext is optional (can be None); if not set, cachedelta must be set.
2207 2219 if both are set, they must correspond to each other.
2208 2220 """
2209 2221 if node == self.nullid:
2210 2222 raise error.RevlogError(
2211 2223 _(b"%s: attempt to add null revision") % self.indexfile
2212 2224 )
2213 2225 if (
2214 2226 node == self.nodeconstants.wdirid
2215 2227 or node in self.nodeconstants.wdirfilenodeids
2216 2228 ):
2217 2229 raise error.RevlogError(
2218 2230 _(b"%s: attempt to add wdir revision") % self.indexfile
2219 2231 )
2220 2232
2221 2233 if self._inline:
2222 2234 fh = ifh
2223 2235 else:
2224 2236 fh = dfh
2225 2237
2226 2238 btext = [rawtext]
2227 2239
2228 2240 curr = len(self)
2229 2241 prev = curr - 1
2230 2242
2231 2243 offset = self._get_data_offset(prev)
2232 2244
2233 2245 if self._concurrencychecker:
2234 2246 if self._inline:
2235 2247 # offset is "as if" it were in the .d file, so we need to add on
2236 2248 # the size of the entry metadata.
2237 2249 self._concurrencychecker(
2238 2250 ifh, self.indexfile, offset + curr * self.index.entry_size
2239 2251 )
2240 2252 else:
2241 2253 # Entries in the .i are a consistent size.
2242 2254 self._concurrencychecker(
2243 2255 ifh, self.indexfile, curr * self.index.entry_size
2244 2256 )
2245 2257 self._concurrencychecker(dfh, self.datafile, offset)
2246 2258
2247 2259 p1r, p2r = self.rev(p1), self.rev(p2)
2248 2260
2249 2261 # full versions are inserted when the needed deltas
2250 2262 # become comparable to the uncompressed text
2251 2263 if rawtext is None:
2252 2264 # need the rawtext size, before it is changed by flag processors,
2253 2265 # which is the non-raw size. use revlog explicitly to avoid filelog's
2254 2266 # extra logic that might remove metadata size.
2255 2267 textlen = mdiff.patchedsize(
2256 2268 revlog.size(self, cachedelta[0]), cachedelta[1]
2257 2269 )
2258 2270 else:
2259 2271 textlen = len(rawtext)
2260 2272
2261 2273 if deltacomputer is None:
2262 2274 deltacomputer = deltautil.deltacomputer(self)
2263 2275
2264 2276 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2265 2277
2266 2278 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2267 2279
2268 2280 if sidedata and self.hassidedata:
2269 2281 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2270 2282 sidedata_offset = offset + deltainfo.deltalen
2271 2283 else:
2272 2284 serialized_sidedata = b""
2273 2285 # Don't store the offset if the sidedata is empty; that way
2274 2286 # we can easily detect empty sidedata, and it will be no different
2275 2287 # from sidedata we add manually.
2276 2288 sidedata_offset = 0
2277 2289
2278 2290 e = (
2279 2291 offset_type(offset, flags),
2280 2292 deltainfo.deltalen,
2281 2293 textlen,
2282 2294 deltainfo.base,
2283 2295 link,
2284 2296 p1r,
2285 2297 p2r,
2286 2298 node,
2287 2299 sidedata_offset,
2288 2300 len(serialized_sidedata),
2289 2301 )
2290 2302
2291 2303 self.index.append(e)
2292 2304 entry = self.index.entry_binary(curr)
2293 2305 if curr == 0:
2294 2306 header = self._format_flags | self._format_version
2295 2307 header = self.index.pack_header(header)
2296 2308 entry = header + entry
2297 2309 self._writeentry(
2298 2310 transaction,
2299 2311 ifh,
2300 2312 dfh,
2301 2313 entry,
2302 2314 deltainfo.data,
2303 2315 link,
2304 2316 offset,
2305 2317 serialized_sidedata,
2306 2318 )
2307 2319
2308 2320 rawtext = btext[0]
2309 2321
2310 2322 if alwayscache and rawtext is None:
2311 2323 rawtext = deltacomputer.buildtext(revinfo, fh)
2312 2324
2313 2325 if type(rawtext) == bytes: # only accept immutable objects
2314 2326 self._revisioncache = (node, curr, rawtext)
2315 2327 self._chainbasecache[curr] = deltainfo.chainbase
2316 2328 return curr
2317 2329
2318 2330 def _get_data_offset(self, prev):
2319 2331 """Returns the current offset in the (in-transaction) data file.
2320 2332 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2321 2333 file to store that information: since sidedata can be rewritten to the
2322 2334 end of the data file within a transaction, you can have cases where, for
2323 2335 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2324 2336 to `n - 1`'s sidedata being written after `n`'s data.
2325 2337
2326 2338 TODO cache this in a docket file before getting out of experimental."""
2327 2339 if self._format_version != REVLOGV2:
2328 2340 return self.end(prev)
2329 2341
2330 2342 offset = 0
2331 2343 for rev, entry in enumerate(self.index):
2332 2344 sidedata_end = entry[8] + entry[9]
2333 2345 # Sidedata for a previous rev has potentially been written after
2334 2346 # this rev's end, so take the max.
2335 2347 offset = max(self.end(rev), offset, sidedata_end)
2336 2348 return offset
2337 2349
2338 2350 def _writeentry(
2339 2351 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2340 2352 ):
2341 2353 # Files opened in a+ mode have inconsistent behavior on various
2342 2354 # platforms. Windows requires that a file positioning call be made
2343 2355 # when the file handle transitions between reads and writes. See
2344 2356 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2345 2357 # platforms, Python or the platform itself can be buggy. Some versions
2346 2358 # of Solaris have been observed to not append at the end of the file
2347 2359 # if the file was seeked to before the end. See issue4943 for more.
2348 2360 #
2349 2361 # We work around this issue by inserting a seek() before writing.
2350 2362 # Note: This is likely not necessary on Python 3. However, because
2351 2363 # the file handle is reused for reads and may be seeked there, we need
2352 2364 # to be careful before changing this.
2353 2365 ifh.seek(0, os.SEEK_END)
2354 2366 if dfh:
2355 2367 dfh.seek(0, os.SEEK_END)
2356 2368
2357 2369 curr = len(self) - 1
2358 2370 if not self._inline:
2359 2371 transaction.add(self.datafile, offset)
2360 2372 transaction.add(self.indexfile, curr * len(entry))
2361 2373 if data[0]:
2362 2374 dfh.write(data[0])
2363 2375 dfh.write(data[1])
2364 2376 if sidedata:
2365 2377 dfh.write(sidedata)
2366 2378 ifh.write(entry)
2367 2379 else:
2368 2380 offset += curr * self.index.entry_size
2369 2381 transaction.add(self.indexfile, offset)
2370 2382 ifh.write(entry)
2371 2383 ifh.write(data[0])
2372 2384 ifh.write(data[1])
2373 2385 if sidedata:
2374 2386 ifh.write(sidedata)
2375 2387 self._enforceinlinesize(transaction, ifh)
2376 2388 nodemaputil.setup_persistent_nodemap(transaction, self)
2377 2389
2378 2390 def addgroup(
2379 2391 self,
2380 2392 deltas,
2381 2393 linkmapper,
2382 2394 transaction,
2383 2395 alwayscache=False,
2384 2396 addrevisioncb=None,
2385 2397 duplicaterevisioncb=None,
2386 2398 ):
2387 2399 """
2388 2400 add a delta group
2389 2401
2390 2402 given a set of deltas, add them to the revision log. the
2391 2403 first delta is against its parent, which should be in our
2392 2404 log, the rest are against the previous delta.
2393 2405
2394 2406 If ``addrevisioncb`` is defined, it will be called with arguments of
2395 2407 this revlog and the node that was added.
2396 2408 """
2397 2409
2398 2410 if self._writinghandles:
2399 2411 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2400 2412
2401 2413 r = len(self)
2402 2414 end = 0
2403 2415 if r:
2404 2416 end = self.end(r - 1)
2405 2417 ifh = self._indexfp(b"a+")
2406 2418 isize = r * self.index.entry_size
2407 2419 if self._inline:
2408 2420 transaction.add(self.indexfile, end + isize)
2409 2421 dfh = None
2410 2422 else:
2411 2423 transaction.add(self.indexfile, isize)
2412 2424 transaction.add(self.datafile, end)
2413 2425 dfh = self._datafp(b"a+")
2414 2426
2415 2427 def flush():
2416 2428 if dfh:
2417 2429 dfh.flush()
2418 2430 ifh.flush()
2419 2431
2420 2432 self._writinghandles = (ifh, dfh)
2421 2433 empty = True
2422 2434
2423 2435 try:
2424 2436 deltacomputer = deltautil.deltacomputer(self)
2425 2437 # loop through our set of deltas
2426 2438 for data in deltas:
2427 2439 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2428 2440 link = linkmapper(linknode)
2429 2441 flags = flags or REVIDX_DEFAULT_FLAGS
2430 2442
2431 2443 rev = self.index.get_rev(node)
2432 2444 if rev is not None:
2433 2445 # this can happen if two branches make the same change
2434 2446 self._nodeduplicatecallback(transaction, rev)
2435 2447 if duplicaterevisioncb:
2436 2448 duplicaterevisioncb(self, rev)
2437 2449 empty = False
2438 2450 continue
2439 2451
2440 2452 for p in (p1, p2):
2441 2453 if not self.index.has_node(p):
2442 2454 raise error.LookupError(
2443 2455 p, self.indexfile, _(b'unknown parent')
2444 2456 )
2445 2457
2446 2458 if not self.index.has_node(deltabase):
2447 2459 raise error.LookupError(
2448 2460 deltabase, self.indexfile, _(b'unknown delta base')
2449 2461 )
2450 2462
2451 2463 baserev = self.rev(deltabase)
2452 2464
2453 2465 if baserev != nullrev and self.iscensored(baserev):
2454 2466 # if base is censored, delta must be full replacement in a
2455 2467 # single patch operation
2456 2468 hlen = struct.calcsize(b">lll")
2457 2469 oldlen = self.rawsize(baserev)
2458 2470 newlen = len(delta) - hlen
2459 2471 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2460 2472 raise error.CensoredBaseError(
2461 2473 self.indexfile, self.node(baserev)
2462 2474 )
2463 2475
2464 2476 if not flags and self._peek_iscensored(baserev, delta, flush):
2465 2477 flags |= REVIDX_ISCENSORED
2466 2478
2467 2479 # We assume consumers of addrevisioncb will want to retrieve
2468 2480 # the added revision, which will require a call to
2469 2481 # revision(). revision() will fast path if there is a cache
2470 2482 # hit. So, we tell _addrevision() to always cache in this case.
2471 2483 # We're only using addgroup() in the context of changegroup
2472 2484 # generation so the revision data can always be handled as raw
2473 2485 # by the flagprocessor.
2474 2486 rev = self._addrevision(
2475 2487 node,
2476 2488 None,
2477 2489 transaction,
2478 2490 link,
2479 2491 p1,
2480 2492 p2,
2481 2493 flags,
2482 2494 (baserev, delta),
2483 2495 ifh,
2484 2496 dfh,
2485 2497 alwayscache=alwayscache,
2486 2498 deltacomputer=deltacomputer,
2487 2499 sidedata=sidedata,
2488 2500 )
2489 2501
2490 2502 if addrevisioncb:
2491 2503 addrevisioncb(self, rev)
2492 2504 empty = False
2493 2505
2494 2506 if not dfh and not self._inline:
2495 2507 # addrevision switched from inline to conventional
2496 2508 # reopen the index
2497 2509 ifh.close()
2498 2510 dfh = self._datafp(b"a+")
2499 2511 ifh = self._indexfp(b"a+")
2500 2512 self._writinghandles = (ifh, dfh)
2501 2513 finally:
2502 2514 self._writinghandles = None
2503 2515
2504 2516 if dfh:
2505 2517 dfh.close()
2506 2518 ifh.close()
2507 2519 return not empty
2508 2520
2509 2521 def iscensored(self, rev):
2510 2522 """Check if a file revision is censored."""
2511 2523 if not self._censorable:
2512 2524 return False
2513 2525
2514 2526 return self.flags(rev) & REVIDX_ISCENSORED
2515 2527
2516 2528 def _peek_iscensored(self, baserev, delta, flush):
2517 2529 """Quickly check if a delta produces a censored revision."""
2518 2530 if not self._censorable:
2519 2531 return False
2520 2532
2521 2533 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2522 2534
2523 2535 def getstrippoint(self, minlink):
2524 2536 """find the minimum rev that must be stripped to strip the linkrev
2525 2537
2526 2538 Returns a tuple containing the minimum rev and a set of all revs that
2527 2539 have linkrevs that will be broken by this strip.
2528 2540 """
2529 2541 return storageutil.resolvestripinfo(
2530 2542 minlink,
2531 2543 len(self) - 1,
2532 2544 self.headrevs(),
2533 2545 self.linkrev,
2534 2546 self.parentrevs,
2535 2547 )
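# Usage sketch (illustrative):
#
#   rev, broken = rl.getstrippoint(minlink)
#   # `rev` is the first revision that strip() would truncate at;
#   # `broken` contains the revs whose linkrevs the strip invalidates.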
2536 2548
2537 2549 def strip(self, minlink, transaction):
2538 2550 """truncate the revlog on the first revision with a linkrev >= minlink
2539 2551
2540 2552 This function is called when we're stripping revision minlink and
2541 2553 its descendants from the repository.
2542 2554
2543 2555 We have to remove all revisions with linkrev >= minlink, because
2544 2556 the equivalent changelog revisions will be renumbered after the
2545 2557 strip.
2546 2558
2547 2559 So we truncate the revlog on the first of these revisions, and
2548 2560 trust that the caller has saved the revisions that shouldn't be
2549 2561 removed and that it'll re-add them after this truncation.
2550 2562 """
2551 2563 if len(self) == 0:
2552 2564 return
2553 2565
2554 2566 rev, _ = self.getstrippoint(minlink)
2555 2567 if rev == len(self):
2556 2568 return
2557 2569
2558 2570 # first truncate the files on disk
2559 2571 end = self.start(rev)
2560 2572 if not self._inline:
2561 2573 transaction.add(self.datafile, end)
2562 2574 end = rev * self.index.entry_size
2563 2575 else:
2564 2576 end += rev * self.index.entry_size
2565 2577
2566 2578 transaction.add(self.indexfile, end)
2567 2579
2568 2580 # then reset internal state in memory to forget those revisions
2569 2581 self._revisioncache = None
2570 2582 self._chaininfocache = util.lrucachedict(500)
2571 2583 self._chunkclear()
2572 2584
2573 2585 del self.index[rev:-1]
2574 2586
2575 2587 def checksize(self):
2576 2588 """Check size of index and data files
2577 2589
2578 2590 return a (dd, di) tuple.
2579 2591 - dd: extra bytes for the "data" file
2580 2592 - di: extra bytes for the "index" file
2581 2593
2582 2594 A healthy revlog will return (0, 0).
2583 2595 """
2584 2596 expected = 0
2585 2597 if len(self):
2586 2598 expected = max(0, self.end(len(self) - 1))
2587 2599
2588 2600 try:
2589 2601 with self._datafp() as f:
2590 2602 f.seek(0, io.SEEK_END)
2591 2603 actual = f.tell()
2592 2604 dd = actual - expected
2593 2605 except IOError as inst:
2594 2606 if inst.errno != errno.ENOENT:
2595 2607 raise
2596 2608 dd = 0
2597 2609
2598 2610 try:
2599 2611 f = self.opener(self.indexfile)
2600 2612 f.seek(0, io.SEEK_END)
2601 2613 actual = f.tell()
2602 2614 f.close()
2603 2615 s = self.index.entry_size
2604 2616 i = max(0, actual // s)
2605 2617 di = actual - (i * s)
2606 2618 if self._inline:
2607 2619 databytes = 0
2608 2620 for r in self:
2609 2621 databytes += max(0, self.length(r))
2610 2622 dd = 0
2611 2623 di = actual - len(self) * s - databytes
2612 2624 except IOError as inst:
2613 2625 if inst.errno != errno.ENOENT:
2614 2626 raise
2615 2627 di = 0
2616 2628
2617 2629 return (dd, di)
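# Usage sketch (illustrative):
#
#   dd, di = rl.checksize()
#   if (dd, di) != (0, 0):
#       ...  # the data/index files carry unexpected trailing bytes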
2618 2630
2619 2631 def files(self):
2620 2632 res = [self.indexfile]
2621 2633 if not self._inline:
2622 2634 res.append(self.datafile)
2623 2635 return res
2624 2636
2625 2637 def emitrevisions(
2626 2638 self,
2627 2639 nodes,
2628 2640 nodesorder=None,
2629 2641 revisiondata=False,
2630 2642 assumehaveparentrevisions=False,
2631 2643 deltamode=repository.CG_DELTAMODE_STD,
2632 2644 sidedata_helpers=None,
2633 2645 ):
2634 2646 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2635 2647 raise error.ProgrammingError(
2636 2648 b'unhandled value for nodesorder: %s' % nodesorder
2637 2649 )
2638 2650
2639 2651 if nodesorder is None and not self._generaldelta:
2640 2652 nodesorder = b'storage'
2641 2653
2642 2654 if (
2643 2655 not self._storedeltachains
2644 2656 and deltamode != repository.CG_DELTAMODE_PREV
2645 2657 ):
2646 2658 deltamode = repository.CG_DELTAMODE_FULL
2647 2659
2648 2660 return storageutil.emitrevisions(
2649 2661 self,
2650 2662 nodes,
2651 2663 nodesorder,
2652 2664 revlogrevisiondelta,
2653 2665 deltaparentfn=self.deltaparent,
2654 2666 candeltafn=self.candelta,
2655 2667 rawsizefn=self.rawsize,
2656 2668 revdifffn=self.revdiff,
2657 2669 flagsfn=self.flags,
2658 2670 deltamode=deltamode,
2659 2671 revisiondata=revisiondata,
2660 2672 assumehaveparentrevisions=assumehaveparentrevisions,
2661 2673 sidedata_helpers=sidedata_helpers,
2662 2674 )
2663 2675
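# (Editor's sketch) a typical emitrevisions() call, assuming `rl` is a
# revlog and `nodes` an iterable of node ids known to it; the accepted
# nodesorder values mirror the check at the top of the method.
def _emit_full_revisions(rl, nodes):
    for delta in rl.emitrevisions(
        nodes,
        nodesorder=b'storage',          # emit in storage order
        revisiondata=True,              # include revision/delta payloads
        assumehaveparentrevisions=False,
    ):
        yield delta                     # revlogrevisiondelta instances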
2664 2676 DELTAREUSEALWAYS = b'always'
2665 2677 DELTAREUSESAMEREVS = b'samerevs'
2666 2678 DELTAREUSENEVER = b'never'
2667 2679
2668 2680 DELTAREUSEFULLADD = b'fulladd'
2669 2681
2670 2682 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2671 2683
2672 2684 def clone(
2673 2685 self,
2674 2686 tr,
2675 2687 destrevlog,
2676 2688 addrevisioncb=None,
2677 2689 deltareuse=DELTAREUSESAMEREVS,
2678 2690 forcedeltabothparents=None,
2679 2691 sidedata_helpers=None,
2680 2692 ):
2681 2693 """Copy this revlog to another, possibly with format changes.
2682 2694
2683 2695 The destination revlog will contain the same revisions and nodes.
2684 2696 However, it may not be bit-for-bit identical due to e.g. delta encoding
2685 2697 differences.
2686 2698
2687 2699 The ``deltareuse`` argument controls how deltas from the existing revlog
2688 2700 are preserved in the destination revlog. The argument can have the
2689 2701 following values:
2690 2702
2691 2703 DELTAREUSEALWAYS
2692 2704 Deltas will always be reused (if possible), even if the destination
2693 2705 revlog would not select the same revisions for the delta. This is the
2694 2706 fastest mode of operation.
2695 2707 DELTAREUSESAMEREVS
2696 2708 Deltas will be reused if the destination revlog would pick the same
2697 2709 revisions for the delta. This mode strikes a balance between speed
2698 2710 and optimization.
2699 2711 DELTAREUSENEVER
2700 2712 Deltas will never be reused. This is the slowest mode of execution.
2701 2713 This mode can be used to recompute deltas (e.g. if the diff/delta
2702 2714 algorithm changes).
2703 2715 DELTAREUSEFULLADD
2704 2716 Revisions will be re-added as if they were new content. This is
2705 2717 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2706 2718 e.g. large file detection and handling.
2707 2719
2708 2720 Delta computation can be slow, so the choice of delta reuse policy can
2709 2721 significantly affect run time.
2710 2722
2711 2723 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2712 2724 two extremes. Deltas will be reused if they are appropriate. But if the
2713 2725 delta could choose a better revision, it will do so. This means if you
2714 2726 are converting a non-generaldelta revlog to a generaldelta revlog,
2715 2727 deltas will be recomputed if the delta's parent isn't a parent of the
2716 2728 revision.
2717 2729
2718 2730 In addition to the delta policy, the ``forcedeltabothparents``
2719 2731 argument controls whether to force computing deltas against both
2720 2732 parents for merges; when None, the destination's current setting is used.
2721 2733
2722 2734 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2723 2735 `sidedata_helpers`.
2724 2736 """
2725 2737 if deltareuse not in self.DELTAREUSEALL:
2726 2738 raise ValueError(
2727 2739 _(b'value for deltareuse invalid: %s') % deltareuse
2728 2740 )
2729 2741
2730 2742 if len(destrevlog):
2731 2743 raise ValueError(_(b'destination revlog is not empty'))
2732 2744
2733 2745 if getattr(self, 'filteredrevs', None):
2734 2746 raise ValueError(_(b'source revlog has filtered revisions'))
2735 2747 if getattr(destrevlog, 'filteredrevs', None):
2736 2748 raise ValueError(_(b'destination revlog has filtered revisions'))
2737 2749
2738 2750 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2739 2751 # if possible.
2740 2752 oldlazydelta = destrevlog._lazydelta
2741 2753 oldlazydeltabase = destrevlog._lazydeltabase
2742 2754 oldamd = destrevlog._deltabothparents
2743 2755
2744 2756 try:
2745 2757 if deltareuse == self.DELTAREUSEALWAYS:
2746 2758 destrevlog._lazydeltabase = True
2747 2759 destrevlog._lazydelta = True
2748 2760 elif deltareuse == self.DELTAREUSESAMEREVS:
2749 2761 destrevlog._lazydeltabase = False
2750 2762 destrevlog._lazydelta = True
2751 2763 elif deltareuse == self.DELTAREUSENEVER:
2752 2764 destrevlog._lazydeltabase = False
2753 2765 destrevlog._lazydelta = False
2754 2766
2755 2767 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2756 2768
2757 2769 self._clone(
2758 2770 tr,
2759 2771 destrevlog,
2760 2772 addrevisioncb,
2761 2773 deltareuse,
2762 2774 forcedeltabothparents,
2763 2775 sidedata_helpers,
2764 2776 )
2765 2777
2766 2778 finally:
2767 2779 destrevlog._lazydelta = oldlazydelta
2768 2780 destrevlog._lazydeltabase = oldlazydeltabase
2769 2781 destrevlog._deltabothparents = oldamd
2770 2782
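# (Editor's sketch, hypothetical helper) recomputing every delta while
# copying, via the DELTAREUSENEVER policy documented above; `srcrl` is
# an existing revlog, `dstrl` an empty destination (clone() raises
# otherwise), and `tr` an open transaction.
def _recompute_deltas(srcrl, dstrl, tr):
    srcrl.clone(
        tr,
        dstrl,
        deltareuse=srcrl.DELTAREUSENEVER,  # slowest; rebuilds all deltas
    )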
2771 2783 def _clone(
2772 2784 self,
2773 2785 tr,
2774 2786 destrevlog,
2775 2787 addrevisioncb,
2776 2788 deltareuse,
2777 2789 forcedeltabothparents,
2778 2790 sidedata_helpers,
2779 2791 ):
2780 2792 """perform the core duty of `revlog.clone` after parameter processing"""
2781 2793 deltacomputer = deltautil.deltacomputer(destrevlog)
2782 2794 index = self.index
2783 2795 for rev in self:
2784 2796 entry = index[rev]
2785 2797
2786 2798 # Some classes override linkrev to take filtered revs into
2787 2799 # account. Use raw entry from index.
2788 2800 flags = entry[0] & 0xFFFF
2789 2801 linkrev = entry[4]
2790 2802 p1 = index[entry[5]][7]
2791 2803 p2 = index[entry[6]][7]
2792 2804 node = entry[7]
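# (Editor's note, not part of the diff) the raw index entry unpacked
# above reads as: entry[0] = offset << 16 | flags, entry[4] = linkrev,
# entry[5]/entry[6] = parent revisions, entry[7] = node; entry[8] and
# entry[9], consumed by rewrite_sidedata() below, hold the sidedata
# offset and length.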
2793 2805
2794 2806 # (Possibly) reuse the delta from the revlog if allowed and
2795 2807 # the revlog chunk is a delta.
2796 2808 cachedelta = None
2797 2809 rawtext = None
2798 2810 if deltareuse == self.DELTAREUSEFULLADD:
2799 2811 text, sidedata = self._revisiondata(rev)
2800 2812
2801 2813 if sidedata_helpers is not None:
2802 2814 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2803 2815 self, sidedata_helpers, sidedata, rev
2804 2816 )
2805 2817 flags = flags | new_flags[0] & ~new_flags[1]
2806 2818
2807 2819 destrevlog.addrevision(
2808 2820 text,
2809 2821 tr,
2810 2822 linkrev,
2811 2823 p1,
2812 2824 p2,
2813 2825 cachedelta=cachedelta,
2814 2826 node=node,
2815 2827 flags=flags,
2816 2828 deltacomputer=deltacomputer,
2817 2829 sidedata=sidedata,
2818 2830 )
2819 2831 else:
2820 2832 if destrevlog._lazydelta:
2821 2833 dp = self.deltaparent(rev)
2822 2834 if dp != nullrev:
2823 2835 cachedelta = (dp, bytes(self._chunk(rev)))
2824 2836
2825 2837 sidedata = None
2826 2838 if not cachedelta:
2827 2839 rawtext, sidedata = self._revisiondata(rev)
2828 2840 if sidedata is None:
2829 2841 sidedata = self.sidedata(rev)
2830 2842
2831 2843 if sidedata_helpers is not None:
2832 2844 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2833 2845 self, sidedata_helpers, sidedata, rev
2834 2846 )
2835 2847 flags = flags | new_flags[0] & ~new_flags[1]
2836 2848
2837 2849 ifh = destrevlog.opener(
2838 2850 destrevlog.indexfile, b'a+', checkambig=False
2839 2851 )
2840 2852 dfh = None
2841 2853 if not destrevlog._inline:
2842 2854 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2843 2855 try:
2844 2856 destrevlog._addrevision(
2845 2857 node,
2846 2858 rawtext,
2847 2859 tr,
2848 2860 linkrev,
2849 2861 p1,
2850 2862 p2,
2851 2863 flags,
2852 2864 cachedelta,
2853 2865 ifh,
2854 2866 dfh,
2855 2867 deltacomputer=deltacomputer,
2856 2868 sidedata=sidedata,
2857 2869 )
2858 2870 finally:
2859 2871 if dfh:
2860 2872 dfh.close()
2861 2873 ifh.close()
2862 2874
2863 2875 if addrevisioncb:
2864 2876 addrevisioncb(self, rev, node)
2865 2877
2866 2878 def censorrevision(self, tr, censornode, tombstone=b''):
2867 2879 if self._format_version == REVLOGV0:
2868 2880 raise error.RevlogError(
2869 2881 _(b'cannot censor with version %d revlogs')
2870 2882 % self._format_version
2871 2883 )
2872 2884
2873 2885 censorrev = self.rev(censornode)
2874 2886 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2875 2887
2876 2888 if len(tombstone) > self.rawsize(censorrev):
2877 2889 raise error.Abort(
2878 2890 _(b'censor tombstone must be no longer than censored data')
2879 2891 )
2880 2892
2881 2893 # Rewriting the revlog in place is hard. Our strategy for censoring is
2882 2894 # to create a new revlog, copy all revisions to it, then replace the
2883 2895 # revlogs on transaction close.
2884
2885 newindexfile = self.indexfile + b'.tmpcensored'
2886 newdatafile = self.datafile + b'.tmpcensored'
2887
2896 #
2888 2897 # This is a bit dangerous. We could easily have a mismatch of state.
2889 2898 newrl = revlog(
2890 2899 self.opener,
2891 2900 target=self.target,
2892 indexfile=newindexfile,
2893 datafile=newdatafile,
2901 postfix=b'tmpcensored',
2902 indexfile=self.indexfile,
2894 2903 censorable=True,
2895 2904 )
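# (Editor's note) passing postfix=b'tmpcensored' lets the revlog
# constructor derive the temporary file names that the removed lines
# above built by hand from indexfile and datafile.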
2896 2905 newrl._format_version = self._format_version
2897 2906 newrl._format_flags = self._format_flags
2898 2907 newrl._generaldelta = self._generaldelta
2899 2908 newrl._parse_index = self._parse_index
2900 2909
2901 2910 for rev in self.revs():
2902 2911 node = self.node(rev)
2903 2912 p1, p2 = self.parents(node)
2904 2913
2905 2914 if rev == censorrev:
2906 2915 newrl.addrawrevision(
2907 2916 tombstone,
2908 2917 tr,
2909 2918 self.linkrev(censorrev),
2910 2919 p1,
2911 2920 p2,
2912 2921 censornode,
2913 2922 REVIDX_ISCENSORED,
2914 2923 )
2915 2924
2916 2925 if newrl.deltaparent(rev) != nullrev:
2917 2926 raise error.Abort(
2918 2927 _(
2919 2928 b'censored revision stored as delta; '
2920 2929 b'cannot censor'
2921 2930 ),
2922 2931 hint=_(
2923 2932 b'censoring of revlogs is not '
2924 2933 b'fully implemented; please report '
2925 2934 b'this bug'
2926 2935 ),
2927 2936 )
2928 2937 continue
2929 2938
2930 2939 if self.iscensored(rev):
2931 2940 if self.deltaparent(rev) != nullrev:
2932 2941 raise error.Abort(
2933 2942 _(
2934 2943 b'cannot censor due to censored '
2935 2944 b'revision having delta stored'
2936 2945 )
2937 2946 )
2938 2947 rawtext = self._chunk(rev)
2939 2948 else:
2940 2949 rawtext = self.rawdata(rev)
2941 2950
2942 2951 newrl.addrawrevision(
2943 2952 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2944 2953 )
2945 2954
2946 2955 tr.addbackup(self.indexfile, location=b'store')
2947 2956 if not self._inline:
2948 2957 tr.addbackup(self.datafile, location=b'store')
2949 2958
2950 2959 self.opener.rename(newrl.indexfile, self.indexfile)
2951 2960 if not self._inline:
2952 2961 self.opener.rename(newrl.datafile, self.datafile)
2953 2962
2954 2963 self.clearcaches()
2955 2964 self._loadindex()
2956 2965
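# (Editor's sketch, hypothetical wrapper) censoring one revision inside
# a transaction, following the copy-and-swap strategy described above;
# censorrevision() raises if the tombstone is longer than the censored
# data or if the revlog format (REVLOGV0) cannot represent it.
def _censor_node(rl, tr, node, tombstone=b''):
    rl.censorrevision(tr, node, tombstone=tombstone)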
2957 2966 def verifyintegrity(self, state):
2958 2967 """Verifies the integrity of the revlog.
2959 2968
2960 2969 Yields ``revlogproblem`` instances describing problems that are
2961 2970 found.
2962 2971 """
2963 2972 dd, di = self.checksize()
2964 2973 if dd:
2965 2974 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2966 2975 if di:
2967 2976 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2968 2977
2969 2978 version = self._format_version
2970 2979
2971 2980 # The verifier tells us what version revlog we should be.
2972 2981 if version != state[b'expectedversion']:
2973 2982 yield revlogproblem(
2974 2983 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2975 2984 % (self.indexfile, version, state[b'expectedversion'])
2976 2985 )
2977 2986
2978 2987 state[b'skipread'] = set()
2979 2988 state[b'safe_renamed'] = set()
2980 2989
2981 2990 for rev in self:
2982 2991 node = self.node(rev)
2983 2992
2984 2993 # Verify contents. 4 cases to care about:
2985 2994 #
2986 2995 # common: the most common case
2987 2996 # rename: with a rename
2988 2997 # meta: file content starts with b'\1\n', the metadata
2989 2998 # header defined in filelog.py, but without a rename
2990 2999 # ext: content stored externally
2991 3000 #
2992 3001 # More formally, their differences are shown below:
2993 3002 #
2994 3003 # | common | rename | meta | ext
2995 3004 # -------------------------------------------------------
2996 3005 # flags() | 0 | 0 | 0 | not 0
2997 3006 # renamed() | False | True | False | ?
2998 3007 # rawtext[0:2]=='\1\n'| False | True | True | ?
2999 3008 #
3000 3009 # "rawtext" means the raw text stored in revlog data, which
3001 3010 # could be retrieved by "rawdata(rev)". "text"
3002 3011 # mentioned below is "revision(rev)".
3003 3012 #
3004 3013 # There are 3 different lengths stored physically:
3005 3014 # 1. L1: rawsize, stored in revlog index
3006 3015 # 2. L2: len(rawtext), stored in revlog data
3007 3016 # 3. L3: len(text), stored in revlog data if flags==0, or
3008 3017 # possibly somewhere else if flags!=0
3009 3018 #
3010 3019 # L1 should be equal to L2. L3 could be different from them.
3011 3020 # "text" may or may not affect commit hash depending on flag
3012 3021 # processors (see flagutil.addflagprocessor).
3013 3022 #
3014 3023 # | common | rename | meta | ext
3015 3024 # -------------------------------------------------
3016 3025 # rawsize() | L1 | L1 | L1 | L1
3017 3026 # size() | L1 | L2-LM | L1(*) | L1 (?)
3018 3027 # len(rawtext) | L2 | L2 | L2 | L2
3019 3028 # len(text) | L2 | L2 | L2 | L3
3020 3029 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3021 3030 #
3022 3031 # LM: length of metadata, depending on rawtext
3023 3032 # (*): not ideal, see comment in filelog.size
3024 3033 # (?): could be "- len(meta)" if the resolved content has
3025 3034 # rename metadata
3026 3035 #
3027 3036 # Checks needed to be done:
3028 3037 # 1. length check: L1 == L2, in all cases.
3029 3038 # 2. hash check: depending on flag processor, we may need to
3030 3039 # use either "text" (external), or "rawtext" (in revlog).
3031 3040
3032 3041 try:
3033 3042 skipflags = state.get(b'skipflags', 0)
3034 3043 if skipflags:
3035 3044 skipflags &= self.flags(rev)
3036 3045
3037 3046 _verify_revision(self, skipflags, state, node)
3038 3047
3039 3048 l1 = self.rawsize(rev)
3040 3049 l2 = len(self.rawdata(node))
3041 3050
3042 3051 if l1 != l2:
3043 3052 yield revlogproblem(
3044 3053 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3045 3054 node=node,
3046 3055 )
3047 3056
3048 3057 except error.CensoredNodeError:
3049 3058 if state[b'erroroncensored']:
3050 3059 yield revlogproblem(
3051 3060 error=_(b'censored file data'), node=node
3052 3061 )
3053 3062 state[b'skipread'].add(node)
3054 3063 except Exception as e:
3055 3064 yield revlogproblem(
3056 3065 error=_(b'unpacking %s: %s')
3057 3066 % (short(node), stringutil.forcebytestr(e)),
3058 3067 node=node,
3059 3068 )
3060 3069 state[b'skipread'].add(node)
3061 3070
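# (Editor's sketch) the core length check described in the long comment
# above, assuming `rl` is a revlog and `rev` a valid revision: L1, the
# rawsize recorded in the index, must equal L2, the length of the
# stored rawtext; L3, len(revision(rev)), may legitimately differ.
def _length_check(rl, rev):
    l1 = rl.rawsize(rev)            # L1: from the revlog index
    l2 = len(rl.rawdata(rev))       # L2: from the revlog data
    return l1 == l2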
3062 3071 def storageinfo(
3063 3072 self,
3064 3073 exclusivefiles=False,
3065 3074 sharedfiles=False,
3066 3075 revisionscount=False,
3067 3076 trackedsize=False,
3068 3077 storedsize=False,
3069 3078 ):
3070 3079 d = {}
3071 3080
3072 3081 if exclusivefiles:
3073 3082 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3074 3083 if not self._inline:
3075 3084 d[b'exclusivefiles'].append((self.opener, self.datafile))
3076 3085
3077 3086 if sharedfiles:
3078 3087 d[b'sharedfiles'] = []
3079 3088
3080 3089 if revisionscount:
3081 3090 d[b'revisionscount'] = len(self)
3082 3091
3083 3092 if trackedsize:
3084 3093 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3085 3094
3086 3095 if storedsize:
3087 3096 d[b'storedsize'] = sum(
3088 3097 self.opener.stat(path).st_size for path in self.files()
3089 3098 )
3090 3099
3091 3100 return d
3092 3101
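# (Editor's sketch) requesting everything storageinfo() can report,
# assuming `rl` is any revlog instance:
def _full_storage_info(rl):
    return rl.storageinfo(
        exclusivefiles=True,    # (opener, path) pairs owned by rl
        sharedfiles=True,       # always [] for plain revlogs
        revisionscount=True,
        trackedsize=True,       # sum of rawsize over all revisions
        storedsize=True,        # on-disk bytes of rl.files()
    )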
3093 3102 def rewrite_sidedata(self, helpers, startrev, endrev):
3094 3103 if not self.hassidedata:
3095 3104 return
3096 3105 # inline revlogs are not yet supported because they suffer from an
3097 3106 # issue when rewriting them (it is not an append-only operation).
3098 3107 # See issue6485.
3099 3108 assert not self._inline
3100 3109 if not helpers[1] and not helpers[2]:
3101 3110 # Nothing to generate or remove
3102 3111 return
3103 3112
3104 3113 # The changelog implements a "delayed" writing mechanism that assumes
3105 3114 # all index data is written in append mode and is therefore incompatible
3106 3115 # with the seeked write done in this method. The use of such "delayed"
3107 3116 # writing will soon be removed for revlog versions that support side
3108 3117 # data, so for now, we only keep this simple assert to highlight the
3109 3118 # situation.
3110 3119 delayed = getattr(self, '_delayed', False)
3111 3120 diverted = getattr(self, '_divert', False)
3112 3121 if delayed and not diverted:
3113 3122 msg = "cannot rewrite_sidedata of a delayed revlog"
3114 3123 raise error.ProgrammingError(msg)
3115 3124
3116 3125 new_entries = []
3117 3126 # append the new sidedata
3118 3127 with self._datafp(b'a+') as fp:
3119 3128 # Maybe this bug still exists, see revlog._writeentry
3120 3129 fp.seek(0, os.SEEK_END)
3121 3130 current_offset = fp.tell()
3122 3131 for rev in range(startrev, endrev + 1):
3123 3132 entry = self.index[rev]
3124 3133 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3125 3134 store=self,
3126 3135 sidedata_helpers=helpers,
3127 3136 sidedata={},
3128 3137 rev=rev,
3129 3138 )
3130 3139
3131 3140 serialized_sidedata = sidedatautil.serialize_sidedata(
3132 3141 new_sidedata
3133 3142 )
3134 3143 if entry[8] != 0 or entry[9] != 0:
3135 3144 # rewriting entries that already have sidedata is not
3136 3145 # supported yet, because it introduces garbage data in the
3137 3146 # revlog.
3138 3147 msg = b"Rewriting existing sidedata is not supported yet"
3139 3148 raise error.Abort(msg)
3140 3149
3141 3150 # Apply (potential) flags to add and to remove after running
3142 3151 # the sidedata helpers
3143 3152 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3144 3153 entry = (new_offset_flags,) + entry[1:8]
3145 3154 entry += (current_offset, len(serialized_sidedata))
3146 3155
3147 3156 fp.write(serialized_sidedata)
3148 3157 new_entries.append(entry)
3149 3158 current_offset += len(serialized_sidedata)
3150 3159
3151 3160 # rewrite the new index entries
3152 3161 with self._indexfp(b'r+') as fp:
3153 3162 fp.seek(startrev * self.index.entry_size)
3154 3163 for i, e in enumerate(new_entries):
3155 3164 rev = startrev + i
3156 3165 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3157 3166 packed = self.index.entry_binary(rev)
3158 3167 if rev == 0:
3159 3168 header = self._format_flags | self._format_version
3160 3169 header = self.index.pack_header(header)
3161 3170 packed = header + packed
3162 3171 fp.write(packed)
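# (Editor's sketch, hypothetical helper) the two-phase flow above in
# one call; `rl` is assumed to be a non-inline revlog and `helpers` a
# sidedata helpers triple as accepted by rewrite_sidedata(). Sidedata
# blobs are appended to the data file first, then the corresponding
# index entries are rewritten in place with the new offset, length and
# flags.
def _rewrite_all_sidedata(rl, helpers):
    if rl.hassidedata and not rl._inline:
        rl.rewrite_sidedata(helpers, 0, len(rl) - 1)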