revlog: pass a transaction object to `rewrite_sidedata`...
marmoute
r47990:2bd4b521 default
@@ -1,1952 +1,1954 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullrev,
18 18 short,
19 19 )
20 20 from .pycompat import open
21 21
22 22 from . import (
23 23 error,
24 24 match as matchmod,
25 25 mdiff,
26 26 phases,
27 27 pycompat,
28 28 requirements,
29 29 scmutil,
30 30 util,
31 31 )
32 32
33 33 from .interfaces import repository
34 34 from .revlogutils import sidedata as sidedatamod
35 35 from .revlogutils import constants as revlog_constants
36 36 from .utils import storageutil
37 37
38 38 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
39 39 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
40 40 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
41 41 _CHANGEGROUPV4_DELTA_HEADER = struct.Struct(b">B20s20s20s20s20sH")
42 42
43 43 LFS_REQUIREMENT = b'lfs'
44 44
45 45 readexactly = util.readexactly
46 46
47 47
48 48 def getchunk(stream):
49 49 """return the next chunk from stream as a string"""
50 50 d = readexactly(stream, 4)
51 51 l = struct.unpack(b">l", d)[0]
52 52 if l <= 4:
53 53 if l:
54 54 raise error.Abort(_(b"invalid chunk length %d") % l)
55 55 return b""
56 56 return readexactly(stream, l - 4)
57 57
58 58
59 59 def chunkheader(length):
60 60 """return a changegroup chunk header (string)"""
61 61 return struct.pack(b">l", length + 4)
62 62
63 63
64 64 def closechunk():
65 65 """return a changegroup chunk header (string) for a zero-length chunk"""
66 66 return struct.pack(b">l", 0)
67 67
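As an aside for readers new to the format: each chunk on the wire is a 4-byte big-endian length that counts itself, followed by the payload, and a zero length closes a group. A minimal, self-contained round-trip sketch (the 0 < l <= 4 error handling above is elided; io.BytesIO stands in for Mercurial's stream objects):

import io
import struct

def write_chunk(out, payload):
    # the length prefix counts itself, so an empty payload is length 4;
    # the special value 0 marks the end of a group (see closechunk())
    out.write(struct.pack(b">l", len(payload) + 4))
    out.write(payload)

def read_chunks(stream):
    while True:
        l = struct.unpack(b">l", stream.read(4))[0]
        if l == 0:  # terminator
            return
        yield stream.read(l - 4)

buf = io.BytesIO()
write_chunk(buf, b"first")
write_chunk(buf, b"second")
buf.write(struct.pack(b">l", 0))  # closechunk()
buf.seek(0)
assert list(read_chunks(buf)) == [b"first", b"second"]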
68 68
69 69 def _fileheader(path):
70 70 """Obtain a changegroup chunk header for a named path."""
71 71 return chunkheader(len(path)) + path
72 72
73 73
74 74 def writechunks(ui, chunks, filename, vfs=None):
75 75 """Write chunks to a file and return its filename.
76 76
77 77 The stream is assumed to be a bundle file.
78 78 Existing files will not be overwritten.
79 79 If no filename is specified, a temporary file is created.
80 80 """
81 81 fh = None
82 82 cleanup = None
83 83 try:
84 84 if filename:
85 85 if vfs:
86 86 fh = vfs.open(filename, b"wb")
87 87 else:
88 88 # Increase default buffer size because default is usually
89 89 # small (4k is common on Linux).
90 90 fh = open(filename, b"wb", 131072)
91 91 else:
92 92 fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
93 93 fh = os.fdopen(fd, "wb")
94 94 cleanup = filename
95 95 for c in chunks:
96 96 fh.write(c)
97 97 cleanup = None
98 98 return filename
99 99 finally:
100 100 if fh is not None:
101 101 fh.close()
102 102 if cleanup is not None:
103 103 if filename and vfs:
104 104 vfs.unlink(cleanup)
105 105 else:
106 106 os.unlink(cleanup)
107 107
108 108
109 109 class cg1unpacker(object):
110 110 """Unpacker for cg1 changegroup streams.
111 111
112 112 A changegroup unpacker handles the framing of the revision data in
113 113 the wire format. Most consumers will want to use the apply()
114 114 method to add the changes from the changegroup to a repository.
115 115
116 116 If you're forwarding a changegroup unmodified to another consumer,
117 117 use getchunks(), which returns an iterator of changegroup
118 118 chunks. This is mostly useful for cases where you need to know the
119 119 data stream has ended by observing the end of the changegroup.
120 120
121 121 deltachunk() is useful only if you're applying delta data. Most
122 122 consumers should prefer apply() instead.
123 123
124 124 A few other public methods exist. Those are used only for
125 125 bundlerepo and some debug commands - their use is discouraged.
126 126 """
127 127
128 128 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
129 129 deltaheadersize = deltaheader.size
130 130 version = b'01'
131 131 _grouplistcount = 1 # One list of files after the manifests
132 132
133 133 def __init__(self, fh, alg, extras=None):
134 134 if alg is None:
135 135 alg = b'UN'
136 136 if alg not in util.compengines.supportedbundletypes:
137 137 raise error.Abort(_(b'unknown stream compression type: %s') % alg)
138 138 if alg == b'BZ':
139 139 alg = b'_truncatedBZ'
140 140
141 141 compengine = util.compengines.forbundletype(alg)
142 142 self._stream = compengine.decompressorreader(fh)
143 143 self._type = alg
144 144 self.extras = extras or {}
145 145 self.callback = None
146 146
147 147 # These methods (compressed, read, seek, tell) all appear to only
148 148 # be used by bundlerepo, but it's a little hard to tell.
149 149 def compressed(self):
150 150 return self._type is not None and self._type != b'UN'
151 151
152 152 def read(self, l):
153 153 return self._stream.read(l)
154 154
155 155 def seek(self, pos):
156 156 return self._stream.seek(pos)
157 157
158 158 def tell(self):
159 159 return self._stream.tell()
160 160
161 161 def close(self):
162 162 return self._stream.close()
163 163
164 164 def _chunklength(self):
165 165 d = readexactly(self._stream, 4)
166 166 l = struct.unpack(b">l", d)[0]
167 167 if l <= 4:
168 168 if l:
169 169 raise error.Abort(_(b"invalid chunk length %d") % l)
170 170 return 0
171 171 if self.callback:
172 172 self.callback()
173 173 return l - 4
174 174
175 175 def changelogheader(self):
176 176 """v10 does not have a changelog header chunk"""
177 177 return {}
178 178
179 179 def manifestheader(self):
180 180 """v10 does not have a manifest header chunk"""
181 181 return {}
182 182
183 183 def filelogheader(self):
184 184 """return the header of the filelogs chunk, v10 only has the filename"""
185 185 l = self._chunklength()
186 186 if not l:
187 187 return {}
188 188 fname = readexactly(self._stream, l)
189 189 return {b'filename': fname}
190 190
191 191 def _deltaheader(self, headertuple, prevnode):
192 192 node, p1, p2, cs = headertuple
193 193 if prevnode is None:
194 194 deltabase = p1
195 195 else:
196 196 deltabase = prevnode
197 197 flags = 0
198 198 protocol_flags = 0
199 199 return node, p1, p2, deltabase, cs, flags, protocol_flags
200 200
201 201 def deltachunk(self, prevnode):
202 202 l = self._chunklength()
203 203 if not l:
204 204 return {}
205 205 headerdata = readexactly(self._stream, self.deltaheadersize)
206 206 header = self.deltaheader.unpack(headerdata)
207 207 delta = readexactly(self._stream, l - self.deltaheadersize)
208 208 header = self._deltaheader(header, prevnode)
209 209 node, p1, p2, deltabase, cs, flags, protocol_flags = header
210 210 return node, p1, p2, cs, deltabase, delta, flags, protocol_flags
211 211
212 212 def getchunks(self):
213 213 """returns all the chunks contained in the bundle
214 214
215 215 Used when you need to forward the binary stream to a file or another
216 216 network API. To do so, it parses the changegroup data; otherwise it would
217 217 block in the sshrepo case, because it doesn't know where the stream ends.
218 218 """
219 219 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
220 220 # and a list of filelogs. For changegroup 3, we expect 4 parts:
221 221 # changelog, manifestlog, a list of tree manifestlogs, and a list of
222 222 # filelogs.
223 223 #
224 224 # Changelog and manifestlog parts are terminated with empty chunks. The
225 225 # tree and file parts are a list of entry sections. Each entry section
226 226 # is a series of chunks terminating in an empty chunk. The list of these
227 227 # entry sections is terminated in yet another empty chunk, so we know
228 228 # we've reached the end of the tree/file list when we reach an empty
229 229 # chunk that was preceded by no non-empty chunks.
230 230
231 231 parts = 0
232 232 while parts < 2 + self._grouplistcount:
233 233 noentries = True
234 234 while True:
235 235 chunk = getchunk(self)
236 236 if not chunk:
237 237 # The first two empty chunks represent the end of the
238 238 # changelog and the manifestlog portions. The remaining
239 239 # empty chunks represent either A) the end of individual
240 240 # tree or file entries in the file list, or B) the end of
241 241 # the entire list. It's the end of the entire list if there
242 242 # were no entries (i.e. noentries is True).
243 243 if parts < 2:
244 244 parts += 1
245 245 elif noentries:
246 246 parts += 1
247 247 break
248 248 noentries = False
249 249 yield chunkheader(len(chunk))
250 250 pos = 0
251 251 while pos < len(chunk):
252 252 next = pos + 2 ** 20
253 253 yield chunk[pos:next]
254 254 pos = next
255 255 yield closechunk()
256 256
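The part-counting loop in getchunks() is easier to follow restated over a pre-parsed list of payloads, where b'' marks an empty chunk. This is an illustrative re-derivation assuming a well-formed stream, not code from this module:

def count_parts(chunks, grouplistcount=1):
    # mirrors cg1unpacker.getchunks(): the changelog and manifest groups
    # each end at their first empty chunk; a file (or tree) list ends at
    # an empty chunk preceded by no non-empty chunks
    parts = 0
    it = iter(chunks)
    while parts < 2 + grouplistcount:
        noentries = True
        for chunk in it:
            if not chunk:
                if parts < 2 or noentries:
                    parts += 1
                break
            noentries = False
    return parts

# changelog, manifests, one file entry, end-of-file-list
stream = [b'c1', b'', b'm1', b'', b'f-header', b'f1', b'', b'']
assert count_parts(stream) == 3
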
257 257 def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
258 258 self.callback = prog.increment
259 259 # no need to check for empty manifest group here:
260 260 # if the result of the merge of 1 and 2 is the same in 3 and 4,
261 261 # no new manifest will be created and the manifest group will
262 262 # be empty during the pull
263 263 self.manifestheader()
264 264 deltas = self.deltaiter()
265 265 storage = repo.manifestlog.getstorage(b'')
266 266 storage.addgroup(deltas, revmap, trp, addrevisioncb=addrevisioncb)
267 267 prog.complete()
268 268 self.callback = None
269 269
270 270 def apply(
271 271 self,
272 272 repo,
273 273 tr,
274 274 srctype,
275 275 url,
276 276 targetphase=phases.draft,
277 277 expectedtotal=None,
278 278 sidedata_categories=None,
279 279 ):
280 280 """Add the changegroup returned by source.read() to this repo.
281 281 srctype is a string like 'push', 'pull', or 'unbundle'. url is
282 282 the URL of the repo where this changegroup is coming from.
283 283
284 284 Return an integer summarizing the change to this repo:
285 285 - nothing changed or no source: 0
286 286 - more heads than before: 1+added heads (2..n)
287 287 - fewer heads than before: -1-removed heads (-2..-n)
288 288 - number of heads stays the same: 1
289 289
290 290 `sidedata_categories` is an optional set of the sidedata categories
291 291 wanted by the remote.
292 292 """
293 293 repo = repo.unfiltered()
294 294
295 295 # Only useful if we're adding sidedata categories. If both peers have
296 296 # the same categories, then we simply don't do anything.
297 297 adding_sidedata = (
298 298 requirements.REVLOGV2_REQUIREMENT in repo.requirements
299 299 and self.version == b'04'
300 300 and srctype == b'pull'
301 301 )
302 302 if adding_sidedata:
303 303 sidedata_helpers = sidedatamod.get_sidedata_helpers(
304 304 repo,
305 305 sidedata_categories or set(),
306 306 pull=True,
307 307 )
308 308 else:
309 309 sidedata_helpers = None
310 310
311 311 def csmap(x):
312 312 repo.ui.debug(b"add changeset %s\n" % short(x))
313 313 return len(cl)
314 314
315 315 def revmap(x):
316 316 return cl.rev(x)
317 317
318 318 try:
319 319 # The transaction may already carry source information. In this
320 320 # case we use the top level data. We overwrite the argument
321 321 # because we need to use the top level value (if it exists)
322 322 # in this function.
323 323 srctype = tr.hookargs.setdefault(b'source', srctype)
324 324 tr.hookargs.setdefault(b'url', url)
325 325 repo.hook(
326 326 b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
327 327 )
328 328
329 329 # write changelog data to temp files so concurrent readers
330 330 # will not see an inconsistent view
331 331 cl = repo.changelog
332 332 cl.delayupdate(tr)
333 333 oldheads = set(cl.heads())
334 334
335 335 trp = weakref.proxy(tr)
336 336 # pull off the changeset group
337 337 repo.ui.status(_(b"adding changesets\n"))
338 338 clstart = len(cl)
339 339 progress = repo.ui.makeprogress(
340 340 _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
341 341 )
342 342 self.callback = progress.increment
343 343
344 344 efilesset = set()
345 345 duprevs = []
346 346
347 347 def ondupchangelog(cl, rev):
348 348 if rev < clstart:
349 349 duprevs.append(rev)
350 350
351 351 def onchangelog(cl, rev):
352 352 ctx = cl.changelogrevision(rev)
353 353 efilesset.update(ctx.files)
354 354 repo.register_changeset(rev, ctx)
355 355
356 356 self.changelogheader()
357 357 deltas = self.deltaiter()
358 358 if not cl.addgroup(
359 359 deltas,
360 360 csmap,
361 361 trp,
362 362 alwayscache=True,
363 363 addrevisioncb=onchangelog,
364 364 duplicaterevisioncb=ondupchangelog,
365 365 ):
366 366 repo.ui.develwarn(
367 367 b'applied empty changelog from changegroup',
368 368 config=b'warn-empty-changegroup',
369 369 )
370 370 efiles = len(efilesset)
371 371 clend = len(cl)
372 372 changesets = clend - clstart
373 373 progress.complete()
374 374 del deltas
375 375 # TODO Python 2.7 removal
376 376 # del efilesset
377 377 efilesset = None
378 378 self.callback = None
379 379
380 380 # Keep track of the (non-changelog) revlogs we've updated and their
381 381 # range of new revisions for sidedata rewrite.
382 382 # TODO do something more efficient than keeping the reference to
383 383 # the revlogs, especially memory-wise.
384 384 touched_manifests = {}
385 385 touched_filelogs = {}
386 386
387 387 # pull off the manifest group
388 388 repo.ui.status(_(b"adding manifests\n"))
389 389 # We know that we'll never have more manifests than we had
390 390 # changesets.
391 391 progress = repo.ui.makeprogress(
392 392 _(b'manifests'), unit=_(b'chunks'), total=changesets
393 393 )
394 394 on_manifest_rev = None
395 395 if sidedata_helpers:
396 396 if revlog_constants.KIND_MANIFESTLOG in sidedata_helpers[1]:
397 397
398 398 def on_manifest_rev(manifest, rev):
399 399 range = touched_manifests.get(manifest)
400 400 if not range:
401 401 touched_manifests[manifest] = (rev, rev)
402 402 else:
403 403 assert rev == range[1] + 1
404 404 touched_manifests[manifest] = (range[0], rev)
405 405
406 406 self._unpackmanifests(
407 407 repo,
408 408 revmap,
409 409 trp,
410 410 progress,
411 411 addrevisioncb=on_manifest_rev,
412 412 )
413 413
414 414 needfiles = {}
415 415 if repo.ui.configbool(b'server', b'validate'):
416 416 cl = repo.changelog
417 417 ml = repo.manifestlog
418 418 # validate incoming csets have their manifests
419 419 for cset in pycompat.xrange(clstart, clend):
420 420 mfnode = cl.changelogrevision(cset).manifest
421 421 mfest = ml[mfnode].readdelta()
422 422 # store file nodes we must see
423 423 for f, n in pycompat.iteritems(mfest):
424 424 needfiles.setdefault(f, set()).add(n)
425 425
426 426 on_filelog_rev = None
427 427 if sidedata_helpers:
428 428 if revlog_constants.KIND_FILELOG in sidedata_helpers[1]:
429 429
430 430 def on_filelog_rev(filelog, rev):
431 431 range = touched_filelogs.get(filelog)
432 432 if not range:
433 433 touched_filelogs[filelog] = (rev, rev)
434 434 else:
435 435 assert rev == range[1] + 1
436 436 touched_filelogs[filelog] = (range[0], rev)
437 437
438 438 # process the files
439 439 repo.ui.status(_(b"adding file changes\n"))
440 440 newrevs, newfiles = _addchangegroupfiles(
441 441 repo,
442 442 self,
443 443 revmap,
444 444 trp,
445 445 efiles,
446 446 needfiles,
447 447 addrevisioncb=on_filelog_rev,
448 448 )
449 449
450 450 if sidedata_helpers:
451 451 if revlog_constants.KIND_CHANGELOG in sidedata_helpers[1]:
452 cl.rewrite_sidedata(sidedata_helpers, clstart, clend - 1)
452 cl.rewrite_sidedata(
453 trp, sidedata_helpers, clstart, clend - 1
454 )
453 455 for mf, (startrev, endrev) in touched_manifests.items():
454 mf.rewrite_sidedata(sidedata_helpers, startrev, endrev)
456 mf.rewrite_sidedata(trp, sidedata_helpers, startrev, endrev)
455 457 for fl, (startrev, endrev) in touched_filelogs.items():
456 fl.rewrite_sidedata(sidedata_helpers, startrev, endrev)
458 fl.rewrite_sidedata(trp, sidedata_helpers, startrev, endrev)
457 459
458 460 # making sure the value exists
459 461 tr.changes.setdefault(b'changegroup-count-changesets', 0)
460 462 tr.changes.setdefault(b'changegroup-count-revisions', 0)
461 463 tr.changes.setdefault(b'changegroup-count-files', 0)
462 464 tr.changes.setdefault(b'changegroup-count-heads', 0)
463 465
464 466 # some code uses bundle operations for internal purposes. They usually
465 467 # set `ui.quiet` to do this outside of user sight. Since the report
466 468 # of such operations now happens at the end of the transaction,
467 469 # ui.quiet has no direct effect on the output.
468 470 #
469 471 # To preserve this intent we use an inelegant hack: we fail to report
470 472 # the change if `quiet` is set. We should probably move to
471 473 # something better, but this is a good first step to allow the "end
472 474 # of transaction report" to pass tests.
473 475 if not repo.ui.quiet:
474 476 tr.changes[b'changegroup-count-changesets'] += changesets
475 477 tr.changes[b'changegroup-count-revisions'] += newrevs
476 478 tr.changes[b'changegroup-count-files'] += newfiles
477 479
478 480 deltaheads = 0
479 481 if oldheads:
480 482 heads = cl.heads()
481 483 deltaheads += len(heads) - len(oldheads)
482 484 for h in heads:
483 485 if h not in oldheads and repo[h].closesbranch():
484 486 deltaheads -= 1
485 487
486 488 # see previous comment about checking ui.quiet
487 489 if not repo.ui.quiet:
488 490 tr.changes[b'changegroup-count-heads'] += deltaheads
489 491 repo.invalidatevolatilesets()
490 492
491 493 if changesets > 0:
492 494 if b'node' not in tr.hookargs:
493 495 tr.hookargs[b'node'] = hex(cl.node(clstart))
494 496 tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
495 497 hookargs = dict(tr.hookargs)
496 498 else:
497 499 hookargs = dict(tr.hookargs)
498 500 hookargs[b'node'] = hex(cl.node(clstart))
499 501 hookargs[b'node_last'] = hex(cl.node(clend - 1))
500 502 repo.hook(
501 503 b'pretxnchangegroup',
502 504 throw=True,
503 505 **pycompat.strkwargs(hookargs)
504 506 )
505 507
506 508 added = pycompat.xrange(clstart, clend)
507 509 phaseall = None
508 510 if srctype in (b'push', b'serve'):
509 511 # Old servers cannot push the boundary themselves.
510 512 # New servers won't push the boundary if the changeset already
511 513 # exists locally as secret
512 514 #
513 515 # We should not use `added` here but the list of all changes in
514 516 # the bundle
515 517 if repo.publishing():
516 518 targetphase = phaseall = phases.public
517 519 else:
518 520 # closer target phase computation
519 521
520 522 # Those changesets have been pushed from the
521 523 # outside, their phases are going to be pushed
522 524 # alongside. Therefore `targetphase` is
523 525 # ignored.
524 526 targetphase = phaseall = phases.draft
525 527 if added:
526 528 phases.registernew(repo, tr, targetphase, added)
527 529 if phaseall is not None:
528 530 if duprevs:
529 531 duprevs.extend(added)
530 532 else:
531 533 duprevs = added
532 534 phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
533 535 duprevs = []
534 536
535 537 if changesets > 0:
536 538
537 539 def runhooks(unused_success):
538 540 # These hooks run when the lock releases, not when the
539 541 # transaction closes. So it's possible for the changelog
540 542 # to have changed since we last saw it.
541 543 if clstart >= len(repo):
542 544 return
543 545
544 546 repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))
545 547
546 548 for rev in added:
547 549 args = hookargs.copy()
548 550 args[b'node'] = hex(cl.node(rev))
549 551 del args[b'node_last']
550 552 repo.hook(b"incoming", **pycompat.strkwargs(args))
551 553
552 554 newheads = [h for h in repo.heads() if h not in oldheads]
553 555 repo.ui.log(
554 556 b"incoming",
555 557 b"%d incoming changes - new heads: %s\n",
556 558 len(added),
557 559 b', '.join([hex(c[:6]) for c in newheads]),
558 560 )
559 561
560 562 tr.addpostclose(
561 563 b'changegroup-runhooks-%020i' % clstart,
562 564 lambda tr: repo._afterlock(runhooks),
563 565 )
564 566 finally:
565 567 repo.ui.flush()
566 568 # never return 0 here:
567 569 if deltaheads < 0:
568 570 ret = deltaheads - 1
569 571 else:
570 572 ret = deltaheads + 1
571 573 return ret
572 574
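The change this revision makes is concentrated in the three calls visible in the diff above: each rewrite_sidedata now receives the transaction proxy (trp) as its first argument, so the sidedata rewrite happens under transaction control. Condensed, the new call shape is (a restatement of the diff, not additional API):

def rewrite_all_sidedata(trp, cl, touched_manifests, touched_filelogs,
                         sidedata_helpers, clstart, clend):
    # after this changeset, every rewrite_sidedata call is passed the
    # transaction first, so the rewrite can be tracked and rolled back
    cl.rewrite_sidedata(trp, sidedata_helpers, clstart, clend - 1)
    for mf, (startrev, endrev) in touched_manifests.items():
        mf.rewrite_sidedata(trp, sidedata_helpers, startrev, endrev)
    for fl, (startrev, endrev) in touched_filelogs.items():
        fl.rewrite_sidedata(trp, sidedata_helpers, startrev, endrev)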
573 575 def deltaiter(self):
574 576 """
575 577 returns an iterator of the deltas in this changegroup
576 578
577 579 Useful for passing to the underlying storage system to be stored.
578 580 """
579 581 chain = None
580 582 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
581 583 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags, sidedata)
582 584 yield chunkdata
583 585 chain = chunkdata[0]
584 586
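deltaiter's chaining contract (in cg1, each chunk's implicit delta base is the previous chunk's node) can be exercised with a stub stream; a test-style sketch, Python 3 only since it calls the method unbound, with the stub and its canned tuples invented for illustration:

class _stubstream(object):
    """Fakes deltachunk(): yields canned 8-tuples, then {} to end."""

    def __init__(self, nodes):
        self._nodes = list(nodes)

    def deltachunk(self, prevnode):
        if not self._nodes:
            return {}
        node = self._nodes.pop(0)
        # slot 4 records the prevnode the framing layer handed us
        return (node, b'p1', b'p2', b'cs', prevnode, b'delta', 0, 0)

chunks = list(cg1unpacker.deltaiter(_stubstream([b'n1', b'n2', b'n3'])))
# the chain: the first chunk sees prevnode=None, later ones the prior node
assert [c[4] for c in chunks] == [None, b'n1', b'n2']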
585 587
586 588 class cg2unpacker(cg1unpacker):
587 589 """Unpacker for cg2 streams.
588 590
589 591 cg2 streams add support for generaldelta, so the delta header
590 592 format is slightly different. All other features about the data
591 593 remain the same.
592 594 """
593 595
594 596 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
595 597 deltaheadersize = deltaheader.size
596 598 version = b'02'
597 599
598 600 def _deltaheader(self, headertuple, prevnode):
599 601 node, p1, p2, deltabase, cs = headertuple
600 602 flags = 0
601 603 protocol_flags = 0
602 604 return node, p1, p2, deltabase, cs, flags, protocol_flags
603 605
604 606
605 607 class cg3unpacker(cg2unpacker):
606 608 """Unpacker for cg3 streams.
607 609
608 610 cg3 streams add support for exchanging treemanifests and revlog
609 611 flags. It adds the revlog flags to the delta header and an empty chunk
610 612 separating manifests and files.
611 613 """
612 614
613 615 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
614 616 deltaheadersize = deltaheader.size
615 617 version = b'03'
616 618 _grouplistcount = 2 # One list of manifests and one list of files
617 619
618 620 def _deltaheader(self, headertuple, prevnode):
619 621 node, p1, p2, deltabase, cs, flags = headertuple
620 622 protocol_flags = 0
621 623 return node, p1, p2, deltabase, cs, flags, protocol_flags
622 624
623 625 def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
624 626 super(cg3unpacker, self)._unpackmanifests(
625 627 repo, revmap, trp, prog, addrevisioncb=addrevisioncb
626 628 )
627 629 for chunkdata in iter(self.filelogheader, {}):
628 630 # If we get here, there are directory manifests in the changegroup
629 631 d = chunkdata[b"filename"]
630 632 repo.ui.debug(b"adding %s revisions\n" % d)
631 633 deltas = self.deltaiter()
632 634 if not repo.manifestlog.getstorage(d).addgroup(
633 635 deltas, revmap, trp, addrevisioncb=addrevisioncb
634 636 ):
635 637 raise error.Abort(_(b"received dir revlog group is empty"))
636 638
637 639
638 640 class cg4unpacker(cg3unpacker):
639 641 """Unpacker for cg4 streams.
640 642
641 643 cg4 streams add support for exchanging sidedata.
642 644 """
643 645
644 646 deltaheader = _CHANGEGROUPV4_DELTA_HEADER
645 647 deltaheadersize = deltaheader.size
646 648 version = b'04'
647 649
648 650 def _deltaheader(self, headertuple, prevnode):
649 651 protocol_flags, node, p1, p2, deltabase, cs, flags = headertuple
650 652 return node, p1, p2, deltabase, cs, flags, protocol_flags
651 653
652 654 def deltachunk(self, prevnode):
653 655 res = super(cg4unpacker, self).deltachunk(prevnode)
654 656 if not res:
655 657 return res
656 658
657 659 (node, p1, p2, cs, deltabase, delta, flags, protocol_flags) = res
658 660
659 661 sidedata = {}
660 662 if protocol_flags & storageutil.CG_FLAG_SIDEDATA:
661 663 sidedata_raw = getchunk(self._stream)
662 664 sidedata = sidedatamod.deserialize_sidedata(sidedata_raw)
663 665
664 666 return node, p1, p2, cs, deltabase, delta, flags, sidedata
665 667
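Taken together, the unpacker subclasses differ mainly in their delta header layout; the progression and the resulting header sizes can be checked directly against the struct formats defined at the top of this module:

import struct

headers = {
    b'01': struct.Struct(b"20s20s20s20s"),        # node p1 p2 linknode
    b'02': struct.Struct(b"20s20s20s20s20s"),     # ... + deltabase
    b'03': struct.Struct(b">20s20s20s20s20sH"),   # ... + revlog flags
    b'04': struct.Struct(b">B20s20s20s20s20sH"),  # protocol-flags byte + v3
}
sizes = [headers[v].size for v in sorted(headers)]
assert sizes == [80, 100, 102, 103]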
666 668
667 669 class headerlessfixup(object):
668 670 def __init__(self, fh, h):
669 671 self._h = h
670 672 self._fh = fh
671 673
672 674 def read(self, n):
673 675 if self._h:
674 676 d, self._h = self._h[:n], self._h[n:]
675 677 if len(d) < n:
676 678 d += readexactly(self._fh, n - len(d))
677 679 return d
678 680 return readexactly(self._fh, n)
679 681
680 682
681 683 def _revisiondeltatochunks(repo, delta, headerfn):
682 684 """Serialize a revisiondelta to changegroup chunks."""
683 685
684 686 # The captured revision delta may be encoded as a delta against
685 687 # a base revision or as a full revision. The changegroup format
686 688 # requires that everything on the wire be deltas. So for full
687 689 # revisions, we need to invent a header that says to rewrite
688 690 # data.
689 691
690 692 if delta.delta is not None:
691 693 prefix, data = b'', delta.delta
692 694 elif delta.basenode == repo.nullid:
693 695 data = delta.revision
694 696 prefix = mdiff.trivialdiffheader(len(data))
695 697 else:
696 698 data = delta.revision
697 699 prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))
698 700
699 701 meta = headerfn(delta)
700 702
701 703 yield chunkheader(len(meta) + len(prefix) + len(data))
702 704 yield meta
703 705 if prefix:
704 706 yield prefix
705 707 yield data
706 708
707 709 if delta.protocol_flags & storageutil.CG_FLAG_SIDEDATA:
708 710 # Need a separate chunk for sidedata to be able to differentiate
709 711 # "raw delta" length and sidedata length
710 712 sidedata = delta.sidedata
711 713 yield chunkheader(len(sidedata))
712 714 yield sidedata
713 715
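The "invented header" mentioned above is a bdiff-style patch header: a (start, end, length) triple packed big-endian. A sketch of what the two mdiff helpers used here amount to (a reconstruction of the bdiff patch format; check mercurial.mdiff for the authoritative helpers):

import struct

def trivial_diff_header(datalen):
    # replace bytes [0, 0) of an empty base: a full text posing as a delta
    return struct.pack(b">lll", 0, 0, datalen)

def replace_diff_header(oldlen, newlen):
    # replace the entire old text [0, oldlen) with newlen new bytes
    return struct.pack(b">lll", 0, oldlen, newlen)

data = b"full revision text"
chunk_payload = trivial_diff_header(len(data)) + data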
714 716
715 717 def _sortnodesellipsis(store, nodes, cl, lookup):
716 718 """Sort nodes for changegroup generation."""
717 719 # Ellipses serving mode.
718 720 #
719 721 # In a perfect world, we'd generate better ellipsis-ified graphs
720 722 # for non-changelog revlogs. In practice, we haven't started doing
721 723 # that yet, so the resulting DAGs for the manifestlog and filelogs
722 724 # are actually full of bogus parentage on all the ellipsis
723 725 # nodes. This has the side effect that, while the contents are
724 726 # correct, the individual DAGs might be completely out of whack in
725 727 # a case like 882681bc3166 and its ancestors (back about 10
726 728 # revisions or so) in the main hg repo.
727 729 #
728 730 # The one invariant we *know* holds is that the new (potentially
729 731 # bogus) DAG shape will be valid if we order the nodes in the
730 732 # order that they're introduced in dramatis personae by the
731 733 # changelog, so what we do is we sort the non-changelog histories
732 734 # by the order in which they are used by the changelog.
733 735 key = lambda n: cl.rev(lookup(n))
734 736 return sorted(nodes, key=key)
735 737
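The sort key is simply "position of the linked changeset in the changelog"; with plain dicts standing in for the changelog and the lookup callback (illustrative stand-ins, not Mercurial objects), the same ordering rule looks like this:

cl_rev = {b'c1': 0, b'c2': 1, b'c3': 2}            # changelog node -> rev
link = {b'fA': b'c3', b'fB': b'c1', b'fC': b'c2'}  # filelog node -> linknode

key = lambda n: cl_rev[link[n]]
assert sorted([b'fA', b'fB', b'fC'], key=key) == [b'fB', b'fC', b'fA']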
736 738
737 739 def _resolvenarrowrevisioninfo(
738 740 cl,
739 741 store,
740 742 ischangelog,
741 743 rev,
742 744 linkrev,
743 745 linknode,
744 746 clrevtolocalrev,
745 747 fullclnodes,
746 748 precomputedellipsis,
747 749 ):
748 750 linkparents = precomputedellipsis[linkrev]
749 751
750 752 def local(clrev):
751 753 """Turn a changelog revnum into a local revnum.
752 754
753 755 The ellipsis dag is stored as revnums on the changelog,
754 756 but when we're producing ellipsis entries for
755 757 non-changelog revlogs, we need to turn those numbers into
756 758 something local. This does that for us, and during the
757 759 changelog sending phase will also expand the stored
758 760 mappings as needed.
759 761 """
760 762 if clrev == nullrev:
761 763 return nullrev
762 764
763 765 if ischangelog:
764 766 return clrev
765 767
766 768 # Walk the ellipsis-ized changelog breadth-first looking for a
767 769 # change that has been linked from the current revlog.
768 770 #
769 771 # For a flat manifest revlog only a single step should be necessary
770 772 # as all relevant changelog entries are relevant to the flat
771 773 # manifest.
772 774 #
773 775 # For a filelog or tree manifest dirlog however not every changelog
774 776 # entry will have been relevant, so we need to skip some changelog
775 777 # nodes even after ellipsis-izing.
776 778 walk = [clrev]
777 779 while walk:
778 780 p = walk[0]
779 781 walk = walk[1:]
780 782 if p in clrevtolocalrev:
781 783 return clrevtolocalrev[p]
782 784 elif p in fullclnodes:
783 785 walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
784 786 elif p in precomputedellipsis:
785 787 walk.extend(
786 788 [pp for pp in precomputedellipsis[p] if pp != nullrev]
787 789 )
788 790 else:
789 791 # In this case, we've got an ellipsis with parents
790 792 # outside the current bundle (likely an
791 793 # incremental pull). We "know" that we can use the
792 794 # value of this same revlog at whatever revision
793 795 # is pointed to by linknode. "Know" is in scare
794 796 # quotes because I haven't done enough examination
795 797 # of edge cases to convince myself this is really
796 798 # a fact - it works for all the (admittedly
797 799 # thorough) cases in our testsuite, but I would be
798 800 # somewhat unsurprised to find a case in the wild
799 801 # where this breaks down a bit. That said, I don't
800 802 # know if it would hurt anything.
801 803 for i in pycompat.xrange(rev, 0, -1):
802 804 if store.linkrev(i) == clrev:
803 805 return i
804 806 # We failed to resolve a parent for this node, so
805 807 # we crash the changegroup construction.
806 808 if util.safehasattr(store, 'target'):
807 809 target = store.display_id
808 810 else:
809 811 # some revlog not actually a revlog
810 812 target = store._revlog.display_id
811 813
812 814 raise error.Abort(
813 815 b"unable to resolve parent while packing '%s' %r"
814 816 b' for changeset %r' % (target, rev, clrev)
815 817 )
816 818
817 819 return nullrev
818 820
819 821 if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
820 822 p1, p2 = nullrev, nullrev
821 823 elif len(linkparents) == 1:
822 824 (p1,) = sorted(local(p) for p in linkparents)
823 825 p2 = nullrev
824 826 else:
825 827 p1, p2 = sorted(local(p) for p in linkparents)
826 828
827 829 p1node, p2node = store.node(p1), store.node(p2)
828 830
829 831 return p1node, p2node, linknode
830 832
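The heart of local() is a breadth-first walk over the ellipsis graph until it reaches a changelog rev that the current revlog actually links to. The same walk over a toy graph (plain dicts; nullrev filtering and the linkrev-scan fallback are elided here):

def local(clrev, clrevtolocalrev, fullparents, ellipsisparents):
    # breadth-first: stop at the first rev this revlog has a mapping for
    walk = [clrev]
    while walk:
        p = walk.pop(0)
        if p in clrevtolocalrev:
            return clrevtolocalrev[p]
        walk.extend(fullparents.get(p, ellipsisparents.get(p, ())))
    return None  # the real code falls back to scanning linkrevs

# rev 5 is an ellipsis whose parents are (3, 1); only rev 1 maps locally
assert local(5, {1: 0}, {}, {5: (3, 1), 3: ()}) == 0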
831 833
832 834 def deltagroup(
833 835 repo,
834 836 store,
835 837 nodes,
836 838 ischangelog,
837 839 lookup,
838 840 forcedeltaparentprev,
839 841 topic=None,
840 842 ellipses=False,
841 843 clrevtolocalrev=None,
842 844 fullclnodes=None,
843 845 precomputedellipsis=None,
844 846 sidedata_helpers=None,
845 847 ):
846 848 """Calculate deltas for a set of revisions.
847 849
848 850 Is a generator of ``revisiondelta`` instances.
849 851
850 852 If topic is not None, progress detail will be generated using this
851 853 topic name (e.g. changesets, manifests, etc).
852 854
853 855 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
854 856 `sidedata_helpers`.
855 857 """
856 858 if not nodes:
857 859 return
858 860
859 861 cl = repo.changelog
860 862
861 863 if ischangelog:
862 864 # `hg log` shows changesets in storage order. To preserve order
863 865 # across clones, send out changesets in storage order.
864 866 nodesorder = b'storage'
865 867 elif ellipses:
866 868 nodes = _sortnodesellipsis(store, nodes, cl, lookup)
867 869 nodesorder = b'nodes'
868 870 else:
869 871 nodesorder = None
870 872
871 873 # Perform ellipses filtering and revision massaging. We do this before
872 874 # emitrevisions() because a) filtering out revisions creates less work
873 875 # for emitrevisions() b) dropping revisions would break emitrevisions()'s
874 876 # assumptions about delta choices and we would possibly send a delta
875 877 # referencing a missing base revision.
876 878 #
877 879 # Also, calling lookup() has side-effects with regards to populating
878 880 # data structures. If we don't call lookup() for each node or if we call
879 881 # lookup() after the first pass through each node, things can break -
880 882 # possibly intermittently depending on the python hash seed! For that
881 883 # reason, we store a mapping of all linknodes during the initial node
882 884 # pass rather than use lookup() on the output side.
883 885 if ellipses:
884 886 filtered = []
885 887 adjustedparents = {}
886 888 linknodes = {}
887 889
888 890 for node in nodes:
889 891 rev = store.rev(node)
890 892 linknode = lookup(node)
891 893 linkrev = cl.rev(linknode)
892 894 clrevtolocalrev[linkrev] = rev
893 895
894 896 # If linknode is in fullclnodes, it means the corresponding
895 897 # changeset was a full changeset and is being sent unaltered.
896 898 if linknode in fullclnodes:
897 899 linknodes[node] = linknode
898 900
899 901 # If the corresponding changeset wasn't in the set computed
900 902 # as relevant to us, it should be dropped outright.
901 903 elif linkrev not in precomputedellipsis:
902 904 continue
903 905
904 906 else:
905 907 # We could probably do this later and avoid the dict
906 908 # holding state. But it likely doesn't matter.
907 909 p1node, p2node, linknode = _resolvenarrowrevisioninfo(
908 910 cl,
909 911 store,
910 912 ischangelog,
911 913 rev,
912 914 linkrev,
913 915 linknode,
914 916 clrevtolocalrev,
915 917 fullclnodes,
916 918 precomputedellipsis,
917 919 )
918 920
919 921 adjustedparents[node] = (p1node, p2node)
920 922 linknodes[node] = linknode
921 923
922 924 filtered.append(node)
923 925
924 926 nodes = filtered
925 927
926 928 # We expect the first pass to be fast, so we only engage the progress
927 929 # meter for constructing the revision deltas.
928 930 progress = None
929 931 if topic is not None:
930 932 progress = repo.ui.makeprogress(
931 933 topic, unit=_(b'chunks'), total=len(nodes)
932 934 )
933 935
934 936 configtarget = repo.ui.config(b'devel', b'bundle.delta')
935 937 if configtarget not in (b'', b'p1', b'full'):
936 938 msg = _(b"""config "devel.bundle.delta" has unknown value: %s""")
937 939 repo.ui.warn(msg % configtarget)
938 940
939 941 deltamode = repository.CG_DELTAMODE_STD
940 942 if forcedeltaparentprev:
941 943 deltamode = repository.CG_DELTAMODE_PREV
942 944 elif configtarget == b'p1':
943 945 deltamode = repository.CG_DELTAMODE_P1
944 946 elif configtarget == b'full':
945 947 deltamode = repository.CG_DELTAMODE_FULL
946 948
947 949 revisions = store.emitrevisions(
948 950 nodes,
949 951 nodesorder=nodesorder,
950 952 revisiondata=True,
951 953 assumehaveparentrevisions=not ellipses,
952 954 deltamode=deltamode,
953 955 sidedata_helpers=sidedata_helpers,
954 956 )
955 957
956 958 for i, revision in enumerate(revisions):
957 959 if progress:
958 960 progress.update(i + 1)
959 961
960 962 if ellipses:
961 963 linknode = linknodes[revision.node]
962 964
963 965 if revision.node in adjustedparents:
964 966 p1node, p2node = adjustedparents[revision.node]
965 967 revision.p1node = p1node
966 968 revision.p2node = p2node
967 969 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
968 970
969 971 else:
970 972 linknode = lookup(revision.node)
971 973
972 974 revision.linknode = linknode
973 975 yield revision
974 976
975 977 if progress:
976 978 progress.complete()
977 979
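The devel.bundle.delta knob maps onto the delta modes as shown in deltagroup() above; a condensed restatement of that decision (the strings stand in for the repository.CG_DELTAMODE_* constants):

def pick_deltamode(configtarget, forcedeltaparentprev):
    # mirrors the selection in deltagroup(); b'' means "standard"
    if forcedeltaparentprev:
        return 'PREV'   # cg1 compatibility: delta against previous chunk
    if configtarget == b'p1':
        return 'P1'     # always delta against p1
    if configtarget == b'full':
        return 'FULL'   # send full snapshots
    return 'STD'

assert pick_deltamode(b'', False) == 'STD'
assert pick_deltamode(b'p1', True) == 'PREV'  # force wins over config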
978 980
979 981 class cgpacker(object):
980 982 def __init__(
981 983 self,
982 984 repo,
983 985 oldmatcher,
984 986 matcher,
985 987 version,
986 988 builddeltaheader,
987 989 manifestsend,
988 990 forcedeltaparentprev=False,
989 991 bundlecaps=None,
990 992 ellipses=False,
991 993 shallow=False,
992 994 ellipsisroots=None,
993 995 fullnodes=None,
994 996 remote_sidedata=None,
995 997 ):
996 998 """Given a source repo, construct a bundler.
997 999
998 1000 oldmatcher is a matcher that matches on files the client already has.
999 1001 These will not be included in the changegroup.
1000 1002
1001 1003 matcher is a matcher that matches on files to include in the
1002 1004 changegroup. Used to facilitate sparse changegroups.
1003 1005
1004 1006 forcedeltaparentprev indicates whether delta parents must be against
1005 1007 the previous revision in a delta group. This should only be used for
1006 1008 compatibility with changegroup version 1.
1007 1009
1008 1010 builddeltaheader is a callable that constructs the header for a group
1009 1011 delta.
1010 1012
1011 1013 manifestsend is a chunk to send after manifests have been fully emitted.
1012 1014
1013 1015 ellipses indicates whether ellipsis serving mode is enabled.
1014 1016
1015 1017 bundlecaps is optional and can be used to specify the set of
1016 1018 capabilities which can be used to build the bundle. While bundlecaps is
1017 1019 unused in core Mercurial, extensions rely on this feature to communicate
1018 1020 capabilities to customize the changegroup packer.
1019 1021
1020 1022 shallow indicates whether shallow data might be sent. The packer may
1021 1023 need to pack file contents not introduced by the changes being packed.
1022 1024
1023 1025 fullnodes is the set of changelog nodes which should not be ellipsis
1024 1026 nodes. We store this rather than the set of nodes that should be
1025 1027 ellipsis because for very large histories we expect this to be
1026 1028 significantly smaller.
1027 1029
1028 1030 remote_sidedata is the set of sidedata categories wanted by the remote.
1029 1031 """
1030 1032 assert oldmatcher
1031 1033 assert matcher
1032 1034 self._oldmatcher = oldmatcher
1033 1035 self._matcher = matcher
1034 1036
1035 1037 self.version = version
1036 1038 self._forcedeltaparentprev = forcedeltaparentprev
1037 1039 self._builddeltaheader = builddeltaheader
1038 1040 self._manifestsend = manifestsend
1039 1041 self._ellipses = ellipses
1040 1042
1041 1043 # Set of capabilities we can use to build the bundle.
1042 1044 if bundlecaps is None:
1043 1045 bundlecaps = set()
1044 1046 self._bundlecaps = bundlecaps
1045 1047 if remote_sidedata is None:
1046 1048 remote_sidedata = set()
1047 1049 self._remote_sidedata = remote_sidedata
1048 1050 self._isshallow = shallow
1049 1051 self._fullclnodes = fullnodes
1050 1052
1051 1053 # Maps ellipsis revs to their roots at the changelog level.
1052 1054 self._precomputedellipsis = ellipsisroots
1053 1055
1054 1056 self._repo = repo
1055 1057
1056 1058 if self._repo.ui.verbose and not self._repo.ui.debugflag:
1057 1059 self._verbosenote = self._repo.ui.note
1058 1060 else:
1059 1061 self._verbosenote = lambda s: None
1060 1062
1061 1063 def generate(
1062 1064 self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
1063 1065 ):
1064 1066 """Yield a sequence of changegroup byte chunks.
1065 1067 If changelog is False, changelog data won't be added to changegroup
1066 1068 """
1067 1069
1068 1070 repo = self._repo
1069 1071 cl = repo.changelog
1070 1072
1071 1073 self._verbosenote(_(b'uncompressed size of bundle content:\n'))
1072 1074 size = 0
1073 1075
1074 1076 sidedata_helpers = None
1075 1077 if self.version == b'04':
1076 1078 remote_sidedata = self._remote_sidedata
1077 1079 if source == b'strip':
1078 1080 # We're our own remote when stripping, get the no-op helpers
1079 1081 # TODO a better approach would be for the strip bundle to
1080 1082 # correctly advertise its sidedata categories directly.
1081 1083 remote_sidedata = repo._wanted_sidedata
1082 1084 sidedata_helpers = sidedatamod.get_sidedata_helpers(
1083 1085 repo, remote_sidedata
1084 1086 )
1085 1087
1086 1088 clstate, deltas = self._generatechangelog(
1087 1089 cl,
1088 1090 clnodes,
1089 1091 generate=changelog,
1090 1092 sidedata_helpers=sidedata_helpers,
1091 1093 )
1092 1094 for delta in deltas:
1093 1095 for chunk in _revisiondeltatochunks(
1094 1096 self._repo, delta, self._builddeltaheader
1095 1097 ):
1096 1098 size += len(chunk)
1097 1099 yield chunk
1098 1100
1099 1101 close = closechunk()
1100 1102 size += len(close)
1101 1103 yield closechunk()
1102 1104
1103 1105 self._verbosenote(_(b'%8.i (changelog)\n') % size)
1104 1106
1105 1107 clrevorder = clstate[b'clrevorder']
1106 1108 manifests = clstate[b'manifests']
1107 1109 changedfiles = clstate[b'changedfiles']
1108 1110
1109 1111 # We need to make sure that the linkrev in the changegroup refers to
1110 1112 # the first changeset that introduced the manifest or file revision.
1111 1113 # The fastpath is usually safer than the slowpath, because the filelogs
1112 1114 # are walked in revlog order.
1113 1115 #
1114 1116 # When taking the slowpath when the manifest revlog uses generaldelta,
1115 1117 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
1116 1118 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
1117 1119 #
1118 1120 # When taking the fastpath, we are only vulnerable to reordering
1119 1121 # of the changelog itself. The changelog never uses generaldelta and is
1120 1122 # never reordered. To handle this case, we simply take the slowpath,
1121 1123 # which already has the 'clrevorder' logic. This was also fixed in
1122 1124 # cc0ff93d0c0c.
1123 1125
1124 1126 # Treemanifests don't work correctly with fastpathlinkrev
1125 1127 # either, because we don't discover which directory nodes to
1126 1128 # send along with files. This could probably be fixed.
1127 1129 fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)
1128 1130
1129 1131 fnodes = {} # needed file nodes
1130 1132
1131 1133 size = 0
1132 1134 it = self.generatemanifests(
1133 1135 commonrevs,
1134 1136 clrevorder,
1135 1137 fastpathlinkrev,
1136 1138 manifests,
1137 1139 fnodes,
1138 1140 source,
1139 1141 clstate[b'clrevtomanifestrev'],
1140 1142 sidedata_helpers=sidedata_helpers,
1141 1143 )
1142 1144
1143 1145 for tree, deltas in it:
1144 1146 if tree:
1145 1147 assert self.version in (b'03', b'04')
1146 1148 chunk = _fileheader(tree)
1147 1149 size += len(chunk)
1148 1150 yield chunk
1149 1151
1150 1152 for delta in deltas:
1151 1153 chunks = _revisiondeltatochunks(
1152 1154 self._repo, delta, self._builddeltaheader
1153 1155 )
1154 1156 for chunk in chunks:
1155 1157 size += len(chunk)
1156 1158 yield chunk
1157 1159
1158 1160 close = closechunk()
1159 1161 size += len(close)
1160 1162 yield close
1161 1163
1162 1164 self._verbosenote(_(b'%8.i (manifests)\n') % size)
1163 1165 yield self._manifestsend
1164 1166
1165 1167 mfdicts = None
1166 1168 if self._ellipses and self._isshallow:
1167 1169 mfdicts = [
1168 1170 (repo.manifestlog[n].read(), lr)
1169 1171 for (n, lr) in pycompat.iteritems(manifests)
1170 1172 ]
1171 1173
1172 1174 manifests.clear()
1173 1175 clrevs = {cl.rev(x) for x in clnodes}
1174 1176
1175 1177 it = self.generatefiles(
1176 1178 changedfiles,
1177 1179 commonrevs,
1178 1180 source,
1179 1181 mfdicts,
1180 1182 fastpathlinkrev,
1181 1183 fnodes,
1182 1184 clrevs,
1183 1185 sidedata_helpers=sidedata_helpers,
1184 1186 )
1185 1187
1186 1188 for path, deltas in it:
1187 1189 h = _fileheader(path)
1188 1190 size = len(h)
1189 1191 yield h
1190 1192
1191 1193 for delta in deltas:
1192 1194 chunks = _revisiondeltatochunks(
1193 1195 self._repo, delta, self._builddeltaheader
1194 1196 )
1195 1197 for chunk in chunks:
1196 1198 size += len(chunk)
1197 1199 yield chunk
1198 1200
1199 1201 close = closechunk()
1200 1202 size += len(close)
1201 1203 yield close
1202 1204
1203 1205 self._verbosenote(_(b'%8.i %s\n') % (size, path))
1204 1206
1205 1207 yield closechunk()
1206 1208
1207 1209 if clnodes:
1208 1210 repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1209 1211
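Schematically, generate() always emits groups in the fixed order the unpackers expect; a condensed restatement of its yields, using this module's own framing helpers with the group contents elided:

def stream_shape(cl_chunks, manifest_groups, file_groups, manifestsend):
    # mirrors the order of cgpacker.generate()'s output
    for c in cl_chunks:
        yield c
    yield closechunk()                    # end of changelog group
    for tree, chunks in manifest_groups:  # tree == b'' for the root
        if tree:
            yield _fileheader(tree)       # cg3/cg4 tree manifests only
        for c in chunks:
            yield c
        yield closechunk()
    yield manifestsend                    # closechunk() in cg3/cg4, b'' before
    for path, chunks in file_groups:
        yield _fileheader(path)
        for c in chunks:
            yield c
        yield closechunk()
    yield closechunk()                    # terminates the whole file list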
1210 1212 def _generatechangelog(
1211 1213 self, cl, nodes, generate=True, sidedata_helpers=None
1212 1214 ):
1213 1215 """Generate data for changelog chunks.
1214 1216
1215 1217 Returns a 2-tuple of a dict containing state and an iterable of
1216 1218 byte chunks. The state will not be fully populated until the
1217 1219 chunk stream has been fully consumed.
1218 1220
1219 1221 if generate is False, the state will be fully populated and no chunk
1220 1222 stream will be yielded
1221 1223
1222 1224 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
1223 1225 `sidedata_helpers`.
1224 1226 """
1225 1227 clrevorder = {}
1226 1228 manifests = {}
1227 1229 mfl = self._repo.manifestlog
1228 1230 changedfiles = set()
1229 1231 clrevtomanifestrev = {}
1230 1232
1231 1233 state = {
1232 1234 b'clrevorder': clrevorder,
1233 1235 b'manifests': manifests,
1234 1236 b'changedfiles': changedfiles,
1235 1237 b'clrevtomanifestrev': clrevtomanifestrev,
1236 1238 }
1237 1239
1238 1240 if not (generate or self._ellipses):
1239 1241 # sort the nodes in storage order
1240 1242 nodes = sorted(nodes, key=cl.rev)
1241 1243 for node in nodes:
1242 1244 c = cl.changelogrevision(node)
1243 1245 clrevorder[node] = len(clrevorder)
1244 1246 # record the first changeset introducing this manifest version
1245 1247 manifests.setdefault(c.manifest, node)
1246 1248 # Record a complete list of potentially-changed files in
1247 1249 # this manifest.
1248 1250 changedfiles.update(c.files)
1249 1251
1250 1252 return state, ()
1251 1253
1252 1254 # Callback for the changelog, used to collect changed files and
1253 1255 # manifest nodes.
1254 1256 # Returns the linkrev node (identity in the changelog case).
1255 1257 def lookupcl(x):
1256 1258 c = cl.changelogrevision(x)
1257 1259 clrevorder[x] = len(clrevorder)
1258 1260
1259 1261 if self._ellipses:
1260 1262 # Only update manifests if x is going to be sent. Otherwise we
1261 1263 # end up with bogus linkrevs specified for manifests and
1262 1264 # we skip some manifest nodes that we should otherwise
1263 1265 # have sent.
1264 1266 if (
1265 1267 x in self._fullclnodes
1266 1268 or cl.rev(x) in self._precomputedellipsis
1267 1269 ):
1268 1270
1269 1271 manifestnode = c.manifest
1270 1272 # Record the first changeset introducing this manifest
1271 1273 # version.
1272 1274 manifests.setdefault(manifestnode, x)
1273 1275 # Set this narrow-specific dict so we have the lowest
1274 1276 # manifest revnum to look up for this cl revnum. (Part of
1275 1277 # mapping changelog ellipsis parents to manifest ellipsis
1276 1278 # parents)
1277 1279 clrevtomanifestrev.setdefault(
1278 1280 cl.rev(x), mfl.rev(manifestnode)
1279 1281 )
1280 1282 # We can't trust the changed files list in the changeset if the
1281 1283 # client requested a shallow clone.
1282 1284 if self._isshallow:
1283 1285 changedfiles.update(mfl[c.manifest].read().keys())
1284 1286 else:
1285 1287 changedfiles.update(c.files)
1286 1288 else:
1287 1289 # record the first changeset introducing this manifest version
1288 1290 manifests.setdefault(c.manifest, x)
1289 1291 # Record a complete list of potentially-changed files in
1290 1292 # this manifest.
1291 1293 changedfiles.update(c.files)
1292 1294
1293 1295 return x
1294 1296
1295 1297 gen = deltagroup(
1296 1298 self._repo,
1297 1299 cl,
1298 1300 nodes,
1299 1301 True,
1300 1302 lookupcl,
1301 1303 self._forcedeltaparentprev,
1302 1304 ellipses=self._ellipses,
1303 1305 topic=_(b'changesets'),
1304 1306 clrevtolocalrev={},
1305 1307 fullclnodes=self._fullclnodes,
1306 1308 precomputedellipsis=self._precomputedellipsis,
1307 1309 sidedata_helpers=sidedata_helpers,
1308 1310 )
1309 1311
1310 1312 return state, gen
1311 1313
1312 1314 def generatemanifests(
1313 1315 self,
1314 1316 commonrevs,
1315 1317 clrevorder,
1316 1318 fastpathlinkrev,
1317 1319 manifests,
1318 1320 fnodes,
1319 1321 source,
1320 1322 clrevtolocalrev,
1321 1323 sidedata_helpers=None,
1322 1324 ):
1323 1325 """Returns an iterator of changegroup chunks containing manifests.
1324 1326
1325 1327 `source` is unused here, but is used by extensions like remotefilelog to
1326 1328 change what is sent based on pulls vs pushes, etc.
1327 1329
1328 1330 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
1329 1331 `sidedata_helpers`.
1330 1332 """
1331 1333 repo = self._repo
1332 1334 mfl = repo.manifestlog
1333 1335 tmfnodes = {b'': manifests}
1334 1336
1335 1337 # Callback for the manifest, used to collect linkrevs for filelog
1336 1338 # revisions.
1337 1339 # Returns the linkrev node (collected in lookupcl).
1338 1340 def makelookupmflinknode(tree, nodes):
1339 1341 if fastpathlinkrev:
1340 1342 assert not tree
1341 1343
1342 1344 # pytype: disable=unsupported-operands
1343 1345 return manifests.__getitem__
1344 1346 # pytype: enable=unsupported-operands
1345 1347
1346 1348 def lookupmflinknode(x):
1347 1349 """Callback for looking up the linknode for manifests.
1348 1350
1349 1351 Returns the linkrev node for the specified manifest.
1350 1352
1351 1353 SIDE EFFECT:
1352 1354
1353 1355 1) fclnodes gets populated with the list of relevant
1354 1356 file nodes if we're not using fastpathlinkrev
1355 1357 2) When treemanifests are in use, collects treemanifest nodes
1356 1358 to send
1357 1359
1358 1360 Note that this means manifests must be completely sent to
1359 1361 the client before you can trust the list of files and
1360 1362 treemanifests to send.
1361 1363 """
1362 1364 clnode = nodes[x]
1363 1365 mdata = mfl.get(tree, x).readfast(shallow=True)
1364 1366 for p, n, fl in mdata.iterentries():
1365 1367 if fl == b't': # subdirectory manifest
1366 1368 subtree = tree + p + b'/'
1367 1369 tmfclnodes = tmfnodes.setdefault(subtree, {})
1368 1370 tmfclnode = tmfclnodes.setdefault(n, clnode)
1369 1371 if clrevorder[clnode] < clrevorder[tmfclnode]:
1370 1372 tmfclnodes[n] = clnode
1371 1373 else:
1372 1374 f = tree + p
1373 1375 fclnodes = fnodes.setdefault(f, {})
1374 1376 fclnode = fclnodes.setdefault(n, clnode)
1375 1377 if clrevorder[clnode] < clrevorder[fclnode]:
1376 1378 fclnodes[n] = clnode
1377 1379 return clnode
1378 1380
1379 1381 return lookupmflinknode
1380 1382
1381 1383 while tmfnodes:
1382 1384 tree, nodes = tmfnodes.popitem()
1383 1385
1384 1386 should_visit = self._matcher.visitdir(tree[:-1])
1385 1387 if tree and not should_visit:
1386 1388 continue
1387 1389
1388 1390 store = mfl.getstorage(tree)
1389 1391
1390 1392 if not should_visit:
1391 1393 # No nodes to send because this directory is out of
1392 1394 # the client's view of the repository (probably
1393 1395 # because of narrow clones). Do this even for the root
1394 1396 # directory (tree=='')
1395 1397 prunednodes = []
1396 1398 else:
1397 1399 # Avoid sending any manifest nodes we can prove the
1398 1400 # client already has by checking linkrevs. See the
1399 1401 # related comment in generatefiles().
1400 1402 prunednodes = self._prunemanifests(store, nodes, commonrevs)
1401 1403
1402 1404 if tree and not prunednodes:
1403 1405 continue
1404 1406
1405 1407 lookupfn = makelookupmflinknode(tree, nodes)
1406 1408
1407 1409 deltas = deltagroup(
1408 1410 self._repo,
1409 1411 store,
1410 1412 prunednodes,
1411 1413 False,
1412 1414 lookupfn,
1413 1415 self._forcedeltaparentprev,
1414 1416 ellipses=self._ellipses,
1415 1417 topic=_(b'manifests'),
1416 1418 clrevtolocalrev=clrevtolocalrev,
1417 1419 fullclnodes=self._fullclnodes,
1418 1420 precomputedellipsis=self._precomputedellipsis,
1419 1421 sidedata_helpers=sidedata_helpers,
1420 1422 )
1421 1423
1422 1424 if not self._oldmatcher.visitdir(store.tree[:-1]):
1423 1425 yield tree, deltas
1424 1426 else:
1425 1427 # 'deltas' is a generator and we need to consume it even if
1426 1428 # we are not going to send it because a side-effect is that
1427 1429 # it updates tmfnodes (via lookupfn)
1428 1430 for d in deltas:
1429 1431 pass
1430 1432 if not tree:
1431 1433 yield tree, []
1432 1434
1433 1435 def _prunemanifests(self, store, nodes, commonrevs):
1434 1436 if not self._ellipses:
1435 1437 # In the non-ellipses case, and for large repositories, it is better to
1436 1438 # avoid calling store.rev and store.linkrev on a lot of
1437 1439 # nodes than to send some extra data
1438 1440 return nodes.copy()
1439 1441 # This is split out as a separate method to allow filtering
1440 1442 # commonrevs in extension code.
1441 1443 #
1442 1444 # TODO(augie): this shouldn't be required, instead we should
1443 1445 # make filtering of revisions to send delegated to the store
1444 1446 # layer.
1445 1447 frev, flr = store.rev, store.linkrev
1446 1448 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1447 1449
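The linkrev filter used in the ellipses branch is the same trick applied again in generatefiles() below: skip any node whose introducing changeset is already common. In isolation, with dicts standing in for store.rev and store.linkrev:

rev = {b'n1': 0, b'n2': 1, b'n3': 2}   # node -> local rev
linkrev = {0: 10, 1: 11, 2: 12}        # local rev -> changelog rev
commonrevs = {10, 12}                  # client already has these csets

pruned = [n for n in (b'n1', b'n2', b'n3')
          if linkrev[rev[n]] not in commonrevs]
assert pruned == [b'n2']
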
1448 1450 # The 'source' parameter is useful for extensions
1449 1451 def generatefiles(
1450 1452 self,
1451 1453 changedfiles,
1452 1454 commonrevs,
1453 1455 source,
1454 1456 mfdicts,
1455 1457 fastpathlinkrev,
1456 1458 fnodes,
1457 1459 clrevs,
1458 1460 sidedata_helpers=None,
1459 1461 ):
1460 1462 changedfiles = [
1461 1463 f
1462 1464 for f in changedfiles
1463 1465 if self._matcher(f) and not self._oldmatcher(f)
1464 1466 ]
1465 1467
1466 1468 if not fastpathlinkrev:
1467 1469
1468 1470 def normallinknodes(unused, fname):
1469 1471 return fnodes.get(fname, {})
1470 1472
1471 1473 else:
1472 1474 cln = self._repo.changelog.node
1473 1475
1474 1476 def normallinknodes(store, fname):
1475 1477 flinkrev = store.linkrev
1476 1478 fnode = store.node
1477 1479 revs = ((r, flinkrev(r)) for r in store)
1478 1480 return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}
1479 1481
1480 1482 clrevtolocalrev = {}
1481 1483
1482 1484 if self._isshallow:
1483 1485 # In a shallow clone, the linknodes callback needs to also include
1484 1486 # those file nodes that are in the manifests we sent but weren't
1485 1487 # introduced by those manifests.
1486 1488 commonctxs = [self._repo[c] for c in commonrevs]
1487 1489 clrev = self._repo.changelog.rev
1488 1490
1489 1491 def linknodes(flog, fname):
1490 1492 for c in commonctxs:
1491 1493 try:
1492 1494 fnode = c.filenode(fname)
1493 1495 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1494 1496 except error.ManifestLookupError:
1495 1497 pass
1496 1498 links = normallinknodes(flog, fname)
1497 1499 if len(links) != len(mfdicts):
1498 1500 for mf, lr in mfdicts:
1499 1501 fnode = mf.get(fname, None)
1500 1502 if fnode in links:
1501 1503 links[fnode] = min(links[fnode], lr, key=clrev)
1502 1504 elif fnode:
1503 1505 links[fnode] = lr
1504 1506 return links
1505 1507
1506 1508 else:
1507 1509 linknodes = normallinknodes
1508 1510
1509 1511 repo = self._repo
1510 1512 progress = repo.ui.makeprogress(
1511 1513 _(b'files'), unit=_(b'files'), total=len(changedfiles)
1512 1514 )
1513 1515 for i, fname in enumerate(sorted(changedfiles)):
1514 1516 filerevlog = repo.file(fname)
1515 1517 if not filerevlog:
1516 1518 raise error.Abort(
1517 1519 _(b"empty or missing file data for %s") % fname
1518 1520 )
1519 1521
1520 1522 clrevtolocalrev.clear()
1521 1523
1522 1524 linkrevnodes = linknodes(filerevlog, fname)
1523 1525 # Lookup for filenodes, we collected the linkrev nodes above in the
1524 1526 # fastpath case and with lookupmf in the slowpath case.
1525 1527 def lookupfilelog(x):
1526 1528 return linkrevnodes[x]
1527 1529
1528 1530 frev, flr = filerevlog.rev, filerevlog.linkrev
1529 1531 # Skip sending any filenode we know the client already
1530 1532 # has. This avoids over-sending files relatively
1531 1533 # inexpensively, so it's not a problem if we under-filter
1532 1534 # here.
1533 1535 filenodes = [
1534 1536 n for n in linkrevnodes if flr(frev(n)) not in commonrevs
1535 1537 ]
1536 1538
1537 1539 if not filenodes:
1538 1540 continue
1539 1541
1540 1542 progress.update(i + 1, item=fname)
1541 1543
1542 1544 deltas = deltagroup(
1543 1545 self._repo,
1544 1546 filerevlog,
1545 1547 filenodes,
1546 1548 False,
1547 1549 lookupfilelog,
1548 1550 self._forcedeltaparentprev,
1549 1551 ellipses=self._ellipses,
1550 1552 clrevtolocalrev=clrevtolocalrev,
1551 1553 fullclnodes=self._fullclnodes,
1552 1554 precomputedellipsis=self._precomputedellipsis,
1553 1555 sidedata_helpers=sidedata_helpers,
1554 1556 )
1555 1557
1556 1558 yield fname, deltas
1557 1559
1558 1560 progress.complete()
1559 1561
1560 1562
1561 1563 def _makecg1packer(
1562 1564 repo,
1563 1565 oldmatcher,
1564 1566 matcher,
1565 1567 bundlecaps,
1566 1568 ellipses=False,
1567 1569 shallow=False,
1568 1570 ellipsisroots=None,
1569 1571 fullnodes=None,
1570 1572 remote_sidedata=None,
1571 1573 ):
1572 1574 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1573 1575 d.node, d.p1node, d.p2node, d.linknode
1574 1576 )
1575 1577
1576 1578 return cgpacker(
1577 1579 repo,
1578 1580 oldmatcher,
1579 1581 matcher,
1580 1582 b'01',
1581 1583 builddeltaheader=builddeltaheader,
1582 1584 manifestsend=b'',
1583 1585 forcedeltaparentprev=True,
1584 1586 bundlecaps=bundlecaps,
1585 1587 ellipses=ellipses,
1586 1588 shallow=shallow,
1587 1589 ellipsisroots=ellipsisroots,
1588 1590 fullnodes=fullnodes,
1589 1591 )
1590 1592
1591 1593
1592 1594 def _makecg2packer(
1593 1595 repo,
1594 1596 oldmatcher,
1595 1597 matcher,
1596 1598 bundlecaps,
1597 1599 ellipses=False,
1598 1600 shallow=False,
1599 1601 ellipsisroots=None,
1600 1602 fullnodes=None,
1601 1603 remote_sidedata=None,
1602 1604 ):
1603 1605 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1604 1606 d.node, d.p1node, d.p2node, d.basenode, d.linknode
1605 1607 )
1606 1608
1607 1609 return cgpacker(
1608 1610 repo,
1609 1611 oldmatcher,
1610 1612 matcher,
1611 1613 b'02',
1612 1614 builddeltaheader=builddeltaheader,
1613 1615 manifestsend=b'',
1614 1616 bundlecaps=bundlecaps,
1615 1617 ellipses=ellipses,
1616 1618 shallow=shallow,
1617 1619 ellipsisroots=ellipsisroots,
1618 1620 fullnodes=fullnodes,
1619 1621 )
1620 1622
1621 1623
1622 1624 def _makecg3packer(
1623 1625 repo,
1624 1626 oldmatcher,
1625 1627 matcher,
1626 1628 bundlecaps,
1627 1629 ellipses=False,
1628 1630 shallow=False,
1629 1631 ellipsisroots=None,
1630 1632 fullnodes=None,
1631 1633 remote_sidedata=None,
1632 1634 ):
1633 1635 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1634 1636 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1635 1637 )
1636 1638
1637 1639 return cgpacker(
1638 1640 repo,
1639 1641 oldmatcher,
1640 1642 matcher,
1641 1643 b'03',
1642 1644 builddeltaheader=builddeltaheader,
1643 1645 manifestsend=closechunk(),
1644 1646 bundlecaps=bundlecaps,
1645 1647 ellipses=ellipses,
1646 1648 shallow=shallow,
1647 1649 ellipsisroots=ellipsisroots,
1648 1650 fullnodes=fullnodes,
1649 1651 )
1650 1652
1651 1653
1652 1654 def _makecg4packer(
1653 1655 repo,
1654 1656 oldmatcher,
1655 1657 matcher,
1656 1658 bundlecaps,
1657 1659 ellipses=False,
1658 1660 shallow=False,
1659 1661 ellipsisroots=None,
1660 1662 fullnodes=None,
1661 1663 remote_sidedata=None,
1662 1664 ):
1663 1665 # Sidedata is in a separate chunk from the delta to differentiate
1664 1666 # "raw delta" and sidedata.
1665 1667 def builddeltaheader(d):
1666 1668 return _CHANGEGROUPV4_DELTA_HEADER.pack(
1667 1669 d.protocol_flags,
1668 1670 d.node,
1669 1671 d.p1node,
1670 1672 d.p2node,
1671 1673 d.basenode,
1672 1674 d.linknode,
1673 1675 d.flags,
1674 1676 )
1675 1677
1676 1678 return cgpacker(
1677 1679 repo,
1678 1680 oldmatcher,
1679 1681 matcher,
1680 1682 b'04',
1681 1683 builddeltaheader=builddeltaheader,
1682 1684 manifestsend=closechunk(),
1683 1685 bundlecaps=bundlecaps,
1684 1686 ellipses=ellipses,
1685 1687 shallow=shallow,
1686 1688 ellipsisroots=ellipsisroots,
1687 1689 fullnodes=fullnodes,
1688 1690 remote_sidedata=remote_sidedata,
1689 1691 )
1690 1692
1691 1693
1692 1694 _packermap = {
1693 1695 b'01': (_makecg1packer, cg1unpacker),
1694 1696 # cg2 adds support for exchanging generaldelta
1695 1697 b'02': (_makecg2packer, cg2unpacker),
1696 1698 # cg3 adds support for exchanging revlog flags and treemanifests
1697 1699 b'03': (_makecg3packer, cg3unpacker),
1698 1700 # cg4 adds support for exchanging sidedata
1699 1701 b'04': (_makecg4packer, cg4unpacker),
1700 1702 }
1701 1703
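# A hedged sketch of the framing the packer table above dispatches on: a
# cg2-style delta header is five raw 20-byte nodes, mirroring the
# _CHANGEGROUPV2_DELTA_HEADER pack call in _makecg2packer above. The
# local constant name here is a stand-in for illustration only.
import struct

CG2_HEADER = struct.Struct(">20s20s20s20s20s")  # node, p1, p2, base, link

def build_header(node, p1, p2, base, link):
    return CG2_HEADER.pack(node, p1, p2, base, link)

hdr = build_header(b'\x11' * 20, b'\x22' * 20, b'\x00' * 20,
                   b'\x22' * 20, b'\x33' * 20)
assert len(hdr) == 100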
1702 1704
1703 1705 def allsupportedversions(repo):
1704 1706 versions = set(_packermap.keys())
1705 1707 needv03 = False
1706 1708 if (
1707 1709 repo.ui.configbool(b'experimental', b'changegroup3')
1708 1710 or repo.ui.configbool(b'experimental', b'treemanifest')
1709 1711 or scmutil.istreemanifest(repo)
1710 1712 ):
1711 1713 # we keep version 03 because we need it to exchange treemanifest data
1712 1714 #
1713 1715 # we also keep versions 01 and 02, because it is possible for a repo
1714 1716 # to contain both normal and tree manifests at the same time, so using
1715 1717 # an older version to pull data is viable
1716 1718 #
1717 1719 # (or even to push a subset of history)
1718 1720 needv03 = True
1719 1721 if not needv03:
1720 1722 versions.discard(b'03')
1721 1723 want_v4 = (
1722 1724 repo.ui.configbool(b'experimental', b'changegroup4')
1723 1725 or requirements.REVLOGV2_REQUIREMENT in repo.requirements
1724 1726 )
1725 1727 if not want_v4:
1726 1728 versions.discard(b'04')
1727 1729 return versions
1728 1730
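# A simplified sketch of the negotiation above: start from every known
# version and discard the ones the configuration does not call for. The
# boolean arguments are hypothetical stand-ins for the config and
# requirements checks.
def negotiated_versions(known, wants_v3, wants_v4):
    versions = set(known)
    if not wants_v3:
        versions.discard(b'03')
    if not wants_v4:
        versions.discard(b'04')
    return versions

got = negotiated_versions([b'01', b'02', b'03', b'04'], False, True)
assert got == {b'01', b'02', b'04'}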
1729 1731
1730 1732 # Changegroup versions that can be applied to the repo
1731 1733 def supportedincomingversions(repo):
1732 1734 return allsupportedversions(repo)
1733 1735
1734 1736
1735 1737 # Changegroup versions that can be created from the repo
1736 1738 def supportedoutgoingversions(repo):
1737 1739 versions = allsupportedversions(repo)
1738 1740 if scmutil.istreemanifest(repo):
1739 1741 # Versions 01 and 02 support only flat manifests and it's just too
1740 1742 # expensive to convert between the flat manifest and tree manifest on
1741 1743 # the fly. Since tree manifests are hashed differently, all of history
1742 1744 # would have to be converted. Instead, we simply don't even pretend to
1743 1745 # support versions 01 and 02.
1744 1746 versions.discard(b'01')
1745 1747 versions.discard(b'02')
1746 1748 if requirements.NARROW_REQUIREMENT in repo.requirements:
1747 1749 # Versions 01 and 02 don't support revlog flags, and we need to
1748 1750 # support that for stripping and unbundling to work.
1749 1751 versions.discard(b'01')
1750 1752 versions.discard(b'02')
1751 1753 if LFS_REQUIREMENT in repo.requirements:
1752 1754 # Versions 01 and 02 don't support revlog flags, and we need to
1753 1755 # mark LFS entries with REVIDX_EXTSTORED.
1754 1756 versions.discard(b'01')
1755 1757 versions.discard(b'02')
1756 1758
1757 1759 return versions
1758 1760
1759 1761
1760 1762 def localversion(repo):
1761 1763 # Finds the best version to use for bundles that are meant to be used
1762 1764 # locally, such as those from strip and shelve, and temporary bundles.
1763 1765 return max(supportedoutgoingversions(repo))
1764 1766
1765 1767
1766 1768 def safeversion(repo):
1767 1769 # Finds the smallest version that it's safe to assume clients of the repo
1768 1770 # will support. For example, all hg versions that support generaldelta also
1769 1771 # support changegroup 02.
1770 1772 versions = supportedoutgoingversions(repo)
1771 1773 if requirements.GENERALDELTA_REQUIREMENT in repo.requirements:
1772 1774 versions.discard(b'01')
1773 1775 assert versions
1774 1776 return min(versions)
1775 1777
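# An illustration of the two selection policies above: localversion()
# takes the newest format the repo can write, while safeversion() takes
# the oldest one every expected client can read.
versions = {b'01', b'02', b'03'}
assert max(versions) == b'03'  # local bundles: richest format
assert min(versions) == b'01'  # safe default: lowest common denominator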
1776 1778
1777 1779 def getbundler(
1778 1780 version,
1779 1781 repo,
1780 1782 bundlecaps=None,
1781 1783 oldmatcher=None,
1782 1784 matcher=None,
1783 1785 ellipses=False,
1784 1786 shallow=False,
1785 1787 ellipsisroots=None,
1786 1788 fullnodes=None,
1787 1789 remote_sidedata=None,
1788 1790 ):
1789 1791 assert version in supportedoutgoingversions(repo)
1790 1792
1791 1793 if matcher is None:
1792 1794 matcher = matchmod.always()
1793 1795 if oldmatcher is None:
1794 1796 oldmatcher = matchmod.never()
1795 1797
1796 1798 if version == b'01' and not matcher.always():
1797 1799 raise error.ProgrammingError(
1798 1800 b'version 01 changegroups do not support sparse file matchers'
1799 1801 )
1800 1802
1801 1803 if ellipses and version in (b'01', b'02'):
1802 1804 raise error.Abort(
1803 1805 _(
1804 1806 b'ellipsis nodes require at least cg3 on client and server, '
1805 1807 b'but negotiated version %s'
1806 1808 )
1807 1809 % version
1808 1810 )
1809 1811
1810 1812 # Requested files could include files not in the local store. So
1811 1813 # filter those out.
1812 1814 matcher = repo.narrowmatch(matcher)
1813 1815
1814 1816 fn = _packermap[version][0]
1815 1817 return fn(
1816 1818 repo,
1817 1819 oldmatcher,
1818 1820 matcher,
1819 1821 bundlecaps,
1820 1822 ellipses=ellipses,
1821 1823 shallow=shallow,
1822 1824 ellipsisroots=ellipsisroots,
1823 1825 fullnodes=fullnodes,
1824 1826 remote_sidedata=remote_sidedata,
1825 1827 )
1826 1828
1827 1829
1828 1830 def getunbundler(version, fh, alg, extras=None):
1829 1831 return _packermap[version][1](fh, alg, extras=extras)
1830 1832
1831 1833
1832 1834 def _changegroupinfo(repo, nodes, source):
1833 1835 if repo.ui.verbose or source == b'bundle':
1834 1836 repo.ui.status(_(b"%d changesets found\n") % len(nodes))
1835 1837 if repo.ui.debugflag:
1836 1838 repo.ui.debug(b"list of changesets:\n")
1837 1839 for node in nodes:
1838 1840 repo.ui.debug(b"%s\n" % hex(node))
1839 1841
1840 1842
1841 1843 def makechangegroup(
1842 1844 repo, outgoing, version, source, fastpath=False, bundlecaps=None
1843 1845 ):
1844 1846 cgstream = makestream(
1845 1847 repo,
1846 1848 outgoing,
1847 1849 version,
1848 1850 source,
1849 1851 fastpath=fastpath,
1850 1852 bundlecaps=bundlecaps,
1851 1853 )
1852 1854 return getunbundler(
1853 1855 version,
1854 1856 util.chunkbuffer(cgstream),
1855 1857 None,
1856 1858 {b'clcount': len(outgoing.missing)},
1857 1859 )
1858 1860
1859 1861
1860 1862 def makestream(
1861 1863 repo,
1862 1864 outgoing,
1863 1865 version,
1864 1866 source,
1865 1867 fastpath=False,
1866 1868 bundlecaps=None,
1867 1869 matcher=None,
1868 1870 remote_sidedata=None,
1869 1871 ):
1870 1872 bundler = getbundler(
1871 1873 version,
1872 1874 repo,
1873 1875 bundlecaps=bundlecaps,
1874 1876 matcher=matcher,
1875 1877 remote_sidedata=remote_sidedata,
1876 1878 )
1877 1879
1878 1880 repo = repo.unfiltered()
1879 1881 commonrevs = outgoing.common
1880 1882 csets = outgoing.missing
1881 1883 heads = outgoing.ancestorsof
1882 1884 # We go through the fast path if we get told to, or if all (unfiltered)
1883 1885 # heads have been requested (since we then know all the linkrevs will
1884 1886 # be pulled by the client).
1885 1887 heads.sort()
1886 1888 fastpathlinkrev = fastpath or (
1887 1889 repo.filtername is None and heads == sorted(repo.heads())
1888 1890 )
1889 1891
1890 1892 repo.hook(b'preoutgoing', throw=True, source=source)
1891 1893 _changegroupinfo(repo, csets, source)
1892 1894 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1893 1895
1894 1896
1895 1897 def _addchangegroupfiles(
1896 1898 repo,
1897 1899 source,
1898 1900 revmap,
1899 1901 trp,
1900 1902 expectedfiles,
1901 1903 needfiles,
1902 1904 addrevisioncb=None,
1903 1905 ):
1904 1906 revisions = 0
1905 1907 files = 0
1906 1908 progress = repo.ui.makeprogress(
1907 1909 _(b'files'), unit=_(b'files'), total=expectedfiles
1908 1910 )
1909 1911 for chunkdata in iter(source.filelogheader, {}):
1910 1912 files += 1
1911 1913 f = chunkdata[b"filename"]
1912 1914 repo.ui.debug(b"adding %s revisions\n" % f)
1913 1915 progress.increment()
1914 1916 fl = repo.file(f)
1915 1917 o = len(fl)
1916 1918 try:
1917 1919 deltas = source.deltaiter()
1918 1920 added = fl.addgroup(
1919 1921 deltas,
1920 1922 revmap,
1921 1923 trp,
1922 1924 addrevisioncb=addrevisioncb,
1923 1925 )
1924 1926 if not added:
1925 1927 raise error.Abort(_(b"received file revlog group is empty"))
1926 1928 except error.CensoredBaseError as e:
1927 1929 raise error.Abort(_(b"received delta base is censored: %s") % e)
1928 1930 revisions += len(fl) - o
1929 1931 if f in needfiles:
1930 1932 needs = needfiles[f]
1931 1933 for new in pycompat.xrange(o, len(fl)):
1932 1934 n = fl.node(new)
1933 1935 if n in needs:
1934 1936 needs.remove(n)
1935 1937 else:
1936 1938 raise error.Abort(_(b"received spurious file revlog entry"))
1937 1939 if not needs:
1938 1940 del needfiles[f]
1939 1941 progress.complete()
1940 1942
1941 1943 for f, needs in pycompat.iteritems(needfiles):
1942 1944 fl = repo.file(f)
1943 1945 for n in needs:
1944 1946 try:
1945 1947 fl.rev(n)
1946 1948 except error.LookupError:
1947 1949 raise error.Abort(
1948 1950 _(b'missing file data for %s:%s - run hg verify')
1949 1951 % (f, hex(n))
1950 1952 )
1951 1953
1952 1954 return revisions, files
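
# A toy sketch of the per-file accounting above, with plain lists
# standing in for filelogs: the revision count is the growth in filelog
# length across the group, and the file count the number of groups seen.
def count_added(groups):
    revisions = files = 0
    for fl, deltas in groups:
        before = len(fl)
        fl.extend(deltas)  # stand-in for fl.addgroup(...)
        revisions += len(fl) - before
        files += 1
    return revisions, files

assert count_added([([], [b'd1', b'd2']), ([b'd0'], [b'd3'])]) == (3, 2)
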
@@ -1,3192 +1,3192 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 INDEX_HEADER,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_HASCOPIESINFO,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 )
59 59 from .thirdparty import attr
60 60 from . import (
61 61 ancestor,
62 62 dagop,
63 63 error,
64 64 mdiff,
65 65 policy,
66 66 pycompat,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .interfaces import (
71 71 repository,
72 72 util as interfaceutil,
73 73 )
74 74 from .revlogutils import (
75 75 deltas as deltautil,
76 76 flagutil,
77 77 nodemap as nodemaputil,
78 78 revlogv0,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # bare usage of all the names to prevent pyflakes complaints
87 87 # We need these names available in the module for extensions.
88 88
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_HASCOPIESINFO
102 102 REVIDX_EXTSTORED
103 103 REVIDX_DEFAULT_FLAGS
104 104 REVIDX_FLAGS_ORDER
105 105 REVIDX_RAWTEXT_CHANGING_FLAGS
106 106
107 107 parsers = policy.importmod('parsers')
108 108 rustancestor = policy.importrust('ancestor')
109 109 rustdagop = policy.importrust('dagop')
110 110 rustrevlog = policy.importrust('revlog')
111 111
112 112 # Aliased for performance.
113 113 _zlibdecompress = zlib.decompress
114 114
115 115 # max size of revlog with inline data
116 116 _maxinline = 131072
117 117 _chunksize = 1048576
118 118
119 119 # Flag processors for REVIDX_ELLIPSIS.
120 120 def ellipsisreadprocessor(rl, text):
121 121 return text, False
122 122
123 123
124 124 def ellipsiswriteprocessor(rl, text):
125 125 return text, False
126 126
127 127
128 128 def ellipsisrawprocessor(rl, text):
129 129 return False
130 130
131 131
132 132 ellipsisprocessor = (
133 133 ellipsisreadprocessor,
134 134 ellipsiswriteprocessor,
135 135 ellipsisrawprocessor,
136 136 )
137 137
138 138
139 139 def offset_type(offset, type):
140 140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 141 raise ValueError(b'unknown revlog index flags')
142 142 return int(int(offset) << 16 | type)
143 143
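# A worked example of the packing above: the low 16 bits carry the revlog
# flags, everything above them the byte offset, so both fit in a single
# index integer.
packed = offset_type(1048576, 0)  # offset of 1 MiB, no flags set
assert packed >> 16 == 1048576    # the offset comes back out of the top
assert packed & 0xFFFF == 0       # and the flags out of the bottom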
144 144
145 145 def _verify_revision(rl, skipflags, state, node):
146 146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 147 point for extensions to influence the operation."""
148 148 if skipflags:
149 149 state[b'skipread'].add(node)
150 150 else:
151 151 # Side-effect: read content and verify hash.
152 152 rl.revision(node)
153 153
154 154
155 155 # True if a fast implementation for persistent-nodemap is available
156 156 #
157 157 # We also consider the "pure" python implementation "fast" here, because
158 158 # people using pure python don't really have performance considerations
159 159 # (and a wheelbarrow of other slowness sources)
160 160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 161 parsers, 'BaseIndexObject'
162 162 )
163 163
164 164
165 165 @attr.s(slots=True, frozen=True)
166 166 class _revisioninfo(object):
167 167 """Information about a revision that allows building its fulltext
168 168 node: expected hash of the revision
169 169 p1, p2: parent revs of the revision
170 170 btext: built text cache consisting of a one-element list
171 171 cachedelta: (baserev, uncompressed_delta) or None
172 172 flags: flags associated to the revision storage
173 173
174 174 One of btext[0] or cachedelta must be set.
175 175 """
176 176
177 177 node = attr.ib()
178 178 p1 = attr.ib()
179 179 p2 = attr.ib()
180 180 btext = attr.ib()
181 181 textlen = attr.ib()
182 182 cachedelta = attr.ib()
183 183 flags = attr.ib()
184 184
185 185
186 186 @interfaceutil.implementer(repository.irevisiondelta)
187 187 @attr.s(slots=True)
188 188 class revlogrevisiondelta(object):
189 189 node = attr.ib()
190 190 p1node = attr.ib()
191 191 p2node = attr.ib()
192 192 basenode = attr.ib()
193 193 flags = attr.ib()
194 194 baserevisionsize = attr.ib()
195 195 revision = attr.ib()
196 196 delta = attr.ib()
197 197 sidedata = attr.ib()
198 198 protocol_flags = attr.ib()
199 199 linknode = attr.ib(default=None)
200 200
201 201
202 202 @interfaceutil.implementer(repository.iverifyproblem)
203 203 @attr.s(frozen=True)
204 204 class revlogproblem(object):
205 205 warning = attr.ib(default=None)
206 206 error = attr.ib(default=None)
207 207 node = attr.ib(default=None)
208 208
209 209
210 210 def parse_index_v1(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline)
213 213 return index, cache
214 214
215 215
216 216 def parse_index_v2(data, inline):
217 217 # call the C implementation to parse the index data
218 218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 219 return index, cache
220 220
221 221
222 222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223 223
224 224 def parse_index_v1_nodemap(data, inline):
225 225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 226 return index, cache
227 227
228 228
229 229 else:
230 230 parse_index_v1_nodemap = None
231 231
232 232
233 233 def parse_index_v1_mixed(data, inline):
234 234 index, cache = parse_index_v1(data, inline)
235 235 return rustrevlog.MixedIndex(index), cache
236 236
237 237
238 238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 239 # signed integer)
240 240 _maxentrysize = 0x7FFFFFFF
241 241
242 242
243 243 class revlog(object):
244 244 """
245 245 the underlying revision storage object
246 246
247 247 A revlog consists of two parts, an index and the revision data.
248 248
249 249 The index is a file with a fixed record size containing
250 250 information on each revision, including its nodeid (hash), the
251 251 nodeids of its parents, the position and offset of its data within
252 252 the data file, and the revision it's based on. Finally, each entry
253 253 contains a linkrev entry that can serve as a pointer to external
254 254 data.
255 255
256 256 The revision data itself is a linear collection of data chunks.
257 257 Each chunk represents a revision and is usually represented as a
258 258 delta against the previous chunk. To bound lookup time, runs of
259 259 deltas are limited to about 2 times the length of the original
260 260 version data. This makes retrieval of a version proportional to
261 261 its size, or O(1) relative to the number of revisions.
262 262
263 263 Both pieces of the revlog are written to in an append-only
264 264 fashion, which means we never need to rewrite a file to insert or
265 265 remove data, and can use some simple techniques to avoid the need
266 266 for locking while reading.
267 267
268 268 If checkambig, indexfile is opened with checkambig=True at
269 269 writing, to avoid file stat ambiguity.
270 270
271 271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 272 index will be mmapped rather than read if it is larger than the
273 273 configured threshold.
274 274
275 275 If censorable is True, the revlog can have censored revisions.
276 276
277 277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 278 compression for the data content.
279 279
280 280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 281 file handle, a filename, and an expected position. It should check whether
282 282 the current position in the file handle is valid, and log/warn/fail (by
283 283 raising).
284 284 """
285 285
286 286 _flagserrorclass = error.RevlogError
287 287
288 288 def __init__(
289 289 self,
290 290 opener,
291 291 target,
292 292 radix,
293 293 postfix=None,
294 294 checkambig=False,
295 295 mmaplargeindex=False,
296 296 censorable=False,
297 297 upperboundcomp=None,
298 298 persistentnodemap=False,
299 299 concurrencychecker=None,
300 300 ):
301 301 """
302 302 create a revlog object
303 303
304 304 opener is a function that abstracts the file opening operation
305 305 and can be used to implement COW semantics or the like.
306 306
307 307 `target`: a (KIND, ID) tuple that identifies the content stored in
308 308 this revlog. It helps the rest of the code understand what the revlog
309 309 is about without having to resort to heuristics and index filename
310 310 analysis. Note that this must reliably be set by normal code, but
311 311 that test, debug, or performance measurement code might not set it to
312 312 an accurate value.
313 313 """
314 314 self.upperboundcomp = upperboundcomp
315 315
316 316 self.radix = radix
317 317
318 318 self._indexfile = None
319 319 self._datafile = None
320 320 self._nodemap_file = None
321 321 self.postfix = postfix
322 322 self.opener = opener
323 323 if persistentnodemap:
324 324 self._nodemap_file = nodemaputil.get_nodemap_file(self)
325 325
326 326 assert target[0] in ALL_KINDS
327 327 assert len(target) == 2
328 328 self.target = target
329 329 # When True, indexfile is opened with checkambig=True at writing, to
330 330 # avoid file stat ambiguity.
331 331 self._checkambig = checkambig
332 332 self._mmaplargeindex = mmaplargeindex
333 333 self._censorable = censorable
334 334 # 3-tuple of (node, rev, text) for a raw revision.
335 335 self._revisioncache = None
336 336 # Maps rev to chain base rev.
337 337 self._chainbasecache = util.lrucachedict(100)
338 338 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
339 339 self._chunkcache = (0, b'')
340 340 # How much data to read and cache into the raw revlog data cache.
341 341 self._chunkcachesize = 65536
342 342 self._maxchainlen = None
343 343 self._deltabothparents = True
344 344 self.index = None
345 345 self._nodemap_docket = None
346 346 # Mapping of partial identifiers to full nodes.
347 347 self._pcache = {}
348 348 # Default compression engine and its options.
349 349 self._compengine = b'zlib'
350 350 self._compengineopts = {}
351 351 self._maxdeltachainspan = -1
352 352 self._withsparseread = False
353 353 self._sparserevlog = False
354 354 self._srdensitythreshold = 0.50
355 355 self._srmingapsize = 262144
356 356
357 357 # Make copy of flag processors so each revlog instance can support
358 358 # custom flags.
359 359 self._flagprocessors = dict(flagutil.flagprocessors)
360 360
361 361 # 2-tuple of file handles being used for active writing.
362 362 self._writinghandles = None
363 363 # prevent nesting of addgroup
364 364 self._adding_group = None
365 365
366 366 self._loadindex()
367 367
368 368 self._concurrencychecker = concurrencychecker
369 369
370 370 def _init_opts(self):
371 371 """process options (from above/config) to setup associated default revlog mode
372 372
373 373 These values might be affected when actually reading on disk information.
374 374
375 375 The relevant values are returned for use in _loadindex().
376 376
377 377 * newversionflags:
378 378 version header to use if we need to create a new revlog
379 379
380 380 * mmapindexthreshold:
381 381 minimal index size at which to start using mmap
382 382
383 383 * force_nodemap:
384 384 force the usage of a "development" version of the nodemap code
385 385 """
386 386 mmapindexthreshold = None
387 387 opts = self.opener.options
388 388
389 389 if b'revlogv2' in opts:
390 390 new_header = REVLOGV2 | FLAG_INLINE_DATA
391 391 elif b'revlogv1' in opts:
392 392 new_header = REVLOGV1 | FLAG_INLINE_DATA
393 393 if b'generaldelta' in opts:
394 394 new_header |= FLAG_GENERALDELTA
395 395 elif b'revlogv0' in self.opener.options:
396 396 new_header = REVLOGV0
397 397 else:
398 398 new_header = REVLOG_DEFAULT_VERSION
399 399
400 400 if b'chunkcachesize' in opts:
401 401 self._chunkcachesize = opts[b'chunkcachesize']
402 402 if b'maxchainlen' in opts:
403 403 self._maxchainlen = opts[b'maxchainlen']
404 404 if b'deltabothparents' in opts:
405 405 self._deltabothparents = opts[b'deltabothparents']
406 406 self._lazydelta = bool(opts.get(b'lazydelta', True))
407 407 self._lazydeltabase = False
408 408 if self._lazydelta:
409 409 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
410 410 if b'compengine' in opts:
411 411 self._compengine = opts[b'compengine']
412 412 if b'zlib.level' in opts:
413 413 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
414 414 if b'zstd.level' in opts:
415 415 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
416 416 if b'maxdeltachainspan' in opts:
417 417 self._maxdeltachainspan = opts[b'maxdeltachainspan']
418 418 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
419 419 mmapindexthreshold = opts[b'mmapindexthreshold']
420 420 self.hassidedata = bool(opts.get(b'side-data', False))
421 421 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
422 422 withsparseread = bool(opts.get(b'with-sparse-read', False))
423 423 # sparse-revlog forces sparse-read
424 424 self._withsparseread = self._sparserevlog or withsparseread
425 425 if b'sparse-read-density-threshold' in opts:
426 426 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
427 427 if b'sparse-read-min-gap-size' in opts:
428 428 self._srmingapsize = opts[b'sparse-read-min-gap-size']
429 429 if opts.get(b'enableellipsis'):
430 430 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
431 431
432 432 # revlog v0 doesn't have flag processors
433 433 for flag, processor in pycompat.iteritems(
434 434 opts.get(b'flagprocessors', {})
435 435 ):
436 436 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
437 437
438 438 if self._chunkcachesize <= 0:
439 439 raise error.RevlogError(
440 440 _(b'revlog chunk cache size %r is not greater than 0')
441 441 % self._chunkcachesize
442 442 )
443 443 elif self._chunkcachesize & (self._chunkcachesize - 1):
444 444 raise error.RevlogError(
445 445 _(b'revlog chunk cache size %r is not a power of 2')
446 446 % self._chunkcachesize
447 447 )
448 448 force_nodemap = opts.get(b'devel-force-nodemap', False)
449 449 return new_header, mmapindexthreshold, force_nodemap
450 450
451 451 def _get_data(self, filepath, mmap_threshold):
452 452 """return a file content with or without mmap
453 453
454 454 If the file is missing, return the empty string"""
455 455 try:
456 456 with self.opener(filepath) as fp:
457 457 if mmap_threshold is not None:
458 458 file_size = self.opener.fstat(fp).st_size
459 459 if file_size >= mmap_threshold:
460 460 # TODO: should .close() to release resources without
461 461 # relying on Python GC
462 462 return util.buffer(util.mmapread(fp))
463 463 return fp.read()
464 464 except IOError as inst:
465 465 if inst.errno != errno.ENOENT:
466 466 raise
467 467 return b''
468 468
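# A standalone sketch of the threshold logic above: small files are read
# into memory, files at or over the caller-chosen threshold are
# memory-mapped instead (the mapping stays valid after the file object
# is closed).
import mmap

def read_maybe_mmap(path, threshold):
    with open(path, 'rb') as fp:
        size = os.fstat(fp.fileno()).st_size
        if threshold is not None and size >= threshold:
            return mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
        return fp.read()
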
469 469 def _loadindex(self):
470 470
471 471 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
472 472
473 473 if self.postfix is None:
474 474 entry_point = b'%s.i' % self.radix
475 475 else:
476 476 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
477 477
478 478 entry_data = b''
479 479 self._initempty = True
480 480 entry_data = self._get_data(entry_point, mmapindexthreshold)
481 481 if len(entry_data) > 0:
482 482 header = INDEX_HEADER.unpack(entry_data[:4])[0]
483 483 self._initempty = False
484 484 else:
485 485 header = new_header
486 486
487 487 self._format_flags = header & ~0xFFFF
488 488 self._format_version = header & 0xFFFF
489 489
490 490 if self._format_version == REVLOGV0:
491 491 if self._format_flags:
492 492 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
493 493 display_flag = self._format_flags >> 16
494 494 msg %= (display_flag, self._format_version, self.display_id)
495 495 raise error.RevlogError(msg)
496 496
497 497 self._inline = False
498 498 self._generaldelta = False
499 499
500 500 elif self._format_version == REVLOGV1:
501 501 if self._format_flags & ~REVLOGV1_FLAGS:
502 502 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
503 503 display_flag = self._format_flags >> 16
504 504 msg %= (display_flag, self._format_version, self.display_id)
505 505 raise error.RevlogError(msg)
506 506
507 507 self._inline = self._format_flags & FLAG_INLINE_DATA
508 508 self._generaldelta = self._format_flags & FLAG_GENERALDELTA
509 509
510 510 elif self._format_version == REVLOGV2:
511 511 if self._format_flags & ~REVLOGV2_FLAGS:
512 512 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
513 513 display_flag = self._format_flags >> 16
514 514 msg %= (display_flag, self._format_version, self.display_id)
515 515 raise error.RevlogError(msg)
516 516
517 517 # There is a bug in the transaction handling when going from an
518 518 # inline revlog to a separate index and data file. Turn it off until
519 519 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
520 520 # See issue6485
521 521 self._inline = False
522 522 # generaldelta implied by version 2 revlogs.
523 523 self._generaldelta = True
524 524
525 525 else:
526 526 msg = _(b'unknown version (%d) in revlog %s')
527 527 msg %= (self._format_version, self.display_id)
528 528 raise error.RevlogError(msg)
529 529
530 530 index_data = entry_data
531 531 self._indexfile = entry_point
532 532
533 533 if self.postfix is None or self.postfix == b'a':
534 534 self._datafile = b'%s.d' % self.radix
535 535 else:
536 536 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
537 537
538 538 self.nodeconstants = sha1nodeconstants
539 539 self.nullid = self.nodeconstants.nullid
540 540
541 541 # sparse-revlog can't be on without general-delta (issue6056)
542 542 if not self._generaldelta:
543 543 self._sparserevlog = False
544 544
545 545 self._storedeltachains = True
546 546
547 547 devel_nodemap = (
548 548 self._nodemap_file
549 549 and force_nodemap
550 550 and parse_index_v1_nodemap is not None
551 551 )
552 552
553 553 use_rust_index = False
554 554 if rustrevlog is not None:
555 555 if self._nodemap_file is not None:
556 556 use_rust_index = True
557 557 else:
558 558 use_rust_index = self.opener.options.get(b'rust.index')
559 559
560 560 self._parse_index = parse_index_v1
561 561 if self._format_version == REVLOGV0:
562 562 self._parse_index = revlogv0.parse_index_v0
563 563 elif self._format_version == REVLOGV2:
564 564 self._parse_index = parse_index_v2
565 565 elif devel_nodemap:
566 566 self._parse_index = parse_index_v1_nodemap
567 567 elif use_rust_index:
568 568 self._parse_index = parse_index_v1_mixed
569 569 try:
570 570 d = self._parse_index(index_data, self._inline)
571 571 index, _chunkcache = d
572 572 use_nodemap = (
573 573 not self._inline
574 574 and self._nodemap_file is not None
575 575 and util.safehasattr(index, 'update_nodemap_data')
576 576 )
577 577 if use_nodemap:
578 578 nodemap_data = nodemaputil.persisted_data(self)
579 579 if nodemap_data is not None:
580 580 docket = nodemap_data[0]
581 581 if (
582 582 len(d[0]) > docket.tip_rev
583 583 and d[0][docket.tip_rev][7] == docket.tip_node
584 584 ):
585 585 # no changelog tampering
586 586 self._nodemap_docket = docket
587 587 index.update_nodemap_data(*nodemap_data)
588 588 except (ValueError, IndexError):
589 589 raise error.RevlogError(
590 590 _(b"index %s is corrupted") % self.display_id
591 591 )
592 592 self.index, self._chunkcache = d
593 593 if not self._chunkcache:
594 594 self._chunkclear()
595 595 # revnum -> (chain-length, sum-delta-length)
596 596 self._chaininfocache = util.lrucachedict(500)
597 597 # revlog header -> revlog compressor
598 598 self._decompressors = {}
599 599
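# A worked example of how _loadindex above splits the 32-bit header: the
# low 16 bits hold the format version, the high bits the feature flags.
# The values here assume REVLOGV1 == 1 and FLAG_INLINE_DATA == 1 << 16,
# per revlogutils.constants.
header = (1 << 16) | 1
assert header & 0xFFFF == 1         # format version
assert header & ~0xFFFF == 1 << 16  # feature flags
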
600 600 @util.propertycache
601 601 def revlog_kind(self):
602 602 return self.target[0]
603 603
604 604 @util.propertycache
605 605 def display_id(self):
606 606 """The public facing "ID" of the revlog that we use in message"""
607 607 # Maybe we should build a user facing representation of
608 608 # revlog.target instead of using `self.radix`
609 609 return self.radix
610 610
611 611 @util.propertycache
612 612 def _compressor(self):
613 613 engine = util.compengines[self._compengine]
614 614 return engine.revlogcompressor(self._compengineopts)
615 615
616 616 def _indexfp(self, mode=b'r'):
617 617 """file object for the revlog's index file"""
618 618 args = {'mode': mode}
619 619 if mode != b'r':
620 620 args['checkambig'] = self._checkambig
621 621 if mode == b'w':
622 622 args['atomictemp'] = True
623 623 return self.opener(self._indexfile, **args)
624 624
625 625 def _datafp(self, mode=b'r'):
626 626 """file object for the revlog's data file"""
627 627 return self.opener(self._datafile, mode=mode)
628 628
629 629 @contextlib.contextmanager
630 630 def _datareadfp(self, existingfp=None):
631 631 """file object suitable to read data"""
632 632 # Use explicit file handle, if given.
633 633 if existingfp is not None:
634 634 yield existingfp
635 635
636 636 # Use a file handle being actively used for writes, if available.
637 637 # There is some danger to doing this because reads will seek the
638 638 # file. However, _writeentry() performs a SEEK_END before all writes,
639 639 # so we should be safe.
640 640 elif self._writinghandles:
641 641 if self._inline:
642 642 yield self._writinghandles[0]
643 643 else:
644 644 yield self._writinghandles[1]
645 645
646 646 # Otherwise open a new file handle.
647 647 else:
648 648 if self._inline:
649 649 func = self._indexfp
650 650 else:
651 651 func = self._datafp
652 652 with func() as fp:
653 653 yield fp
654 654
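# A minimal sketch of the reuse-or-open pattern above: prefer a
# caller-supplied handle, then an active write handle, and only open (and
# close) a fresh one as a last resort. Names are illustrative.
import contextlib

@contextlib.contextmanager
def read_handle(existing, active, opener):
    if existing is not None:
        yield existing
    elif active is not None:
        yield active
    else:
        with opener() as fp:
            yield fp
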
655 655 def tiprev(self):
656 656 return len(self.index) - 1
657 657
658 658 def tip(self):
659 659 return self.node(self.tiprev())
660 660
661 661 def __contains__(self, rev):
662 662 return 0 <= rev < len(self)
663 663
664 664 def __len__(self):
665 665 return len(self.index)
666 666
667 667 def __iter__(self):
668 668 return iter(pycompat.xrange(len(self)))
669 669
670 670 def revs(self, start=0, stop=None):
671 671 """iterate over all rev in this revlog (from start to stop)"""
672 672 return storageutil.iterrevs(len(self), start=start, stop=stop)
673 673
674 674 @property
675 675 def nodemap(self):
676 676 msg = (
677 677 b"revlog.nodemap is deprecated, "
678 678 b"use revlog.index.[has_node|rev|get_rev]"
679 679 )
680 680 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
681 681 return self.index.nodemap
682 682
683 683 @property
684 684 def _nodecache(self):
685 685 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
686 686 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
687 687 return self.index.nodemap
688 688
689 689 def hasnode(self, node):
690 690 try:
691 691 self.rev(node)
692 692 return True
693 693 except KeyError:
694 694 return False
695 695
696 696 def candelta(self, baserev, rev):
697 697 """whether two revisions (baserev, rev) can be delta-ed or not"""
698 698 # Disable delta if either rev requires a content-changing flag
699 699 # processor (ex. LFS). This is because such flag processor can alter
700 700 # the rawtext content that the delta will be based on, and two clients
701 701 # could have a same revlog node with different flags (i.e. different
702 702 # rawtext contents) and the delta could be incompatible.
703 703 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
704 704 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
705 705 ):
706 706 return False
707 707 return True
708 708
709 709 def update_caches(self, transaction):
710 710 if self._nodemap_file is not None:
711 711 if transaction is None:
712 712 nodemaputil.update_persistent_nodemap(self)
713 713 else:
714 714 nodemaputil.setup_persistent_nodemap(transaction, self)
715 715
716 716 def clearcaches(self):
717 717 self._revisioncache = None
718 718 self._chainbasecache.clear()
719 719 self._chunkcache = (0, b'')
720 720 self._pcache = {}
721 721 self._nodemap_docket = None
722 722 self.index.clearcaches()
723 723 # The python code is the one responsible for validating the docket, so
724 724 # we end up having to refresh it here.
725 725 use_nodemap = (
726 726 not self._inline
727 727 and self._nodemap_file is not None
728 728 and util.safehasattr(self.index, 'update_nodemap_data')
729 729 )
730 730 if use_nodemap:
731 731 nodemap_data = nodemaputil.persisted_data(self)
732 732 if nodemap_data is not None:
733 733 self._nodemap_docket = nodemap_data[0]
734 734 self.index.update_nodemap_data(*nodemap_data)
735 735
736 736 def rev(self, node):
737 737 try:
738 738 return self.index.rev(node)
739 739 except TypeError:
740 740 raise
741 741 except error.RevlogError:
742 742 # parsers.c radix tree lookup failed
743 743 if (
744 744 node == self.nodeconstants.wdirid
745 745 or node in self.nodeconstants.wdirfilenodeids
746 746 ):
747 747 raise error.WdirUnsupported
748 748 raise error.LookupError(node, self.display_id, _(b'no node'))
749 749
750 750 # Accessors for index entries.
751 751
752 752 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
753 753 # are flags.
754 754 def start(self, rev):
755 755 return int(self.index[rev][0] >> 16)
756 756
757 757 def flags(self, rev):
758 758 return self.index[rev][0] & 0xFFFF
759 759
760 760 def length(self, rev):
761 761 return self.index[rev][1]
762 762
763 763 def sidedata_length(self, rev):
764 764 if not self.hassidedata:
765 765 return 0
766 766 return self.index[rev][9]
767 767
768 768 def rawsize(self, rev):
769 769 """return the length of the uncompressed text for a given revision"""
770 770 l = self.index[rev][2]
771 771 if l >= 0:
772 772 return l
773 773
774 774 t = self.rawdata(rev)
775 775 return len(t)
776 776
777 777 def size(self, rev):
778 778 """length of non-raw text (processed by a "read" flag processor)"""
779 779 # fast path: if no "read" flag processor could change the content,
780 780 # size is rawsize. note: ELLIPSIS is known to not change the content.
781 781 flags = self.flags(rev)
782 782 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
783 783 return self.rawsize(rev)
784 784
785 785 return len(self.revision(rev, raw=False))
786 786
787 787 def chainbase(self, rev):
788 788 base = self._chainbasecache.get(rev)
789 789 if base is not None:
790 790 return base
791 791
792 792 index = self.index
793 793 iterrev = rev
794 794 base = index[iterrev][3]
795 795 while base != iterrev:
796 796 iterrev = base
797 797 base = index[iterrev][3]
798 798
799 799 self._chainbasecache[rev] = base
800 800 return base
801 801
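# A minimal sketch of the uncached walk above, with a dict of
# rev -> delta-base standing in for the index: a revision that is its
# own base is a full snapshot.
def chain_base(bases, rev):
    while bases[rev] != rev:
        rev = bases[rev]
    return rev

# rev 0 is a snapshot; 1 deltas against 0; 2 against 1
assert chain_base({0: 0, 1: 0, 2: 1}, 2) == 0
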
802 802 def linkrev(self, rev):
803 803 return self.index[rev][4]
804 804
805 805 def parentrevs(self, rev):
806 806 try:
807 807 entry = self.index[rev]
808 808 except IndexError:
809 809 if rev == wdirrev:
810 810 raise error.WdirUnsupported
811 811 raise
812 812 if entry[5] == nullrev:
813 813 return entry[6], entry[5]
814 814 else:
815 815 return entry[5], entry[6]
816 816
817 817 # fast parentrevs(rev) where rev isn't filtered
818 818 _uncheckedparentrevs = parentrevs
819 819
820 820 def node(self, rev):
821 821 try:
822 822 return self.index[rev][7]
823 823 except IndexError:
824 824 if rev == wdirrev:
825 825 raise error.WdirUnsupported
826 826 raise
827 827
828 828 # Derived from index values.
829 829
830 830 def end(self, rev):
831 831 return self.start(rev) + self.length(rev)
832 832
833 833 def parents(self, node):
834 834 i = self.index
835 835 d = i[self.rev(node)]
836 836 # inline node() to avoid function call overhead
837 837 if d[5] == self.nullid:
838 838 return i[d[6]][7], i[d[5]][7]
839 839 else:
840 840 return i[d[5]][7], i[d[6]][7]
841 841
842 842 def chainlen(self, rev):
843 843 return self._chaininfo(rev)[0]
844 844
845 845 def _chaininfo(self, rev):
846 846 chaininfocache = self._chaininfocache
847 847 if rev in chaininfocache:
848 848 return chaininfocache[rev]
849 849 index = self.index
850 850 generaldelta = self._generaldelta
851 851 iterrev = rev
852 852 e = index[iterrev]
853 853 clen = 0
854 854 compresseddeltalen = 0
855 855 while iterrev != e[3]:
856 856 clen += 1
857 857 compresseddeltalen += e[1]
858 858 if generaldelta:
859 859 iterrev = e[3]
860 860 else:
861 861 iterrev -= 1
862 862 if iterrev in chaininfocache:
863 863 t = chaininfocache[iterrev]
864 864 clen += t[0]
865 865 compresseddeltalen += t[1]
866 866 break
867 867 e = index[iterrev]
868 868 else:
869 869 # Add text length of base since decompressing that also takes
870 870 # work. For cache hits the length is already included.
871 871 compresseddeltalen += e[1]
872 872 r = (clen, compresseddeltalen)
873 873 chaininfocache[rev] = r
874 874 return r
875 875
876 876 def _deltachain(self, rev, stoprev=None):
877 877 """Obtain the delta chain for a revision.
878 878
879 879 ``stoprev`` specifies a revision to stop at. If not specified, we
880 880 stop at the base of the chain.
881 881
882 882 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
883 883 revs in ascending order and ``stopped`` is a bool indicating whether
884 884 ``stoprev`` was hit.
885 885 """
886 886 # Try C implementation.
887 887 try:
888 888 return self.index.deltachain(rev, stoprev, self._generaldelta)
889 889 except AttributeError:
890 890 pass
891 891
892 892 chain = []
893 893
894 894 # Alias to prevent attribute lookup in tight loop.
895 895 index = self.index
896 896 generaldelta = self._generaldelta
897 897
898 898 iterrev = rev
899 899 e = index[iterrev]
900 900 while iterrev != e[3] and iterrev != stoprev:
901 901 chain.append(iterrev)
902 902 if generaldelta:
903 903 iterrev = e[3]
904 904 else:
905 905 iterrev -= 1
906 906 e = index[iterrev]
907 907
908 908 if iterrev == stoprev:
909 909 stopped = True
910 910 else:
911 911 chain.append(iterrev)
912 912 stopped = False
913 913
914 914 chain.reverse()
915 915 return chain, stopped
916 916
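# A sketch of the pure-python fallback above: with generaldelta each
# entry names its base revision explicitly; without it the base is
# implicitly the previous revision. The dict stands in for the index.
def delta_chain(bases, rev, generaldelta):
    chain = []
    while bases[rev] != rev:
        chain.append(rev)
        rev = bases[rev] if generaldelta else rev - 1
    chain.append(rev)
    chain.reverse()
    return chain

assert delta_chain({0: 0, 1: 0, 2: 1}, 2, True) == [0, 1, 2]
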
917 917 def ancestors(self, revs, stoprev=0, inclusive=False):
918 918 """Generate the ancestors of 'revs' in reverse revision order.
919 919 Does not generate revs lower than stoprev.
920 920
921 921 See the documentation for ancestor.lazyancestors for more details."""
922 922
923 923 # first, make sure start revisions aren't filtered
924 924 revs = list(revs)
925 925 checkrev = self.node
926 926 for r in revs:
927 927 checkrev(r)
928 928 # and we're sure ancestors aren't filtered as well
929 929
930 930 if rustancestor is not None:
931 931 lazyancestors = rustancestor.LazyAncestors
932 932 arg = self.index
933 933 else:
934 934 lazyancestors = ancestor.lazyancestors
935 935 arg = self._uncheckedparentrevs
936 936 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
937 937
938 938 def descendants(self, revs):
939 939 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
940 940
941 941 def findcommonmissing(self, common=None, heads=None):
942 942 """Return a tuple of the ancestors of common and the ancestors of heads
943 943 that are not ancestors of common. In revset terminology, we return the
944 944 tuple:
945 945
946 946 ::common, (::heads) - (::common)
947 947
948 948 The list is sorted by revision number, meaning it is
949 949 topologically sorted.
950 950
951 951 'heads' and 'common' are both lists of node IDs. If heads is
952 952 not supplied, uses all of the revlog's heads. If common is not
953 953 supplied, uses nullid."""
954 954 if common is None:
955 955 common = [self.nullid]
956 956 if heads is None:
957 957 heads = self.heads()
958 958
959 959 common = [self.rev(n) for n in common]
960 960 heads = [self.rev(n) for n in heads]
961 961
962 962 # we want the ancestors, but inclusive
963 963 class lazyset(object):
964 964 def __init__(self, lazyvalues):
965 965 self.addedvalues = set()
966 966 self.lazyvalues = lazyvalues
967 967
968 968 def __contains__(self, value):
969 969 return value in self.addedvalues or value in self.lazyvalues
970 970
971 971 def __iter__(self):
972 972 added = self.addedvalues
973 973 for r in added:
974 974 yield r
975 975 for r in self.lazyvalues:
976 976 if r not in added:
977 977 yield r
978 978
979 979 def add(self, value):
980 980 self.addedvalues.add(value)
981 981
982 982 def update(self, values):
983 983 self.addedvalues.update(values)
984 984
985 985 has = lazyset(self.ancestors(common))
986 986 has.add(nullrev)
987 987 has.update(common)
988 988
989 989 # take all ancestors from heads that aren't in has
990 990 missing = set()
991 991 visit = collections.deque(r for r in heads if r not in has)
992 992 while visit:
993 993 r = visit.popleft()
994 994 if r in missing:
995 995 continue
996 996 else:
997 997 missing.add(r)
998 998 for p in self.parentrevs(r):
999 999 if p not in has:
1000 1000 visit.append(p)
1001 1001 missing = list(missing)
1002 1002 missing.sort()
1003 1003 return has, [self.node(miss) for miss in missing]
1004 1004
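# A compact sketch of the breadth-first "missing" walk above, with a
# dict of rev -> parents standing in for parentrevs().
import collections

def missing_revs(parents, has, heads):
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        visit.extend(p for p in parents.get(r, ()) if p not in has)
    return sorted(missing)

# linear history 0-1-2-3 where "common" already covers {0, 1}
assert missing_revs({2: [1], 3: [2]}, {0, 1}, [3]) == [2, 3]
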
1005 1005 def incrementalmissingrevs(self, common=None):
1006 1006 """Return an object that can be used to incrementally compute the
1007 1007 revision numbers of the ancestors of arbitrary sets that are not
1008 1008 ancestors of common. This is an ancestor.incrementalmissingancestors
1009 1009 object.
1010 1010
1011 1011 'common' is a list of revision numbers. If common is not supplied, uses
1012 1012 nullrev.
1013 1013 """
1014 1014 if common is None:
1015 1015 common = [nullrev]
1016 1016
1017 1017 if rustancestor is not None:
1018 1018 return rustancestor.MissingAncestors(self.index, common)
1019 1019 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1020 1020
1021 1021 def findmissingrevs(self, common=None, heads=None):
1022 1022 """Return the revision numbers of the ancestors of heads that
1023 1023 are not ancestors of common.
1024 1024
1025 1025 More specifically, return a list of revision numbers corresponding to
1026 1026 nodes N such that every N satisfies the following constraints:
1027 1027
1028 1028 1. N is an ancestor of some node in 'heads'
1029 1029 2. N is not an ancestor of any node in 'common'
1030 1030
1031 1031 The list is sorted by revision number, meaning it is
1032 1032 topologically sorted.
1033 1033
1034 1034 'heads' and 'common' are both lists of revision numbers. If heads is
1035 1035 not supplied, uses all of the revlog's heads. If common is not
1036 1036 supplied, uses nullid."""
1037 1037 if common is None:
1038 1038 common = [nullrev]
1039 1039 if heads is None:
1040 1040 heads = self.headrevs()
1041 1041
1042 1042 inc = self.incrementalmissingrevs(common=common)
1043 1043 return inc.missingancestors(heads)
1044 1044
1045 1045 def findmissing(self, common=None, heads=None):
1046 1046 """Return the ancestors of heads that are not ancestors of common.
1047 1047
1048 1048 More specifically, return a list of nodes N such that every N
1049 1049 satisfies the following constraints:
1050 1050
1051 1051 1. N is an ancestor of some node in 'heads'
1052 1052 2. N is not an ancestor of any node in 'common'
1053 1053
1054 1054 The list is sorted by revision number, meaning it is
1055 1055 topologically sorted.
1056 1056
1057 1057 'heads' and 'common' are both lists of node IDs. If heads is
1058 1058 not supplied, uses all of the revlog's heads. If common is not
1059 1059 supplied, uses nullid."""
1060 1060 if common is None:
1061 1061 common = [self.nullid]
1062 1062 if heads is None:
1063 1063 heads = self.heads()
1064 1064
1065 1065 common = [self.rev(n) for n in common]
1066 1066 heads = [self.rev(n) for n in heads]
1067 1067
1068 1068 inc = self.incrementalmissingrevs(common=common)
1069 1069 return [self.node(r) for r in inc.missingancestors(heads)]
1070 1070
1071 1071 def nodesbetween(self, roots=None, heads=None):
1072 1072 """Return a topological path from 'roots' to 'heads'.
1073 1073
1074 1074 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1075 1075 topologically sorted list of all nodes N that satisfy both of
1076 1076 these constraints:
1077 1077
1078 1078 1. N is a descendant of some node in 'roots'
1079 1079 2. N is an ancestor of some node in 'heads'
1080 1080
1081 1081 Every node is considered to be both a descendant and an ancestor
1082 1082 of itself, so every reachable node in 'roots' and 'heads' will be
1083 1083 included in 'nodes'.
1084 1084
1085 1085 'outroots' is the list of reachable nodes in 'roots', i.e., the
1086 1086 subset of 'roots' that is returned in 'nodes'. Likewise,
1087 1087 'outheads' is the subset of 'heads' that is also in 'nodes'.
1088 1088
1089 1089 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1090 1090 unspecified, uses nullid as the only root. If 'heads' is
1091 1091 unspecified, uses list of all of the revlog's heads."""
1092 1092 nonodes = ([], [], [])
1093 1093 if roots is not None:
1094 1094 roots = list(roots)
1095 1095 if not roots:
1096 1096 return nonodes
1097 1097 lowestrev = min([self.rev(n) for n in roots])
1098 1098 else:
1099 1099 roots = [self.nullid] # Everybody's a descendant of nullid
1100 1100 lowestrev = nullrev
1101 1101 if (lowestrev == nullrev) and (heads is None):
1102 1102 # We want _all_ the nodes!
1103 1103 return (
1104 1104 [self.node(r) for r in self],
1105 1105 [self.nullid],
1106 1106 list(self.heads()),
1107 1107 )
1108 1108 if heads is None:
1109 1109 # All nodes are ancestors, so the latest ancestor is the last
1110 1110 # node.
1111 1111 highestrev = len(self) - 1
1112 1112 # Set ancestors to None to signal that every node is an ancestor.
1113 1113 ancestors = None
1114 1114 # Set heads to an empty dictionary for later discovery of heads
1115 1115 heads = {}
1116 1116 else:
1117 1117 heads = list(heads)
1118 1118 if not heads:
1119 1119 return nonodes
1120 1120 ancestors = set()
1121 1121 # Turn heads into a dictionary so we can remove 'fake' heads.
1122 1122 # Also, later we will be using it to filter out the heads we can't
1123 1123 # find from roots.
1124 1124 heads = dict.fromkeys(heads, False)
1125 1125 # Start at the top and keep marking parents until we're done.
1126 1126 nodestotag = set(heads)
1127 1127 # Remember where the top was so we can use it as a limit later.
1128 1128 highestrev = max([self.rev(n) for n in nodestotag])
1129 1129 while nodestotag:
1130 1130 # grab a node to tag
1131 1131 n = nodestotag.pop()
1132 1132 # Never tag nullid
1133 1133 if n == self.nullid:
1134 1134 continue
1135 1135 # A node's revision number represents its place in a
1136 1136 # topologically sorted list of nodes.
1137 1137 r = self.rev(n)
1138 1138 if r >= lowestrev:
1139 1139 if n not in ancestors:
1140 1140 # If we are possibly a descendant of one of the roots
1141 1141 # and we haven't already been marked as an ancestor
1142 1142 ancestors.add(n) # Mark as ancestor
1143 1143 # Add non-nullid parents to list of nodes to tag.
1144 1144 nodestotag.update(
1145 1145 [p for p in self.parents(n) if p != self.nullid]
1146 1146 )
1147 1147 elif n in heads: # We've seen it before, is it a fake head?
1148 1148 # So it is, real heads should not be the ancestors of
1149 1149 # any other heads.
1150 1150 heads.pop(n)
1151 1151 if not ancestors:
1152 1152 return nonodes
1153 1153 # Now that we have our set of ancestors, we want to remove any
1154 1154 # roots that are not ancestors.
1155 1155
1156 1156 # If one of the roots was nullid, everything is included anyway.
1157 1157 if lowestrev > nullrev:
1158 1158 # But, since we weren't, let's recompute the lowest rev to not
1159 1159 # include roots that aren't ancestors.
1160 1160
1161 1161 # Filter out roots that aren't ancestors of heads
1162 1162 roots = [root for root in roots if root in ancestors]
1163 1163 # Recompute the lowest revision
1164 1164 if roots:
1165 1165 lowestrev = min([self.rev(root) for root in roots])
1166 1166 else:
1167 1167 # No more roots? Return empty list
1168 1168 return nonodes
1169 1169 else:
1170 1170 # We are descending from nullid, and don't need to care about
1171 1171 # any other roots.
1172 1172 lowestrev = nullrev
1173 1173 roots = [self.nullid]
1174 1174 # Transform our roots list into a set.
1175 1175 descendants = set(roots)
1176 1176 # Also, keep the original roots so we can filter out roots that aren't
1177 1177 # 'real' roots (i.e. are descended from other roots).
1178 1178 roots = descendants.copy()
1179 1179 # Our topologically sorted list of output nodes.
1180 1180 orderedout = []
1181 1181 # Don't start at nullid since we don't want nullid in our output list,
1182 1182 # and if nullid shows up in descendants, empty parents will look like
1183 1183 # they're descendants.
1184 1184 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1185 1185 n = self.node(r)
1186 1186 isdescendant = False
1187 1187 if lowestrev == nullrev: # Everybody is a descendant of nullid
1188 1188 isdescendant = True
1189 1189 elif n in descendants:
1190 1190 # n is already a descendant
1191 1191 isdescendant = True
1192 1192 # This check only needs to be done here because all the roots
1193 1193 # will start being marked as descendants before the loop.
1194 1194 if n in roots:
1195 1195 # If n was a root, check if it's a 'real' root.
1196 1196 p = tuple(self.parents(n))
1197 1197 # If any of its parents are descendants, it's not a root.
1198 1198 if (p[0] in descendants) or (p[1] in descendants):
1199 1199 roots.remove(n)
1200 1200 else:
1201 1201 p = tuple(self.parents(n))
1202 1202 # A node is a descendant if either of its parents are
1203 1203 # descendants. (We seeded the descendants set with the roots
1204 1204 # up there, remember?)
1205 1205 if (p[0] in descendants) or (p[1] in descendants):
1206 1206 descendants.add(n)
1207 1207 isdescendant = True
1208 1208 if isdescendant and ((ancestors is None) or (n in ancestors)):
1209 1209 # Only include nodes that are both descendants and ancestors.
1210 1210 orderedout.append(n)
1211 1211 if (ancestors is not None) and (n in heads):
1212 1212 # We're trying to figure out which heads are reachable
1213 1213 # from roots.
1214 1214 # Mark this head as having been reached
1215 1215 heads[n] = True
1216 1216 elif ancestors is None:
1217 1217 # Otherwise, we're trying to discover the heads.
1218 1218 # Assume this is a head because if it isn't, the next step
1219 1219 # will eventually remove it.
1220 1220 heads[n] = True
1221 1221 # But, obviously its parents aren't.
1222 1222 for p in self.parents(n):
1223 1223 heads.pop(p, None)
1224 1224 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1225 1225 roots = list(roots)
1226 1226 assert orderedout
1227 1227 assert roots
1228 1228 assert heads
1229 1229 return (orderedout, roots, heads)
1230 1230
1231 1231 def headrevs(self, revs=None):
1232 1232 if revs is None:
1233 1233 try:
1234 1234 return self.index.headrevs()
1235 1235 except AttributeError:
1236 1236 return self._headrevs()
1237 1237 if rustdagop is not None:
1238 1238 return rustdagop.headrevs(self.index, revs)
1239 1239 return dagop.headrevs(revs, self._uncheckedparentrevs)
1240 1240
1241 1241 def computephases(self, roots):
1242 1242 return self.index.computephasesmapsets(roots)
1243 1243
1244 1244 def _headrevs(self):
1245 1245 count = len(self)
1246 1246 if not count:
1247 1247 return [nullrev]
1248 1248 # we won't iterate over filtered revs, so nobody is a head at the start
1249 1249 ishead = [0] * (count + 1)
1250 1250 index = self.index
1251 1251 for r in self:
1252 1252 ishead[r] = 1 # I may be a head
1253 1253 e = index[r]
1254 1254 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1255 1255 return [r for r, val in enumerate(ishead) if val]
1256 1256
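# A sketch of the scan above: every revision starts out as a candidate
# head, and each revision then clears its parents, leaving only the
# childless revisions marked.
def head_revs(parents, count):
    ishead = [True] * count
    for r in range(count):
        for p in parents.get(r, ()):
            ishead[p] = False
    return [r for r, flag in enumerate(ishead) if flag]

# 0 -> 1 -> 2 and 0 -> 3: the heads are 2 and 3
assert head_revs({1: (0,), 2: (1,), 3: (0,)}, 4) == [2, 3]
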
1257 1257 def heads(self, start=None, stop=None):
1258 1258 """return the list of all nodes that have no children
1259 1259
1260 1260 if start is specified, only heads that are descendants of
1261 1261 start will be returned
1262 1262 if stop is specified, it will consider all the revs from stop
1263 1263 as if they had no children
1264 1264 """
1265 1265 if start is None and stop is None:
1266 1266 if not len(self):
1267 1267 return [self.nullid]
1268 1268 return [self.node(r) for r in self.headrevs()]
1269 1269
1270 1270 if start is None:
1271 1271 start = nullrev
1272 1272 else:
1273 1273 start = self.rev(start)
1274 1274
1275 1275 stoprevs = {self.rev(n) for n in stop or []}
1276 1276
1277 1277 revs = dagop.headrevssubset(
1278 1278 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1279 1279 )
1280 1280
1281 1281 return [self.node(rev) for rev in revs]
1282 1282
1283 1283 def children(self, node):
1284 1284 """find the children of a given node"""
1285 1285 c = []
1286 1286 p = self.rev(node)
1287 1287 for r in self.revs(start=p + 1):
1288 1288 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1289 1289 if prevs:
1290 1290 for pr in prevs:
1291 1291 if pr == p:
1292 1292 c.append(self.node(r))
1293 1293 elif p == nullrev:
1294 1294 c.append(self.node(r))
1295 1295 return c
1296 1296
1297 1297 def commonancestorsheads(self, a, b):
1298 1298 """calculate all the heads of the common ancestors of nodes a and b"""
1299 1299 a, b = self.rev(a), self.rev(b)
1300 1300 ancs = self._commonancestorsheads(a, b)
1301 1301 return pycompat.maplist(self.node, ancs)
1302 1302
1303 1303 def _commonancestorsheads(self, *revs):
1304 1304 """calculate all the heads of the common ancestors of revs"""
1305 1305 try:
1306 1306 ancs = self.index.commonancestorsheads(*revs)
1307 1307 except (AttributeError, OverflowError): # C implementation failed
1308 1308 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1309 1309 return ancs
1310 1310
1311 1311 def isancestor(self, a, b):
1312 1312 """return True if node a is an ancestor of node b
1313 1313
1314 1314 A revision is considered an ancestor of itself."""
1315 1315 a, b = self.rev(a), self.rev(b)
1316 1316 return self.isancestorrev(a, b)
1317 1317
1318 1318 def isancestorrev(self, a, b):
1319 1319 """return True if revision a is an ancestor of revision b
1320 1320
1321 1321 A revision is considered an ancestor of itself.
1322 1322
1323 1323 The implementation of this is trivial but the use of
1324 1324 reachableroots is not."""
1325 1325 if a == nullrev:
1326 1326 return True
1327 1327 elif a == b:
1328 1328 return True
1329 1329 elif a > b:
1330 1330 return False
1331 1331 return bool(self.reachableroots(a, [b], [a], includepath=False))
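# Hedged illustration (hypothetical linear history 0 <- 1 <- 2):
# isancestorrev(0, 2) is True because reachableroots(0, [2], [0]) is
# non-empty, while isancestorrev(2, 0) returns False early since an
# ancestor's revision number can never exceed its descendant's.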
1332 1332
1333 1333 def reachableroots(self, minroot, heads, roots, includepath=False):
1334 1334 """return (heads(::(<roots> and <roots>::<heads>)))
1335 1335
1336 1336 If includepath is True, return (<roots>::<heads>)."""
1337 1337 try:
1338 1338 return self.index.reachableroots2(
1339 1339 minroot, heads, roots, includepath
1340 1340 )
1341 1341 except AttributeError:
1342 1342 return dagop._reachablerootspure(
1343 1343 self.parentrevs, minroot, roots, heads, includepath
1344 1344 )
1345 1345
1346 1346 def ancestor(self, a, b):
1347 1347 """calculate the "best" common ancestor of nodes a and b"""
1348 1348
1349 1349 a, b = self.rev(a), self.rev(b)
1350 1350 try:
1351 1351 ancs = self.index.ancestors(a, b)
1352 1352 except (AttributeError, OverflowError):
1353 1353 ancs = ancestor.ancestors(self.parentrevs, a, b)
1354 1354 if ancs:
1355 1355 # choose a consistent winner when there's a tie
1356 1356 return min(map(self.node, ancs))
1357 1357 return self.nullid
1358 1358
1359 1359 def _match(self, id):
1360 1360 if isinstance(id, int):
1361 1361 # rev
1362 1362 return self.node(id)
1363 1363 if len(id) == self.nodeconstants.nodelen:
1364 1364 # possibly a binary node
1365 1365 # odds of a binary node being all hex in ASCII are 1 in 10**25
1366 1366 try:
1367 1367 node = id
1368 1368 self.rev(node) # quick search the index
1369 1369 return node
1370 1370 except error.LookupError:
1371 1371 pass # may be partial hex id
1372 1372 try:
1373 1373 # str(rev)
1374 1374 rev = int(id)
1375 1375 if b"%d" % rev != id:
1376 1376 raise ValueError
1377 1377 if rev < 0:
1378 1378 rev = len(self) + rev
1379 1379 if rev < 0 or rev >= len(self):
1380 1380 raise ValueError
1381 1381 return self.node(rev)
1382 1382 except (ValueError, OverflowError):
1383 1383 pass
1384 1384 if len(id) == 2 * self.nodeconstants.nodelen:
1385 1385 try:
1386 1386 # a full hex nodeid?
1387 1387 node = bin(id)
1388 1388 self.rev(node)
1389 1389 return node
1390 1390 except (TypeError, error.LookupError):
1391 1391 pass
1392 1392
1393 1393 def _partialmatch(self, id):
1394 1394 # we don't care about wdirfilenodeids as they should always be full hashes
1395 1395 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1396 1396 try:
1397 1397 partial = self.index.partialmatch(id)
1398 1398 if partial and self.hasnode(partial):
1399 1399 if maybewdir:
1400 1400 # single 'ff...' match in radix tree, ambiguous with wdir
1401 1401 raise error.RevlogError
1402 1402 return partial
1403 1403 if maybewdir:
1404 1404 # no 'ff...' match in radix tree, wdir identified
1405 1405 raise error.WdirUnsupported
1406 1406 return None
1407 1407 except error.RevlogError:
1408 1408 # parsers.c radix tree lookup gave multiple matches
1409 1409 # fast path: for unfiltered changelog, radix tree is accurate
1410 1410 if not getattr(self, 'filteredrevs', None):
1411 1411 raise error.AmbiguousPrefixLookupError(
1412 1412 id, self.display_id, _(b'ambiguous identifier')
1413 1413 )
1414 1414 # fall through to slow path that filters hidden revisions
1415 1415 except (AttributeError, ValueError):
1416 1416 # we are pure python, or key was too short to search radix tree
1417 1417 pass
1418 1418
1419 1419 if id in self._pcache:
1420 1420 return self._pcache[id]
1421 1421
1422 1422 if len(id) <= 40:
1423 1423 try:
1424 1424 # hex(node)[:...]
1425 1425 l = len(id) // 2 # grab an even number of digits
1426 1426 prefix = bin(id[: l * 2])
1427 1427 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1428 1428 nl = [
1429 1429 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1430 1430 ]
1431 1431 if self.nodeconstants.nullhex.startswith(id):
1432 1432 nl.append(self.nullid)
1433 1433 if len(nl) > 0:
1434 1434 if len(nl) == 1 and not maybewdir:
1435 1435 self._pcache[id] = nl[0]
1436 1436 return nl[0]
1437 1437 raise error.AmbiguousPrefixLookupError(
1438 1438 id, self.display_id, _(b'ambiguous identifier')
1439 1439 )
1440 1440 if maybewdir:
1441 1441 raise error.WdirUnsupported
1442 1442 return None
1443 1443 except TypeError:
1444 1444 pass
1445 1445
1446 1446 def lookup(self, id):
1447 1447 """locate a node based on:
1448 1448 - revision number or str(revision number)
1449 1449 - nodeid or subset of hex nodeid
1450 1450 """
1451 1451 n = self._match(id)
1452 1452 if n is not None:
1453 1453 return n
1454 1454 n = self._partialmatch(id)
1455 1455 if n:
1456 1456 return n
1457 1457
1458 1458 raise error.LookupError(id, self.display_id, _(b'no match found'))
1459 1459
1460 1460 def shortest(self, node, minlength=1):
1461 1461 """Find the shortest unambiguous prefix that matches node."""
1462 1462
1463 1463 def isvalid(prefix):
1464 1464 try:
1465 1465 matchednode = self._partialmatch(prefix)
1466 1466 except error.AmbiguousPrefixLookupError:
1467 1467 return False
1468 1468 except error.WdirUnsupported:
1469 1469 # single 'ff...' match
1470 1470 return True
1471 1471 if matchednode is None:
1472 1472 raise error.LookupError(node, self.display_id, _(b'no node'))
1473 1473 return True
1474 1474
1475 1475 def maybewdir(prefix):
1476 1476 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1477 1477
1478 1478 hexnode = hex(node)
1479 1479
1480 1480 def disambiguate(hexnode, minlength):
1481 1481 """Disambiguate against wdirid."""
1482 1482 for length in range(minlength, len(hexnode) + 1):
1483 1483 prefix = hexnode[:length]
1484 1484 if not maybewdir(prefix):
1485 1485 return prefix
1486 1486
1487 1487 if not getattr(self, 'filteredrevs', None):
1488 1488 try:
1489 1489 length = max(self.index.shortest(node), minlength)
1490 1490 return disambiguate(hexnode, length)
1491 1491 except error.RevlogError:
1492 1492 if node != self.nodeconstants.wdirid:
1493 1493 raise error.LookupError(
1494 1494 node, self.display_id, _(b'no node')
1495 1495 )
1496 1496 except AttributeError:
1497 1497 # Fall through to pure code
1498 1498 pass
1499 1499
1500 1500 if node == self.nodeconstants.wdirid:
1501 1501 for length in range(minlength, len(hexnode) + 1):
1502 1502 prefix = hexnode[:length]
1503 1503 if isvalid(prefix):
1504 1504 return prefix
1505 1505
1506 1506 for length in range(minlength, len(hexnode) + 1):
1507 1507 prefix = hexnode[:length]
1508 1508 if isvalid(prefix):
1509 1509 return disambiguate(hexnode, length)
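# Hedged example: for a node whose hex form begins with b'f00d',
# prefixes made only of b'f' characters are skipped (they could denote
# wdir), so b'f' is never returned while b'f0' can be, provided no
# other node shares that prefix.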
1510 1510
1511 1511 def cmp(self, node, text):
1512 1512 """compare text with a given file revision
1513 1513
1514 1514 returns True if text is different than what is stored.
1515 1515 """
1516 1516 p1, p2 = self.parents(node)
1517 1517 return storageutil.hashrevisionsha1(text, p1, p2) != node
1518 1518
1519 1519 def _cachesegment(self, offset, data):
1520 1520 """Add a segment to the revlog cache.
1521 1521
1522 1522 Accepts an absolute offset and the data that is at that location.
1523 1523 """
1524 1524 o, d = self._chunkcache
1525 1525 # try to add to existing cache
1526 1526 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1527 1527 self._chunkcache = o, d + data
1528 1528 else:
1529 1529 self._chunkcache = offset, data
1530 1530
1531 1531 def _readsegment(self, offset, length, df=None):
1532 1532 """Load a segment of raw data from the revlog.
1533 1533
1534 1534 Accepts an absolute offset, length to read, and an optional existing
1535 1535 file handle to read from.
1536 1536
1537 1537 If an existing file handle is passed, it will be seeked and the
1538 1538 original seek position will NOT be restored.
1539 1539
1540 1540 Returns a str or buffer of raw byte data.
1541 1541
1542 1542 Raises if the requested number of bytes could not be read.
1543 1543 """
1544 1544 # Cache data both forward and backward around the requested
1545 1545 # data, in a fixed size window. This helps speed up operations
1546 1546 # involving reading the revlog backwards.
1547 1547 cachesize = self._chunkcachesize
1548 1548 realoffset = offset & ~(cachesize - 1)
1549 1549 reallength = (
1550 1550 (offset + length + cachesize) & ~(cachesize - 1)
1551 1551 ) - realoffset
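# Worked example of the alignment math, assuming a hypothetical
# cachesize of 65536: offset=70000, length=100 gives
# realoffset = 70000 & ~65535 = 65536 and
# reallength = ((70000 + 100 + 65536) & ~65535) - 65536 = 65536,
# i.e. the whole 64KiB window around the request is read and cached.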
1552 1552 with self._datareadfp(df) as df:
1553 1553 df.seek(realoffset)
1554 1554 d = df.read(reallength)
1555 1555
1556 1556 self._cachesegment(realoffset, d)
1557 1557 if offset != realoffset or reallength != length:
1558 1558 startoffset = offset - realoffset
1559 1559 if len(d) - startoffset < length:
1560 1560 raise error.RevlogError(
1561 1561 _(
1562 1562 b'partial read of revlog %s; expected %d bytes from '
1563 1563 b'offset %d, got %d'
1564 1564 )
1565 1565 % (
1566 1566 self._indexfile if self._inline else self._datafile,
1567 1567 length,
1568 1568 offset,
1569 1569 len(d) - startoffset,
1570 1570 )
1571 1571 )
1572 1572
1573 1573 return util.buffer(d, startoffset, length)
1574 1574
1575 1575 if len(d) < length:
1576 1576 raise error.RevlogError(
1577 1577 _(
1578 1578 b'partial read of revlog %s; expected %d bytes from offset '
1579 1579 b'%d, got %d'
1580 1580 )
1581 1581 % (
1582 1582 self._indexfile if self._inline else self._datafile,
1583 1583 length,
1584 1584 offset,
1585 1585 len(d),
1586 1586 )
1587 1587 )
1588 1588
1589 1589 return d
1590 1590
1591 1591 def _getsegment(self, offset, length, df=None):
1592 1592 """Obtain a segment of raw data from the revlog.
1593 1593
1594 1594 Accepts an absolute offset, length of bytes to obtain, and an
1595 1595 optional file handle to the already-opened revlog. If the file
1596 1596 handle is used, its original seek position will not be preserved.
1597 1597
1598 1598 Requests for data may be satisfied by a cache.
1599 1599
1600 1600 Returns a str or a buffer instance of raw byte data.
1601 1601 """
1602 1602 o, d = self._chunkcache
1603 1603 l = len(d)
1604 1604
1605 1605 # is it in the cache?
1606 1606 cachestart = offset - o
1607 1607 cacheend = cachestart + length
1608 1608 if cachestart >= 0 and cacheend <= l:
1609 1609 if cachestart == 0 and cacheend == l:
1610 1610 return d # avoid a copy
1611 1611 return util.buffer(d, cachestart, cacheend - cachestart)
1612 1612
1613 1613 return self._readsegment(offset, length, df=df)
1614 1614
1615 1615 def _getsegmentforrevs(self, startrev, endrev, df=None):
1616 1616 """Obtain a segment of raw data corresponding to a range of revisions.
1617 1617
1618 1618 Accepts the start and end revisions and an optional already-open
1619 1619 file handle to be used for reading. If the file handle is read, its
1620 1620 seek position will not be preserved.
1621 1621
1622 1622 Requests for data may be satisfied by a cache.
1623 1623
1624 1624 Returns a 2-tuple of (offset, data) for the requested range of
1625 1625 revisions. Offset is the integer offset from the beginning of the
1626 1626 revlog and data is a str or buffer of the raw byte data.
1627 1627
1628 1628 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1629 1629 to determine where each revision's data begins and ends.
1630 1630 """
1631 1631 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1632 1632 # (functions are expensive).
1633 1633 index = self.index
1634 1634 istart = index[startrev]
1635 1635 start = int(istart[0] >> 16)
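# entry[0] packs (byte offset << 16) | flags (see the offset_type() use
# in _addrevision below), so shifting right by 16 bits recovers the
# start offset of the revision's data.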
1636 1636 if startrev == endrev:
1637 1637 end = start + istart[1]
1638 1638 else:
1639 1639 iend = index[endrev]
1640 1640 end = int(iend[0] >> 16) + iend[1]
1641 1641
1642 1642 if self._inline:
1643 1643 start += (startrev + 1) * self.index.entry_size
1644 1644 end += (endrev + 1) * self.index.entry_size
1645 1645 length = end - start
1646 1646
1647 1647 return start, self._getsegment(start, length, df=df)
1648 1648
1649 1649 def _chunk(self, rev, df=None):
1650 1650 """Obtain a single decompressed chunk for a revision.
1651 1651
1652 1652 Accepts an integer revision and an optional already-open file handle
1653 1653 to be used for reading. If used, the seek position of the file will not
1654 1654 be preserved.
1655 1655
1656 1656 Returns a str holding uncompressed data for the requested revision.
1657 1657 """
1658 1658 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1659 1659
1660 1660 def _chunks(self, revs, df=None, targetsize=None):
1661 1661 """Obtain decompressed chunks for the specified revisions.
1662 1662
1663 1663 Accepts an iterable of numeric revisions that are assumed to be in
1664 1664 ascending order. Also accepts an optional already-open file handle
1665 1665 to be used for reading. If used, the seek position of the file will
1666 1666 not be preserved.
1667 1667
1668 1668 This function is similar to calling ``self._chunk()`` multiple times,
1669 1669 but is faster.
1670 1670
1671 1671 Returns a list with decompressed data for each requested revision.
1672 1672 """
1673 1673 if not revs:
1674 1674 return []
1675 1675 start = self.start
1676 1676 length = self.length
1677 1677 inline = self._inline
1678 1678 iosize = self.index.entry_size
1679 1679 buffer = util.buffer
1680 1680
1681 1681 l = []
1682 1682 ladd = l.append
1683 1683
1684 1684 if not self._withsparseread:
1685 1685 slicedchunks = (revs,)
1686 1686 else:
1687 1687 slicedchunks = deltautil.slicechunk(
1688 1688 self, revs, targetsize=targetsize
1689 1689 )
1690 1690
1691 1691 for revschunk in slicedchunks:
1692 1692 firstrev = revschunk[0]
1693 1693 # Skip trailing revisions with empty diff
1694 1694 for lastrev in revschunk[::-1]:
1695 1695 if length(lastrev) != 0:
1696 1696 break
1697 1697
1698 1698 try:
1699 1699 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1700 1700 except OverflowError:
1701 1701 # issue4215 - we can't cache a run of chunks greater than
1702 1702 # 2G on Windows
1703 1703 return [self._chunk(rev, df=df) for rev in revschunk]
1704 1704
1705 1705 decomp = self.decompress
1706 1706 for rev in revschunk:
1707 1707 chunkstart = start(rev)
1708 1708 if inline:
1709 1709 chunkstart += (rev + 1) * iosize
1710 1710 chunklength = length(rev)
1711 1711 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1712 1712
1713 1713 return l
1714 1714
1715 1715 def _chunkclear(self):
1716 1716 """Clear the raw chunk cache."""
1717 1717 self._chunkcache = (0, b'')
1718 1718
1719 1719 def deltaparent(self, rev):
1720 1720 """return deltaparent of the given revision"""
1721 1721 base = self.index[rev][3]
1722 1722 if base == rev:
1723 1723 return nullrev
1724 1724 elif self._generaldelta:
1725 1725 return base
1726 1726 else:
1727 1727 return rev - 1
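# Hedged illustration: with generaldelta, entry[3] can name any earlier
# revision as the delta base; without it, a non-snapshot revision r is
# always stored as a delta against r - 1.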
1728 1728
1729 1729 def issnapshot(self, rev):
1730 1730 """tells whether rev is a snapshot"""
1731 1731 if not self._sparserevlog:
1732 1732 return self.deltaparent(rev) == nullrev
1733 1733 elif util.safehasattr(self.index, b'issnapshot'):
1734 1734 # directly assign the method to cache the testing and access
1735 1735 self.issnapshot = self.index.issnapshot
1736 1736 return self.issnapshot(rev)
1737 1737 if rev == nullrev:
1738 1738 return True
1739 1739 entry = self.index[rev]
1740 1740 base = entry[3]
1741 1741 if base == rev:
1742 1742 return True
1743 1743 if base == nullrev:
1744 1744 return True
1745 1745 p1 = entry[5]
1746 1746 p2 = entry[6]
1747 1747 if base == p1 or base == p2:
1748 1748 return False
1749 1749 return self.issnapshot(base)
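# Sketch of the logic above for sparse revlogs: a delta whose base is
# one of its parents belongs to a regular delta chain (not a snapshot),
# while a delta against an unrelated base is an intermediate snapshot
# exactly when that base is itself a snapshot, hence the recursion.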
1750 1750
1751 1751 def snapshotdepth(self, rev):
1752 1752 """number of snapshot in the chain before this one"""
1753 1753 if not self.issnapshot(rev):
1754 1754 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1755 1755 return len(self._deltachain(rev)[0]) - 1
1756 1756
1757 1757 def revdiff(self, rev1, rev2):
1758 1758 """return or calculate a delta between two revisions
1759 1759
1760 1760 The delta calculated is in binary form and is intended to be written to
1761 1761 revlog data directly. So this function needs raw revision data.
1762 1762 """
1763 1763 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1764 1764 return bytes(self._chunk(rev2))
1765 1765
1766 1766 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1767 1767
1768 1768 def _processflags(self, text, flags, operation, raw=False):
1769 1769 """deprecated entry point to access flag processors"""
1770 1770 msg = b'_processflag(...) use the specialized variant'
1771 1771 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1772 1772 if raw:
1773 1773 return text, flagutil.processflagsraw(self, text, flags)
1774 1774 elif operation == b'read':
1775 1775 return flagutil.processflagsread(self, text, flags)
1776 1776 else: # write operation
1777 1777 return flagutil.processflagswrite(self, text, flags)
1778 1778
1779 1779 def revision(self, nodeorrev, _df=None, raw=False):
1780 1780 """return an uncompressed revision of a given node or revision
1781 1781 number.
1782 1782
1783 1783 _df - an existing file handle to read from. (internal-only)
1784 1784 raw - an optional argument specifying if the revision data is to be
1785 1785 treated as raw data when applying flag transforms. 'raw' should be set
1786 1786 to True when generating changegroups or in debug commands.
1787 1787 """
1788 1788 if raw:
1789 1789 msg = (
1790 1790 b'revlog.revision(..., raw=True) is deprecated, '
1791 1791 b'use revlog.rawdata(...)'
1792 1792 )
1793 1793 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1794 1794 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1795 1795
1796 1796 def sidedata(self, nodeorrev, _df=None):
1797 1797 """a map of extra data related to the changeset but not part of the hash
1798 1798
1799 1799 This function currently returns a dictionary. However, a more
1800 1800 advanced mapping object will likely be used in the future for more
1801 1801 efficient/lazy code.
1802 1802 """
1803 1803 return self._revisiondata(nodeorrev, _df)[1]
1804 1804
1805 1805 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1806 1806 # deal with <nodeorrev> argument type
1807 1807 if isinstance(nodeorrev, int):
1808 1808 rev = nodeorrev
1809 1809 node = self.node(rev)
1810 1810 else:
1811 1811 node = nodeorrev
1812 1812 rev = None
1813 1813
1814 1814 # fast path the special `nullid` rev
1815 1815 if node == self.nullid:
1816 1816 return b"", {}
1817 1817
1818 1818 # ``rawtext`` is the text as stored inside the revlog. Might be the
1819 1819 # revision or might need to be processed to retrieve the revision.
1820 1820 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1821 1821
1822 1822 if self.hassidedata:
1823 1823 if rev is None:
1824 1824 rev = self.rev(node)
1825 1825 sidedata = self._sidedata(rev)
1826 1826 else:
1827 1827 sidedata = {}
1828 1828
1829 1829 if raw and validated:
1830 1830 # if we don't want to process the raw text and that raw
1831 1831 # text is cached, we can exit early.
1832 1832 return rawtext, sidedata
1833 1833 if rev is None:
1834 1834 rev = self.rev(node)
1835 1835 # the revlog's flags for this revision
1836 1836 # (they usually alter its state or content)
1837 1837 flags = self.flags(rev)
1838 1838
1839 1839 if validated and flags == REVIDX_DEFAULT_FLAGS:
1840 1840 # no extra flags set, no flag processor runs, text = rawtext
1841 1841 return rawtext, sidedata
1842 1842
1843 1843 if raw:
1844 1844 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1845 1845 text = rawtext
1846 1846 else:
1847 1847 r = flagutil.processflagsread(self, rawtext, flags)
1848 1848 text, validatehash = r
1849 1849 if validatehash:
1850 1850 self.checkhash(text, node, rev=rev)
1851 1851 if not validated:
1852 1852 self._revisioncache = (node, rev, rawtext)
1853 1853
1854 1854 return text, sidedata
1855 1855
1856 1856 def _rawtext(self, node, rev, _df=None):
1857 1857 """return the possibly unvalidated rawtext for a revision
1858 1858
1859 1859 returns (rev, rawtext, validated)
1860 1860 """
1861 1861
1862 1862 # revision in the cache (could be useful to apply delta)
1863 1863 cachedrev = None
1864 1864 # An intermediate text to apply deltas to
1865 1865 basetext = None
1866 1866
1867 1867 # Check if we have the entry in cache
1868 1868 # The cache entry looks like (node, rev, rawtext)
1869 1869 if self._revisioncache:
1870 1870 if self._revisioncache[0] == node:
1871 1871 return (rev, self._revisioncache[2], True)
1872 1872 cachedrev = self._revisioncache[1]
1873 1873
1874 1874 if rev is None:
1875 1875 rev = self.rev(node)
1876 1876
1877 1877 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1878 1878 if stopped:
1879 1879 basetext = self._revisioncache[2]
1880 1880
1881 1881 # drop cache to save memory, the caller is expected to
1882 1882 # update self._revisioncache after validating the text
1883 1883 self._revisioncache = None
1884 1884
1885 1885 targetsize = None
1886 1886 rawsize = self.index[rev][2]
1887 1887 if 0 <= rawsize:
1888 1888 targetsize = 4 * rawsize
1889 1889
1890 1890 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1891 1891 if basetext is None:
1892 1892 basetext = bytes(bins[0])
1893 1893 bins = bins[1:]
1894 1894
1895 1895 rawtext = mdiff.patches(basetext, bins)
1896 1896 del basetext # let us have a chance to free memory early
1897 1897 return (rev, rawtext, False)
1898 1898
1899 1899 def _sidedata(self, rev):
1900 1900 """Return the sidedata for a given revision number."""
1901 1901 index_entry = self.index[rev]
1902 1902 sidedata_offset = index_entry[8]
1903 1903 sidedata_size = index_entry[9]
1904 1904
1905 1905 if self._inline:
1906 1906 sidedata_offset += self.index.entry_size * (1 + rev)
1907 1907 if sidedata_size == 0:
1908 1908 return {}
1909 1909
1910 1910 segment = self._getsegment(sidedata_offset, sidedata_size)
1911 1911 sidedata = sidedatautil.deserialize_sidedata(segment)
1912 1912 return sidedata
1913 1913
1914 1914 def rawdata(self, nodeorrev, _df=None):
1915 1915 """return an uncompressed raw data of a given node or revision number.
1916 1916
1917 1917 _df - an existing file handle to read from. (internal-only)
1918 1918 """
1919 1919 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1920 1920
1921 1921 def hash(self, text, p1, p2):
1922 1922 """Compute a node hash.
1923 1923
1924 1924 Available as a function so that subclasses can replace the hash
1925 1925 as needed.
1926 1926 """
1927 1927 return storageutil.hashrevisionsha1(text, p1, p2)
1928 1928
1929 1929 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1930 1930 """Check node hash integrity.
1931 1931
1932 1932 Available as a function so that subclasses can extend hash mismatch
1933 1933 behaviors as needed.
1934 1934 """
1935 1935 try:
1936 1936 if p1 is None and p2 is None:
1937 1937 p1, p2 = self.parents(node)
1938 1938 if node != self.hash(text, p1, p2):
1939 1939 # Clear the revision cache on hash failure. The revision cache
1940 1940 # only stores the raw revision and clearing the cache does have
1941 1941 # the side-effect that we won't have a cache hit when the raw
1942 1942 # revision data is accessed. But this case should be rare and
1943 1943 # it is extra work to teach the cache about the hash
1944 1944 # verification state.
1945 1945 if self._revisioncache and self._revisioncache[0] == node:
1946 1946 self._revisioncache = None
1947 1947
1948 1948 revornode = rev
1949 1949 if revornode is None:
1950 1950 revornode = templatefilters.short(hex(node))
1951 1951 raise error.RevlogError(
1952 1952 _(b"integrity check failed on %s:%s")
1953 1953 % (self.display_id, pycompat.bytestr(revornode))
1954 1954 )
1955 1955 except error.RevlogError:
1956 1956 if self._censorable and storageutil.iscensoredtext(text):
1957 1957 raise error.CensoredNodeError(self.display_id, node, text)
1958 1958 raise
1959 1959
1960 1960 def _enforceinlinesize(self, tr):
1961 1961 """Check if the revlog is too big for inline and convert if so.
1962 1962
1963 1963 This should be called after revisions are added to the revlog. If the
1964 1964 revlog has grown too large to be an inline revlog, it will convert it
1965 1965 to use multiple index and data files.
1966 1966 """
1967 1967 tiprev = len(self) - 1
1968 1968 total_size = self.start(tiprev) + self.length(tiprev)
1969 1969 if not self._inline or total_size < _maxinline:
1970 1970 return
1971 1971
1972 1972 troffset = tr.findoffset(self._indexfile)
1973 1973 if troffset is None:
1974 1974 raise error.RevlogError(
1975 1975 _(b"%s not found in the transaction") % self._indexfile
1976 1976 )
1977 1977 trindex = 0
1978 1978 tr.add(self._datafile, 0)
1979 1979
1980 1980 existing_handles = False
1981 1981 if self._writinghandles is not None:
1982 1982 existing_handles = True
1983 1983 fp = self._writinghandles[0]
1984 1984 fp.flush()
1985 1985 fp.close()
1986 1986 # We can't use the cached file handle after close(). So prevent
1987 1987 # its usage.
1988 1988 self._writinghandles = None
1989 1989
1990 1990 new_dfh = self._datafp(b'w+')
1991 1991 new_dfh.truncate(0) # drop any potentially existing data
1992 1992 try:
1993 1993 with self._indexfp(b'r') as read_ifh:
1994 1994 for r in self:
1995 1995 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
1996 1996 if troffset <= self.start(r):
1997 1997 trindex = r
1998 1998 new_dfh.flush()
1999 1999
2000 2000 with self.opener(self._indexfile, mode=b'w', atomictemp=True) as fp:
2001 2001 self._format_flags &= ~FLAG_INLINE_DATA
2002 2002 self._inline = False
2003 2003 for i in self:
2004 2004 e = self.index.entry_binary(i)
2005 2005 if i == 0:
2006 2006 header = self._format_flags | self._format_version
2007 2007 header = self.index.pack_header(header)
2008 2008 e = header + e
2009 2009 fp.write(e)
2010 2010 # the temp file replaces the real index when we exit the context
2011 2011 # manager
2012 2012
2013 2013 tr.replace(self._indexfile, trindex * self.index.entry_size)
2014 2014 nodemaputil.setup_persistent_nodemap(tr, self)
2015 2015 self._chunkclear()
2016 2016
2017 2017 if existing_handles:
2018 2018 # switched from inline to conventional reopen the index
2019 2019 ifh = self._indexfp(b"a+")
2020 2020 self._writinghandles = (ifh, new_dfh)
2021 2021 new_dfh = None
2022 2022 finally:
2023 2023 if new_dfh is not None:
2024 2024 new_dfh.close()
2025 2025
2026 2026 def _nodeduplicatecallback(self, transaction, node):
2027 2027 """called when trying to add a node already stored."""
2028 2028
2029 2029 @contextlib.contextmanager
2030 2030 def _writing(self, transaction):
2031 2031 if self._writinghandles is not None:
2032 2032 yield
2033 2033 else:
2034 2034 r = len(self)
2035 2035 dsize = 0
2036 2036 if r:
2037 2037 dsize = self.end(r - 1)
2038 2038 dfh = None
2039 2039 if not self._inline:
2040 2040 dfh = self._datafp(b"a+")
2041 2041 transaction.add(self._datafile, dsize)
2042 2042 try:
2043 2043 isize = r * self.index.entry_size
2044 2044 ifh = self._indexfp(b"a+")
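# For an inline revlog the revision data lives inside the index file,
# so the branch below records the combined data+index size with the
# transaction; otherwise the index is tracked on its own (the data
# file was already added above).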
2045 2045 if self._inline:
2046 2046 transaction.add(self._indexfile, dsize + isize)
2047 2047 else:
2048 2048 transaction.add(self._indexfile, isize)
2049 2049 try:
2050 2050 self._writinghandles = (ifh, dfh)
2051 2051 try:
2052 2052 yield
2053 2053 finally:
2054 2054 self._writinghandles = None
2055 2055 finally:
2056 2056 ifh.close()
2057 2057 finally:
2058 2058 if dfh is not None:
2059 2059 dfh.close()
2060 2060
2061 2061 def addrevision(
2062 2062 self,
2063 2063 text,
2064 2064 transaction,
2065 2065 link,
2066 2066 p1,
2067 2067 p2,
2068 2068 cachedelta=None,
2069 2069 node=None,
2070 2070 flags=REVIDX_DEFAULT_FLAGS,
2071 2071 deltacomputer=None,
2072 2072 sidedata=None,
2073 2073 ):
2074 2074 """add a revision to the log
2075 2075
2076 2076 text - the revision data to add
2077 2077 transaction - the transaction object used for rollback
2078 2078 link - the linkrev data to add
2079 2079 p1, p2 - the parent nodeids of the revision
2080 2080 cachedelta - an optional precomputed delta
2081 2081 node - nodeid of revision; typically node is not specified, and it is
2082 2082 computed by default as hash(text, p1, p2); however, subclasses might
2083 2083 use a different hashing method (and override checkhash() in that case)
2084 2084 flags - the known flags to set on the revision
2085 2085 deltacomputer - an optional deltacomputer instance shared between
2086 2086 multiple calls
2087 2087 """
2088 2088 if link == nullrev:
2089 2089 raise error.RevlogError(
2090 2090 _(b"attempted to add linkrev -1 to %s") % self.display_id
2091 2091 )
2092 2092
2093 2093 if sidedata is None:
2094 2094 sidedata = {}
2095 2095 elif sidedata and not self.hassidedata:
2096 2096 raise error.ProgrammingError(
2097 2097 _(b"trying to add sidedata to a revlog who don't support them")
2098 2098 )
2099 2099
2100 2100 if flags:
2101 2101 node = node or self.hash(text, p1, p2)
2102 2102
2103 2103 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2104 2104
2105 2105 # If the flag processor modifies the revision data, ignore any provided
2106 2106 # cachedelta.
2107 2107 if rawtext != text:
2108 2108 cachedelta = None
2109 2109
2110 2110 if len(rawtext) > _maxentrysize:
2111 2111 raise error.RevlogError(
2112 2112 _(
2113 2113 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2114 2114 )
2115 2115 % (self.display_id, len(rawtext))
2116 2116 )
2117 2117
2118 2118 node = node or self.hash(rawtext, p1, p2)
2119 2119 rev = self.index.get_rev(node)
2120 2120 if rev is not None:
2121 2121 return rev
2122 2122
2123 2123 if validatehash:
2124 2124 self.checkhash(rawtext, node, p1=p1, p2=p2)
2125 2125
2126 2126 return self.addrawrevision(
2127 2127 rawtext,
2128 2128 transaction,
2129 2129 link,
2130 2130 p1,
2131 2131 p2,
2132 2132 node,
2133 2133 flags,
2134 2134 cachedelta=cachedelta,
2135 2135 deltacomputer=deltacomputer,
2136 2136 sidedata=sidedata,
2137 2137 )
2138 2138
2139 2139 def addrawrevision(
2140 2140 self,
2141 2141 rawtext,
2142 2142 transaction,
2143 2143 link,
2144 2144 p1,
2145 2145 p2,
2146 2146 node,
2147 2147 flags,
2148 2148 cachedelta=None,
2149 2149 deltacomputer=None,
2150 2150 sidedata=None,
2151 2151 ):
2152 2152 """add a raw revision with known flags, node and parents
2153 2153 useful when reusing a revision not stored in this revlog (e.g. received
2154 2154 over the wire, or read from an external bundle).
2155 2155 """
2156 2156 with self._writing(transaction):
2157 2157 return self._addrevision(
2158 2158 node,
2159 2159 rawtext,
2160 2160 transaction,
2161 2161 link,
2162 2162 p1,
2163 2163 p2,
2164 2164 flags,
2165 2165 cachedelta,
2166 2166 deltacomputer=deltacomputer,
2167 2167 sidedata=sidedata,
2168 2168 )
2169 2169
2170 2170 def compress(self, data):
2171 2171 """Generate a possibly-compressed representation of data."""
2172 2172 if not data:
2173 2173 return b'', data
2174 2174
2175 2175 compressed = self._compressor.compress(data)
2176 2176
2177 2177 if compressed:
2178 2178 # The revlog compressor added the header in the returned data.
2179 2179 return b'', compressed
2180 2180
2181 2181 if data[0:1] == b'\0':
2182 2182 return b'', data
2183 2183 return b'u', data
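# Header convention, as implemented above: an empty prefix means the
# compressor embedded its own header in the returned data, b'u' marks
# text stored uncompressed, and data already starting with b'\0' needs
# no marker because decompress() treats a leading NUL as raw data.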
2184 2184
2185 2185 def decompress(self, data):
2186 2186 """Decompress a revlog chunk.
2187 2187
2188 2188 The chunk is expected to begin with a header identifying the
2189 2189 format type so it can be routed to an appropriate decompressor.
2190 2190 """
2191 2191 if not data:
2192 2192 return data
2193 2193
2194 2194 # Revlogs are read much more frequently than they are written and many
2195 2195 # chunks only take microseconds to decompress, so performance is
2196 2196 # important here.
2197 2197 #
2198 2198 # We can make a few assumptions about revlogs:
2199 2199 #
2200 2200 # 1) the majority of chunks will be compressed (as opposed to inline
2201 2201 # raw data).
2202 2202 # 2) decompressing *any* data will likely be at least 10x slower than
2203 2203 # returning raw inline data.
2204 2204 # 3) we want to prioritize common and officially supported compression
2205 2205 # engines
2206 2206 #
2207 2207 # It follows that we want to optimize for the "decompress compressed data
2208 2208 # encoded with common and officially supported compression engines" case
2209 2209 # over "raw data" and "data encoded by less common or non-official
2210 2210 # compression engines." That is why we have the inline lookup first
2211 2211 # followed by the compengines lookup.
2212 2212 #
2213 2213 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2214 2214 # compressed chunks. And this matters for changelog and manifest reads.
2215 2215 t = data[0:1]
2216 2216
2217 2217 if t == b'x':
2218 2218 try:
2219 2219 return _zlibdecompress(data)
2220 2220 except zlib.error as e:
2221 2221 raise error.RevlogError(
2222 2222 _(b'revlog decompress error: %s')
2223 2223 % stringutil.forcebytestr(e)
2224 2224 )
2225 2225 # '\0' is more common than 'u' so it goes first.
2226 2226 elif t == b'\0':
2227 2227 return data
2228 2228 elif t == b'u':
2229 2229 return util.buffer(data, 1)
2230 2230
2231 2231 try:
2232 2232 compressor = self._decompressors[t]
2233 2233 except KeyError:
2234 2234 try:
2235 2235 engine = util.compengines.forrevlogheader(t)
2236 2236 compressor = engine.revlogcompressor(self._compengineopts)
2237 2237 self._decompressors[t] = compressor
2238 2238 except KeyError:
2239 2239 raise error.RevlogError(
2240 2240 _(b'unknown compression type %s') % binascii.hexlify(t)
2241 2241 )
2242 2242
2243 2243 return compressor.decompress(data)
2244 2244
2245 2245 def _addrevision(
2246 2246 self,
2247 2247 node,
2248 2248 rawtext,
2249 2249 transaction,
2250 2250 link,
2251 2251 p1,
2252 2252 p2,
2253 2253 flags,
2254 2254 cachedelta,
2255 2255 alwayscache=False,
2256 2256 deltacomputer=None,
2257 2257 sidedata=None,
2258 2258 ):
2259 2259 """internal function to add revisions to the log
2260 2260
2261 2261 see addrevision for argument descriptions.
2262 2262
2263 2263 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2264 2264
2265 2265 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2266 2266 be used.
2267 2267
2268 2268 invariants:
2269 2269 - rawtext is optional (can be None); if not set, cachedelta must be set.
2270 2270 if both are set, they must correspond to each other.
2271 2271 """
2272 2272 if node == self.nullid:
2273 2273 raise error.RevlogError(
2274 2274 _(b"%s: attempt to add null revision") % self.display_id
2275 2275 )
2276 2276 if (
2277 2277 node == self.nodeconstants.wdirid
2278 2278 or node in self.nodeconstants.wdirfilenodeids
2279 2279 ):
2280 2280 raise error.RevlogError(
2281 2281 _(b"%s: attempt to add wdir revision") % self.display_id
2282 2282 )
2283 2283 if self._writinghandles is None:
2284 2284 msg = b'adding revision outside `revlog._writing` context'
2285 2285 raise error.ProgrammingError(msg)
2286 2286
2287 2287 if self._inline:
2288 2288 fh = self._writinghandles[0]
2289 2289 else:
2290 2290 fh = self._writinghandles[1]
2291 2291
2292 2292 btext = [rawtext]
2293 2293
2294 2294 curr = len(self)
2295 2295 prev = curr - 1
2296 2296
2297 2297 offset = self._get_data_offset(prev)
2298 2298
2299 2299 if self._concurrencychecker:
2300 2300 ifh, dfh = self._writinghandles
2301 2301 if self._inline:
2302 2302 # offset is "as if" it were in the .d file, so we need to add on
2303 2303 # the size of the entry metadata.
2304 2304 self._concurrencychecker(
2305 2305 ifh, self._indexfile, offset + curr * self.index.entry_size
2306 2306 )
2307 2307 else:
2308 2308 # Entries in the .i are a consistent size.
2309 2309 self._concurrencychecker(
2310 2310 ifh, self._indexfile, curr * self.index.entry_size
2311 2311 )
2312 2312 self._concurrencychecker(dfh, self._datafile, offset)
2313 2313
2314 2314 p1r, p2r = self.rev(p1), self.rev(p2)
2315 2315
2316 2316 # full versions are inserted when the needed deltas
2317 2317 # become comparable to the uncompressed text
2318 2318 if rawtext is None:
2319 2319 # need the rawtext size before it is changed by flag processors, which is
2320 2320 # the non-raw size. use revlog explicitly to avoid filelog's extra
2321 2321 # logic that might remove metadata size.
2322 2322 textlen = mdiff.patchedsize(
2323 2323 revlog.size(self, cachedelta[0]), cachedelta[1]
2324 2324 )
2325 2325 else:
2326 2326 textlen = len(rawtext)
2327 2327
2328 2328 if deltacomputer is None:
2329 2329 deltacomputer = deltautil.deltacomputer(self)
2330 2330
2331 2331 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2332 2332
2333 2333 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2334 2334
2335 2335 if sidedata and self.hassidedata:
2336 2336 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2337 2337 sidedata_offset = offset + deltainfo.deltalen
2338 2338 else:
2339 2339 serialized_sidedata = b""
2340 2340 # Don't store the offset if the sidedata is empty; that way
2341 2341 # we can easily detect empty sidedata, and it will be no different
2342 2342 # from sidedata we add manually.
2343 2343 sidedata_offset = 0
2344 2344
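# Layout of the index entry built below, matching the fields consumed
# elsewhere in this class: (offset | flags, stored data length, rawtext
# length, delta base rev, linkrev, p1 rev, p2 rev, node, sidedata
# offset, sidedata length).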
2345 2345 e = (
2346 2346 offset_type(offset, flags),
2347 2347 deltainfo.deltalen,
2348 2348 textlen,
2349 2349 deltainfo.base,
2350 2350 link,
2351 2351 p1r,
2352 2352 p2r,
2353 2353 node,
2354 2354 sidedata_offset,
2355 2355 len(serialized_sidedata),
2356 2356 )
2357 2357
2358 2358 self.index.append(e)
2359 2359 entry = self.index.entry_binary(curr)
2360 2360 if curr == 0:
2361 2361 header = self._format_flags | self._format_version
2362 2362 header = self.index.pack_header(header)
2363 2363 entry = header + entry
2364 2364 self._writeentry(
2365 2365 transaction,
2366 2366 entry,
2367 2367 deltainfo.data,
2368 2368 link,
2369 2369 offset,
2370 2370 serialized_sidedata,
2371 2371 )
2372 2372
2373 2373 rawtext = btext[0]
2374 2374
2375 2375 if alwayscache and rawtext is None:
2376 2376 rawtext = deltacomputer.buildtext(revinfo, fh)
2377 2377
2378 2378 if type(rawtext) == bytes: # only accept immutable objects
2379 2379 self._revisioncache = (node, curr, rawtext)
2380 2380 self._chainbasecache[curr] = deltainfo.chainbase
2381 2381 return curr
2382 2382
2383 2383 def _get_data_offset(self, prev):
2384 2384 """Returns the current offset in the (in-transaction) data file.
2385 2385 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2386 2386 file to store that information: since sidedata can be rewritten to the
2387 2387 end of the data file within a transaction, you can have cases where, for
2388 2388 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2389 2389 to `n - 1`'s sidedata being written after `n`'s data.
2390 2390
2391 2391 TODO cache this in a docket file before getting out of experimental."""
2392 2392 if self._format_version != REVLOGV2:
2393 2393 return self.end(prev)
2394 2394
2395 2395 offset = 0
2396 2396 for rev, entry in enumerate(self.index):
2397 2397 sidedata_end = entry[8] + entry[9]
2398 2398 # Sidedata for a previous rev has potentially been written after
2399 2399 # this rev's end, so take the max.
2400 2400 offset = max(self.end(rev), offset, sidedata_end)
2401 2401 return offset
2402 2402
2403 2403 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2404 2404 # Files opened in a+ mode have inconsistent behavior on various
2405 2405 # platforms. Windows requires that a file positioning call be made
2406 2406 # when the file handle transitions between reads and writes. See
2407 2407 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2408 2408 # platforms, Python or the platform itself can be buggy. Some versions
2409 2409 # of Solaris have been observed to not append at the end of the file
2410 2410 # if the file was seeked to before the end. See issue4943 for more.
2411 2411 #
2412 2412 # We work around this issue by inserting a seek() before writing.
2413 2413 # Note: This is likely not necessary on Python 3. However, because
2414 2414 # the file handle is reused for reads and may be seeked there, we need
2415 2415 # to be careful before changing this.
2416 2416 if self._writinghandles is None:
2417 2417 msg = b'adding revision outside `revlog._writing` context'
2418 2418 raise error.ProgrammingError(msg)
2419 2419 ifh, dfh = self._writinghandles
2420 2420 ifh.seek(0, os.SEEK_END)
2421 2421 if dfh:
2422 2422 dfh.seek(0, os.SEEK_END)
2423 2423
2424 2424 curr = len(self) - 1
2425 2425 if not self._inline:
2426 2426 transaction.add(self._datafile, offset)
2427 2427 transaction.add(self._indexfile, curr * len(entry))
2428 2428 if data[0]:
2429 2429 dfh.write(data[0])
2430 2430 dfh.write(data[1])
2431 2431 if sidedata:
2432 2432 dfh.write(sidedata)
2433 2433 ifh.write(entry)
2434 2434 else:
2435 2435 offset += curr * self.index.entry_size
2436 2436 transaction.add(self._indexfile, offset)
2437 2437 ifh.write(entry)
2438 2438 ifh.write(data[0])
2439 2439 ifh.write(data[1])
2440 2440 if sidedata:
2441 2441 ifh.write(sidedata)
2442 2442 self._enforceinlinesize(transaction)
2443 2443 nodemaputil.setup_persistent_nodemap(transaction, self)
2444 2444
2445 2445 def addgroup(
2446 2446 self,
2447 2447 deltas,
2448 2448 linkmapper,
2449 2449 transaction,
2450 2450 alwayscache=False,
2451 2451 addrevisioncb=None,
2452 2452 duplicaterevisioncb=None,
2453 2453 ):
2454 2454 """
2455 2455 add a delta group
2456 2456
2457 2457 given a set of deltas, add them to the revision log. the
2458 2458 first delta is against its parent, which should be in our
2459 2459 log, the rest are against the previous delta.
2460 2460
2461 2461 If ``addrevisioncb`` is defined, it will be called with arguments of
2462 2462 this revlog and the node that was added.
2463 2463 """
2464 2464
2465 2465 if self._adding_group:
2466 2466 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2467 2467
2468 2468 self._adding_group = True
2469 2469 empty = True
2470 2470 try:
2471 2471 with self._writing(transaction):
2472 2472 deltacomputer = deltautil.deltacomputer(self)
2473 2473 # loop through our set of deltas
2474 2474 for data in deltas:
2475 2475 (
2476 2476 node,
2477 2477 p1,
2478 2478 p2,
2479 2479 linknode,
2480 2480 deltabase,
2481 2481 delta,
2482 2482 flags,
2483 2483 sidedata,
2484 2484 ) = data
2485 2485 link = linkmapper(linknode)
2486 2486 flags = flags or REVIDX_DEFAULT_FLAGS
2487 2487
2488 2488 rev = self.index.get_rev(node)
2489 2489 if rev is not None:
2490 2490 # this can happen if two branches make the same change
2491 2491 self._nodeduplicatecallback(transaction, rev)
2492 2492 if duplicaterevisioncb:
2493 2493 duplicaterevisioncb(self, rev)
2494 2494 empty = False
2495 2495 continue
2496 2496
2497 2497 for p in (p1, p2):
2498 2498 if not self.index.has_node(p):
2499 2499 raise error.LookupError(
2500 2500 p, self.radix, _(b'unknown parent')
2501 2501 )
2502 2502
2503 2503 if not self.index.has_node(deltabase):
2504 2504 raise error.LookupError(
2505 2505 deltabase, self.display_id, _(b'unknown delta base')
2506 2506 )
2507 2507
2508 2508 baserev = self.rev(deltabase)
2509 2509
2510 2510 if baserev != nullrev and self.iscensored(baserev):
2511 2511 # if base is censored, delta must be full replacement in a
2512 2512 # single patch operation
2513 2513 hlen = struct.calcsize(b">lll")
2514 2514 oldlen = self.rawsize(baserev)
2515 2515 newlen = len(delta) - hlen
2516 2516 if delta[:hlen] != mdiff.replacediffheader(
2517 2517 oldlen, newlen
2518 2518 ):
2519 2519 raise error.CensoredBaseError(
2520 2520 self.display_id, self.node(baserev)
2521 2521 )
2522 2522
2523 2523 if not flags and self._peek_iscensored(baserev, delta):
2524 2524 flags |= REVIDX_ISCENSORED
2525 2525
2526 2526 # We assume consumers of addrevisioncb will want to retrieve
2527 2527 # the added revision, which will require a call to
2528 2528 # revision(). revision() will fast path if there is a cache
2529 2529 # hit. So, we tell _addrevision() to always cache in this case.
2530 2530 # We're only using addgroup() in the context of changegroup
2531 2531 # generation so the revision data can always be handled as raw
2532 2532 # by the flagprocessor.
2533 2533 rev = self._addrevision(
2534 2534 node,
2535 2535 None,
2536 2536 transaction,
2537 2537 link,
2538 2538 p1,
2539 2539 p2,
2540 2540 flags,
2541 2541 (baserev, delta),
2542 2542 alwayscache=alwayscache,
2543 2543 deltacomputer=deltacomputer,
2544 2544 sidedata=sidedata,
2545 2545 )
2546 2546
2547 2547 if addrevisioncb:
2548 2548 addrevisioncb(self, rev)
2549 2549 empty = False
2550 2550 finally:
2551 2551 self._adding_group = False
2552 2552 return not empty
2553 2553
2554 2554 def iscensored(self, rev):
2555 2555 """Check if a file revision is censored."""
2556 2556 if not self._censorable:
2557 2557 return False
2558 2558
2559 2559 return self.flags(rev) & REVIDX_ISCENSORED
2560 2560
2561 2561 def _peek_iscensored(self, baserev, delta):
2562 2562 """Quickly check if a delta produces a censored revision."""
2563 2563 if not self._censorable:
2564 2564 return False
2565 2565
2566 2566 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2567 2567
2568 2568 def getstrippoint(self, minlink):
2569 2569 """find the minimum rev that must be stripped to strip the linkrev
2570 2570
2571 2571 Returns a tuple containing the minimum rev and a set of all revs that
2572 2572 have linkrevs that will be broken by this strip.
2573 2573 """
2574 2574 return storageutil.resolvestripinfo(
2575 2575 minlink,
2576 2576 len(self) - 1,
2577 2577 self.headrevs(),
2578 2578 self.linkrev,
2579 2579 self.parentrevs,
2580 2580 )
2581 2581
2582 2582 def strip(self, minlink, transaction):
2583 2583 """truncate the revlog on the first revision with a linkrev >= minlink
2584 2584
2585 2585 This function is called when we're stripping revision minlink and
2586 2586 its descendants from the repository.
2587 2587
2588 2588 We have to remove all revisions with linkrev >= minlink, because
2589 2589 the equivalent changelog revisions will be renumbered after the
2590 2590 strip.
2591 2591
2592 2592 So we truncate the revlog on the first of these revisions, and
2593 2593 trust that the caller has saved the revisions that shouldn't be
2594 2594 removed and that it'll re-add them after this truncation.
2595 2595 """
2596 2596 if len(self) == 0:
2597 2597 return
2598 2598
2599 2599 rev, _ = self.getstrippoint(minlink)
2600 2600 if rev == len(self):
2601 2601 return
2602 2602
2603 2603 # first truncate the files on disk
2604 2604 end = self.start(rev)
2605 2605 if not self._inline:
2606 2606 transaction.add(self._datafile, end)
2607 2607 end = rev * self.index.entry_size
2608 2608 else:
2609 2609 end += rev * self.index.entry_size
2610 2610
2611 2611 transaction.add(self._indexfile, end)
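# For an inline revlog each index entry is immediately followed by its
# data, so the truncation point above counts rev index entries plus the
# data bytes that precede revision rev; non-inline revlogs truncate the
# index and data files independently.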
2612 2612
2613 2613 # then reset internal state in memory to forget those revisions
2614 2614 self._revisioncache = None
2615 2615 self._chaininfocache = util.lrucachedict(500)
2616 2616 self._chunkclear()
2617 2617
2618 2618 del self.index[rev:-1]
2619 2619
2620 2620 def checksize(self):
2621 2621 """Check size of index and data files
2622 2622
2623 2623 return a (dd, di) tuple.
2624 2624 - dd: extra bytes for the "data" file
2625 2625 - di: extra bytes for the "index" file
2626 2626
2627 2627 A healthy revlog will return (0, 0).
2628 2628 """
2629 2629 expected = 0
2630 2630 if len(self):
2631 2631 expected = max(0, self.end(len(self) - 1))
2632 2632
2633 2633 try:
2634 2634 with self._datafp() as f:
2635 2635 f.seek(0, io.SEEK_END)
2636 2636 actual = f.tell()
2637 2637 dd = actual - expected
2638 2638 except IOError as inst:
2639 2639 if inst.errno != errno.ENOENT:
2640 2640 raise
2641 2641 dd = 0
2642 2642
2643 2643 try:
2644 2644 f = self.opener(self._indexfile)
2645 2645 f.seek(0, io.SEEK_END)
2646 2646 actual = f.tell()
2647 2647 f.close()
2648 2648 s = self.index.entry_size
2649 2649 i = max(0, actual // s)
2650 2650 di = actual - (i * s)
2651 2651 if self._inline:
2652 2652 databytes = 0
2653 2653 for r in self:
2654 2654 databytes += max(0, self.length(r))
2655 2655 dd = 0
2656 2656 di = actual - len(self) * s - databytes
2657 2657 except IOError as inst:
2658 2658 if inst.errno != errno.ENOENT:
2659 2659 raise
2660 2660 di = 0
2661 2661
2662 2662 return (dd, di)
2663 2663
2664 2664 def files(self):
2665 2665 res = [self._indexfile]
2666 2666 if not self._inline:
2667 2667 res.append(self._datafile)
2668 2668 return res
2669 2669
2670 2670 def emitrevisions(
2671 2671 self,
2672 2672 nodes,
2673 2673 nodesorder=None,
2674 2674 revisiondata=False,
2675 2675 assumehaveparentrevisions=False,
2676 2676 deltamode=repository.CG_DELTAMODE_STD,
2677 2677 sidedata_helpers=None,
2678 2678 ):
2679 2679 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2680 2680 raise error.ProgrammingError(
2681 2681 b'unhandled value for nodesorder: %s' % nodesorder
2682 2682 )
2683 2683
2684 2684 if nodesorder is None and not self._generaldelta:
2685 2685 nodesorder = b'storage'
2686 2686
2687 2687 if (
2688 2688 not self._storedeltachains
2689 2689 and deltamode != repository.CG_DELTAMODE_PREV
2690 2690 ):
2691 2691 deltamode = repository.CG_DELTAMODE_FULL
2692 2692
2693 2693 return storageutil.emitrevisions(
2694 2694 self,
2695 2695 nodes,
2696 2696 nodesorder,
2697 2697 revlogrevisiondelta,
2698 2698 deltaparentfn=self.deltaparent,
2699 2699 candeltafn=self.candelta,
2700 2700 rawsizefn=self.rawsize,
2701 2701 revdifffn=self.revdiff,
2702 2702 flagsfn=self.flags,
2703 2703 deltamode=deltamode,
2704 2704 revisiondata=revisiondata,
2705 2705 assumehaveparentrevisions=assumehaveparentrevisions,
2706 2706 sidedata_helpers=sidedata_helpers,
2707 2707 )
2708 2708
2709 2709 DELTAREUSEALWAYS = b'always'
2710 2710 DELTAREUSESAMEREVS = b'samerevs'
2711 2711 DELTAREUSENEVER = b'never'
2712 2712
2713 2713 DELTAREUSEFULLADD = b'fulladd'
2714 2714
2715 2715 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2716 2716
2717 2717 def clone(
2718 2718 self,
2719 2719 tr,
2720 2720 destrevlog,
2721 2721 addrevisioncb=None,
2722 2722 deltareuse=DELTAREUSESAMEREVS,
2723 2723 forcedeltabothparents=None,
2724 2724 sidedata_helpers=None,
2725 2725 ):
2726 2726 """Copy this revlog to another, possibly with format changes.
2727 2727
2728 2728 The destination revlog will contain the same revisions and nodes.
2729 2729 However, it may not be bit-for-bit identical due to e.g. delta encoding
2730 2730 differences.
2731 2731
2732 2732 The ``deltareuse`` argument controls how deltas from the existing revlog
2733 2733 are preserved in the destination revlog. The argument can have the
2734 2734 following values:
2735 2735
2736 2736 DELTAREUSEALWAYS
2737 2737 Deltas will always be reused (if possible), even if the destination
2738 2738 revlog would not select the same revisions for the delta. This is the
2739 2739 fastest mode of operation.
2740 2740 DELTAREUSESAMEREVS
2741 2741 Deltas will be reused if the destination revlog would pick the same
2742 2742 revisions for the delta. This mode strikes a balance between speed
2743 2743 and optimization.
2744 2744 DELTAREUSENEVER
2745 2745 Deltas will never be reused. This is the slowest mode of execution.
2746 2746 This mode can be used to recompute deltas (e.g. if the diff/delta
2747 2747 algorithm changes).
2748 2748 DELTAREUSEFULLADD
2749 2749 Revisions will be re-added as if they were new content. This is
2750 2750 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2751 2751 e.g. large file detection and handling.
2752 2752
2753 2753 Delta computation can be slow, so the choice of delta reuse policy can
2754 2754 significantly affect run time.
2755 2755
2756 2756 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2757 2757 two extremes. Deltas will be reused if they are appropriate. But if the
2758 2758 delta could choose a better revision, it will do so. This means if you
2759 2759 are converting a non-generaldelta revlog to a generaldelta revlog,
2760 2760 deltas will be recomputed if the delta's parent isn't a parent of the
2761 2761 revision.
2762 2762
2763 2763 In addition to the delta policy, the ``forcedeltabothparents``
2764 2764 argument controls whether to force computing deltas against both parents
2765 2765 for merges. If None, the destination revlog's current setting is kept.
2766 2766
2767 2767 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2768 2768 `sidedata_helpers`.
2769 2769 """
2770 2770 if deltareuse not in self.DELTAREUSEALL:
2771 2771 raise ValueError(
2772 2772 _(b'value for deltareuse invalid: %s') % deltareuse
2773 2773 )
2774 2774
2775 2775 if len(destrevlog):
2776 2776 raise ValueError(_(b'destination revlog is not empty'))
2777 2777
2778 2778 if getattr(self, 'filteredrevs', None):
2779 2779 raise ValueError(_(b'source revlog has filtered revisions'))
2780 2780 if getattr(destrevlog, 'filteredrevs', None):
2781 2781 raise ValueError(_(b'destination revlog has filtered revisions'))
2782 2782
2783 2783 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2784 2784 # if possible.
2785 2785 oldlazydelta = destrevlog._lazydelta
2786 2786 oldlazydeltabase = destrevlog._lazydeltabase
2787 2787 oldamd = destrevlog._deltabothparents
2788 2788
2789 2789 try:
2790 2790 if deltareuse == self.DELTAREUSEALWAYS:
2791 2791 destrevlog._lazydeltabase = True
2792 2792 destrevlog._lazydelta = True
2793 2793 elif deltareuse == self.DELTAREUSESAMEREVS:
2794 2794 destrevlog._lazydeltabase = False
2795 2795 destrevlog._lazydelta = True
2796 2796 elif deltareuse == self.DELTAREUSENEVER:
2797 2797 destrevlog._lazydeltabase = False
2798 2798 destrevlog._lazydelta = False
2799 2799
2800 2800 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2801 2801
2802 2802 self._clone(
2803 2803 tr,
2804 2804 destrevlog,
2805 2805 addrevisioncb,
2806 2806 deltareuse,
2807 2807 forcedeltabothparents,
2808 2808 sidedata_helpers,
2809 2809 )
2810 2810
2811 2811 finally:
2812 2812 destrevlog._lazydelta = oldlazydelta
2813 2813 destrevlog._lazydeltabase = oldlazydeltabase
2814 2814 destrevlog._deltabothparents = oldamd
2815 2815
2816 2816 def _clone(
2817 2817 self,
2818 2818 tr,
2819 2819 destrevlog,
2820 2820 addrevisioncb,
2821 2821 deltareuse,
2822 2822 forcedeltabothparents,
2823 2823 sidedata_helpers,
2824 2824 ):
2825 2825 """perform the core duty of `revlog.clone` after parameter processing"""
2826 2826 deltacomputer = deltautil.deltacomputer(destrevlog)
2827 2827 index = self.index
2828 2828 for rev in self:
2829 2829 entry = index[rev]
2830 2830
2831 2831 # Some classes override linkrev to take filtered revs into
2832 2832 # account. Use raw entry from index.
2833 2833 flags = entry[0] & 0xFFFF
2834 2834 linkrev = entry[4]
2835 2835 p1 = index[entry[5]][7]
2836 2836 p2 = index[entry[6]][7]
2837 2837 node = entry[7]
2838 2838
2839 2839 # (Possibly) reuse the delta from the revlog if allowed and
2840 2840 # the revlog chunk is a delta.
2841 2841 cachedelta = None
2842 2842 rawtext = None
2843 2843 if deltareuse == self.DELTAREUSEFULLADD:
2844 2844 text, sidedata = self._revisiondata(rev)
2845 2845
2846 2846 if sidedata_helpers is not None:
2847 2847 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2848 2848 self, sidedata_helpers, sidedata, rev
2849 2849 )
2850 2850 flags = flags | new_flags[0] & ~new_flags[1]
2851 2851
2852 2852 destrevlog.addrevision(
2853 2853 text,
2854 2854 tr,
2855 2855 linkrev,
2856 2856 p1,
2857 2857 p2,
2858 2858 cachedelta=cachedelta,
2859 2859 node=node,
2860 2860 flags=flags,
2861 2861 deltacomputer=deltacomputer,
2862 2862 sidedata=sidedata,
2863 2863 )
2864 2864 else:
2865 2865 if destrevlog._lazydelta:
2866 2866 dp = self.deltaparent(rev)
2867 2867 if dp != nullrev:
2868 2868 cachedelta = (dp, bytes(self._chunk(rev)))
2869 2869
2870 2870 sidedata = None
2871 2871 if not cachedelta:
2872 2872 rawtext, sidedata = self._revisiondata(rev)
2873 2873 if sidedata is None:
2874 2874 sidedata = self.sidedata(rev)
2875 2875
2876 2876 if sidedata_helpers is not None:
2877 2877 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2878 2878 self, sidedata_helpers, sidedata, rev
2879 2879 )
2880 2880 flags = flags | new_flags[0] & ~new_flags[1]
2881 2881
2882 2882 with destrevlog._writing(tr):
2883 2883 destrevlog._addrevision(
2884 2884 node,
2885 2885 rawtext,
2886 2886 tr,
2887 2887 linkrev,
2888 2888 p1,
2889 2889 p2,
2890 2890 flags,
2891 2891 cachedelta,
2892 2892 deltacomputer=deltacomputer,
2893 2893 sidedata=sidedata,
2894 2894 )
2895 2895
2896 2896 if addrevisioncb:
2897 2897 addrevisioncb(self, rev, node)
2898 2898
2899 2899 def censorrevision(self, tr, censornode, tombstone=b''):
2900 2900 if self._format_version == REVLOGV0:
2901 2901 raise error.RevlogError(
2902 2902 _(b'cannot censor with version %d revlogs')
2903 2903 % self._format_version
2904 2904 )
2905 2905
2906 2906 censorrev = self.rev(censornode)
2907 2907 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2908 2908
2909 2909 if len(tombstone) > self.rawsize(censorrev):
2910 2910 raise error.Abort(
2911 2911 _(b'censor tombstone must be no longer than censored data')
2912 2912 )
2913 2913
2914 2914 # Rewriting the revlog in place is hard. Our strategy for censoring is
2915 2915 # to create a new revlog, copy all revisions to it, then replace the
2916 2916 # revlogs on transaction close.
2917 2917 #
2918 2918 # This is a bit dangerous. We could easily have a mismatch of state.
2919 2919 newrl = revlog(
2920 2920 self.opener,
2921 2921 target=self.target,
2922 2922 radix=self.radix,
2923 2923 postfix=b'tmpcensored',
2924 2924 censorable=True,
2925 2925 )
2926 2926 newrl._format_version = self._format_version
2927 2927 newrl._format_flags = self._format_flags
2928 2928 newrl._generaldelta = self._generaldelta
2929 2929 newrl._parse_index = self._parse_index
2930 2930
2931 2931 for rev in self.revs():
2932 2932 node = self.node(rev)
2933 2933 p1, p2 = self.parents(node)
2934 2934
2935 2935 if rev == censorrev:
2936 2936 newrl.addrawrevision(
2937 2937 tombstone,
2938 2938 tr,
2939 2939 self.linkrev(censorrev),
2940 2940 p1,
2941 2941 p2,
2942 2942 censornode,
2943 2943 REVIDX_ISCENSORED,
2944 2944 )
2945 2945
2946 2946 if newrl.deltaparent(rev) != nullrev:
2947 2947 raise error.Abort(
2948 2948 _(
2949 2949 b'censored revision stored as delta; '
2950 2950 b'cannot censor'
2951 2951 ),
2952 2952 hint=_(
2953 2953 b'censoring of revlogs is not '
2954 2954 b'fully implemented; please report '
2955 2955 b'this bug'
2956 2956 ),
2957 2957 )
2958 2958 continue
2959 2959
2960 2960 if self.iscensored(rev):
2961 2961 if self.deltaparent(rev) != nullrev:
2962 2962 raise error.Abort(
2963 2963 _(
2964 2964 b'cannot censor due to censored '
2965 2965 b'revision having delta stored'
2966 2966 )
2967 2967 )
2968 2968 rawtext = self._chunk(rev)
2969 2969 else:
2970 2970 rawtext = self.rawdata(rev)
2971 2971
2972 2972 newrl.addrawrevision(
2973 2973 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2974 2974 )
2975 2975
2976 2976 tr.addbackup(self._indexfile, location=b'store')
2977 2977 if not self._inline:
2978 2978 tr.addbackup(self._datafile, location=b'store')
2979 2979
2980 2980 self.opener.rename(newrl._indexfile, self._indexfile)
2981 2981 if not self._inline:
2982 2982 self.opener.rename(newrl._datafile, self._datafile)
2983 2983
2984 2984 self.clearcaches()
2985 2985 self._loadindex()
2986 2986
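A hedged sketch of how a caller might drive `censorrevision`; the locking and transaction scaffolding and the filelog path are illustrative, not an API guarantee:

    # Hypothetical caller; censoring must run inside a transaction so the
    # addbackup() calls above can restore the index/data files on failure.
    with repo.lock(), repo.transaction(b'censor') as tr:
        fl = repo.file(b'path/to/secret.txt')  # illustrative filelog
        fl._revlog.censorrevision(tr, censornode, tombstone=b'removed')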
2987 2987 def verifyintegrity(self, state):
2988 2988 """Verifies the integrity of the revlog.
2989 2989
2990 2990 Yields ``revlogproblem`` instances describing problems that are
2991 2991 found.
2992 2992 """
2993 2993 dd, di = self.checksize()
2994 2994 if dd:
2995 2995 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2996 2996 if di:
2997 2997 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2998 2998
2999 2999 version = self._format_version
3000 3000
3001 3001 # The verifier tells us what version revlog we should be.
3002 3002 if version != state[b'expectedversion']:
3003 3003 yield revlogproblem(
3004 3004 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3005 3005 % (self.display_id, version, state[b'expectedversion'])
3006 3006 )
3007 3007
3008 3008 state[b'skipread'] = set()
3009 3009 state[b'safe_renamed'] = set()
3010 3010
3011 3011 for rev in self:
3012 3012 node = self.node(rev)
3013 3013
3014 3014 # Verify contents. 4 cases to care about:
3015 3015 #
3016 3016 # common: the most common case
3017 3017 # rename: with a rename
3018 3018 # meta: file content starts with b'\1\n', the metadata
3019 3019 # header defined in filelog.py, but without a rename
3020 3020 # ext: content stored externally
3021 3021 #
3022 3022 # More formally, their differences are shown below:
3023 3023 #
3024 3024 # | common | rename | meta | ext
3025 3025 # -------------------------------------------------------
3026 3026 # flags() | 0 | 0 | 0 | not 0
3027 3027 # renamed() | False | True | False | ?
3028 3028 # rawtext[0:2]=='\1\n'| False | True | True | ?
3029 3029 #
3030 3030 # "rawtext" means the raw text stored in revlog data, which
3031 3031 # could be retrieved by "rawdata(rev)". "text"
3032 3032 # mentioned below is "revision(rev)".
3033 3033 #
3034 3034 # There are 3 different lengths stored physically:
3035 3035 # 1. L1: rawsize, stored in revlog index
3036 3036 # 2. L2: len(rawtext), stored in revlog data
3037 3037 # 3. L3: len(text), stored in revlog data if flags==0, or
3038 3038 # possibly somewhere else if flags!=0
3039 3039 #
3040 3040 # L1 should be equal to L2. L3 could be different from them.
3041 3041 # "text" may or may not affect commit hash depending on flag
3042 3042 # processors (see flagutil.addflagprocessor).
3043 3043 #
3044 3044 # | common | rename | meta | ext
3045 3045 # -------------------------------------------------
3046 3046 # rawsize() | L1 | L1 | L1 | L1
3047 3047 # size() | L1 | L2-LM | L1(*) | L1 (?)
3048 3048 # len(rawtext) | L2 | L2 | L2 | L2
3049 3049 # len(text) | L2 | L2 | L2 | L3
3050 3050 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3051 3051 #
3052 3052 # LM: length of metadata, depending on rawtext
3053 3053 # (*): not ideal, see comment in filelog.size
3054 3054 # (?): could be "- len(meta)" if the resolved content has
3055 3055 # rename metadata
3056 3056 #
3057 3057 # Checks needed to be done:
3058 3058 # 1. length check: L1 == L2, in all cases.
3059 3059 # 2. hash check: depending on flag processor, we may need to
3060 3060 # use either "text" (external), or "rawtext" (in revlog).
3061 3061
3062 3062 try:
3063 3063 skipflags = state.get(b'skipflags', 0)
3064 3064 if skipflags:
3065 3065 skipflags &= self.flags(rev)
3066 3066
3067 3067 _verify_revision(self, skipflags, state, node)
3068 3068
3069 3069 l1 = self.rawsize(rev)
3070 3070 l2 = len(self.rawdata(node))
3071 3071
3072 3072 if l1 != l2:
3073 3073 yield revlogproblem(
3074 3074 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3075 3075 node=node,
3076 3076 )
3077 3077
3078 3078 except error.CensoredNodeError:
3079 3079 if state[b'erroroncensored']:
3080 3080 yield revlogproblem(
3081 3081 error=_(b'censored file data'), node=node
3082 3082 )
3083 3083 state[b'skipread'].add(node)
3084 3084 except Exception as e:
3085 3085 yield revlogproblem(
3086 3086 error=_(b'unpacking %s: %s')
3087 3087 % (short(node), stringutil.forcebytestr(e)),
3088 3088 node=node,
3089 3089 )
3090 3090 state[b'skipread'].add(node)
3091 3091
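A hedged sketch of consuming the generator; the `state` keys mirror those the method reads (`expectedversion`, `erroroncensored`) and fills in (`skipread`, `safe_renamed`), and `rl` stands for any open revlog:

    state = {
        b'expectedversion': rl._format_version,
        b'erroroncensored': True,
    }
    for problem in rl.verifyintegrity(state):
        # each revlogproblem carries an error or a warning message
        print(problem.error or problem.warning)
    print(b'%d unreadable nodes' % len(state[b'skipread']))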
3092 3092 def storageinfo(
3093 3093 self,
3094 3094 exclusivefiles=False,
3095 3095 sharedfiles=False,
3096 3096 revisionscount=False,
3097 3097 trackedsize=False,
3098 3098 storedsize=False,
3099 3099 ):
3100 3100 d = {}
3101 3101
3102 3102 if exclusivefiles:
3103 3103 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3104 3104 if not self._inline:
3105 3105 d[b'exclusivefiles'].append((self.opener, self._datafile))
3106 3106
3107 3107 if sharedfiles:
3108 3108 d[b'sharedfiles'] = []
3109 3109
3110 3110 if revisionscount:
3111 3111 d[b'revisionscount'] = len(self)
3112 3112
3113 3113 if trackedsize:
3114 3114 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3115 3115
3116 3116 if storedsize:
3117 3117 d[b'storedsize'] = sum(
3118 3118 self.opener.stat(path).st_size for path in self.files()
3119 3119 )
3120 3120
3121 3121 return d
3122 3122
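Each keyword argument opts into exactly one field of the returned dict, so callers only pay for the statistics they ask for; a minimal sketch, with `rl` again standing for an open revlog:

    info = rl.storageinfo(revisionscount=True, trackedsize=True)
    # e.g. {b'revisionscount': 42, b'trackedsize': 123456}
    print(info[b'revisionscount'], info[b'trackedsize'])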
3123 def rewrite_sidedata(self, helpers, startrev, endrev):
3123 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3124 3124 if not self.hassidedata:
3125 3125 return
3126 3126 # inline revlogs are not yet supported because they suffer from an
3127 3127 # issue when rewriting them (it is not an append-only operation).
3128 3128 # See issue6485.
3129 3129 assert not self._inline
3130 3130 if not helpers[1] and not helpers[2]:
3131 3131 # Nothing to generate or remove
3132 3132 return
3133 3133
3134 3134 # The changelog implements a "delayed" writing mechanism that assumes
3135 3135 # all index data is written in append mode and is therefore incompatible
3136 3136 # with the seeked writes done in this method. The use of such "delayed"
3137 3137 # writing will soon be removed for revlog versions that support side
3138 3138 # data, so for now we only keep this simple assert to highlight the
3139 3139 # situation.
3140 3140 delayed = getattr(self, '_delayed', False)
3141 3141 diverted = getattr(self, '_divert', False)
3142 3142 if delayed and not diverted:
3143 3143 msg = "cannot rewrite_sidedata of a delayed revlog"
3144 3144 raise error.ProgrammingError(msg)
3145 3145
3146 3146 new_entries = []
3147 3147 # append the new sidedata
3148 3148 with self._datafp(b'a+') as dfh:
3149 3149 # Maybe this bug still exists, see revlog._writeentry
3150 3150 dfh.seek(0, os.SEEK_END)
3151 3151 current_offset = dfh.tell()
3152 3152 for rev in range(startrev, endrev + 1):
3153 3153 entry = self.index[rev]
3154 3154 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3155 3155 store=self,
3156 3156 sidedata_helpers=helpers,
3157 3157 sidedata={},
3158 3158 rev=rev,
3159 3159 )
3160 3160
3161 3161 serialized_sidedata = sidedatautil.serialize_sidedata(
3162 3162 new_sidedata
3163 3163 )
3164 3164 if entry[8] != 0 or entry[9] != 0:
3165 3165 # rewriting entries that already have sidedata is not
3166 3166 # supported yet, because it introduces garbage data in the
3167 3167 # revlog.
3168 3168 msg = b"Rewriting existing sidedata is not supported yet"
3169 3169 raise error.Abort(msg)
3170 3170
3171 3171 # Apply (potential) flags to add and to remove after running
3172 3172 # the sidedata helpers
3173 3173 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3174 3174 entry = (new_offset_flags,) + entry[1:8]
3175 3175 entry += (current_offset, len(serialized_sidedata))
3176 3176
3177 3177 dfh.write(serialized_sidedata)
3178 3178 new_entries.append(entry)
3179 3179 current_offset += len(serialized_sidedata)
3180 3180
3181 3181 # rewrite the new index entries
3182 3182 with self._indexfp(b'r+') as ifh:
3183 3183 ifh.seek(startrev * self.index.entry_size)
3184 3184 for i, e in enumerate(new_entries):
3185 3185 rev = startrev + i
3186 3186 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3187 3187 packed = self.index.entry_binary(rev)
3188 3188 if rev == 0:
3189 3189 header = self._format_flags | self._format_version
3190 3190 header = self.index.pack_header(header)
3191 3191 packed = header + packed
3192 3192 ifh.write(packed)
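The `new_offset_flags` computation above touches only the low 16 flag bits of `entry[0]`, which packs the data offset in its high bits; a worked example of the merge, with illustrative values and the same operator-precedence caveat as in `_clone`:

    offset_flags = (1024 << 16) | 0x0001  # offset 1024, one flag already set
    to_add, to_remove = 0x0006, 0x0002
    new_offset_flags = offset_flags | to_add & ~to_remove
    # only 0x0004 is added: 0x0002 is filtered out of `to_add`, and the
    # offset stored in the high bits is left untouched.
    assert new_offset_flags == (1024 << 16) | 0x0005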