sidedata: move documentation about sidedata helpers to sidedata module...
Raphaël Gomès
r47849:8bd769b5 default
@@ -1,1943 +1,1946 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullrev,
18 18 short,
19 19 )
20 20 from .pycompat import open
21 21
22 22 from . import (
23 23 error,
24 24 match as matchmod,
25 25 mdiff,
26 26 phases,
27 27 pycompat,
28 28 requirements,
29 29 scmutil,
30 30 util,
31 31 )
32 32
33 33 from .interfaces import repository
34 34 from .revlogutils import sidedata as sidedatamod
35 35 from .revlogutils import constants as revlog_constants
36 36 from .utils import storageutil
37 37
38 38 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
39 39 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
40 40 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
41 41 _CHANGEGROUPV4_DELTA_HEADER = struct.Struct(b">B20s20s20s20s20sH")
42 42
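# A minimal sketch (illustrative, not part of the original module) of what
# these fixed-size headers encode. cg1's header is four 20-byte binary
# nodes (node, p1, p2, cs), matching cg1unpacker._deltaheader below; the
# demo nodes are made-up values.
def _demo_cg1_delta_header():
    node, p1, p2, cs = (b'\x11' * 20, b'\x22' * 20, b'\x33' * 20, b'\x44' * 20)
    header = _CHANGEGROUPV1_DELTA_HEADER.pack(node, p1, p2, cs)
    # four 20-byte fields; the struct is fixed-size, so no length prefix
    assert _CHANGEGROUPV1_DELTA_HEADER.size == len(header) == 80
    return _CHANGEGROUPV1_DELTA_HEADER.unpack(header) == (node, p1, p2, cs)
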
43 43 LFS_REQUIREMENT = b'lfs'
44 44
45 45 readexactly = util.readexactly
46 46
47 47
48 48 def getchunk(stream):
49 49 """return the next chunk from stream as a string"""
50 50 d = readexactly(stream, 4)
51 51 l = struct.unpack(b">l", d)[0]
52 52 if l <= 4:
53 53 if l:
54 54 raise error.Abort(_(b"invalid chunk length %d") % l)
55 55 return b""
56 56 return readexactly(stream, l - 4)
57 57
58 58
59 59 def chunkheader(length):
60 60 """return a changegroup chunk header (string)"""
61 61 return struct.pack(b">l", length + 4)
62 62
63 63
64 64 def closechunk():
65 65 """return a changegroup chunk header (string) for a zero-length chunk"""
66 66 return struct.pack(b">l", 0)
67 67
68 68
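# A minimal sketch (illustrative, not part of the original module) of the
# framing defined by getchunk()/chunkheader()/closechunk(): every chunk is
# prefixed by a big-endian 4-byte length that counts itself, and a
# zero-length chunk terminates a group. util.chunkbuffer is used here only
# to get a file-like stream over the framed bytes.
def _demo_chunk_framing():
    payload = b'some revision data'
    framed = chunkheader(len(payload)) + payload + closechunk()
    stream = util.chunkbuffer([framed])
    assert getchunk(stream) == payload
    assert getchunk(stream) == b''  # empty chunk == end of group
    return True
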
69 69 def _fileheader(path):
70 70 """Obtain a changegroup chunk header for a named path."""
71 71 return chunkheader(len(path)) + path
72 72
73 73
74 74 def writechunks(ui, chunks, filename, vfs=None):
75 75 """Write chunks to a file and return its filename.
76 76
77 77 The stream is assumed to be a bundle file.
78 78 Existing files will not be overwritten.
79 79 If no filename is specified, a temporary file is created.
80 80 """
81 81 fh = None
82 82 cleanup = None
83 83 try:
84 84 if filename:
85 85 if vfs:
86 86 fh = vfs.open(filename, b"wb")
87 87 else:
88 88 # Increase default buffer size because default is usually
89 89 # small (4k is common on Linux).
90 90 fh = open(filename, b"wb", 131072)
91 91 else:
92 92 fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
93 93 fh = os.fdopen(fd, "wb")
94 94 cleanup = filename
95 95 for c in chunks:
96 96 fh.write(c)
97 97 cleanup = None
98 98 return filename
99 99 finally:
100 100 if fh is not None:
101 101 fh.close()
102 102 if cleanup is not None:
103 103 if filename and vfs:
104 104 vfs.unlink(cleanup)
105 105 else:
106 106 os.unlink(cleanup)
107 107
108 108
109 109 class cg1unpacker(object):
110 110 """Unpacker for cg1 changegroup streams.
111 111
112 112 A changegroup unpacker handles the framing of the revision data in
113 113 the wire format. Most consumers will want to use the apply()
114 114 method to add the changes from the changegroup to a repository.
115 115
116 116 If you're forwarding a changegroup unmodified to another consumer,
117 117 use getchunks(), which returns an iterator of changegroup
118 118 chunks. This is mostly useful for cases where you need to know the
119 119 data stream has ended by observing the end of the changegroup.
120 120
121 121 deltachunk() is useful only if you're applying delta data. Most
122 122 consumers should prefer apply() instead.
123 123
124 124 A few other public methods exist. Those are used only for
125 125 bundlerepo and some debug commands - their use is discouraged.
126 126 """
127 127
128 128 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
129 129 deltaheadersize = deltaheader.size
130 130 version = b'01'
131 131 _grouplistcount = 1 # One list of files after the manifests
132 132
133 133 def __init__(self, fh, alg, extras=None):
134 134 if alg is None:
135 135 alg = b'UN'
136 136 if alg not in util.compengines.supportedbundletypes:
137 137 raise error.Abort(_(b'unknown stream compression type: %s') % alg)
138 138 if alg == b'BZ':
139 139 alg = b'_truncatedBZ'
140 140
141 141 compengine = util.compengines.forbundletype(alg)
142 142 self._stream = compengine.decompressorreader(fh)
143 143 self._type = alg
144 144 self.extras = extras or {}
145 145 self.callback = None
146 146
147 147 # These methods (compressed, read, seek, tell) all appear to only
148 148 # be used by bundlerepo, but it's a little hard to tell.
149 149 def compressed(self):
150 150 return self._type is not None and self._type != b'UN'
151 151
152 152 def read(self, l):
153 153 return self._stream.read(l)
154 154
155 155 def seek(self, pos):
156 156 return self._stream.seek(pos)
157 157
158 158 def tell(self):
159 159 return self._stream.tell()
160 160
161 161 def close(self):
162 162 return self._stream.close()
163 163
164 164 def _chunklength(self):
165 165 d = readexactly(self._stream, 4)
166 166 l = struct.unpack(b">l", d)[0]
167 167 if l <= 4:
168 168 if l:
169 169 raise error.Abort(_(b"invalid chunk length %d") % l)
170 170 return 0
171 171 if self.callback:
172 172 self.callback()
173 173 return l - 4
174 174
175 175 def changelogheader(self):
176 176 """v10 does not have a changelog header chunk"""
177 177 return {}
178 178
179 179 def manifestheader(self):
180 180 """v10 does not have a manifest header chunk"""
181 181 return {}
182 182
183 183 def filelogheader(self):
184 184 """return the header of the filelogs chunk, v10 only has the filename"""
185 185 l = self._chunklength()
186 186 if not l:
187 187 return {}
188 188 fname = readexactly(self._stream, l)
189 189 return {b'filename': fname}
190 190
191 191 def _deltaheader(self, headertuple, prevnode):
192 192 node, p1, p2, cs = headertuple
193 193 if prevnode is None:
194 194 deltabase = p1
195 195 else:
196 196 deltabase = prevnode
197 197 flags = 0
198 198 protocol_flags = 0
199 199 return node, p1, p2, deltabase, cs, flags, protocol_flags
200 200
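    # Illustrative note (not in the original module): cg1's delta base is
    # implicit -- p1 for the first chunk of a group, then each chunk
    # deltas against the node that immediately preceded it in the stream.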
201 201 def deltachunk(self, prevnode):
202 202 l = self._chunklength()
203 203 if not l:
204 204 return {}
205 205 headerdata = readexactly(self._stream, self.deltaheadersize)
206 206 header = self.deltaheader.unpack(headerdata)
207 207 delta = readexactly(self._stream, l - self.deltaheadersize)
208 208 header = self._deltaheader(header, prevnode)
209 209 node, p1, p2, deltabase, cs, flags, protocol_flags = header
210 210 return node, p1, p2, cs, deltabase, delta, flags, protocol_flags
211 211
212 212 def getchunks(self):
213 213 """returns all the chunks contains in the bundle
214 214
215 215 Used when you need to forward the binary stream to a file or another
216 216 network API. To do so, it parses the changegroup data; otherwise it would
217 217 block in the sshrepo case because it doesn't know the end of the stream.
218 218 """
219 219 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
220 220 # and a list of filelogs. For changegroup 3, we expect 4 parts:
221 221 # changelog, manifestlog, a list of tree manifestlogs, and a list of
222 222 # filelogs.
223 223 #
224 224 # Changelog and manifestlog parts are terminated with empty chunks. The
225 225 # tree and file parts are a list of entry sections. Each entry section
226 226 # is a series of chunks terminating in an empty chunk. The list of these
227 227 # entry sections is terminated in yet another empty chunk, so we know
228 228 # we've reached the end of the tree/file list when we reach an empty
229 229 # chunk that was preceded by no non-empty chunks.
230 230
231 231 parts = 0
232 232 while parts < 2 + self._grouplistcount:
233 233 noentries = True
234 234 while True:
235 235 chunk = getchunk(self)
236 236 if not chunk:
237 237 # The first two empty chunks represent the end of the
238 238 # changelog and the manifestlog portions. The remaining
239 239 # empty chunks represent either A) the end of individual
240 240 # tree or file entries in the file list, or B) the end of
241 241 # the entire list. It's the end of the entire list if there
242 242 # were no entries (i.e. noentries is True).
243 243 if parts < 2:
244 244 parts += 1
245 245 elif noentries:
246 246 parts += 1
247 247 break
248 248 noentries = False
249 249 yield chunkheader(len(chunk))
250 250 pos = 0
251 251 while pos < len(chunk):
252 252 next = pos + 2 ** 20
253 253 yield chunk[pos:next]
254 254 pos = next
255 255 yield closechunk()
256 256
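    # Forwarding sketch (illustrative, not part of the original module):
    # getchunks() pairs naturally with writechunks() above when dumping
    # the raw stream, e.g. to save a bundle; `ui` and the filename are
    # assumed to come from the caller:
    #
    #     writechunks(ui, cg.getchunks(), b'dump.hg')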
257 257 def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
258 258 self.callback = prog.increment
259 259 # no need to check for empty manifest group here:
260 260 # if the result of the merge of 1 and 2 is the same in 3 and 4,
261 261 # no new manifest will be created and the manifest group will
262 262 # be empty during the pull
263 263 self.manifestheader()
264 264 deltas = self.deltaiter()
265 265 storage = repo.manifestlog.getstorage(b'')
266 266 storage.addgroup(deltas, revmap, trp, addrevisioncb=addrevisioncb)
267 267 prog.complete()
268 268 self.callback = None
269 269
270 270 def apply(
271 271 self,
272 272 repo,
273 273 tr,
274 274 srctype,
275 275 url,
276 276 targetphase=phases.draft,
277 277 expectedtotal=None,
278 278 sidedata_categories=None,
279 279 ):
280 280 """Add the changegroup returned by source.read() to this repo.
281 281 srctype is a string like 'push', 'pull', or 'unbundle'. url is
282 282 the URL of the repo where this changegroup is coming from.
283 283
284 284 Return an integer summarizing the change to this repo:
285 285 - nothing changed or no source: 0
286 286 - more heads than before: 1+added heads (2..n)
287 287 - fewer heads than before: -1-removed heads (-2..-n)
288 288 - number of heads stays the same: 1
289 289
290 290 `sidedata_categories` is an optional set of the sidedata categories
291 291 wanted by the remote.
292 292 """
293 293 repo = repo.unfiltered()
294 294
295 295 # Only useful if we're adding sidedata categories. If both peers have
296 296 # the same categories, then we simply don't do anything.
297 297 adding_sidedata = (
298 298 requirements.REVLOGV2_REQUIREMENT in repo.requirements
299 299 and self.version == b'04'
300 300 and srctype == b'pull'
301 301 )
302 302 if adding_sidedata:
303 303 sidedata_helpers = sidedatamod.get_sidedata_helpers(
304 304 repo,
305 305 sidedata_categories or set(),
306 306 pull=True,
307 307 )
308 308 else:
309 309 sidedata_helpers = None
310 310
311 311 def csmap(x):
312 312 repo.ui.debug(b"add changeset %s\n" % short(x))
313 313 return len(cl)
314 314
315 315 def revmap(x):
316 316 return cl.rev(x)
317 317
318 318 try:
319 319 # The transaction may already carry source information. In this
320 320 # case we use the top level data. We overwrite the argument
321 321 # because we need to use the top level value (if they exist)
322 322 # in this function.
323 323 srctype = tr.hookargs.setdefault(b'source', srctype)
324 324 tr.hookargs.setdefault(b'url', url)
325 325 repo.hook(
326 326 b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
327 327 )
328 328
329 329 # write changelog data to temp files so concurrent readers
330 330 # will not see an inconsistent view
331 331 cl = repo.changelog
332 332 cl.delayupdate(tr)
333 333 oldheads = set(cl.heads())
334 334
335 335 trp = weakref.proxy(tr)
336 336 # pull off the changeset group
337 337 repo.ui.status(_(b"adding changesets\n"))
338 338 clstart = len(cl)
339 339 progress = repo.ui.makeprogress(
340 340 _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
341 341 )
342 342 self.callback = progress.increment
343 343
344 344 efilesset = set()
345 345 duprevs = []
346 346
347 347 def ondupchangelog(cl, rev):
348 348 if rev < clstart:
349 349 duprevs.append(rev)
350 350
351 351 def onchangelog(cl, rev):
352 352 ctx = cl.changelogrevision(rev)
353 353 efilesset.update(ctx.files)
354 354 repo.register_changeset(rev, ctx)
355 355
356 356 self.changelogheader()
357 357 deltas = self.deltaiter()
358 358 if not cl.addgroup(
359 359 deltas,
360 360 csmap,
361 361 trp,
362 362 alwayscache=True,
363 363 addrevisioncb=onchangelog,
364 364 duplicaterevisioncb=ondupchangelog,
365 365 ):
366 366 repo.ui.develwarn(
367 367 b'applied empty changelog from changegroup',
368 368 config=b'warn-empty-changegroup',
369 369 )
370 370 efiles = len(efilesset)
371 371 clend = len(cl)
372 372 changesets = clend - clstart
373 373 progress.complete()
374 374 del deltas
375 375 # TODO Python 2.7 removal
376 376 # del efilesset
377 377 efilesset = None
378 378 self.callback = None
379 379
380 380 # Keep track of the (non-changelog) revlogs we've updated and their
381 381 # range of new revisions for sidedata rewrite.
382 382 # TODO do something more efficient than keeping the reference to
383 383 # the revlogs, especially memory-wise.
384 384 touched_manifests = {}
385 385 touched_filelogs = {}
386 386
387 387 # pull off the manifest group
388 388 repo.ui.status(_(b"adding manifests\n"))
389 389 # We know that we'll never have more manifests than we had
390 390 # changesets.
391 391 progress = repo.ui.makeprogress(
392 392 _(b'manifests'), unit=_(b'chunks'), total=changesets
393 393 )
394 394 on_manifest_rev = None
395 395 if sidedata_helpers:
396 396 if revlog_constants.KIND_MANIFESTLOG in sidedata_helpers[1]:
397 397
398 398 def on_manifest_rev(manifest, rev):
399 399 range = touched_manifests.get(manifest)
400 400 if not range:
401 401 touched_manifests[manifest] = (rev, rev)
402 402 else:
403 403 assert rev == range[1] + 1
404 404 touched_manifests[manifest] = (range[0], rev)
405 405
406 406 self._unpackmanifests(
407 407 repo,
408 408 revmap,
409 409 trp,
410 410 progress,
411 411 addrevisioncb=on_manifest_rev,
412 412 )
413 413
414 414 needfiles = {}
415 415 if repo.ui.configbool(b'server', b'validate'):
416 416 cl = repo.changelog
417 417 ml = repo.manifestlog
418 418 # validate incoming csets have their manifests
419 419 for cset in pycompat.xrange(clstart, clend):
420 420 mfnode = cl.changelogrevision(cset).manifest
421 421 mfest = ml[mfnode].readdelta()
422 422 # store file nodes we must see
423 423 for f, n in pycompat.iteritems(mfest):
424 424 needfiles.setdefault(f, set()).add(n)
425 425
426 426 on_filelog_rev = None
427 427 if sidedata_helpers:
428 428 if revlog_constants.KIND_FILELOG in sidedata_helpers[1]:
429 429
430 430 def on_filelog_rev(filelog, rev):
431 431 range = touched_filelogs.get(filelog)
432 432 if not range:
433 433 touched_filelogs[filelog] = (rev, rev)
434 434 else:
435 435 assert rev == range[1] + 1
436 436 touched_filelogs[filelog] = (range[0], rev)
437 437
438 438 # process the files
439 439 repo.ui.status(_(b"adding file changes\n"))
440 440 newrevs, newfiles = _addchangegroupfiles(
441 441 repo,
442 442 self,
443 443 revmap,
444 444 trp,
445 445 efiles,
446 446 needfiles,
447 447 addrevisioncb=on_filelog_rev,
448 448 )
449 449
450 450 if sidedata_helpers:
451 451 if revlog_constants.KIND_CHANGELOG in sidedata_helpers[1]:
452 452 cl.rewrite_sidedata(sidedata_helpers, clstart, clend - 1)
453 453 for mf, (startrev, endrev) in touched_manifests.items():
454 454 mf.rewrite_sidedata(sidedata_helpers, startrev, endrev)
455 455 for fl, (startrev, endrev) in touched_filelogs.items():
456 456 fl.rewrite_sidedata(sidedata_helpers, startrev, endrev)
457 457
458 458 # make sure the values exist
459 459 tr.changes.setdefault(b'changegroup-count-changesets', 0)
460 460 tr.changes.setdefault(b'changegroup-count-revisions', 0)
461 461 tr.changes.setdefault(b'changegroup-count-files', 0)
462 462 tr.changes.setdefault(b'changegroup-count-heads', 0)
463 463
464 464 # Some code uses bundle operations for internal purposes, usually
465 465 # setting `ui.quiet` to keep them out of the user's sight. Since the
466 466 # report of such operations now happens at the end of the transaction,
467 467 # ui.quiet has no direct effect on the output.
468 468 #
469 469 # To preserve this intent we use an inelegant hack: we fail to report
470 470 # the change if `quiet` is set. We should probably move to
471 471 # something better, but this is a good first step to allow the "end
472 472 # of transaction report" to pass tests.
473 473 if not repo.ui.quiet:
474 474 tr.changes[b'changegroup-count-changesets'] += changesets
475 475 tr.changes[b'changegroup-count-revisions'] += newrevs
476 476 tr.changes[b'changegroup-count-files'] += newfiles
477 477
478 478 deltaheads = 0
479 479 if oldheads:
480 480 heads = cl.heads()
481 481 deltaheads += len(heads) - len(oldheads)
482 482 for h in heads:
483 483 if h not in oldheads and repo[h].closesbranch():
484 484 deltaheads -= 1
485 485
486 486 # see previous comment about checking ui.quiet
487 487 if not repo.ui.quiet:
488 488 tr.changes[b'changegroup-count-heads'] += deltaheads
489 489 repo.invalidatevolatilesets()
490 490
491 491 if changesets > 0:
492 492 if b'node' not in tr.hookargs:
493 493 tr.hookargs[b'node'] = hex(cl.node(clstart))
494 494 tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
495 495 hookargs = dict(tr.hookargs)
496 496 else:
497 497 hookargs = dict(tr.hookargs)
498 498 hookargs[b'node'] = hex(cl.node(clstart))
499 499 hookargs[b'node_last'] = hex(cl.node(clend - 1))
500 500 repo.hook(
501 501 b'pretxnchangegroup',
502 502 throw=True,
503 503 **pycompat.strkwargs(hookargs)
504 504 )
505 505
506 506 added = pycompat.xrange(clstart, clend)
507 507 phaseall = None
508 508 if srctype in (b'push', b'serve'):
509 509 # Old servers can not push the boundary themselves.
510 510 # New servers won't push the boundary if changeset already
511 511 # exists locally as secret
512 512 #
513 513 # We should not use `added` here but the list of all changes
514 514 # in the bundle
515 515 if repo.publishing():
516 516 targetphase = phaseall = phases.public
517 517 else:
518 518 # closer target phase computation
519 519
520 520 # Those changesets have been pushed from the
521 521 # outside, their phases are going to be pushed
522 522 # alongside. Therefore `targetphase` is
523 523 # ignored.
524 524 targetphase = phaseall = phases.draft
525 525 if added:
526 526 phases.registernew(repo, tr, targetphase, added)
527 527 if phaseall is not None:
528 528 if duprevs:
529 529 duprevs.extend(added)
530 530 else:
531 531 duprevs = added
532 532 phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
533 533 duprevs = []
534 534
535 535 if changesets > 0:
536 536
537 537 def runhooks(unused_success):
538 538 # These hooks run when the lock releases, not when the
539 539 # transaction closes. So it's possible for the changelog
540 540 # to have changed since we last saw it.
541 541 if clstart >= len(repo):
542 542 return
543 543
544 544 repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))
545 545
546 546 for rev in added:
547 547 args = hookargs.copy()
548 548 args[b'node'] = hex(cl.node(rev))
549 549 del args[b'node_last']
550 550 repo.hook(b"incoming", **pycompat.strkwargs(args))
551 551
552 552 newheads = [h for h in repo.heads() if h not in oldheads]
553 553 repo.ui.log(
554 554 b"incoming",
555 555 b"%d incoming changes - new heads: %s\n",
556 556 len(added),
557 557 b', '.join([hex(c[:6]) for c in newheads]),
558 558 )
559 559
560 560 tr.addpostclose(
561 561 b'changegroup-runhooks-%020i' % clstart,
562 562 lambda tr: repo._afterlock(runhooks),
563 563 )
564 564 finally:
565 565 repo.ui.flush()
566 566 # never return 0 here:
567 567 if deltaheads < 0:
568 568 ret = deltaheads - 1
569 569 else:
570 570 ret = deltaheads + 1
571 571 return ret
572 572
573 573 def deltaiter(self):
574 574 """
575 575 returns an iterator of the deltas in this changegroup
576 576
577 577 Useful for passing to the underlying storage system to be stored.
578 578 """
579 579 chain = None
580 580 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
581 581 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags, sidedata)
582 582 yield chunkdata
583 583 chain = chunkdata[0]
584 584
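    # Consumption sketch (illustrative, mirroring _unpackmanifests above):
    # the delta iterator is normally handed straight to a storage object:
    #
    #     cg.manifestheader()
    #     deltas = cg.deltaiter()
    #     storage.addgroup(deltas, revmap, trp)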
585 585
586 586 class cg2unpacker(cg1unpacker):
587 587 """Unpacker for cg2 streams.
588 588
589 589 cg2 streams add support for generaldelta, so the delta header
590 590 format is slightly different. All other features about the data
591 591 remain the same.
592 592 """
593 593
594 594 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
595 595 deltaheadersize = deltaheader.size
596 596 version = b'02'
597 597
598 598 def _deltaheader(self, headertuple, prevnode):
599 599 node, p1, p2, deltabase, cs = headertuple
600 600 flags = 0
601 601 protocol_flags = 0
602 602 return node, p1, p2, deltabase, cs, flags, protocol_flags
603 603
604 604
605 605 class cg3unpacker(cg2unpacker):
606 606 """Unpacker for cg3 streams.
607 607
608 608 cg3 streams add support for exchanging treemanifests and revlog
609 609 flags. It adds the revlog flags to the delta header and an empty chunk
610 610 separating manifests and files.
611 611 """
612 612
613 613 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
614 614 deltaheadersize = deltaheader.size
615 615 version = b'03'
616 616 _grouplistcount = 2 # One list of manifests and one list of files
617 617
618 618 def _deltaheader(self, headertuple, prevnode):
619 619 node, p1, p2, deltabase, cs, flags = headertuple
620 620 protocol_flags = 0
621 621 return node, p1, p2, deltabase, cs, flags, protocol_flags
622 622
623 623 def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
624 624 super(cg3unpacker, self)._unpackmanifests(
625 625 repo, revmap, trp, prog, addrevisioncb=addrevisioncb
626 626 )
627 627 for chunkdata in iter(self.filelogheader, {}):
628 628 # If we get here, there are directory manifests in the changegroup
629 629 d = chunkdata[b"filename"]
630 630 repo.ui.debug(b"adding %s revisions\n" % d)
631 631 deltas = self.deltaiter()
632 632 if not repo.manifestlog.getstorage(d).addgroup(
633 633 deltas, revmap, trp, addrevisioncb=addrevisioncb
634 634 ):
635 635 raise error.Abort(_(b"received dir revlog group is empty"))
636 636
637 637
638 638 class cg4unpacker(cg3unpacker):
639 639 """Unpacker for cg4 streams.
640 640
641 641 cg4 streams add support for exchanging sidedata.
642 642 """
643 643
644 644 deltaheader = _CHANGEGROUPV4_DELTA_HEADER
645 645 deltaheadersize = deltaheader.size
646 646 version = b'04'
647 647
648 648 def _deltaheader(self, headertuple, prevnode):
649 649 protocol_flags, node, p1, p2, deltabase, cs, flags = headertuple
650 650 return node, p1, p2, deltabase, cs, flags, protocol_flags
651 651
652 652 def deltachunk(self, prevnode):
653 653 res = super(cg4unpacker, self).deltachunk(prevnode)
654 654 if not res:
655 655 return res
656 656
657 657 (node, p1, p2, cs, deltabase, delta, flags, protocol_flags) = res
658 658
659 659 sidedata = {}
660 660 if protocol_flags & storageutil.CG_FLAG_SIDEDATA:
661 661 sidedata_raw = getchunk(self._stream)
662 662 sidedata = sidedatamod.deserialize_sidedata(sidedata_raw)
663 663
664 664 return node, p1, p2, cs, deltabase, delta, flags, sidedata
665 665
666 666
667 667 class headerlessfixup(object):
668 668 def __init__(self, fh, h):
669 669 self._h = h
670 670 self._fh = fh
671 671
672 672 def read(self, n):
673 673 if self._h:
674 674 d, self._h = self._h[:n], self._h[n:]
675 675 if len(d) < n:
676 676 d += readexactly(self._fh, n - len(d))
677 677 return d
678 678 return readexactly(self._fh, n)
679 679
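# Usage sketch for headerlessfixup (illustrative; the variable names are
# assumptions): a caller that already consumed some leading bytes while
# sniffing the stream type can push them back like so:
#
#     magic = readexactly(fh, 4)
#     fh = headerlessfixup(fh, magic)
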
680 680
681 681 def _revisiondeltatochunks(repo, delta, headerfn):
682 682 """Serialize a revisiondelta to changegroup chunks."""
683 683
684 684 # The captured revision delta may be encoded as a delta against
685 685 # a base revision or as a full revision. The changegroup format
686 686 # requires that everything on the wire be deltas. So for full
687 687 # revisions, we need to invent a header that says to rewrite
688 688 # data.
689 689
690 690 if delta.delta is not None:
691 691 prefix, data = b'', delta.delta
692 692 elif delta.basenode == repo.nullid:
693 693 data = delta.revision
694 694 prefix = mdiff.trivialdiffheader(len(data))
695 695 else:
696 696 data = delta.revision
697 697 prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))
698 698
699 699 meta = headerfn(delta)
700 700
701 701 yield chunkheader(len(meta) + len(prefix) + len(data))
702 702 yield meta
703 703 if prefix:
704 704 yield prefix
705 705 yield data
706 706
707 707 if delta.protocol_flags & storageutil.CG_FLAG_SIDEDATA:
708 708 # Need a separate chunk for sidedata to be able to differentiate
709 709 # "raw delta" length and sidedata length
710 710 sidedata = delta.sidedata
711 711 yield chunkheader(len(sidedata))
712 712 yield sidedata
713 713
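# Note on the invented headers above (illustrative; based on mdiff's hunk
# header of three big-endian int32s: start, end, new length):
#
#     mdiff.trivialdiffheader(n)          # replace 0..0 with n new bytes
#     mdiff.replacediffheader(oldlen, n)  # replace 0..oldlen with n bytes
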
714 714
715 715 def _sortnodesellipsis(store, nodes, cl, lookup):
716 716 """Sort nodes for changegroup generation."""
717 717 # Ellipses serving mode.
718 718 #
719 719 # In a perfect world, we'd generate better ellipsis-ified graphs
720 720 # for non-changelog revlogs. In practice, we haven't started doing
721 721 # that yet, so the resulting DAGs for the manifestlog and filelogs
722 722 # are actually full of bogus parentage on all the ellipsis
723 723 # nodes. This has the side effect that, while the contents are
724 724 # correct, the individual DAGs might be completely out of whack in
725 725 # a case like 882681bc3166 and its ancestors (back about 10
726 726 # revisions or so) in the main hg repo.
727 727 #
728 728 # The one invariant we *know* holds is that the new (potentially
729 729 # bogus) DAG shape will be valid if we order the nodes in the
730 730 # order that they're introduced in dramatis personae by the
731 731 # changelog, so what we do is we sort the non-changelog histories
732 732 # by the order in which they are used by the changelog.
733 733 key = lambda n: cl.rev(lookup(n))
734 734 return sorted(nodes, key=key)
735 735
736 736
737 737 def _resolvenarrowrevisioninfo(
738 738 cl,
739 739 store,
740 740 ischangelog,
741 741 rev,
742 742 linkrev,
743 743 linknode,
744 744 clrevtolocalrev,
745 745 fullclnodes,
746 746 precomputedellipsis,
747 747 ):
748 748 linkparents = precomputedellipsis[linkrev]
749 749
750 750 def local(clrev):
751 751 """Turn a changelog revnum into a local revnum.
752 752
753 753 The ellipsis dag is stored as revnums on the changelog,
754 754 but when we're producing ellipsis entries for
755 755 non-changelog revlogs, we need to turn those numbers into
756 756 something local. This does that for us, and during the
757 757 changelog sending phase will also expand the stored
758 758 mappings as needed.
759 759 """
760 760 if clrev == nullrev:
761 761 return nullrev
762 762
763 763 if ischangelog:
764 764 return clrev
765 765
766 766 # Walk the ellipsis-ized changelog breadth-first looking for a
767 767 # change that has been linked from the current revlog.
768 768 #
769 769 # For a flat manifest revlog only a single step should be necessary
770 770 # as all relevant changelog entries are relevant to the flat
771 771 # manifest.
772 772 #
773 773 # For a filelog or tree manifest dirlog however not every changelog
774 774 # entry will have been relevant, so we need to skip some changelog
775 775 # nodes even after ellipsis-izing.
776 776 walk = [clrev]
777 777 while walk:
778 778 p = walk[0]
779 779 walk = walk[1:]
780 780 if p in clrevtolocalrev:
781 781 return clrevtolocalrev[p]
782 782 elif p in fullclnodes:
783 783 walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
784 784 elif p in precomputedellipsis:
785 785 walk.extend(
786 786 [pp for pp in precomputedellipsis[p] if pp != nullrev]
787 787 )
788 788 else:
789 789 # In this case, we've got an ellipsis with parents
790 790 # outside the current bundle (likely an
791 791 # incremental pull). We "know" that we can use the
792 792 # value of this same revlog at whatever revision
793 793 # is pointed to by linknode. "Know" is in scare
794 794 # quotes because I haven't done enough examination
795 795 # of edge cases to convince myself this is really
796 796 # a fact - it works for all the (admittedly
797 797 # thorough) cases in our testsuite, but I would be
798 798 # somewhat unsurprised to find a case in the wild
799 799 # where this breaks down a bit. That said, I don't
800 800 # know if it would hurt anything.
801 801 for i in pycompat.xrange(rev, 0, -1):
802 802 if store.linkrev(i) == clrev:
803 803 return i
804 804 # We failed to resolve a parent for this node, so
805 805 # we crash the changegroup construction.
806 806 raise error.Abort(
807 807 b"unable to resolve parent while packing '%s' %r"
808 808 b' for changeset %r' % (store.indexfile, rev, clrev)
809 809 )
810 810
811 811 return nullrev
812 812
813 813 if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
814 814 p1, p2 = nullrev, nullrev
815 815 elif len(linkparents) == 1:
816 816 (p1,) = sorted(local(p) for p in linkparents)
817 817 p2 = nullrev
818 818 else:
819 819 p1, p2 = sorted(local(p) for p in linkparents)
820 820
821 821 p1node, p2node = store.node(p1), store.node(p2)
822 822
823 823 return p1node, p2node, linknode
824 824
825 825
826 826 def deltagroup(
827 827 repo,
828 828 store,
829 829 nodes,
830 830 ischangelog,
831 831 lookup,
832 832 forcedeltaparentprev,
833 833 topic=None,
834 834 ellipses=False,
835 835 clrevtolocalrev=None,
836 836 fullclnodes=None,
837 837 precomputedellipsis=None,
838 838 sidedata_helpers=None,
839 839 ):
840 840 """Calculate deltas for a set of revisions.
841 841
842 842 Is a generator of ``revisiondelta`` instances.
843 843
844 844 If topic is not None, progress detail will be generated using this
845 845 topic name (e.g. changesets, manifests, etc).
846 846
847 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
847 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
848 `sidedata_helpers`.
848 849 """
849 850 if not nodes:
850 851 return
851 852
852 853 cl = repo.changelog
853 854
854 855 if ischangelog:
855 856 # `hg log` shows changesets in storage order. To preserve order
856 857 # across clones, send out changesets in storage order.
857 858 nodesorder = b'storage'
858 859 elif ellipses:
859 860 nodes = _sortnodesellipsis(store, nodes, cl, lookup)
860 861 nodesorder = b'nodes'
861 862 else:
862 863 nodesorder = None
863 864
864 865 # Perform ellipses filtering and revision massaging. We do this before
865 866 # emitrevisions() because a) filtering out revisions creates less work
866 867 # for emitrevisions() b) dropping revisions would break emitrevisions()'s
867 868 # assumptions about delta choices and we would possibly send a delta
868 869 # referencing a missing base revision.
869 870 #
870 871 # Also, calling lookup() has side-effects with regards to populating
871 872 # data structures. If we don't call lookup() for each node or if we call
872 873 # lookup() after the first pass through each node, things can break -
873 874 # possibly intermittently depending on the python hash seed! For that
874 875 # reason, we store a mapping of all linknodes during the initial node
875 876 # pass rather than use lookup() on the output side.
876 877 if ellipses:
877 878 filtered = []
878 879 adjustedparents = {}
879 880 linknodes = {}
880 881
881 882 for node in nodes:
882 883 rev = store.rev(node)
883 884 linknode = lookup(node)
884 885 linkrev = cl.rev(linknode)
885 886 clrevtolocalrev[linkrev] = rev
886 887
887 888 # If linknode is in fullclnodes, it means the corresponding
888 889 # changeset was a full changeset and is being sent unaltered.
889 890 if linknode in fullclnodes:
890 891 linknodes[node] = linknode
891 892
892 893 # If the corresponding changeset wasn't in the set computed
893 894 # as relevant to us, it should be dropped outright.
894 895 elif linkrev not in precomputedellipsis:
895 896 continue
896 897
897 898 else:
898 899 # We could probably do this later and avoid the dict
899 900 # holding state. But it likely doesn't matter.
900 901 p1node, p2node, linknode = _resolvenarrowrevisioninfo(
901 902 cl,
902 903 store,
903 904 ischangelog,
904 905 rev,
905 906 linkrev,
906 907 linknode,
907 908 clrevtolocalrev,
908 909 fullclnodes,
909 910 precomputedellipsis,
910 911 )
911 912
912 913 adjustedparents[node] = (p1node, p2node)
913 914 linknodes[node] = linknode
914 915
915 916 filtered.append(node)
916 917
917 918 nodes = filtered
918 919
919 920 # We expect the first pass to be fast, so we only engage the progress
920 921 # meter for constructing the revision deltas.
921 922 progress = None
922 923 if topic is not None:
923 924 progress = repo.ui.makeprogress(
924 925 topic, unit=_(b'chunks'), total=len(nodes)
925 926 )
926 927
927 928 configtarget = repo.ui.config(b'devel', b'bundle.delta')
928 929 if configtarget not in (b'', b'p1', b'full'):
929 930 msg = _(b"""config "devel.bundle.delta" as unknown value: %s""")
930 931 repo.ui.warn(msg % configtarget)
931 932
932 933 deltamode = repository.CG_DELTAMODE_STD
933 934 if forcedeltaparentprev:
934 935 deltamode = repository.CG_DELTAMODE_PREV
935 936 elif configtarget == b'p1':
936 937 deltamode = repository.CG_DELTAMODE_P1
937 938 elif configtarget == b'full':
938 939 deltamode = repository.CG_DELTAMODE_FULL
939 940
940 941 revisions = store.emitrevisions(
941 942 nodes,
942 943 nodesorder=nodesorder,
943 944 revisiondata=True,
944 945 assumehaveparentrevisions=not ellipses,
945 946 deltamode=deltamode,
946 947 sidedata_helpers=sidedata_helpers,
947 948 )
948 949
949 950 for i, revision in enumerate(revisions):
950 951 if progress:
951 952 progress.update(i + 1)
952 953
953 954 if ellipses:
954 955 linknode = linknodes[revision.node]
955 956
956 957 if revision.node in adjustedparents:
957 958 p1node, p2node = adjustedparents[revision.node]
958 959 revision.p1node = p1node
959 960 revision.p2node = p2node
960 961 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
961 962
962 963 else:
963 964 linknode = lookup(revision.node)
964 965
965 966 revision.linknode = linknode
966 967 yield revision
967 968
968 969 if progress:
969 970 progress.complete()
970 971
971 972
972 973 class cgpacker(object):
973 974 def __init__(
974 975 self,
975 976 repo,
976 977 oldmatcher,
977 978 matcher,
978 979 version,
979 980 builddeltaheader,
980 981 manifestsend,
981 982 forcedeltaparentprev=False,
982 983 bundlecaps=None,
983 984 ellipses=False,
984 985 shallow=False,
985 986 ellipsisroots=None,
986 987 fullnodes=None,
987 988 remote_sidedata=None,
988 989 ):
989 990 """Given a source repo, construct a bundler.
990 991
991 992 oldmatcher is a matcher that matches on files the client already has.
992 993 These will not be included in the changegroup.
993 994
994 995 matcher is a matcher that matches on files to include in the
995 996 changegroup. Used to facilitate sparse changegroups.
996 997
997 998 forcedeltaparentprev indicates whether delta parents must be against
998 999 the previous revision in a delta group. This should only be used for
999 1000 compatibility with changegroup version 1.
1000 1001
1001 1002 builddeltaheader is a callable that constructs the header for a group
1002 1003 delta.
1003 1004
1004 1005 manifestsend is a chunk to send after manifests have been fully emitted.
1005 1006
1006 1007 ellipses indicates whether ellipsis serving mode is enabled.
1007 1008
1008 1009 bundlecaps is optional and can be used to specify the set of
1009 1010 capabilities which can be used to build the bundle. While bundlecaps is
1010 1011 unused in core Mercurial, extensions rely on this feature to communicate
1011 1012 capabilities to customize the changegroup packer.
1012 1013
1013 1014 shallow indicates whether shallow data might be sent. The packer may
1014 1015 need to pack file contents not introduced by the changes being packed.
1015 1016
1016 1017 fullnodes is the set of changelog nodes which should not be ellipsis
1017 1018 nodes. We store this rather than the set of nodes that should be
1018 1019 ellipsis because for very large histories we expect this to be
1019 1020 significantly smaller.
1020 1021
1021 1022 remote_sidedata is the set of sidedata categories wanted by the remote.
1022 1023 """
1023 1024 assert oldmatcher
1024 1025 assert matcher
1025 1026 self._oldmatcher = oldmatcher
1026 1027 self._matcher = matcher
1027 1028
1028 1029 self.version = version
1029 1030 self._forcedeltaparentprev = forcedeltaparentprev
1030 1031 self._builddeltaheader = builddeltaheader
1031 1032 self._manifestsend = manifestsend
1032 1033 self._ellipses = ellipses
1033 1034
1034 1035 # Set of capabilities we can use to build the bundle.
1035 1036 if bundlecaps is None:
1036 1037 bundlecaps = set()
1037 1038 self._bundlecaps = bundlecaps
1038 1039 if remote_sidedata is None:
1039 1040 remote_sidedata = set()
1040 1041 self._remote_sidedata = remote_sidedata
1041 1042 self._isshallow = shallow
1042 1043 self._fullclnodes = fullnodes
1043 1044
1044 1045 # Maps ellipsis revs to their roots at the changelog level.
1045 1046 self._precomputedellipsis = ellipsisroots
1046 1047
1047 1048 self._repo = repo
1048 1049
1049 1050 if self._repo.ui.verbose and not self._repo.ui.debugflag:
1050 1051 self._verbosenote = self._repo.ui.note
1051 1052 else:
1052 1053 self._verbosenote = lambda s: None
1053 1054
1054 1055 def generate(
1055 1056 self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
1056 1057 ):
1057 1058 """Yield a sequence of changegroup byte chunks.
1058 1059 If changelog is False, changelog data won't be added to changegroup
1059 1060 """
1060 1061
1061 1062 repo = self._repo
1062 1063 cl = repo.changelog
1063 1064
1064 1065 self._verbosenote(_(b'uncompressed size of bundle content:\n'))
1065 1066 size = 0
1066 1067
1067 1068 sidedata_helpers = None
1068 1069 if self.version == b'04':
1069 1070 remote_sidedata = self._remote_sidedata
1070 1071 if source == b'strip':
1071 1072 # We're our own remote when stripping, get the no-op helpers
1072 1073 # TODO a better approach would be for the strip bundle to
1073 1074 # correctly advertise its sidedata categories directly.
1074 1075 remote_sidedata = repo._wanted_sidedata
1075 1076 sidedata_helpers = sidedatamod.get_sidedata_helpers(
1076 1077 repo, remote_sidedata
1077 1078 )
1078 1079
1079 1080 clstate, deltas = self._generatechangelog(
1080 1081 cl,
1081 1082 clnodes,
1082 1083 generate=changelog,
1083 1084 sidedata_helpers=sidedata_helpers,
1084 1085 )
1085 1086 for delta in deltas:
1086 1087 for chunk in _revisiondeltatochunks(
1087 1088 self._repo, delta, self._builddeltaheader
1088 1089 ):
1089 1090 size += len(chunk)
1090 1091 yield chunk
1091 1092
1092 1093 close = closechunk()
1093 1094 size += len(close)
1094 1095 yield closechunk()
1095 1096
1096 1097 self._verbosenote(_(b'%8.i (changelog)\n') % size)
1097 1098
1098 1099 clrevorder = clstate[b'clrevorder']
1099 1100 manifests = clstate[b'manifests']
1100 1101 changedfiles = clstate[b'changedfiles']
1101 1102
1102 1103 # We need to make sure that the linkrev in the changegroup refers to
1103 1104 # the first changeset that introduced the manifest or file revision.
1104 1105 # The fastpath is usually safer than the slowpath, because the filelogs
1105 1106 # are walked in revlog order.
1106 1107 #
1107 1108 # When taking the slowpath when the manifest revlog uses generaldelta,
1108 1109 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
1109 1110 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
1110 1111 #
1111 1112 # When taking the fastpath, we are only vulnerable to reordering
1112 1113 # of the changelog itself. The changelog never uses generaldelta and is
1113 1114 # never reordered. To handle this case, we simply take the slowpath,
1114 1115 # which already has the 'clrevorder' logic. This was also fixed in
1115 1116 # cc0ff93d0c0c.
1116 1117
1117 1118 # Treemanifests don't work correctly with fastpathlinkrev
1118 1119 # either, because we don't discover which directory nodes to
1119 1120 # send along with files. This could probably be fixed.
1120 1121 fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)
1121 1122
1122 1123 fnodes = {} # needed file nodes
1123 1124
1124 1125 size = 0
1125 1126 it = self.generatemanifests(
1126 1127 commonrevs,
1127 1128 clrevorder,
1128 1129 fastpathlinkrev,
1129 1130 manifests,
1130 1131 fnodes,
1131 1132 source,
1132 1133 clstate[b'clrevtomanifestrev'],
1133 1134 sidedata_helpers=sidedata_helpers,
1134 1135 )
1135 1136
1136 1137 for tree, deltas in it:
1137 1138 if tree:
1138 1139 assert self.version in (b'03', b'04')
1139 1140 chunk = _fileheader(tree)
1140 1141 size += len(chunk)
1141 1142 yield chunk
1142 1143
1143 1144 for delta in deltas:
1144 1145 chunks = _revisiondeltatochunks(
1145 1146 self._repo, delta, self._builddeltaheader
1146 1147 )
1147 1148 for chunk in chunks:
1148 1149 size += len(chunk)
1149 1150 yield chunk
1150 1151
1151 1152 close = closechunk()
1152 1153 size += len(close)
1153 1154 yield close
1154 1155
1155 1156 self._verbosenote(_(b'%8.i (manifests)\n') % size)
1156 1157 yield self._manifestsend
1157 1158
1158 1159 mfdicts = None
1159 1160 if self._ellipses and self._isshallow:
1160 1161 mfdicts = [
1161 1162 (repo.manifestlog[n].read(), lr)
1162 1163 for (n, lr) in pycompat.iteritems(manifests)
1163 1164 ]
1164 1165
1165 1166 manifests.clear()
1166 1167 clrevs = {cl.rev(x) for x in clnodes}
1167 1168
1168 1169 it = self.generatefiles(
1169 1170 changedfiles,
1170 1171 commonrevs,
1171 1172 source,
1172 1173 mfdicts,
1173 1174 fastpathlinkrev,
1174 1175 fnodes,
1175 1176 clrevs,
1176 1177 sidedata_helpers=sidedata_helpers,
1177 1178 )
1178 1179
1179 1180 for path, deltas in it:
1180 1181 h = _fileheader(path)
1181 1182 size = len(h)
1182 1183 yield h
1183 1184
1184 1185 for delta in deltas:
1185 1186 chunks = _revisiondeltatochunks(
1186 1187 self._repo, delta, self._builddeltaheader
1187 1188 )
1188 1189 for chunk in chunks:
1189 1190 size += len(chunk)
1190 1191 yield chunk
1191 1192
1192 1193 close = closechunk()
1193 1194 size += len(close)
1194 1195 yield close
1195 1196
1196 1197 self._verbosenote(_(b'%8.i %s\n') % (size, path))
1197 1198
1198 1199 yield closechunk()
1199 1200
1200 1201 if clnodes:
1201 1202 repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1202 1203
1203 1204 def _generatechangelog(
1204 1205 self, cl, nodes, generate=True, sidedata_helpers=None
1205 1206 ):
1206 1207 """Generate data for changelog chunks.
1207 1208
1208 1209 Returns a 2-tuple of a dict containing state and an iterable of
1209 1210 byte chunks. The state will not be fully populated until the
1210 1211 chunk stream has been fully consumed.
1211 1212
1212 1213 if generate is False, the state will be fully populated and no chunk
1213 1214 stream will be yielded
1214 1215
1215 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
1216 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
1217 `sidedata_helpers`.
1216 1218 """
1217 1219 clrevorder = {}
1218 1220 manifests = {}
1219 1221 mfl = self._repo.manifestlog
1220 1222 changedfiles = set()
1221 1223 clrevtomanifestrev = {}
1222 1224
1223 1225 state = {
1224 1226 b'clrevorder': clrevorder,
1225 1227 b'manifests': manifests,
1226 1228 b'changedfiles': changedfiles,
1227 1229 b'clrevtomanifestrev': clrevtomanifestrev,
1228 1230 }
1229 1231
1230 1232 if not (generate or self._ellipses):
1231 1233 # sort the nodes in storage order
1232 1234 nodes = sorted(nodes, key=cl.rev)
1233 1235 for node in nodes:
1234 1236 c = cl.changelogrevision(node)
1235 1237 clrevorder[node] = len(clrevorder)
1236 1238 # record the first changeset introducing this manifest version
1237 1239 manifests.setdefault(c.manifest, node)
1238 1240 # Record a complete list of potentially-changed files in
1239 1241 # this manifest.
1240 1242 changedfiles.update(c.files)
1241 1243
1242 1244 return state, ()
1243 1245
1244 1246 # Callback for the changelog, used to collect changed files and
1245 1247 # manifest nodes.
1246 1248 # Returns the linkrev node (identity in the changelog case).
1247 1249 def lookupcl(x):
1248 1250 c = cl.changelogrevision(x)
1249 1251 clrevorder[x] = len(clrevorder)
1250 1252
1251 1253 if self._ellipses:
1252 1254 # Only update manifests if x is going to be sent. Otherwise we
1253 1255 # end up with bogus linkrevs specified for manifests and
1254 1256 # we skip some manifest nodes that we should otherwise
1255 1257 # have sent.
1256 1258 if (
1257 1259 x in self._fullclnodes
1258 1260 or cl.rev(x) in self._precomputedellipsis
1259 1261 ):
1260 1262
1261 1263 manifestnode = c.manifest
1262 1264 # Record the first changeset introducing this manifest
1263 1265 # version.
1264 1266 manifests.setdefault(manifestnode, x)
1265 1267 # Set this narrow-specific dict so we have the lowest
1266 1268 # manifest revnum to look up for this cl revnum. (Part of
1267 1269 # mapping changelog ellipsis parents to manifest ellipsis
1268 1270 # parents)
1269 1271 clrevtomanifestrev.setdefault(
1270 1272 cl.rev(x), mfl.rev(manifestnode)
1271 1273 )
1272 1274 # We can't trust the changed files list in the changeset if the
1273 1275 # client requested a shallow clone.
1274 1276 if self._isshallow:
1275 1277 changedfiles.update(mfl[c.manifest].read().keys())
1276 1278 else:
1277 1279 changedfiles.update(c.files)
1278 1280 else:
1279 1281 # record the first changeset introducing this manifest version
1280 1282 manifests.setdefault(c.manifest, x)
1281 1283 # Record a complete list of potentially-changed files in
1282 1284 # this manifest.
1283 1285 changedfiles.update(c.files)
1284 1286
1285 1287 return x
1286 1288
1287 1289 gen = deltagroup(
1288 1290 self._repo,
1289 1291 cl,
1290 1292 nodes,
1291 1293 True,
1292 1294 lookupcl,
1293 1295 self._forcedeltaparentprev,
1294 1296 ellipses=self._ellipses,
1295 1297 topic=_(b'changesets'),
1296 1298 clrevtolocalrev={},
1297 1299 fullclnodes=self._fullclnodes,
1298 1300 precomputedellipsis=self._precomputedellipsis,
1299 1301 sidedata_helpers=sidedata_helpers,
1300 1302 )
1301 1303
1302 1304 return state, gen
1303 1305
1304 1306 def generatemanifests(
1305 1307 self,
1306 1308 commonrevs,
1307 1309 clrevorder,
1308 1310 fastpathlinkrev,
1309 1311 manifests,
1310 1312 fnodes,
1311 1313 source,
1312 1314 clrevtolocalrev,
1313 1315 sidedata_helpers=None,
1314 1316 ):
1315 1317 """Returns an iterator of changegroup chunks containing manifests.
1316 1318
1317 1319 `source` is unused here, but is used by extensions like remotefilelog to
1318 1320 change what is sent based in pulls vs pushes, etc.
1319 1321
1320 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
1322 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
1323 `sidedata_helpers`.
1321 1324 """
1322 1325 repo = self._repo
1323 1326 mfl = repo.manifestlog
1324 1327 tmfnodes = {b'': manifests}
1325 1328
1326 1329 # Callback for the manifest, used to collect linkrevs for filelog
1327 1330 # revisions.
1328 1331 # Returns the linkrev node (collected in lookupcl).
1329 1332 def makelookupmflinknode(tree, nodes):
1330 1333 if fastpathlinkrev:
1331 1334 assert not tree
1332 1335
1333 1336 # pytype: disable=unsupported-operands
1334 1337 return manifests.__getitem__
1335 1338 # pytype: enable=unsupported-operands
1336 1339
1337 1340 def lookupmflinknode(x):
1338 1341 """Callback for looking up the linknode for manifests.
1339 1342
1340 1343 Returns the linkrev node for the specified manifest.
1341 1344
1342 1345 SIDE EFFECT:
1343 1346
1344 1347 1) fclnodes gets populated with the list of relevant
1345 1348 file nodes if we're not using fastpathlinkrev
1346 1349 2) When treemanifests are in use, collects treemanifest nodes
1347 1350 to send
1348 1351
1349 1352 Note that this means manifests must be completely sent to
1350 1353 the client before you can trust the list of files and
1351 1354 treemanifests to send.
1352 1355 """
1353 1356 clnode = nodes[x]
1354 1357 mdata = mfl.get(tree, x).readfast(shallow=True)
1355 1358 for p, n, fl in mdata.iterentries():
1356 1359 if fl == b't': # subdirectory manifest
1357 1360 subtree = tree + p + b'/'
1358 1361 tmfclnodes = tmfnodes.setdefault(subtree, {})
1359 1362 tmfclnode = tmfclnodes.setdefault(n, clnode)
1360 1363 if clrevorder[clnode] < clrevorder[tmfclnode]:
1361 1364 tmfclnodes[n] = clnode
1362 1365 else:
1363 1366 f = tree + p
1364 1367 fclnodes = fnodes.setdefault(f, {})
1365 1368 fclnode = fclnodes.setdefault(n, clnode)
1366 1369 if clrevorder[clnode] < clrevorder[fclnode]:
1367 1370 fclnodes[n] = clnode
1368 1371 return clnode
1369 1372
1370 1373 return lookupmflinknode
1371 1374
1372 1375 while tmfnodes:
1373 1376 tree, nodes = tmfnodes.popitem()
1374 1377
1375 1378 should_visit = self._matcher.visitdir(tree[:-1])
1376 1379 if tree and not should_visit:
1377 1380 continue
1378 1381
1379 1382 store = mfl.getstorage(tree)
1380 1383
1381 1384 if not should_visit:
1382 1385 # No nodes to send because this directory is out of
1383 1386 # the client's view of the repository (probably
1384 1387 # because of narrow clones). Do this even for the root
1385 1388 # directory (tree=='')
1386 1389 prunednodes = []
1387 1390 else:
1388 1391 # Avoid sending any manifest nodes we can prove the
1389 1392 # client already has by checking linkrevs. See the
1390 1393 # related comment in generatefiles().
1391 1394 prunednodes = self._prunemanifests(store, nodes, commonrevs)
1392 1395
1393 1396 if tree and not prunednodes:
1394 1397 continue
1395 1398
1396 1399 lookupfn = makelookupmflinknode(tree, nodes)
1397 1400
1398 1401 deltas = deltagroup(
1399 1402 self._repo,
1400 1403 store,
1401 1404 prunednodes,
1402 1405 False,
1403 1406 lookupfn,
1404 1407 self._forcedeltaparentprev,
1405 1408 ellipses=self._ellipses,
1406 1409 topic=_(b'manifests'),
1407 1410 clrevtolocalrev=clrevtolocalrev,
1408 1411 fullclnodes=self._fullclnodes,
1409 1412 precomputedellipsis=self._precomputedellipsis,
1410 1413 sidedata_helpers=sidedata_helpers,
1411 1414 )
1412 1415
1413 1416 if not self._oldmatcher.visitdir(store.tree[:-1]):
1414 1417 yield tree, deltas
1415 1418 else:
1416 1419 # 'deltas' is a generator and we need to consume it even if
1417 1420 # we are not going to send it because a side-effect is that
1418 1421 # it updates tmfnodes (via lookupfn)
1419 1422 for d in deltas:
1420 1423 pass
1421 1424 if not tree:
1422 1425 yield tree, []
1423 1426
1424 1427 def _prunemanifests(self, store, nodes, commonrevs):
1425 1428 if not self._ellipses:
1426 1429 # In the non-ellipses case with large repositories, it is better
1427 1430 # to avoid calling store.rev and store.linkrev on a lot of nodes
1428 1431 # than it is to save sending some extra data
1429 1432 return nodes.copy()
1430 1433 # This is split out as a separate method to allow filtering
1431 1434 # commonrevs in extension code.
1432 1435 #
1433 1436 # TODO(augie): this shouldn't be required, instead we should
1434 1437 # make filtering of revisions to send delegated to the store
1435 1438 # layer.
1436 1439 frev, flr = store.rev, store.linkrev
1437 1440 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1438 1441
1439 1442 # The 'source' parameter is useful for extensions
1440 1443 def generatefiles(
1441 1444 self,
1442 1445 changedfiles,
1443 1446 commonrevs,
1444 1447 source,
1445 1448 mfdicts,
1446 1449 fastpathlinkrev,
1447 1450 fnodes,
1448 1451 clrevs,
1449 1452 sidedata_helpers=None,
1450 1453 ):
1451 1454 changedfiles = [
1452 1455 f
1453 1456 for f in changedfiles
1454 1457 if self._matcher(f) and not self._oldmatcher(f)
1455 1458 ]
1456 1459
1457 1460 if not fastpathlinkrev:
1458 1461
1459 1462 def normallinknodes(unused, fname):
1460 1463 return fnodes.get(fname, {})
1461 1464
1462 1465 else:
1463 1466 cln = self._repo.changelog.node
1464 1467
1465 1468 def normallinknodes(store, fname):
1466 1469 flinkrev = store.linkrev
1467 1470 fnode = store.node
1468 1471 revs = ((r, flinkrev(r)) for r in store)
1469 1472 return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}
1470 1473
1471 1474 clrevtolocalrev = {}
1472 1475
1473 1476 if self._isshallow:
1474 1477 # In a shallow clone, the linknodes callback needs to also include
1475 1478 # those file nodes that are in the manifests we sent but weren't
1476 1479 # introduced by those manifests.
1477 1480 commonctxs = [self._repo[c] for c in commonrevs]
1478 1481 clrev = self._repo.changelog.rev
1479 1482
1480 1483 def linknodes(flog, fname):
1481 1484 for c in commonctxs:
1482 1485 try:
1483 1486 fnode = c.filenode(fname)
1484 1487 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1485 1488 except error.ManifestLookupError:
1486 1489 pass
1487 1490 links = normallinknodes(flog, fname)
1488 1491 if len(links) != len(mfdicts):
1489 1492 for mf, lr in mfdicts:
1490 1493 fnode = mf.get(fname, None)
1491 1494 if fnode in links:
1492 1495 links[fnode] = min(links[fnode], lr, key=clrev)
1493 1496 elif fnode:
1494 1497 links[fnode] = lr
1495 1498 return links
1496 1499
1497 1500 else:
1498 1501 linknodes = normallinknodes
1499 1502
1500 1503 repo = self._repo
1501 1504 progress = repo.ui.makeprogress(
1502 1505 _(b'files'), unit=_(b'files'), total=len(changedfiles)
1503 1506 )
1504 1507 for i, fname in enumerate(sorted(changedfiles)):
1505 1508 filerevlog = repo.file(fname)
1506 1509 if not filerevlog:
1507 1510 raise error.Abort(
1508 1511 _(b"empty or missing file data for %s") % fname
1509 1512 )
1510 1513
1511 1514 clrevtolocalrev.clear()
1512 1515
1513 1516 linkrevnodes = linknodes(filerevlog, fname)
1514 1517 # Lookup for filenodes, we collected the linkrev nodes above in the
1515 1518 # fastpath case and with lookupmf in the slowpath case.
1516 1519 def lookupfilelog(x):
1517 1520 return linkrevnodes[x]
1518 1521
1519 1522 frev, flr = filerevlog.rev, filerevlog.linkrev
1520 1523 # Skip sending any filenode we know the client already
1521 1524 # has. This avoids over-sending files relatively
1522 1525 # inexpensively, so it's not a problem if we under-filter
1523 1526 # here.
1524 1527 filenodes = [
1525 1528 n for n in linkrevnodes if flr(frev(n)) not in commonrevs
1526 1529 ]
1527 1530
1528 1531 if not filenodes:
1529 1532 continue
1530 1533
1531 1534 progress.update(i + 1, item=fname)
1532 1535
1533 1536 deltas = deltagroup(
1534 1537 self._repo,
1535 1538 filerevlog,
1536 1539 filenodes,
1537 1540 False,
1538 1541 lookupfilelog,
1539 1542 self._forcedeltaparentprev,
1540 1543 ellipses=self._ellipses,
1541 1544 clrevtolocalrev=clrevtolocalrev,
1542 1545 fullclnodes=self._fullclnodes,
1543 1546 precomputedellipsis=self._precomputedellipsis,
1544 1547 sidedata_helpers=sidedata_helpers,
1545 1548 )
1546 1549
1547 1550 yield fname, deltas
1548 1551
1549 1552 progress.complete()
1550 1553
1551 1554
1552 1555 def _makecg1packer(
1553 1556 repo,
1554 1557 oldmatcher,
1555 1558 matcher,
1556 1559 bundlecaps,
1557 1560 ellipses=False,
1558 1561 shallow=False,
1559 1562 ellipsisroots=None,
1560 1563 fullnodes=None,
1561 1564 remote_sidedata=None,
1562 1565 ):
1563 1566 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1564 1567 d.node, d.p1node, d.p2node, d.linknode
1565 1568 )
1566 1569
1567 1570 return cgpacker(
1568 1571 repo,
1569 1572 oldmatcher,
1570 1573 matcher,
1571 1574 b'01',
1572 1575 builddeltaheader=builddeltaheader,
1573 1576 manifestsend=b'',
1574 1577 forcedeltaparentprev=True,
1575 1578 bundlecaps=bundlecaps,
1576 1579 ellipses=ellipses,
1577 1580 shallow=shallow,
1578 1581 ellipsisroots=ellipsisroots,
1579 1582 fullnodes=fullnodes,
1580 1583 )
1581 1584
1582 1585
1583 1586 def _makecg2packer(
1584 1587 repo,
1585 1588 oldmatcher,
1586 1589 matcher,
1587 1590 bundlecaps,
1588 1591 ellipses=False,
1589 1592 shallow=False,
1590 1593 ellipsisroots=None,
1591 1594 fullnodes=None,
1592 1595 remote_sidedata=None,
1593 1596 ):
1594 1597 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1595 1598 d.node, d.p1node, d.p2node, d.basenode, d.linknode
1596 1599 )
1597 1600
1598 1601 return cgpacker(
1599 1602 repo,
1600 1603 oldmatcher,
1601 1604 matcher,
1602 1605 b'02',
1603 1606 builddeltaheader=builddeltaheader,
1604 1607 manifestsend=b'',
1605 1608 bundlecaps=bundlecaps,
1606 1609 ellipses=ellipses,
1607 1610 shallow=shallow,
1608 1611 ellipsisroots=ellipsisroots,
1609 1612 fullnodes=fullnodes,
1610 1613 )
1611 1614
1612 1615
1613 1616 def _makecg3packer(
1614 1617 repo,
1615 1618 oldmatcher,
1616 1619 matcher,
1617 1620 bundlecaps,
1618 1621 ellipses=False,
1619 1622 shallow=False,
1620 1623 ellipsisroots=None,
1621 1624 fullnodes=None,
1622 1625 remote_sidedata=None,
1623 1626 ):
1624 1627 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1625 1628 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1626 1629 )
1627 1630
1628 1631 return cgpacker(
1629 1632 repo,
1630 1633 oldmatcher,
1631 1634 matcher,
1632 1635 b'03',
1633 1636 builddeltaheader=builddeltaheader,
1634 1637 manifestsend=closechunk(),
1635 1638 bundlecaps=bundlecaps,
1636 1639 ellipses=ellipses,
1637 1640 shallow=shallow,
1638 1641 ellipsisroots=ellipsisroots,
1639 1642 fullnodes=fullnodes,
1640 1643 )
1641 1644
1642 1645
1643 1646 def _makecg4packer(
1644 1647 repo,
1645 1648 oldmatcher,
1646 1649 matcher,
1647 1650 bundlecaps,
1648 1651 ellipses=False,
1649 1652 shallow=False,
1650 1653 ellipsisroots=None,
1651 1654 fullnodes=None,
1652 1655 remote_sidedata=None,
1653 1656 ):
1654 1657 # Sidedata is in a separate chunk from the delta to differentiate
1655 1658 # "raw delta" and sidedata.
1656 1659 def builddeltaheader(d):
1657 1660 return _CHANGEGROUPV4_DELTA_HEADER.pack(
1658 1661 d.protocol_flags,
1659 1662 d.node,
1660 1663 d.p1node,
1661 1664 d.p2node,
1662 1665 d.basenode,
1663 1666 d.linknode,
1664 1667 d.flags,
1665 1668 )
1666 1669
1667 1670 return cgpacker(
1668 1671 repo,
1669 1672 oldmatcher,
1670 1673 matcher,
1671 1674 b'04',
1672 1675 builddeltaheader=builddeltaheader,
1673 1676 manifestsend=closechunk(),
1674 1677 bundlecaps=bundlecaps,
1675 1678 ellipses=ellipses,
1676 1679 shallow=shallow,
1677 1680 ellipsisroots=ellipsisroots,
1678 1681 fullnodes=fullnodes,
1679 1682 remote_sidedata=remote_sidedata,
1680 1683 )
1681 1684
1682 1685
1683 1686 _packermap = {
1684 1687 b'01': (_makecg1packer, cg1unpacker),
1685 1688 # cg2 adds support for exchanging generaldelta
1686 1689 b'02': (_makecg2packer, cg2unpacker),
1687 1690 # cg3 adds support for exchanging revlog flags and treemanifests
1688 1691 b'03': (_makecg3packer, cg3unpacker),
1689 1692 # cg4 adds support for exchanging sidedata
1690 1693 b'04': (_makecg4packer, cg4unpacker),
1691 1694 }
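# A minimal illustrative sketch of how the _packermap dispatch above might
# be consumed: the negotiated version selects both the packer factory used
# to emit a stream and the unpacker class used to read one back. `repo` is
# assumed to be a localrepository-like object, and _packermap_example is a
# hypothetical helper, not part of this module.
def _packermap_example(repo, version=b'02'):
    makepacker, unpackerclass = _packermap[version]
    # Build a packer over the whole repo: no narrowing, no bundlecaps.
    packer = makepacker(repo, matchmod.never(), matchmod.always(), None)
    return packer, unpackerclass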
1692 1695
1693 1696
1694 1697 def allsupportedversions(repo):
1695 1698 versions = set(_packermap.keys())
1696 1699 needv03 = False
1697 1700 if (
1698 1701 repo.ui.configbool(b'experimental', b'changegroup3')
1699 1702 or repo.ui.configbool(b'experimental', b'treemanifest')
1700 1703 or scmutil.istreemanifest(repo)
1701 1704 ):
1702 1705 # we keep version 03 because we need it to exchange treemanifest data
1703 1706 #
1704 1707 # we also keep versions 01 and 02, because it is possible for a repo to
1705 1708 # contain both normal and tree manifests at the same time, so using an
1706 1709 # older version to pull data is viable
1707 1710 #
1708 1711 # (or even to push a subset of history)
1709 1712 needv03 = True
1710 1713 if not needv03:
1711 1714 versions.discard(b'03')
1712 1715 want_v4 = (
1713 1716 repo.ui.configbool(b'experimental', b'changegroup4')
1714 1717 or requirements.REVLOGV2_REQUIREMENT in repo.requirements
1715 1718 )
1716 1719 if not want_v4:
1717 1720 versions.discard(b'04')
1718 1721 return versions
1719 1722
1720 1723
1721 1724 # Changegroup versions that can be applied to the repo
1722 1725 def supportedincomingversions(repo):
1723 1726 return allsupportedversions(repo)
1724 1727
1725 1728
1726 1729 # Changegroup versions that can be created from the repo
1727 1730 def supportedoutgoingversions(repo):
1728 1731 versions = allsupportedversions(repo)
1729 1732 if scmutil.istreemanifest(repo):
1730 1733 # Versions 01 and 02 support only flat manifests and it's just too
1731 1734 # expensive to convert between the flat manifest and tree manifest on
1732 1735 # the fly. Since tree manifests are hashed differently, all of history
1733 1736 # would have to be converted. Instead, we simply don't even pretend to
1734 1737 # support versions 01 and 02.
1735 1738 versions.discard(b'01')
1736 1739 versions.discard(b'02')
1737 1740 if requirements.NARROW_REQUIREMENT in repo.requirements:
1738 1741 # Versions 01 and 02 don't support revlog flags, and we need to
1739 1742 # support that for stripping and unbundling to work.
1740 1743 versions.discard(b'01')
1741 1744 versions.discard(b'02')
1742 1745 if LFS_REQUIREMENT in repo.requirements:
1743 1746 # Versions 01 and 02 don't support revlog flags, and we need to
1744 1747 # mark LFS entries with REVIDX_EXTSTORED.
1745 1748 versions.discard(b'01')
1746 1749 versions.discard(b'02')
1747 1750
1748 1751 return versions
1749 1752
1750 1753
1751 1754 def localversion(repo):
1752 1755 # Finds the best version to use for bundles that are meant to be used
1753 1756 # locally, such as those from strip and shelve, and temporary bundles.
1754 1757 return max(supportedoutgoingversions(repo))
1755 1758
1756 1759
1757 1760 def safeversion(repo):
1758 1761 # Finds the smallest version that it's safe to assume clients of the repo
1759 1762 # will support. For example, all hg versions that support generaldelta also
1760 1763 # support changegroup 02.
1761 1764 versions = supportedoutgoingversions(repo)
1762 1765 if requirements.GENERALDELTA_REQUIREMENT in repo.requirements:
1763 1766 versions.discard(b'01')
1764 1767 assert versions
1765 1768 return min(versions)
1766 1769
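# An illustrative sketch of the two selection policies above, which differ
# only in the direction they resolve the supported-version set:
# localversion() takes the richest format (we will read the bundle back
# ourselves), while safeversion() takes the smallest version every expected
# client can handle. _version_choice_example is a hypothetical helper.
def _version_choice_example(versions=frozenset((b'01', b'02', b'03'))):
    assert versions
    return max(versions), min(versions)  # (local, safe)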
1767 1770
1768 1771 def getbundler(
1769 1772 version,
1770 1773 repo,
1771 1774 bundlecaps=None,
1772 1775 oldmatcher=None,
1773 1776 matcher=None,
1774 1777 ellipses=False,
1775 1778 shallow=False,
1776 1779 ellipsisroots=None,
1777 1780 fullnodes=None,
1778 1781 remote_sidedata=None,
1779 1782 ):
1780 1783 assert version in supportedoutgoingversions(repo)
1781 1784
1782 1785 if matcher is None:
1783 1786 matcher = matchmod.always()
1784 1787 if oldmatcher is None:
1785 1788 oldmatcher = matchmod.never()
1786 1789
1787 1790 if version == b'01' and not matcher.always():
1788 1791 raise error.ProgrammingError(
1789 1792 b'version 01 changegroups do not support sparse file matchers'
1790 1793 )
1791 1794
1792 1795 if ellipses and version in (b'01', b'02'):
1793 1796 raise error.Abort(
1794 1797 _(
1795 1798 b'ellipsis nodes require at least cg3 on client and server, '
1796 1799 b'but negotiated version %s'
1797 1800 )
1798 1801 % version
1799 1802 )
1800 1803
1801 1804 # Requested files could include files not in the local store. So
1802 1805 # filter those out.
1803 1806 matcher = repo.narrowmatch(matcher)
1804 1807
1805 1808 fn = _packermap[version][0]
1806 1809 return fn(
1807 1810 repo,
1808 1811 oldmatcher,
1809 1812 matcher,
1810 1813 bundlecaps,
1811 1814 ellipses=ellipses,
1812 1815 shallow=shallow,
1813 1816 ellipsisroots=ellipsisroots,
1814 1817 fullnodes=fullnodes,
1815 1818 remote_sidedata=remote_sidedata,
1816 1819 )
1817 1820
1818 1821
1819 1822 def getunbundler(version, fh, alg, extras=None):
1820 1823 return _packermap[version][1](fh, alg, extras=extras)
1821 1824
1822 1825
1823 1826 def _changegroupinfo(repo, nodes, source):
1824 1827 if repo.ui.verbose or source == b'bundle':
1825 1828 repo.ui.status(_(b"%d changesets found\n") % len(nodes))
1826 1829 if repo.ui.debugflag:
1827 1830 repo.ui.debug(b"list of changesets:\n")
1828 1831 for node in nodes:
1829 1832 repo.ui.debug(b"%s\n" % hex(node))
1830 1833
1831 1834
1832 1835 def makechangegroup(
1833 1836 repo, outgoing, version, source, fastpath=False, bundlecaps=None
1834 1837 ):
1835 1838 cgstream = makestream(
1836 1839 repo,
1837 1840 outgoing,
1838 1841 version,
1839 1842 source,
1840 1843 fastpath=fastpath,
1841 1844 bundlecaps=bundlecaps,
1842 1845 )
1843 1846 return getunbundler(
1844 1847 version,
1845 1848 util.chunkbuffer(cgstream),
1846 1849 None,
1847 1850 {b'clcount': len(outgoing.missing)},
1848 1851 )
1849 1852
1850 1853
1851 1854 def makestream(
1852 1855 repo,
1853 1856 outgoing,
1854 1857 version,
1855 1858 source,
1856 1859 fastpath=False,
1857 1860 bundlecaps=None,
1858 1861 matcher=None,
1859 1862 remote_sidedata=None,
1860 1863 ):
1861 1864 bundler = getbundler(
1862 1865 version,
1863 1866 repo,
1864 1867 bundlecaps=bundlecaps,
1865 1868 matcher=matcher,
1866 1869 remote_sidedata=remote_sidedata,
1867 1870 )
1868 1871
1869 1872 repo = repo.unfiltered()
1870 1873 commonrevs = outgoing.common
1871 1874 csets = outgoing.missing
1872 1875 heads = outgoing.ancestorsof
1873 1876 # We go through the fast path if we get told to, or if all (unfiltered)
1874 1877 # heads have been requested (since we then know all linkrevs will
1875 1878 # be pulled by the client).
1876 1879 heads.sort()
1877 1880 fastpathlinkrev = fastpath or (
1878 1881 repo.filtername is None and heads == sorted(repo.heads())
1879 1882 )
1880 1883
1881 1884 repo.hook(b'preoutgoing', throw=True, source=source)
1882 1885 _changegroupinfo(repo, csets, source)
1883 1886 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1884 1887
1885 1888
1886 1889 def _addchangegroupfiles(
1887 1890 repo,
1888 1891 source,
1889 1892 revmap,
1890 1893 trp,
1891 1894 expectedfiles,
1892 1895 needfiles,
1893 1896 addrevisioncb=None,
1894 1897 ):
1895 1898 revisions = 0
1896 1899 files = 0
1897 1900 progress = repo.ui.makeprogress(
1898 1901 _(b'files'), unit=_(b'files'), total=expectedfiles
1899 1902 )
1900 1903 for chunkdata in iter(source.filelogheader, {}):
1901 1904 files += 1
1902 1905 f = chunkdata[b"filename"]
1903 1906 repo.ui.debug(b"adding %s revisions\n" % f)
1904 1907 progress.increment()
1905 1908 fl = repo.file(f)
1906 1909 o = len(fl)
1907 1910 try:
1908 1911 deltas = source.deltaiter()
1909 1912 added = fl.addgroup(
1910 1913 deltas,
1911 1914 revmap,
1912 1915 trp,
1913 1916 addrevisioncb=addrevisioncb,
1914 1917 )
1915 1918 if not added:
1916 1919 raise error.Abort(_(b"received file revlog group is empty"))
1917 1920 except error.CensoredBaseError as e:
1918 1921 raise error.Abort(_(b"received delta base is censored: %s") % e)
1919 1922 revisions += len(fl) - o
1920 1923 if f in needfiles:
1921 1924 needs = needfiles[f]
1922 1925 for new in pycompat.xrange(o, len(fl)):
1923 1926 n = fl.node(new)
1924 1927 if n in needs:
1925 1928 needs.remove(n)
1926 1929 else:
1927 1930 raise error.Abort(_(b"received spurious file revlog entry"))
1928 1931 if not needs:
1929 1932 del needfiles[f]
1930 1933 progress.complete()
1931 1934
1932 1935 for f, needs in pycompat.iteritems(needfiles):
1933 1936 fl = repo.file(f)
1934 1937 for n in needs:
1935 1938 try:
1936 1939 fl.rev(n)
1937 1940 except error.LookupError:
1938 1941 raise error.Abort(
1939 1942 _(b'missing file data for %s:%s - run hg verify')
1940 1943 % (f, hex(n))
1941 1944 )
1942 1945
1943 1946 return revisions, files
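# An illustrative miniature of the needfiles bookkeeping above: every node
# the changelog promised must arrive exactly once, a node it never asked
# for is spurious, and anything left over at the end is missing data.
# _needfiles_example is a hypothetical helper; b'n1'/b'n2' are made-up
# node ids.
def _needfiles_example():
    needfiles = {b'a.txt': {b'n1', b'n2'}}
    for n in (b'n1', b'n2'):  # nodes received from the stream
        needs = needfiles[b'a.txt']
        assert n in needs, b'received spurious file revlog entry'
        needs.remove(n)
    if not needfiles[b'a.txt']:
        del needfiles[b'a.txt']
    return needfiles  # empty: nothing missing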
@@ -1,3129 +1,3130 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 INDEX_HEADER,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_HASCOPIESINFO,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 )
59 59 from .thirdparty import attr
60 60 from . import (
61 61 ancestor,
62 62 dagop,
63 63 error,
64 64 mdiff,
65 65 policy,
66 66 pycompat,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .interfaces import (
71 71 repository,
72 72 util as interfaceutil,
73 73 )
74 74 from .revlogutils import (
75 75 deltas as deltautil,
76 76 flagutil,
77 77 nodemap as nodemaputil,
78 78 revlogv0,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # Bare usage of all the names below to prevent pyflakes unused-import
87 87 # warnings. We need these names available in the module for extensions.
88 88
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_HASCOPIESINFO
102 102 REVIDX_EXTSTORED
103 103 REVIDX_DEFAULT_FLAGS
104 104 REVIDX_FLAGS_ORDER
105 105 REVIDX_RAWTEXT_CHANGING_FLAGS
106 106
107 107 parsers = policy.importmod('parsers')
108 108 rustancestor = policy.importrust('ancestor')
109 109 rustdagop = policy.importrust('dagop')
110 110 rustrevlog = policy.importrust('revlog')
111 111
112 112 # Aliased for performance.
113 113 _zlibdecompress = zlib.decompress
114 114
115 115 # max size of revlog with inline data
116 116 _maxinline = 131072
117 117 _chunksize = 1048576
118 118
119 119 # Flag processors for REVIDX_ELLIPSIS.
120 120 def ellipsisreadprocessor(rl, text):
121 121 return text, False
122 122
123 123
124 124 def ellipsiswriteprocessor(rl, text):
125 125 return text, False
126 126
127 127
128 128 def ellipsisrawprocessor(rl, text):
129 129 return False
130 130
131 131
132 132 ellipsisprocessor = (
133 133 ellipsisreadprocessor,
134 134 ellipsiswriteprocessor,
135 135 ellipsisrawprocessor,
136 136 )
137 137
138 138
139 139 def offset_type(offset, type):
140 140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 141 raise ValueError(b'unknown revlog index flags')
142 142 return int(int(offset) << 16 | type)
143 143
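# An illustrative worked example of offset_type() above: it packs a byte
# offset and a 16-bit flag field into a single integer, matching the index
# layout ("first 6 bytes are offset, last 2 bytes are flags") documented
# with the accessors further down. _offset_type_example is a hypothetical
# helper.
def _offset_type_example():
    packed = offset_type(1024, REVIDX_ISCENSORED)
    assert packed >> 16 == 1024  # the offset comes back out of the top bits
    assert packed & 0xFFFF == REVIDX_ISCENSORED  # flags live in the low 16
    return packed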
144 144
145 145 def _verify_revision(rl, skipflags, state, node):
146 146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 147 point for extensions to influence the operation."""
148 148 if skipflags:
149 149 state[b'skipread'].add(node)
150 150 else:
151 151 # Side-effect: read content and verify hash.
152 152 rl.revision(node)
153 153
154 154
155 155 # True if a fast implementation for persistent-nodemap is available
156 156 #
157 157 # We also consider we have a "fast" implementation in "pure" python because
158 158 # people using pure don't really have performance considerations (and a
159 159 # wheelbarrow of other slowness sources)
160 160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 161 parsers, 'BaseIndexObject'
162 162 )
163 163
164 164
165 165 @attr.s(slots=True, frozen=True)
166 166 class _revisioninfo(object):
167 167 """Information about a revision that allows building its fulltext
168 168 node: expected hash of the revision
169 169 p1, p2: parent revs of the revision
170 170 btext: built text cache consisting of a one-element list
171 171 cachedelta: (baserev, uncompressed_delta) or None
172 172 flags: flags associated to the revision storage
173 173
174 174 One of btext[0] or cachedelta must be set.
175 175 """
176 176
177 177 node = attr.ib()
178 178 p1 = attr.ib()
179 179 p2 = attr.ib()
180 180 btext = attr.ib()
181 181 textlen = attr.ib()
182 182 cachedelta = attr.ib()
183 183 flags = attr.ib()
184 184
185 185
186 186 @interfaceutil.implementer(repository.irevisiondelta)
187 187 @attr.s(slots=True)
188 188 class revlogrevisiondelta(object):
189 189 node = attr.ib()
190 190 p1node = attr.ib()
191 191 p2node = attr.ib()
192 192 basenode = attr.ib()
193 193 flags = attr.ib()
194 194 baserevisionsize = attr.ib()
195 195 revision = attr.ib()
196 196 delta = attr.ib()
197 197 sidedata = attr.ib()
198 198 protocol_flags = attr.ib()
199 199 linknode = attr.ib(default=None)
200 200
201 201
202 202 @interfaceutil.implementer(repository.iverifyproblem)
203 203 @attr.s(frozen=True)
204 204 class revlogproblem(object):
205 205 warning = attr.ib(default=None)
206 206 error = attr.ib(default=None)
207 207 node = attr.ib(default=None)
208 208
209 209
210 210 def parse_index_v1(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline)
213 213 return index, cache
214 214
215 215
216 216 def parse_index_v2(data, inline):
217 217 # call the C implementation to parse the index data
218 218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 219 return index, cache
220 220
221 221
222 222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223 223
224 224 def parse_index_v1_nodemap(data, inline):
225 225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 226 return index, cache
227 227
228 228
229 229 else:
230 230 parse_index_v1_nodemap = None
231 231
232 232
233 233 def parse_index_v1_mixed(data, inline):
234 234 index, cache = parse_index_v1(data, inline)
235 235 return rustrevlog.MixedIndex(index), cache
236 236
237 237
238 238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 239 # signed integer)
240 240 _maxentrysize = 0x7FFFFFFF
241 241
242 242
243 243 class revlog(object):
244 244 """
245 245 the underlying revision storage object
246 246
247 247 A revlog consists of two parts, an index and the revision data.
248 248
249 249 The index is a file with a fixed record size containing
250 250 information on each revision, including its nodeid (hash), the
251 251 nodeids of its parents, the position and offset of its data within
252 252 the data file, and the revision it's based on. Finally, each entry
253 253 contains a linkrev entry that can serve as a pointer to external
254 254 data.
255 255
256 256 The revision data itself is a linear collection of data chunks.
257 257 Each chunk represents a revision and is usually represented as a
258 258 delta against the previous chunk. To bound lookup time, runs of
259 259 deltas are limited to about 2 times the length of the original
260 260 version data. This makes retrieval of a version proportional to
261 261 its size, or O(1) relative to the number of revisions.
262 262
263 263 Both pieces of the revlog are written to in an append-only
264 264 fashion, which means we never need to rewrite a file to insert or
265 265 remove data, and can use some simple techniques to avoid the need
266 266 for locking while reading.
267 267
268 268 If checkambig, indexfile is opened with checkambig=True at
269 269 writing, to avoid file stat ambiguity.
270 270
271 271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 272 index will be mmapped rather than read if it is larger than the
273 273 configured threshold.
274 274
275 275 If censorable is True, the revlog can have censored revisions.
276 276
277 277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 278 compression for the data content.
279 279
280 280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 281 file handle, a filename, and an expected position. It should check whether
282 282 the current position in the file handle is valid, and log/warn/fail (by
283 283 raising).
284 284 """
285 285
286 286 _flagserrorclass = error.RevlogError
287 287
288 288 def __init__(
289 289 self,
290 290 opener,
291 291 target,
292 292 indexfile=None,
293 293 datafile=None,
294 294 checkambig=False,
295 295 mmaplargeindex=False,
296 296 censorable=False,
297 297 upperboundcomp=None,
298 298 persistentnodemap=False,
299 299 concurrencychecker=None,
300 300 ):
301 301 """
302 302 create a revlog object
303 303
304 304 opener is a function that abstracts the file opening operation
305 305 and can be used to implement COW semantics or the like.
306 306
307 307 `target`: a (KIND, ID) tuple that identifies the content stored in
308 308 this revlog. It helps the rest of the code understand what the revlog
309 309 is about without having to resort to heuristics and index filename
310 310 analysis. Note that this must be reliably set by normal code, but
311 311 that test, debug, or performance measurement code might not set it to
312 312 an accurate value.
313 313 """
314 314 self.upperboundcomp = upperboundcomp
315 315 self.indexfile = indexfile
316 316 self.datafile = datafile or (indexfile[:-2] + b".d")
317 317 self.nodemap_file = None
318 318 if persistentnodemap:
319 319 self.nodemap_file = nodemaputil.get_nodemap_file(
320 320 opener, self.indexfile
321 321 )
322 322
323 323 self.opener = opener
324 324 assert target[0] in ALL_KINDS
325 325 assert len(target) == 2
326 326 self.target = target
327 327 # When True, indexfile is opened with checkambig=True at writing, to
328 328 # avoid file stat ambiguity.
329 329 self._checkambig = checkambig
330 330 self._mmaplargeindex = mmaplargeindex
331 331 self._censorable = censorable
332 332 # 3-tuple of (node, rev, text) for a raw revision.
333 333 self._revisioncache = None
334 334 # Maps rev to chain base rev.
335 335 self._chainbasecache = util.lrucachedict(100)
336 336 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
337 337 self._chunkcache = (0, b'')
338 338 # How much data to read and cache into the raw revlog data cache.
339 339 self._chunkcachesize = 65536
340 340 self._maxchainlen = None
341 341 self._deltabothparents = True
342 342 self.index = None
343 343 self._nodemap_docket = None
344 344 # Mapping of partial identifiers to full nodes.
345 345 self._pcache = {}
346 346 # Default compression engine and options.
347 347 self._compengine = b'zlib'
348 348 self._compengineopts = {}
349 349 self._maxdeltachainspan = -1
350 350 self._withsparseread = False
351 351 self._sparserevlog = False
352 352 self._srdensitythreshold = 0.50
353 353 self._srmingapsize = 262144
354 354
355 355 # Make copy of flag processors so each revlog instance can support
356 356 # custom flags.
357 357 self._flagprocessors = dict(flagutil.flagprocessors)
358 358
359 359 # 2-tuple of file handles being used for active writing.
360 360 self._writinghandles = None
361 361
362 362 self._loadindex()
363 363
364 364 self._concurrencychecker = concurrencychecker
365 365
366 366 def _loadindex(self):
367 367 mmapindexthreshold = None
368 368 opts = self.opener.options
369 369
370 370 if b'revlogv2' in opts:
371 371 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
372 372 elif b'revlogv1' in opts:
373 373 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
374 374 if b'generaldelta' in opts:
375 375 newversionflags |= FLAG_GENERALDELTA
376 376 elif b'revlogv0' in self.opener.options:
377 377 newversionflags = REVLOGV0
378 378 else:
379 379 newversionflags = REVLOG_DEFAULT_VERSION
380 380
381 381 if b'chunkcachesize' in opts:
382 382 self._chunkcachesize = opts[b'chunkcachesize']
383 383 if b'maxchainlen' in opts:
384 384 self._maxchainlen = opts[b'maxchainlen']
385 385 if b'deltabothparents' in opts:
386 386 self._deltabothparents = opts[b'deltabothparents']
387 387 self._lazydelta = bool(opts.get(b'lazydelta', True))
388 388 self._lazydeltabase = False
389 389 if self._lazydelta:
390 390 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
391 391 if b'compengine' in opts:
392 392 self._compengine = opts[b'compengine']
393 393 if b'zlib.level' in opts:
394 394 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
395 395 if b'zstd.level' in opts:
396 396 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
397 397 if b'maxdeltachainspan' in opts:
398 398 self._maxdeltachainspan = opts[b'maxdeltachainspan']
399 399 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
400 400 mmapindexthreshold = opts[b'mmapindexthreshold']
401 401 self.hassidedata = bool(opts.get(b'side-data', False))
402 402 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
403 403 withsparseread = bool(opts.get(b'with-sparse-read', False))
404 404 # sparse-revlog forces sparse-read
405 405 self._withsparseread = self._sparserevlog or withsparseread
406 406 if b'sparse-read-density-threshold' in opts:
407 407 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
408 408 if b'sparse-read-min-gap-size' in opts:
409 409 self._srmingapsize = opts[b'sparse-read-min-gap-size']
410 410 if opts.get(b'enableellipsis'):
411 411 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
412 412
413 413 # revlog v0 doesn't have flag processors
414 414 for flag, processor in pycompat.iteritems(
415 415 opts.get(b'flagprocessors', {})
416 416 ):
417 417 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
418 418
419 419 if self._chunkcachesize <= 0:
420 420 raise error.RevlogError(
421 421 _(b'revlog chunk cache size %r is not greater than 0')
422 422 % self._chunkcachesize
423 423 )
424 424 elif self._chunkcachesize & (self._chunkcachesize - 1):
425 425 raise error.RevlogError(
426 426 _(b'revlog chunk cache size %r is not a power of 2')
427 427 % self._chunkcachesize
428 428 )
429 429
430 430 indexdata = b''
431 431 self._initempty = True
432 432 try:
433 433 with self._indexfp() as f:
434 434 if (
435 435 mmapindexthreshold is not None
436 436 and self.opener.fstat(f).st_size >= mmapindexthreshold
437 437 ):
438 438 # TODO: the mmap should be .close()-ed to release resources without
439 439 # relying on Python GC
440 440 indexdata = util.buffer(util.mmapread(f))
441 441 else:
442 442 indexdata = f.read()
443 443 if len(indexdata) > 0:
444 444 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
445 445 self._initempty = False
446 446 else:
447 447 versionflags = newversionflags
448 448 except IOError as inst:
449 449 if inst.errno != errno.ENOENT:
450 450 raise
451 451
452 452 versionflags = newversionflags
453 453
454 454 self.version = versionflags
455 455
456 456 flags = versionflags & ~0xFFFF
457 457 fmt = versionflags & 0xFFFF
458 458
459 459 if fmt == REVLOGV0:
460 460 if flags:
461 461 raise error.RevlogError(
462 462 _(b'unknown flags (%#04x) in version %d revlog %s')
463 463 % (flags >> 16, fmt, self.indexfile)
464 464 )
465 465
466 466 self._inline = False
467 467 self._generaldelta = False
468 468
469 469 elif fmt == REVLOGV1:
470 470 if flags & ~REVLOGV1_FLAGS:
471 471 raise error.RevlogError(
472 472 _(b'unknown flags (%#04x) in version %d revlog %s')
473 473 % (flags >> 16, fmt, self.indexfile)
474 474 )
475 475
476 476 self._inline = versionflags & FLAG_INLINE_DATA
477 477 self._generaldelta = versionflags & FLAG_GENERALDELTA
478 478
479 479 elif fmt == REVLOGV2:
480 480 if flags & ~REVLOGV2_FLAGS:
481 481 raise error.RevlogError(
482 482 _(b'unknown flags (%#04x) in version %d revlog %s')
483 483 % (flags >> 16, fmt, self.indexfile)
484 484 )
485 485
486 486 # There is a bug in the transaction handling when going from an
487 487 # inline revlog to a separate index and data file. Turn it off until
488 488 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
489 489 # See issue6485
490 490 self._inline = False
491 491 # generaldelta is implied by version 2 revlogs.
492 492 self._generaldelta = True
493 493
494 494 else:
495 495 raise error.RevlogError(
496 496 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
497 497 )
498 498
499 499 self.nodeconstants = sha1nodeconstants
500 500 self.nullid = self.nodeconstants.nullid
501 501
502 502 # sparse-revlog can't be on without general-delta (issue6056)
503 503 if not self._generaldelta:
504 504 self._sparserevlog = False
505 505
506 506 self._storedeltachains = True
507 507
508 508 devel_nodemap = (
509 509 self.nodemap_file
510 510 and opts.get(b'devel-force-nodemap', False)
511 511 and parse_index_v1_nodemap is not None
512 512 )
513 513
514 514 use_rust_index = False
515 515 if rustrevlog is not None:
516 516 if self.nodemap_file is not None:
517 517 use_rust_index = True
518 518 else:
519 519 use_rust_index = self.opener.options.get(b'rust.index')
520 520
521 521 self._parse_index = parse_index_v1
522 522 if self.version == REVLOGV0:
523 523 self._parse_index = revlogv0.parse_index_v0
524 524 elif fmt == REVLOGV2:
525 525 self._parse_index = parse_index_v2
526 526 elif devel_nodemap:
527 527 self._parse_index = parse_index_v1_nodemap
528 528 elif use_rust_index:
529 529 self._parse_index = parse_index_v1_mixed
530 530 try:
531 531 d = self._parse_index(indexdata, self._inline)
532 532 index, _chunkcache = d
533 533 use_nodemap = (
534 534 not self._inline
535 535 and self.nodemap_file is not None
536 536 and util.safehasattr(index, 'update_nodemap_data')
537 537 )
538 538 if use_nodemap:
539 539 nodemap_data = nodemaputil.persisted_data(self)
540 540 if nodemap_data is not None:
541 541 docket = nodemap_data[0]
542 542 if (
543 543 len(d[0]) > docket.tip_rev
544 544 and d[0][docket.tip_rev][7] == docket.tip_node
545 545 ):
546 546 # no changelog tampering
547 547 self._nodemap_docket = docket
548 548 index.update_nodemap_data(*nodemap_data)
549 549 except (ValueError, IndexError):
550 550 raise error.RevlogError(
551 551 _(b"index %s is corrupted") % self.indexfile
552 552 )
553 553 self.index, self._chunkcache = d
554 554 if not self._chunkcache:
555 555 self._chunkclear()
556 556 # revnum -> (chain-length, sum-delta-length)
557 557 self._chaininfocache = util.lrucachedict(500)
558 558 # revlog header -> revlog compressor
559 559 self._decompressors = {}
560 560
561 561 @util.propertycache
562 562 def revlog_kind(self):
563 563 return self.target[0]
564 564
565 565 @util.propertycache
566 566 def _compressor(self):
567 567 engine = util.compengines[self._compengine]
568 568 return engine.revlogcompressor(self._compengineopts)
569 569
570 570 def _indexfp(self, mode=b'r'):
571 571 """file object for the revlog's index file"""
572 572 args = {'mode': mode}
573 573 if mode != b'r':
574 574 args['checkambig'] = self._checkambig
575 575 if mode == b'w':
576 576 args['atomictemp'] = True
577 577 return self.opener(self.indexfile, **args)
578 578
579 579 def _datafp(self, mode=b'r'):
580 580 """file object for the revlog's data file"""
581 581 return self.opener(self.datafile, mode=mode)
582 582
583 583 @contextlib.contextmanager
584 584 def _datareadfp(self, existingfp=None):
585 585 """file object suitable to read data"""
586 586 # Use explicit file handle, if given.
587 587 if existingfp is not None:
588 588 yield existingfp
589 589
590 590 # Use a file handle being actively used for writes, if available.
591 591 # There is some danger to doing this because reads will seek the
592 592 # file. However, _writeentry() performs a SEEK_END before all writes,
593 593 # so we should be safe.
594 594 elif self._writinghandles:
595 595 if self._inline:
596 596 yield self._writinghandles[0]
597 597 else:
598 598 yield self._writinghandles[1]
599 599
600 600 # Otherwise open a new file handle.
601 601 else:
602 602 if self._inline:
603 603 func = self._indexfp
604 604 else:
605 605 func = self._datafp
606 606 with func() as fp:
607 607 yield fp
608 608
609 609 def tiprev(self):
610 610 return len(self.index) - 1
611 611
612 612 def tip(self):
613 613 return self.node(self.tiprev())
614 614
615 615 def __contains__(self, rev):
616 616 return 0 <= rev < len(self)
617 617
618 618 def __len__(self):
619 619 return len(self.index)
620 620
621 621 def __iter__(self):
622 622 return iter(pycompat.xrange(len(self)))
623 623
624 624 def revs(self, start=0, stop=None):
625 625 """iterate over all rev in this revlog (from start to stop)"""
626 626 return storageutil.iterrevs(len(self), start=start, stop=stop)
627 627
628 628 @property
629 629 def nodemap(self):
630 630 msg = (
631 631 b"revlog.nodemap is deprecated, "
632 632 b"use revlog.index.[has_node|rev|get_rev]"
633 633 )
634 634 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
635 635 return self.index.nodemap
636 636
637 637 @property
638 638 def _nodecache(self):
639 639 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
640 640 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
641 641 return self.index.nodemap
642 642
643 643 def hasnode(self, node):
644 644 try:
645 645 self.rev(node)
646 646 return True
647 647 except KeyError:
648 648 return False
649 649
650 650 def candelta(self, baserev, rev):
651 651 """whether two revisions (baserev, rev) can be delta-ed or not"""
652 652 # Disable delta if either rev requires a content-changing flag
653 653 # processor (ex. LFS). This is because such a flag processor can alter
654 654 # the rawtext content that the delta will be based on, and two clients
655 655 # could have the same revlog node with different flags (i.e. different
656 656 # rawtext contents) and the delta could be incompatible.
657 657 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
658 658 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
659 659 ):
660 660 return False
661 661 return True
662 662
663 663 def update_caches(self, transaction):
664 664 if self.nodemap_file is not None:
665 665 if transaction is None:
666 666 nodemaputil.update_persistent_nodemap(self)
667 667 else:
668 668 nodemaputil.setup_persistent_nodemap(transaction, self)
669 669
670 670 def clearcaches(self):
671 671 self._revisioncache = None
672 672 self._chainbasecache.clear()
673 673 self._chunkcache = (0, b'')
674 674 self._pcache = {}
675 675 self._nodemap_docket = None
676 676 self.index.clearcaches()
677 677 # The python code is the one responsible for validating the docket, so we
678 678 # end up having to refresh it here.
679 679 use_nodemap = (
680 680 not self._inline
681 681 and self.nodemap_file is not None
682 682 and util.safehasattr(self.index, 'update_nodemap_data')
683 683 )
684 684 if use_nodemap:
685 685 nodemap_data = nodemaputil.persisted_data(self)
686 686 if nodemap_data is not None:
687 687 self._nodemap_docket = nodemap_data[0]
688 688 self.index.update_nodemap_data(*nodemap_data)
689 689
690 690 def rev(self, node):
691 691 try:
692 692 return self.index.rev(node)
693 693 except TypeError:
694 694 raise
695 695 except error.RevlogError:
696 696 # parsers.c radix tree lookup failed
697 697 if (
698 698 node == self.nodeconstants.wdirid
699 699 or node in self.nodeconstants.wdirfilenodeids
700 700 ):
701 701 raise error.WdirUnsupported
702 702 raise error.LookupError(node, self.indexfile, _(b'no node'))
703 703
704 704 # Accessors for index entries.
705 705
706 706 # The first tuple entry is 8 bytes: the first 6 bytes are the offset,
707 707 # the last 2 bytes are flags.
708 708 def start(self, rev):
709 709 return int(self.index[rev][0] >> 16)
710 710
711 711 def flags(self, rev):
712 712 return self.index[rev][0] & 0xFFFF
713 713
714 714 def length(self, rev):
715 715 return self.index[rev][1]
716 716
717 717 def sidedata_length(self, rev):
718 718 if self.version & 0xFFFF != REVLOGV2:
719 719 return 0
720 720 return self.index[rev][9]
721 721
722 722 def rawsize(self, rev):
723 723 """return the length of the uncompressed text for a given revision"""
724 724 l = self.index[rev][2]
725 725 if l >= 0:
726 726 return l
727 727
728 728 t = self.rawdata(rev)
729 729 return len(t)
730 730
731 731 def size(self, rev):
732 732 """length of non-raw text (processed by a "read" flag processor)"""
733 733 # fast path: if no "read" flag processor could change the content,
734 734 # size is rawsize. note: ELLIPSIS is known to not change the content.
735 735 flags = self.flags(rev)
736 736 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
737 737 return self.rawsize(rev)
738 738
739 739 return len(self.revision(rev, raw=False))
740 740
741 741 def chainbase(self, rev):
742 742 base = self._chainbasecache.get(rev)
743 743 if base is not None:
744 744 return base
745 745
746 746 index = self.index
747 747 iterrev = rev
748 748 base = index[iterrev][3]
749 749 while base != iterrev:
750 750 iterrev = base
751 751 base = index[iterrev][3]
752 752
753 753 self._chainbasecache[rev] = base
754 754 return base
755 755
756 756 def linkrev(self, rev):
757 757 return self.index[rev][4]
758 758
759 759 def parentrevs(self, rev):
760 760 try:
761 761 entry = self.index[rev]
762 762 except IndexError:
763 763 if rev == wdirrev:
764 764 raise error.WdirUnsupported
765 765 raise
766 766 if entry[5] == nullrev:
767 767 return entry[6], entry[5]
768 768 else:
769 769 return entry[5], entry[6]
770 770
771 771 # fast parentrevs(rev) where rev isn't filtered
772 772 _uncheckedparentrevs = parentrevs
773 773
774 774 def node(self, rev):
775 775 try:
776 776 return self.index[rev][7]
777 777 except IndexError:
778 778 if rev == wdirrev:
779 779 raise error.WdirUnsupported
780 780 raise
781 781
782 782 # Derived from index values.
783 783
784 784 def end(self, rev):
785 785 return self.start(rev) + self.length(rev)
786 786
787 787 def parents(self, node):
788 788 i = self.index
789 789 d = i[self.rev(node)]
790 790 # inline node() to avoid function call overhead
791 791 if d[5] == self.nullid:
792 792 return i[d[6]][7], i[d[5]][7]
793 793 else:
794 794 return i[d[5]][7], i[d[6]][7]
795 795
796 796 def chainlen(self, rev):
797 797 return self._chaininfo(rev)[0]
798 798
799 799 def _chaininfo(self, rev):
800 800 chaininfocache = self._chaininfocache
801 801 if rev in chaininfocache:
802 802 return chaininfocache[rev]
803 803 index = self.index
804 804 generaldelta = self._generaldelta
805 805 iterrev = rev
806 806 e = index[iterrev]
807 807 clen = 0
808 808 compresseddeltalen = 0
809 809 while iterrev != e[3]:
810 810 clen += 1
811 811 compresseddeltalen += e[1]
812 812 if generaldelta:
813 813 iterrev = e[3]
814 814 else:
815 815 iterrev -= 1
816 816 if iterrev in chaininfocache:
817 817 t = chaininfocache[iterrev]
818 818 clen += t[0]
819 819 compresseddeltalen += t[1]
820 820 break
821 821 e = index[iterrev]
822 822 else:
823 823 # Add text length of base since decompressing that also takes
824 824 # work. For cache hits the length is already included.
825 825 compresseddeltalen += e[1]
826 826 r = (clen, compresseddeltalen)
827 827 chaininfocache[rev] = r
828 828 return r
829 829
830 830 def _deltachain(self, rev, stoprev=None):
831 831 """Obtain the delta chain for a revision.
832 832
833 833 ``stoprev`` specifies a revision to stop at. If not specified, we
834 834 stop at the base of the chain.
835 835
836 836 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
837 837 revs in ascending order and ``stopped`` is a bool indicating whether
838 838 ``stoprev`` was hit.
839 839 """
840 840 # Try C implementation.
841 841 try:
842 842 return self.index.deltachain(rev, stoprev, self._generaldelta)
843 843 except AttributeError:
844 844 pass
845 845
846 846 chain = []
847 847
848 848 # Alias to prevent attribute lookup in tight loop.
849 849 index = self.index
850 850 generaldelta = self._generaldelta
851 851
852 852 iterrev = rev
853 853 e = index[iterrev]
854 854 while iterrev != e[3] and iterrev != stoprev:
855 855 chain.append(iterrev)
856 856 if generaldelta:
857 857 iterrev = e[3]
858 858 else:
859 859 iterrev -= 1
860 860 e = index[iterrev]
861 861
862 862 if iterrev == stoprev:
863 863 stopped = True
864 864 else:
865 865 chain.append(iterrev)
866 866 stopped = False
867 867
868 868 chain.reverse()
869 869 return chain, stopped
870 870
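    # An illustrative sketch of the walk above: with generaldelta, each
    # index entry names its delta base directly (entry[3]), and the chain
    # is built by following those links until an entry is its own base.
    # The mapping below is a toy rev -> delta-base-rev table, not a real
    # index.
    #
    # >>> base = {0: 0, 1: 0, 2: 1, 3: 1}
    # >>> def chain(rev):
    # ...     out = []
    # ...     while base[rev] != rev:
    # ...         out.append(rev)
    # ...         rev = base[rev]
    # ...     out.append(rev)
    # ...     return out[::-1]
    # >>> chain(3)
    # [0, 1, 3]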
871 871 def ancestors(self, revs, stoprev=0, inclusive=False):
872 872 """Generate the ancestors of 'revs' in reverse revision order.
873 873 Does not generate revs lower than stoprev.
874 874
875 875 See the documentation for ancestor.lazyancestors for more details."""
876 876
877 877 # first, make sure start revisions aren't filtered
878 878 revs = list(revs)
879 879 checkrev = self.node
880 880 for r in revs:
881 881 checkrev(r)
882 882 # and we're sure ancestors aren't filtered as well
883 883
884 884 if rustancestor is not None:
885 885 lazyancestors = rustancestor.LazyAncestors
886 886 arg = self.index
887 887 else:
888 888 lazyancestors = ancestor.lazyancestors
889 889 arg = self._uncheckedparentrevs
890 890 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
891 891
892 892 def descendants(self, revs):
893 893 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
894 894
895 895 def findcommonmissing(self, common=None, heads=None):
896 896 """Return a tuple of the ancestors of common and the ancestors of heads
897 897 that are not ancestors of common. In revset terminology, we return the
898 898 tuple:
899 899
900 900 ::common, (::heads) - (::common)
901 901
902 902 The list is sorted by revision number, meaning it is
903 903 topologically sorted.
904 904
905 905 'heads' and 'common' are both lists of node IDs. If heads is
906 906 not supplied, uses all of the revlog's heads. If common is not
907 907 supplied, uses nullid."""
908 908 if common is None:
909 909 common = [self.nullid]
910 910 if heads is None:
911 911 heads = self.heads()
912 912
913 913 common = [self.rev(n) for n in common]
914 914 heads = [self.rev(n) for n in heads]
915 915
916 916 # we want the ancestors, but inclusive
917 917 class lazyset(object):
918 918 def __init__(self, lazyvalues):
919 919 self.addedvalues = set()
920 920 self.lazyvalues = lazyvalues
921 921
922 922 def __contains__(self, value):
923 923 return value in self.addedvalues or value in self.lazyvalues
924 924
925 925 def __iter__(self):
926 926 added = self.addedvalues
927 927 for r in added:
928 928 yield r
929 929 for r in self.lazyvalues:
930 930 if not r in added:
931 931 yield r
932 932
933 933 def add(self, value):
934 934 self.addedvalues.add(value)
935 935
936 936 def update(self, values):
937 937 self.addedvalues.update(values)
938 938
939 939 has = lazyset(self.ancestors(common))
940 940 has.add(nullrev)
941 941 has.update(common)
942 942
943 943 # take all ancestors from heads that aren't in has
944 944 missing = set()
945 945 visit = collections.deque(r for r in heads if r not in has)
946 946 while visit:
947 947 r = visit.popleft()
948 948 if r in missing:
949 949 continue
950 950 else:
951 951 missing.add(r)
952 952 for p in self.parentrevs(r):
953 953 if p not in has:
954 954 visit.append(p)
955 955 missing = list(missing)
956 956 missing.sort()
957 957 return has, [self.node(miss) for miss in missing]
958 958
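    # An illustrative sketch of the revset identity above,
    # (::heads) - (::common), on a toy linear DAG 0 <- 1 <- 2 with
    # common=[1] and heads=[2]; the `parents` table and helper are
    # made up for the example.
    #
    # >>> parents = {0: (), 1: (0,), 2: (1,)}
    # >>> def ancestors(revs):
    # ...     seen, stack = set(revs), list(revs)
    # ...     while stack:
    # ...         for p in parents[stack.pop()]:
    # ...             if p not in seen:
    # ...                 seen.add(p)
    # ...                 stack.append(p)
    # ...     return seen
    # >>> sorted(ancestors([2]) - ancestors([1]))
    # [2]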
959 959 def incrementalmissingrevs(self, common=None):
960 960 """Return an object that can be used to incrementally compute the
961 961 revision numbers of the ancestors of arbitrary sets that are not
962 962 ancestors of common. This is an ancestor.incrementalmissingancestors
963 963 object.
964 964
965 965 'common' is a list of revision numbers. If common is not supplied, uses
966 966 nullrev.
967 967 """
968 968 if common is None:
969 969 common = [nullrev]
970 970
971 971 if rustancestor is not None:
972 972 return rustancestor.MissingAncestors(self.index, common)
973 973 return ancestor.incrementalmissingancestors(self.parentrevs, common)
974 974
975 975 def findmissingrevs(self, common=None, heads=None):
976 976 """Return the revision numbers of the ancestors of heads that
977 977 are not ancestors of common.
978 978
979 979 More specifically, return a list of revision numbers corresponding to
980 980 nodes N such that every N satisfies the following constraints:
981 981
982 982 1. N is an ancestor of some node in 'heads'
983 983 2. N is not an ancestor of any node in 'common'
984 984
985 985 The list is sorted by revision number, meaning it is
986 986 topologically sorted.
987 987
988 988 'heads' and 'common' are both lists of revision numbers. If heads is
989 989 not supplied, uses all of the revlog's heads. If common is not
990 990 supplied, uses nullid."""
991 991 if common is None:
992 992 common = [nullrev]
993 993 if heads is None:
994 994 heads = self.headrevs()
995 995
996 996 inc = self.incrementalmissingrevs(common=common)
997 997 return inc.missingancestors(heads)
998 998
999 999 def findmissing(self, common=None, heads=None):
1000 1000 """Return the ancestors of heads that are not ancestors of common.
1001 1001
1002 1002 More specifically, return a list of nodes N such that every N
1003 1003 satisfies the following constraints:
1004 1004
1005 1005 1. N is an ancestor of some node in 'heads'
1006 1006 2. N is not an ancestor of any node in 'common'
1007 1007
1008 1008 The list is sorted by revision number, meaning it is
1009 1009 topologically sorted.
1010 1010
1011 1011 'heads' and 'common' are both lists of node IDs. If heads is
1012 1012 not supplied, uses all of the revlog's heads. If common is not
1013 1013 supplied, uses nullid."""
1014 1014 if common is None:
1015 1015 common = [self.nullid]
1016 1016 if heads is None:
1017 1017 heads = self.heads()
1018 1018
1019 1019 common = [self.rev(n) for n in common]
1020 1020 heads = [self.rev(n) for n in heads]
1021 1021
1022 1022 inc = self.incrementalmissingrevs(common=common)
1023 1023 return [self.node(r) for r in inc.missingancestors(heads)]
1024 1024
1025 1025 def nodesbetween(self, roots=None, heads=None):
1026 1026 """Return a topological path from 'roots' to 'heads'.
1027 1027
1028 1028 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1029 1029 topologically sorted list of all nodes N that satisfy both of
1030 1030 these constraints:
1031 1031
1032 1032 1. N is a descendant of some node in 'roots'
1033 1033 2. N is an ancestor of some node in 'heads'
1034 1034
1035 1035 Every node is considered to be both a descendant and an ancestor
1036 1036 of itself, so every reachable node in 'roots' and 'heads' will be
1037 1037 included in 'nodes'.
1038 1038
1039 1039 'outroots' is the list of reachable nodes in 'roots', i.e., the
1040 1040 subset of 'roots' that is returned in 'nodes'. Likewise,
1041 1041 'outheads' is the subset of 'heads' that is also in 'nodes'.
1042 1042
1043 1043 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1044 1044 unspecified, uses nullid as the only root. If 'heads' is
1045 1045 unspecified, uses list of all of the revlog's heads."""
1046 1046 nonodes = ([], [], [])
1047 1047 if roots is not None:
1048 1048 roots = list(roots)
1049 1049 if not roots:
1050 1050 return nonodes
1051 1051 lowestrev = min([self.rev(n) for n in roots])
1052 1052 else:
1053 1053 roots = [self.nullid] # Everybody's a descendant of nullid
1054 1054 lowestrev = nullrev
1055 1055 if (lowestrev == nullrev) and (heads is None):
1056 1056 # We want _all_ the nodes!
1057 1057 return (
1058 1058 [self.node(r) for r in self],
1059 1059 [self.nullid],
1060 1060 list(self.heads()),
1061 1061 )
1062 1062 if heads is None:
1063 1063 # All nodes are ancestors, so the latest ancestor is the last
1064 1064 # node.
1065 1065 highestrev = len(self) - 1
1066 1066 # Set ancestors to None to signal that every node is an ancestor.
1067 1067 ancestors = None
1068 1068 # Set heads to an empty dictionary for later discovery of heads
1069 1069 heads = {}
1070 1070 else:
1071 1071 heads = list(heads)
1072 1072 if not heads:
1073 1073 return nonodes
1074 1074 ancestors = set()
1075 1075 # Turn heads into a dictionary so we can remove 'fake' heads.
1076 1076 # Also, later we will be using it to filter out the heads we can't
1077 1077 # find from roots.
1078 1078 heads = dict.fromkeys(heads, False)
1079 1079 # Start at the top and keep marking parents until we're done.
1080 1080 nodestotag = set(heads)
1081 1081 # Remember where the top was so we can use it as a limit later.
1082 1082 highestrev = max([self.rev(n) for n in nodestotag])
1083 1083 while nodestotag:
1084 1084 # grab a node to tag
1085 1085 n = nodestotag.pop()
1086 1086 # Never tag nullid
1087 1087 if n == self.nullid:
1088 1088 continue
1089 1089 # A node's revision number represents its place in a
1090 1090 # topologically sorted list of nodes.
1091 1091 r = self.rev(n)
1092 1092 if r >= lowestrev:
1093 1093 if n not in ancestors:
1094 1094 # If we are possibly a descendant of one of the roots
1095 1095 # and we haven't already been marked as an ancestor
1096 1096 ancestors.add(n) # Mark as ancestor
1097 1097 # Add non-nullid parents to list of nodes to tag.
1098 1098 nodestotag.update(
1099 1099 [p for p in self.parents(n) if p != self.nullid]
1100 1100 )
1101 1101 elif n in heads: # We've seen it before, is it a fake head?
1102 1102 # So it is, real heads should not be the ancestors of
1103 1103 # any other heads.
1104 1104 heads.pop(n)
1105 1105 if not ancestors:
1106 1106 return nonodes
1107 1107 # Now that we have our set of ancestors, we want to remove any
1108 1108 # roots that are not ancestors.
1109 1109
1110 1110 # If one of the roots was nullid, everything is included anyway.
1111 1111 if lowestrev > nullrev:
1112 1112 # But, since we weren't, let's recompute the lowest rev to not
1113 1113 # include roots that aren't ancestors.
1114 1114
1115 1115 # Filter out roots that aren't ancestors of heads
1116 1116 roots = [root for root in roots if root in ancestors]
1117 1117 # Recompute the lowest revision
1118 1118 if roots:
1119 1119 lowestrev = min([self.rev(root) for root in roots])
1120 1120 else:
1121 1121 # No more roots? Return empty list
1122 1122 return nonodes
1123 1123 else:
1124 1124 # We are descending from nullid, and don't need to care about
1125 1125 # any other roots.
1126 1126 lowestrev = nullrev
1127 1127 roots = [self.nullid]
1128 1128 # Transform our roots list into a set.
1129 1129 descendants = set(roots)
1130 1130 # Also, keep the original roots so we can filter out roots that aren't
1131 1131 # 'real' roots (i.e. are descended from other roots).
1132 1132 roots = descendants.copy()
1133 1133 # Our topologically sorted list of output nodes.
1134 1134 orderedout = []
1135 1135 # Don't start at nullid since we don't want nullid in our output list,
1136 1136 # and if nullid shows up in descendants, empty parents will look like
1137 1137 # they're descendants.
1138 1138 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1139 1139 n = self.node(r)
1140 1140 isdescendant = False
1141 1141 if lowestrev == nullrev: # Everybody is a descendant of nullid
1142 1142 isdescendant = True
1143 1143 elif n in descendants:
1144 1144 # n is already a descendant
1145 1145 isdescendant = True
1146 1146 # This check only needs to be done here because all the roots
1147 1147 # will start being marked as descendants before the loop.
1148 1148 if n in roots:
1149 1149 # If n was a root, check if it's a 'real' root.
1150 1150 p = tuple(self.parents(n))
1151 1151 # If any of its parents are descendants, it's not a root.
1152 1152 if (p[0] in descendants) or (p[1] in descendants):
1153 1153 roots.remove(n)
1154 1154 else:
1155 1155 p = tuple(self.parents(n))
1156 1156 # A node is a descendant if either of its parents is a
1157 1157 # descendant. (We seeded the descendants set with the roots
1158 1158 # up there, remember?)
1159 1159 if (p[0] in descendants) or (p[1] in descendants):
1160 1160 descendants.add(n)
1161 1161 isdescendant = True
1162 1162 if isdescendant and ((ancestors is None) or (n in ancestors)):
1163 1163 # Only include nodes that are both descendants and ancestors.
1164 1164 orderedout.append(n)
1165 1165 if (ancestors is not None) and (n in heads):
1166 1166 # We're trying to figure out which heads are reachable
1167 1167 # from roots.
1168 1168 # Mark this head as having been reached
1169 1169 heads[n] = True
1170 1170 elif ancestors is None:
1171 1171 # Otherwise, we're trying to discover the heads.
1172 1172 # Assume this is a head because if it isn't, the next step
1173 1173 # will eventually remove it.
1174 1174 heads[n] = True
1175 1175 # But, obviously its parents aren't.
1176 1176 for p in self.parents(n):
1177 1177 heads.pop(p, None)
1178 1178 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1179 1179 roots = list(roots)
1180 1180 assert orderedout
1181 1181 assert roots
1182 1182 assert heads
1183 1183 return (orderedout, roots, heads)
1184 1184
1185 1185 def headrevs(self, revs=None):
1186 1186 if revs is None:
1187 1187 try:
1188 1188 return self.index.headrevs()
1189 1189 except AttributeError:
1190 1190 return self._headrevs()
1191 1191 if rustdagop is not None:
1192 1192 return rustdagop.headrevs(self.index, revs)
1193 1193 return dagop.headrevs(revs, self._uncheckedparentrevs)
1194 1194
1195 1195 def computephases(self, roots):
1196 1196 return self.index.computephasesmapsets(roots)
1197 1197
1198 1198 def _headrevs(self):
1199 1199 count = len(self)
1200 1200 if not count:
1201 1201 return [nullrev]
1202 1202 # we won't iterate over filtered revs, so nobody is a head at the start
1203 1203 ishead = [0] * (count + 1)
1204 1204 index = self.index
1205 1205 for r in self:
1206 1206 ishead[r] = 1 # I may be a head
1207 1207 e = index[r]
1208 1208 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1209 1209 return [r for r, val in enumerate(ishead) if val]
1210 1210
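    # An illustrative miniature of the marking pass above: assume every
    # revision is a head, then clear the flag of anything that appears as
    # a parent. The `parents` table is a toy DAG, not a real index.
    #
    # >>> parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}
    # >>> ishead = [1] * 3
    # >>> for r in parents:
    # ...     for p in parents[r]:
    # ...         if p >= 0:
    # ...             ishead[p] = 0
    # >>> [r for r, v in enumerate(ishead) if v]
    # [1, 2]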
1211 1211 def heads(self, start=None, stop=None):
1212 1212 """return the list of all nodes that have no children
1213 1213
1214 1214 if start is specified, only heads that are descendants of
1215 1215 start will be returned
1216 1216 if stop is specified, it will consider all the revs from stop
1217 1217 as if they had no children
1218 1218 """
1219 1219 if start is None and stop is None:
1220 1220 if not len(self):
1221 1221 return [self.nullid]
1222 1222 return [self.node(r) for r in self.headrevs()]
1223 1223
1224 1224 if start is None:
1225 1225 start = nullrev
1226 1226 else:
1227 1227 start = self.rev(start)
1228 1228
1229 1229 stoprevs = {self.rev(n) for n in stop or []}
1230 1230
1231 1231 revs = dagop.headrevssubset(
1232 1232 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1233 1233 )
1234 1234
1235 1235 return [self.node(rev) for rev in revs]
1236 1236
1237 1237 def children(self, node):
1238 1238 """find the children of a given node"""
1239 1239 c = []
1240 1240 p = self.rev(node)
1241 1241 for r in self.revs(start=p + 1):
1242 1242 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1243 1243 if prevs:
1244 1244 for pr in prevs:
1245 1245 if pr == p:
1246 1246 c.append(self.node(r))
1247 1247 elif p == nullrev:
1248 1248 c.append(self.node(r))
1249 1249 return c
1250 1250
1251 1251 def commonancestorsheads(self, a, b):
1252 1252 """calculate all the heads of the common ancestors of nodes a and b"""
1253 1253 a, b = self.rev(a), self.rev(b)
1254 1254 ancs = self._commonancestorsheads(a, b)
1255 1255 return pycompat.maplist(self.node, ancs)
1256 1256
1257 1257 def _commonancestorsheads(self, *revs):
1258 1258 """calculate all the heads of the common ancestors of revs"""
1259 1259 try:
1260 1260 ancs = self.index.commonancestorsheads(*revs)
1261 1261 except (AttributeError, OverflowError): # C implementation failed
1262 1262 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1263 1263 return ancs
1264 1264
1265 1265 def isancestor(self, a, b):
1266 1266 """return True if node a is an ancestor of node b
1267 1267
1268 1268 A revision is considered an ancestor of itself."""
1269 1269 a, b = self.rev(a), self.rev(b)
1270 1270 return self.isancestorrev(a, b)
1271 1271
1272 1272 def isancestorrev(self, a, b):
1273 1273 """return True if revision a is an ancestor of revision b
1274 1274
1275 1275 A revision is considered an ancestor of itself.
1276 1276
1277 1277 The implementation of this is trivial but the use of
1278 1278 reachableroots is not."""
1279 1279 if a == nullrev:
1280 1280 return True
1281 1281 elif a == b:
1282 1282 return True
1283 1283 elif a > b:
1284 1284 return False
1285 1285 return bool(self.reachableroots(a, [b], [a], includepath=False))
1286 1286
1287 1287 def reachableroots(self, minroot, heads, roots, includepath=False):
1288 1288 """return (heads(::(<roots> and <roots>::<heads>)))
1289 1289
1290 1290 If includepath is True, return (<roots>::<heads>)."""
1291 1291 try:
1292 1292 return self.index.reachableroots2(
1293 1293 minroot, heads, roots, includepath
1294 1294 )
1295 1295 except AttributeError:
1296 1296 return dagop._reachablerootspure(
1297 1297 self.parentrevs, minroot, roots, heads, includepath
1298 1298 )
1299 1299
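    # An illustrative sketch of what isancestorrev() above asks of
    # reachableroots: whether walking b's history can reach a. Because
    # revision numbers are topologically ordered, anything below `a` can
    # be pruned (the `a > b` fast path). The `parents` table and
    # `isancestor` helper are made up for the example.
    #
    # >>> parents = {0: (), 1: (0,), 2: (1,)}  # toy linear history
    # >>> def isancestor(a, b):
    # ...     stack = [b]
    # ...     while stack:
    # ...         r = stack.pop()
    # ...         if r == a:
    # ...             return True
    # ...         stack.extend(p for p in parents[r] if p >= a)
    # ...     return False
    # >>> isancestor(0, 2), isancestor(2, 0)
    # (True, False)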
1300 1300 def ancestor(self, a, b):
1301 1301 """calculate the "best" common ancestor of nodes a and b"""
1302 1302
1303 1303 a, b = self.rev(a), self.rev(b)
1304 1304 try:
1305 1305 ancs = self.index.ancestors(a, b)
1306 1306 except (AttributeError, OverflowError):
1307 1307 ancs = ancestor.ancestors(self.parentrevs, a, b)
1308 1308 if ancs:
1309 1309 # choose a consistent winner when there's a tie
1310 1310 return min(map(self.node, ancs))
1311 1311 return self.nullid
1312 1312
1313 1313 def _match(self, id):
1314 1314 if isinstance(id, int):
1315 1315 # rev
1316 1316 return self.node(id)
1317 1317 if len(id) == self.nodeconstants.nodelen:
1318 1318 # possibly a binary node
1319 1319 # odds of a binary node being all hex in ASCII are 1 in 10**25
1320 1320 try:
1321 1321 node = id
1322 1322 self.rev(node) # quick search the index
1323 1323 return node
1324 1324 except error.LookupError:
1325 1325 pass # may be partial hex id
1326 1326 try:
1327 1327 # str(rev)
1328 1328 rev = int(id)
1329 1329 if b"%d" % rev != id:
1330 1330 raise ValueError
1331 1331 if rev < 0:
1332 1332 rev = len(self) + rev
1333 1333 if rev < 0 or rev >= len(self):
1334 1334 raise ValueError
1335 1335 return self.node(rev)
1336 1336 except (ValueError, OverflowError):
1337 1337 pass
1338 1338 if len(id) == 2 * self.nodeconstants.nodelen:
1339 1339 try:
1340 1340 # a full hex nodeid?
1341 1341 node = bin(id)
1342 1342 self.rev(node)
1343 1343 return node
1344 1344 except (TypeError, error.LookupError):
1345 1345 pass
1346 1346
1347 1347 def _partialmatch(self, id):
1348 1348 # we don't care about wdirfilenodeids as they should always be full hashes
1349 1349 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1350 1350 try:
1351 1351 partial = self.index.partialmatch(id)
1352 1352 if partial and self.hasnode(partial):
1353 1353 if maybewdir:
1354 1354 # single 'ff...' match in radix tree, ambiguous with wdir
1355 1355 raise error.RevlogError
1356 1356 return partial
1357 1357 if maybewdir:
1358 1358 # no 'ff...' match in radix tree, wdir identified
1359 1359 raise error.WdirUnsupported
1360 1360 return None
1361 1361 except error.RevlogError:
1362 1362 # parsers.c radix tree lookup gave multiple matches
1363 1363 # fast path: for unfiltered changelog, radix tree is accurate
1364 1364 if not getattr(self, 'filteredrevs', None):
1365 1365 raise error.AmbiguousPrefixLookupError(
1366 1366 id, self.indexfile, _(b'ambiguous identifier')
1367 1367 )
1368 1368 # fall through to slow path that filters hidden revisions
1369 1369 except (AttributeError, ValueError):
1370 1370 # we are pure python, or key was too short to search radix tree
1371 1371 pass
1372 1372
1373 1373 if id in self._pcache:
1374 1374 return self._pcache[id]
1375 1375
1376 1376 if len(id) <= 40:
1377 1377 try:
1378 1378 # hex(node)[:...]
1379 1379 l = len(id) // 2 # grab an even number of digits
1380 1380 prefix = bin(id[: l * 2])
1381 1381 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1382 1382 nl = [
1383 1383 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1384 1384 ]
1385 1385 if self.nodeconstants.nullhex.startswith(id):
1386 1386 nl.append(self.nullid)
1387 1387 if len(nl) > 0:
1388 1388 if len(nl) == 1 and not maybewdir:
1389 1389 self._pcache[id] = nl[0]
1390 1390 return nl[0]
1391 1391 raise error.AmbiguousPrefixLookupError(
1392 1392 id, self.indexfile, _(b'ambiguous identifier')
1393 1393 )
1394 1394 if maybewdir:
1395 1395 raise error.WdirUnsupported
1396 1396 return None
1397 1397 except TypeError:
1398 1398 pass
1399 1399
1400 1400 def lookup(self, id):
1401 1401 """locate a node based on:
1402 1402 - revision number or str(revision number)
1403 1403 - nodeid or subset of hex nodeid
1404 1404 """
1405 1405 n = self._match(id)
1406 1406 if n is not None:
1407 1407 return n
1408 1408 n = self._partialmatch(id)
1409 1409 if n:
1410 1410 return n
1411 1411
1412 1412 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1413 1413
1414 1414 def shortest(self, node, minlength=1):
1415 1415 """Find the shortest unambiguous prefix that matches node."""
1416 1416
1417 1417 def isvalid(prefix):
1418 1418 try:
1419 1419 matchednode = self._partialmatch(prefix)
1420 1420 except error.AmbiguousPrefixLookupError:
1421 1421 return False
1422 1422 except error.WdirUnsupported:
1423 1423 # single 'ff...' match
1424 1424 return True
1425 1425 if matchednode is None:
1426 1426 raise error.LookupError(node, self.indexfile, _(b'no node'))
1427 1427 return True
1428 1428
1429 1429 def maybewdir(prefix):
1430 1430 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1431 1431
1432 1432 hexnode = hex(node)
1433 1433
1434 1434 def disambiguate(hexnode, minlength):
1435 1435 """Disambiguate against wdirid."""
1436 1436 for length in range(minlength, len(hexnode) + 1):
1437 1437 prefix = hexnode[:length]
1438 1438 if not maybewdir(prefix):
1439 1439 return prefix
1440 1440
1441 1441 if not getattr(self, 'filteredrevs', None):
1442 1442 try:
1443 1443 length = max(self.index.shortest(node), minlength)
1444 1444 return disambiguate(hexnode, length)
1445 1445 except error.RevlogError:
1446 1446 if node != self.nodeconstants.wdirid:
1447 1447 raise error.LookupError(node, self.indexfile, _(b'no node'))
1448 1448 except AttributeError:
1449 1449 # Fall through to pure code
1450 1450 pass
1451 1451
1452 1452 if node == self.nodeconstants.wdirid:
1453 1453 for length in range(minlength, len(hexnode) + 1):
1454 1454 prefix = hexnode[:length]
1455 1455 if isvalid(prefix):
1456 1456 return prefix
1457 1457
1458 1458 for length in range(minlength, len(hexnode) + 1):
1459 1459 prefix = hexnode[:length]
1460 1460 if isvalid(prefix):
1461 1461 return disambiguate(hexnode, length)
1462 1462
1463 1463 def cmp(self, node, text):
1464 1464 """compare text with a given file revision
1465 1465
1466 1466 returns True if text is different from what is stored.
1467 1467 """
1468 1468 p1, p2 = self.parents(node)
1469 1469 return storageutil.hashrevisionsha1(text, p1, p2) != node
1470 1470
1471 1471 def _cachesegment(self, offset, data):
1472 1472 """Add a segment to the revlog cache.
1473 1473
1474 1474 Accepts an absolute offset and the data that is at that location.
1475 1475 """
1476 1476 o, d = self._chunkcache
1477 1477 # try to add to existing cache
1478 1478 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1479 1479 self._chunkcache = o, d + data
1480 1480 else:
1481 1481 self._chunkcache = offset, data
1482 1482
1483 1483 def _readsegment(self, offset, length, df=None):
1484 1484 """Load a segment of raw data from the revlog.
1485 1485
1486 1486 Accepts an absolute offset, length to read, and an optional existing
1487 1487 file handle to read from.
1488 1488
1489 1489 If an existing file handle is passed, it will be seeked and the
1490 1490 original seek position will NOT be restored.
1491 1491
1492 1492 Returns a str or buffer of raw byte data.
1493 1493
1494 1494 Raises if the requested number of bytes could not be read.
1495 1495 """
1496 1496 # Cache data both forward and backward around the requested
1497 1497 # data, in a fixed size window. This helps speed up operations
1498 1498 # involving reading the revlog backwards.
1499 1499 cachesize = self._chunkcachesize
1500 1500 realoffset = offset & ~(cachesize - 1)
1501 1501 reallength = (
1502 1502 (offset + length + cachesize) & ~(cachesize - 1)
1503 1503 ) - realoffset
1504 1504 with self._datareadfp(df) as df:
1505 1505 df.seek(realoffset)
1506 1506 d = df.read(reallength)
1507 1507
1508 1508 self._cachesegment(realoffset, d)
1509 1509 if offset != realoffset or reallength != length:
1510 1510 startoffset = offset - realoffset
1511 1511 if len(d) - startoffset < length:
1512 1512 raise error.RevlogError(
1513 1513 _(
1514 1514 b'partial read of revlog %s; expected %d bytes from '
1515 1515 b'offset %d, got %d'
1516 1516 )
1517 1517 % (
1518 1518 self.indexfile if self._inline else self.datafile,
1519 1519 length,
1520 1520 realoffset,
1521 1521 len(d) - startoffset,
1522 1522 )
1523 1523 )
1524 1524
1525 1525 return util.buffer(d, startoffset, length)
1526 1526
1527 1527 if len(d) < length:
1528 1528 raise error.RevlogError(
1529 1529 _(
1530 1530 b'partial read of revlog %s; expected %d bytes from offset '
1531 1531 b'%d, got %d'
1532 1532 )
1533 1533 % (
1534 1534 self.indexfile if self._inline else self.datafile,
1535 1535 length,
1536 1536 offset,
1537 1537 len(d),
1538 1538 )
1539 1539 )
1540 1540
1541 1541 return d
1542 1542
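# Editorial sketch of the cache-window arithmetic in _readsegment() above,
# assuming ``cachesize`` is a power of two (the chunk cache size is
# validated elsewhere): masking with ~(cachesize - 1) rounds the offset
# down to a window boundary and widens the read to whole windows, so
# nearby reads in either direction hit the cache.
def _aligned_window(offset, length, cachesize=65536):
    realoffset = offset & ~(cachesize - 1)
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    return realoffset, reallength

# a 1000-byte read straddling the first 64KiB boundary pulls two windows:
assert _aligned_window(65000, 1000) == (0, 131072)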
1543 1543 def _getsegment(self, offset, length, df=None):
1544 1544 """Obtain a segment of raw data from the revlog.
1545 1545
1546 1546 Accepts an absolute offset, length of bytes to obtain, and an
1547 1547 optional file handle to the already-opened revlog. If the file
1548 1548 handle is used, its original seek position will not be preserved.
1549 1549
1550 1550 Requests for data may be returned from a cache.
1551 1551
1552 1552 Returns a str or a buffer instance of raw byte data.
1553 1553 """
1554 1554 o, d = self._chunkcache
1555 1555 l = len(d)
1556 1556
1557 1557 # is it in the cache?
1558 1558 cachestart = offset - o
1559 1559 cacheend = cachestart + length
1560 1560 if cachestart >= 0 and cacheend <= l:
1561 1561 if cachestart == 0 and cacheend == l:
1562 1562 return d # avoid a copy
1563 1563 return util.buffer(d, cachestart, cacheend - cachestart)
1564 1564
1565 1565 return self._readsegment(offset, length, df=df)
1566 1566
1567 1567 def _getsegmentforrevs(self, startrev, endrev, df=None):
1568 1568 """Obtain a segment of raw data corresponding to a range of revisions.
1569 1569
1570 1570 Accepts the start and end revisions and an optional already-open
1571 1571 file handle to be used for reading. If the file handle is used, its
1572 1572 seek position will not be preserved.
1573 1573
1574 1574 Requests for data may be satisfied by a cache.
1575 1575
1576 1576 Returns a 2-tuple of (offset, data) for the requested range of
1577 1577 revisions. Offset is the integer offset from the beginning of the
1578 1578 revlog and data is a str or buffer of the raw byte data.
1579 1579
1580 1580 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1581 1581 to determine where each revision's data begins and ends.
1582 1582 """
1583 1583 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1584 1584 # (functions are expensive).
1585 1585 index = self.index
1586 1586 istart = index[startrev]
1587 1587 start = int(istart[0] >> 16)
1588 1588 if startrev == endrev:
1589 1589 end = start + istart[1]
1590 1590 else:
1591 1591 iend = index[endrev]
1592 1592 end = int(iend[0] >> 16) + iend[1]
1593 1593
1594 1594 if self._inline:
1595 1595 start += (startrev + 1) * self.index.entry_size
1596 1596 end += (endrev + 1) * self.index.entry_size
1597 1597 length = end - start
1598 1598
1599 1599 return start, self._getsegment(start, length, df=df)
1600 1600
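# Editorial sketch of the index-entry packing that ``istart[0] >> 16``
# above relies on: the first field of an index entry stores the data
# offset in the high bits and the 16 flag bits in the low 16 bits (the
# same layout ``offset_type(offset, flags)`` builds in _addrevision).
def _toy_offset_type(offset, flags):
    assert 0 <= flags < (1 << 16)
    return (offset << 16) | flags

packed = _toy_offset_type(12345, 0x1)
assert packed >> 16 == 12345 and packed & 0xFFFF == 0x1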
1601 1601 def _chunk(self, rev, df=None):
1602 1602 """Obtain a single decompressed chunk for a revision.
1603 1603
1604 1604 Accepts an integer revision and an optional already-open file handle
1605 1605 to be used for reading. If used, the seek position of the file will not
1606 1606 be preserved.
1607 1607
1608 1608 Returns a str holding uncompressed data for the requested revision.
1609 1609 """
1610 1610 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1611 1611
1612 1612 def _chunks(self, revs, df=None, targetsize=None):
1613 1613 """Obtain decompressed chunks for the specified revisions.
1614 1614
1615 1615 Accepts an iterable of numeric revisions that are assumed to be in
1616 1616 ascending order. Also accepts an optional already-open file handle
1617 1617 to be used for reading. If used, the seek position of the file will
1618 1618 not be preserved.
1619 1619
1620 1620 This function is similar to calling ``self._chunk()`` multiple times,
1621 1621 but is faster.
1622 1622
1623 1623 Returns a list with decompressed data for each requested revision.
1624 1624 """
1625 1625 if not revs:
1626 1626 return []
1627 1627 start = self.start
1628 1628 length = self.length
1629 1629 inline = self._inline
1630 1630 iosize = self.index.entry_size
1631 1631 buffer = util.buffer
1632 1632
1633 1633 l = []
1634 1634 ladd = l.append
1635 1635
1636 1636 if not self._withsparseread:
1637 1637 slicedchunks = (revs,)
1638 1638 else:
1639 1639 slicedchunks = deltautil.slicechunk(
1640 1640 self, revs, targetsize=targetsize
1641 1641 )
1642 1642
1643 1643 for revschunk in slicedchunks:
1644 1644 firstrev = revschunk[0]
1645 1645 # Skip trailing revisions with empty diff
1646 1646 for lastrev in revschunk[::-1]:
1647 1647 if length(lastrev) != 0:
1648 1648 break
1649 1649
1650 1650 try:
1651 1651 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1652 1652 except OverflowError:
1653 1653 # issue4215 - we can't cache a run of chunks greater than
1654 1654 # 2G on Windows
1655 1655 return [self._chunk(rev, df=df) for rev in revschunk]
1656 1656
1657 1657 decomp = self.decompress
1658 1658 for rev in revschunk:
1659 1659 chunkstart = start(rev)
1660 1660 if inline:
1661 1661 chunkstart += (rev + 1) * iosize
1662 1662 chunklength = length(rev)
1663 1663 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1664 1664
1665 1665 return l
1666 1666
1667 1667 def _chunkclear(self):
1668 1668 """Clear the raw chunk cache."""
1669 1669 self._chunkcache = (0, b'')
1670 1670
1671 1671 def deltaparent(self, rev):
1672 1672 """return deltaparent of the given revision"""
1673 1673 base = self.index[rev][3]
1674 1674 if base == rev:
1675 1675 return nullrev
1676 1676 elif self._generaldelta:
1677 1677 return base
1678 1678 else:
1679 1679 return rev - 1
1680 1680
1681 1681 def issnapshot(self, rev):
1682 1682 """tells whether rev is a snapshot"""
1683 1683 if not self._sparserevlog:
1684 1684 return self.deltaparent(rev) == nullrev
1685 1685 elif util.safehasattr(self.index, b'issnapshot'):
1686 1686 # directly assign the method to cache the testing and access
1687 1687 self.issnapshot = self.index.issnapshot
1688 1688 return self.issnapshot(rev)
1689 1689 if rev == nullrev:
1690 1690 return True
1691 1691 entry = self.index[rev]
1692 1692 base = entry[3]
1693 1693 if base == rev:
1694 1694 return True
1695 1695 if base == nullrev:
1696 1696 return True
1697 1697 p1 = entry[5]
1698 1698 p2 = entry[6]
1699 1699 if base == p1 or base == p2:
1700 1700 return False
1701 1701 return self.issnapshot(base)
1702 1702
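# Editorial walk-through of the sparse-revlog rule implemented above,
# using a hypothetical index of (base, p1, p2) tuples: full texts and
# deltas taken against a non-parent snapshot count as snapshots, while
# deltas against a parent do not.
_toy_index = {
    0: (0, -1, -1),  # full text (base == rev)        -> snapshot
    1: (0, 0, -1),   # delta against its parent 0     -> not a snapshot
    2: (0, 1, -1),   # delta against non-parent 0     -> intermediate snapshot
}

def _toy_issnapshot(rev):
    base, p1, p2 = _toy_index[rev]
    if base == rev or base == -1:
        return True
    if base in (p1, p2):
        return False
    return _toy_issnapshot(base)

assert [_toy_issnapshot(r) for r in (0, 1, 2)] == [True, False, True]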
1703 1703 def snapshotdepth(self, rev):
1704 1704 """number of snapshot in the chain before this one"""
1705 1705 if not self.issnapshot(rev):
1706 1706 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1707 1707 return len(self._deltachain(rev)[0]) - 1
1708 1708
1709 1709 def revdiff(self, rev1, rev2):
1710 1710 """return or calculate a delta between two revisions
1711 1711
1712 1712 The delta calculated is in binary form and is intended to be written to
1713 1713 revlog data directly. So this function needs raw revision data.
1714 1714 """
1715 1715 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1716 1716 return bytes(self._chunk(rev2))
1717 1717
1718 1718 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1719 1719
1720 1720 def _processflags(self, text, flags, operation, raw=False):
1721 1721 """deprecated entry point to access flag processors"""
1722 1722 msg = b'_processflags(...) use the specialized variant'
1723 1723 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1724 1724 if raw:
1725 1725 return text, flagutil.processflagsraw(self, text, flags)
1726 1726 elif operation == b'read':
1727 1727 return flagutil.processflagsread(self, text, flags)
1728 1728 else: # write operation
1729 1729 return flagutil.processflagswrite(self, text, flags)
1730 1730
1731 1731 def revision(self, nodeorrev, _df=None, raw=False):
1732 1732 """return an uncompressed revision of a given node or revision
1733 1733 number.
1734 1734
1735 1735 _df - an existing file handle to read from. (internal-only)
1736 1736 raw - an optional argument specifying if the revision data is to be
1737 1737 treated as raw data when applying flag transforms. 'raw' should be set
1738 1738 to True when generating changegroups or in debug commands.
1739 1739 """
1740 1740 if raw:
1741 1741 msg = (
1742 1742 b'revlog.revision(..., raw=True) is deprecated, '
1743 1743 b'use revlog.rawdata(...)'
1744 1744 )
1745 1745 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1746 1746 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1747 1747
1748 1748 def sidedata(self, nodeorrev, _df=None):
1749 1749 """a map of extra data related to the changeset but not part of the hash
1750 1750
1751 1751 This function currently returns a dictionary. However, a more
1752 1752 advanced mapping object will likely be used in the future for more
1753 1753 efficient/lazy code.
1754 1754 """
1755 1755 return self._revisiondata(nodeorrev, _df)[1]
1756 1756
1757 1757 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1758 1758 # deal with <nodeorrev> argument type
1759 1759 if isinstance(nodeorrev, int):
1760 1760 rev = nodeorrev
1761 1761 node = self.node(rev)
1762 1762 else:
1763 1763 node = nodeorrev
1764 1764 rev = None
1765 1765
1766 1766 # fast path the special `nullid` rev
1767 1767 if node == self.nullid:
1768 1768 return b"", {}
1769 1769
1770 1770 # ``rawtext`` is the text as stored inside the revlog. Might be the
1771 1771 # revision or might need to be processed to retrieve the revision.
1772 1772 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1773 1773
1774 1774 if self.version & 0xFFFF == REVLOGV2:
1775 1775 if rev is None:
1776 1776 rev = self.rev(node)
1777 1777 sidedata = self._sidedata(rev)
1778 1778 else:
1779 1779 sidedata = {}
1780 1780
1781 1781 if raw and validated:
1782 1782 # if we don't want to process the raw text and that raw
1783 1783 # text is cached, we can exit early.
1784 1784 return rawtext, sidedata
1785 1785 if rev is None:
1786 1786 rev = self.rev(node)
1787 1787 # the revlog's flag for this revision
1788 1788 # (usually alter its state or content)
1789 1789 flags = self.flags(rev)
1790 1790
1791 1791 if validated and flags == REVIDX_DEFAULT_FLAGS:
1792 1792 # no extra flags set, no flag processor runs, text = rawtext
1793 1793 return rawtext, sidedata
1794 1794
1795 1795 if raw:
1796 1796 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1797 1797 text = rawtext
1798 1798 else:
1799 1799 r = flagutil.processflagsread(self, rawtext, flags)
1800 1800 text, validatehash = r
1801 1801 if validatehash:
1802 1802 self.checkhash(text, node, rev=rev)
1803 1803 if not validated:
1804 1804 self._revisioncache = (node, rev, rawtext)
1805 1805
1806 1806 return text, sidedata
1807 1807
1808 1808 def _rawtext(self, node, rev, _df=None):
1809 1809 """return the possibly unvalidated rawtext for a revision
1810 1810
1811 1811 returns (rev, rawtext, validated)
1812 1812 """
1813 1813
1814 1814 # revision in the cache (could be useful to apply delta)
1815 1815 cachedrev = None
1816 1816 # An intermediate text to apply deltas to
1817 1817 basetext = None
1818 1818
1819 1819 # Check if we have the entry in cache
1820 1820 # The cache entry looks like (node, rev, rawtext)
1821 1821 if self._revisioncache:
1822 1822 if self._revisioncache[0] == node:
1823 1823 return (rev, self._revisioncache[2], True)
1824 1824 cachedrev = self._revisioncache[1]
1825 1825
1826 1826 if rev is None:
1827 1827 rev = self.rev(node)
1828 1828
1829 1829 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1830 1830 if stopped:
1831 1831 basetext = self._revisioncache[2]
1832 1832
1833 1833 # drop cache to save memory, the caller is expected to
1834 1834 # update self._revisioncache after validating the text
1835 1835 self._revisioncache = None
1836 1836
1837 1837 targetsize = None
1838 1838 rawsize = self.index[rev][2]
1839 1839 if 0 <= rawsize:
1840 1840 targetsize = 4 * rawsize
1841 1841
1842 1842 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1843 1843 if basetext is None:
1844 1844 basetext = bytes(bins[0])
1845 1845 bins = bins[1:]
1846 1846
1847 1847 rawtext = mdiff.patches(basetext, bins)
1848 1848 del basetext # let us have a chance to free memory early
1849 1849 return (rev, rawtext, False)
1850 1850
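# Editorial sketch of the reconstruction flow in _rawtext() above: start
# from the chain's base text and fold each delta in order. The real code
# applies binary patches via mdiff.patches(); plain callables stand in
# for deltas here.
_base = b"line1\nline2\n"
_toy_deltas = [
    lambda t: t + b"line3\n",                 # delta adding a line
    lambda t: t.replace(b"line1", b"LINE1"),  # delta editing a line
]

_text = _base
for _apply in _toy_deltas:
    _text = _apply(_text)
assert _text == b"LINE1\nline2\nline3\n"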
1851 1851 def _sidedata(self, rev):
1852 1852 """Return the sidedata for a given revision number."""
1853 1853 index_entry = self.index[rev]
1854 1854 sidedata_offset = index_entry[8]
1855 1855 sidedata_size = index_entry[9]
1856 1856
1857 1857 if self._inline:
1858 1858 sidedata_offset += self.index.entry_size * (1 + rev)
1859 1859 if sidedata_size == 0:
1860 1860 return {}
1861 1861
1862 1862 segment = self._getsegment(sidedata_offset, sidedata_size)
1863 1863 sidedata = sidedatautil.deserialize_sidedata(segment)
1864 1864 return sidedata
1865 1865
1866 1866 def rawdata(self, nodeorrev, _df=None):
1867 1867 """return an uncompressed raw data of a given node or revision number.
1868 1868
1869 1869 _df - an existing file handle to read from. (internal-only)
1870 1870 """
1871 1871 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1872 1872
1873 1873 def hash(self, text, p1, p2):
1874 1874 """Compute a node hash.
1875 1875
1876 1876 Available as a function so that subclasses can replace the hash
1877 1877 as needed.
1878 1878 """
1879 1879 return storageutil.hashrevisionsha1(text, p1, p2)
1880 1880
1881 1881 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1882 1882 """Check node hash integrity.
1883 1883
1884 1884 Available as a function so that subclasses can extend hash mismatch
1885 1885 behaviors as needed.
1886 1886 """
1887 1887 try:
1888 1888 if p1 is None and p2 is None:
1889 1889 p1, p2 = self.parents(node)
1890 1890 if node != self.hash(text, p1, p2):
1891 1891 # Clear the revision cache on hash failure. The revision cache
1892 1892 # only stores the raw revision and clearing the cache does have
1893 1893 # the side-effect that we won't have a cache hit when the raw
1894 1894 # revision data is accessed. But this case should be rare and
1895 1895 # it is extra work to teach the cache about the hash
1896 1896 # verification state.
1897 1897 if self._revisioncache and self._revisioncache[0] == node:
1898 1898 self._revisioncache = None
1899 1899
1900 1900 revornode = rev
1901 1901 if revornode is None:
1902 1902 revornode = templatefilters.short(hex(node))
1903 1903 raise error.RevlogError(
1904 1904 _(b"integrity check failed on %s:%s")
1905 1905 % (self.indexfile, pycompat.bytestr(revornode))
1906 1906 )
1907 1907 except error.RevlogError:
1908 1908 if self._censorable and storageutil.iscensoredtext(text):
1909 1909 raise error.CensoredNodeError(self.indexfile, node, text)
1910 1910 raise
1911 1911
1912 1912 def _enforceinlinesize(self, tr, fp=None):
1913 1913 """Check if the revlog is too big for inline and convert if so.
1914 1914
1915 1915 This should be called after revisions are added to the revlog. If the
1916 1916 revlog has grown too large to be an inline revlog, it will convert it
1917 1917 to use multiple index and data files.
1918 1918 """
1919 1919 tiprev = len(self) - 1
1920 1920 if (
1921 1921 not self._inline
1922 1922 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1923 1923 ):
1924 1924 return
1925 1925
1926 1926 troffset = tr.findoffset(self.indexfile)
1927 1927 if troffset is None:
1928 1928 raise error.RevlogError(
1929 1929 _(b"%s not found in the transaction") % self.indexfile
1930 1930 )
1931 1931 trindex = 0
1932 1932 tr.add(self.datafile, 0)
1933 1933
1934 1934 if fp:
1935 1935 fp.flush()
1936 1936 fp.close()
1937 1937 # We can't use the cached file handle after close(). So prevent
1938 1938 # its usage.
1939 1939 self._writinghandles = None
1940 1940
1941 1941 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1942 1942 for r in self:
1943 1943 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1944 1944 if troffset <= self.start(r):
1945 1945 trindex = r
1946 1946
1947 1947 with self._indexfp(b'w') as fp:
1948 1948 self.version &= ~FLAG_INLINE_DATA
1949 1949 self._inline = False
1950 1950 for i in self:
1951 1951 e = self.index.entry_binary(i)
1952 1952 if i == 0:
1953 1953 header = self.index.pack_header(self.version)
1954 1954 e = header + e
1955 1955 fp.write(e)
1956 1956
1957 1957 # the temp file replaces the real index when we exit the context
1958 1958 # manager
1959 1959
1960 1960 tr.replace(self.indexfile, trindex * self.index.entry_size)
1961 1961 nodemaputil.setup_persistent_nodemap(tr, self)
1962 1962 self._chunkclear()
1963 1963
1964 1964 def _nodeduplicatecallback(self, transaction, node):
1965 1965 """called when trying to add a node already stored."""
1966 1966
1967 1967 def addrevision(
1968 1968 self,
1969 1969 text,
1970 1970 transaction,
1971 1971 link,
1972 1972 p1,
1973 1973 p2,
1974 1974 cachedelta=None,
1975 1975 node=None,
1976 1976 flags=REVIDX_DEFAULT_FLAGS,
1977 1977 deltacomputer=None,
1978 1978 sidedata=None,
1979 1979 ):
1980 1980 """add a revision to the log
1981 1981
1982 1982 text - the revision data to add
1983 1983 transaction - the transaction object used for rollback
1984 1984 link - the linkrev data to add
1985 1985 p1, p2 - the parent nodeids of the revision
1986 1986 cachedelta - an optional precomputed delta
1987 1987 node - nodeid of revision; typically node is not specified, and it is
1988 1988 computed by default as hash(text, p1, p2); however, subclasses might
1989 1989 use a different hashing method (and override checkhash() in that case)
1990 1990 flags - the known flags to set on the revision
1991 1991 deltacomputer - an optional deltacomputer instance shared between
1992 1992 multiple calls
1993 1993 """
1994 1994 if link == nullrev:
1995 1995 raise error.RevlogError(
1996 1996 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1997 1997 )
1998 1998
1999 1999 if sidedata is None:
2000 2000 sidedata = {}
2001 2001 elif sidedata and not self.hassidedata:
2002 2002 raise error.ProgrammingError(
2003 2003 _(b"trying to add sidedata to a revlog who don't support them")
2004 2004 )
2005 2005
2006 2006 if flags:
2007 2007 node = node or self.hash(text, p1, p2)
2008 2008
2009 2009 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2010 2010
2011 2011 # If the flag processor modifies the revision data, ignore any provided
2012 2012 # cachedelta.
2013 2013 if rawtext != text:
2014 2014 cachedelta = None
2015 2015
2016 2016 if len(rawtext) > _maxentrysize:
2017 2017 raise error.RevlogError(
2018 2018 _(
2019 2019 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2020 2020 )
2021 2021 % (self.indexfile, len(rawtext))
2022 2022 )
2023 2023
2024 2024 node = node or self.hash(rawtext, p1, p2)
2025 2025 rev = self.index.get_rev(node)
2026 2026 if rev is not None:
2027 2027 return rev
2028 2028
2029 2029 if validatehash:
2030 2030 self.checkhash(rawtext, node, p1=p1, p2=p2)
2031 2031
2032 2032 return self.addrawrevision(
2033 2033 rawtext,
2034 2034 transaction,
2035 2035 link,
2036 2036 p1,
2037 2037 p2,
2038 2038 node,
2039 2039 flags,
2040 2040 cachedelta=cachedelta,
2041 2041 deltacomputer=deltacomputer,
2042 2042 sidedata=sidedata,
2043 2043 )
2044 2044
2045 2045 def addrawrevision(
2046 2046 self,
2047 2047 rawtext,
2048 2048 transaction,
2049 2049 link,
2050 2050 p1,
2051 2051 p2,
2052 2052 node,
2053 2053 flags,
2054 2054 cachedelta=None,
2055 2055 deltacomputer=None,
2056 2056 sidedata=None,
2057 2057 ):
2058 2058 """add a raw revision with known flags, node and parents
2059 2059 useful when reusing a revision not stored in this revlog (e.g. received
2060 2060 over wire, or read from an external bundle).
2061 2061 """
2062 2062 dfh = None
2063 2063 if not self._inline:
2064 2064 dfh = self._datafp(b"a+")
2065 2065 ifh = self._indexfp(b"a+")
2066 2066 try:
2067 2067 return self._addrevision(
2068 2068 node,
2069 2069 rawtext,
2070 2070 transaction,
2071 2071 link,
2072 2072 p1,
2073 2073 p2,
2074 2074 flags,
2075 2075 cachedelta,
2076 2076 ifh,
2077 2077 dfh,
2078 2078 deltacomputer=deltacomputer,
2079 2079 sidedata=sidedata,
2080 2080 )
2081 2081 finally:
2082 2082 if dfh:
2083 2083 dfh.close()
2084 2084 ifh.close()
2085 2085
2086 2086 def compress(self, data):
2087 2087 """Generate a possibly-compressed representation of data."""
2088 2088 if not data:
2089 2089 return b'', data
2090 2090
2091 2091 compressed = self._compressor.compress(data)
2092 2092
2093 2093 if compressed:
2094 2094 # The revlog compressor added the header in the returned data.
2095 2095 return b'', compressed
2096 2096
2097 2097 if data[0:1] == b'\0':
2098 2098 return b'', data
2099 2099 return b'u', data
2100 2100
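# Editorial check of the chunk-header convention shared by compress()
# and decompress() below: zlib output starts with 'x' (0x78), so
# compressed chunks are self-identifying, while literal data is prefixed
# with 'u' unless it already begins with a NUL byte.
import zlib as _zlib_demo

_blob = b"some revision text " * 20
_compressed = _zlib_demo.compress(_blob)
assert _compressed[0:1] == b'x'  # dispatched by the t == b'x' branch below
assert _zlib_demo.decompress(_compressed) == _blob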
2101 2101 def decompress(self, data):
2102 2102 """Decompress a revlog chunk.
2103 2103
2104 2104 The chunk is expected to begin with a header identifying the
2105 2105 format type so it can be routed to an appropriate decompressor.
2106 2106 """
2107 2107 if not data:
2108 2108 return data
2109 2109
2110 2110 # Revlogs are read much more frequently than they are written and many
2111 2111 # chunks only take microseconds to decompress, so performance is
2112 2112 # important here.
2113 2113 #
2114 2114 # We can make a few assumptions about revlogs:
2115 2115 #
2116 2116 # 1) the majority of chunks will be compressed (as opposed to inline
2117 2117 # raw data).
2118 2118 # 2) decompressing *any* data will likely be at least 10x slower than
2119 2119 # returning raw inline data.
2120 2120 # 3) we want to prioritize common and officially supported compression
2121 2121 # engines
2122 2122 #
2123 2123 # It follows that we want to optimize for "decompress compressed data
2124 2124 # when encoded with common and officially supported compression engines"
2125 2125 # case over "raw data" and "data encoded by less common or non-official
2126 2126 # compression engines." That is why we have the inline lookup first
2127 2127 # followed by the compengines lookup.
2128 2128 #
2129 2129 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2130 2130 # compressed chunks. And this matters for changelog and manifest reads.
2131 2131 t = data[0:1]
2132 2132
2133 2133 if t == b'x':
2134 2134 try:
2135 2135 return _zlibdecompress(data)
2136 2136 except zlib.error as e:
2137 2137 raise error.RevlogError(
2138 2138 _(b'revlog decompress error: %s')
2139 2139 % stringutil.forcebytestr(e)
2140 2140 )
2141 2141 # '\0' is more common than 'u' so it goes first.
2142 2142 elif t == b'\0':
2143 2143 return data
2144 2144 elif t == b'u':
2145 2145 return util.buffer(data, 1)
2146 2146
2147 2147 try:
2148 2148 compressor = self._decompressors[t]
2149 2149 except KeyError:
2150 2150 try:
2151 2151 engine = util.compengines.forrevlogheader(t)
2152 2152 compressor = engine.revlogcompressor(self._compengineopts)
2153 2153 self._decompressors[t] = compressor
2154 2154 except KeyError:
2155 2155 raise error.RevlogError(
2156 2156 _(b'unknown compression type %s') % binascii.hexlify(t)
2157 2157 )
2158 2158
2159 2159 return compressor.decompress(data)
2160 2160
2161 2161 def _addrevision(
2162 2162 self,
2163 2163 node,
2164 2164 rawtext,
2165 2165 transaction,
2166 2166 link,
2167 2167 p1,
2168 2168 p2,
2169 2169 flags,
2170 2170 cachedelta,
2171 2171 ifh,
2172 2172 dfh,
2173 2173 alwayscache=False,
2174 2174 deltacomputer=None,
2175 2175 sidedata=None,
2176 2176 ):
2177 2177 """internal function to add revisions to the log
2178 2178
2179 2179 see addrevision for argument descriptions.
2180 2180
2181 2181 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2182 2182
2183 2183 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2184 2184 be used.
2185 2185
2186 2186 invariants:
2187 2187 - rawtext is optional (can be None); if not set, cachedelta must be set.
2188 2188 if both are set, they must correspond to each other.
2189 2189 """
2190 2190 if node == self.nullid:
2191 2191 raise error.RevlogError(
2192 2192 _(b"%s: attempt to add null revision") % self.indexfile
2193 2193 )
2194 2194 if (
2195 2195 node == self.nodeconstants.wdirid
2196 2196 or node in self.nodeconstants.wdirfilenodeids
2197 2197 ):
2198 2198 raise error.RevlogError(
2199 2199 _(b"%s: attempt to add wdir revision") % self.indexfile
2200 2200 )
2201 2201
2202 2202 if self._inline:
2203 2203 fh = ifh
2204 2204 else:
2205 2205 fh = dfh
2206 2206
2207 2207 btext = [rawtext]
2208 2208
2209 2209 curr = len(self)
2210 2210 prev = curr - 1
2211 2211
2212 2212 offset = self._get_data_offset(prev)
2213 2213
2214 2214 if self._concurrencychecker:
2215 2215 if self._inline:
2216 2216 # offset is "as if" it were in the .d file, so we need to add on
2217 2217 # the size of the entry metadata.
2218 2218 self._concurrencychecker(
2219 2219 ifh, self.indexfile, offset + curr * self.index.entry_size
2220 2220 )
2221 2221 else:
2222 2222 # Entries in the .i are a consistent size.
2223 2223 self._concurrencychecker(
2224 2224 ifh, self.indexfile, curr * self.index.entry_size
2225 2225 )
2226 2226 self._concurrencychecker(dfh, self.datafile, offset)
2227 2227
2228 2228 p1r, p2r = self.rev(p1), self.rev(p2)
2229 2229
2230 2230 # full versions are inserted when the needed deltas
2231 2231 # become comparable to the uncompressed text
2232 2232 if rawtext is None:
2233 2233 # need rawtext size, before changed by flag processors, which is
2234 2234 # the non-raw size. use revlog explicitly to avoid filelog's extra
2235 2235 # logic that might remove metadata size.
2236 2236 textlen = mdiff.patchedsize(
2237 2237 revlog.size(self, cachedelta[0]), cachedelta[1]
2238 2238 )
2239 2239 else:
2240 2240 textlen = len(rawtext)
2241 2241
2242 2242 if deltacomputer is None:
2243 2243 deltacomputer = deltautil.deltacomputer(self)
2244 2244
2245 2245 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2246 2246
2247 2247 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2248 2248
2249 2249 if sidedata and self.version & 0xFFFF == REVLOGV2:
2250 2250 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2251 2251 sidedata_offset = offset + deltainfo.deltalen
2252 2252 else:
2253 2253 serialized_sidedata = b""
2254 2254 # Don't store the offset if the sidedata is empty; that way
2255 2255 # we can easily detect empty sidedata, and it will be no different
2256 2256 # from sidedata we add manually.
2257 2257 sidedata_offset = 0
2258 2258
2259 2259 e = (
2260 2260 offset_type(offset, flags),
2261 2261 deltainfo.deltalen,
2262 2262 textlen,
2263 2263 deltainfo.base,
2264 2264 link,
2265 2265 p1r,
2266 2266 p2r,
2267 2267 node,
2268 2268 sidedata_offset,
2269 2269 len(serialized_sidedata),
2270 2270 )
2271 2271
2272 2272 if self.version & 0xFFFF != REVLOGV2:
2273 2273 e = e[:8]
2274 2274
2275 2275 self.index.append(e)
2276 2276 entry = self.index.entry_binary(curr)
2277 2277 if curr == 0:
2278 2278 header = self.index.pack_header(self.version)
2279 2279 entry = header + entry
2280 2280 self._writeentry(
2281 2281 transaction,
2282 2282 ifh,
2283 2283 dfh,
2284 2284 entry,
2285 2285 deltainfo.data,
2286 2286 link,
2287 2287 offset,
2288 2288 serialized_sidedata,
2289 2289 )
2290 2290
2291 2291 rawtext = btext[0]
2292 2292
2293 2293 if alwayscache and rawtext is None:
2294 2294 rawtext = deltacomputer.buildtext(revinfo, fh)
2295 2295
2296 2296 if type(rawtext) == bytes: # only accept immutable objects
2297 2297 self._revisioncache = (node, curr, rawtext)
2298 2298 self._chainbasecache[curr] = deltainfo.chainbase
2299 2299 return curr
2300 2300
2301 2301 def _get_data_offset(self, prev):
2302 2302 """Returns the current offset in the (in-transaction) data file.
2303 2303 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2304 2304 file to store that information: since sidedata can be rewritten to the
2305 2305 end of the data file within a transaction, you can have cases where, for
2306 2306 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2307 2307 to `n - 1`'s sidedata being written after `n`'s data.
2308 2308
2309 2309 TODO cache this in a docket file before getting out of experimental."""
2310 2310 if self.version & 0xFFFF != REVLOGV2:
2311 2311 return self.end(prev)
2312 2312
2313 2313 offset = 0
2314 2314 for rev, entry in enumerate(self.index):
2315 2315 sidedata_end = entry[8] + entry[9]
2316 2316 # Sidedata for a previous rev has potentially been written after
2317 2317 # this rev's end, so take the max.
2318 2318 offset = max(self.end(rev), offset, sidedata_end)
2319 2319 return offset
2320 2320
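# Editorial model of the scan above, with hypothetical
# (end, sidedata_end) pairs per revision: because revlog-v2 may rewrite
# sidedata at the tail of the data file, the next write offset is the
# max over *all* ends, not simply end(tip).
_toy_ends = [
    (100, 0),    # rev 0: data ends at 100, no sidedata
    (200, 350),  # rev 1: its sidedata was rewritten past rev 2's data
    (300, 0),    # rev 2
]

_offset = 0
for _end, _sidedata_end in _toy_ends:
    _offset = max(_offset, _end, _sidedata_end)
assert _offset == 350  # writing at end(tip) == 300 would clobber sidedata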
2321 2321 def _writeentry(
2322 2322 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2323 2323 ):
2324 2324 # Files opened in a+ mode have inconsistent behavior on various
2325 2325 # platforms. Windows requires that a file positioning call be made
2326 2326 # when the file handle transitions between reads and writes. See
2327 2327 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2328 2328 # platforms, Python or the platform itself can be buggy. Some versions
2329 2329 # of Solaris have been observed to not append at the end of the file
2330 2330 # if the file was seeked to before the end. See issue4943 for more.
2331 2331 #
2332 2332 # We work around this issue by inserting a seek() before writing.
2333 2333 # Note: This is likely not necessary on Python 3. However, because
2334 2334 # the file handle is reused for reads and may be seeked there, we need
2335 2335 # to be careful before changing this.
2336 2336 ifh.seek(0, os.SEEK_END)
2337 2337 if dfh:
2338 2338 dfh.seek(0, os.SEEK_END)
2339 2339
2340 2340 curr = len(self) - 1
2341 2341 if not self._inline:
2342 2342 transaction.add(self.datafile, offset)
2343 2343 transaction.add(self.indexfile, curr * len(entry))
2344 2344 if data[0]:
2345 2345 dfh.write(data[0])
2346 2346 dfh.write(data[1])
2347 2347 if sidedata:
2348 2348 dfh.write(sidedata)
2349 2349 ifh.write(entry)
2350 2350 else:
2351 2351 offset += curr * self.index.entry_size
2352 2352 transaction.add(self.indexfile, offset)
2353 2353 ifh.write(entry)
2354 2354 ifh.write(data[0])
2355 2355 ifh.write(data[1])
2356 2356 if sidedata:
2357 2357 ifh.write(sidedata)
2358 2358 self._enforceinlinesize(transaction, ifh)
2359 2359 nodemaputil.setup_persistent_nodemap(transaction, self)
2360 2360
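# Editorial reproduction of the append-mode workaround described in the
# comment above: defensively seek to EOF before writing on an 'a+'
# handle that is also used for reads. Pure stdlib, not revlog-specific.
import os as _os_demo
import tempfile as _tempfile_demo

_fd, _path = _tempfile_demo.mkstemp()
_os_demo.close(_fd)
with open(_path, 'ab+') as _fh:
    _fh.write(b'one')
    _fh.seek(0)                     # a read elsewhere moved the position...
    _fh.read()
    _fh.seek(0, _os_demo.SEEK_END)  # ...so reposition before the next write
    _fh.write(b'two')
_os_demo.unlink(_path)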
2361 2361 def addgroup(
2362 2362 self,
2363 2363 deltas,
2364 2364 linkmapper,
2365 2365 transaction,
2366 2366 alwayscache=False,
2367 2367 addrevisioncb=None,
2368 2368 duplicaterevisioncb=None,
2369 2369 ):
2370 2370 """
2371 2371 add a delta group
2372 2372
2373 2373 Given a set of deltas, add them to the revision log. The
2374 2374 first delta is against its parent, which should be in our
2375 2375 log; the rest are against the previous delta.
2376 2376
2377 2377 If ``addrevisioncb`` is defined, it will be called with arguments of
2378 2378 this revlog and the node that was added.
2379 2379 """
2380 2380
2381 2381 if self._writinghandles:
2382 2382 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2383 2383
2384 2384 r = len(self)
2385 2385 end = 0
2386 2386 if r:
2387 2387 end = self.end(r - 1)
2388 2388 ifh = self._indexfp(b"a+")
2389 2389 isize = r * self.index.entry_size
2390 2390 if self._inline:
2391 2391 transaction.add(self.indexfile, end + isize)
2392 2392 dfh = None
2393 2393 else:
2394 2394 transaction.add(self.indexfile, isize)
2395 2395 transaction.add(self.datafile, end)
2396 2396 dfh = self._datafp(b"a+")
2397 2397
2398 2398 def flush():
2399 2399 if dfh:
2400 2400 dfh.flush()
2401 2401 ifh.flush()
2402 2402
2403 2403 self._writinghandles = (ifh, dfh)
2404 2404 empty = True
2405 2405
2406 2406 try:
2407 2407 deltacomputer = deltautil.deltacomputer(self)
2408 2408 # loop through our set of deltas
2409 2409 for data in deltas:
2410 2410 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2411 2411 link = linkmapper(linknode)
2412 2412 flags = flags or REVIDX_DEFAULT_FLAGS
2413 2413
2414 2414 rev = self.index.get_rev(node)
2415 2415 if rev is not None:
2416 2416 # this can happen if two branches make the same change
2417 2417 self._nodeduplicatecallback(transaction, rev)
2418 2418 if duplicaterevisioncb:
2419 2419 duplicaterevisioncb(self, rev)
2420 2420 empty = False
2421 2421 continue
2422 2422
2423 2423 for p in (p1, p2):
2424 2424 if not self.index.has_node(p):
2425 2425 raise error.LookupError(
2426 2426 p, self.indexfile, _(b'unknown parent')
2427 2427 )
2428 2428
2429 2429 if not self.index.has_node(deltabase):
2430 2430 raise error.LookupError(
2431 2431 deltabase, self.indexfile, _(b'unknown delta base')
2432 2432 )
2433 2433
2434 2434 baserev = self.rev(deltabase)
2435 2435
2436 2436 if baserev != nullrev and self.iscensored(baserev):
2437 2437 # if base is censored, delta must be full replacement in a
2438 2438 # single patch operation
2439 2439 hlen = struct.calcsize(b">lll")
2440 2440 oldlen = self.rawsize(baserev)
2441 2441 newlen = len(delta) - hlen
2442 2442 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2443 2443 raise error.CensoredBaseError(
2444 2444 self.indexfile, self.node(baserev)
2445 2445 )
2446 2446
2447 2447 if not flags and self._peek_iscensored(baserev, delta, flush):
2448 2448 flags |= REVIDX_ISCENSORED
2449 2449
2450 2450 # We assume consumers of addrevisioncb will want to retrieve
2451 2451 # the added revision, which will require a call to
2452 2452 # revision(). revision() will fast path if there is a cache
2453 2453 # hit. So, we tell _addrevision() to always cache in this case.
2454 2454 # We're only using addgroup() in the context of changegroup
2455 2455 # generation so the revision data can always be handled as raw
2456 2456 # by the flagprocessor.
2457 2457 rev = self._addrevision(
2458 2458 node,
2459 2459 None,
2460 2460 transaction,
2461 2461 link,
2462 2462 p1,
2463 2463 p2,
2464 2464 flags,
2465 2465 (baserev, delta),
2466 2466 ifh,
2467 2467 dfh,
2468 2468 alwayscache=alwayscache,
2469 2469 deltacomputer=deltacomputer,
2470 2470 sidedata=sidedata,
2471 2471 )
2472 2472
2473 2473 if addrevisioncb:
2474 2474 addrevisioncb(self, rev)
2475 2475 empty = False
2476 2476
2477 2477 if not dfh and not self._inline:
2478 2478 # addrevision switched from inline to conventional
2479 2479 # reopen the index
2480 2480 ifh.close()
2481 2481 dfh = self._datafp(b"a+")
2482 2482 ifh = self._indexfp(b"a+")
2483 2483 self._writinghandles = (ifh, dfh)
2484 2484 finally:
2485 2485 self._writinghandles = None
2486 2486
2487 2487 if dfh:
2488 2488 dfh.close()
2489 2489 ifh.close()
2490 2490 return not empty
2491 2491
2492 2492 def iscensored(self, rev):
2493 2493 """Check if a file revision is censored."""
2494 2494 if not self._censorable:
2495 2495 return False
2496 2496
2497 2497 return self.flags(rev) & REVIDX_ISCENSORED
2498 2498
2499 2499 def _peek_iscensored(self, baserev, delta, flush):
2500 2500 """Quickly check if a delta produces a censored revision."""
2501 2501 if not self._censorable:
2502 2502 return False
2503 2503
2504 2504 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2505 2505
2506 2506 def getstrippoint(self, minlink):
2507 2507 """find the minimum rev that must be stripped to strip the linkrev
2508 2508
2509 2509 Returns a tuple containing the minimum rev and a set of all revs that
2510 2510 have linkrevs that will be broken by this strip.
2511 2511 """
2512 2512 return storageutil.resolvestripinfo(
2513 2513 minlink,
2514 2514 len(self) - 1,
2515 2515 self.headrevs(),
2516 2516 self.linkrev,
2517 2517 self.parentrevs,
2518 2518 )
2519 2519
2520 2520 def strip(self, minlink, transaction):
2521 2521 """truncate the revlog on the first revision with a linkrev >= minlink
2522 2522
2523 2523 This function is called when we're stripping revision minlink and
2524 2524 its descendants from the repository.
2525 2525
2526 2526 We have to remove all revisions with linkrev >= minlink, because
2527 2527 the equivalent changelog revisions will be renumbered after the
2528 2528 strip.
2529 2529
2530 2530 So we truncate the revlog on the first of these revisions, and
2531 2531 trust that the caller has saved the revisions that shouldn't be
2532 2532 removed and that it'll re-add them after this truncation.
2533 2533 """
2534 2534 if len(self) == 0:
2535 2535 return
2536 2536
2537 2537 rev, _ = self.getstrippoint(minlink)
2538 2538 if rev == len(self):
2539 2539 return
2540 2540
2541 2541 # first truncate the files on disk
2542 2542 end = self.start(rev)
2543 2543 if not self._inline:
2544 2544 transaction.add(self.datafile, end)
2545 2545 end = rev * self.index.entry_size
2546 2546 else:
2547 2547 end += rev * self.index.entry_size
2548 2548
2549 2549 transaction.add(self.indexfile, end)
2550 2550
2551 2551 # then reset internal state in memory to forget those revisions
2552 2552 self._revisioncache = None
2553 2553 self._chaininfocache = util.lrucachedict(500)
2554 2554 self._chunkclear()
2555 2555
2556 2556 del self.index[rev:-1]
2557 2557
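# Editorial arithmetic for the truncation offsets computed in strip()
# above, assuming an inline revlog with hypothetical 64-byte index
# entries: the inline file interleaves index and data, so the cut point
# is rev's data start plus the size of the surviving index entries.
_entry_size = 64
_rev = 5            # first revision to remove
_data_start = 9000  # stand-in for self.start(rev)

_inline_end = _data_start + _rev * _entry_size
assert _inline_end == 9320
# non-inline: the index is truncated at _rev * _entry_size == 320 and the
# data file separately at _data_start.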
2558 2558 def checksize(self):
2559 2559 """Check size of index and data files
2560 2560
2561 2561 return a (dd, di) tuple.
2562 2562 - dd: extra bytes for the "data" file
2563 2563 - di: extra bytes for the "index" file
2564 2564
2565 2565 A healthy revlog will return (0, 0).
2566 2566 """
2567 2567 expected = 0
2568 2568 if len(self):
2569 2569 expected = max(0, self.end(len(self) - 1))
2570 2570
2571 2571 try:
2572 2572 with self._datafp() as f:
2573 2573 f.seek(0, io.SEEK_END)
2574 2574 actual = f.tell()
2575 2575 dd = actual - expected
2576 2576 except IOError as inst:
2577 2577 if inst.errno != errno.ENOENT:
2578 2578 raise
2579 2579 dd = 0
2580 2580
2581 2581 try:
2582 2582 f = self.opener(self.indexfile)
2583 2583 f.seek(0, io.SEEK_END)
2584 2584 actual = f.tell()
2585 2585 f.close()
2586 2586 s = self.index.entry_size
2587 2587 i = max(0, actual // s)
2588 2588 di = actual - (i * s)
2589 2589 if self._inline:
2590 2590 databytes = 0
2591 2591 for r in self:
2592 2592 databytes += max(0, self.length(r))
2593 2593 dd = 0
2594 2594 di = actual - len(self) * s - databytes
2595 2595 except IOError as inst:
2596 2596 if inst.errno != errno.ENOENT:
2597 2597 raise
2598 2598 di = 0
2599 2599
2600 2600 return (dd, di)
2601 2601
2602 2602 def files(self):
2603 2603 res = [self.indexfile]
2604 2604 if not self._inline:
2605 2605 res.append(self.datafile)
2606 2606 return res
2607 2607
2608 2608 def emitrevisions(
2609 2609 self,
2610 2610 nodes,
2611 2611 nodesorder=None,
2612 2612 revisiondata=False,
2613 2613 assumehaveparentrevisions=False,
2614 2614 deltamode=repository.CG_DELTAMODE_STD,
2615 2615 sidedata_helpers=None,
2616 2616 ):
2617 2617 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2618 2618 raise error.ProgrammingError(
2619 2619 b'unhandled value for nodesorder: %s' % nodesorder
2620 2620 )
2621 2621
2622 2622 if nodesorder is None and not self._generaldelta:
2623 2623 nodesorder = b'storage'
2624 2624
2625 2625 if (
2626 2626 not self._storedeltachains
2627 2627 and deltamode != repository.CG_DELTAMODE_PREV
2628 2628 ):
2629 2629 deltamode = repository.CG_DELTAMODE_FULL
2630 2630
2631 2631 return storageutil.emitrevisions(
2632 2632 self,
2633 2633 nodes,
2634 2634 nodesorder,
2635 2635 revlogrevisiondelta,
2636 2636 deltaparentfn=self.deltaparent,
2637 2637 candeltafn=self.candelta,
2638 2638 rawsizefn=self.rawsize,
2639 2639 revdifffn=self.revdiff,
2640 2640 flagsfn=self.flags,
2641 2641 deltamode=deltamode,
2642 2642 revisiondata=revisiondata,
2643 2643 assumehaveparentrevisions=assumehaveparentrevisions,
2644 2644 sidedata_helpers=sidedata_helpers,
2645 2645 )
2646 2646
2647 2647 DELTAREUSEALWAYS = b'always'
2648 2648 DELTAREUSESAMEREVS = b'samerevs'
2649 2649 DELTAREUSENEVER = b'never'
2650 2650
2651 2651 DELTAREUSEFULLADD = b'fulladd'
2652 2652
2653 2653 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2654 2654
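# Editorial usage sketch for clone() below, assuming ``src`` and ``dst``
# are already-constructed revlogs and ``tr`` an open transaction (names
# are illustrative, not a complete recipe):
#
#     src.clone(tr, dst, deltareuse=revlog.DELTAREUSENEVER)
#
# DELTAREUSENEVER recomputes every delta, which a format-upgrading clone
# typically wants despite the extra cost; DELTAREUSEALWAYS is the fast
# path for straight copies.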
2655 2655 def clone(
2656 2656 self,
2657 2657 tr,
2658 2658 destrevlog,
2659 2659 addrevisioncb=None,
2660 2660 deltareuse=DELTAREUSESAMEREVS,
2661 2661 forcedeltabothparents=None,
2662 2662 sidedata_helpers=None,
2663 2663 ):
2664 2664 """Copy this revlog to another, possibly with format changes.
2665 2665
2666 2666 The destination revlog will contain the same revisions and nodes.
2667 2667 However, it may not be bit-for-bit identical due to e.g. delta encoding
2668 2668 differences.
2669 2669
2670 2671 The ``deltareuse`` argument controls how deltas from the existing revlog
2671 2671 are preserved in the destination revlog. The argument can have the
2672 2672 following values:
2673 2673
2674 2674 DELTAREUSEALWAYS
2675 2675 Deltas will always be reused (if possible), even if the destination
2676 2676 revlog would not select the same revisions for the delta. This is the
2677 2677 fastest mode of operation.
2678 2678 DELTAREUSESAMEREVS
2679 2679 Deltas will be reused if the destination revlog would pick the same
2680 2680 revisions for the delta. This mode strikes a balance between speed
2681 2681 and optimization.
2682 2682 DELTAREUSENEVER
2683 2683 Deltas will never be reused. This is the slowest mode of execution.
2684 2684 This mode can be used to recompute deltas (e.g. if the diff/delta
2685 2685 algorithm changes).
2686 2686 DELTAREUSEFULLADD
2687 2688 Revisions will be re-added as if they were new content. This is
2688 2689 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2689 2690 e.g. large file detection and handling.
2690 2690
2691 2691 Delta computation can be slow, so the choice of delta reuse policy can
2692 2692 significantly affect run time.
2693 2693
2694 2694 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2695 2695 two extremes. Deltas will be reused if they are appropriate. But if the
2696 2696 delta could choose a better revision, it will do so. This means if you
2697 2697 are converting a non-generaldelta revlog to a generaldelta revlog,
2698 2698 deltas will be recomputed if the delta's parent isn't a parent of the
2699 2699 revision.
2700 2700
2701 2701 In addition to the delta policy, the ``forcedeltabothparents``
2702 2703 argument controls whether to force computing deltas against both parents
2703 2704 for merges. When unset, the destination revlog's current setting is used.
2704 2704
2705 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
2705 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2706 `sidedata_helpers`.
2706 2707 """
2707 2708 if deltareuse not in self.DELTAREUSEALL:
2708 2709 raise ValueError(
2709 2710 _(b'value for deltareuse invalid: %s') % deltareuse
2710 2711 )
2711 2712
2712 2713 if len(destrevlog):
2713 2714 raise ValueError(_(b'destination revlog is not empty'))
2714 2715
2715 2716 if getattr(self, 'filteredrevs', None):
2716 2717 raise ValueError(_(b'source revlog has filtered revisions'))
2717 2718 if getattr(destrevlog, 'filteredrevs', None):
2718 2719 raise ValueError(_(b'destination revlog has filtered revisions'))
2719 2720
2720 2721 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2721 2722 # if possible.
2722 2723 oldlazydelta = destrevlog._lazydelta
2723 2724 oldlazydeltabase = destrevlog._lazydeltabase
2724 2725 oldamd = destrevlog._deltabothparents
2725 2726
2726 2727 try:
2727 2728 if deltareuse == self.DELTAREUSEALWAYS:
2728 2729 destrevlog._lazydeltabase = True
2729 2730 destrevlog._lazydelta = True
2730 2731 elif deltareuse == self.DELTAREUSESAMEREVS:
2731 2732 destrevlog._lazydeltabase = False
2732 2733 destrevlog._lazydelta = True
2733 2734 elif deltareuse == self.DELTAREUSENEVER:
2734 2735 destrevlog._lazydeltabase = False
2735 2736 destrevlog._lazydelta = False
2736 2737
2737 2738 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2738 2739
2739 2740 self._clone(
2740 2741 tr,
2741 2742 destrevlog,
2742 2743 addrevisioncb,
2743 2744 deltareuse,
2744 2745 forcedeltabothparents,
2745 2746 sidedata_helpers,
2746 2747 )
2747 2748
2748 2749 finally:
2749 2750 destrevlog._lazydelta = oldlazydelta
2750 2751 destrevlog._lazydeltabase = oldlazydeltabase
2751 2752 destrevlog._deltabothparents = oldamd
2752 2753
2753 2754 def _clone(
2754 2755 self,
2755 2756 tr,
2756 2757 destrevlog,
2757 2758 addrevisioncb,
2758 2759 deltareuse,
2759 2760 forcedeltabothparents,
2760 2761 sidedata_helpers,
2761 2762 ):
2762 2763 """perform the core duty of `revlog.clone` after parameter processing"""
2763 2764 deltacomputer = deltautil.deltacomputer(destrevlog)
2764 2765 index = self.index
2765 2766 for rev in self:
2766 2767 entry = index[rev]
2767 2768
2768 2769 # Some classes override linkrev to take filtered revs into
2769 2770 # account. Use raw entry from index.
2770 2771 flags = entry[0] & 0xFFFF
2771 2772 linkrev = entry[4]
2772 2773 p1 = index[entry[5]][7]
2773 2774 p2 = index[entry[6]][7]
2774 2775 node = entry[7]
2775 2776
2776 2777 # (Possibly) reuse the delta from the revlog if allowed and
2777 2778 # the revlog chunk is a delta.
2778 2779 cachedelta = None
2779 2780 rawtext = None
2780 2781 if deltareuse == self.DELTAREUSEFULLADD:
2781 2782 text, sidedata = self._revisiondata(rev)
2782 2783
2783 2784 if sidedata_helpers is not None:
2784 2785 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2785 2786 self, sidedata_helpers, sidedata, rev
2786 2787 )
2787 2788 flags = flags | new_flags[0] & ~new_flags[1]
2788 2789
2789 2790 destrevlog.addrevision(
2790 2791 text,
2791 2792 tr,
2792 2793 linkrev,
2793 2794 p1,
2794 2795 p2,
2795 2796 cachedelta=cachedelta,
2796 2797 node=node,
2797 2798 flags=flags,
2798 2799 deltacomputer=deltacomputer,
2799 2800 sidedata=sidedata,
2800 2801 )
2801 2802 else:
2802 2803 if destrevlog._lazydelta:
2803 2804 dp = self.deltaparent(rev)
2804 2805 if dp != nullrev:
2805 2806 cachedelta = (dp, bytes(self._chunk(rev)))
2806 2807
2807 2808 sidedata = None
2808 2809 if not cachedelta:
2809 2810 rawtext, sidedata = self._revisiondata(rev)
2810 2811 if sidedata is None:
2811 2812 sidedata = self.sidedata(rev)
2812 2813
2813 2814 if sidedata_helpers is not None:
2814 2815 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2815 2816 self, sidedata_helpers, sidedata, rev
2816 2817 )
2817 2818 flags = flags | new_flags[0] & ~new_flags[1]
2818 2819
2819 2820 ifh = destrevlog.opener(
2820 2821 destrevlog.indexfile, b'a+', checkambig=False
2821 2822 )
2822 2823 dfh = None
2823 2824 if not destrevlog._inline:
2824 2825 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2825 2826 try:
2826 2827 destrevlog._addrevision(
2827 2828 node,
2828 2829 rawtext,
2829 2830 tr,
2830 2831 linkrev,
2831 2832 p1,
2832 2833 p2,
2833 2834 flags,
2834 2835 cachedelta,
2835 2836 ifh,
2836 2837 dfh,
2837 2838 deltacomputer=deltacomputer,
2838 2839 sidedata=sidedata,
2839 2840 )
2840 2841 finally:
2841 2842 if dfh:
2842 2843 dfh.close()
2843 2844 ifh.close()
2844 2845
2845 2846 if addrevisioncb:
2846 2847 addrevisioncb(self, rev, node)
2847 2848
2848 2849 def censorrevision(self, tr, censornode, tombstone=b''):
2849 2850 if (self.version & 0xFFFF) == REVLOGV0:
2850 2851 raise error.RevlogError(
2851 2852 _(b'cannot censor with version %d revlogs') % self.version
2852 2853 )
2853 2854
2854 2855 censorrev = self.rev(censornode)
2855 2856 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2856 2857
2857 2858 if len(tombstone) > self.rawsize(censorrev):
2858 2859 raise error.Abort(
2859 2860 _(b'censor tombstone must be no longer than censored data')
2860 2861 )
2861 2862
2862 2863 # Rewriting the revlog in place is hard. Our strategy for censoring is
2863 2864 # to create a new revlog, copy all revisions to it, then replace the
2864 2865 # revlogs on transaction close.
2865 2866
2866 2867 newindexfile = self.indexfile + b'.tmpcensored'
2867 2868 newdatafile = self.datafile + b'.tmpcensored'
2868 2869
2869 2870 # This is a bit dangerous. We could easily have a mismatch of state.
2870 2871 newrl = revlog(
2871 2872 self.opener,
2872 2873 target=self.target,
2873 2874 indexfile=newindexfile,
2874 2875 datafile=newdatafile,
2875 2876 censorable=True,
2876 2877 )
2877 2878 newrl.version = self.version
2878 2879 newrl._generaldelta = self._generaldelta
2879 2880 newrl._parse_index = self._parse_index
2880 2881
2881 2882 for rev in self.revs():
2882 2883 node = self.node(rev)
2883 2884 p1, p2 = self.parents(node)
2884 2885
2885 2886 if rev == censorrev:
2886 2887 newrl.addrawrevision(
2887 2888 tombstone,
2888 2889 tr,
2889 2890 self.linkrev(censorrev),
2890 2891 p1,
2891 2892 p2,
2892 2893 censornode,
2893 2894 REVIDX_ISCENSORED,
2894 2895 )
2895 2896
2896 2897 if newrl.deltaparent(rev) != nullrev:
2897 2898 raise error.Abort(
2898 2899 _(
2899 2900 b'censored revision stored as delta; '
2900 2901 b'cannot censor'
2901 2902 ),
2902 2903 hint=_(
2903 2904 b'censoring of revlogs is not '
2904 2905 b'fully implemented; please report '
2905 2906 b'this bug'
2906 2907 ),
2907 2908 )
2908 2909 continue
2909 2910
2910 2911 if self.iscensored(rev):
2911 2912 if self.deltaparent(rev) != nullrev:
2912 2913 raise error.Abort(
2913 2914 _(
2914 2915 b'cannot censor due to censored '
2915 2916 b'revision having delta stored'
2916 2917 )
2917 2918 )
2918 2919 rawtext = self._chunk(rev)
2919 2920 else:
2920 2921 rawtext = self.rawdata(rev)
2921 2922
2922 2923 newrl.addrawrevision(
2923 2924 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2924 2925 )
2925 2926
2926 2927 tr.addbackup(self.indexfile, location=b'store')
2927 2928 if not self._inline:
2928 2929 tr.addbackup(self.datafile, location=b'store')
2929 2930
2930 2931 self.opener.rename(newrl.indexfile, self.indexfile)
2931 2932 if not self._inline:
2932 2933 self.opener.rename(newrl.datafile, self.datafile)
2933 2934
2934 2935 self.clearcaches()
2935 2936 self._loadindex()
2936 2937
2937 2938 def verifyintegrity(self, state):
2938 2939 """Verifies the integrity of the revlog.
2939 2940
2940 2941 Yields ``revlogproblem`` instances describing problems that are
2941 2942 found.
2942 2943 """
2943 2944 dd, di = self.checksize()
2944 2945 if dd:
2945 2946 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2946 2947 if di:
2947 2948 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2948 2949
2949 2950 version = self.version & 0xFFFF
2950 2951
2951 2952 # The verifier tells us what version revlog we should be.
2952 2953 if version != state[b'expectedversion']:
2953 2954 yield revlogproblem(
2954 2955 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2955 2956 % (self.indexfile, version, state[b'expectedversion'])
2956 2957 )
2957 2958
2958 2959 state[b'skipread'] = set()
2959 2960 state[b'safe_renamed'] = set()
2960 2961
2961 2962 for rev in self:
2962 2963 node = self.node(rev)
2963 2964
2964 2965 # Verify contents. 4 cases to care about:
2965 2966 #
2966 2967 # common: the most common case
2967 2968 # rename: with a rename
2968 2969 # meta: file content starts with b'\1\n', the metadata
2969 2970 # header defined in filelog.py, but without a rename
2970 2971 # ext: content stored externally
2971 2972 #
2972 2973 # More formally, their differences are shown below:
2973 2974 #
2974 2975 # | common | rename | meta | ext
2975 2976 # -------------------------------------------------------
2976 2977 # flags() | 0 | 0 | 0 | not 0
2977 2978 # renamed() | False | True | False | ?
2978 2979 # rawtext[0:2]=='\1\n'| False | True | True | ?
2979 2980 #
2980 2981 # "rawtext" means the raw text stored in revlog data, which
2981 2982 # could be retrieved by "rawdata(rev)". "text"
2982 2983 # mentioned below is "revision(rev)".
2983 2984 #
2984 2985 # There are 3 different lengths stored physically:
2985 2986 # 1. L1: rawsize, stored in revlog index
2986 2987 # 2. L2: len(rawtext), stored in revlog data
2987 2988 # 3. L3: len(text), stored in revlog data if flags==0, or
2988 2989 # possibly somewhere else if flags!=0
2989 2990 #
2990 2991 # L1 should be equal to L2. L3 could be different from them.
2991 2992 # "text" may or may not affect commit hash depending on flag
2992 2993 # processors (see flagutil.addflagprocessor).
2993 2994 #
2994 2995 # | common | rename | meta | ext
2995 2996 # -------------------------------------------------
2996 2997 # rawsize() | L1 | L1 | L1 | L1
2997 2998 # size() | L1 | L2-LM | L1(*) | L1 (?)
2998 2999 # len(rawtext) | L2 | L2 | L2 | L2
2999 3000 # len(text) | L2 | L2 | L2 | L3
3000 3001 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3001 3002 #
3002 3003 # LM: length of metadata, depending on rawtext
3003 3004 # (*): not ideal, see comment in filelog.size
3004 3005 # (?): could be "- len(meta)" if the resolved content has
3005 3006 # rename metadata
3006 3007 #
3007 3008 # Checks needed to be done:
3008 3009 # 1. length check: L1 == L2, in all cases.
3009 3010 # 2. hash check: depending on flag processor, we may need to
3010 3011 # use either "text" (external), or "rawtext" (in revlog).
3011 3012
3012 3013 try:
3013 3014 skipflags = state.get(b'skipflags', 0)
3014 3015 if skipflags:
3015 3016 skipflags &= self.flags(rev)
3016 3017
3017 3018 _verify_revision(self, skipflags, state, node)
3018 3019
3019 3020 l1 = self.rawsize(rev)
3020 3021 l2 = len(self.rawdata(node))
3021 3022
3022 3023 if l1 != l2:
3023 3024 yield revlogproblem(
3024 3025 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3025 3026 node=node,
3026 3027 )
3027 3028
3028 3029 except error.CensoredNodeError:
3029 3030 if state[b'erroroncensored']:
3030 3031 yield revlogproblem(
3031 3032 error=_(b'censored file data'), node=node
3032 3033 )
3033 3034 state[b'skipread'].add(node)
3034 3035 except Exception as e:
3035 3036 yield revlogproblem(
3036 3037 error=_(b'unpacking %s: %s')
3037 3038 % (short(node), stringutil.forcebytestr(e)),
3038 3039 node=node,
3039 3040 )
3040 3041 state[b'skipread'].add(node)
3041 3042
3042 3043 def storageinfo(
3043 3044 self,
3044 3045 exclusivefiles=False,
3045 3046 sharedfiles=False,
3046 3047 revisionscount=False,
3047 3048 trackedsize=False,
3048 3049 storedsize=False,
3049 3050 ):
3050 3051 d = {}
3051 3052
3052 3053 if exclusivefiles:
3053 3054 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3054 3055 if not self._inline:
3055 3056 d[b'exclusivefiles'].append((self.opener, self.datafile))
3056 3057
3057 3058 if sharedfiles:
3058 3059 d[b'sharedfiles'] = []
3059 3060
3060 3061 if revisionscount:
3061 3062 d[b'revisionscount'] = len(self)
3062 3063
3063 3064 if trackedsize:
3064 3065 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3065 3066
3066 3067 if storedsize:
3067 3068 d[b'storedsize'] = sum(
3068 3069 self.opener.stat(path).st_size for path in self.files()
3069 3070 )
3070 3071
3071 3072 return d
3072 3073
3073 3074 def rewrite_sidedata(self, helpers, startrev, endrev):
3074 3075 if self.version & 0xFFFF != REVLOGV2:
3075 3076 return
3076 3077 # inline revlogs are not yet supported because they suffer from an issue
3077 3078 # when rewriting them (since it's not an append-only operation).
3078 3079 # See issue6485.
3079 3080 assert not self._inline
3080 3081 if not helpers[1] and not helpers[2]:
3081 3082 # Nothing to generate or remove
3082 3083 return
3083 3084
3084 3085 new_entries = []
3085 3086 # append the new sidedata
3086 3087 with self._datafp(b'a+') as fp:
3087 3088 # Maybe this bug still exists (see revlog._writeentry): seek to EOF explicitly, as 'a+' mode does not guarantee the file position
3088 3089 fp.seek(0, os.SEEK_END)
3089 3090 current_offset = fp.tell()
3090 3091 for rev in range(startrev, endrev + 1):
3091 3092 entry = self.index[rev]
3092 3093 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3093 3094 store=self,
3094 3095 sidedata_helpers=helpers,
3095 3096 sidedata={},
3096 3097 rev=rev,
3097 3098 )
3098 3099
3099 3100 serialized_sidedata = sidedatautil.serialize_sidedata(
3100 3101 new_sidedata
3101 3102 )
3102 3103 if entry[8] != 0 or entry[9] != 0:
3103 3104 # rewriting entries that already have sidedata is not
3104 3105 # supported yet, because it introduces garbage data in the
3105 3106 # revlog.
3106 3107 msg = b"Rewriting existing sidedata is not supported yet"
3107 3108 raise error.Abort(msg)
3108 3109
3109 3110 # Apply (potential) flags to add and to remove after running
3110 3111 # the sidedata helpers
3111 3112 new_offset_flags = entry[0] | (flags[0] & ~flags[1])
3112 3113 entry = (new_offset_flags,) + entry[1:8]
3113 3114 entry += (current_offset, len(serialized_sidedata))
3114 3115
3115 3116 fp.write(serialized_sidedata)
3116 3117 new_entries.append(entry)
3117 3118 current_offset += len(serialized_sidedata)
3118 3119
3119 3120 # rewrite the new index entries
3120 3121 with self._indexfp(b'w+') as fp:
3121 3122 fp.seek(startrev * self.index.entry_size)
3122 3123 for i, e in enumerate(new_entries):
3123 3124 rev = startrev + i
3124 3125 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3125 3126 packed = self.index.entry_binary(rev)
3126 3127 if rev == 0:
3127 3128 header = self.index.pack_header(self.version)
3128 3129 packed = header + packed
3129 3130 fp.write(packed)
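
# A condensed sketch of the two-phase rewrite above (index field meanings
# are inferred from this function: entry[0] is the packed offset+flags,
# entry[8] the sidedata offset, entry[9] the sidedata length):
#
#   phase 1: append each revision's newly serialized sidedata to the data
#            file, recording (offset, length) for every rev;
#   phase 2: seek to startrev * entry_size in the index file and overwrite
#            the affected entries with the updated offsets, lengths and
#            flags.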
@@ -1,155 +1,175 b''
1 1 # sidedata.py - Logic around store extra data alongside revlog revisions
2 2 #
3 3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 """core code for "sidedata" support
8 8
9 9 The "sidedata" are stored alongside the revision without actually being part of
10 10 its content and not affecting its hash. It's main use cases is to cache
11 11 important information related to a changesets.
12 12
13 13 The current implementation is experimental and subject to change. Do not rely
14 14 on it in production.
15 15
16 16 Sidedata are stored in the revlog itself, thanks to a new version of the
17 17 revlog. The following format is currently used::
18 18
19 19 initial header:
20 20 <number of sidedata; 2 bytes>
21 21 sidedata (repeated N times):
22 22 <sidedata-key; 2 bytes>
23 23 <sidedata-entry-length: 4 bytes>
24 24 <sidedata-content-sha1-digest: 20 bytes>
25 25 <sidedata-content; X bytes>
26 26 normal raw text:
27 27 <all bytes remaining in the rawtext>
28 28
29 29 This is a simple and effective format. It should be enough to experiment with
30 30 the concept.
31 31 """
32 32
33 33 from __future__ import absolute_import
34 34
35 35 import collections
36 36 import struct
37 37
38 38 from .. import error, requirements as requirementsmod
39 39 from ..revlogutils import constants, flagutil
40 40 from ..utils import hashutil
41 41
42 42 ## sidedata type constant
43 43 # reserve a block for testing purposes.
44 44 SD_TEST1 = 1
45 45 SD_TEST2 = 2
46 46 SD_TEST3 = 3
47 47 SD_TEST4 = 4
48 48 SD_TEST5 = 5
49 49 SD_TEST6 = 6
50 50 SD_TEST7 = 7
51 51
52 52 # key to store copies related information
53 53 SD_P1COPIES = 8
54 54 SD_P2COPIES = 9
55 55 SD_FILESADDED = 10
56 56 SD_FILESREMOVED = 11
57 57 SD_FILES = 12
58 58
59 59 # internal format constant
60 60 SIDEDATA_HEADER = struct.Struct('>H')
61 61 SIDEDATA_ENTRY = struct.Struct('>HL20s')
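
# Byte-layout sketch for a single-entry blob such as {SD_TEST1: b'foo'},
# following the module docstring above:
#
#   \x00\x01              number of entries (SIDEDATA_HEADER, '>H')
#   \x00\x01              sidedata key (SD_TEST1)
#   \x00\x00\x00\x03      entry length (len(b'foo'))
#   <20 bytes>            sha1 digest of b'foo'
#   foo                   entry content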
62 62
63 63
64 64 def serialize_sidedata(sidedata):
65 65 sidedata = list(sidedata.items())
66 66 sidedata.sort()
67 67 buf = [SIDEDATA_HEADER.pack(len(sidedata))]
68 68 for key, value in sidedata:
69 69 digest = hashutil.sha1(value).digest()
70 70 buf.append(SIDEDATA_ENTRY.pack(key, len(value), digest))
71 71 for key, value in sidedata:
72 72 buf.append(value)
73 73 buf = b''.join(buf)
74 74 return buf
75 75
76 76
77 77 def deserialize_sidedata(blob):
78 78 sidedata = {}
79 79 offset = 0
80 80 (nbentry,) = SIDEDATA_HEADER.unpack(blob[: SIDEDATA_HEADER.size])
81 81 offset += SIDEDATA_HEADER.size
82 82 dataoffset = SIDEDATA_HEADER.size + (SIDEDATA_ENTRY.size * nbentry)
83 83 for i in range(nbentry):
84 84 nextoffset = offset + SIDEDATA_ENTRY.size
85 85 key, size, storeddigest = SIDEDATA_ENTRY.unpack(blob[offset:nextoffset])
86 86 offset = nextoffset
87 87 # read the data associated with that entry
88 88 nextdataoffset = dataoffset + size
89 89 entrytext = bytes(blob[dataoffset:nextdataoffset])
90 90 readdigest = hashutil.sha1(entrytext).digest()
91 91 if storeddigest != readdigest:
92 92 raise error.SidedataHashError(key, storeddigest, readdigest)
93 93 sidedata[key] = entrytext
94 94 dataoffset = nextdataoffset
95 95 return sidedata
96 96
97 97
98 98 def get_sidedata_helpers(repo, remote_sd_categories, pull=False):
99 """
100 Returns a tuple of `(repo, computers, removers)`, where `computers` and
101 `removers` are dictionaries mapping revlog types to lists of
102 `(keys, computer, flags)` helpers:
103 * `repo` is used as an argument for computers
104 * `computers` holds the helpers computing the sidedata categories that
105 were requested but are missing:
106 * `keys` are the sidedata keys to be affected
107 * `flags` is a bitmask (an integer) of flags to remove when
108 removing the category.
109 * `computer` is the function `(repo, store, rev, sidedata)` that
110 returns a tuple of
111 `(new sidedata dict, (flags to add, flags to remove))`.
112 For example, it will return `({}, (0, 1 << 15))` to return no
113 sidedata, with no flags to add and one flag to remove.
114 * `removers` holds the helpers for the categories that are present but
115 not needed; their keys are removed from the sidedata.
116 If both `computers` and `removers` are empty, sidedata will simply not
117 be transformed.
118 """
99 119 # Computers for computing sidedata on-the-fly
100 120 sd_computers = collections.defaultdict(list)
101 121 # Computers for categories to remove from sidedata
102 122 sd_removers = collections.defaultdict(list)
103 123 to_generate = remote_sd_categories - repo._wanted_sidedata
104 124 to_remove = repo._wanted_sidedata - remote_sd_categories
105 125 if pull:
106 126 to_generate, to_remove = to_remove, to_generate
107 127
108 128 for revlog_kind, computers in repo._sidedata_computers.items():
109 129 for category, computer in computers.items():
110 130 if category in to_generate:
111 131 sd_computers[revlog_kind].append(computer)
112 132 if category in to_remove:
113 133 sd_removers[revlog_kind].append(computer)
114 134
115 135 sidedata_helpers = (repo, sd_computers, sd_removers)
116 136 return sidedata_helpers
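
# Shape sketch of the returned value, based on the registration done in
# set_sidedata_spec_for_repo below (helpers are assumed to be stored as
# `(keys, computer, flags)` triples):
#
#   (
#       repo,
#       {constants.KIND_CHANGELOG: [
#           ((SD_FILES,),
#            metadata.copies_sidedata_computer,
#            flagutil.REVIDX_HASCOPIESINFO),
#       ]},
#       {},  # nothing to remove
#   )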
117 137
118 138
119 139 def run_sidedata_helpers(store, sidedata_helpers, sidedata, rev):
120 140 """Returns the sidedata for the given revision after running through
121 141 the given helpers.
122 142 - `store`: the revlog this applies to (changelog, manifest, or filelog
123 143 instance)
124 - `sidedata_helpers`: see `storageutil.emitrevisions`
144 - `sidedata_helpers`: see `get_sidedata_helpers`
125 145 - `sidedata`: previous sidedata at the given rev, if any
126 146 - `rev`: affected rev of `store`
127 147 """
128 148 repo, sd_computers, sd_removers = sidedata_helpers
129 149 kind = store.revlog_kind
130 150 flags_to_add = 0
131 151 flags_to_remove = 0
132 152 for _keys, sd_computer, _flags in sd_computers.get(kind, []):
133 153 sidedata, flags = sd_computer(repo, store, rev, sidedata)
134 154 flags_to_add |= flags[0]
135 155 flags_to_remove |= flags[1]
136 156 for keys, _computer, flags in sd_removers.get(kind, []):
137 157 for key in keys:
138 158 sidedata.pop(key, None)
139 159 flags_to_remove |= flags
140 160 return sidedata, (flags_to_add, flags_to_remove)
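
# A hypothetical computer, to make the calling convention concrete (the
# name and behaviour are illustrative only):
#
# def compute_example(repo, store, rev, sidedata):
#     sidedata[SD_TEST1] = b'%d' % rev
#     return sidedata, (0, 0)  # no flags to add, none to remove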
141 161
142 162
143 163 def set_sidedata_spec_for_repo(repo):
144 164 # prevent cycle metadata -> revlogutils.sidedata -> metadata
145 165 from .. import metadata
146 166
147 167 if requirementsmod.COPIESSDC_REQUIREMENT in repo.requirements:
148 168 repo.register_wanted_sidedata(SD_FILES)
149 169 repo.register_sidedata_computer(
150 170 constants.KIND_CHANGELOG,
151 171 SD_FILES,
152 172 (SD_FILES,),
153 173 metadata.copies_sidedata_computer,
154 174 flagutil.REVIDX_HASCOPIESINFO,
155 175 )
@@ -1,561 +1,544 b''
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import struct
12 12
13 13 from ..i18n import _
14 14 from ..node import (
15 15 bin,
16 16 nullrev,
17 17 sha1nodeconstants,
18 18 )
19 19 from .. import (
20 20 dagop,
21 21 error,
22 22 mdiff,
23 23 pycompat,
24 24 )
25 25 from ..interfaces import repository
26 26 from ..revlogutils import sidedata as sidedatamod
27 27 from ..utils import hashutil
28 28
29 29 _nullhash = hashutil.sha1(sha1nodeconstants.nullid)
30 30
31 31 # revision data contains extra metadata not part of the official digest
32 32 # Only used in changegroup >= v4.
33 33 CG_FLAG_SIDEDATA = 1
34 34
35 35
36 36 def hashrevisionsha1(text, p1, p2):
37 37 """Compute the SHA-1 for revision data and its parents.
38 38
39 39 This hash combines both the current file contents and its history
40 40 in a manner that makes it easy to distinguish nodes with the same
41 41 content in the revision graph.
42 42 """
43 43 # As of now, if one of the parent nodes is null, p2 is null
44 44 if p2 == sha1nodeconstants.nullid:
45 45 # deep copy of a hash is faster than creating one
46 46 s = _nullhash.copy()
47 47 s.update(p1)
48 48 else:
49 49 # none of the parent nodes are nullid
50 50 if p1 < p2:
51 51 a = p1
52 52 b = p2
53 53 else:
54 54 a = p2
55 55 b = p1
56 56 s = hashutil.sha1(a)
57 57 s.update(b)
58 58 s.update(text)
59 59 return s.digest()
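
# Equivalent plain-hashlib sketch (nullid is 20 zero bytes for SHA-1
# revlogs, so the p2 == nullid fast path hashes the same bytes):
#
# >>> import hashlib
# >>> def naive(text, p1, p2):
# ...     a, b = sorted([p1, p2])
# ...     return hashlib.sha1(a + b + text).digest()
# >>> p1, p2 = b'\x11' * 20, b'\x22' * 20
# >>> naive(b't', p1, p2) == hashrevisionsha1(b't', p1, p2)
# True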
60 60
61 61
62 62 METADATA_RE = re.compile(b'\x01\n')
63 63
64 64
65 65 def parsemeta(text):
66 66 """Parse metadata header from revision data.
67 67
68 68 Returns a 2-tuple of (metadata, offset), where both can be None if there
69 69 is no metadata.
70 70 """
71 71 # text can be a buffer, so we can't use .startswith or .index
72 72 if text[:2] != b'\x01\n':
73 73 return None, None
74 74 s = METADATA_RE.search(text, 2).start()
75 75 mtext = text[2:s]
76 76 meta = {}
77 77 for l in mtext.splitlines():
78 78 k, v = l.split(b': ', 1)
79 79 meta[k] = v
80 80 return meta, s + 2
81 81
82 82
83 83 def packmeta(meta, text):
84 84 """Add metadata to fulltext to produce revision text."""
85 85 keys = sorted(meta)
86 86 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
87 87 return b'\x01\n%s\x01\n%s' % (metatext, text)
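
# Round-trip sketch of the metadata framing (doctest-style):
#
# >>> blob = packmeta({b'copy': b'a'}, b'data')
# >>> blob == b'\x01\ncopy: a\n\x01\ndata'
# True
# >>> parsemeta(blob) == ({b'copy': b'a'}, 12)
# True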
88 88
89 89
90 90 def iscensoredtext(text):
91 91 meta = parsemeta(text)[0]
92 92 return meta and b'censored' in meta
93 93
94 94
95 95 def filtermetadata(text):
96 96 """Extract just the revision data from source text.
97 97
98 98 Returns ``text`` unless it has a metadata header, in which case we return
99 99 a new buffer without the metadata.
100 100 """
101 101 if not text.startswith(b'\x01\n'):
102 102 return text
103 103
104 104 offset = text.index(b'\x01\n', 2)
105 105 return text[offset + 2 :]
106 106
107 107
108 108 def filerevisioncopied(store, node):
109 109 """Resolve file revision copy metadata.
110 110
111 111 Returns ``False`` if the file has no copy metadata. Otherwise a
112 112 2-tuple of the source filename and node.
113 113 """
114 114 if store.parents(node)[0] != sha1nodeconstants.nullid:
115 115 return False
116 116
117 117 meta = parsemeta(store.revision(node))[0]
118 118
119 119 # copy and copyrev occur in pairs. In rare cases due to old bugs,
120 120 # one can occur without the other. So ensure both are present to flag
121 121 # as a copy.
122 122 if meta and b'copy' in meta and b'copyrev' in meta:
123 123 return meta[b'copy'], bin(meta[b'copyrev'])
124 124
125 125 return False
126 126
127 127
128 128 def filedataequivalent(store, node, filedata):
129 129 """Determines whether file data is equivalent to a stored node.
130 130
131 131 Returns True if the passed file data would hash to the same value
132 132 as a stored revision and False otherwise.
133 133
134 134 When a stored revision is censored, filedata must be empty to have
135 135 equivalence.
136 136
137 137 When a stored revision has copy metadata, it is ignored as part
138 138 of the compare.
139 139 """
140 140
141 141 if filedata.startswith(b'\x01\n'):
142 142 revisiontext = b'\x01\n\x01\n' + filedata
143 143 else:
144 144 revisiontext = filedata
145 145
146 146 p1, p2 = store.parents(node)
147 147
148 148 computednode = hashrevisionsha1(revisiontext, p1, p2)
149 149
150 150 if computednode == node:
151 151 return True
152 152
153 153 # Censored files compare against the empty file.
154 154 if store.iscensored(store.rev(node)):
155 155 return filedata == b''
156 156
157 157 # Renaming a file produces a different hash, even if the data
158 158 # remains unchanged. Check if that's the case.
159 159 if store.renamed(node):
160 160 return store.read(node) == filedata
161 161
162 162 return False
163 163
164 164
165 165 def iterrevs(storelen, start=0, stop=None):
166 166 """Iterate over revision numbers in a store."""
167 167 step = 1
168 168
169 169 if stop is not None:
170 170 if start > stop:
171 171 step = -1
172 172 stop += step
173 173 if stop > storelen:
174 174 stop = storelen
175 175 else:
176 176 stop = storelen
177 177
178 178 return pycompat.xrange(start, stop, step)
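
# Note that `stop` is inclusive and iteration may run backwards, e.g. for
# a store of length 5:
#
# >>> list(iterrevs(5)) == [0, 1, 2, 3, 4]
# True
# >>> list(iterrevs(5, start=3, stop=1)) == [3, 2, 1]
# True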
179 179
180 180
181 181 def fileidlookup(store, fileid, identifier):
182 182 """Resolve the file node for a value.
183 183
184 184 ``store`` is an object implementing the ``ifileindex`` interface.
185 185
186 186 ``fileid`` can be:
187 187
188 188 * A 20 or 32 byte binary node.
189 189 * An integer revision number.
190 190 * A 40 or 64 byte hex node.
191 191 * A bytes string that can be parsed as an integer revision number.
192 192
193 193 ``identifier`` is used to populate ``error.LookupError`` with an identifier
194 194 for the store.
195 195
196 196 Raises ``error.LookupError`` on failure.
197 197 """
198 198 if isinstance(fileid, int):
199 199 try:
200 200 return store.node(fileid)
201 201 except IndexError:
202 202 raise error.LookupError(
203 203 b'%d' % fileid, identifier, _(b'no match found')
204 204 )
205 205
206 206 if len(fileid) in (20, 32):
207 207 try:
208 208 store.rev(fileid)
209 209 return fileid
210 210 except error.LookupError:
211 211 pass
212 212
213 213 if len(fileid) in (40, 64):
214 214 try:
215 215 rawnode = bin(fileid)
216 216 store.rev(rawnode)
217 217 return rawnode
218 218 except TypeError:
219 219 pass
220 220
221 221 try:
222 222 rev = int(fileid)
223 223
224 224 if b'%d' % rev != fileid:
225 225 raise ValueError
226 226
227 227 try:
228 228 return store.node(rev)
229 229 except (IndexError, TypeError):
230 230 pass
231 231 except (ValueError, OverflowError):
232 232 pass
233 233
234 234 raise error.LookupError(fileid, identifier, _(b'no match found'))
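
# Resolution order, summarized: integer rev -> binary node (20/32 bytes)
# -> hex node (40/64 bytes) -> decimal bytes string; the first match wins.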
235 235
236 236
237 237 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
238 238 """Resolve information needed to strip revisions.
239 239
240 240 Finds the minimum revision number that must be stripped in order to
241 241 strip ``minlinkrev``.
242 242
243 243 Returns a 2-tuple of the minimum revision number to do that and a set
244 244 of all revision numbers that have linkrevs that would be broken
245 245 by that strip.
246 246
247 247 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
248 248 ``headrevs`` is an iterable of head revisions.
249 249 ``linkrevfn`` is a callable that receives a revision and returns a linked
250 250 revision.
251 251 ``parentrevsfn`` is a callable that receives a revision number and returns
252 252 an iterable of its parent revision numbers.
253 253 """
254 254 brokenrevs = set()
255 255 strippoint = tiprev + 1
256 256
257 257 heads = {}
258 258 futurelargelinkrevs = set()
259 259 for head in headrevs:
260 260 headlinkrev = linkrevfn(head)
261 261 heads[head] = headlinkrev
262 262 if headlinkrev >= minlinkrev:
263 263 futurelargelinkrevs.add(headlinkrev)
264 264
265 265 # This algorithm involves walking down the rev graph, starting at the
266 266 # heads. Since the revs are topologically sorted according to linkrev,
267 267 # once all head linkrevs are below the minlink, we know there are
268 268 # no more revs that could have a linkrev greater than minlink.
269 269 # So we can stop walking.
270 270 while futurelargelinkrevs:
271 271 strippoint -= 1
272 272 linkrev = heads.pop(strippoint)
273 273
274 274 if linkrev < minlinkrev:
275 275 brokenrevs.add(strippoint)
276 276 else:
277 277 futurelargelinkrevs.remove(linkrev)
278 278
279 279 for p in parentrevsfn(strippoint):
280 280 if p != nullrev:
281 281 plinkrev = linkrevfn(p)
282 282 heads[p] = plinkrev
283 283 if plinkrev >= minlinkrev:
284 284 futurelargelinkrevs.add(plinkrev)
285 285
286 286 return strippoint, brokenrevs
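
# Tiny worked example (hypothetical linear store where linkrev(rev) == rev
# and each rev's sole parent is rev - 1):
#
# >>> resolvestripinfo(3, 4, [4], lambda r: r, lambda r: [r - 1]) == (3, set())
# True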
287 287
288 288
289 289 def emitrevisions(
290 290 store,
291 291 nodes,
292 292 nodesorder,
293 293 resultcls,
294 294 deltaparentfn=None,
295 295 candeltafn=None,
296 296 rawsizefn=None,
297 297 revdifffn=None,
298 298 flagsfn=None,
299 299 deltamode=repository.CG_DELTAMODE_STD,
300 300 revisiondata=False,
301 301 assumehaveparentrevisions=False,
302 302 sidedata_helpers=None,
303 303 ):
304 304 """Generic implementation of ifiledata.emitrevisions().
305 305
306 306 Emitting revision data is subtly complex. This function attempts to
307 307 encapsulate all the logic for doing so in a backend-agnostic way.
308 308
309 309 ``store``
310 310 Object conforming to ``ifilestorage`` interface.
311 311
312 312 ``nodes``
313 313 List of revision nodes whose data to emit.
314 314
315 315 ``resultcls``
316 316 A type implementing the ``irevisiondelta`` interface that will be
317 317 constructed and returned.
318 318
319 319 ``deltaparentfn`` (optional)
320 320 Callable receiving a revision number and returning the revision number
321 321 of a revision that the internal delta is stored against. This delta
322 322 will be preferred over computing a new arbitrary delta.
323 323
324 324 If not defined, a delta will always be computed from raw revision
325 325 data.
326 326
327 327 ``candeltafn`` (optional)
328 328 Callable receiving a pair of revision numbers that returns a bool
329 329 indicating whether a delta between them can be produced.
330 330
331 331 If not defined, it is assumed that any two revisions can delta with
332 332 each other.
333 333
334 334 ``rawsizefn`` (optional)
335 335 Callable receiving a revision number and returning the length of the
336 336 ``store.rawdata(rev)``.
337 337
338 338 If not defined, ``len(store.rawdata(rev))`` will be called.
339 339
340 340 ``revdifffn`` (optional)
341 341 Callable receiving a pair of revision numbers that returns a delta
342 342 between them.
343 343
344 344 If not defined, a delta will be computed by invoking mdiff code
345 345 on ``store.revision()`` results.
346 346
347 347 Defining this function allows a precomputed or stored delta to be
348 348 used without having to compute one.
349 349
350 350 ``flagsfn`` (optional)
351 351 Callable receiving a revision number and returning the integer flags
352 352 value for it. If not defined, the flags value will be 0.
353 353
354 354 ``deltamode``
355 355 constraint on the delta to be sent:
356 356 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
357 357 * CG_DELTAMODE_PREV - only delta against "prev",
358 358 * CG_DELTAMODE_FULL - only issue full snapshot.
359 359
360 360 This controls whether fulltext revisions are sent instead of deltas, when allowed.
361 361
362 362 ``nodesorder``
363 363 ``revisiondata``
364 364 ``assumehaveparentrevisions``
365 365 ``sidedata_helpers`` (optional)
366 366 If not None, means that sidedata should be included.
367 A dictionary of revlog type to tuples of `(repo, computers, removers)`:
368 * `repo` is used as an argument for computers
369 * `computers` is a list of `(category, (keys, computer, flags)` that
370 compute the missing sidedata categories that were asked:
371 * `category` is the sidedata category
372 * `keys` are the sidedata keys to be affected
373 * `flags` is a bitmask (an integer) of flags to remove when
374 removing the category.
375 * `computer` is the function `(repo, store, rev, sidedata)` that
376 returns a tuple of
377 `(new sidedata dict, (flags to add, flags to remove))`.
378 For example, it will return `({}, (0, 1 << 15))` to return no
379 sidedata, with no flags to add and one flag to remove.
380 * `removers` will remove the keys corresponding to the categories
381 that are present, but not needed.
382 If both `computers` and `removers` are empty, sidedata are simply not
383 transformed.
384 Revlog types are `changelog`, `manifest` or `filelog`.
367 See `revlogutil.sidedata.get_sidedata_helpers`.
385 368 """
386 369
387 370 fnode = store.node
388 371 frev = store.rev
389 372
390 373 if nodesorder == b'nodes':
391 374 revs = [frev(n) for n in nodes]
392 375 elif nodesorder == b'linear':
393 376 revs = {frev(n) for n in nodes}
394 377 revs = dagop.linearize(revs, store.parentrevs)
395 378 else: # storage and default
396 379 revs = sorted(frev(n) for n in nodes)
397 380
398 381 prevrev = None
399 382
400 383 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
401 384 prevrev = store.parentrevs(revs[0])[0]
402 385
403 386 # Set of revs available to delta against.
404 387 available = set()
405 388
406 389 for rev in revs:
407 390 if rev == nullrev:
408 391 continue
409 392
410 393 node = fnode(rev)
411 394 p1rev, p2rev = store.parentrevs(rev)
412 395
413 396 if deltaparentfn:
414 397 deltaparentrev = deltaparentfn(rev)
415 398 else:
416 399 deltaparentrev = nullrev
417 400
418 401 # Forced delta against previous mode.
419 402 if deltamode == repository.CG_DELTAMODE_PREV:
420 403 baserev = prevrev
421 404
422 405 # We're instructed to send fulltext. Honor that.
423 406 elif deltamode == repository.CG_DELTAMODE_FULL:
424 407 baserev = nullrev
425 408 # We're instructed to use p1. Honor that
426 409 elif deltamode == repository.CG_DELTAMODE_P1:
427 410 baserev = p1rev
428 411
429 412 # There is a delta in storage. We try to use that because it
430 413 # amounts to effectively copying data from storage and is
431 414 # therefore the fastest.
432 415 elif deltaparentrev != nullrev:
433 416 # Base revision was already emitted in this group. We can
434 417 # always safely use the delta.
435 418 if deltaparentrev in available:
436 419 baserev = deltaparentrev
437 420
438 421 # Base revision is a parent that hasn't been emitted already.
439 422 # Use it if we can assume the receiver has the parent revision.
440 423 elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):
441 424 baserev = deltaparentrev
442 425
443 426 # No guarantee the receiver has the delta parent. Send delta
444 427 # against last revision (if possible), which in the common case
445 428 # should be similar enough to this revision that the delta is
446 429 # reasonable.
447 430 elif prevrev is not None:
448 431 baserev = prevrev
449 432 else:
450 433 baserev = nullrev
451 434
452 435 # Storage has a fulltext revision.
453 436
454 437 # Let's use the previous revision, which is as good a guess as any.
455 438 # There is definitely room to improve this logic.
456 439 elif prevrev is not None:
457 440 baserev = prevrev
458 441 else:
459 442 baserev = nullrev
460 443
461 444 # But we can't actually use our chosen delta base for whatever
462 445 # reason. Reset to fulltext.
463 446 if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
464 447 baserev = nullrev
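
# In short, the preference order for the delta base is: forced prev /
# fulltext / p1 per deltamode, then the stored delta parent (if already
# emitted in this group or assumed present on the receiver), then the
# previously emitted revision, then fulltext; a base rejected by
# candeltafn is reset to fulltext as well.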
465 448
466 449 revision = None
467 450 delta = None
468 451 baserevisionsize = None
469 452
470 453 if revisiondata:
471 454 if store.iscensored(baserev) or store.iscensored(rev):
472 455 try:
473 456 revision = store.rawdata(node)
474 457 except error.CensoredNodeError as e:
475 458 revision = e.tombstone
476 459
477 460 if baserev != nullrev:
478 461 if rawsizefn:
479 462 baserevisionsize = rawsizefn(baserev)
480 463 else:
481 464 baserevisionsize = len(store.rawdata(baserev))
482 465
483 466 elif (
484 467 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
485 468 ):
486 469 revision = store.rawdata(node)
487 470 available.add(rev)
488 471 else:
489 472 if revdifffn:
490 473 delta = revdifffn(baserev, rev)
491 474 else:
492 475 delta = mdiff.textdiff(
493 476 store.rawdata(baserev), store.rawdata(rev)
494 477 )
495 478
496 479 available.add(rev)
497 480
498 481 serialized_sidedata = None
499 482 sidedata_flags = (0, 0)
500 483 if sidedata_helpers:
501 484 old_sidedata = store.sidedata(rev)
502 485 sidedata, sidedata_flags = sidedatamod.run_sidedata_helpers(
503 486 store=store,
504 487 sidedata_helpers=sidedata_helpers,
505 488 sidedata=old_sidedata,
506 489 rev=rev,
507 490 )
508 491 if sidedata:
509 492 serialized_sidedata = sidedatamod.serialize_sidedata(sidedata)
510 493
511 494 flags = flagsfn(rev) if flagsfn else 0
512 495 protocol_flags = 0
513 496 if serialized_sidedata:
514 497 # Advertise that sidedata exists to the other side
515 498 protocol_flags |= CG_FLAG_SIDEDATA
516 499 # Computers and removers can return flags to add and/or remove
517 500 flags = flags | (sidedata_flags[0] & ~sidedata_flags[1])
518 501
519 502 yield resultcls(
520 503 node=node,
521 504 p1node=fnode(p1rev),
522 505 p2node=fnode(p2rev),
523 506 basenode=fnode(baserev),
524 507 flags=flags,
525 508 baserevisionsize=baserevisionsize,
526 509 revision=revision,
527 510 delta=delta,
528 511 sidedata=serialized_sidedata,
529 512 protocol_flags=protocol_flags,
530 513 )
531 514
532 515 prevrev = rev
533 516
534 517
535 518 def deltaiscensored(delta, baserev, baselenfn):
536 519 """Determine if a delta represents censored revision data.
537 520
538 521 ``baserev`` is the base revision this delta is encoded against.
539 522 ``baselenfn`` is a callable receiving a revision number that resolves the
540 523 length of the revision fulltext.
541 524
542 525 Returns a bool indicating if the result of the delta represents a censored
543 526 revision.
544 527 """
545 528 # Fragile heuristic: unless new file meta keys are added alphabetically
546 529 # preceding "censored", all censored revisions are prefixed by
547 530 # "\1\ncensored:". A delta producing such a censored revision must be a
548 531 # full-replacement delta, so we inspect the first and only patch in the
549 532 # delta for this prefix.
550 533 hlen = struct.calcsize(b">lll")
551 534 if len(delta) <= hlen:
552 535 return False
553 536
554 537 oldlen = baselenfn(baserev)
555 538 newlen = len(delta) - hlen
556 539 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
557 540 return False
558 541
559 542 add = b"\1\ncensored:"
560 543 addlen = len(add)
561 544 return newlen >= addlen and delta[hlen : hlen + addlen] == add
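
# Sketch of the heuristic above: a censoring delta is a single
# full-replacement hunk whose new text starts with the censored marker.
#
# >>> base = b'old content'
# >>> tomb = b'\x01\ncensored: gone\n\x01\n'
# >>> delta = mdiff.replacediffheader(len(base), len(tomb)) + tomb
# >>> deltaiscensored(delta, 0, lambda rev: len(base))
# True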