revlog: decouple caching from addrevision callback for addgroup...
Joerg Sonnenberger
r47085:711ba0f1 default
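
The only change to mercurial/changegroup.py in this diff is in cg1unpacker.apply(): the cl.addgroup(...) call that applies the incoming changelog deltas now also passes alwayscache=True which, per the commit message, decouples revision-text caching from the addrevisioncb callback (onchangelog) that consumes it. As a rough illustration of the pattern only — a toy addgroup, not Mercurial's real API — an explicit flag makes caching an independent knob:

# Toy sketch, not Mercurial's API: caching is controlled by an explicit
# flag instead of being implied by the presence of an addrevision callback.
def addgroup(deltas, addrevisioncb=None, alwayscache=False):
    cachedtexts = {}
    added = []
    for node, text in deltas:
        added.append(node)
        if alwayscache or addrevisioncb is not None:
            # keep the full text around so a callback (or a later consumer)
            # does not have to reconstruct it from the delta chain
            cachedtexts[node] = text
        if addrevisioncb is not None:
            addrevisioncb(node)
    return added, cachedtexts

# A caller can now request caching without installing a callback:
nodes, cache = addgroup([(b'n1', b'text1'), (b'n2', b'text2')], alwayscache=True)
assert cache == {b'n1': b'text1', b'n2': b'text2'}
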
@@ -1,1706 +1,1707 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21 from .pycompat import open
22 22
23 23 from . import (
24 24 error,
25 25 match as matchmod,
26 26 mdiff,
27 27 phases,
28 28 pycompat,
29 29 requirements,
30 30 scmutil,
31 31 util,
32 32 )
33 33
34 34 from .interfaces import repository
35 35
36 36 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
37 37 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
38 38 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
39 39
40 40 LFS_REQUIREMENT = b'lfs'
41 41
42 42 readexactly = util.readexactly
43 43
44 44
45 45 def getchunk(stream):
46 46 """return the next chunk from stream as a string"""
47 47 d = readexactly(stream, 4)
48 48 l = struct.unpack(b">l", d)[0]
49 49 if l <= 4:
50 50 if l:
51 51 raise error.Abort(_(b"invalid chunk length %d") % l)
52 52 return b""
53 53 return readexactly(stream, l - 4)
54 54
55 55
56 56 def chunkheader(length):
57 57 """return a changegroup chunk header (string)"""
58 58 return struct.pack(b">l", length + 4)
59 59
60 60
61 61 def closechunk():
62 62 """return a changegroup chunk header (string) for a zero-length chunk"""
63 63 return struct.pack(b">l", 0)
64 64
65 65
66 66 def _fileheader(path):
67 67 """Obtain a changegroup chunk header for a named path."""
68 68 return chunkheader(len(path)) + path
69 69
70 70
71 71 def writechunks(ui, chunks, filename, vfs=None):
72 72 """Write chunks to a file and return its filename.
73 73
74 74 The stream is assumed to be a bundle file.
75 75 Existing files will not be overwritten.
76 76 If no filename is specified, a temporary file is created.
77 77 """
78 78 fh = None
79 79 cleanup = None
80 80 try:
81 81 if filename:
82 82 if vfs:
83 83 fh = vfs.open(filename, b"wb")
84 84 else:
85 85 # Increase default buffer size because default is usually
86 86 # small (4k is common on Linux).
87 87 fh = open(filename, b"wb", 131072)
88 88 else:
89 89 fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
90 90 fh = os.fdopen(fd, "wb")
91 91 cleanup = filename
92 92 for c in chunks:
93 93 fh.write(c)
94 94 cleanup = None
95 95 return filename
96 96 finally:
97 97 if fh is not None:
98 98 fh.close()
99 99 if cleanup is not None:
100 100 if filename and vfs:
101 101 vfs.unlink(cleanup)
102 102 else:
103 103 os.unlink(cleanup)
104 104
105 105
106 106 class cg1unpacker(object):
107 107 """Unpacker for cg1 changegroup streams.
108 108
109 109 A changegroup unpacker handles the framing of the revision data in
110 110 the wire format. Most consumers will want to use the apply()
111 111 method to add the changes from the changegroup to a repository.
112 112
113 113 If you're forwarding a changegroup unmodified to another consumer,
114 114 use getchunks(), which returns an iterator of changegroup
115 115 chunks. This is mostly useful for cases where you need to know the
116 116 data stream has ended by observing the end of the changegroup.
117 117
118 118 deltachunk() is useful only if you're applying delta data. Most
119 119 consumers should prefer apply() instead.
120 120
121 121 A few other public methods exist. Those are used only for
122 122 bundlerepo and some debug commands - their use is discouraged.
123 123 """
124 124
125 125 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
126 126 deltaheadersize = deltaheader.size
127 127 version = b'01'
128 128 _grouplistcount = 1 # One list of files after the manifests
129 129
130 130 def __init__(self, fh, alg, extras=None):
131 131 if alg is None:
132 132 alg = b'UN'
133 133 if alg not in util.compengines.supportedbundletypes:
134 134 raise error.Abort(_(b'unknown stream compression type: %s') % alg)
135 135 if alg == b'BZ':
136 136 alg = b'_truncatedBZ'
137 137
138 138 compengine = util.compengines.forbundletype(alg)
139 139 self._stream = compengine.decompressorreader(fh)
140 140 self._type = alg
141 141 self.extras = extras or {}
142 142 self.callback = None
143 143
144 144 # These methods (compressed, read, seek, tell) all appear to only
145 145 # be used by bundlerepo, but it's a little hard to tell.
146 146 def compressed(self):
147 147 return self._type is not None and self._type != b'UN'
148 148
149 149 def read(self, l):
150 150 return self._stream.read(l)
151 151
152 152 def seek(self, pos):
153 153 return self._stream.seek(pos)
154 154
155 155 def tell(self):
156 156 return self._stream.tell()
157 157
158 158 def close(self):
159 159 return self._stream.close()
160 160
161 161 def _chunklength(self):
162 162 d = readexactly(self._stream, 4)
163 163 l = struct.unpack(b">l", d)[0]
164 164 if l <= 4:
165 165 if l:
166 166 raise error.Abort(_(b"invalid chunk length %d") % l)
167 167 return 0
168 168 if self.callback:
169 169 self.callback()
170 170 return l - 4
171 171
172 172 def changelogheader(self):
173 173 """v10 does not have a changelog header chunk"""
174 174 return {}
175 175
176 176 def manifestheader(self):
177 177 """v10 does not have a manifest header chunk"""
178 178 return {}
179 179
180 180 def filelogheader(self):
181 181 """return the header of the filelogs chunk, v10 only has the filename"""
182 182 l = self._chunklength()
183 183 if not l:
184 184 return {}
185 185 fname = readexactly(self._stream, l)
186 186 return {b'filename': fname}
187 187
188 188 def _deltaheader(self, headertuple, prevnode):
189 189 node, p1, p2, cs = headertuple
190 190 if prevnode is None:
191 191 deltabase = p1
192 192 else:
193 193 deltabase = prevnode
194 194 flags = 0
195 195 return node, p1, p2, deltabase, cs, flags
196 196
197 197 def deltachunk(self, prevnode):
198 198 l = self._chunklength()
199 199 if not l:
200 200 return {}
201 201 headerdata = readexactly(self._stream, self.deltaheadersize)
202 202 header = self.deltaheader.unpack(headerdata)
203 203 delta = readexactly(self._stream, l - self.deltaheadersize)
204 204 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
205 205 return (node, p1, p2, cs, deltabase, delta, flags)
206 206
207 207 def getchunks(self):
208 208 """returns all the chunks contains in the bundle
209 209
210 210 Used when you need to forward the binary stream to a file or another
211 211 network API. To do so, it parse the changegroup data, otherwise it will
212 212 block in case of sshrepo because it don't know the end of the stream.
213 213 """
214 214 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
215 215 # and a list of filelogs. For changegroup 3, we expect 4 parts:
216 216 # changelog, manifestlog, a list of tree manifestlogs, and a list of
217 217 # filelogs.
218 218 #
219 219 # Changelog and manifestlog parts are terminated with empty chunks. The
220 220 # tree and file parts are a list of entry sections. Each entry section
221 221 # is a series of chunks terminating in an empty chunk. The list of these
222 222 # entry sections is terminated in yet another empty chunk, so we know
223 223 # we've reached the end of the tree/file list when we reach an empty
224 224 # chunk that was preceded by no non-empty chunks.
225 225
226 226 parts = 0
227 227 while parts < 2 + self._grouplistcount:
228 228 noentries = True
229 229 while True:
230 230 chunk = getchunk(self)
231 231 if not chunk:
232 232 # The first two empty chunks represent the end of the
233 233 # changelog and the manifestlog portions. The remaining
234 234 # empty chunks represent either A) the end of individual
235 235 # tree or file entries in the file list, or B) the end of
236 236 # the entire list. It's the end of the entire list if there
237 237 # were no entries (i.e. noentries is True).
238 238 if parts < 2:
239 239 parts += 1
240 240 elif noentries:
241 241 parts += 1
242 242 break
243 243 noentries = False
244 244 yield chunkheader(len(chunk))
245 245 pos = 0
246 246 while pos < len(chunk):
247 247 next = pos + 2 ** 20
248 248 yield chunk[pos:next]
249 249 pos = next
250 250 yield closechunk()
251 251
252 252 def _unpackmanifests(self, repo, revmap, trp, prog):
253 253 self.callback = prog.increment
254 254 # no need to check for empty manifest group here:
255 255 # if the result of the merge of 1 and 2 is the same in 3 and 4,
256 256 # no new manifest will be created and the manifest group will
257 257 # be empty during the pull
258 258 self.manifestheader()
259 259 deltas = self.deltaiter()
260 260 repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
261 261 prog.complete()
262 262 self.callback = None
263 263
264 264 def apply(
265 265 self,
266 266 repo,
267 267 tr,
268 268 srctype,
269 269 url,
270 270 targetphase=phases.draft,
271 271 expectedtotal=None,
272 272 ):
273 273 """Add the changegroup returned by source.read() to this repo.
274 274 srctype is a string like 'push', 'pull', or 'unbundle'. url is
275 275 the URL of the repo where this changegroup is coming from.
276 276
277 277 Return an integer summarizing the change to this repo:
278 278 - nothing changed or no source: 0
279 279 - more heads than before: 1+added heads (2..n)
280 280 - fewer heads than before: -1-removed heads (-2..-n)
281 281 - number of heads stays the same: 1
282 282 """
283 283 repo = repo.unfiltered()
284 284
285 285 def csmap(x):
286 286 repo.ui.debug(b"add changeset %s\n" % short(x))
287 287 return len(cl)
288 288
289 289 def revmap(x):
290 290 return cl.rev(x)
291 291
292 292 try:
293 293 # The transaction may already carry source information. In this
294 294 # case we use the top level data. We overwrite the argument
295 295 # because we need to use the top level value (if they exist)
296 296 # in this function.
297 297 srctype = tr.hookargs.setdefault(b'source', srctype)
298 298 tr.hookargs.setdefault(b'url', url)
299 299 repo.hook(
300 300 b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
301 301 )
302 302
303 303 # write changelog data to temp files so concurrent readers
304 304 # will not see an inconsistent view
305 305 cl = repo.changelog
306 306 cl.delayupdate(tr)
307 307 oldheads = set(cl.heads())
308 308
309 309 trp = weakref.proxy(tr)
310 310 # pull off the changeset group
311 311 repo.ui.status(_(b"adding changesets\n"))
312 312 clstart = len(cl)
313 313 progress = repo.ui.makeprogress(
314 314 _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
315 315 )
316 316 self.callback = progress.increment
317 317
318 318 efilesset = set()
319 319 cgnodes = []
320 320
321 321 def ondupchangelog(cl, node):
322 322 if cl.rev(node) < clstart:
323 323 cgnodes.append(node)
324 324
325 325 def onchangelog(cl, node):
326 326 rev = cl.rev(node)
327 327 ctx = cl.changelogrevision(rev)
328 328 efilesset.update(ctx.files)
329 329 repo.register_changeset(rev, ctx)
330 330
331 331 self.changelogheader()
332 332 deltas = self.deltaiter()
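# With alwayscache=True (passed below) the changelog is asked to keep the
# full text of every newly added revision cached, so the onchangelog
# callback above can call cl.changelogrevision(rev) without re-reading the
# revision data. Per the commit message, this caching appears to have been
# triggered implicitly by passing addrevisioncb before, and is now
# requested explicitly, decoupled from the callback.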
333 333 if not cl.addgroup(
334 334 deltas,
335 335 csmap,
336 336 trp,
337 alwayscache=True,
337 338 addrevisioncb=onchangelog,
338 339 duplicaterevisioncb=ondupchangelog,
339 340 ):
340 341 repo.ui.develwarn(
341 342 b'applied empty changelog from changegroup',
342 343 config=b'warn-empty-changegroup',
343 344 )
344 345 efiles = len(efilesset)
345 346 clend = len(cl)
346 347 changesets = clend - clstart
347 348 progress.complete()
348 349 del deltas
349 350 # TODO Python 2.7 removal
350 351 # del efilesset
351 352 efilesset = None
352 353 self.callback = None
353 354
354 355 # pull off the manifest group
355 356 repo.ui.status(_(b"adding manifests\n"))
356 357 # We know that we'll never have more manifests than we had
357 358 # changesets.
358 359 progress = repo.ui.makeprogress(
359 360 _(b'manifests'), unit=_(b'chunks'), total=changesets
360 361 )
361 362 self._unpackmanifests(repo, revmap, trp, progress)
362 363
363 364 needfiles = {}
364 365 if repo.ui.configbool(b'server', b'validate'):
365 366 cl = repo.changelog
366 367 ml = repo.manifestlog
367 368 # validate incoming csets have their manifests
368 369 for cset in pycompat.xrange(clstart, clend):
369 370 mfnode = cl.changelogrevision(cset).manifest
370 371 mfest = ml[mfnode].readdelta()
371 372 # store file nodes we must see
372 373 for f, n in pycompat.iteritems(mfest):
373 374 needfiles.setdefault(f, set()).add(n)
374 375
375 376 # process the files
376 377 repo.ui.status(_(b"adding file changes\n"))
377 378 newrevs, newfiles = _addchangegroupfiles(
378 379 repo, self, revmap, trp, efiles, needfiles
379 380 )
380 381
381 382 # making sure the value exists
382 383 tr.changes.setdefault(b'changegroup-count-changesets', 0)
383 384 tr.changes.setdefault(b'changegroup-count-revisions', 0)
384 385 tr.changes.setdefault(b'changegroup-count-files', 0)
385 386 tr.changes.setdefault(b'changegroup-count-heads', 0)
386 387
387 388 # some code uses bundle operations for internal purposes. They usually
388 389 # set `ui.quiet` to do this outside of user sight. Since the reporting
389 390 # of such operations now happens at the end of the transaction,
390 391 # ui.quiet has no direct effect on the output.
391 392 #
392 393 # To preserve this intent we use an inelegant hack: we skip reporting
393 394 # the change if `quiet` is set. We should probably move to
394 395 # something better, but this is a good first step to allow the "end
395 396 # of transaction report" to pass tests.
396 397 if not repo.ui.quiet:
397 398 tr.changes[b'changegroup-count-changesets'] += changesets
398 399 tr.changes[b'changegroup-count-revisions'] += newrevs
399 400 tr.changes[b'changegroup-count-files'] += newfiles
400 401
401 402 deltaheads = 0
402 403 if oldheads:
403 404 heads = cl.heads()
404 405 deltaheads += len(heads) - len(oldheads)
405 406 for h in heads:
406 407 if h not in oldheads and repo[h].closesbranch():
407 408 deltaheads -= 1
408 409
409 410 # see previous comment about checking ui.quiet
410 411 if not repo.ui.quiet:
411 412 tr.changes[b'changegroup-count-heads'] += deltaheads
412 413 repo.invalidatevolatilesets()
413 414
414 415 if changesets > 0:
415 416 if b'node' not in tr.hookargs:
416 417 tr.hookargs[b'node'] = hex(cl.node(clstart))
417 418 tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
418 419 hookargs = dict(tr.hookargs)
419 420 else:
420 421 hookargs = dict(tr.hookargs)
421 422 hookargs[b'node'] = hex(cl.node(clstart))
422 423 hookargs[b'node_last'] = hex(cl.node(clend - 1))
423 424 repo.hook(
424 425 b'pretxnchangegroup',
425 426 throw=True,
426 427 **pycompat.strkwargs(hookargs)
427 428 )
428 429
429 430 added = pycompat.xrange(clstart, clend)
430 431 phaseall = None
431 432 if srctype in (b'push', b'serve'):
432 433 # Old servers cannot push the boundary themselves.
433 434 # New servers won't push the boundary if the changeset already
434 435 # exists locally as secret.
435 436 #
436 437 # We should not use `added` here but the list of all changes in
437 438 # the bundle.
438 439 if repo.publishing():
439 440 targetphase = phaseall = phases.public
440 441 else:
441 442 # closer target phase computation
442 443
443 444 # Those changesets have been pushed from the
444 445 # outside, their phases are going to be pushed
445 446 # alongside. Therefore `targetphase` is
446 447 # ignored.
447 448 targetphase = phaseall = phases.draft
448 449 if added:
449 450 phases.registernew(repo, tr, targetphase, added)
450 451 if phaseall is not None:
451 452 phases.advanceboundary(repo, tr, phaseall, cgnodes, revs=added)
452 453 cgnodes = []
453 454
454 455 if changesets > 0:
455 456
456 457 def runhooks(unused_success):
457 458 # These hooks run when the lock releases, not when the
458 459 # transaction closes. So it's possible for the changelog
459 460 # to have changed since we last saw it.
460 461 if clstart >= len(repo):
461 462 return
462 463
463 464 repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))
464 465
465 466 for rev in added:
466 467 args = hookargs.copy()
467 468 args[b'node'] = hex(cl.node(rev))
468 469 del args[b'node_last']
469 470 repo.hook(b"incoming", **pycompat.strkwargs(args))
470 471
471 472 newheads = [h for h in repo.heads() if h not in oldheads]
472 473 repo.ui.log(
473 474 b"incoming",
474 475 b"%d incoming changes - new heads: %s\n",
475 476 len(added),
476 477 b', '.join([hex(c[:6]) for c in newheads]),
477 478 )
478 479
479 480 tr.addpostclose(
480 481 b'changegroup-runhooks-%020i' % clstart,
481 482 lambda tr: repo._afterlock(runhooks),
482 483 )
483 484 finally:
484 485 repo.ui.flush()
485 486 # never return 0 here:
486 487 if deltaheads < 0:
487 488 ret = deltaheads - 1
488 489 else:
489 490 ret = deltaheads + 1
490 491 return ret
491 492
492 493 def deltaiter(self):
493 494 """
494 495 returns an iterator of the deltas in this changegroup
495 496
496 497 Useful for passing to the underlying storage system to be stored.
497 498 """
498 499 chain = None
499 500 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
500 501 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
501 502 yield chunkdata
502 503 chain = chunkdata[0]
503 504
504 505
505 506 class cg2unpacker(cg1unpacker):
506 507 """Unpacker for cg2 streams.
507 508
508 509 cg2 streams add support for generaldelta, so the delta header
509 510 format is slightly different. All other features about the data
510 511 remain the same.
511 512 """
512 513
513 514 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
514 515 deltaheadersize = deltaheader.size
515 516 version = b'02'
516 517
517 518 def _deltaheader(self, headertuple, prevnode):
518 519 node, p1, p2, deltabase, cs = headertuple
519 520 flags = 0
520 521 return node, p1, p2, deltabase, cs, flags
521 522
522 523
523 524 class cg3unpacker(cg2unpacker):
524 525 """Unpacker for cg3 streams.
525 526
526 527 cg3 streams add support for exchanging treemanifests and revlog
527 528 flags. It adds the revlog flags to the delta header and an empty chunk
528 529 separating manifests and files.
529 530 """
530 531
531 532 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
532 533 deltaheadersize = deltaheader.size
533 534 version = b'03'
534 535 _grouplistcount = 2 # One list of manifests and one list of files
535 536
536 537 def _deltaheader(self, headertuple, prevnode):
537 538 node, p1, p2, deltabase, cs, flags = headertuple
538 539 return node, p1, p2, deltabase, cs, flags
539 540
540 541 def _unpackmanifests(self, repo, revmap, trp, prog):
541 542 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
542 543 for chunkdata in iter(self.filelogheader, {}):
543 544 # If we get here, there are directory manifests in the changegroup
544 545 d = chunkdata[b"filename"]
545 546 repo.ui.debug(b"adding %s revisions\n" % d)
546 547 deltas = self.deltaiter()
547 548 if not repo.manifestlog.getstorage(d).addgroup(deltas, revmap, trp):
548 549 raise error.Abort(_(b"received dir revlog group is empty"))
549 550
550 551
551 552 class headerlessfixup(object):
552 553 def __init__(self, fh, h):
553 554 self._h = h
554 555 self._fh = fh
555 556
556 557 def read(self, n):
557 558 if self._h:
558 559 d, self._h = self._h[:n], self._h[n:]
559 560 if len(d) < n:
560 561 d += readexactly(self._fh, n - len(d))
561 562 return d
562 563 return readexactly(self._fh, n)
563 564
564 565
565 566 def _revisiondeltatochunks(delta, headerfn):
566 567 """Serialize a revisiondelta to changegroup chunks."""
567 568
568 569 # The captured revision delta may be encoded as a delta against
569 570 # a base revision or as a full revision. The changegroup format
570 571 # requires that everything on the wire be deltas. So for full
571 572 # revisions, we need to invent a header that says to rewrite
572 573 # data.
573 574
574 575 if delta.delta is not None:
575 576 prefix, data = b'', delta.delta
576 577 elif delta.basenode == nullid:
577 578 data = delta.revision
578 579 prefix = mdiff.trivialdiffheader(len(data))
579 580 else:
580 581 data = delta.revision
581 582 prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))
582 583
583 584 meta = headerfn(delta)
584 585
585 586 yield chunkheader(len(meta) + len(prefix) + len(data))
586 587 yield meta
587 588 if prefix:
588 589 yield prefix
589 590 yield data
590 591
591 592
592 593 def _sortnodesellipsis(store, nodes, cl, lookup):
593 594 """Sort nodes for changegroup generation."""
594 595 # Ellipses serving mode.
595 596 #
596 597 # In a perfect world, we'd generate better ellipsis-ified graphs
597 598 # for non-changelog revlogs. In practice, we haven't started doing
598 599 # that yet, so the resulting DAGs for the manifestlog and filelogs
599 600 # are actually full of bogus parentage on all the ellipsis
600 601 # nodes. This has the side effect that, while the contents are
601 602 # correct, the individual DAGs might be completely out of whack in
602 603 # a case like 882681bc3166 and its ancestors (back about 10
603 604 # revisions or so) in the main hg repo.
604 605 #
605 606 # The one invariant we *know* holds is that the new (potentially
606 607 # bogus) DAG shape will be valid if we order the nodes in the
607 608 # order that they're introduced in dramatis personae by the
608 609 # changelog, so what we do is we sort the non-changelog histories
609 610 # by the order in which they are used by the changelog.
610 611 key = lambda n: cl.rev(lookup(n))
611 612 return sorted(nodes, key=key)
612 613
613 614
614 615 def _resolvenarrowrevisioninfo(
615 616 cl,
616 617 store,
617 618 ischangelog,
618 619 rev,
619 620 linkrev,
620 621 linknode,
621 622 clrevtolocalrev,
622 623 fullclnodes,
623 624 precomputedellipsis,
624 625 ):
625 626 linkparents = precomputedellipsis[linkrev]
626 627
627 628 def local(clrev):
628 629 """Turn a changelog revnum into a local revnum.
629 630
630 631 The ellipsis dag is stored as revnums on the changelog,
631 632 but when we're producing ellipsis entries for
632 633 non-changelog revlogs, we need to turn those numbers into
633 634 something local. This does that for us, and during the
634 635 changelog sending phase will also expand the stored
635 636 mappings as needed.
636 637 """
637 638 if clrev == nullrev:
638 639 return nullrev
639 640
640 641 if ischangelog:
641 642 return clrev
642 643
643 644 # Walk the ellipsis-ized changelog breadth-first looking for a
644 645 # change that has been linked from the current revlog.
645 646 #
646 647 # For a flat manifest revlog only a single step should be necessary
647 648 # as all relevant changelog entries are relevant to the flat
648 649 # manifest.
649 650 #
650 651 # For a filelog or tree manifest dirlog however not every changelog
651 652 # entry will have been relevant, so we need to skip some changelog
652 653 # nodes even after ellipsis-izing.
653 654 walk = [clrev]
654 655 while walk:
655 656 p = walk[0]
656 657 walk = walk[1:]
657 658 if p in clrevtolocalrev:
658 659 return clrevtolocalrev[p]
659 660 elif p in fullclnodes:
660 661 walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
661 662 elif p in precomputedellipsis:
662 663 walk.extend(
663 664 [pp for pp in precomputedellipsis[p] if pp != nullrev]
664 665 )
665 666 else:
666 667 # In this case, we've got an ellipsis with parents
667 668 # outside the current bundle (likely an
668 669 # incremental pull). We "know" that we can use the
669 670 # value of this same revlog at whatever revision
670 671 # is pointed to by linknode. "Know" is in scare
671 672 # quotes because I haven't done enough examination
672 673 # of edge cases to convince myself this is really
673 674 # a fact - it works for all the (admittedly
674 675 # thorough) cases in our testsuite, but I would be
675 676 # somewhat unsurprised to find a case in the wild
676 677 # where this breaks down a bit. That said, I don't
677 678 # know if it would hurt anything.
678 679 for i in pycompat.xrange(rev, 0, -1):
679 680 if store.linkrev(i) == clrev:
680 681 return i
681 682 # We failed to resolve a parent for this node, so
682 683 # we crash the changegroup construction.
683 684 raise error.Abort(
684 685 b'unable to resolve parent while packing %r %r'
685 686 b' for changeset %r' % (store.indexfile, rev, clrev)
686 687 )
687 688
688 689 return nullrev
689 690
690 691 if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
691 692 p1, p2 = nullrev, nullrev
692 693 elif len(linkparents) == 1:
693 694 (p1,) = sorted(local(p) for p in linkparents)
694 695 p2 = nullrev
695 696 else:
696 697 p1, p2 = sorted(local(p) for p in linkparents)
697 698
698 699 p1node, p2node = store.node(p1), store.node(p2)
699 700
700 701 return p1node, p2node, linknode
701 702
702 703
703 704 def deltagroup(
704 705 repo,
705 706 store,
706 707 nodes,
707 708 ischangelog,
708 709 lookup,
709 710 forcedeltaparentprev,
710 711 topic=None,
711 712 ellipses=False,
712 713 clrevtolocalrev=None,
713 714 fullclnodes=None,
714 715 precomputedellipsis=None,
715 716 ):
716 717 """Calculate deltas for a set of revisions.
717 718
718 719 Is a generator of ``revisiondelta`` instances.
719 720
720 721 If topic is not None, progress detail will be generated using this
721 722 topic name (e.g. changesets, manifests, etc).
722 723 """
723 724 if not nodes:
724 725 return
725 726
726 727 cl = repo.changelog
727 728
728 729 if ischangelog:
729 730 # `hg log` shows changesets in storage order. To preserve order
730 731 # across clones, send out changesets in storage order.
731 732 nodesorder = b'storage'
732 733 elif ellipses:
733 734 nodes = _sortnodesellipsis(store, nodes, cl, lookup)
734 735 nodesorder = b'nodes'
735 736 else:
736 737 nodesorder = None
737 738
738 739 # Perform ellipses filtering and revision massaging. We do this before
739 740 # emitrevisions() because a) filtering out revisions creates less work
740 741 # for emitrevisions() b) dropping revisions would break emitrevisions()'s
741 742 # assumptions about delta choices and we would possibly send a delta
742 743 # referencing a missing base revision.
743 744 #
744 745 # Also, calling lookup() has side-effects with regards to populating
745 746 # data structures. If we don't call lookup() for each node or if we call
746 747 # lookup() after the first pass through each node, things can break -
747 748 # possibly intermittently depending on the python hash seed! For that
748 749 # reason, we store a mapping of all linknodes during the initial node
749 750 # pass rather than use lookup() on the output side.
750 751 if ellipses:
751 752 filtered = []
752 753 adjustedparents = {}
753 754 linknodes = {}
754 755
755 756 for node in nodes:
756 757 rev = store.rev(node)
757 758 linknode = lookup(node)
758 759 linkrev = cl.rev(linknode)
759 760 clrevtolocalrev[linkrev] = rev
760 761
761 762 # If linknode is in fullclnodes, it means the corresponding
762 763 # changeset was a full changeset and is being sent unaltered.
763 764 if linknode in fullclnodes:
764 765 linknodes[node] = linknode
765 766
766 767 # If the corresponding changeset wasn't in the set computed
767 768 # as relevant to us, it should be dropped outright.
768 769 elif linkrev not in precomputedellipsis:
769 770 continue
770 771
771 772 else:
772 773 # We could probably do this later and avoid the dict
773 774 # holding state. But it likely doesn't matter.
774 775 p1node, p2node, linknode = _resolvenarrowrevisioninfo(
775 776 cl,
776 777 store,
777 778 ischangelog,
778 779 rev,
779 780 linkrev,
780 781 linknode,
781 782 clrevtolocalrev,
782 783 fullclnodes,
783 784 precomputedellipsis,
784 785 )
785 786
786 787 adjustedparents[node] = (p1node, p2node)
787 788 linknodes[node] = linknode
788 789
789 790 filtered.append(node)
790 791
791 792 nodes = filtered
792 793
793 794 # We expect the first pass to be fast, so we only engage the progress
794 795 # meter for constructing the revision deltas.
795 796 progress = None
796 797 if topic is not None:
797 798 progress = repo.ui.makeprogress(
798 799 topic, unit=_(b'chunks'), total=len(nodes)
799 800 )
800 801
801 802 configtarget = repo.ui.config(b'devel', b'bundle.delta')
802 803 if configtarget not in (b'', b'p1', b'full'):
803 804 msg = _("""config "devel.bundle.delta" has unknown value: %s""")
804 805 repo.ui.warn(msg % configtarget)
805 806
806 807 deltamode = repository.CG_DELTAMODE_STD
807 808 if forcedeltaparentprev:
808 809 deltamode = repository.CG_DELTAMODE_PREV
809 810 elif configtarget == b'p1':
810 811 deltamode = repository.CG_DELTAMODE_P1
811 812 elif configtarget == b'full':
812 813 deltamode = repository.CG_DELTAMODE_FULL
813 814
814 815 revisions = store.emitrevisions(
815 816 nodes,
816 817 nodesorder=nodesorder,
817 818 revisiondata=True,
818 819 assumehaveparentrevisions=not ellipses,
819 820 deltamode=deltamode,
820 821 )
821 822
822 823 for i, revision in enumerate(revisions):
823 824 if progress:
824 825 progress.update(i + 1)
825 826
826 827 if ellipses:
827 828 linknode = linknodes[revision.node]
828 829
829 830 if revision.node in adjustedparents:
830 831 p1node, p2node = adjustedparents[revision.node]
831 832 revision.p1node = p1node
832 833 revision.p2node = p2node
833 834 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
834 835
835 836 else:
836 837 linknode = lookup(revision.node)
837 838
838 839 revision.linknode = linknode
839 840 yield revision
840 841
841 842 if progress:
842 843 progress.complete()
843 844
844 845
845 846 class cgpacker(object):
846 847 def __init__(
847 848 self,
848 849 repo,
849 850 oldmatcher,
850 851 matcher,
851 852 version,
852 853 builddeltaheader,
853 854 manifestsend,
854 855 forcedeltaparentprev=False,
855 856 bundlecaps=None,
856 857 ellipses=False,
857 858 shallow=False,
858 859 ellipsisroots=None,
859 860 fullnodes=None,
860 861 ):
861 862 """Given a source repo, construct a bundler.
862 863
863 864 oldmatcher is a matcher that matches on files the client already has.
864 865 These will not be included in the changegroup.
865 866
866 867 matcher is a matcher that matches on files to include in the
867 868 changegroup. Used to facilitate sparse changegroups.
868 869
869 870 forcedeltaparentprev indicates whether delta parents must be against
870 871 the previous revision in a delta group. This should only be used for
871 872 compatibility with changegroup version 1.
872 873
873 874 builddeltaheader is a callable that constructs the header for a group
874 875 delta.
875 876
876 877 manifestsend is a chunk to send after manifests have been fully emitted.
877 878
878 879 ellipses indicates whether ellipsis serving mode is enabled.
879 880
880 881 bundlecaps is optional and can be used to specify the set of
881 882 capabilities which can be used to build the bundle. While bundlecaps is
882 883 unused in core Mercurial, extensions rely on this feature to communicate
883 884 capabilities to customize the changegroup packer.
884 885
885 886 shallow indicates whether shallow data might be sent. The packer may
886 887 need to pack file contents not introduced by the changes being packed.
887 888
888 889 fullnodes is the set of changelog nodes which should not be ellipsis
889 890 nodes. We store this rather than the set of nodes that should be
890 891 ellipsis because for very large histories we expect this to be
891 892 significantly smaller.
892 893 """
893 894 assert oldmatcher
894 895 assert matcher
895 896 self._oldmatcher = oldmatcher
896 897 self._matcher = matcher
897 898
898 899 self.version = version
899 900 self._forcedeltaparentprev = forcedeltaparentprev
900 901 self._builddeltaheader = builddeltaheader
901 902 self._manifestsend = manifestsend
902 903 self._ellipses = ellipses
903 904
904 905 # Set of capabilities we can use to build the bundle.
905 906 if bundlecaps is None:
906 907 bundlecaps = set()
907 908 self._bundlecaps = bundlecaps
908 909 self._isshallow = shallow
909 910 self._fullclnodes = fullnodes
910 911
911 912 # Maps ellipsis revs to their roots at the changelog level.
912 913 self._precomputedellipsis = ellipsisroots
913 914
914 915 self._repo = repo
915 916
916 917 if self._repo.ui.verbose and not self._repo.ui.debugflag:
917 918 self._verbosenote = self._repo.ui.note
918 919 else:
919 920 self._verbosenote = lambda s: None
920 921
921 922 def generate(
922 923 self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
923 924 ):
924 925 """Yield a sequence of changegroup byte chunks.
925 926 If changelog is False, changelog data won't be added to changegroup
926 927 """
927 928
928 929 repo = self._repo
929 930 cl = repo.changelog
930 931
931 932 self._verbosenote(_(b'uncompressed size of bundle content:\n'))
932 933 size = 0
933 934
934 935 clstate, deltas = self._generatechangelog(
935 936 cl, clnodes, generate=changelog
936 937 )
937 938 for delta in deltas:
938 939 for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
939 940 size += len(chunk)
940 941 yield chunk
941 942
942 943 close = closechunk()
943 944 size += len(close)
944 945 yield closechunk()
945 946
946 947 self._verbosenote(_(b'%8.i (changelog)\n') % size)
947 948
948 949 clrevorder = clstate[b'clrevorder']
949 950 manifests = clstate[b'manifests']
950 951 changedfiles = clstate[b'changedfiles']
951 952
952 953 # We need to make sure that the linkrev in the changegroup refers to
953 954 # the first changeset that introduced the manifest or file revision.
954 955 # The fastpath is usually safer than the slowpath, because the filelogs
955 956 # are walked in revlog order.
956 957 #
957 958 # When taking the slowpath when the manifest revlog uses generaldelta,
958 959 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
959 960 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
960 961 #
961 962 # When taking the fastpath, we are only vulnerable to reordering
962 963 # of the changelog itself. The changelog never uses generaldelta and is
963 964 # never reordered. To handle this case, we simply take the slowpath,
964 965 # which already has the 'clrevorder' logic. This was also fixed in
965 966 # cc0ff93d0c0c.
966 967
967 968 # Treemanifests don't work correctly with fastpathlinkrev
968 969 # either, because we don't discover which directory nodes to
969 970 # send along with files. This could probably be fixed.
970 971 fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)
971 972
972 973 fnodes = {} # needed file nodes
973 974
974 975 size = 0
975 976 it = self.generatemanifests(
976 977 commonrevs,
977 978 clrevorder,
978 979 fastpathlinkrev,
979 980 manifests,
980 981 fnodes,
981 982 source,
982 983 clstate[b'clrevtomanifestrev'],
983 984 )
984 985
985 986 for tree, deltas in it:
986 987 if tree:
987 988 assert self.version == b'03'
988 989 chunk = _fileheader(tree)
989 990 size += len(chunk)
990 991 yield chunk
991 992
992 993 for delta in deltas:
993 994 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
994 995 for chunk in chunks:
995 996 size += len(chunk)
996 997 yield chunk
997 998
998 999 close = closechunk()
999 1000 size += len(close)
1000 1001 yield close
1001 1002
1002 1003 self._verbosenote(_(b'%8.i (manifests)\n') % size)
1003 1004 yield self._manifestsend
1004 1005
1005 1006 mfdicts = None
1006 1007 if self._ellipses and self._isshallow:
1007 1008 mfdicts = [
1008 1009 (self._repo.manifestlog[n].read(), lr)
1009 1010 for (n, lr) in pycompat.iteritems(manifests)
1010 1011 ]
1011 1012
1012 1013 manifests.clear()
1013 1014 clrevs = {cl.rev(x) for x in clnodes}
1014 1015
1015 1016 it = self.generatefiles(
1016 1017 changedfiles,
1017 1018 commonrevs,
1018 1019 source,
1019 1020 mfdicts,
1020 1021 fastpathlinkrev,
1021 1022 fnodes,
1022 1023 clrevs,
1023 1024 )
1024 1025
1025 1026 for path, deltas in it:
1026 1027 h = _fileheader(path)
1027 1028 size = len(h)
1028 1029 yield h
1029 1030
1030 1031 for delta in deltas:
1031 1032 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1032 1033 for chunk in chunks:
1033 1034 size += len(chunk)
1034 1035 yield chunk
1035 1036
1036 1037 close = closechunk()
1037 1038 size += len(close)
1038 1039 yield close
1039 1040
1040 1041 self._verbosenote(_(b'%8.i %s\n') % (size, path))
1041 1042
1042 1043 yield closechunk()
1043 1044
1044 1045 if clnodes:
1045 1046 repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1046 1047
1047 1048 def _generatechangelog(self, cl, nodes, generate=True):
1048 1049 """Generate data for changelog chunks.
1049 1050
1050 1051 Returns a 2-tuple of a dict containing state and an iterable of
1051 1052 byte chunks. The state will not be fully populated until the
1052 1053 chunk stream has been fully consumed.
1053 1054
1054 1055 if generate is False, the state will be fully populated and no chunk
1055 1056 stream will be yielded
1056 1057 """
1057 1058 clrevorder = {}
1058 1059 manifests = {}
1059 1060 mfl = self._repo.manifestlog
1060 1061 changedfiles = set()
1061 1062 clrevtomanifestrev = {}
1062 1063
1063 1064 state = {
1064 1065 b'clrevorder': clrevorder,
1065 1066 b'manifests': manifests,
1066 1067 b'changedfiles': changedfiles,
1067 1068 b'clrevtomanifestrev': clrevtomanifestrev,
1068 1069 }
1069 1070
1070 1071 if not (generate or self._ellipses):
1071 1072 # sort the nodes in storage order
1072 1073 nodes = sorted(nodes, key=cl.rev)
1073 1074 for node in nodes:
1074 1075 c = cl.changelogrevision(node)
1075 1076 clrevorder[node] = len(clrevorder)
1076 1077 # record the first changeset introducing this manifest version
1077 1078 manifests.setdefault(c.manifest, node)
1078 1079 # Record a complete list of potentially-changed files in
1079 1080 # this manifest.
1080 1081 changedfiles.update(c.files)
1081 1082
1082 1083 return state, ()
1083 1084
1084 1085 # Callback for the changelog, used to collect changed files and
1085 1086 # manifest nodes.
1086 1087 # Returns the linkrev node (identity in the changelog case).
1087 1088 def lookupcl(x):
1088 1089 c = cl.changelogrevision(x)
1089 1090 clrevorder[x] = len(clrevorder)
1090 1091
1091 1092 if self._ellipses:
1092 1093 # Only update manifests if x is going to be sent. Otherwise we
1093 1094 # end up with bogus linkrevs specified for manifests and
1094 1095 # we skip some manifest nodes that we should otherwise
1095 1096 # have sent.
1096 1097 if (
1097 1098 x in self._fullclnodes
1098 1099 or cl.rev(x) in self._precomputedellipsis
1099 1100 ):
1100 1101
1101 1102 manifestnode = c.manifest
1102 1103 # Record the first changeset introducing this manifest
1103 1104 # version.
1104 1105 manifests.setdefault(manifestnode, x)
1105 1106 # Set this narrow-specific dict so we have the lowest
1106 1107 # manifest revnum to look up for this cl revnum. (Part of
1107 1108 # mapping changelog ellipsis parents to manifest ellipsis
1108 1109 # parents)
1109 1110 clrevtomanifestrev.setdefault(
1110 1111 cl.rev(x), mfl.rev(manifestnode)
1111 1112 )
1112 1113 # We can't trust the changed files list in the changeset if the
1113 1114 # client requested a shallow clone.
1114 1115 if self._isshallow:
1115 1116 changedfiles.update(mfl[c.manifest].read().keys())
1116 1117 else:
1117 1118 changedfiles.update(c.files)
1118 1119 else:
1119 1120 # record the first changeset introducing this manifest version
1120 1121 manifests.setdefault(c.manifest, x)
1121 1122 # Record a complete list of potentially-changed files in
1122 1123 # this manifest.
1123 1124 changedfiles.update(c.files)
1124 1125
1125 1126 return x
1126 1127
1127 1128 gen = deltagroup(
1128 1129 self._repo,
1129 1130 cl,
1130 1131 nodes,
1131 1132 True,
1132 1133 lookupcl,
1133 1134 self._forcedeltaparentprev,
1134 1135 ellipses=self._ellipses,
1135 1136 topic=_(b'changesets'),
1136 1137 clrevtolocalrev={},
1137 1138 fullclnodes=self._fullclnodes,
1138 1139 precomputedellipsis=self._precomputedellipsis,
1139 1140 )
1140 1141
1141 1142 return state, gen
1142 1143
1143 1144 def generatemanifests(
1144 1145 self,
1145 1146 commonrevs,
1146 1147 clrevorder,
1147 1148 fastpathlinkrev,
1148 1149 manifests,
1149 1150 fnodes,
1150 1151 source,
1151 1152 clrevtolocalrev,
1152 1153 ):
1153 1154 """Returns an iterator of changegroup chunks containing manifests.
1154 1155
1155 1156 `source` is unused here, but is used by extensions like remotefilelog to
1156 1157 change what is sent based on pulls vs pushes, etc.
1157 1158 """
1158 1159 repo = self._repo
1159 1160 mfl = repo.manifestlog
1160 1161 tmfnodes = {b'': manifests}
1161 1162
1162 1163 # Callback for the manifest, used to collect linkrevs for filelog
1163 1164 # revisions.
1164 1165 # Returns the linkrev node (collected in lookupcl).
1165 1166 def makelookupmflinknode(tree, nodes):
1166 1167 if fastpathlinkrev:
1167 1168 assert not tree
1168 1169 return (
1169 1170 manifests.__getitem__
1170 1171 ) # pytype: disable=unsupported-operands
1171 1172
1172 1173 def lookupmflinknode(x):
1173 1174 """Callback for looking up the linknode for manifests.
1174 1175
1175 1176 Returns the linkrev node for the specified manifest.
1176 1177
1177 1178 SIDE EFFECT:
1178 1179
1179 1180 1) fclnodes gets populated with the list of relevant
1180 1181 file nodes if we're not using fastpathlinkrev
1181 1182 2) When treemanifests are in use, collects treemanifest nodes
1182 1183 to send
1183 1184
1184 1185 Note that this means manifests must be completely sent to
1185 1186 the client before you can trust the list of files and
1186 1187 treemanifests to send.
1187 1188 """
1188 1189 clnode = nodes[x]
1189 1190 mdata = mfl.get(tree, x).readfast(shallow=True)
1190 1191 for p, n, fl in mdata.iterentries():
1191 1192 if fl == b't': # subdirectory manifest
1192 1193 subtree = tree + p + b'/'
1193 1194 tmfclnodes = tmfnodes.setdefault(subtree, {})
1194 1195 tmfclnode = tmfclnodes.setdefault(n, clnode)
1195 1196 if clrevorder[clnode] < clrevorder[tmfclnode]:
1196 1197 tmfclnodes[n] = clnode
1197 1198 else:
1198 1199 f = tree + p
1199 1200 fclnodes = fnodes.setdefault(f, {})
1200 1201 fclnode = fclnodes.setdefault(n, clnode)
1201 1202 if clrevorder[clnode] < clrevorder[fclnode]:
1202 1203 fclnodes[n] = clnode
1203 1204 return clnode
1204 1205
1205 1206 return lookupmflinknode
1206 1207
1207 1208 while tmfnodes:
1208 1209 tree, nodes = tmfnodes.popitem()
1209 1210
1210 1211 should_visit = self._matcher.visitdir(tree[:-1])
1211 1212 if tree and not should_visit:
1212 1213 continue
1213 1214
1214 1215 store = mfl.getstorage(tree)
1215 1216
1216 1217 if not should_visit:
1217 1218 # No nodes to send because this directory is out of
1218 1219 # the client's view of the repository (probably
1219 1220 # because of narrow clones). Do this even for the root
1220 1221 # directory (tree=='')
1221 1222 prunednodes = []
1222 1223 else:
1223 1224 # Avoid sending any manifest nodes we can prove the
1224 1225 # client already has by checking linkrevs. See the
1225 1226 # related comment in generatefiles().
1226 1227 prunednodes = self._prunemanifests(store, nodes, commonrevs)
1227 1228
1228 1229 if tree and not prunednodes:
1229 1230 continue
1230 1231
1231 1232 lookupfn = makelookupmflinknode(tree, nodes)
1232 1233
1233 1234 deltas = deltagroup(
1234 1235 self._repo,
1235 1236 store,
1236 1237 prunednodes,
1237 1238 False,
1238 1239 lookupfn,
1239 1240 self._forcedeltaparentprev,
1240 1241 ellipses=self._ellipses,
1241 1242 topic=_(b'manifests'),
1242 1243 clrevtolocalrev=clrevtolocalrev,
1243 1244 fullclnodes=self._fullclnodes,
1244 1245 precomputedellipsis=self._precomputedellipsis,
1245 1246 )
1246 1247
1247 1248 if not self._oldmatcher.visitdir(store.tree[:-1]):
1248 1249 yield tree, deltas
1249 1250 else:
1250 1251 # 'deltas' is a generator and we need to consume it even if
1251 1252 # we are not going to send it because a side-effect is that
1252 1253 # it updates tmfnodes (via lookupfn)
1253 1254 for d in deltas:
1254 1255 pass
1255 1256 if not tree:
1256 1257 yield tree, []
1257 1258
1258 1259 def _prunemanifests(self, store, nodes, commonrevs):
1259 1260 if not self._ellipses:
1260 1261 # In the non-ellipses case and with large repositories, it is better
1261 1262 # to avoid calling store.rev and store.linkrev on a lot of
1262 1263 # nodes than to send some extra data
1263 1264 return nodes.copy()
1264 1265 # This is split out as a separate method to allow filtering
1265 1266 # commonrevs in extension code.
1266 1267 #
1267 1268 # TODO(augie): this shouldn't be required, instead we should
1268 1269 # make filtering of revisions to send delegated to the store
1269 1270 # layer.
1270 1271 frev, flr = store.rev, store.linkrev
1271 1272 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1272 1273
1273 1274 # The 'source' parameter is useful for extensions
1274 1275 def generatefiles(
1275 1276 self,
1276 1277 changedfiles,
1277 1278 commonrevs,
1278 1279 source,
1279 1280 mfdicts,
1280 1281 fastpathlinkrev,
1281 1282 fnodes,
1282 1283 clrevs,
1283 1284 ):
1284 1285 changedfiles = [
1285 1286 f
1286 1287 for f in changedfiles
1287 1288 if self._matcher(f) and not self._oldmatcher(f)
1288 1289 ]
1289 1290
1290 1291 if not fastpathlinkrev:
1291 1292
1292 1293 def normallinknodes(unused, fname):
1293 1294 return fnodes.get(fname, {})
1294 1295
1295 1296 else:
1296 1297 cln = self._repo.changelog.node
1297 1298
1298 1299 def normallinknodes(store, fname):
1299 1300 flinkrev = store.linkrev
1300 1301 fnode = store.node
1301 1302 revs = ((r, flinkrev(r)) for r in store)
1302 1303 return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}
1303 1304
1304 1305 clrevtolocalrev = {}
1305 1306
1306 1307 if self._isshallow:
1307 1308 # In a shallow clone, the linknodes callback needs to also include
1308 1309 # those file nodes that are in the manifests we sent but weren't
1309 1310 # introduced by those manifests.
1310 1311 commonctxs = [self._repo[c] for c in commonrevs]
1311 1312 clrev = self._repo.changelog.rev
1312 1313
1313 1314 def linknodes(flog, fname):
1314 1315 for c in commonctxs:
1315 1316 try:
1316 1317 fnode = c.filenode(fname)
1317 1318 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1318 1319 except error.ManifestLookupError:
1319 1320 pass
1320 1321 links = normallinknodes(flog, fname)
1321 1322 if len(links) != len(mfdicts):
1322 1323 for mf, lr in mfdicts:
1323 1324 fnode = mf.get(fname, None)
1324 1325 if fnode in links:
1325 1326 links[fnode] = min(links[fnode], lr, key=clrev)
1326 1327 elif fnode:
1327 1328 links[fnode] = lr
1328 1329 return links
1329 1330
1330 1331 else:
1331 1332 linknodes = normallinknodes
1332 1333
1333 1334 repo = self._repo
1334 1335 progress = repo.ui.makeprogress(
1335 1336 _(b'files'), unit=_(b'files'), total=len(changedfiles)
1336 1337 )
1337 1338 for i, fname in enumerate(sorted(changedfiles)):
1338 1339 filerevlog = repo.file(fname)
1339 1340 if not filerevlog:
1340 1341 raise error.Abort(
1341 1342 _(b"empty or missing file data for %s") % fname
1342 1343 )
1343 1344
1344 1345 clrevtolocalrev.clear()
1345 1346
1346 1347 linkrevnodes = linknodes(filerevlog, fname)
1347 1348 # Lookup for filenodes, we collected the linkrev nodes above in the
1348 1349 # fastpath case and with lookupmf in the slowpath case.
1349 1350 def lookupfilelog(x):
1350 1351 return linkrevnodes[x]
1351 1352
1352 1353 frev, flr = filerevlog.rev, filerevlog.linkrev
1353 1354 # Skip sending any filenode we know the client already
1354 1355 # has. This avoids over-sending files relatively
1355 1356 # inexpensively, so it's not a problem if we under-filter
1356 1357 # here.
1357 1358 filenodes = [
1358 1359 n for n in linkrevnodes if flr(frev(n)) not in commonrevs
1359 1360 ]
1360 1361
1361 1362 if not filenodes:
1362 1363 continue
1363 1364
1364 1365 progress.update(i + 1, item=fname)
1365 1366
1366 1367 deltas = deltagroup(
1367 1368 self._repo,
1368 1369 filerevlog,
1369 1370 filenodes,
1370 1371 False,
1371 1372 lookupfilelog,
1372 1373 self._forcedeltaparentprev,
1373 1374 ellipses=self._ellipses,
1374 1375 clrevtolocalrev=clrevtolocalrev,
1375 1376 fullclnodes=self._fullclnodes,
1376 1377 precomputedellipsis=self._precomputedellipsis,
1377 1378 )
1378 1379
1379 1380 yield fname, deltas
1380 1381
1381 1382 progress.complete()
1382 1383
1383 1384
1384 1385 def _makecg1packer(
1385 1386 repo,
1386 1387 oldmatcher,
1387 1388 matcher,
1388 1389 bundlecaps,
1389 1390 ellipses=False,
1390 1391 shallow=False,
1391 1392 ellipsisroots=None,
1392 1393 fullnodes=None,
1393 1394 ):
1394 1395 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1395 1396 d.node, d.p1node, d.p2node, d.linknode
1396 1397 )
1397 1398
1398 1399 return cgpacker(
1399 1400 repo,
1400 1401 oldmatcher,
1401 1402 matcher,
1402 1403 b'01',
1403 1404 builddeltaheader=builddeltaheader,
1404 1405 manifestsend=b'',
1405 1406 forcedeltaparentprev=True,
1406 1407 bundlecaps=bundlecaps,
1407 1408 ellipses=ellipses,
1408 1409 shallow=shallow,
1409 1410 ellipsisroots=ellipsisroots,
1410 1411 fullnodes=fullnodes,
1411 1412 )
1412 1413
1413 1414
1414 1415 def _makecg2packer(
1415 1416 repo,
1416 1417 oldmatcher,
1417 1418 matcher,
1418 1419 bundlecaps,
1419 1420 ellipses=False,
1420 1421 shallow=False,
1421 1422 ellipsisroots=None,
1422 1423 fullnodes=None,
1423 1424 ):
1424 1425 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1425 1426 d.node, d.p1node, d.p2node, d.basenode, d.linknode
1426 1427 )
1427 1428
1428 1429 return cgpacker(
1429 1430 repo,
1430 1431 oldmatcher,
1431 1432 matcher,
1432 1433 b'02',
1433 1434 builddeltaheader=builddeltaheader,
1434 1435 manifestsend=b'',
1435 1436 bundlecaps=bundlecaps,
1436 1437 ellipses=ellipses,
1437 1438 shallow=shallow,
1438 1439 ellipsisroots=ellipsisroots,
1439 1440 fullnodes=fullnodes,
1440 1441 )
1441 1442
1442 1443
1443 1444 def _makecg3packer(
1444 1445 repo,
1445 1446 oldmatcher,
1446 1447 matcher,
1447 1448 bundlecaps,
1448 1449 ellipses=False,
1449 1450 shallow=False,
1450 1451 ellipsisroots=None,
1451 1452 fullnodes=None,
1452 1453 ):
1453 1454 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1454 1455 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1455 1456 )
1456 1457
1457 1458 return cgpacker(
1458 1459 repo,
1459 1460 oldmatcher,
1460 1461 matcher,
1461 1462 b'03',
1462 1463 builddeltaheader=builddeltaheader,
1463 1464 manifestsend=closechunk(),
1464 1465 bundlecaps=bundlecaps,
1465 1466 ellipses=ellipses,
1466 1467 shallow=shallow,
1467 1468 ellipsisroots=ellipsisroots,
1468 1469 fullnodes=fullnodes,
1469 1470 )
1470 1471
1471 1472
1472 1473 _packermap = {
1473 1474 b'01': (_makecg1packer, cg1unpacker),
1474 1475 # cg2 adds support for exchanging generaldelta
1475 1476 b'02': (_makecg2packer, cg2unpacker),
1476 1477 # cg3 adds support for exchanging revlog flags and treemanifests
1477 1478 b'03': (_makecg3packer, cg3unpacker),
1478 1479 }
1479 1480
1480 1481
1481 1482 def allsupportedversions(repo):
1482 1483 versions = set(_packermap.keys())
1483 1484 needv03 = False
1484 1485 if (
1485 1486 repo.ui.configbool(b'experimental', b'changegroup3')
1486 1487 or repo.ui.configbool(b'experimental', b'treemanifest')
1487 1488 or scmutil.istreemanifest(repo)
1488 1489 ):
1489 1490 # we keep version 03 because we need it to exchange treemanifest data
1490 1491 #
1491 1492 # we also keep versions 01 and 02, because it is possible for a repo to
1492 1493 # contain both normal and tree manifests at the same time, so using an
1493 1494 # older version to pull data is viable
1494 1495 #
1495 1496 # (or even to push a subset of history)
1496 1497 needv03 = True
1497 1498 if b'exp-sidedata-flag' in repo.requirements:
1498 1499 needv03 = True
1499 1500 # don't attempt to use 01/02 until we do sidedata cleaning
1500 1501 versions.discard(b'01')
1501 1502 versions.discard(b'02')
1502 1503 if not needv03:
1503 1504 versions.discard(b'03')
1504 1505 return versions
1505 1506
1506 1507
1507 1508 # Changegroup versions that can be applied to the repo
1508 1509 def supportedincomingversions(repo):
1509 1510 return allsupportedversions(repo)
1510 1511
1511 1512
1512 1513 # Changegroup versions that can be created from the repo
1513 1514 def supportedoutgoingversions(repo):
1514 1515 versions = allsupportedversions(repo)
1515 1516 if scmutil.istreemanifest(repo):
1516 1517 # Versions 01 and 02 support only flat manifests and it's just too
1517 1518 # expensive to convert between the flat manifest and tree manifest on
1518 1519 # the fly. Since tree manifests are hashed differently, all of history
1519 1520 # would have to be converted. Instead, we simply don't even pretend to
1520 1521 # support versions 01 and 02.
1521 1522 versions.discard(b'01')
1522 1523 versions.discard(b'02')
1523 1524 if requirements.NARROW_REQUIREMENT in repo.requirements:
1524 1525 # Versions 01 and 02 don't support revlog flags, and we need to
1525 1526 # support that for stripping and unbundling to work.
1526 1527 versions.discard(b'01')
1527 1528 versions.discard(b'02')
1528 1529 if LFS_REQUIREMENT in repo.requirements:
1529 1530 # Versions 01 and 02 don't support revlog flags, and we need to
1530 1531 # mark LFS entries with REVIDX_EXTSTORED.
1531 1532 versions.discard(b'01')
1532 1533 versions.discard(b'02')
1533 1534
1534 1535 return versions
1535 1536
1536 1537
1537 1538 def localversion(repo):
1538 1539 # Finds the best version to use for bundles that are meant to be used
1539 1540 # locally, such as those from strip and shelve, and temporary bundles.
1540 1541 return max(supportedoutgoingversions(repo))
1541 1542
1542 1543
1543 1544 def safeversion(repo):
1544 1545 # Finds the smallest version that it's safe to assume clients of the repo
1545 1546 # will support. For example, all hg versions that support generaldelta also
1546 1547 # support changegroup 02.
1547 1548 versions = supportedoutgoingversions(repo)
1548 1549 if b'generaldelta' in repo.requirements:
1549 1550 versions.discard(b'01')
1550 1551 assert versions
1551 1552 return min(versions)
1552 1553
1553 1554
1554 1555 def getbundler(
1555 1556 version,
1556 1557 repo,
1557 1558 bundlecaps=None,
1558 1559 oldmatcher=None,
1559 1560 matcher=None,
1560 1561 ellipses=False,
1561 1562 shallow=False,
1562 1563 ellipsisroots=None,
1563 1564 fullnodes=None,
1564 1565 ):
1565 1566 assert version in supportedoutgoingversions(repo)
1566 1567
1567 1568 if matcher is None:
1568 1569 matcher = matchmod.always()
1569 1570 if oldmatcher is None:
1570 1571 oldmatcher = matchmod.never()
1571 1572
1572 1573 if version == b'01' and not matcher.always():
1573 1574 raise error.ProgrammingError(
1574 1575 b'version 01 changegroups do not support sparse file matchers'
1575 1576 )
1576 1577
1577 1578 if ellipses and version in (b'01', b'02'):
1578 1579 raise error.Abort(
1579 1580 _(
1580 1581 b'ellipsis nodes require at least cg3 on client and server, '
1581 1582 b'but negotiated version %s'
1582 1583 )
1583 1584 % version
1584 1585 )
1585 1586
1586 1587 # Requested files could include files not in the local store. So
1587 1588 # filter those out.
1588 1589 matcher = repo.narrowmatch(matcher)
1589 1590
1590 1591 fn = _packermap[version][0]
1591 1592 return fn(
1592 1593 repo,
1593 1594 oldmatcher,
1594 1595 matcher,
1595 1596 bundlecaps,
1596 1597 ellipses=ellipses,
1597 1598 shallow=shallow,
1598 1599 ellipsisroots=ellipsisroots,
1599 1600 fullnodes=fullnodes,
1600 1601 )
1601 1602
1602 1603
1603 1604 def getunbundler(version, fh, alg, extras=None):
1604 1605 return _packermap[version][1](fh, alg, extras=extras)
1605 1606
1606 1607
1607 1608 def _changegroupinfo(repo, nodes, source):
1608 1609 if repo.ui.verbose or source == b'bundle':
1609 1610 repo.ui.status(_(b"%d changesets found\n") % len(nodes))
1610 1611 if repo.ui.debugflag:
1611 1612 repo.ui.debug(b"list of changesets:\n")
1612 1613 for node in nodes:
1613 1614 repo.ui.debug(b"%s\n" % hex(node))
1614 1615
1615 1616
1616 1617 def makechangegroup(
1617 1618 repo, outgoing, version, source, fastpath=False, bundlecaps=None
1618 1619 ):
1619 1620 cgstream = makestream(
1620 1621 repo,
1621 1622 outgoing,
1622 1623 version,
1623 1624 source,
1624 1625 fastpath=fastpath,
1625 1626 bundlecaps=bundlecaps,
1626 1627 )
1627 1628 return getunbundler(
1628 1629 version,
1629 1630 util.chunkbuffer(cgstream),
1630 1631 None,
1631 1632 {b'clcount': len(outgoing.missing)},
1632 1633 )
1633 1634
1634 1635
1635 1636 def makestream(
1636 1637 repo,
1637 1638 outgoing,
1638 1639 version,
1639 1640 source,
1640 1641 fastpath=False,
1641 1642 bundlecaps=None,
1642 1643 matcher=None,
1643 1644 ):
1644 1645 bundler = getbundler(version, repo, bundlecaps=bundlecaps, matcher=matcher)
1645 1646
1646 1647 repo = repo.unfiltered()
1647 1648 commonrevs = outgoing.common
1648 1649 csets = outgoing.missing
1649 1650 heads = outgoing.ancestorsof
1650 1651 # We go through the fast path if we get told to, or if all (unfiltered)
1651 1652 # heads have been requested (since we then know all the linkrevs will
1652 1653 # be pulled by the client).
1653 1654 heads.sort()
1654 1655 fastpathlinkrev = fastpath or (
1655 1656 repo.filtername is None and heads == sorted(repo.heads())
1656 1657 )
1657 1658
1658 1659 repo.hook(b'preoutgoing', throw=True, source=source)
1659 1660 _changegroupinfo(repo, csets, source)
1660 1661 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1661 1662
1662 1663
1663 1664 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1664 1665 revisions = 0
1665 1666 files = 0
1666 1667 progress = repo.ui.makeprogress(
1667 1668 _(b'files'), unit=_(b'files'), total=expectedfiles
1668 1669 )
1669 1670 for chunkdata in iter(source.filelogheader, {}):
1670 1671 files += 1
1671 1672 f = chunkdata[b"filename"]
1672 1673 repo.ui.debug(b"adding %s revisions\n" % f)
1673 1674 progress.increment()
1674 1675 fl = repo.file(f)
1675 1676 o = len(fl)
1676 1677 try:
1677 1678 deltas = source.deltaiter()
1678 1679 if not fl.addgroup(deltas, revmap, trp):
1679 1680 raise error.Abort(_(b"received file revlog group is empty"))
1680 1681 except error.CensoredBaseError as e:
1681 1682 raise error.Abort(_(b"received delta base is censored: %s") % e)
1682 1683 revisions += len(fl) - o
1683 1684 if f in needfiles:
1684 1685 needs = needfiles[f]
1685 1686 for new in pycompat.xrange(o, len(fl)):
1686 1687 n = fl.node(new)
1687 1688 if n in needs:
1688 1689 needs.remove(n)
1689 1690 else:
1690 1691 raise error.Abort(_(b"received spurious file revlog entry"))
1691 1692 if not needs:
1692 1693 del needfiles[f]
1693 1694 progress.complete()
1694 1695
1695 1696 for f, needs in pycompat.iteritems(needfiles):
1696 1697 fl = repo.file(f)
1697 1698 for n in needs:
1698 1699 try:
1699 1700 fl.rev(n)
1700 1701 except error.LookupError:
1701 1702 raise error.Abort(
1702 1703 _(b'missing file data for %s:%s - run hg verify')
1703 1704 % (f, hex(n))
1704 1705 )
1705 1706
1706 1707 return revisions, files
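Editorial aside, not part of the patch: source.deltaiter() above yields the 7-tuples that fl.addgroup() consumes. A minimal, hedged sketch of that layout, assuming this module's existing short() import; the helper name is hypothetical:

def describe_delta(d):
    # Tuple layout per the storage interface:
    # (node, p1, p2, linknode, deltabase, delta, flags)
    node, p1, p2, linknode, deltabase, delta, flags = d
    return b'%s: %d byte delta against %s (flags=%d)' % (
        short(node), len(delta), short(deltabase), flags
    )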
@@ -1,798 +1,799 b''
1 1 # exchangev2.py - repository exchange for wire protocol version 2
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import weakref
12 12
13 13 from .i18n import _
14 14 from .node import (
15 15 nullid,
16 16 short,
17 17 )
18 18 from . import (
19 19 bookmarks,
20 20 error,
21 21 mdiff,
22 22 narrowspec,
23 23 phases,
24 24 pycompat,
25 25 setdiscovery,
26 26 )
27 27 from .interfaces import repository
28 28
29 29
30 30 def pull(pullop):
31 31 """Pull using wire protocol version 2."""
32 32 repo = pullop.repo
33 33 remote = pullop.remote
34 34
35 35 usingrawchangelogandmanifest = _checkuserawstorefiledata(pullop)
36 36
37 37 # If this is a clone and it was requested to perform a "stream clone",
38 38 # we obtain the raw files data from the remote then fall back to an
39 39 # incremental pull. This is somewhat hacky and is not nearly robust enough
40 40 # for long-term usage.
41 41 if usingrawchangelogandmanifest:
42 42 with repo.transaction(b'clone'):
43 43 _fetchrawstorefiles(repo, remote)
44 44 repo.invalidate(clearfilecache=True)
45 45
46 46 tr = pullop.trmanager.transaction()
47 47
48 48 # We don't use the repo's narrow matcher here because the patterns passed
49 49 # to exchange.pull() could be different.
50 50 narrowmatcher = narrowspec.match(
51 51 repo.root,
52 52 # Empty maps to nevermatcher. So always
53 53 # set includes if missing.
54 54 pullop.includepats or {b'path:.'},
55 55 pullop.excludepats,
56 56 )
57 57
58 58 if pullop.includepats or pullop.excludepats:
59 59 pathfilter = {}
60 60 if pullop.includepats:
61 61 pathfilter[b'include'] = sorted(pullop.includepats)
62 62 if pullop.excludepats:
63 63 pathfilter[b'exclude'] = sorted(pullop.excludepats)
64 64 else:
65 65 pathfilter = None
66 66
67 67 # Figure out what needs to be fetched.
68 68 common, fetch, remoteheads = _pullchangesetdiscovery(
69 69 repo, remote, pullop.heads, abortwhenunrelated=pullop.force
70 70 )
71 71
72 72 # And fetch the data.
73 73 pullheads = pullop.heads or remoteheads
74 74 csetres = _fetchchangesets(repo, tr, remote, common, fetch, pullheads)
75 75
76 76 # New revisions are written to the changelog. But all other updates
77 77 # are deferred. Do those now.
78 78
79 79 # Ensure all new changesets are draft by default. If the repo is
80 80 # publishing, the phase will be adjusted by the loop below.
81 81 if csetres[b'added']:
82 82 phases.registernew(
83 83 repo, tr, phases.draft, [repo[n].rev() for n in csetres[b'added']]
84 84 )
85 85
86 86 # And adjust the phase of all changesets accordingly.
87 87 for phasenumber, phase in phases.phasenames.items():
88 88 if phase == b'secret' or not csetres[b'nodesbyphase'][phase]:
89 89 continue
90 90
91 91 phases.advanceboundary(
92 92 repo,
93 93 tr,
94 94 phasenumber,
95 95 csetres[b'nodesbyphase'][phase],
96 96 )
97 97
98 98 # Write bookmark updates.
99 99 bookmarks.updatefromremote(
100 100 repo.ui,
101 101 repo,
102 102 csetres[b'bookmarks'],
103 103 remote.url(),
104 104 pullop.gettransaction,
105 105 explicit=pullop.explicitbookmarks,
106 106 )
107 107
108 108 manres = _fetchmanifests(repo, tr, remote, csetres[b'manifestnodes'])
109 109
110 110 # We don't properly support shallow changeset and manifest yet. So we apply
111 111 # depth limiting locally.
112 112 if pullop.depth:
113 113 relevantcsetnodes = set()
114 114 clnode = repo.changelog.node
115 115
116 116 for rev in repo.revs(
117 117 b'ancestors(%ln, %s)', pullheads, pullop.depth - 1
118 118 ):
119 119 relevantcsetnodes.add(clnode(rev))
120 120
121 121 csetrelevantfilter = lambda n: n in relevantcsetnodes
122 122
123 123 else:
124 124 csetrelevantfilter = lambda n: True
125 125
126 126 # If obtaining the raw store files, we need to scan the full repo to
127 127 # derive all the changesets, manifests, and linkrevs.
128 128 if usingrawchangelogandmanifest:
129 129 csetsforfiles = []
130 130 mnodesforfiles = []
131 131 manifestlinkrevs = {}
132 132
133 133 for rev in repo:
134 134 ctx = repo[rev]
135 135 node = ctx.node()
136 136
137 137 if not csetrelevantfilter(node):
138 138 continue
139 139
140 140 mnode = ctx.manifestnode()
141 141
142 142 csetsforfiles.append(node)
143 143 mnodesforfiles.append(mnode)
144 144 manifestlinkrevs[mnode] = rev
145 145
146 146 else:
147 147 csetsforfiles = [n for n in csetres[b'added'] if csetrelevantfilter(n)]
148 148 mnodesforfiles = manres[b'added']
149 149 manifestlinkrevs = manres[b'linkrevs']
150 150
151 151 # Find all file nodes referenced by added manifests and fetch those
152 152 # revisions.
153 153 fnodes = _derivefilesfrommanifests(repo, narrowmatcher, mnodesforfiles)
154 154 _fetchfilesfromcsets(
155 155 repo,
156 156 tr,
157 157 remote,
158 158 pathfilter,
159 159 fnodes,
160 160 csetsforfiles,
161 161 manifestlinkrevs,
162 162 shallow=bool(pullop.depth),
163 163 )
164 164
165 165
166 166 def _checkuserawstorefiledata(pullop):
167 167 """Check whether we should use rawstorefiledata command to retrieve data."""
168 168
169 169 repo = pullop.repo
170 170 remote = pullop.remote
171 171
172 172 # Command to obtain raw store data isn't available.
173 173 if b'rawstorefiledata' not in remote.apidescriptor[b'commands']:
174 174 return False
175 175
176 176 # Only honor if user requested stream clone operation.
177 177 if not pullop.streamclonerequested:
178 178 return False
179 179
180 180 # Only works on empty repos.
181 181 if len(repo):
182 182 return False
183 183
184 184 # TODO This is super hacky. There needs to be a storage API for this. We
185 185 # also need to check for compatibility with the remote.
186 186 if b'revlogv1' not in repo.requirements:
187 187 return False
188 188
189 189 return True
190 190
191 191
192 192 def _fetchrawstorefiles(repo, remote):
193 193 with remote.commandexecutor() as e:
194 194 objs = e.callcommand(
195 195 b'rawstorefiledata',
196 196 {
197 197 b'files': [b'changelog', b'manifestlog'],
198 198 },
199 199 ).result()
200 200
201 201 # First object is a summary of files data that follows.
202 202 overall = next(objs)
203 203
204 204 progress = repo.ui.makeprogress(
205 205 _(b'clone'), total=overall[b'totalsize'], unit=_(b'bytes')
206 206 )
207 207 with progress:
208 208 progress.update(0)
209 209
210 210 # Next are pairs of file metadata, data.
211 211 while True:
212 212 try:
213 213 filemeta = next(objs)
214 214 except StopIteration:
215 215 break
216 216
217 217 for k in (b'location', b'path', b'size'):
218 218 if k not in filemeta:
219 219 raise error.Abort(
220 220 _(b'remote file data missing key: %s') % k
221 221 )
222 222
223 223 if filemeta[b'location'] == b'store':
224 224 vfs = repo.svfs
225 225 else:
226 226 raise error.Abort(
227 227 _(b'invalid location for raw file data: %s')
228 228 % filemeta[b'location']
229 229 )
230 230
231 231 bytesremaining = filemeta[b'size']
232 232
233 233 with vfs.open(filemeta[b'path'], b'wb') as fh:
234 234 while True:
235 235 try:
236 236 chunk = next(objs)
237 237 except StopIteration:
238 238 break
239 239
240 240 bytesremaining -= len(chunk)
241 241
242 242 if bytesremaining < 0:
243 243 raise error.Abort(
244 244 _(
245 245 b'received invalid number of bytes for file '
246 246 b'data; expected %d, got extra'
247 247 )
248 248 % filemeta[b'size']
249 249 )
250 250
251 251 progress.increment(step=len(chunk))
252 252 fh.write(chunk)
253 253
254 254 try:
255 255 if chunk.islast:
256 256 break
257 257 except AttributeError:
258 258 raise error.Abort(
259 259 _(
260 260 b'did not receive indefinite length bytestring '
261 261 b'for file data'
262 262 )
263 263 )
264 264
265 265 if bytesremaining:
266 266 raise error.Abort(
267 267 _(
268 268 b'received invalid number of bytes for '
269 269 b'file data; expected %d, got %d'
270 270 )
271 271 % (
272 272 filemeta[b'size'],
273 273 filemeta[b'size'] - bytesremaining,
274 274 )
275 275 )
276 276
277 277
278 278 def _pullchangesetdiscovery(repo, remote, heads, abortwhenunrelated=True):
279 279 """Determine which changesets need to be pulled."""
280 280
281 281 if heads:
282 282 knownnode = repo.changelog.hasnode
283 283 if all(knownnode(head) for head in heads):
284 284 return heads, False, heads
285 285
286 286 # TODO wire protocol version 2 is capable of more efficient discovery
287 287 # than setdiscovery. Consider implementing something better.
288 288 common, fetch, remoteheads = setdiscovery.findcommonheads(
289 289 repo.ui, repo, remote, abortwhenunrelated=abortwhenunrelated
290 290 )
291 291
292 292 common = set(common)
293 293 remoteheads = set(remoteheads)
294 294
295 295 # If a remote head is filtered locally, put it back in the common set.
296 296 # See the comment in exchange._pulldiscoverychangegroup() for more.
297 297
298 298 if fetch and remoteheads:
299 299 has_node = repo.unfiltered().changelog.index.has_node
300 300
301 301 common |= {head for head in remoteheads if has_node(head)}
302 302
303 303 if set(remoteheads).issubset(common):
304 304 fetch = []
305 305
306 306 common.discard(nullid)
307 307
308 308 return common, fetch, remoteheads
309 309
310 310
311 311 def _fetchchangesets(repo, tr, remote, common, fetch, remoteheads):
312 312 # TODO consider adding a step here where we obtain the DAG shape first
313 313 # (or ask the server to slice changesets into chunks for us) so that
314 314 # we can perform multiple fetches in batches. This will facilitate
315 315 # resuming interrupted clones, higher server-side cache hit rates due
316 316 # to smaller segments, etc.
317 317 with remote.commandexecutor() as e:
318 318 objs = e.callcommand(
319 319 b'changesetdata',
320 320 {
321 321 b'revisions': [
322 322 {
323 323 b'type': b'changesetdagrange',
324 324 b'roots': sorted(common),
325 325 b'heads': sorted(remoteheads),
326 326 }
327 327 ],
328 328 b'fields': {b'bookmarks', b'parents', b'phase', b'revision'},
329 329 },
330 330 ).result()
331 331
332 332 # The context manager waits on all response data when exiting. So
333 333 # we need to remain in the context manager in order to stream data.
334 334 return _processchangesetdata(repo, tr, objs)
335 335
336 336
337 337 def _processchangesetdata(repo, tr, objs):
338 338 repo.hook(b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs))
339 339
340 340 urepo = repo.unfiltered()
341 341 cl = urepo.changelog
342 342
343 343 cl.delayupdate(tr)
344 344
345 345 # The first emitted object is a header describing the data that
346 346 # follows.
347 347 meta = next(objs)
348 348
349 349 progress = repo.ui.makeprogress(
350 350 _(b'changesets'), unit=_(b'chunks'), total=meta.get(b'totalitems')
351 351 )
352 352
353 353 manifestnodes = {}
354 354 added = []
355 355
356 356 def linkrev(node):
357 357 repo.ui.debug(b'add changeset %s\n' % short(node))
358 358 # Linkrev for changelog is always self.
359 359 return len(cl)
360 360
361 361 def ondupchangeset(cl, node):
362 362 added.append(node)
363 363
364 364 def onchangeset(cl, node):
365 365 progress.increment()
366 366
367 367 rev = cl.rev(node)
368 368 revision = cl.changelogrevision(rev)
369 369 added.append(node)
370 370
371 371 # We need to preserve the mapping of changelog revision to node
372 372 # so we can set the linkrev accordingly when manifests are added.
373 373 manifestnodes[rev] = revision.manifest
374 374
375 375 repo.register_changeset(rev, revision)
376 376
377 377 nodesbyphase = {phase: set() for phase in phases.phasenames.values()}
378 378 remotebookmarks = {}
379 379
380 380 # addgroup() expects a 7-tuple describing revisions. This normalizes
381 381 # the wire data to that format.
382 382 #
383 383 # This loop also aggregates non-revision metadata, such as phase
384 384 # data.
385 385 def iterrevisions():
386 386 for cset in objs:
387 387 node = cset[b'node']
388 388
389 389 if b'phase' in cset:
390 390 nodesbyphase[cset[b'phase']].add(node)
391 391
392 392 for mark in cset.get(b'bookmarks', []):
393 393 remotebookmarks[mark] = node
394 394
395 395 # TODO add mechanism for extensions to examine records so they
396 396 # can siphon off custom data fields.
397 397
398 398 extrafields = {}
399 399
400 400 for field, size in cset.get(b'fieldsfollowing', []):
401 401 extrafields[field] = next(objs)
402 402
403 403 # Some entries might be metadata-only updates.
404 404 if b'revision' not in extrafields:
405 405 continue
406 406
407 407 data = extrafields[b'revision']
408 408
409 409 yield (
410 410 node,
411 411 cset[b'parents'][0],
412 412 cset[b'parents'][1],
413 413 # Linknode is always itself for changesets.
414 414 cset[b'node'],
415 415 # We always send full revisions. So delta base is not set.
416 416 nullid,
417 417 mdiff.trivialdiffheader(len(data)) + data,
418 418 # Flags not yet supported.
419 419 0,
420 420 )
421 421
422 422 cl.addgroup(
423 423 iterrevisions(),
424 424 linkrev,
425 425 weakref.proxy(tr),
426 alwayscache=True,
426 427 addrevisioncb=onchangeset,
427 428 duplicaterevisioncb=ondupchangeset,
428 429 )
429 430
430 431 progress.complete()
431 432
432 433 return {
433 434 b'added': added,
434 435 b'nodesbyphase': nodesbyphase,
435 436 b'bookmarks': remotebookmarks,
436 437 b'manifestnodes': manifestnodes,
437 438 }
438 439
439 440
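Editorial aside: the cl.addgroup() call above is where this changeset's point shows up. Caching of the newly added fulltext is now requested explicitly with alwayscache=True rather than being implied by passing addrevisioncb; onchangeset() relies on it because it immediately calls cl.changelogrevision() on the just-added revision. A hedged sketch of the pattern with hypothetical names:

def collect_added(store, deltas, linkmapper, tr, cache_fulltexts=False):
    # The callbacks only report nodes; caching is an explicit opt-in now.
    added = []
    store.addgroup(
        deltas,
        linkmapper,
        tr,
        alwayscache=cache_fulltexts,
        addrevisioncb=lambda _store, node: added.append(node),
        duplicaterevisioncb=lambda _store, node: added.append(node),
    )
    return added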
440 441 def _fetchmanifests(repo, tr, remote, manifestnodes):
441 442 rootmanifest = repo.manifestlog.getstorage(b'')
442 443
443 444 # Some manifests can be shared between changesets. Filter out revisions
444 445 # we already know about.
445 446 fetchnodes = []
446 447 linkrevs = {}
447 448 seen = set()
448 449
449 450 for clrev, node in sorted(pycompat.iteritems(manifestnodes)):
450 451 if node in seen:
451 452 continue
452 453
453 454 try:
454 455 rootmanifest.rev(node)
455 456 except error.LookupError:
456 457 fetchnodes.append(node)
457 458 linkrevs[node] = clrev
458 459
459 460 seen.add(node)
460 461
461 462 # TODO handle tree manifests
462 463
463 464 # addgroup() expects a 7-tuple describing revisions. This normalizes
464 465 # the wire data to that format.
465 466 def iterrevisions(objs, progress):
466 467 for manifest in objs:
467 468 node = manifest[b'node']
468 469
469 470 extrafields = {}
470 471
471 472 for field, size in manifest.get(b'fieldsfollowing', []):
472 473 extrafields[field] = next(objs)
473 474
474 475 if b'delta' in extrafields:
475 476 basenode = manifest[b'deltabasenode']
476 477 delta = extrafields[b'delta']
477 478 elif b'revision' in extrafields:
478 479 basenode = nullid
479 480 revision = extrafields[b'revision']
480 481 delta = mdiff.trivialdiffheader(len(revision)) + revision
481 482 else:
482 483 continue
483 484
484 485 yield (
485 486 node,
486 487 manifest[b'parents'][0],
487 488 manifest[b'parents'][1],
488 489 # The value passed in is passed to the lookup function passed
489 490 # to addgroup(). We already have a map of manifest node to
490 491 # changelog revision number. So we just pass in the
491 492 # manifest node here and use linkrevs.__getitem__ as the
492 493 # resolution function.
493 494 node,
494 495 basenode,
495 496 delta,
496 497 # Flags not yet supported.
497 498 0,
498 499 )
499 500
500 501 progress.increment()
501 502
502 503 progress = repo.ui.makeprogress(
503 504 _(b'manifests'), unit=_(b'chunks'), total=len(fetchnodes)
504 505 )
505 506
506 507 commandmeta = remote.apidescriptor[b'commands'][b'manifestdata']
507 508 batchsize = commandmeta.get(b'recommendedbatchsize', 10000)
508 509 # TODO make size configurable on client?
509 510
510 511 # We send commands 1 at a time to the remote. This is not the most
511 512 # efficient because we incur a round trip at the end of each batch.
512 513 # However, the existing frame-based reactor keeps consuming server
513 514 # data in the background. And this results in response data buffering
514 515 # in memory. This can consume gigabytes of memory.
515 516 # TODO send multiple commands in a request once background buffering
516 517 # issues are resolved.
517 518
518 519 added = []
519 520
520 521 for i in pycompat.xrange(0, len(fetchnodes), batchsize):
521 522 batch = [node for node in fetchnodes[i : i + batchsize]]
522 523 if not batch:
523 524 continue
524 525
525 526 with remote.commandexecutor() as e:
526 527 objs = e.callcommand(
527 528 b'manifestdata',
528 529 {
529 530 b'tree': b'',
530 531 b'nodes': batch,
531 532 b'fields': {b'parents', b'revision'},
532 533 b'haveparents': True,
533 534 },
534 535 ).result()
535 536
536 537 # Chomp off header object.
537 538 next(objs)
538 539
539 540 def onchangeset(cl, node):
540 541 added.append(node)
541 542
542 543 rootmanifest.addgroup(
543 544 iterrevisions(objs, progress),
544 545 linkrevs.__getitem__,
545 546 weakref.proxy(tr),
546 547 addrevisioncb=onchangeset,
547 548 duplicaterevisioncb=onchangeset,
548 549 )
549 550
550 551 progress.complete()
551 552
552 553 return {
553 554 b'added': added,
554 555 b'linkrevs': linkrevs,
555 556 }
556 557
557 558
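Editorial aside: as the comment inside iterrevisions() notes, addgroup() feeds the fourth tuple element to the lookup function it was given; here that element is the manifest node and linkrevs.__getitem__ resolves it to a changelog revision. The same contract spelled out as a hedged sketch (hypothetical helper name):

def linkmapper_from(linkrevs):
    # addgroup() calls this with each tuple's linknode field and expects the
    # local changelog revision number to use as the new entry's linkrev.
    def lookup(linknode):
        return linkrevs[linknode]
    return lookup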
558 559 def _derivefilesfrommanifests(repo, matcher, manifestnodes):
559 560 """Determine what file nodes are relevant given a set of manifest nodes.
560 561
561 562 Returns a dict mapping file paths to dicts of file node to first manifest
562 563 node.
563 564 """
564 565 ml = repo.manifestlog
565 566 fnodes = collections.defaultdict(dict)
566 567
567 568 progress = repo.ui.makeprogress(
568 569 _(b'scanning manifests'), total=len(manifestnodes)
569 570 )
570 571
571 572 with progress:
572 573 for manifestnode in manifestnodes:
573 574 m = ml.get(b'', manifestnode)
574 575
575 576 # TODO this will pull in unwanted nodes because it takes the storage
576 577 # delta into consideration. What we really want is something that
577 578 # takes the delta between the manifest's parents. And ideally we
578 579 # would ignore file nodes that are known locally. For now, ignore
579 580 # both these limitations. This will result in incremental fetches
580 581 # requesting data we already have. So this is far from ideal.
581 582 md = m.readfast()
582 583
583 584 for path, fnode in md.items():
584 585 if matcher(path):
585 586 fnodes[path].setdefault(fnode, manifestnode)
586 587
587 588 progress.increment()
588 589
589 590 return fnodes
590 591
591 592
592 593 def _fetchfiles(repo, tr, remote, fnodes, linkrevs):
593 594 """Fetch file data from explicit file revisions."""
594 595
595 596 def iterrevisions(objs, progress):
596 597 for filerevision in objs:
597 598 node = filerevision[b'node']
598 599
599 600 extrafields = {}
600 601
601 602 for field, size in filerevision.get(b'fieldsfollowing', []):
602 603 extrafields[field] = next(objs)
603 604
604 605 if b'delta' in extrafields:
605 606 basenode = filerevision[b'deltabasenode']
606 607 delta = extrafields[b'delta']
607 608 elif b'revision' in extrafields:
608 609 basenode = nullid
609 610 revision = extrafields[b'revision']
610 611 delta = mdiff.trivialdiffheader(len(revision)) + revision
611 612 else:
612 613 continue
613 614
614 615 yield (
615 616 node,
616 617 filerevision[b'parents'][0],
617 618 filerevision[b'parents'][1],
618 619 node,
619 620 basenode,
620 621 delta,
621 622 # Flags not yet supported.
622 623 0,
623 624 )
624 625
625 626 progress.increment()
626 627
627 628 progress = repo.ui.makeprogress(
628 629 _(b'files'),
629 630 unit=_(b'chunks'),
630 631 total=sum(len(v) for v in pycompat.itervalues(fnodes)),
631 632 )
632 633
633 634 # TODO make batch size configurable
634 635 batchsize = 10000
635 636 fnodeslist = [x for x in sorted(fnodes.items())]
636 637
637 638 for i in pycompat.xrange(0, len(fnodeslist), batchsize):
638 639 batch = [x for x in fnodeslist[i : i + batchsize]]
639 640 if not batch:
640 641 continue
641 642
642 643 with remote.commandexecutor() as e:
643 644 fs = []
644 645 locallinkrevs = {}
645 646
646 647 for path, nodes in batch:
647 648 fs.append(
648 649 (
649 650 path,
650 651 e.callcommand(
651 652 b'filedata',
652 653 {
653 654 b'path': path,
654 655 b'nodes': sorted(nodes),
655 656 b'fields': {b'parents', b'revision'},
656 657 b'haveparents': True,
657 658 },
658 659 ),
659 660 )
660 661 )
661 662
662 663 locallinkrevs[path] = {
663 664 node: linkrevs[manifestnode]
664 665 for node, manifestnode in pycompat.iteritems(nodes)
665 666 }
666 667
667 668 for path, f in fs:
668 669 objs = f.result()
669 670
670 671 # Chomp off header objects.
671 672 next(objs)
672 673
673 674 store = repo.file(path)
674 675 store.addgroup(
675 676 iterrevisions(objs, progress),
676 677 locallinkrevs[path].__getitem__,
677 678 weakref.proxy(tr),
678 679 )
679 680
680 681
681 682 def _fetchfilesfromcsets(
682 683 repo, tr, remote, pathfilter, fnodes, csets, manlinkrevs, shallow=False
683 684 ):
684 685 """Fetch file data from explicit changeset revisions."""
685 686
686 687 def iterrevisions(objs, remaining, progress):
687 688 while remaining:
688 689 filerevision = next(objs)
689 690
690 691 node = filerevision[b'node']
691 692
692 693 extrafields = {}
693 694
694 695 for field, size in filerevision.get(b'fieldsfollowing', []):
695 696 extrafields[field] = next(objs)
696 697
697 698 if b'delta' in extrafields:
698 699 basenode = filerevision[b'deltabasenode']
699 700 delta = extrafields[b'delta']
700 701 elif b'revision' in extrafields:
701 702 basenode = nullid
702 703 revision = extrafields[b'revision']
703 704 delta = mdiff.trivialdiffheader(len(revision)) + revision
704 705 else:
705 706 continue
706 707
707 708 if b'linknode' in filerevision:
708 709 linknode = filerevision[b'linknode']
709 710 else:
710 711 linknode = node
711 712
712 713 yield (
713 714 node,
714 715 filerevision[b'parents'][0],
715 716 filerevision[b'parents'][1],
716 717 linknode,
717 718 basenode,
718 719 delta,
719 720 # Flags not yet supported.
720 721 0,
721 722 )
722 723
723 724 progress.increment()
724 725 remaining -= 1
725 726
726 727 progress = repo.ui.makeprogress(
727 728 _(b'files'),
728 729 unit=_(b'chunks'),
729 730 total=sum(len(v) for v in pycompat.itervalues(fnodes)),
730 731 )
731 732
732 733 commandmeta = remote.apidescriptor[b'commands'][b'filesdata']
733 734 batchsize = commandmeta.get(b'recommendedbatchsize', 50000)
734 735
735 736 shallowfiles = repository.REPO_FEATURE_SHALLOW_FILE_STORAGE in repo.features
736 737 fields = {b'parents', b'revision'}
737 738 clrev = repo.changelog.rev
738 739
739 740 # There are no guarantees that we'll have ancestor revisions if
740 741 # a) this repo has shallow file storage or b) shallow data fetching is enabled.
741 742 # Force remote to not delta against possibly unknown revisions when these
742 743 # conditions hold.
743 744 haveparents = not (shallowfiles or shallow)
744 745
745 746 # Similarly, we may not have calculated linkrevs for all incoming file
746 747 # revisions. Ask the remote to do work for us in this case.
747 748 if not haveparents:
748 749 fields.add(b'linknode')
749 750
750 751 for i in pycompat.xrange(0, len(csets), batchsize):
751 752 batch = [x for x in csets[i : i + batchsize]]
752 753 if not batch:
753 754 continue
754 755
755 756 with remote.commandexecutor() as e:
756 757 args = {
757 758 b'revisions': [
758 759 {
759 760 b'type': b'changesetexplicit',
760 761 b'nodes': batch,
761 762 }
762 763 ],
763 764 b'fields': fields,
764 765 b'haveparents': haveparents,
765 766 }
766 767
767 768 if pathfilter:
768 769 args[b'pathfilter'] = pathfilter
769 770
770 771 objs = e.callcommand(b'filesdata', args).result()
771 772
772 773 # First object is an overall header.
773 774 overall = next(objs)
774 775
775 776 # We have overall['totalpaths'] segments.
776 777 for i in pycompat.xrange(overall[b'totalpaths']):
777 778 header = next(objs)
778 779
779 780 path = header[b'path']
780 781 store = repo.file(path)
781 782
782 783 linkrevs = {
783 784 fnode: manlinkrevs[mnode]
784 785 for fnode, mnode in pycompat.iteritems(fnodes[path])
785 786 }
786 787
787 788 def getlinkrev(node):
788 789 if node in linkrevs:
789 790 return linkrevs[node]
790 791 else:
791 792 return clrev(node)
792 793
793 794 store.addgroup(
794 795 iterrevisions(objs, header[b'totalitems'], progress),
795 796 getlinkrev,
796 797 weakref.proxy(tr),
797 798 maybemissingparents=shallow,
798 799 )
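Editorial aside: both file-fetching paths above convert a full 'revision' field into a delta against nullid by prefixing it with mdiff.trivialdiffheader(). A hedged sketch of what that prefix is assumed to encode, namely a bdiff hunk header of big-endian (start, end, length) integers that replaces the empty range [0, 0) with the whole fulltext:

import struct

def fulltext_as_nullid_delta(data):
    # Assumed equivalent to mdiff.trivialdiffheader(len(data)) + data.
    return struct.pack(b'>lll', 0, 0, len(data)) + data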
@@ -1,1987 +1,1993 b''
1 1 # repository.py - Interfaces and base classes for repositories and peers.
2 2 #
3 3 # Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from ..i18n import _
11 11 from .. import error
12 12 from . import util as interfaceutil
13 13
14 14 # Local repository feature string.
15 15
16 16 # Revlogs are being used for file storage.
17 17 REPO_FEATURE_REVLOG_FILE_STORAGE = b'revlogfilestorage'
18 18 # The storage part of the repository is shared from an external source.
19 19 REPO_FEATURE_SHARED_STORAGE = b'sharedstore'
20 20 # LFS supported for backing file storage.
21 21 REPO_FEATURE_LFS = b'lfs'
22 22 # Repository supports being stream cloned.
23 23 REPO_FEATURE_STREAM_CLONE = b'streamclone'
24 24 # Files storage may lack data for all ancestors.
25 25 REPO_FEATURE_SHALLOW_FILE_STORAGE = b'shallowfilestorage'
26 26
27 27 REVISION_FLAG_CENSORED = 1 << 15
28 28 REVISION_FLAG_ELLIPSIS = 1 << 14
29 29 REVISION_FLAG_EXTSTORED = 1 << 13
30 30 REVISION_FLAG_SIDEDATA = 1 << 12
31 31 REVISION_FLAG_HASCOPIESINFO = 1 << 11
32 32
33 33 REVISION_FLAGS_KNOWN = (
34 34 REVISION_FLAG_CENSORED
35 35 | REVISION_FLAG_ELLIPSIS
36 36 | REVISION_FLAG_EXTSTORED
37 37 | REVISION_FLAG_SIDEDATA
38 38 | REVISION_FLAG_HASCOPIESINFO
39 39 )
40 40
41 41 CG_DELTAMODE_STD = b'default'
42 42 CG_DELTAMODE_PREV = b'previous'
43 43 CG_DELTAMODE_FULL = b'fulltext'
44 44 CG_DELTAMODE_P1 = b'p1'
45 45
46 46
47 47 class ipeerconnection(interfaceutil.Interface):
48 48 """Represents a "connection" to a repository.
49 49
50 50 This is the base interface for representing a connection to a repository.
51 51 It holds basic properties and methods applicable to all peer types.
52 52
53 53 This is not a complete interface definition and should not be used
54 54 outside of this module.
55 55 """
56 56
57 57 ui = interfaceutil.Attribute("""ui.ui instance""")
58 58
59 59 def url():
60 60 """Returns a URL string representing this peer.
61 61
62 62 Currently, implementations expose the raw URL used to construct the
63 63 instance. It may contain credentials as part of the URL. The
64 64 expectations of the value aren't well-defined and this could lead to
65 65 data leakage.
66 66
67 67 TODO audit/clean consumers and more clearly define the contents of this
68 68 value.
69 69 """
70 70
71 71 def local():
72 72 """Returns a local repository instance.
73 73
74 74 If the peer represents a local repository, returns an object that
75 75 can be used to interface with it. Otherwise returns ``None``.
76 76 """
77 77
78 78 def peer():
79 79 """Returns an object conforming to this interface.
80 80
81 81 Most implementations will ``return self``.
82 82 """
83 83
84 84 def canpush():
85 85 """Returns a boolean indicating if this peer can be pushed to."""
86 86
87 87 def close():
88 88 """Close the connection to this peer.
89 89
90 90 This is called when the peer will no longer be used. Resources
91 91 associated with the peer should be cleaned up.
92 92 """
93 93
94 94
95 95 class ipeercapabilities(interfaceutil.Interface):
96 96 """Peer sub-interface related to capabilities."""
97 97
98 98 def capable(name):
99 99 """Determine support for a named capability.
100 100
101 101 Returns ``False`` if capability not supported.
102 102
103 103 Returns ``True`` if boolean capability is supported. Returns a string
104 104 if capability support is non-boolean.
105 105
106 106 Capability strings may or may not map to wire protocol capabilities.
107 107 """
108 108
109 109 def requirecap(name, purpose):
110 110 """Require a capability to be present.
111 111
112 112 Raises a ``CapabilityError`` if the capability isn't present.
113 113 """
114 114
115 115
116 116 class ipeercommands(interfaceutil.Interface):
117 117 """Client-side interface for communicating over the wire protocol.
118 118
119 119 This interface is used as a gateway to the Mercurial wire protocol.
120 120 Methods commonly call wire protocol commands of the same name.
121 121 """
122 122
123 123 def branchmap():
124 124 """Obtain heads in named branches.
125 125
126 126 Returns a dict mapping branch name to an iterable of nodes that are
127 127 heads on that branch.
128 128 """
129 129
130 130 def capabilities():
131 131 """Obtain capabilities of the peer.
132 132
133 133 Returns a set of string capabilities.
134 134 """
135 135
136 136 def clonebundles():
137 137 """Obtains the clone bundles manifest for the repo.
138 138
139 139 Returns the manifest as unparsed bytes.
140 140 """
141 141
142 142 def debugwireargs(one, two, three=None, four=None, five=None):
143 143 """Used to facilitate debugging of arguments passed over the wire."""
144 144
145 145 def getbundle(source, **kwargs):
146 146 """Obtain remote repository data as a bundle.
147 147
148 148 This command is how the bulk of repository data is transferred from
149 149 the peer to the local repository
150 150
151 151 Returns a generator of bundle data.
152 152 """
153 153
154 154 def heads():
155 155 """Determine all known head revisions in the peer.
156 156
157 157 Returns an iterable of binary nodes.
158 158 """
159 159
160 160 def known(nodes):
161 161 """Determine whether multiple nodes are known.
162 162
163 163 Accepts an iterable of nodes whose presence to check for.
164 164
165 165 Returns an iterable of booleans indicating whether the corresponding node
166 166 at that index is known to the peer.
167 167 """
168 168
169 169 def listkeys(namespace):
170 170 """Obtain all keys in a pushkey namespace.
171 171
172 172 Returns an iterable of key names.
173 173 """
174 174
175 175 def lookup(key):
176 176 """Resolve a value to a known revision.
177 177
178 178 Returns a binary node of the resolved revision on success.
179 179 """
180 180
181 181 def pushkey(namespace, key, old, new):
182 182 """Set a value using the ``pushkey`` protocol.
183 183
184 184 Arguments correspond to the pushkey namespace and key to operate on and
185 185 the old and new values for that key.
186 186
187 187 Returns a string with the peer result. The value inside varies by the
188 188 namespace.
189 189 """
190 190
191 191 def stream_out():
192 192 """Obtain streaming clone data.
193 193
194 194 Successful result should be a generator of data chunks.
195 195 """
196 196
197 197 def unbundle(bundle, heads, url):
198 198 """Transfer repository data to the peer.
199 199
200 200 This is how the bulk of data during a push is transferred.
201 201
202 202 Returns the integer number of heads added to the peer.
203 203 """
204 204
205 205
206 206 class ipeerlegacycommands(interfaceutil.Interface):
207 207 """Interface for implementing support for legacy wire protocol commands.
208 208
209 209 Wire protocol commands transition to legacy status when they are no longer
210 210 used by modern clients. To facilitate identifying which commands are
211 211 legacy, the interfaces are split.
212 212 """
213 213
214 214 def between(pairs):
215 215 """Obtain nodes between pairs of nodes.
216 216
217 217 ``pairs`` is an iterable of node pairs.
218 218
219 219 Returns an iterable of iterables of nodes corresponding to each
220 220 requested pair.
221 221 """
222 222
223 223 def branches(nodes):
224 224 """Obtain ancestor changesets of specific nodes back to a branch point.
225 225
226 226 For each requested node, the peer finds the first ancestor node that is
227 227 a DAG root or is a merge.
228 228
229 229 Returns an iterable of iterables with the resolved values for each node.
230 230 """
231 231
232 232 def changegroup(nodes, source):
233 233 """Obtain a changegroup with data for descendants of specified nodes."""
234 234
235 235 def changegroupsubset(bases, heads, source):
236 236 pass
237 237
238 238
239 239 class ipeercommandexecutor(interfaceutil.Interface):
240 240 """Represents a mechanism to execute remote commands.
241 241
242 242 This is the primary interface for requesting that wire protocol commands
243 243 be executed. Instances of this interface are active in a context manager
244 244 and have a well-defined lifetime. When the context manager exits, all
245 245 outstanding requests are waited on.
246 246 """
247 247
248 248 def callcommand(name, args):
249 249 """Request that a named command be executed.
250 250
251 251 Receives the command name and a dictionary of command arguments.
252 252
253 253 Returns a ``concurrent.futures.Future`` that will resolve to the
254 254 result of that command request. That exact value is left up to
255 255 the implementation and possibly varies by command.
256 256
257 257 Not all commands can coexist with other commands in an executor
258 258 instance: it depends on the underlying wire protocol transport being
259 259 used and the command itself.
260 260
261 261 Implementations MAY call ``sendcommands()`` automatically if the
262 262 requested command can not coexist with other commands in this executor.
263 263
264 264 Implementations MAY call ``sendcommands()`` automatically when the
265 265 future's ``result()`` is called. So, consumers using multiple
266 266 commands with an executor MUST ensure that ``result()`` is not called
267 267 until all command requests have been issued.
268 268 """
269 269
270 270 def sendcommands():
271 271 """Trigger submission of queued command requests.
272 272
273 273 Not all transports submit commands as soon as they are requested to
274 274 run. When called, this method forces queued command requests to be
275 275 issued. It will no-op if all commands have already been sent.
276 276
277 277 When called, no more new commands may be issued with this executor.
278 278 """
279 279
280 280 def close():
281 281 """Signal that this command request is finished.
282 282
283 283 When called, no more new commands may be issued. All outstanding
284 284 commands that have previously been issued are waited on before
285 285 returning. This not only includes waiting for the futures to resolve,
286 286 but also waiting for all response data to arrive. In other words,
287 287 calling this waits for all on-wire state for issued command requests
288 288 to finish.
289 289
290 290 When used as a context manager, this method is called when exiting the
291 291 context manager.
292 292
293 293 This method may call ``sendcommands()`` if there are buffered commands.
294 294 """
295 295
296 296
297 297 class ipeerrequests(interfaceutil.Interface):
298 298 """Interface for executing commands on a peer."""
299 299
300 300 limitedarguments = interfaceutil.Attribute(
301 301 """True if the peer cannot receive large argument value for commands."""
302 302 )
303 303
304 304 def commandexecutor():
305 305 """A context manager that resolves to an ipeercommandexecutor.
306 306
307 307 The object this resolves to can be used to issue command requests
308 308 to the peer.
309 309
310 310 Callers should call its ``callcommand`` method to issue command
311 311 requests.
312 312
313 313 A new executor should be obtained for each distinct set of commands
314 314 (possibly just a single command) that the consumer wants to execute
315 315 as part of a single operation or round trip. This is because some
316 316 peers are half-duplex and/or don't support persistent connections.
317 317 e.g. in the case of HTTP peers, commands sent to an executor represent
318 318 a single HTTP request. While some peers may support multiple command
319 319 sends over the wire per executor, consumers need to code to the least
320 320 capable peer. So it should be assumed that command executors buffer
321 321 called commands until they are told to send them and that each
322 322 command executor could result in a new connection or wire-level request
323 323 being issued.
324 324 """
325 325
326 326
327 327 class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
328 328 """Unified interface for peer repositories.
329 329
330 330 All peer instances must conform to this interface.
331 331 """
332 332
333 333
334 334 class ipeerv2(ipeerconnection, ipeercapabilities, ipeerrequests):
335 335 """Unified peer interface for wire protocol version 2 peers."""
336 336
337 337 apidescriptor = interfaceutil.Attribute(
338 338 """Data structure holding description of server API."""
339 339 )
340 340
341 341
342 342 @interfaceutil.implementer(ipeerbase)
343 343 class peer(object):
344 344 """Base class for peer repositories."""
345 345
346 346 limitedarguments = False
347 347
348 348 def capable(self, name):
349 349 caps = self.capabilities()
350 350 if name in caps:
351 351 return True
352 352
353 353 name = b'%s=' % name
354 354 for cap in caps:
355 355 if cap.startswith(name):
356 356 return cap[len(name) :]
357 357
358 358 return False
359 359
360 360 def requirecap(self, name, purpose):
361 361 if self.capable(name):
362 362 return
363 363
364 364 raise error.CapabilityError(
365 365 _(
366 366 b'cannot %s; remote repository does not support the '
367 367 b'\'%s\' capability'
368 368 )
369 369 % (purpose, name)
370 370 )
371 371
372 372
373 373 class iverifyproblem(interfaceutil.Interface):
374 374 """Represents a problem with the integrity of the repository.
375 375
376 376 Instances of this interface are emitted to describe an integrity issue
377 377 with a repository (e.g. corrupt storage, missing data, etc).
378 378
379 379 Instances are essentially messages associated with severity.
380 380 """
381 381
382 382 warning = interfaceutil.Attribute(
383 383 """Message indicating a non-fatal problem."""
384 384 )
385 385
386 386 error = interfaceutil.Attribute("""Message indicating a fatal problem.""")
387 387
388 388 node = interfaceutil.Attribute(
389 389 """Revision encountering the problem.
390 390
391 391 ``None`` means the problem doesn't apply to a single revision.
392 392 """
393 393 )
394 394
395 395
396 396 class irevisiondelta(interfaceutil.Interface):
397 397 """Represents a delta between one revision and another.
398 398
399 399 Instances convey enough information to allow a revision to be exchanged
400 400 with another repository.
401 401
402 402 Instances represent the fulltext revision data or a delta against
403 403 another revision. Therefore the ``revision`` and ``delta`` attributes
404 404 are mutually exclusive.
405 405
406 406 Typically used for changegroup generation.
407 407 """
408 408
409 409 node = interfaceutil.Attribute("""20 byte node of this revision.""")
410 410
411 411 p1node = interfaceutil.Attribute(
412 412 """20 byte node of 1st parent of this revision."""
413 413 )
414 414
415 415 p2node = interfaceutil.Attribute(
416 416 """20 byte node of 2nd parent of this revision."""
417 417 )
418 418
419 419 linknode = interfaceutil.Attribute(
420 420 """20 byte node of the changelog revision this node is linked to."""
421 421 )
422 422
423 423 flags = interfaceutil.Attribute(
424 424 """2 bytes of integer flags that apply to this revision.
425 425
426 426 This is a bitwise composition of the ``REVISION_FLAG_*`` constants.
427 427 """
428 428 )
429 429
430 430 basenode = interfaceutil.Attribute(
431 431 """20 byte node of the revision this data is a delta against.
432 432
433 433 ``nullid`` indicates that the revision is a full revision and not
434 434 a delta.
435 435 """
436 436 )
437 437
438 438 baserevisionsize = interfaceutil.Attribute(
439 439 """Size of base revision this delta is against.
440 440
441 441 May be ``None`` if ``basenode`` is ``nullid``.
442 442 """
443 443 )
444 444
445 445 revision = interfaceutil.Attribute(
446 446 """Raw fulltext of revision data for this node."""
447 447 )
448 448
449 449 delta = interfaceutil.Attribute(
450 450 """Delta between ``basenode`` and ``node``.
451 451
452 452 Stored in the bdiff delta format.
453 453 """
454 454 )
455 455
456 456
457 457 class ifilerevisionssequence(interfaceutil.Interface):
458 458 """Contains index data for all revisions of a file.
459 459
460 460 Types implementing this behave like lists of tuples. The index
461 461 in the list corresponds to the revision number. The values contain
462 462 index metadata.
463 463
464 464 The *null* revision (revision number -1) is always the last item
465 465 in the index.
466 466 """
467 467
468 468 def __len__():
469 469 """The total number of revisions."""
470 470
471 471 def __getitem__(rev):
472 472 """Returns the object having a specific revision number.
473 473
474 474 Returns an 8-tuple with the following fields:
475 475
476 476 offset+flags
477 477 Contains the offset and flags for the revision. 64-bit unsigned
478 478 integer where first 6 bytes are the offset and the next 2 bytes
479 479 are flags. The offset can be 0 if it is not used by the store.
480 480 compressed size
481 481 Size of the revision data in the store. It can be 0 if it isn't
482 482 needed by the store.
483 483 uncompressed size
484 484 Fulltext size. It can be 0 if it isn't needed by the store.
485 485 base revision
486 486 Revision number of revision the delta for storage is encoded
487 487 against. -1 indicates not encoded against a base revision.
488 488 link revision
489 489 Revision number of changelog revision this entry is related to.
490 490 p1 revision
491 491 Revision number of 1st parent. -1 if no 1st parent.
492 492 p2 revision
493 493 Revision number of 2nd parent. -1 if no 2nd parent.
494 494 node
495 495 Binary node value for this revision number.
496 496
497 497 Negative values should index off the end of the sequence. ``-1``
498 498 should return the null revision. ``-2`` should return the most
499 499 recent revision.
500 500 """
501 501
502 502 def __contains__(rev):
503 503 """Whether a revision number exists."""
504 504
505 505 def insert(self, i, entry):
506 506 """Add an item to the index at specific revision."""
507 507
508 508
509 509 class ifileindex(interfaceutil.Interface):
510 510 """Storage interface for index data of a single file.
511 511
512 512 File storage data is divided into index metadata and data storage.
513 513 This interface defines the index portion of the interface.
514 514
515 515 The index logically consists of:
516 516
517 517 * A mapping between revision numbers and nodes.
518 518 * DAG data (storing and querying the relationship between nodes).
519 519 * Metadata to facilitate storage.
520 520 """
521 521
522 522 def __len__():
523 523 """Obtain the number of revisions stored for this file."""
524 524
525 525 def __iter__():
526 526 """Iterate over revision numbers for this file."""
527 527
528 528 def hasnode(node):
529 529 """Returns a bool indicating if a node is known to this store.
530 530
531 531 Implementations must only return True for full, binary node values:
532 532 hex nodes, revision numbers, and partial node matches must be
533 533 rejected.
534 534
535 535 The null node is never present.
536 536 """
537 537
538 538 def revs(start=0, stop=None):
539 539 """Iterate over revision numbers for this file, with control."""
540 540
541 541 def parents(node):
542 542 """Returns a 2-tuple of parent nodes for a revision.
543 543
544 544 Values will be ``nullid`` if the parent is empty.
545 545 """
546 546
547 547 def parentrevs(rev):
548 548 """Like parents() but operates on revision numbers."""
549 549
550 550 def rev(node):
551 551 """Obtain the revision number given a node.
552 552
553 553 Raises ``error.LookupError`` if the node is not known.
554 554 """
555 555
556 556 def node(rev):
557 557 """Obtain the node value given a revision number.
558 558
559 559 Raises ``IndexError`` if the node is not known.
560 560 """
561 561
562 562 def lookup(node):
563 563 """Attempt to resolve a value to a node.
564 564
565 565 Value can be a binary node, hex node, revision number, or a string
566 566 that can be converted to an integer.
567 567
568 568 Raises ``error.LookupError`` if a node could not be resolved.
569 569 """
570 570
571 571 def linkrev(rev):
572 572 """Obtain the changeset revision number a revision is linked to."""
573 573
574 574 def iscensored(rev):
575 575 """Return whether a revision's content has been censored."""
576 576
577 577 def commonancestorsheads(node1, node2):
578 578 """Obtain an iterable of nodes containing heads of common ancestors.
579 579
580 580 See ``ancestor.commonancestorsheads()``.
581 581 """
582 582
583 583 def descendants(revs):
584 584 """Obtain descendant revision numbers for a set of revision numbers.
585 585
586 586 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
587 587 """
588 588
589 589 def heads(start=None, stop=None):
590 590 """Obtain a list of nodes that are DAG heads, with control.
591 591
592 592 The set of revisions examined can be limited by specifying
593 593 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
594 594 iterable of nodes. DAG traversal starts at earlier revision
595 595 ``start`` and iterates forward until any node in ``stop`` is
596 596 encountered.
597 597 """
598 598
599 599 def children(node):
600 600 """Obtain nodes that are children of a node.
601 601
602 602 Returns a list of nodes.
603 603 """
604 604
605 605
606 606 class ifiledata(interfaceutil.Interface):
607 607 """Storage interface for data storage of a specific file.
608 608
609 609 This complements ``ifileindex`` and provides an interface for accessing
610 610 data for a tracked file.
611 611 """
612 612
613 613 def size(rev):
614 614 """Obtain the fulltext size of file data.
615 615
616 616 Any metadata is excluded from size measurements.
617 617 """
618 618
619 619 def revision(node, raw=False):
620 620 """Obtain fulltext data for a node.
621 621
622 622 By default, any storage transformations are applied before the data
623 623 is returned. If ``raw`` is True, non-raw storage transformations
624 624 are not applied.
625 625
626 626 The fulltext data may contain a header containing metadata. Most
627 627 consumers should use ``read()`` to obtain the actual file data.
628 628 """
629 629
630 630 def rawdata(node):
631 631 """Obtain raw data for a node."""
632 632
633 633 def read(node):
634 634 """Resolve file fulltext data.
635 635
636 636 This is similar to ``revision()`` except any metadata in the data
637 637 headers is stripped.
638 638 """
639 639
640 640 def renamed(node):
641 641 """Obtain copy metadata for a node.
642 642
643 643 Returns ``False`` if no copy metadata is stored or a 2-tuple of
644 644 (path, node) from which this revision was copied.
645 645 """
646 646
647 647 def cmp(node, fulltext):
648 648 """Compare fulltext to another revision.
649 649
650 650 Returns True if the fulltext is different from what is stored.
651 651
652 652 This takes copy metadata into account.
653 653
654 654 TODO better document the copy metadata and censoring logic.
655 655 """
656 656
657 657 def emitrevisions(
658 658 nodes,
659 659 nodesorder=None,
660 660 revisiondata=False,
661 661 assumehaveparentrevisions=False,
662 662 deltamode=CG_DELTAMODE_STD,
663 663 ):
664 664 """Produce ``irevisiondelta`` for revisions.
665 665
666 666 Given an iterable of nodes, emits objects conforming to the
667 667 ``irevisiondelta`` interface that describe revisions in storage.
668 668
669 669 This method is a generator.
670 670
671 671 The input nodes may be unordered. Implementations must ensure that a
672 672 node's parents are emitted before the node itself. Transitively, this
673 673 means that a node may only be emitted once all its ancestors in
674 674 ``nodes`` have also been emitted.
675 675
676 676 By default, emits "index" data (the ``node``, ``p1node``, and
677 677 ``p2node`` attributes). If ``revisiondata`` is set, revision data
678 678 will also be present on the emitted objects.
679 679
680 680 With default argument values, implementations can choose to emit
681 681 either fulltext revision data or a delta. When emitting deltas,
682 682 implementations must consider whether the delta's base revision
683 683 fulltext is available to the receiver.
684 684
685 685 The base revision fulltext is guaranteed to be available if any of
686 686 the following are met:
687 687
688 688 * Its fulltext revision was emitted by this method call.
689 689 * A delta for that revision was emitted by this method call.
690 690 * ``assumehaveparentrevisions`` is True and the base revision is a
691 691 parent of the node.
692 692
693 693 ``nodesorder`` can be used to control the order that revisions are
694 694 emitted. By default, revisions can be reordered as long as they are
695 695 in DAG topological order (see above). If the value is ``nodes``,
696 696 the iteration order from ``nodes`` should be used. If the value is
697 697 ``storage``, then the native order from the backing storage layer
698 698 is used. (Not all storage layers will have strong ordering, and the behavior
699 699 of this mode is storage-dependent.) ``nodes`` ordering can force
700 700 revisions to be emitted before their ancestors, so consumers should
701 701 use it with care.
702 702
703 703 The ``linknode`` attribute on the returned ``irevisiondelta`` may not
704 704 be set and it is the caller's responsibility to resolve it, if needed.
705 705
706 706 If ``deltamode`` is CG_DELTAMODE_PREV and revision data is requested,
707 707 all revision data should be emitted as deltas against the revision
708 708 emitted just prior. The initial revision should be a delta against its
709 709 1st parent.
710 710 """
711 711
712 712
713 713 class ifilemutation(interfaceutil.Interface):
714 714 """Storage interface for mutation events of a tracked file."""
715 715
716 716 def add(filedata, meta, transaction, linkrev, p1, p2):
717 717 """Add a new revision to the store.
718 718
719 719 Takes file data, dictionary of metadata, a transaction, linkrev,
720 720 and parent nodes.
721 721
722 722 Returns the node that was added.
723 723
724 724 May no-op if a revision matching the supplied data is already stored.
725 725 """
726 726
727 727 def addrevision(
728 728 revisiondata,
729 729 transaction,
730 730 linkrev,
731 731 p1,
732 732 p2,
733 733 node=None,
734 734 flags=0,
735 735 cachedelta=None,
736 736 ):
737 737 """Add a new revision to the store.
738 738
739 739 This is similar to ``add()`` except it operates at a lower level.
740 740
741 741 The data passed in already contains a metadata header, if any.
742 742
743 743 ``node`` and ``flags`` can be used to define the expected node and
744 744 the flags to use with storage. ``flags`` is a bitwise value composed
745 745 of the various ``REVISION_FLAG_*`` constants.
746 746
747 747 ``add()`` is usually called when adding files from e.g. the working
748 748 directory. ``addrevision()`` is often called by ``add()`` and for
749 749 scenarios where revision data has already been computed, such as when
750 750 applying raw data from a peer repo.
751 751 """
752 752
753 753 def addgroup(
754 754 deltas,
755 755 linkmapper,
756 756 transaction,
757 757 addrevisioncb=None,
758 758 duplicaterevisioncb=None,
759 759 maybemissingparents=False,
760 760 ):
761 761 """Process a series of deltas for storage.
762 762
763 763 ``deltas`` is an iterable of 7-tuples of
764 764 (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
765 765 to add.
766 766
767 767 The ``delta`` field contains ``mpatch`` data to apply to a base
768 768 revision, identified by ``deltabase``. The base node can be
769 769 ``nullid``, in which case the header from the delta can be ignored
770 770 and the delta used as the fulltext.
771 771
772 ``alwayscache`` instructs the lower layers to cache the content of the
773 newly added revision, even if it needs to be explicitly computed.
774 This used to be the default when ``addrevisioncb`` was provided up to
775 Mercurial 5.8.
776
772 777 ``addrevisioncb`` should be called for each node as it is committed.
778 ``duplicaterevisioncb`` should be called for each pre-existing node.
773 779
774 780 ``maybemissingparents`` is a bool indicating whether the incoming
775 781 data may reference parents/ancestor revisions that aren't present.
776 782 This flag is set when receiving data into a "shallow" store that
777 783 doesn't hold all history.
778 784
779 785 Returns a list of nodes that were processed. A node will be in the list
780 786 even if it existed in the store previously.
781 787 """
782 788
783 789 def censorrevision(tr, node, tombstone=b''):
784 790 """Remove the content of a single revision.
785 791
786 792 The specified ``node`` will have its content purged from storage.
787 793 Future attempts to access the revision data for this node will
788 794 result in failure.
789 795
790 796 A ``tombstone`` message can optionally be stored. This message may be
791 797 displayed to users when they attempt to access the missing revision
792 798 data.
793 799
794 800 Storage backends may have stored deltas against the previous content
795 801 in this revision. As part of censoring a revision, these storage
796 802 backends are expected to rewrite any internally stored deltas such
797 803 that they no longer reference the deleted content.
798 804 """
799 805
800 806 def getstrippoint(minlink):
801 807 """Find the minimum revision that must be stripped to strip a linkrev.
802 808
803 809 Returns a 2-tuple containing the minimum revision number and a set
804 810 of all revisions numbers that would be broken by this strip.
805 811
806 812 TODO this is highly revlog centric and should be abstracted into
807 813 a higher-level deletion API. ``repair.strip()`` relies on this.
808 814 """
809 815
810 816 def strip(minlink, transaction):
811 817 """Remove storage of items starting at a linkrev.
812 818
813 819 This uses ``getstrippoint()`` to determine the first node to remove.
814 820 Then it effectively truncates storage for all revisions after that.
815 821
816 822 TODO this is highly revlog centric and should be abstracted into a
817 823 higher-level deletion API.
818 824 """
819 825
820 826
821 827 class ifilestorage(ifileindex, ifiledata, ifilemutation):
822 828 """Complete storage interface for a single tracked file."""
823 829
824 830 def files():
825 831 """Obtain paths that are backing storage for this file.
826 832
827 833 TODO this is used heavily by verify code and there should probably
828 834 be a better API for that.
829 835 """
830 836
831 837 def storageinfo(
832 838 exclusivefiles=False,
833 839 sharedfiles=False,
834 840 revisionscount=False,
835 841 trackedsize=False,
836 842 storedsize=False,
837 843 ):
838 844 """Obtain information about storage for this file's data.
839 845
840 846 Returns a dict describing storage for this tracked path. The keys
841 847 in the dict map to arguments of the same. The arguments are bools
842 848 indicating whether to calculate and obtain that data.
843 849
844 850 exclusivefiles
845 851 Iterable of (vfs, path) describing files that are exclusively
846 852 used to back storage for this tracked path.
847 853
848 854 sharedfiles
849 855 Iterable of (vfs, path) describing files that are used to back
850 856 storage for this tracked path. Those files may also provide storage
851 857 for other stored entities.
852 858
853 859 revisionscount
854 860 Number of revisions available for retrieval.
855 861
856 862 trackedsize
857 863 Total size in bytes of all tracked revisions. This is a sum of the
858 864 length of the fulltext of all revisions.
859 865
860 866 storedsize
861 867 Total size in bytes used to store data for all tracked revisions.
862 868 This is commonly less than ``trackedsize`` due to internal usage
863 869 of deltas rather than fulltext revisions.
864 870
865 871 Not all storage backends may support all queries or have a reasonable
866 872 value to use. In that case, the value should be set to ``None`` and
867 873 callers are expected to handle this special value.
868 874 """
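An illustrative query, assuming ``fl`` implements ``ifilestorage``; the bytes keys mirror the argument names, and a ``None`` value means the backend could not answer that question:

    info = fl.storageinfo(revisionscount=True, trackedsize=True, storedsize=True)
    revisions = info[b'revisionscount']
    if info[b'trackedsize'] and info[b'storedsize'] is not None:
        # rough measure of how much delta storage saves over raw fulltexts
        ratio = float(info[b'storedsize']) / info[b'trackedsize']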
869 875
870 876 def verifyintegrity(state):
871 877 """Verifies the integrity of file storage.
872 878
873 879 ``state`` is a dict holding state of the verifier process. It can be
874 880 used to communicate data between invocations of multiple storage
875 881 primitives.
876 882
877 883 If individual revisions cannot have their revision content resolved,
878 884 the method is expected to set the ``skipread`` key to a set of nodes
879 885 that encountered problems. If set, the method can also add the node(s)
880 886 to ``safe_renamed`` in order to indicate nodes that may perform the
881 887 rename checks with currently accessible data.
882 888
883 889 The method yields objects conforming to the ``iverifyproblem``
884 890 interface.
885 891 """
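A sketch of a verify-style consumer. ``fl`` and ``ui`` are placeholders, and the ``warning``/``error`` attributes on the yielded problems are assumptions based on the ``iverifyproblem`` description elsewhere in this module:

    state = {b'skipread': set(), b'safe_renamed': set()}
    for problem in fl.verifyintegrity(state):
        ui.warn((problem.warning or problem.error) + b'\n')
    unreadable = state[b'skipread']   # nodes whose content could not be resolved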
886 892
887 893
888 894 class idirs(interfaceutil.Interface):
889 895 """Interface representing a collection of directories from paths.
890 896
891 897 This interface is essentially a derived data structure representing
892 898 directories from a collection of paths.
893 899 """
894 900
895 901 def addpath(path):
896 902 """Add a path to the collection.
897 903
898 904 All directories in the path will be added to the collection.
899 905 """
900 906
901 907 def delpath(path):
902 908 """Remove a path from the collection.
903 909
904 910 If the removal was the last path in a particular directory, the
905 911 directory is removed from the collection.
906 912 """
907 913
908 914 def __iter__():
909 915 """Iterate over the directories in this collection of paths."""
910 916
911 917 def __contains__(path):
912 918 """Whether a specific directory is in this collection."""
913 919
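The interface maps naturally onto a reference count per directory; a minimal sketch of that idea (not the real ``pathutil.dirs``):

    class refcounteddirs(object):
        def __init__(self):
            self._dirs = {}          # directory -> number of paths beneath it

        def addpath(self, path):
            for d in _parentdirs(path):
                self._dirs[d] = self._dirs.get(d, 0) + 1

        def delpath(self, path):
            for d in _parentdirs(path):
                self._dirs[d] -= 1
                if not self._dirs[d]:
                    del self._dirs[d]    # last path under this directory is gone

        def __iter__(self):
            return iter(self._dirs)

        def __contains__(self, d):
            return d in self._dirs

    def _parentdirs(path):
        # b'a/b/c' -> b'a/b', b'a' (helper invented for this sketch)
        parts = path.split(b'/')[:-1]
        while parts:
            yield b'/'.join(parts)
            parts.pop()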
914 920
915 921 class imanifestdict(interfaceutil.Interface):
916 922 """Interface representing a manifest data structure.
917 923
918 924 A manifest is effectively a dict mapping paths to entries. Each entry
919 925 consists of a binary node and extra flags affecting that entry.
920 926 """
921 927
922 928 def __getitem__(path):
923 929 """Returns the binary node value for a path in the manifest.
924 930
925 931 Raises ``KeyError`` if the path does not exist in the manifest.
926 932
927 933 Equivalent to ``self.find(path)[0]``.
928 934 """
929 935
930 936 def find(path):
931 937 """Returns the entry for a path in the manifest.
932 938
933 939 Returns a 2-tuple of (node, flags).
934 940
935 941 Raises ``KeyError`` if the path does not exist in the manifest.
936 942 """
937 943
938 944 def __len__():
939 945 """Return the number of entries in the manifest."""
940 946
941 947 def __nonzero__():
942 948 """Returns True if the manifest has entries, False otherwise."""
943 949
944 950 __bool__ = __nonzero__
945 951
946 952 def __setitem__(path, node):
947 953 """Define the node value for a path in the manifest.
948 954
949 955 If the path is already in the manifest, its flags will be copied to
950 956 the new entry.
951 957 """
952 958
953 959 def __contains__(path):
954 960 """Whether a path exists in the manifest."""
955 961
956 962 def __delitem__(path):
957 963 """Remove a path from the manifest.
958 964
959 965 Raises ``KeyError`` if the path is not in the manifest.
960 966 """
961 967
962 968 def __iter__():
963 969 """Iterate over paths in the manifest."""
964 970
965 971 def iterkeys():
966 972 """Iterate over paths in the manifest."""
967 973
968 974 def keys():
969 975 """Obtain a list of paths in the manifest."""
970 976
971 977 def filesnotin(other, match=None):
972 978 """Obtain the set of paths in this manifest but not in another.
973 979
974 980 ``match`` is an optional matcher function to be applied to both
975 981 manifests.
976 982
977 983 Returns a set of paths.
978 984 """
979 985
980 986 def dirs():
981 987 """Returns an object implementing the ``idirs`` interface."""
982 988
983 989 def hasdir(dir):
984 990 """Returns a bool indicating if a directory is in this manifest."""
985 991
986 992 def walk(match):
987 993 """Generator of paths in manifest satisfying a matcher.
988 994
989 995 If the matcher has explicit files listed and they don't exist in
990 996 the manifest, ``match.bad()`` is called for each missing file.
991 997 """
992 998
993 999 def diff(other, match=None, clean=False):
994 1000 """Find differences between this manifest and another.
995 1001
996 1002 This manifest is compared to ``other``.
997 1003
998 1004 If ``match`` is provided, the two manifests are filtered against this
999 1005 matcher and only entries satisfying the matcher are compared.
1000 1006
1001 1007 If ``clean`` is True, unchanged files are included in the returned
1002 1008 object.
1003 1009
1004 1010 Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
1005 1011 the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
1006 1012 represents the node and flags for this manifest and ``(node2, flag2)``
1007 1013 are the same for the other manifest.
1008 1014 """
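For illustration only, with invented nodes, the returned mapping looks like this:

    changes = m1.diff(m2)
    # executable bit added in the other manifest:
    #   changes[b'build.sh'] == ((node1, b''), (node2, b'x'))
    # file removed in the other manifest:
    #   changes[b'old.txt'] == ((node1, b''), (None, b''))
    # with clean=True, unchanged paths additionally map to None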
1009 1015
1010 1016 def setflag(path, flag):
1011 1017 """Set the flag value for a given path.
1012 1018
1013 1019 Raises ``KeyError`` if the path is not already in the manifest.
1014 1020 """
1015 1021
1016 1022 def get(path, default=None):
1017 1023 """Obtain the node value for a path or a default value if missing."""
1018 1024
1019 1025 def flags(path):
1020 1026 """Return the flags value for a path (default: empty bytestring)."""
1021 1027
1022 1028 def copy():
1023 1029 """Return a copy of this manifest."""
1024 1030
1025 1031 def items():
1026 1032 """Returns an iterable of (path, node) for items in this manifest."""
1027 1033
1028 1034 def iteritems():
1029 1035 """Identical to items()."""
1030 1036
1031 1037 def iterentries():
1032 1038 """Returns an iterable of (path, node, flags) for this manifest.
1033 1039
1034 1040 Similar to ``iteritems()`` except items are a 3-tuple and include
1035 1041 flags.
1036 1042 """
1037 1043
1038 1044 def text():
1039 1045 """Obtain the raw data representation for this manifest.
1040 1046
1041 1047 Result is used to create a manifest revision.
1042 1048 """
1043 1049
1044 1050 def fastdelta(base, changes):
1045 1051 """Obtain a delta between this manifest and another given changes.
1046 1052
1047 1053 ``base`` is the raw data representation for another manifest.
1048 1054
1049 1055 ``changes`` is an iterable of ``(path, to_delete)``.
1050 1056
1051 1057 Returns a 2-tuple containing ``bytearray(self.text())`` and the
1052 1058 delta between ``base`` and this manifest.
1053 1059
1054 1060 If this manifest implementation can't support ``fastdelta()``,
1055 1061 raise ``mercurial.manifest.FastdeltaUnavailable``.
1056 1062 """
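A hedged sketch of how a caller might use this, assuming ``m`` is an ``imanifestdict``, ``p1text`` is the parent manifest's raw text and ``added`` lists newly added paths:

    from mercurial.manifest import FastdeltaUnavailable

    try:
        arraytext, delta = m.fastdelta(p1text, [(f, False) for f in added])
    except FastdeltaUnavailable:
        arraytext = bytearray(m.text())
        delta = None    # caller falls back to a regular text diff instead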
1057 1063
1058 1064
1059 1065 class imanifestrevisionbase(interfaceutil.Interface):
1060 1066 """Base interface representing a single revision of a manifest.
1061 1067
1062 1068 Should not be used as a primary interface: should always be inherited
1063 1069 as part of a larger interface.
1064 1070 """
1065 1071
1066 1072 def copy():
1067 1073 """Obtain a copy of this manifest instance.
1068 1074
1069 1075 Returns an object conforming to the ``imanifestrevisionwritable``
1070 1076 interface. The instance will be associated with the same
1071 1077 ``imanifestlog`` collection as this instance.
1072 1078 """
1073 1079
1074 1080 def read():
1075 1081 """Obtain the parsed manifest data structure.
1076 1082
1077 1083 The returned object conforms to the ``imanifestdict`` interface.
1078 1084 """
1079 1085
1080 1086
1081 1087 class imanifestrevisionstored(imanifestrevisionbase):
1082 1088 """Interface representing a manifest revision committed to storage."""
1083 1089
1084 1090 def node():
1085 1091 """The binary node for this manifest."""
1086 1092
1087 1093 parents = interfaceutil.Attribute(
1088 1094 """List of binary nodes that are parents for this manifest revision."""
1089 1095 )
1090 1096
1091 1097 def readdelta(shallow=False):
1092 1098 """Obtain the manifest data structure representing changes from parent.
1093 1099
1094 1100 This manifest is compared to its 1st parent. A new manifest representing
1095 1101 those differences is constructed.
1096 1102
1097 1103 The returned object conforms to the ``imanifestdict`` interface.
1098 1104 """
1099 1105
1100 1106 def readfast(shallow=False):
1101 1107 """Calls either ``read()`` or ``readdelta()``.
1102 1108
1103 1109 The faster of the two options is called.
1104 1110 """
1105 1111
1106 1112 def find(key):
1107 1113 """Calls self.read().find(key)``.
1108 1114
1109 1115 Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
1110 1116 """
1111 1117
1112 1118
1113 1119 class imanifestrevisionwritable(imanifestrevisionbase):
1114 1120 """Interface representing a manifest revision that can be committed."""
1115 1121
1116 1122 def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
1117 1123 """Add this revision to storage.
1118 1124
1119 1125 Takes a transaction object, the changeset revision number it will
1120 1126 be associated with, its parent nodes, and lists of added and
1121 1127 removed paths.
1122 1128
1123 1129 If match is provided, storage can choose not to inspect or write out
1124 1130 items that do not match. Storage is still required to be able to provide
1125 1131 the full manifest in the future for any directories written (these
1126 1132 manifests should not be "narrowed on disk").
1127 1133
1128 1134 Returns the binary node of the created revision.
1129 1135 """
1130 1136
1131 1137
1132 1138 class imanifeststorage(interfaceutil.Interface):
1133 1139 """Storage interface for manifest data."""
1134 1140
1135 1141 tree = interfaceutil.Attribute(
1136 1142 """The path to the directory this manifest tracks.
1137 1143
1138 1144 The empty bytestring represents the root manifest.
1139 1145 """
1140 1146 )
1141 1147
1142 1148 index = interfaceutil.Attribute(
1143 1149 """An ``ifilerevisionssequence`` instance."""
1144 1150 )
1145 1151
1146 1152 indexfile = interfaceutil.Attribute(
1147 1153 """Path of revlog index file.
1148 1154
1149 1155 TODO this is revlog specific and should not be exposed.
1150 1156 """
1151 1157 )
1152 1158
1153 1159 opener = interfaceutil.Attribute(
1154 1160 """VFS opener to use to access underlying files used for storage.
1155 1161
1156 1162 TODO this is revlog specific and should not be exposed.
1157 1163 """
1158 1164 )
1159 1165
1160 1166 version = interfaceutil.Attribute(
1161 1167 """Revlog version number.
1162 1168
1163 1169 TODO this is revlog specific and should not be exposed.
1164 1170 """
1165 1171 )
1166 1172
1167 1173 _generaldelta = interfaceutil.Attribute(
1168 1174 """Whether generaldelta storage is being used.
1169 1175
1170 1176 TODO this is revlog specific and should not be exposed.
1171 1177 """
1172 1178 )
1173 1179
1174 1180 fulltextcache = interfaceutil.Attribute(
1175 1181 """Dict with cache of fulltexts.
1176 1182
1177 1183 TODO this doesn't feel appropriate for the storage interface.
1178 1184 """
1179 1185 )
1180 1186
1181 1187 def __len__():
1182 1188 """Obtain the number of revisions stored for this manifest."""
1183 1189
1184 1190 def __iter__():
1185 1191 """Iterate over revision numbers for this manifest."""
1186 1192
1187 1193 def rev(node):
1188 1194 """Obtain the revision number given a binary node.
1189 1195
1190 1196 Raises ``error.LookupError`` if the node is not known.
1191 1197 """
1192 1198
1193 1199 def node(rev):
1194 1200 """Obtain the node value given a revision number.
1195 1201
1196 1202 Raises ``error.LookupError`` if the revision is not known.
1197 1203 """
1198 1204
1199 1205 def lookup(value):
1200 1206 """Attempt to resolve a value to a node.
1201 1207
1202 1208 Value can be a binary node, hex node, revision number, or a bytes
1203 1209 that can be converted to an integer.
1204 1210
1205 1211 Raises ``error.LookupError`` if a node could not be resolved.
1206 1212 """
1207 1213
1208 1214 def parents(node):
1209 1215 """Returns a 2-tuple of parent nodes for a node.
1210 1216
1211 1217 Values will be ``nullid`` if the parent is empty.
1212 1218 """
1213 1219
1214 1220 def parentrevs(rev):
1215 1221 """Like parents() but operates on revision numbers."""
1216 1222
1217 1223 def linkrev(rev):
1218 1224 """Obtain the changeset revision number a revision is linked to."""
1219 1225
1220 1226 def revision(node, _df=None, raw=False):
1221 1227 """Obtain fulltext data for a node."""
1222 1228
1223 1229 def rawdata(node, _df=None):
1224 1230 """Obtain raw data for a node."""
1225 1231
1226 1232 def revdiff(rev1, rev2):
1227 1233 """Obtain a delta between two revision numbers.
1228 1234
1229 1235 The returned data is the result of ``bdiff.bdiff()`` on the raw
1230 1236 revision data.
1231 1237 """
1232 1238
1233 1239 def cmp(node, fulltext):
1234 1240 """Compare fulltext to another revision.
1235 1241
1236 1242 Returns True if the fulltext is different from what is stored.
1237 1243 """
1238 1244
1239 1245 def emitrevisions(
1240 1246 nodes,
1241 1247 nodesorder=None,
1242 1248 revisiondata=False,
1243 1249 assumehaveparentrevisions=False,
1244 1250 ):
1245 1251 """Produce ``irevisiondelta`` describing revisions.
1246 1252
1247 1253 See the documentation for ``ifiledata`` for more.
1248 1254 """
1249 1255
1250 1256 def addgroup(
1251 1257 deltas,
1252 1258 linkmapper,
1253 1259 transaction,
1254 1260 addrevisioncb=None,
1255 1261 duplicaterevisioncb=None,
1256 1262 ):
1257 1263 """Process a series of deltas for storage.
1258 1264
1259 1265 See the documentation in ``ifilemutation`` for more.
1260 1266 """
1261 1267
1262 1268 def rawsize(rev):
1263 1269 """Obtain the size of tracked data.
1264 1270
1265 1271 Is equivalent to ``len(m.rawdata(node))``.
1266 1272
1267 1273 TODO this method is only used by upgrade code and may be removed.
1268 1274 """
1269 1275
1270 1276 def getstrippoint(minlink):
1271 1277 """Find minimum revision that must be stripped to strip a linkrev.
1272 1278
1273 1279 See the documentation in ``ifilemutation`` for more.
1274 1280 """
1275 1281
1276 1282 def strip(minlink, transaction):
1277 1283 """Remove storage of items starting at a linkrev.
1278 1284
1279 1285 See the documentation in ``ifilemutation`` for more.
1280 1286 """
1281 1287
1282 1288 def checksize():
1283 1289 """Obtain the expected sizes of backing files.
1284 1290
1285 1291 TODO this is used by verify and it should not be part of the interface.
1286 1292 """
1287 1293
1288 1294 def files():
1289 1295 """Obtain paths that are backing storage for this manifest.
1290 1296
1291 1297 TODO this is used by verify and there should probably be a better API
1292 1298 for this functionality.
1293 1299 """
1294 1300
1295 1301 def deltaparent(rev):
1296 1302 """Obtain the revision that a revision is delta'd against.
1297 1303
1298 1304 TODO delta encoding is an implementation detail of storage and should
1299 1305 not be exposed to the storage interface.
1300 1306 """
1301 1307
1302 1308 def clone(tr, dest, **kwargs):
1303 1309 """Clone this instance to another."""
1304 1310
1305 1311 def clearcaches(clear_persisted_data=False):
1306 1312 """Clear any caches associated with this instance."""
1307 1313
1308 1314 def dirlog(d):
1309 1315 """Obtain a manifest storage instance for a tree."""
1310 1316
1311 1317 def add(
1312 1318 m, transaction, link, p1, p2, added, removed, readtree=None, match=None
1313 1319 ):
1314 1320 """Add a revision to storage.
1315 1321
1316 1322 ``m`` is an object conforming to ``imanifestdict``.
1317 1323
1318 1324 ``link`` is the linkrev revision number.
1319 1325
1320 1326 ``p1`` and ``p2`` are the parent revision numbers.
1321 1327
1322 1328 ``added`` and ``removed`` are iterables of added and removed paths,
1323 1329 respectively.
1324 1330
1325 1331 ``readtree`` is a function that can be used to read the child tree(s)
1326 1332 needed to recursively write the full tree structure when using
1327 1333 tree manifests.
1328 1334
1329 1335 ``match`` is a matcher that can be used to hint to storage that not all
1330 1336 paths must be inspected; this is an optimization and can be safely
1331 1337 ignored. Note that the storage must still be able to reproduce a full
1332 1338 manifest including files that did not match.
1333 1339 """
1334 1340
1335 1341 def storageinfo(
1336 1342 exclusivefiles=False,
1337 1343 sharedfiles=False,
1338 1344 revisionscount=False,
1339 1345 trackedsize=False,
1340 1346 storedsize=False,
1341 1347 ):
1342 1348 """Obtain information about storage for this manifest's data.
1343 1349
1344 1350 See ``ifilestorage.storageinfo()`` for a description of this method.
1345 1351 This one behaves the same way, except for manifest data.
1346 1352 """
1347 1353
1348 1354
1349 1355 class imanifestlog(interfaceutil.Interface):
1350 1356 """Interface representing a collection of manifest snapshots.
1351 1357
1352 1358 Represents the root manifest in a repository.
1353 1359
1354 1360 Also serves as a means to access nested tree manifests and to cache
1355 1361 tree manifests.
1356 1362 """
1357 1363
1358 1364 def __getitem__(node):
1359 1365 """Obtain a manifest instance for a given binary node.
1360 1366
1361 1367 Equivalent to calling ``self.get('', node)``.
1362 1368
1363 1369 The returned object conforms to the ``imanifestrevisionstored``
1364 1370 interface.
1365 1371 """
1366 1372
1367 1373 def get(tree, node, verify=True):
1368 1374 """Retrieve the manifest instance for a given directory and binary node.
1369 1375
1370 1376 ``node`` always refers to the node of the root manifest (which will be
1371 1377 the only manifest if flat manifests are being used).
1372 1378
1373 1379 If ``tree`` is the empty string, the root manifest is returned.
1374 1380 Otherwise the manifest for the specified directory will be returned
1375 1381 (requires tree manifests).
1376 1382
1377 1383 If ``verify`` is True, ``LookupError`` is raised if the node is not
1378 1384 known.
1379 1385
1380 1386 The returned object conforms to the ``imanifestrevisionstored``
1381 1387 interface.
1382 1388 """
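A hedged sketch of resolving manifests through this collection; ``mfl`` is assumed to be the repository's ``manifestlog`` and ``node`` a root manifest node (the trailing slash on the tree name follows the usual tree-manifest convention but is an assumption here):

    rootctx = mfl[node]                  # same as mfl.get(b'', node)
    files = rootctx.read()               # an imanifestdict of all tracked paths
    # with tree manifests, per-directory data can be requested explicitly:
    subctx = mfl.get(b'dir/subdir/', node)
    substore = mfl.getstorage(b'dir/subdir/')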
1383 1389
1384 1390 def getstorage(tree):
1385 1391 """Retrieve an interface to storage for a particular tree.
1386 1392
1387 1393 If ``tree`` is the empty bytestring, storage for the root manifest will
1388 1394 be returned. Otherwise storage for a tree manifest is returned.
1389 1395
1390 1396 TODO formalize interface for returned object.
1391 1397 """
1392 1398
1393 1399 def clearcaches():
1394 1400 """Clear caches associated with this collection."""
1395 1401
1396 1402 def rev(node):
1397 1403 """Obtain the revision number for a binary node.
1398 1404
1399 1405 Raises ``error.LookupError`` if the node is not known.
1400 1406 """
1401 1407
1402 1408 def update_caches(transaction):
1403 1409 """update whatever cache are relevant for the used storage."""
1404 1410
1405 1411
1406 1412 class ilocalrepositoryfilestorage(interfaceutil.Interface):
1407 1413 """Local repository sub-interface providing access to tracked file storage.
1408 1414
1409 1415 This interface defines how a repository accesses storage for a single
1410 1416 tracked file path.
1411 1417 """
1412 1418
1413 1419 def file(f):
1414 1420 """Obtain a filelog for a tracked path.
1415 1421
1416 1422 The returned type conforms to the ``ifilestorage`` interface.
1417 1423 """
1418 1424
1419 1425
1420 1426 class ilocalrepositorymain(interfaceutil.Interface):
1421 1427 """Main interface for local repositories.
1422 1428
1423 1429 This currently captures the reality of things - not how things should be.
1424 1430 """
1425 1431
1426 1432 supportedformats = interfaceutil.Attribute(
1427 1433 """Set of requirements that apply to stream clone.
1428 1434
1429 1435 This is actually a class attribute and is shared among all instances.
1430 1436 """
1431 1437 )
1432 1438
1433 1439 supported = interfaceutil.Attribute(
1434 1440 """Set of requirements that this repo is capable of opening."""
1435 1441 )
1436 1442
1437 1443 requirements = interfaceutil.Attribute(
1438 1444 """Set of requirements this repo uses."""
1439 1445 )
1440 1446
1441 1447 features = interfaceutil.Attribute(
1442 1448 """Set of "features" this repository supports.
1443 1449
1444 1450 A "feature" is a loosely-defined term. It can refer to a feature
1445 1451 in the classical sense or can describe an implementation detail
1446 1452 of the repository. For example, a ``readonly`` feature may denote
1447 1453 the repository as read-only. Or a ``revlogfilestore`` feature may
1448 1454 denote that the repository is using revlogs for file storage.
1449 1455
1450 1456 The intent of features is to provide a machine-queryable mechanism
1451 1457 for repo consumers to test for various repository characteristics.
1452 1458
1453 1459 Features are similar to ``requirements``. The main difference is that
1454 1460 requirements are stored on-disk and represent requirements to open the
1455 1461 repository. Features are more run-time capabilities of the repository
1456 1462 and more granular capabilities (which may be derived from requirements).
1457 1463 """
1458 1464 )
1459 1465
1460 1466 filtername = interfaceutil.Attribute(
1461 1467 """Name of the repoview that is active on this repo."""
1462 1468 )
1463 1469
1464 1470 wvfs = interfaceutil.Attribute(
1465 1471 """VFS used to access the working directory."""
1466 1472 )
1467 1473
1468 1474 vfs = interfaceutil.Attribute(
1469 1475 """VFS rooted at the .hg directory.
1470 1476
1471 1477 Used to access repository data not in the store.
1472 1478 """
1473 1479 )
1474 1480
1475 1481 svfs = interfaceutil.Attribute(
1476 1482 """VFS rooted at the store.
1477 1483
1478 1484 Used to access repository data in the store. Typically .hg/store.
1479 1485 But can point elsewhere if the store is shared.
1480 1486 """
1481 1487 )
1482 1488
1483 1489 root = interfaceutil.Attribute(
1484 1490 """Path to the root of the working directory."""
1485 1491 )
1486 1492
1487 1493 path = interfaceutil.Attribute("""Path to the .hg directory.""")
1488 1494
1489 1495 origroot = interfaceutil.Attribute(
1490 1496 """The filesystem path that was used to construct the repo."""
1491 1497 )
1492 1498
1493 1499 auditor = interfaceutil.Attribute(
1494 1500 """A pathauditor for the working directory.
1495 1501
1496 1502 This checks if a path refers to a nested repository.
1497 1503
1498 1504 Operates on the filesystem.
1499 1505 """
1500 1506 )
1501 1507
1502 1508 nofsauditor = interfaceutil.Attribute(
1503 1509 """A pathauditor for the working directory.
1504 1510
1505 1511 This is like ``auditor`` except it doesn't do filesystem checks.
1506 1512 """
1507 1513 )
1508 1514
1509 1515 baseui = interfaceutil.Attribute(
1510 1516 """Original ui instance passed into constructor."""
1511 1517 )
1512 1518
1513 1519 ui = interfaceutil.Attribute("""Main ui instance for this instance.""")
1514 1520
1515 1521 sharedpath = interfaceutil.Attribute(
1516 1522 """Path to the .hg directory of the repo this repo was shared from."""
1517 1523 )
1518 1524
1519 1525 store = interfaceutil.Attribute("""A store instance.""")
1520 1526
1521 1527 spath = interfaceutil.Attribute("""Path to the store.""")
1522 1528
1523 1529 sjoin = interfaceutil.Attribute("""Alias to self.store.join.""")
1524 1530
1525 1531 cachevfs = interfaceutil.Attribute(
1526 1532 """A VFS used to access the cache directory.
1527 1533
1528 1534 Typically .hg/cache.
1529 1535 """
1530 1536 )
1531 1537
1532 1538 wcachevfs = interfaceutil.Attribute(
1533 1539 """A VFS used to access the cache directory dedicated to working copy
1534 1540
1535 1541 Typically .hg/wcache.
1536 1542 """
1537 1543 )
1538 1544
1539 1545 filteredrevcache = interfaceutil.Attribute(
1540 1546 """Holds sets of revisions to be filtered."""
1541 1547 )
1542 1548
1543 1549 names = interfaceutil.Attribute("""A ``namespaces`` instance.""")
1544 1550
1545 1551 filecopiesmode = interfaceutil.Attribute(
1546 1552 """The way files copies should be dealt with in this repo."""
1547 1553 )
1548 1554
1549 1555 def close():
1550 1556 """Close the handle on this repository."""
1551 1557
1552 1558 def peer():
1553 1559 """Obtain an object conforming to the ``peer`` interface."""
1554 1560
1555 1561 def unfiltered():
1556 1562 """Obtain an unfiltered/raw view of this repo."""
1557 1563
1558 1564 def filtered(name, visibilityexceptions=None):
1559 1565 """Obtain a named view of this repository."""
1560 1566
1561 1567 obsstore = interfaceutil.Attribute("""A store of obsolescence data.""")
1562 1568
1563 1569 changelog = interfaceutil.Attribute("""A handle on the changelog revlog.""")
1564 1570
1565 1571 manifestlog = interfaceutil.Attribute(
1566 1572 """An instance conforming to the ``imanifestlog`` interface.
1567 1573
1568 1574 Provides access to manifests for the repository.
1569 1575 """
1570 1576 )
1571 1577
1572 1578 dirstate = interfaceutil.Attribute("""Working directory state.""")
1573 1579
1574 1580 narrowpats = interfaceutil.Attribute(
1575 1581 """Matcher patterns for this repository's narrowspec."""
1576 1582 )
1577 1583
1578 1584 def narrowmatch(match=None, includeexact=False):
1579 1585 """Obtain a matcher for the narrowspec."""
1580 1586
1581 1587 def setnarrowpats(newincludes, newexcludes):
1582 1588 """Define the narrowspec for this repository."""
1583 1589
1584 1590 def __getitem__(changeid):
1585 1591 """Try to resolve a changectx."""
1586 1592
1587 1593 def __contains__(changeid):
1588 1594 """Whether a changeset exists."""
1589 1595
1590 1596 def __nonzero__():
1591 1597 """Always returns True."""
1592 1598 return True
1593 1599
1594 1600 __bool__ = __nonzero__
1595 1601
1596 1602 def __len__():
1597 1603 """Returns the number of changesets in the repo."""
1598 1604
1599 1605 def __iter__():
1600 1606 """Iterate over revisions in the changelog."""
1601 1607
1602 1608 def revs(expr, *args):
1603 1609 """Evaluate a revset.
1604 1610
1605 1611 Emits revisions.
1606 1612 """
1607 1613
1608 1614 def set(expr, *args):
1609 1615 """Evaluate a revset.
1610 1616
1611 1617 Emits changectx instances.
1612 1618 """
1613 1619
1614 1620 def anyrevs(specs, user=False, localalias=None):
1615 1621 """Find revisions matching one of the given revsets."""
1616 1622
1617 1623 def url():
1618 1624 """Returns a string representing the location of this repo."""
1619 1625
1620 1626 def hook(name, throw=False, **args):
1621 1627 """Call a hook."""
1622 1628
1623 1629 def tags():
1624 1630 """Return a mapping of tag to node."""
1625 1631
1626 1632 def tagtype(tagname):
1627 1633 """Return the type of a given tag."""
1628 1634
1629 1635 def tagslist():
1630 1636 """Return a list of tags ordered by revision."""
1631 1637
1632 1638 def nodetags(node):
1633 1639 """Return the tags associated with a node."""
1634 1640
1635 1641 def nodebookmarks(node):
1636 1642 """Return the list of bookmarks pointing to the specified node."""
1637 1643
1638 1644 def branchmap():
1639 1645 """Return a mapping of branch to heads in that branch."""
1640 1646
1641 1647 def revbranchcache():
1642 1648 pass
1643 1649
1644 1650 def register_changeset(rev, changelogrevision):
1645 1651 """Extension point for caches for new nodes.
1646 1652
1647 1653 Multiple consumers are expected to need parts of the changelogrevision,
1648 1654 so it is provided as an optimization to avoid duplicate lookups. A simple
1649 1655 cache would be fragile when other revisions are accessed, too."""
1650 1656 pass
1651 1657
1652 1658 def branchtip(branchtip, ignoremissing=False):
1653 1659 """Return the tip node for a given branch."""
1654 1660
1655 1661 def lookup(key):
1656 1662 """Resolve the node for a revision."""
1657 1663
1658 1664 def lookupbranch(key):
1659 1665 """Look up the branch name of the given revision or branch name."""
1660 1666
1661 1667 def known(nodes):
1662 1668 """Determine whether a series of nodes is known.
1663 1669
1664 1670 Returns a list of bools.
1665 1671 """
1666 1672
1667 1673 def local():
1668 1674 """Whether the repository is local."""
1669 1675 return True
1670 1676
1671 1677 def publishing():
1672 1678 """Whether the repository is a publishing repository."""
1673 1679
1674 1680 def cancopy():
1675 1681 pass
1676 1682
1677 1683 def shared():
1678 1684 """The type of shared repository or None."""
1679 1685
1680 1686 def wjoin(f, *insidef):
1681 1687 """Calls self.vfs.reljoin(self.root, f, *insidef)"""
1682 1688
1683 1689 def setparents(p1, p2):
1684 1690 """Set the parent nodes of the working directory."""
1685 1691
1686 1692 def filectx(path, changeid=None, fileid=None):
1687 1693 """Obtain a filectx for the given file revision."""
1688 1694
1689 1695 def getcwd():
1690 1696 """Obtain the current working directory from the dirstate."""
1691 1697
1692 1698 def pathto(f, cwd=None):
1693 1699 """Obtain the relative path to a file."""
1694 1700
1695 1701 def adddatafilter(name, fltr):
1696 1702 pass
1697 1703
1698 1704 def wread(filename):
1699 1705 """Read a file from wvfs, using data filters."""
1700 1706
1701 1707 def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
1702 1708 """Write data to a file in the wvfs, using data filters."""
1703 1709
1704 1710 def wwritedata(filename, data):
1705 1711 """Resolve data for writing to the wvfs, using data filters."""
1706 1712
1707 1713 def currenttransaction():
1708 1714 """Obtain the current transaction instance or None."""
1709 1715
1710 1716 def transaction(desc, report=None):
1711 1717 """Open a new transaction to write to the repository."""
1712 1718
1713 1719 def undofiles():
1714 1720 """Returns a list of (vfs, path) for files to undo transactions."""
1715 1721
1716 1722 def recover():
1717 1723 """Roll back an interrupted transaction."""
1718 1724
1719 1725 def rollback(dryrun=False, force=False):
1720 1726 """Undo the last transaction.
1721 1727
1722 1728 DANGEROUS.
1723 1729 """
1724 1730
1725 1731 def updatecaches(tr=None, full=False):
1726 1732 """Warm repo caches."""
1727 1733
1728 1734 def invalidatecaches():
1729 1735 """Invalidate cached data due to the repository mutating."""
1730 1736
1731 1737 def invalidatevolatilesets():
1732 1738 pass
1733 1739
1734 1740 def invalidatedirstate():
1735 1741 """Invalidate the dirstate."""
1736 1742
1737 1743 def invalidate(clearfilecache=False):
1738 1744 pass
1739 1745
1740 1746 def invalidateall():
1741 1747 pass
1742 1748
1743 1749 def lock(wait=True):
1744 1750 """Lock the repository store and return a lock instance."""
1745 1751
1746 1752 def wlock(wait=True):
1747 1753 """Lock the non-store parts of the repository."""
1748 1754
1749 1755 def currentwlock():
1750 1756 """Return the wlock if it's held or None."""
1751 1757
1752 1758 def checkcommitpatterns(wctx, match, status, fail):
1753 1759 pass
1754 1760
1755 1761 def commit(
1756 1762 text=b'',
1757 1763 user=None,
1758 1764 date=None,
1759 1765 match=None,
1760 1766 force=False,
1761 1767 editor=False,
1762 1768 extra=None,
1763 1769 ):
1764 1770 """Add a new revision to the repository."""
1765 1771
1766 1772 def commitctx(ctx, error=False, origctx=None):
1767 1773 """Commit a commitctx instance to the repository."""
1768 1774
1769 1775 def destroying():
1770 1776 """Inform the repository that nodes are about to be destroyed."""
1771 1777
1772 1778 def destroyed():
1773 1779 """Inform the repository that nodes have been destroyed."""
1774 1780
1775 1781 def status(
1776 1782 node1=b'.',
1777 1783 node2=None,
1778 1784 match=None,
1779 1785 ignored=False,
1780 1786 clean=False,
1781 1787 unknown=False,
1782 1788 listsubrepos=False,
1783 1789 ):
1784 1790 """Convenience method to call repo[x].status()."""
1785 1791
1786 1792 def addpostdsstatus(ps):
1787 1793 pass
1788 1794
1789 1795 def postdsstatus():
1790 1796 pass
1791 1797
1792 1798 def clearpostdsstatus():
1793 1799 pass
1794 1800
1795 1801 def heads(start=None):
1796 1802 """Obtain list of nodes that are DAG heads."""
1797 1803
1798 1804 def branchheads(branch=None, start=None, closed=False):
1799 1805 pass
1800 1806
1801 1807 def branches(nodes):
1802 1808 pass
1803 1809
1804 1810 def between(pairs):
1805 1811 pass
1806 1812
1807 1813 def checkpush(pushop):
1808 1814 pass
1809 1815
1810 1816 prepushoutgoinghooks = interfaceutil.Attribute("""util.hooks instance.""")
1811 1817
1812 1818 def pushkey(namespace, key, old, new):
1813 1819 pass
1814 1820
1815 1821 def listkeys(namespace):
1816 1822 pass
1817 1823
1818 1824 def debugwireargs(one, two, three=None, four=None, five=None):
1819 1825 pass
1820 1826
1821 1827 def savecommitmessage(text):
1822 1828 pass
1823 1829
1824 1830
1825 1831 class completelocalrepository(
1826 1832 ilocalrepositorymain, ilocalrepositoryfilestorage
1827 1833 ):
1828 1834 """Complete interface for a local repository."""
1829 1835
1830 1836
1831 1837 class iwireprotocolcommandcacher(interfaceutil.Interface):
1832 1838 """Represents a caching backend for wire protocol commands.
1833 1839
1834 1840 Wire protocol version 2 supports transparent caching of many commands.
1835 1841 To leverage this caching, servers can activate objects that cache
1836 1842 command responses. Objects handle both cache writing and reading.
1837 1843 This interface defines how that response caching mechanism works.
1838 1844
1839 1845 Wire protocol version 2 commands emit a series of objects that are
1840 1846 serialized and sent to the client. The caching layer exists between
1841 1847 the invocation of the command function and the sending of its output
1842 1848 objects to an output layer.
1843 1849
1844 1850 Instances of this interface represent a binding to a cache that
1845 1851 can serve a response (in place of calling a command function) and/or
1846 1852 write responses to a cache for subsequent use.
1847 1853
1848 1854 When a command request arrives, the following happens with regards
1849 1855 to this interface:
1850 1856
1851 1857 1. The server determines whether the command request is cacheable.
1852 1858 2. If it is, an instance of this interface is spawned.
1853 1859 3. The cacher is activated in a context manager (``__enter__`` is called).
1854 1860 4. A cache *key* for that request is derived. This will call the
1855 1861 instance's ``adjustcachekeystate()`` method so the derivation
1856 1862 can be influenced.
1857 1863 5. The cacher is informed of the derived cache key via a call to
1858 1864 ``setcachekey()``.
1859 1865 6. The cacher's ``lookup()`` method is called to test for presence of
1860 1866 the derived key in the cache.
1861 1867 7. If ``lookup()`` returns a hit, that cached result is used in place
1862 1868 of invoking the command function. ``__exit__`` is called and the instance
1863 1869 is discarded.
1864 1870 8. The command function is invoked.
1865 1871 9. ``onobject()`` is called for each object emitted by the command
1866 1872 function.
1867 1873 10. After the final object is seen, ``onfinished()`` is called.
1868 1874 11. ``__exit__`` is called to signal the end of use of the instance.
1869 1875
1870 1876 Cache *key* derivation can be influenced by the instance.
1871 1877
1872 1878 Cache keys are initially derived by a deterministic representation of
1873 1879 the command request. This includes the command name, arguments, protocol
1874 1880 version, etc. This initial key derivation is performed by CBOR-encoding a
1875 1881 data structure and feeding that output into a hasher.
1876 1882
1877 1883 Instances of this interface can influence this initial key derivation
1878 1884 via ``adjustcachekeystate()``.
1879 1885
1880 1886 The instance is informed of the derived cache key via a call to
1881 1887 ``setcachekey()``. The instance must store the key locally so it can
1882 1888 be consulted on subsequent operations that may require it.
1883 1889
1884 1890 When constructed, the instance has access to a callable that can be used
1885 1891 for encoding response objects. This callable receives as its single
1886 1892 argument an object emitted by a command function. It returns an iterable
1887 1893 of bytes chunks representing the encoded object. Unless the cacher is
1888 1894 caching native Python objects in memory or has a way of reconstructing
1889 1895 the original Python objects, implementations typically call this function
1890 1896 to produce bytes from the output objects and then store those bytes in
1891 1897 the cache. When it comes time to re-emit those bytes, they are wrapped
1892 1898 in a ``wireprototypes.encodedresponse`` instance to tell the output
1893 1899 layer that they are pre-encoded.
1894 1900
1895 1901 When receiving the objects emitted by the command function, instances
1896 1902 can choose what to do with those objects. The simplest thing to do is
1897 1903 re-emit the original objects. They will be forwarded to the output
1898 1904 layer and will be processed as if the cacher did not exist.
1899 1905
1900 1906 Implementations could also choose to not emit objects - instead locally
1901 1907 buffering objects or their encoded representation. They could then emit
1902 1908 a single "coalesced" object when ``onfinished()`` is called. In
1903 1909 this way, the implementation would function as a filtering layer of
1904 1910 sorts.
1905 1911
1906 1912 When caching objects, typically the encoded form of the object will
1907 1913 be stored. Keep in mind that if the original object is forwarded to
1908 1914 the output layer, it will need to be encoded there as well. For large
1909 1915 output, this redundant encoding could add overhead. Implementations
1910 1916 could wrap the encoded object data in ``wireprototypes.encodedresponse``
1911 1917 instances to avoid this overhead.
1912 1918 """
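A skeletal pass-through cacher illustrating the lifecycle above. The in-memory dict and the unused ``encodefn`` constructor argument are assumptions made for this sketch, not a prescribed implementation:

    class memorycacher(object):
        _cache = {}    # process-wide cache keyed by the derived cache key

        def __init__(self, encodefn):
            self._encodefn = encodefn    # unused: native objects are cached here
            self._key = None
            self._buffered = []

        def __enter__(self):
            return self

        def __exit__(self, exctype, excvalue, exctb):
            self._buffered = []

        def adjustcachekeystate(self, state):
            state[b'cacher'] = b'memory'    # folded into cache key derivation

        def setcachekey(self, key):
            self._key = key
            return True                     # this request is cacheable

        def lookup(self):
            objs = self._cache.get(self._key)
            return {b'objs': objs} if objs is not None else None

        def onobject(self, obj):
            self._buffered.append(obj)
            yield obj                       # forward unmodified to the output layer

        def onfinished(self):
            self._cache[self._key] = self._buffered
            return []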
1913 1919
1914 1920 def __enter__():
1915 1921 """Marks the instance as active.
1916 1922
1917 1923 Should return self.
1918 1924 """
1919 1925
1920 1926 def __exit__(exctype, excvalue, exctb):
1921 1927 """Called when cacher is no longer used.
1922 1928
1923 1929 This can be used by implementations to perform cleanup actions (e.g.
1924 1930 disconnecting network sockets, aborting a partially cached response).
1925 1931 """
1926 1932
1927 1933 def adjustcachekeystate(state):
1928 1934 """Influences cache key derivation by adjusting state to derive key.
1929 1935
1930 1936 A dict defining the state used to derive the cache key is passed.
1931 1937
1932 1938 Implementations can modify this dict to record additional state that
1933 1939 is wanted to influence key derivation.
1934 1940
1935 1941 Implementations are *highly* encouraged to not modify or delete
1936 1942 existing keys.
1937 1943 """
1938 1944
1939 1945 def setcachekey(key):
1940 1946 """Record the derived cache key for this request.
1941 1947
1942 1948 Instances may mutate the key for internal usage, as desired. e.g.
1943 1949 instances may wish to prepend the repo name, introduce path
1944 1950 components for filesystem or URL addressing, etc. Behavior is up to
1945 1951 the cache.
1946 1952
1947 1953 Returns a bool indicating if the request is cacheable by this
1948 1954 instance.
1949 1955 """
1950 1956
1951 1957 def lookup():
1952 1958 """Attempt to resolve an entry in the cache.
1953 1959
1954 1960 The instance is instructed to look for the cache key that it was
1955 1961 informed about via the call to ``setcachekey()``.
1956 1962
1957 1963 If there's no cache hit or the cacher doesn't wish to use the cached
1958 1964 entry, ``None`` should be returned.
1959 1965
1960 1966 Else, a dict defining the cached result should be returned. The
1961 1967 dict may have the following keys:
1962 1968
1963 1969 objs
1964 1970 An iterable of objects that should be sent to the client. That
1965 1971 iterable of objects is expected to be what the command function
1966 1972 would return if invoked or an equivalent representation thereof.
1967 1973 """
1968 1974
1969 1975 def onobject(obj):
1970 1976 """Called when a new object is emitted from the command function.
1971 1977
1972 1978 Receives as its argument the object that was emitted from the
1973 1979 command function.
1974 1980
1975 1981 This method returns an iterator of objects to forward to the output
1976 1982 layer. The easiest implementation is a generator that just
1977 1983 ``yield obj``.
1978 1984 """
1979 1985
1980 1986 def onfinished():
1981 1987 """Called after all objects have been emitted from the command function.
1982 1988
1983 1989 Implementations should return an iterator of objects to forward to
1984 1990 the output layer.
1985 1991
1986 1992 This method can be a generator.
1987 1993 """
@@ -1,2349 +1,2351 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from .pycompat import getattr
23 23 from . import (
24 24 encoding,
25 25 error,
26 26 match as matchmod,
27 27 mdiff,
28 28 pathutil,
29 29 policy,
30 30 pycompat,
31 31 revlog,
32 32 util,
33 33 )
34 34 from .interfaces import (
35 35 repository,
36 36 util as interfaceutil,
37 37 )
38 38
39 39 parsers = policy.importmod('parsers')
40 40 propertycache = util.propertycache
41 41
42 42 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
43 43 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
44 44
45 45
46 46 def _parse(data):
47 47 # This method does a little bit of excessive-looking
48 48 # precondition checking. This is so that the behavior of this
49 49 # class exactly matches its C counterpart to try and help
50 50 # prevent surprise breakage for anyone that develops against
51 51 # the pure version.
52 52 if data and data[-1:] != b'\n':
53 53 raise ValueError(b'Manifest did not end in a newline.')
54 54 prev = None
55 55 for l in data.splitlines():
56 56 if prev is not None and prev > l:
57 57 raise ValueError(b'Manifest lines not in sorted order.')
58 58 prev = l
59 59 f, n = l.split(b'\0')
60 60 nl = len(n)
61 61 flags = n[-1:]
62 62 if flags in _manifestflags:
63 63 n = n[:-1]
64 64 nl -= 1
65 65 else:
66 66 flags = b''
67 67 if nl not in (40, 64):
68 68 raise ValueError(b'Invalid manifest line')
69 69
70 70 yield f, bin(n), flags
71 71
72 72
73 73 def _text(it):
74 74 files = []
75 75 lines = []
76 76 for f, n, fl in it:
77 77 files.append(f)
78 78 # if this is changed to support newlines in filenames,
79 79 # be sure to check the templates/ dir again (especially *-raw.tmpl)
80 80 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
81 81
82 82 _checkforbidden(files)
83 83 return b''.join(lines)
84 84
85 85
86 86 class lazymanifestiter(object):
87 87 def __init__(self, lm):
88 88 self.pos = 0
89 89 self.lm = lm
90 90
91 91 def __iter__(self):
92 92 return self
93 93
94 94 def next(self):
95 95 try:
96 96 data, pos = self.lm._get(self.pos)
97 97 except IndexError:
98 98 raise StopIteration
99 99 if pos == -1:
100 100 self.pos += 1
101 101 return data[0]
102 102 self.pos += 1
103 103 zeropos = data.find(b'\x00', pos)
104 104 return data[pos:zeropos]
105 105
106 106 __next__ = next
107 107
108 108
109 109 class lazymanifestiterentries(object):
110 110 def __init__(self, lm):
111 111 self.lm = lm
112 112 self.pos = 0
113 113
114 114 def __iter__(self):
115 115 return self
116 116
117 117 def next(self):
118 118 try:
119 119 data, pos = self.lm._get(self.pos)
120 120 except IndexError:
121 121 raise StopIteration
122 122 if pos == -1:
123 123 self.pos += 1
124 124 return data
125 125 zeropos = data.find(b'\x00', pos)
126 126 nlpos = data.find(b'\n', pos)
127 127 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
128 128 raise error.StorageError(b'Invalid manifest line')
129 129 flags = data[nlpos - 1 : nlpos]
130 130 if flags in _manifestflags:
131 131 hlen = nlpos - zeropos - 2
132 132 else:
133 133 hlen = nlpos - zeropos - 1
134 134 flags = b''
135 135 if hlen not in (40, 64):
136 136 raise error.StorageError(b'Invalid manifest line')
137 137 hashval = unhexlify(
138 138 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
139 139 )
140 140 self.pos += 1
141 141 return (data[pos:zeropos], hashval, flags)
142 142
143 143 __next__ = next
144 144
145 145
146 146 def unhexlify(data, extra, pos, length):
147 147 s = bin(data[pos : pos + length])
148 148 if extra:
149 149 s += chr(extra & 0xFF)
150 150 return s
151 151
152 152
153 153 def _cmp(a, b):
154 154 return (a > b) - (a < b)
155 155
156 156
157 157 _manifestflags = {b'', b'l', b't', b'x'}
158 158
159 159
160 160 class _lazymanifest(object):
161 161 """A pure python manifest backed by a byte string. It is supplimented with
162 162 internal lists as it is modified, until it is compacted back to a pure byte
163 163 string.
164 164
165 165 ``data`` is the initial manifest data.
166 166
167 167 ``positions`` is a list of offsets, one per manifest entry. Positive
168 168 values are offsets into ``data``, negative values are offsets into the
169 169 ``extradata`` list. When an entry is removed, its entry is dropped from
170 170 ``positions``. The values are encoded such that when walking the list and
171 171 indexing into ``data`` or ``extradata`` as appropriate, the entries are
172 172 sorted by filename.
173 173
174 174 ``extradata`` is a list of (key, hash, flags) for entries that were added or
175 175 modified since the manifest was created or compacted.
176 176 """
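    # Worked illustration (values invented) of the encoding described above. If
    # data == b'bar.txt\x00<40 hex digits>\nfoo.txt\x00<40 hex digits>\n', then a
    # manifest to which b'baz.txt' was later added could hold
    #   positions == [0, -1, 49]   (0 and 49 index bar.txt / foo.txt in ``data``,
    #                               -1 points at extradata[0])
    #   extradata == [(b'baz.txt', node, b'')]
    # so walking ``positions`` in order still visits entries sorted by filename.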
177 177
178 178 def __init__(
179 179 self,
180 180 data,
181 181 positions=None,
182 182 extrainfo=None,
183 183 extradata=None,
184 184 hasremovals=False,
185 185 ):
186 186 if positions is None:
187 187 self.positions = self.findlines(data)
188 188 self.extrainfo = [0] * len(self.positions)
189 189 self.data = data
190 190 self.extradata = []
191 191 self.hasremovals = False
192 192 else:
193 193 self.positions = positions[:]
194 194 self.extrainfo = extrainfo[:]
195 195 self.extradata = extradata[:]
196 196 self.data = data
197 197 self.hasremovals = hasremovals
198 198
199 199 def findlines(self, data):
200 200 if not data:
201 201 return []
202 202 pos = data.find(b"\n")
203 203 if pos == -1 or data[-1:] != b'\n':
204 204 raise ValueError(b"Manifest did not end in a newline.")
205 205 positions = [0]
206 206 prev = data[: data.find(b'\x00')]
207 207 while pos < len(data) - 1 and pos != -1:
208 208 positions.append(pos + 1)
209 209 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
210 210 if nexts < prev:
211 211 raise ValueError(b"Manifest lines not in sorted order.")
212 212 prev = nexts
213 213 pos = data.find(b"\n", pos + 1)
214 214 return positions
215 215
216 216 def _get(self, index):
217 217 # get the position encoded in pos:
218 218 # positive number is an index in 'data'
219 219 # negative number is in extrapieces
220 220 pos = self.positions[index]
221 221 if pos >= 0:
222 222 return self.data, pos
223 223 return self.extradata[-pos - 1], -1
224 224
225 225 def _getkey(self, pos):
226 226 if pos >= 0:
227 227 return self.data[pos : self.data.find(b'\x00', pos + 1)]
228 228 return self.extradata[-pos - 1][0]
229 229
230 230 def bsearch(self, key):
231 231 first = 0
232 232 last = len(self.positions) - 1
233 233
234 234 while first <= last:
235 235 midpoint = (first + last) // 2
236 236 nextpos = self.positions[midpoint]
237 237 candidate = self._getkey(nextpos)
238 238 r = _cmp(key, candidate)
239 239 if r == 0:
240 240 return midpoint
241 241 else:
242 242 if r < 0:
243 243 last = midpoint - 1
244 244 else:
245 245 first = midpoint + 1
246 246 return -1
247 247
248 248 def bsearch2(self, key):
249 249 # same as the above, but will always return the position
250 250 # done for performance reasons
251 251 first = 0
252 252 last = len(self.positions) - 1
253 253
254 254 while first <= last:
255 255 midpoint = (first + last) // 2
256 256 nextpos = self.positions[midpoint]
257 257 candidate = self._getkey(nextpos)
258 258 r = _cmp(key, candidate)
259 259 if r == 0:
260 260 return (midpoint, True)
261 261 else:
262 262 if r < 0:
263 263 last = midpoint - 1
264 264 else:
265 265 first = midpoint + 1
266 266 return (first, False)
267 267
268 268 def __contains__(self, key):
269 269 return self.bsearch(key) != -1
270 270
271 271 def __getitem__(self, key):
272 272 if not isinstance(key, bytes):
273 273 raise TypeError(b"getitem: manifest keys must be a bytes.")
274 274 needle = self.bsearch(key)
275 275 if needle == -1:
276 276 raise KeyError
277 277 data, pos = self._get(needle)
278 278 if pos == -1:
279 279 return (data[1], data[2])
280 280 zeropos = data.find(b'\x00', pos)
281 281 nlpos = data.find(b'\n', zeropos)
282 282 assert 0 <= needle <= len(self.positions)
283 283 assert len(self.extrainfo) == len(self.positions)
284 284 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
285 285 raise error.StorageError(b'Invalid manifest line')
286 286 hlen = nlpos - zeropos - 1
287 287 flags = data[nlpos - 1 : nlpos]
288 288 if flags in _manifestflags:
289 289 hlen -= 1
290 290 else:
291 291 flags = b''
292 292 if hlen not in (40, 64):
293 293 raise error.StorageError(b'Invalid manifest line')
294 294 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
295 295 return (hashval, flags)
296 296
297 297 def __delitem__(self, key):
298 298 needle, found = self.bsearch2(key)
299 299 if not found:
300 300 raise KeyError
301 301 cur = self.positions[needle]
302 302 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
303 303 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
304 304 if cur >= 0:
305 305 # This does NOT unsort the list as far as the search functions are
306 306 # concerned, as they only examine lines mapped by self.positions.
307 307 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
308 308 self.hasremovals = True
309 309
310 310 def __setitem__(self, key, value):
311 311 if not isinstance(key, bytes):
312 312 raise TypeError(b"setitem: manifest keys must be a byte string.")
313 313 if not isinstance(value, tuple) or len(value) != 2:
314 314 raise TypeError(
315 315 b"Manifest values must be a tuple of (node, flags)."
316 316 )
317 317 hashval = value[0]
318 318 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
319 319 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
320 320 flags = value[1]
321 321 if not isinstance(flags, bytes) or len(flags) > 1:
322 322 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
323 323 needle, found = self.bsearch2(key)
324 324 if found:
325 325 # put the item
326 326 pos = self.positions[needle]
327 327 if pos < 0:
328 328 self.extradata[-pos - 1] = (key, hashval, value[1])
329 329 else:
330 330 # just don't bother
331 331 self.extradata.append((key, hashval, value[1]))
332 332 self.positions[needle] = -len(self.extradata)
333 333 else:
334 334 # not found, put it in with extra positions
335 335 self.extradata.append((key, hashval, value[1]))
336 336 self.positions = (
337 337 self.positions[:needle]
338 338 + [-len(self.extradata)]
339 339 + self.positions[needle:]
340 340 )
341 341 self.extrainfo = (
342 342 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
343 343 )
344 344
345 345 def copy(self):
346 346 # XXX call _compact like in C?
347 347 return _lazymanifest(
348 348 self.data,
349 349 self.positions,
350 350 self.extrainfo,
351 351 self.extradata,
352 352 self.hasremovals,
353 353 )
354 354
355 355 def _compact(self):
356 356 # hopefully not called TOO often
357 357 if len(self.extradata) == 0 and not self.hasremovals:
358 358 return
359 359 l = []
360 360 i = 0
361 361 offset = 0
362 362 self.extrainfo = [0] * len(self.positions)
363 363 while i < len(self.positions):
364 364 if self.positions[i] >= 0:
365 365 cur = self.positions[i]
366 366 last_cut = cur
367 367
368 368 # Collect all contiguous entries in the buffer at the current
369 369 # offset, breaking out only for added/modified items held in
370 370 # extradata, or a deleted line prior to the next position.
371 371 while True:
372 372 self.positions[i] = offset
373 373 i += 1
374 374 if i == len(self.positions) or self.positions[i] < 0:
375 375 break
376 376
377 377 # A removed file has no positions[] entry, but does have an
378 378 # overwritten first byte. Break out and find the end of the
379 379 # current good entry/entries if there is a removed file
380 380 # before the next position.
381 381 if (
382 382 self.hasremovals
383 383 and self.data.find(b'\n\x00', cur, self.positions[i])
384 384 != -1
385 385 ):
386 386 break
387 387
388 388 offset += self.positions[i] - cur
389 389 cur = self.positions[i]
390 390 end_cut = self.data.find(b'\n', cur)
391 391 if end_cut != -1:
392 392 end_cut += 1
393 393 offset += end_cut - cur
394 394 l.append(self.data[last_cut:end_cut])
395 395 else:
396 396 while i < len(self.positions) and self.positions[i] < 0:
397 397 cur = self.positions[i]
398 398 t = self.extradata[-cur - 1]
399 399 l.append(self._pack(t))
400 400 self.positions[i] = offset
401 401 # Hashes are either 20 bytes (old sha1s) or 32
402 402 # bytes (new non-sha1).
403 403 hlen = 20
404 404 if len(t[1]) > 25:
405 405 hlen = 32
406 406 if len(t[1]) > hlen:
407 407 self.extrainfo[i] = ord(t[1][hlen + 1])
408 408 offset += len(l[-1])
409 409 i += 1
410 410 self.data = b''.join(l)
411 411 self.hasremovals = False
412 412 self.extradata = []
413 413
414 414 def _pack(self, d):
415 415 n = d[1]
416 416 assert len(n) in (20, 32)
417 417 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
418 418
419 419 def text(self):
420 420 self._compact()
421 421 return self.data
422 422
423 423 def diff(self, m2, clean=False):
424 424 '''Finds changes between the current manifest and m2.'''
425 425 # XXX think whether efficiency matters here
426 426 diff = {}
427 427
428 428 for fn, e1, flags in self.iterentries():
429 429 if fn not in m2:
430 430 diff[fn] = (e1, flags), (None, b'')
431 431 else:
432 432 e2 = m2[fn]
433 433 if (e1, flags) != e2:
434 434 diff[fn] = (e1, flags), e2
435 435 elif clean:
436 436 diff[fn] = None
437 437
438 438 for fn, e2, flags in m2.iterentries():
439 439 if fn not in self:
440 440 diff[fn] = (None, b''), (e2, flags)
441 441
442 442 return diff
443 443
444 444 def iterentries(self):
445 445 return lazymanifestiterentries(self)
446 446
447 447 def iterkeys(self):
448 448 return lazymanifestiter(self)
449 449
450 450 def __iter__(self):
451 451 return lazymanifestiter(self)
452 452
453 453 def __len__(self):
454 454 return len(self.positions)
455 455
456 456 def filtercopy(self, filterfn):
457 457 # XXX should be optimized
458 458 c = _lazymanifest(b'')
459 459 for f, n, fl in self.iterentries():
460 460 if filterfn(f):
461 461 c[f] = n, fl
462 462 return c
463 463
464 464
465 465 try:
466 466 _lazymanifest = parsers.lazymanifest
467 467 except AttributeError:
468 468 pass
469 469
470 470
471 471 @interfaceutil.implementer(repository.imanifestdict)
472 472 class manifestdict(object):
473 473 def __init__(self, data=b''):
474 474 self._lm = _lazymanifest(data)
475 475
476 476 def __getitem__(self, key):
477 477 return self._lm[key][0]
478 478
479 479 def find(self, key):
480 480 return self._lm[key]
481 481
482 482 def __len__(self):
483 483 return len(self._lm)
484 484
485 485 def __nonzero__(self):
486 486 # nonzero is covered by the __len__ function, but implementing it here
487 487 # makes it easier for extensions to override.
488 488 return len(self._lm) != 0
489 489
490 490 __bool__ = __nonzero__
491 491
492 492 def __setitem__(self, key, node):
493 493 self._lm[key] = node, self.flags(key)
494 494
495 495 def __contains__(self, key):
496 496 if key is None:
497 497 return False
498 498 return key in self._lm
499 499
500 500 def __delitem__(self, key):
501 501 del self._lm[key]
502 502
503 503 def __iter__(self):
504 504 return self._lm.__iter__()
505 505
506 506 def iterkeys(self):
507 507 return self._lm.iterkeys()
508 508
509 509 def keys(self):
510 510 return list(self.iterkeys())
511 511
512 512 def filesnotin(self, m2, match=None):
513 513 '''Set of files in this manifest that are not in the other'''
514 514 if match is not None:
515 515 match = matchmod.badmatch(match, lambda path, msg: None)
516 516 sm2 = set(m2.walk(match))
517 517 return {f for f in self.walk(match) if f not in sm2}
518 518 return {f for f in self if f not in m2}
519 519
520 520 @propertycache
521 521 def _dirs(self):
522 522 return pathutil.dirs(self)
523 523
524 524 def dirs(self):
525 525 return self._dirs
526 526
527 527 def hasdir(self, dir):
528 528 return dir in self._dirs
529 529
530 530 def _filesfastpath(self, match):
531 531 """Checks whether we can correctly and quickly iterate over matcher
532 532 files instead of over manifest files."""
533 533 files = match.files()
534 534 return len(files) < 100 and (
535 535 match.isexact()
536 536 or (match.prefix() and all(fn in self for fn in files))
537 537 )
538 538
539 539 def walk(self, match):
540 540 """Generates matching file names.
541 541
542 542 Equivalent to manifest.matches(match).iterkeys(), but without creating
543 543 an entirely new manifest.
544 544
545 545 It also reports nonexistent files by marking them bad with match.bad().
546 546 """
547 547 if match.always():
548 548 for f in iter(self):
549 549 yield f
550 550 return
551 551
552 552 fset = set(match.files())
553 553
554 554 # avoid the entire walk if we're only looking for specific files
555 555 if self._filesfastpath(match):
556 556 for fn in sorted(fset):
557 557 if fn in self:
558 558 yield fn
559 559 return
560 560
561 561 for fn in self:
562 562 if fn in fset:
563 563 # specified pattern is the exact name
564 564 fset.remove(fn)
565 565 if match(fn):
566 566 yield fn
567 567
568 568 # for dirstate.walk, files=[''] means "walk the whole tree".
569 569 # follow that here, too
570 570 fset.discard(b'')
571 571
572 572 for fn in sorted(fset):
573 573 if not self.hasdir(fn):
574 574 match.bad(fn, None)
575 575
576 576 def _matches(self, match):
577 577 '''generate a new manifest filtered by the match argument'''
578 578 if match.always():
579 579 return self.copy()
580 580
581 581 if self._filesfastpath(match):
582 582 m = manifestdict()
583 583 lm = self._lm
584 584 for fn in match.files():
585 585 if fn in lm:
586 586 m._lm[fn] = lm[fn]
587 587 return m
588 588
589 589 m = manifestdict()
590 590 m._lm = self._lm.filtercopy(match)
591 591 return m
592 592
593 593 def diff(self, m2, match=None, clean=False):
594 594 """Finds changes between the current manifest and m2.
595 595
596 596 Args:
597 597 m2: the manifest to which this manifest should be compared.
598 598 clean: if true, include files unchanged between these manifests
599 599 with a None value in the returned dictionary.
600 600
601 601 The result is returned as a dict with filename as key and
602 602 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
603 603 nodeid in the current/other manifest and fl1/fl2 is the flag
604 604 in the current/other manifest. Where the file does not exist,
605 605 the nodeid will be None and the flags will be the empty
606 606 string.
607 607 """
608 608 if match:
609 609 m1 = self._matches(match)
610 610 m2 = m2._matches(match)
611 611 return m1.diff(m2, clean=clean)
612 612 return self._lm.diff(m2._lm, clean)
613 613
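# Minimal sketch of the result shape (hypothetical nodes n1, n2): if only
# b'b' differs between the two manifests,
#   m1.diff(m2) == {b'b': ((n1, b''), (n2, b''))}
# and with clean=True every unchanged filename additionally maps to None.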
614 614 def setflag(self, key, flag):
615 615 if flag not in _manifestflags:
616 616 raise TypeError(b"Invalid manifest flag set.")
617 617 self._lm[key] = self[key], flag
618 618
619 619 def get(self, key, default=None):
620 620 try:
621 621 return self._lm[key][0]
622 622 except KeyError:
623 623 return default
624 624
625 625 def flags(self, key):
626 626 try:
627 627 return self._lm[key][1]
628 628 except KeyError:
629 629 return b''
630 630
631 631 def copy(self):
632 632 c = manifestdict()
633 633 c._lm = self._lm.copy()
634 634 return c
635 635
636 636 def items(self):
637 637 return (x[:2] for x in self._lm.iterentries())
638 638
639 639 def iteritems(self):
640 640 return (x[:2] for x in self._lm.iterentries())
641 641
642 642 def iterentries(self):
643 643 return self._lm.iterentries()
644 644
645 645 def text(self):
646 646 # most likely uses native version
647 647 return self._lm.text()
648 648
649 649 def fastdelta(self, base, changes):
650 650 """Given a base manifest text as a bytearray and a list of changes
651 651 relative to that text, compute a delta that can be used by revlog.
652 652 """
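# Usage sketch (mirroring manifestrevlog.add() further below): `changes` is a
# sorted iterable of (filename, todelete) pairs, e.g.
#   heapq.merge([(f, False) for f in sorted(added)],
#               [(f, True) for f in sorted(removed)])
# and the returned (arraytext, deltatext) pair feeds the fulltext cache and
# revlog.addrevision(cachedelta=...) respectively.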
653 653 delta = []
654 654 dstart = None
655 655 dend = None
656 656 dline = [b""]
657 657 start = 0
658 658 # zero copy representation of base as a buffer
659 659 addbuf = util.buffer(base)
660 660
661 661 changes = list(changes)
662 662 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
663 663 # start with a readonly loop that finds the offset of
664 664 # each line and creates the deltas
665 665 for f, todelete in changes:
666 666 # start/end will either be the span of the item or the insert point
667 667 start, end = _msearch(addbuf, f, start)
668 668 if not todelete:
669 669 h, fl = self._lm[f]
670 670 l = b"%s\0%s%s\n" % (f, hex(h), fl)
671 671 else:
672 672 if start == end:
673 673 # item we want to delete was not found, error out
674 674 raise AssertionError(
675 675 _(b"failed to remove %s from manifest") % f
676 676 )
677 677 l = b""
678 678 if dstart is not None and dstart <= start and dend >= start:
679 679 if dend < end:
680 680 dend = end
681 681 if l:
682 682 dline.append(l)
683 683 else:
684 684 if dstart is not None:
685 685 delta.append([dstart, dend, b"".join(dline)])
686 686 dstart = start
687 687 dend = end
688 688 dline = [l]
689 689
690 690 if dstart is not None:
691 691 delta.append([dstart, dend, b"".join(dline)])
692 692 # apply the delta to the base, and get a delta for addrevision
693 693 deltatext, arraytext = _addlistdelta(base, delta)
694 694 else:
695 695 # For large changes, it's much cheaper to just build the text and
696 696 # diff it.
697 697 arraytext = bytearray(self.text())
698 698 deltatext = mdiff.textdiff(
699 699 util.buffer(base), util.buffer(arraytext)
700 700 )
701 701
702 702 return arraytext, deltatext
703 703
704 704
705 705 def _msearch(m, s, lo=0, hi=None):
706 706 """return a tuple (start, end) that says where to find s within m.
707 707
708 708 If the string is found, m[start:end] is the line containing
709 709 that string. If start == end, the string was not found and
710 710 they indicate the proper sorted insertion point.
711 711
712 712 m should be a buffer, a memoryview or a byte string.
713 713 s is a byte string"""
714 714
715 715 def advance(i, c):
716 716 while i < lenm and m[i : i + 1] != c:
717 717 i += 1
718 718 return i
719 719
720 720 if not s:
721 721 return (lo, lo)
722 722 lenm = len(m)
723 723 if not hi:
724 724 hi = lenm
725 725 while lo < hi:
726 726 mid = (lo + hi) // 2
727 727 start = mid
728 728 while start > 0 and m[start - 1 : start] != b'\n':
729 729 start -= 1
730 730 end = advance(start, b'\0')
731 731 if bytes(m[start:end]) < s:
732 732 # we know that after the null there are 40 bytes of sha1
733 733 # this translates to the bisect lo = mid + 1
734 734 lo = advance(end + 40, b'\n') + 1
735 735 else:
736 736 # this translates to the bisect hi = mid
737 737 hi = start
738 738 end = advance(lo, b'\0')
739 739 found = m[lo:end]
740 740 if s == found:
741 741 # we know that after the null there are 40 bytes of sha1
742 742 end = advance(end + 40, b'\n')
743 743 return (lo, end + 1)
744 744 else:
745 745 return (lo, lo)
746 746
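# Usage sketch (hypothetical data): with a manifest text containing lines for
# b'a' and b'c', _msearch(text, b'a') returns the [start, end) span of the
# b'a' line, while _msearch(text, b'b') returns (i, i) where i is the offset
# of the b'c' line, i.e. the sorted insertion point for the missing name.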
747 747
748 748 def _checkforbidden(l):
749 749 """Check filenames for illegal characters."""
750 750 for f in l:
751 751 if b'\n' in f or b'\r' in f:
752 752 raise error.StorageError(
753 753 _(b"'\\n' and '\\r' disallowed in filenames: %r")
754 754 % pycompat.bytestr(f)
755 755 )
756 756
757 757
758 758 # apply the changes collected during the bisect loop to our addlist
759 759 # return a delta suitable for addrevision
760 760 def _addlistdelta(addlist, x):
761 761 # for large addlist arrays, building a new array is cheaper
762 762 # than repeatedly modifying the existing one
763 763 currentposition = 0
764 764 newaddlist = bytearray()
765 765
766 766 for start, end, content in x:
767 767 newaddlist += addlist[currentposition:start]
768 768 if content:
769 769 newaddlist += bytearray(content)
770 770
771 771 currentposition = end
772 772
773 773 newaddlist += addlist[currentposition:]
774 774
775 775 deltatext = b"".join(
776 776 struct.pack(b">lll", start, end, len(content)) + content
777 777 for start, end, content in x
778 778 )
779 779 return deltatext, newaddlist
780 780
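# For illustration: a single change [(5, 10, b'new\n')] yields
#   deltatext == struct.pack(b'>lll', 5, 10, 4) + b'new\n'
# which is the (start, end, length, data) chunk format referred to above as
# "a delta suitable for addrevision".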
781 781
782 782 def _splittopdir(f):
783 783 if b'/' in f:
784 784 dir, subpath = f.split(b'/', 1)
785 785 return dir + b'/', subpath
786 786 else:
787 787 return b'', f
788 788
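# For illustration: _splittopdir(b'a/b/c') == (b'a/', b'b/c') and
# _splittopdir(b'f') == (b'', b'f'); treemanifest keys its _dirs dict by the
# trailing-slash form of the first path component.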
789 789
790 790 _noop = lambda s: None
791 791
792 792
793 793 @interfaceutil.implementer(repository.imanifestdict)
794 794 class treemanifest(object):
795 795 def __init__(self, dir=b'', text=b''):
796 796 self._dir = dir
797 797 self._node = nullid
798 798 self._loadfunc = _noop
799 799 self._copyfunc = _noop
800 800 self._dirty = False
801 801 self._dirs = {}
802 802 self._lazydirs = {}
803 803 # Using _lazymanifest here is a little slower than plain old dicts
804 804 self._files = {}
805 805 self._flags = {}
806 806 if text:
807 807
808 808 def readsubtree(subdir, subm):
809 809 raise AssertionError(
810 810 b'treemanifest constructor only accepts flat manifests'
811 811 )
812 812
813 813 self.parse(text, readsubtree)
814 814 self._dirty = True # Mark flat manifest dirty after parsing
815 815
816 816 def _subpath(self, path):
817 817 return self._dir + path
818 818
819 819 def _loadalllazy(self):
820 820 selfdirs = self._dirs
821 821 subpath = self._subpath
822 822 for d, (node, readsubtree, docopy) in pycompat.iteritems(
823 823 self._lazydirs
824 824 ):
825 825 if docopy:
826 826 selfdirs[d] = readsubtree(subpath(d), node).copy()
827 827 else:
828 828 selfdirs[d] = readsubtree(subpath(d), node)
829 829 self._lazydirs = {}
830 830
831 831 def _loadlazy(self, d):
832 832 v = self._lazydirs.get(d)
833 833 if v:
834 834 node, readsubtree, docopy = v
835 835 if docopy:
836 836 self._dirs[d] = readsubtree(self._subpath(d), node).copy()
837 837 else:
838 838 self._dirs[d] = readsubtree(self._subpath(d), node)
839 839 del self._lazydirs[d]
840 840
841 841 def _loadchildrensetlazy(self, visit):
842 842 if not visit:
843 843 return None
844 844 if visit == b'all' or visit == b'this':
845 845 self._loadalllazy()
846 846 return None
847 847
848 848 loadlazy = self._loadlazy
849 849 for k in visit:
850 850 loadlazy(k + b'/')
851 851 return visit
852 852
853 853 def _loaddifflazy(self, t1, t2):
854 854 """load items in t1 and t2 if they're needed for diffing.
855 855
856 856 The current criteria are:
857 857 - if it's not present in _lazydirs in either t1 or t2, load it in the
858 858 other (it may already be loaded or it may not exist, doesn't matter)
859 859 - if it's present in _lazydirs in both, compare the nodeid; if it
860 860 differs, load it in both
861 861 """
862 862 toloadlazy = []
863 863 for d, v1 in pycompat.iteritems(t1._lazydirs):
864 864 v2 = t2._lazydirs.get(d)
865 865 if not v2 or v2[0] != v1[0]:
866 866 toloadlazy.append(d)
867 867 for d, v1 in pycompat.iteritems(t2._lazydirs):
868 868 if d not in t1._lazydirs:
869 869 toloadlazy.append(d)
870 870
871 871 for d in toloadlazy:
872 872 t1._loadlazy(d)
873 873 t2._loadlazy(d)
874 874
875 875 def __len__(self):
876 876 self._load()
877 877 size = len(self._files)
878 878 self._loadalllazy()
879 879 for m in self._dirs.values():
880 880 size += m.__len__()
881 881 return size
882 882
883 883 def __nonzero__(self):
884 884 # Faster than "__len__() != 0" since it avoids loading sub-manifests
885 885 return not self._isempty()
886 886
887 887 __bool__ = __nonzero__
888 888
889 889 def _isempty(self):
890 890 self._load() # for consistency; already loaded by all callers
891 891 # See if we can skip loading everything.
892 892 if self._files or (
893 893 self._dirs and any(not m._isempty() for m in self._dirs.values())
894 894 ):
895 895 return False
896 896 self._loadalllazy()
897 897 return not self._dirs or all(m._isempty() for m in self._dirs.values())
898 898
899 899 @encoding.strmethod
900 900 def __repr__(self):
901 901 return (
902 902 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
903 903 % (
904 904 self._dir,
905 905 hex(self._node),
906 906 bool(self._loadfunc is _noop),
907 907 self._dirty,
908 908 id(self),
909 909 )
910 910 )
911 911
912 912 def dir(self):
913 913 """The directory that this tree manifest represents, including a
914 914 trailing '/'. Empty string for the repo root directory."""
915 915 return self._dir
916 916
917 917 def node(self):
918 918 """The node of this instance. nullid for unsaved instances. Should
919 919 be updated when the instance is read or written from a revlog.
920 920 """
921 921 assert not self._dirty
922 922 return self._node
923 923
924 924 def setnode(self, node):
925 925 self._node = node
926 926 self._dirty = False
927 927
928 928 def iterentries(self):
929 929 self._load()
930 930 self._loadalllazy()
931 931 for p, n in sorted(
932 932 itertools.chain(self._dirs.items(), self._files.items())
933 933 ):
934 934 if p in self._files:
935 935 yield self._subpath(p), n, self._flags.get(p, b'')
936 936 else:
937 937 for x in n.iterentries():
938 938 yield x
939 939
940 940 def items(self):
941 941 self._load()
942 942 self._loadalllazy()
943 943 for p, n in sorted(
944 944 itertools.chain(self._dirs.items(), self._files.items())
945 945 ):
946 946 if p in self._files:
947 947 yield self._subpath(p), n
948 948 else:
949 949 for f, sn in pycompat.iteritems(n):
950 950 yield f, sn
951 951
952 952 iteritems = items
953 953
954 954 def iterkeys(self):
955 955 self._load()
956 956 self._loadalllazy()
957 957 for p in sorted(itertools.chain(self._dirs, self._files)):
958 958 if p in self._files:
959 959 yield self._subpath(p)
960 960 else:
961 961 for f in self._dirs[p]:
962 962 yield f
963 963
964 964 def keys(self):
965 965 return list(self.iterkeys())
966 966
967 967 def __iter__(self):
968 968 return self.iterkeys()
969 969
970 970 def __contains__(self, f):
971 971 if f is None:
972 972 return False
973 973 self._load()
974 974 dir, subpath = _splittopdir(f)
975 975 if dir:
976 976 self._loadlazy(dir)
977 977
978 978 if dir not in self._dirs:
979 979 return False
980 980
981 981 return self._dirs[dir].__contains__(subpath)
982 982 else:
983 983 return f in self._files
984 984
985 985 def get(self, f, default=None):
986 986 self._load()
987 987 dir, subpath = _splittopdir(f)
988 988 if dir:
989 989 self._loadlazy(dir)
990 990
991 991 if dir not in self._dirs:
992 992 return default
993 993 return self._dirs[dir].get(subpath, default)
994 994 else:
995 995 return self._files.get(f, default)
996 996
997 997 def __getitem__(self, f):
998 998 self._load()
999 999 dir, subpath = _splittopdir(f)
1000 1000 if dir:
1001 1001 self._loadlazy(dir)
1002 1002
1003 1003 return self._dirs[dir].__getitem__(subpath)
1004 1004 else:
1005 1005 return self._files[f]
1006 1006
1007 1007 def flags(self, f):
1008 1008 self._load()
1009 1009 dir, subpath = _splittopdir(f)
1010 1010 if dir:
1011 1011 self._loadlazy(dir)
1012 1012
1013 1013 if dir not in self._dirs:
1014 1014 return b''
1015 1015 return self._dirs[dir].flags(subpath)
1016 1016 else:
1017 1017 if f in self._lazydirs or f in self._dirs:
1018 1018 return b''
1019 1019 return self._flags.get(f, b'')
1020 1020
1021 1021 def find(self, f):
1022 1022 self._load()
1023 1023 dir, subpath = _splittopdir(f)
1024 1024 if dir:
1025 1025 self._loadlazy(dir)
1026 1026
1027 1027 return self._dirs[dir].find(subpath)
1028 1028 else:
1029 1029 return self._files[f], self._flags.get(f, b'')
1030 1030
1031 1031 def __delitem__(self, f):
1032 1032 self._load()
1033 1033 dir, subpath = _splittopdir(f)
1034 1034 if dir:
1035 1035 self._loadlazy(dir)
1036 1036
1037 1037 self._dirs[dir].__delitem__(subpath)
1038 1038 # If the directory is now empty, remove it
1039 1039 if self._dirs[dir]._isempty():
1040 1040 del self._dirs[dir]
1041 1041 else:
1042 1042 del self._files[f]
1043 1043 if f in self._flags:
1044 1044 del self._flags[f]
1045 1045 self._dirty = True
1046 1046
1047 1047 def __setitem__(self, f, n):
1048 1048 assert n is not None
1049 1049 self._load()
1050 1050 dir, subpath = _splittopdir(f)
1051 1051 if dir:
1052 1052 self._loadlazy(dir)
1053 1053 if dir not in self._dirs:
1054 1054 self._dirs[dir] = treemanifest(self._subpath(dir))
1055 1055 self._dirs[dir].__setitem__(subpath, n)
1056 1056 else:
1057 1057 # manifest nodes are either 20 bytes or 32 bytes,
1058 1058 # depending on the hash in use. Assert this as historically
1059 1059 # sometimes extra bytes were added.
1060 1060 assert len(n) in (20, 32)
1061 1061 self._files[f] = n
1062 1062 self._dirty = True
1063 1063
1064 1064 def _load(self):
1065 1065 if self._loadfunc is not _noop:
1066 1066 lf, self._loadfunc = self._loadfunc, _noop
1067 1067 lf(self)
1068 1068 elif self._copyfunc is not _noop:
1069 1069 cf, self._copyfunc = self._copyfunc, _noop
1070 1070 cf(self)
1071 1071
1072 1072 def setflag(self, f, flags):
1073 1073 """Set the flags (symlink, executable) for path f."""
1074 1074 if flags not in _manifestflags:
1075 1075 raise TypeError(b"Invalid manifest flag set.")
1076 1076 self._load()
1077 1077 dir, subpath = _splittopdir(f)
1078 1078 if dir:
1079 1079 self._loadlazy(dir)
1080 1080 if dir not in self._dirs:
1081 1081 self._dirs[dir] = treemanifest(self._subpath(dir))
1082 1082 self._dirs[dir].setflag(subpath, flags)
1083 1083 else:
1084 1084 self._flags[f] = flags
1085 1085 self._dirty = True
1086 1086
1087 1087 def copy(self):
1088 1088 copy = treemanifest(self._dir)
1089 1089 copy._node = self._node
1090 1090 copy._dirty = self._dirty
1091 1091 if self._copyfunc is _noop:
1092 1092
1093 1093 def _copyfunc(s):
1094 1094 self._load()
1095 1095 s._lazydirs = {
1096 1096 d: (n, r, True)
1097 1097 for d, (n, r, c) in pycompat.iteritems(self._lazydirs)
1098 1098 }
1099 1099 sdirs = s._dirs
1100 1100 for d, v in pycompat.iteritems(self._dirs):
1101 1101 sdirs[d] = v.copy()
1102 1102 s._files = dict.copy(self._files)
1103 1103 s._flags = dict.copy(self._flags)
1104 1104
1105 1105 if self._loadfunc is _noop:
1106 1106 _copyfunc(copy)
1107 1107 else:
1108 1108 copy._copyfunc = _copyfunc
1109 1109 else:
1110 1110 copy._copyfunc = self._copyfunc
1111 1111 return copy
1112 1112
1113 1113 def filesnotin(self, m2, match=None):
1114 1114 '''Set of files in this manifest that are not in the other'''
1115 1115 if match and not match.always():
1116 1116 m1 = self._matches(match)
1117 1117 m2 = m2._matches(match)
1118 1118 return m1.filesnotin(m2)
1119 1119
1120 1120 files = set()
1121 1121
1122 1122 def _filesnotin(t1, t2):
1123 1123 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1124 1124 return
1125 1125 t1._load()
1126 1126 t2._load()
1127 1127 self._loaddifflazy(t1, t2)
1128 1128 for d, m1 in pycompat.iteritems(t1._dirs):
1129 1129 if d in t2._dirs:
1130 1130 m2 = t2._dirs[d]
1131 1131 _filesnotin(m1, m2)
1132 1132 else:
1133 1133 files.update(m1.iterkeys())
1134 1134
1135 1135 for fn in t1._files:
1136 1136 if fn not in t2._files:
1137 1137 files.add(t1._subpath(fn))
1138 1138
1139 1139 _filesnotin(self, m2)
1140 1140 return files
1141 1141
1142 1142 @propertycache
1143 1143 def _alldirs(self):
1144 1144 return pathutil.dirs(self)
1145 1145
1146 1146 def dirs(self):
1147 1147 return self._alldirs
1148 1148
1149 1149 def hasdir(self, dir):
1150 1150 self._load()
1151 1151 topdir, subdir = _splittopdir(dir)
1152 1152 if topdir:
1153 1153 self._loadlazy(topdir)
1154 1154 if topdir in self._dirs:
1155 1155 return self._dirs[topdir].hasdir(subdir)
1156 1156 return False
1157 1157 dirslash = dir + b'/'
1158 1158 return dirslash in self._dirs or dirslash in self._lazydirs
1159 1159
1160 1160 def walk(self, match):
1161 1161 """Generates matching file names.
1162 1162
1163 1163 It also reports nonexistent files by marking them bad with match.bad().
1164 1164 """
1165 1165 if match.always():
1166 1166 for f in iter(self):
1167 1167 yield f
1168 1168 return
1169 1169
1170 1170 fset = set(match.files())
1171 1171
1172 1172 for fn in self._walk(match):
1173 1173 if fn in fset:
1174 1174 # specified pattern is the exact name
1175 1175 fset.remove(fn)
1176 1176 yield fn
1177 1177
1178 1178 # for dirstate.walk, files=[''] means "walk the whole tree".
1179 1179 # follow that here, too
1180 1180 fset.discard(b'')
1181 1181
1182 1182 for fn in sorted(fset):
1183 1183 if not self.hasdir(fn):
1184 1184 match.bad(fn, None)
1185 1185
1186 1186 def _walk(self, match):
1187 1187 '''Recursively generates matching file names for walk().'''
1188 1188 visit = match.visitchildrenset(self._dir[:-1])
1189 1189 if not visit:
1190 1190 return
1191 1191
1192 1192 # yield this dir's files and walk its submanifests
1193 1193 self._load()
1194 1194 visit = self._loadchildrensetlazy(visit)
1195 1195 for p in sorted(list(self._dirs) + list(self._files)):
1196 1196 if p in self._files:
1197 1197 fullp = self._subpath(p)
1198 1198 if match(fullp):
1199 1199 yield fullp
1200 1200 else:
1201 1201 if not visit or p[:-1] in visit:
1202 1202 for f in self._dirs[p]._walk(match):
1203 1203 yield f
1204 1204
1205 1205 def _matches(self, match):
1206 1206 """recursively generate a new manifest filtered by the match argument."""
1207 1207 if match.always():
1208 1208 return self.copy()
1209 1209 return self._matches_inner(match)
1210 1210
1211 1211 def _matches_inner(self, match):
1212 1212 if match.always():
1213 1213 return self.copy()
1214 1214
1215 1215 visit = match.visitchildrenset(self._dir[:-1])
1216 1216 if visit == b'all':
1217 1217 return self.copy()
1218 1218 ret = treemanifest(self._dir)
1219 1219 if not visit:
1220 1220 return ret
1221 1221
1222 1222 self._load()
1223 1223 for fn in self._files:
1224 1224 # While visitchildrenset *usually* lists only subdirs, this is
1225 1225 # actually up to the matcher and may have some files in the set().
1226 1226 # If visit == 'this', we should obviously look at the files in this
1227 1227 # directory; if visit is a set, and fn is in it, we should inspect
1228 1228 # fn (but no need to inspect things not in the set).
1229 1229 if visit != b'this' and fn not in visit:
1230 1230 continue
1231 1231 fullp = self._subpath(fn)
1232 1232 # visitchildrenset isn't perfect, we still need to call the regular
1233 1233 # matcher code to further filter results.
1234 1234 if not match(fullp):
1235 1235 continue
1236 1236 ret._files[fn] = self._files[fn]
1237 1237 if fn in self._flags:
1238 1238 ret._flags[fn] = self._flags[fn]
1239 1239
1240 1240 visit = self._loadchildrensetlazy(visit)
1241 1241 for dir, subm in pycompat.iteritems(self._dirs):
1242 1242 if visit and dir[:-1] not in visit:
1243 1243 continue
1244 1244 m = subm._matches_inner(match)
1245 1245 if not m._isempty():
1246 1246 ret._dirs[dir] = m
1247 1247
1248 1248 if not ret._isempty():
1249 1249 ret._dirty = True
1250 1250 return ret
1251 1251
1252 1252 def fastdelta(self, base, changes):
1253 1253 raise FastdeltaUnavailable()
1254 1254
1255 1255 def diff(self, m2, match=None, clean=False):
1256 1256 """Finds changes between the current manifest and m2.
1257 1257
1258 1258 Args:
1259 1259 m2: the manifest to which this manifest should be compared.
1260 1260 clean: if true, include files unchanged between these manifests
1261 1261 with a None value in the returned dictionary.
1262 1262
1263 1263 The result is returned as a dict with filename as key and
1264 1264 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1265 1265 nodeid in the current/other manifest and fl1/fl2 is the flag
1266 1266 in the current/other manifest. Where the file does not exist,
1267 1267 the nodeid will be None and the flags will be the empty
1268 1268 string.
1269 1269 """
1270 1270 if match and not match.always():
1271 1271 m1 = self._matches(match)
1272 1272 m2 = m2._matches(match)
1273 1273 return m1.diff(m2, clean=clean)
1274 1274 result = {}
1275 1275 emptytree = treemanifest()
1276 1276
1277 1277 def _iterativediff(t1, t2, stack):
1278 1278 """compares two tree manifests and appends new tree manifests that
1279 1279 need to be compared to the stack"""
1280 1280 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1281 1281 return
1282 1282 t1._load()
1283 1283 t2._load()
1284 1284 self._loaddifflazy(t1, t2)
1285 1285
1286 1286 for d, m1 in pycompat.iteritems(t1._dirs):
1287 1287 m2 = t2._dirs.get(d, emptytree)
1288 1288 stack.append((m1, m2))
1289 1289
1290 1290 for d, m2 in pycompat.iteritems(t2._dirs):
1291 1291 if d not in t1._dirs:
1292 1292 stack.append((emptytree, m2))
1293 1293
1294 1294 for fn, n1 in pycompat.iteritems(t1._files):
1295 1295 fl1 = t1._flags.get(fn, b'')
1296 1296 n2 = t2._files.get(fn, None)
1297 1297 fl2 = t2._flags.get(fn, b'')
1298 1298 if n1 != n2 or fl1 != fl2:
1299 1299 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1300 1300 elif clean:
1301 1301 result[t1._subpath(fn)] = None
1302 1302
1303 1303 for fn, n2 in pycompat.iteritems(t2._files):
1304 1304 if fn not in t1._files:
1305 1305 fl2 = t2._flags.get(fn, b'')
1306 1306 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1307 1307
1308 1308 stackls = []
1309 1309 _iterativediff(self, m2, stackls)
1310 1310 while stackls:
1311 1311 t1, t2 = stackls.pop()
1312 1312 # stackls is populated in the function call
1313 1313 _iterativediff(t1, t2, stackls)
1314 1314 return result
1315 1315
1316 1316 def unmodifiedsince(self, m2):
1317 1317 return not self._dirty and not m2._dirty and self._node == m2._node
1318 1318
1319 1319 def parse(self, text, readsubtree):
1320 1320 selflazy = self._lazydirs
1321 1321 for f, n, fl in _parse(text):
1322 1322 if fl == b't':
1323 1323 f = f + b'/'
1324 1324 # False below means "doesn't need to be copied", so the cached
1325 1325 # value from readsubtree can be used directly.
1326 1326 selflazy[f] = (n, readsubtree, False)
1327 1327 elif b'/' in f:
1328 1328 # This is a flat manifest, so use __setitem__ and setflag rather
1329 1329 # than assigning directly to _files and _flags, so we can
1330 1330 # assign a path in a subdirectory, and to mark dirty (compared
1331 1331 # to nullid).
1332 1332 self[f] = n
1333 1333 if fl:
1334 1334 self.setflag(f, fl)
1335 1335 else:
1336 1336 # Assigning to _files and _flags avoids marking as dirty,
1337 1337 # and should be a little faster.
1338 1338 self._files[f] = n
1339 1339 if fl:
1340 1340 self._flags[f] = fl
1341 1341
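# For illustration: in a tree manifest text a subdirectory shows up as an
# entry whose flag is b't' (stored above in _lazydirs under a key with a
# trailing '/'), while regular files carry an empty, b'l' (symlink) or
# b'x' (executable) flag.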
1342 1342 def text(self):
1343 1343 """Get the full data of this manifest as a bytestring."""
1344 1344 self._load()
1345 1345 return _text(self.iterentries())
1346 1346
1347 1347 def dirtext(self):
1348 1348 """Get the full data of this directory as a bytestring. Make sure that
1349 1349 any submanifests have been written first, so their nodeids are correct.
1350 1350 """
1351 1351 self._load()
1352 1352 flags = self.flags
1353 1353 lazydirs = [
1354 1354 (d[:-1], v[0], b't') for d, v in pycompat.iteritems(self._lazydirs)
1355 1355 ]
1356 1356 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1357 1357 files = [(f, self._files[f], flags(f)) for f in self._files]
1358 1358 return _text(sorted(dirs + files + lazydirs))
1359 1359
1360 1360 def read(self, gettext, readsubtree):
1361 1361 def _load_for_read(s):
1362 1362 s.parse(gettext(), readsubtree)
1363 1363 s._dirty = False
1364 1364
1365 1365 self._loadfunc = _load_for_read
1366 1366
1367 1367 def writesubtrees(self, m1, m2, writesubtree, match):
1368 1368 self._load() # for consistency; should never have any effect here
1369 1369 m1._load()
1370 1370 m2._load()
1371 1371 emptytree = treemanifest()
1372 1372
1373 1373 def getnode(m, d):
1374 1374 ld = m._lazydirs.get(d)
1375 1375 if ld:
1376 1376 return ld[0]
1377 1377 return m._dirs.get(d, emptytree)._node
1378 1378
1379 1379 # let's skip investigating things that `match` says we do not need.
1380 1380 visit = match.visitchildrenset(self._dir[:-1])
1381 1381 visit = self._loadchildrensetlazy(visit)
1382 1382 if visit == b'this' or visit == b'all':
1383 1383 visit = None
1384 1384 for d, subm in pycompat.iteritems(self._dirs):
1385 1385 if visit and d[:-1] not in visit:
1386 1386 continue
1387 1387 subp1 = getnode(m1, d)
1388 1388 subp2 = getnode(m2, d)
1389 1389 if subp1 == nullid:
1390 1390 subp1, subp2 = subp2, subp1
1391 1391 writesubtree(subm, subp1, subp2, match)
1392 1392
1393 1393 def walksubtrees(self, matcher=None):
1394 1394 """Returns an iterator of the subtrees of this manifest, including this
1395 1395 manifest itself.
1396 1396
1397 1397 If `matcher` is provided, it only returns subtrees that match.
1398 1398 """
1399 1399 if matcher and not matcher.visitdir(self._dir[:-1]):
1400 1400 return
1401 1401 if not matcher or matcher(self._dir[:-1]):
1402 1402 yield self
1403 1403
1404 1404 self._load()
1405 1405 # OPT: use visitchildrenset to avoid loading everything.
1406 1406 self._loadalllazy()
1407 1407 for d, subm in pycompat.iteritems(self._dirs):
1408 1408 for subtree in subm.walksubtrees(matcher=matcher):
1409 1409 yield subtree
1410 1410
1411 1411
1412 1412 class manifestfulltextcache(util.lrucachedict):
1413 1413 """File-backed LRU cache for manifest fulltexts
1414 1414
1415 1415 File consists of entries, up to EOF:
1416 1416
1417 1417 - 20 bytes node, 4 bytes length, <length> manifest data
1418 1418
1419 1419 These are written in reverse cache order (oldest to newest).
1420 1420
1421 1421 """
1422 1422
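# Sketch of the record layout described in the docstring (illustration only):
# each on-disk entry is node (20 bytes) + struct.pack(b'>L', len(text)) + text,
# and read()/write() below simply loop over those fixed-size headers followed
# by the payload until the data runs out.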
1423 1423 _file = b'manifestfulltextcache'
1424 1424
1425 1425 def __init__(self, max):
1426 1426 super(manifestfulltextcache, self).__init__(max)
1427 1427 self._dirty = False
1428 1428 self._read = False
1429 1429 self._opener = None
1430 1430
1431 1431 def read(self):
1432 1432 if self._read or self._opener is None:
1433 1433 return
1434 1434
1435 1435 try:
1436 1436 with self._opener(self._file) as fp:
1437 1437 set = super(manifestfulltextcache, self).__setitem__
1438 1438 # ignore trailing data, this is a cache, corruption is skipped
1439 1439 while True:
1440 1440 # TODO do we need to do work here for sha1 portability?
1441 1441 node = fp.read(20)
1442 1442 if len(node) < 20:
1443 1443 break
1444 1444 try:
1445 1445 size = struct.unpack(b'>L', fp.read(4))[0]
1446 1446 except struct.error:
1447 1447 break
1448 1448 value = bytearray(fp.read(size))
1449 1449 if len(value) != size:
1450 1450 break
1451 1451 set(node, value)
1452 1452 except IOError:
1453 1453 # the file is allowed to be missing
1454 1454 pass
1455 1455
1456 1456 self._read = True
1457 1457 self._dirty = False
1458 1458
1459 1459 def write(self):
1460 1460 if not self._dirty or self._opener is None:
1461 1461 return
1462 1462 # rotate backwards to the first used node
1463 1463 try:
1464 1464 with self._opener(
1465 1465 self._file, b'w', atomictemp=True, checkambig=True
1466 1466 ) as fp:
1467 1467 node = self._head.prev
1468 1468 while True:
1469 1469 if node.key in self._cache:
1470 1470 fp.write(node.key)
1471 1471 fp.write(struct.pack(b'>L', len(node.value)))
1472 1472 fp.write(node.value)
1473 1473 if node is self._head:
1474 1474 break
1475 1475 node = node.prev
1476 1476 except IOError:
1477 1477 # We could not write the cache (e.g. a permission error), so its
1478 1478 # content may be missing or stale.
1479 1479 #
1480 1480 # We could try harder and see if we could recreate a wcache
1481 1481 # directory where we could write to.
1482 1482 #
1483 1483 # XXX the error passes silently; having some way to issue an error
1484 1484 # log via `ui.log` would be nice.
1485 1485 pass
1486 1486
1487 1487 def __len__(self):
1488 1488 if not self._read:
1489 1489 self.read()
1490 1490 return super(manifestfulltextcache, self).__len__()
1491 1491
1492 1492 def __contains__(self, k):
1493 1493 if not self._read:
1494 1494 self.read()
1495 1495 return super(manifestfulltextcache, self).__contains__(k)
1496 1496
1497 1497 def __iter__(self):
1498 1498 if not self._read:
1499 1499 self.read()
1500 1500 return super(manifestfulltextcache, self).__iter__()
1501 1501
1502 1502 def __getitem__(self, k):
1503 1503 if not self._read:
1504 1504 self.read()
1505 1505 # the cache lru order can change on read
1506 1506 setdirty = self._cache.get(k) is not self._head
1507 1507 value = super(manifestfulltextcache, self).__getitem__(k)
1508 1508 if setdirty:
1509 1509 self._dirty = True
1510 1510 return value
1511 1511
1512 1512 def __setitem__(self, k, v):
1513 1513 if not self._read:
1514 1514 self.read()
1515 1515 super(manifestfulltextcache, self).__setitem__(k, v)
1516 1516 self._dirty = True
1517 1517
1518 1518 def __delitem__(self, k):
1519 1519 if not self._read:
1520 1520 self.read()
1521 1521 super(manifestfulltextcache, self).__delitem__(k)
1522 1522 self._dirty = True
1523 1523
1524 1524 def get(self, k, default=None):
1525 1525 if not self._read:
1526 1526 self.read()
1527 1527 return super(manifestfulltextcache, self).get(k, default=default)
1528 1528
1529 1529 def clear(self, clear_persisted_data=False):
1530 1530 super(manifestfulltextcache, self).clear()
1531 1531 if clear_persisted_data:
1532 1532 self._dirty = True
1533 1533 self.write()
1534 1534 self._read = False
1535 1535
1536 1536
1537 1537 # an upper bound of what we expect from compression
1538 1538 # (real live value seems to be "3")
1539 1539 MAXCOMPRESSION = 3
1540 1540
1541 1541
1542 1542 class FastdeltaUnavailable(Exception):
1543 1543 """Exception raised when fastdelta isn't usable on a manifest."""
1544 1544
1545 1545
1546 1546 @interfaceutil.implementer(repository.imanifeststorage)
1547 1547 class manifestrevlog(object):
1548 1548 """A revlog that stores manifest texts. This is responsible for caching the
1549 1549 full-text manifest contents.
1550 1550 """
1551 1551
1552 1552 def __init__(
1553 1553 self,
1554 1554 opener,
1555 1555 tree=b'',
1556 1556 dirlogcache=None,
1557 1557 indexfile=None,
1558 1558 treemanifest=False,
1559 1559 ):
1560 1560 """Constructs a new manifest revlog
1561 1561
1562 1562 `indexfile` - used by extensions to have two manifests at once, like
1563 1563 when transitioning between flat manifests and tree manifests.
1564 1564
1565 1565 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1566 1566 options can also be used to make this a tree manifest revlog. The opener
1567 1567 option takes precedence, so if it is set to True, we ignore whatever
1568 1568 value is passed to the constructor.
1569 1569 """
1570 1570 # During normal operations, we expect to deal with not more than four
1571 1571 # revs at a time (such as during commit --amend). When rebasing large
1572 1572 # stacks of commits, the number can go up, hence the config knob below.
1573 1573 cachesize = 4
1574 1574 optiontreemanifest = False
1575 1575 opts = getattr(opener, 'options', None)
1576 1576 if opts is not None:
1577 1577 cachesize = opts.get(b'manifestcachesize', cachesize)
1578 1578 optiontreemanifest = opts.get(b'treemanifest', False)
1579 1579
1580 1580 self._treeondisk = optiontreemanifest or treemanifest
1581 1581
1582 1582 self._fulltextcache = manifestfulltextcache(cachesize)
1583 1583
1584 1584 if tree:
1585 1585 assert self._treeondisk, b'opts is %r' % opts
1586 1586
1587 1587 if indexfile is None:
1588 1588 indexfile = b'00manifest.i'
1589 1589 if tree:
1590 1590 indexfile = b"meta/" + tree + indexfile
1591 1591
1592 1592 self.tree = tree
1593 1593
1594 1594 # The dirlogcache is kept on the root manifest log
1595 1595 if tree:
1596 1596 self._dirlogcache = dirlogcache
1597 1597 else:
1598 1598 self._dirlogcache = {b'': self}
1599 1599
1600 1600 self._revlog = revlog.revlog(
1601 1601 opener,
1602 1602 indexfile,
1603 1603 # only root indexfile is cached
1604 1604 checkambig=not bool(tree),
1605 1605 mmaplargeindex=True,
1606 1606 upperboundcomp=MAXCOMPRESSION,
1607 1607 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
1608 1608 )
1609 1609
1610 1610 self.index = self._revlog.index
1611 1611 self.version = self._revlog.version
1612 1612 self._generaldelta = self._revlog._generaldelta
1613 1613
1614 1614 def _setupmanifestcachehooks(self, repo):
1615 1615 """Persist the manifestfulltextcache on lock release"""
1616 1616 if not util.safehasattr(repo, b'_wlockref'):
1617 1617 return
1618 1618
1619 1619 self._fulltextcache._opener = repo.wcachevfs
1620 1620 if repo._currentlock(repo._wlockref) is None:
1621 1621 return
1622 1622
1623 1623 reporef = weakref.ref(repo)
1624 1624 manifestrevlogref = weakref.ref(self)
1625 1625
1626 1626 def persistmanifestcache(success):
1627 1627 # Repo is in an unknown state, do not persist.
1628 1628 if not success:
1629 1629 return
1630 1630
1631 1631 repo = reporef()
1632 1632 self = manifestrevlogref()
1633 1633 if repo is None or self is None:
1634 1634 return
1635 1635 if repo.manifestlog.getstorage(b'') is not self:
1636 1636 # there's a different manifest in play now, abort
1637 1637 return
1638 1638 self._fulltextcache.write()
1639 1639
1640 1640 repo._afterlock(persistmanifestcache)
1641 1641
1642 1642 @property
1643 1643 def fulltextcache(self):
1644 1644 return self._fulltextcache
1645 1645
1646 1646 def clearcaches(self, clear_persisted_data=False):
1647 1647 self._revlog.clearcaches()
1648 1648 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1649 1649 self._dirlogcache = {self.tree: self}
1650 1650
1651 1651 def dirlog(self, d):
1652 1652 if d:
1653 1653 assert self._treeondisk
1654 1654 if d not in self._dirlogcache:
1655 1655 mfrevlog = manifestrevlog(
1656 1656 self.opener, d, self._dirlogcache, treemanifest=self._treeondisk
1657 1657 )
1658 1658 self._dirlogcache[d] = mfrevlog
1659 1659 return self._dirlogcache[d]
1660 1660
1661 1661 def add(
1662 1662 self,
1663 1663 m,
1664 1664 transaction,
1665 1665 link,
1666 1666 p1,
1667 1667 p2,
1668 1668 added,
1669 1669 removed,
1670 1670 readtree=None,
1671 1671 match=None,
1672 1672 ):
1673 1673 """add some manifest entry into the manifest log
1674 1674
1675 1675 input:
1676 1676
1677 1677 m: the manifest dict we want to store
1678 1678 transaction: the open transaction
1679 1679 p1: manifest-node of p1
1680 1680 p2: manifest-node of p2
1681 1681 added: files added/changed compared to the parent
1682 1682 removed: files removed compared to the parent
1683 1683
1684 1684 tree manifest input:
1685 1685
1686 1686 readtree: a function to read a subtree
1687 1687 match: a filematcher for the subpart of the tree manifest
1688 1688 """
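# Rough control flow (a summary of the code below, not new behavior): try
# the cheap fastdelta path against a cached p1 fulltext first; if that is
# unavailable, fall back to writing either a tree manifest via _addtree()
# or a plain fulltext through revlog.addrevision().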
1689 1689 try:
1690 1690 if p1 not in self.fulltextcache:
1691 1691 raise FastdeltaUnavailable()
1692 1692 # If our first parent is in the manifest cache, we can
1693 1693 # compute a delta here using properties we know about the
1694 1694 # manifest up-front, which may save time later for the
1695 1695 # revlog layer.
1696 1696
1697 1697 _checkforbidden(added)
1698 1698 # combine the changed lists into one sorted iterator
1699 1699 work = heapq.merge(
1700 1700 [(x, False) for x in sorted(added)],
1701 1701 [(x, True) for x in sorted(removed)],
1702 1702 )
1703 1703
1704 1704 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1705 1705 cachedelta = self._revlog.rev(p1), deltatext
1706 1706 text = util.buffer(arraytext)
1707 1707 n = self._revlog.addrevision(
1708 1708 text, transaction, link, p1, p2, cachedelta
1709 1709 )
1710 1710 except FastdeltaUnavailable:
1711 1711 # The first parent manifest isn't already loaded or the
1712 1712 # manifest implementation doesn't support fastdelta, so
1713 1713 # we'll just encode a fulltext of the manifest and pass
1714 1714 # that through to the revlog layer, and let it handle the
1715 1715 # delta process.
1716 1716 if self._treeondisk:
1717 1717 assert readtree, b"readtree must be set for treemanifest writes"
1718 1718 assert match, b"match must be specified for treemanifest writes"
1719 1719 m1 = readtree(self.tree, p1)
1720 1720 m2 = readtree(self.tree, p2)
1721 1721 n = self._addtree(
1722 1722 m, transaction, link, m1, m2, readtree, match=match
1723 1723 )
1724 1724 arraytext = None
1725 1725 else:
1726 1726 text = m.text()
1727 1727 n = self._revlog.addrevision(text, transaction, link, p1, p2)
1728 1728 arraytext = bytearray(text)
1729 1729
1730 1730 if arraytext is not None:
1731 1731 self.fulltextcache[n] = arraytext
1732 1732
1733 1733 return n
1734 1734
1735 1735 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1736 1736 # If the manifest is unchanged compared to one parent,
1737 1737 # don't write a new revision
1738 1738 if self.tree != b'' and (
1739 1739 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1740 1740 ):
1741 1741 return m.node()
1742 1742
1743 1743 def writesubtree(subm, subp1, subp2, match):
1744 1744 sublog = self.dirlog(subm.dir())
1745 1745 sublog.add(
1746 1746 subm,
1747 1747 transaction,
1748 1748 link,
1749 1749 subp1,
1750 1750 subp2,
1751 1751 None,
1752 1752 None,
1753 1753 readtree=readtree,
1754 1754 match=match,
1755 1755 )
1756 1756
1757 1757 m.writesubtrees(m1, m2, writesubtree, match)
1758 1758 text = m.dirtext()
1759 1759 n = None
1760 1760 if self.tree != b'':
1761 1761 # Double-check whether the contents are unchanged compared to either parent
1762 1762 if text == m1.dirtext():
1763 1763 n = m1.node()
1764 1764 elif text == m2.dirtext():
1765 1765 n = m2.node()
1766 1766
1767 1767 if not n:
1768 1768 n = self._revlog.addrevision(
1769 1769 text, transaction, link, m1.node(), m2.node()
1770 1770 )
1771 1771
1772 1772 # Save nodeid so parent manifest can calculate its nodeid
1773 1773 m.setnode(n)
1774 1774 return n
1775 1775
1776 1776 def __len__(self):
1777 1777 return len(self._revlog)
1778 1778
1779 1779 def __iter__(self):
1780 1780 return self._revlog.__iter__()
1781 1781
1782 1782 def rev(self, node):
1783 1783 return self._revlog.rev(node)
1784 1784
1785 1785 def node(self, rev):
1786 1786 return self._revlog.node(rev)
1787 1787
1788 1788 def lookup(self, value):
1789 1789 return self._revlog.lookup(value)
1790 1790
1791 1791 def parentrevs(self, rev):
1792 1792 return self._revlog.parentrevs(rev)
1793 1793
1794 1794 def parents(self, node):
1795 1795 return self._revlog.parents(node)
1796 1796
1797 1797 def linkrev(self, rev):
1798 1798 return self._revlog.linkrev(rev)
1799 1799
1800 1800 def checksize(self):
1801 1801 return self._revlog.checksize()
1802 1802
1803 1803 def revision(self, node, _df=None, raw=False):
1804 1804 return self._revlog.revision(node, _df=_df, raw=raw)
1805 1805
1806 1806 def rawdata(self, node, _df=None):
1807 1807 return self._revlog.rawdata(node, _df=_df)
1808 1808
1809 1809 def revdiff(self, rev1, rev2):
1810 1810 return self._revlog.revdiff(rev1, rev2)
1811 1811
1812 1812 def cmp(self, node, text):
1813 1813 return self._revlog.cmp(node, text)
1814 1814
1815 1815 def deltaparent(self, rev):
1816 1816 return self._revlog.deltaparent(rev)
1817 1817
1818 1818 def emitrevisions(
1819 1819 self,
1820 1820 nodes,
1821 1821 nodesorder=None,
1822 1822 revisiondata=False,
1823 1823 assumehaveparentrevisions=False,
1824 1824 deltamode=repository.CG_DELTAMODE_STD,
1825 1825 ):
1826 1826 return self._revlog.emitrevisions(
1827 1827 nodes,
1828 1828 nodesorder=nodesorder,
1829 1829 revisiondata=revisiondata,
1830 1830 assumehaveparentrevisions=assumehaveparentrevisions,
1831 1831 deltamode=deltamode,
1832 1832 )
1833 1833
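# Note on `alwayscache` below: it is forwarded unchanged to
# revlog.addgroup(), which is expected to cache the fulltext of added
# revisions even when no addrevisioncb is supplied (an assumption about
# revlog.addgroup's contract; that code is not shown in this file).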
1834 1834 def addgroup(
1835 1835 self,
1836 1836 deltas,
1837 1837 linkmapper,
1838 1838 transaction,
1839 alwayscache=False,
1839 1840 addrevisioncb=None,
1840 1841 duplicaterevisioncb=None,
1841 1842 ):
1842 1843 return self._revlog.addgroup(
1843 1844 deltas,
1844 1845 linkmapper,
1845 1846 transaction,
1847 alwayscache=alwayscache,
1846 1848 addrevisioncb=addrevisioncb,
1847 1849 duplicaterevisioncb=duplicaterevisioncb,
1848 1850 )
1849 1851
1850 1852 def rawsize(self, rev):
1851 1853 return self._revlog.rawsize(rev)
1852 1854
1853 1855 def getstrippoint(self, minlink):
1854 1856 return self._revlog.getstrippoint(minlink)
1855 1857
1856 1858 def strip(self, minlink, transaction):
1857 1859 return self._revlog.strip(minlink, transaction)
1858 1860
1859 1861 def files(self):
1860 1862 return self._revlog.files()
1861 1863
1862 1864 def clone(self, tr, destrevlog, **kwargs):
1863 1865 if not isinstance(destrevlog, manifestrevlog):
1864 1866 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1865 1867
1866 1868 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1867 1869
1868 1870 def storageinfo(
1869 1871 self,
1870 1872 exclusivefiles=False,
1871 1873 sharedfiles=False,
1872 1874 revisionscount=False,
1873 1875 trackedsize=False,
1874 1876 storedsize=False,
1875 1877 ):
1876 1878 return self._revlog.storageinfo(
1877 1879 exclusivefiles=exclusivefiles,
1878 1880 sharedfiles=sharedfiles,
1879 1881 revisionscount=revisionscount,
1880 1882 trackedsize=trackedsize,
1881 1883 storedsize=storedsize,
1882 1884 )
1883 1885
1884 1886 @property
1885 1887 def indexfile(self):
1886 1888 return self._revlog.indexfile
1887 1889
1888 1890 @indexfile.setter
1889 1891 def indexfile(self, value):
1890 1892 self._revlog.indexfile = value
1891 1893
1892 1894 @property
1893 1895 def opener(self):
1894 1896 return self._revlog.opener
1895 1897
1896 1898 @opener.setter
1897 1899 def opener(self, value):
1898 1900 self._revlog.opener = value
1899 1901
1900 1902
1901 1903 @interfaceutil.implementer(repository.imanifestlog)
1902 1904 class manifestlog(object):
1903 1905 """A collection class representing the collection of manifest snapshots
1904 1906 referenced by commits in the repository.
1905 1907
1906 1908 In this situation, 'manifest' refers to the abstract concept of a snapshot
1907 1909 of the list of files in the given commit. Consumers of the output of this
1908 1910 class do not care about the implementation details of the actual manifests
1909 1911 they receive (i.e. tree or flat or lazily loaded, etc)."""
1910 1912
1911 1913 def __init__(self, opener, repo, rootstore, narrowmatch):
1912 1914 usetreemanifest = False
1913 1915 cachesize = 4
1914 1916
1915 1917 opts = getattr(opener, 'options', None)
1916 1918 if opts is not None:
1917 1919 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1918 1920 cachesize = opts.get(b'manifestcachesize', cachesize)
1919 1921
1920 1922 self._treemanifests = usetreemanifest
1921 1923
1922 1924 self._rootstore = rootstore
1923 1925 self._rootstore._setupmanifestcachehooks(repo)
1924 1926 self._narrowmatch = narrowmatch
1925 1927
1926 1928 # A cache of the manifestctx or treemanifestctx for each directory
1927 1929 self._dirmancache = {}
1928 1930 self._dirmancache[b''] = util.lrucachedict(cachesize)
1929 1931
1930 1932 self._cachesize = cachesize
1931 1933
1932 1934 def __getitem__(self, node):
1933 1935 """Retrieves the manifest instance for the given node. Throws a
1934 1936 LookupError if not found.
1935 1937 """
1936 1938 return self.get(b'', node)
1937 1939
1938 1940 def get(self, tree, node, verify=True):
1939 1941 """Retrieves the manifest instance for the given node. Throws a
1940 1942 LookupError if not found.
1941 1943
1942 1944 `verify` - if True an exception will be thrown if the node is not in
1943 1945 the revlog
1944 1946 """
1945 1947 if node in self._dirmancache.get(tree, ()):
1946 1948 return self._dirmancache[tree][node]
1947 1949
1948 1950 if not self._narrowmatch.always():
1949 1951 if not self._narrowmatch.visitdir(tree[:-1]):
1950 1952 return excludeddirmanifestctx(tree, node)
1951 1953 if tree:
1952 1954 if self._rootstore._treeondisk:
1953 1955 if verify:
1954 1956 # Side-effect is LookupError is raised if node doesn't
1955 1957 # exist.
1956 1958 self.getstorage(tree).rev(node)
1957 1959
1958 1960 m = treemanifestctx(self, tree, node)
1959 1961 else:
1960 1962 raise error.Abort(
1961 1963 _(
1962 1964 b"cannot ask for manifest directory '%s' in a flat "
1963 1965 b"manifest"
1964 1966 )
1965 1967 % tree
1966 1968 )
1967 1969 else:
1968 1970 if verify:
1969 1971 # Side-effect is LookupError is raised if node doesn't exist.
1970 1972 self._rootstore.rev(node)
1971 1973
1972 1974 if self._treemanifests:
1973 1975 m = treemanifestctx(self, b'', node)
1974 1976 else:
1975 1977 m = manifestctx(self, node)
1976 1978
1977 1979 if node != nullid:
1978 1980 mancache = self._dirmancache.get(tree)
1979 1981 if not mancache:
1980 1982 mancache = util.lrucachedict(self._cachesize)
1981 1983 self._dirmancache[tree] = mancache
1982 1984 mancache[node] = m
1983 1985 return m
1984 1986
1985 1987 def getstorage(self, tree):
1986 1988 return self._rootstore.dirlog(tree)
1987 1989
1988 1990 def clearcaches(self, clear_persisted_data=False):
1989 1991 self._dirmancache.clear()
1990 1992 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
1991 1993
1992 1994 def rev(self, node):
1993 1995 return self._rootstore.rev(node)
1994 1996
1995 1997 def update_caches(self, transaction):
1996 1998 return self._rootstore._revlog.update_caches(transaction=transaction)
1997 1999
1998 2000
1999 2001 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2000 2002 class memmanifestctx(object):
2001 2003 def __init__(self, manifestlog):
2002 2004 self._manifestlog = manifestlog
2003 2005 self._manifestdict = manifestdict()
2004 2006
2005 2007 def _storage(self):
2006 2008 return self._manifestlog.getstorage(b'')
2007 2009
2008 2010 def copy(self):
2009 2011 memmf = memmanifestctx(self._manifestlog)
2010 2012 memmf._manifestdict = self.read().copy()
2011 2013 return memmf
2012 2014
2013 2015 def read(self):
2014 2016 return self._manifestdict
2015 2017
2016 2018 def write(self, transaction, link, p1, p2, added, removed, match=None):
2017 2019 return self._storage().add(
2018 2020 self._manifestdict,
2019 2021 transaction,
2020 2022 link,
2021 2023 p1,
2022 2024 p2,
2023 2025 added,
2024 2026 removed,
2025 2027 match=match,
2026 2028 )
2027 2029
2028 2030
2029 2031 @interfaceutil.implementer(repository.imanifestrevisionstored)
2030 2032 class manifestctx(object):
2031 2033 """A class representing a single revision of a manifest, including its
2032 2034 contents, its parent revs, and its linkrev.
2033 2035 """
2034 2036
2035 2037 def __init__(self, manifestlog, node):
2036 2038 self._manifestlog = manifestlog
2037 2039 self._data = None
2038 2040
2039 2041 self._node = node
2040 2042
2041 2043 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2042 2044 # but let's add it later when something needs it and we can load it
2043 2045 # lazily.
2044 2046 # self.p1, self.p2 = store.parents(node)
2045 2047 # rev = store.rev(node)
2046 2048 # self.linkrev = store.linkrev(rev)
2047 2049
2048 2050 def _storage(self):
2049 2051 return self._manifestlog.getstorage(b'')
2050 2052
2051 2053 def node(self):
2052 2054 return self._node
2053 2055
2054 2056 def copy(self):
2055 2057 memmf = memmanifestctx(self._manifestlog)
2056 2058 memmf._manifestdict = self.read().copy()
2057 2059 return memmf
2058 2060
2059 2061 @propertycache
2060 2062 def parents(self):
2061 2063 return self._storage().parents(self._node)
2062 2064
2063 2065 def read(self):
2064 2066 if self._data is None:
2065 2067 if self._node == nullid:
2066 2068 self._data = manifestdict()
2067 2069 else:
2068 2070 store = self._storage()
2069 2071 if self._node in store.fulltextcache:
2070 2072 text = pycompat.bytestr(store.fulltextcache[self._node])
2071 2073 else:
2072 2074 text = store.revision(self._node)
2073 2075 arraytext = bytearray(text)
2074 2076 store.fulltextcache[self._node] = arraytext
2075 2077 self._data = manifestdict(text)
2076 2078 return self._data
2077 2079
2078 2080 def readfast(self, shallow=False):
2079 2081 """Calls either readdelta or read, based on which would be less work.
2080 2082 readdelta is called if the delta is against the p1, and therefore can be
2081 2083 read quickly.
2082 2084
2083 2085 If `shallow` is True, nothing changes since this is a flat manifest.
2084 2086 """
2085 2087 store = self._storage()
2086 2088 r = store.rev(self._node)
2087 2089 deltaparent = store.deltaparent(r)
2088 2090 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2089 2091 return self.readdelta()
2090 2092 return self.read()
2091 2093
2092 2094 def readdelta(self, shallow=False):
2093 2095 """Returns a manifest containing just the entries that are present
2094 2096 in this manifest, but not in its p1 manifest. This is efficient to read
2095 2097 if the revlog delta is already p1.
2096 2098
2097 2099 Changing the value of `shallow` has no effect on flat manifests.
2098 2100 """
2099 2101 store = self._storage()
2100 2102 r = store.rev(self._node)
2101 2103 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2102 2104 return manifestdict(d)
2103 2105
2104 2106 def find(self, key):
2105 2107 return self.read().find(key)
2106 2108
2107 2109
2108 2110 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2109 2111 class memtreemanifestctx(object):
2110 2112 def __init__(self, manifestlog, dir=b''):
2111 2113 self._manifestlog = manifestlog
2112 2114 self._dir = dir
2113 2115 self._treemanifest = treemanifest()
2114 2116
2115 2117 def _storage(self):
2116 2118 return self._manifestlog.getstorage(b'')
2117 2119
2118 2120 def copy(self):
2119 2121 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2120 2122 memmf._treemanifest = self._treemanifest.copy()
2121 2123 return memmf
2122 2124
2123 2125 def read(self):
2124 2126 return self._treemanifest
2125 2127
2126 2128 def write(self, transaction, link, p1, p2, added, removed, match=None):
2127 2129 def readtree(dir, node):
2128 2130 return self._manifestlog.get(dir, node).read()
2129 2131
2130 2132 return self._storage().add(
2131 2133 self._treemanifest,
2132 2134 transaction,
2133 2135 link,
2134 2136 p1,
2135 2137 p2,
2136 2138 added,
2137 2139 removed,
2138 2140 readtree=readtree,
2139 2141 match=match,
2140 2142 )
2141 2143
2142 2144
2143 2145 @interfaceutil.implementer(repository.imanifestrevisionstored)
2144 2146 class treemanifestctx(object):
2145 2147 def __init__(self, manifestlog, dir, node):
2146 2148 self._manifestlog = manifestlog
2147 2149 self._dir = dir
2148 2150 self._data = None
2149 2151
2150 2152 self._node = node
2151 2153
2152 2154 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2153 2155 # we can instantiate treemanifestctx objects for directories we don't
2154 2156 # have on disk.
2155 2157 # self.p1, self.p2 = store.parents(node)
2156 2158 # rev = store.rev(node)
2157 2159 # self.linkrev = store.linkrev(rev)
2158 2160
2159 2161 def _storage(self):
2160 2162 narrowmatch = self._manifestlog._narrowmatch
2161 2163 if not narrowmatch.always():
2162 2164 if not narrowmatch.visitdir(self._dir[:-1]):
2163 2165 return excludedmanifestrevlog(self._dir)
2164 2166 return self._manifestlog.getstorage(self._dir)
2165 2167
2166 2168 def read(self):
2167 2169 if self._data is None:
2168 2170 store = self._storage()
2169 2171 if self._node == nullid:
2170 2172 self._data = treemanifest()
2171 2173 # TODO accessing non-public API
2172 2174 elif store._treeondisk:
2173 2175 m = treemanifest(dir=self._dir)
2174 2176
2175 2177 def gettext():
2176 2178 return store.revision(self._node)
2177 2179
2178 2180 def readsubtree(dir, subm):
2179 2181 # Set verify to False since we need to be able to create
2180 2182 # subtrees for trees that don't exist on disk.
2181 2183 return self._manifestlog.get(dir, subm, verify=False).read()
2182 2184
2183 2185 m.read(gettext, readsubtree)
2184 2186 m.setnode(self._node)
2185 2187 self._data = m
2186 2188 else:
2187 2189 if self._node in store.fulltextcache:
2188 2190 text = pycompat.bytestr(store.fulltextcache[self._node])
2189 2191 else:
2190 2192 text = store.revision(self._node)
2191 2193 arraytext = bytearray(text)
2192 2194 store.fulltextcache[self._node] = arraytext
2193 2195 self._data = treemanifest(dir=self._dir, text=text)
2194 2196
2195 2197 return self._data
2196 2198
2197 2199 def node(self):
2198 2200 return self._node
2199 2201
2200 2202 def copy(self):
2201 2203 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2202 2204 memmf._treemanifest = self.read().copy()
2203 2205 return memmf
2204 2206
2205 2207 @propertycache
2206 2208 def parents(self):
2207 2209 return self._storage().parents(self._node)
2208 2210
2209 2211 def readdelta(self, shallow=False):
2210 2212 """Returns a manifest containing just the entries that are present
2211 2213 in this manifest, but not in its p1 manifest. This is efficient to read
2212 2214 if the revlog delta is already p1.
2213 2215
2214 2216 If `shallow` is True, this will read the delta for this directory,
2215 2217 without recursively reading subdirectory manifests. Instead, any
2216 2218 subdirectory entry will be reported as it appears in the manifest, i.e.
2217 2219 the subdirectory will be reported among files and distinguished only by
2218 2220 its 't' flag.
2219 2221 """
2220 2222 store = self._storage()
2221 2223 if shallow:
2222 2224 r = store.rev(self._node)
2223 2225 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2224 2226 return manifestdict(d)
2225 2227 else:
2226 2228 # Need to perform a slow delta
2227 2229 r0 = store.deltaparent(store.rev(self._node))
2228 2230 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2229 2231 m1 = self.read()
2230 2232 md = treemanifest(dir=self._dir)
2231 2233 for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
2232 2234 if n1:
2233 2235 md[f] = n1
2234 2236 if fl1:
2235 2237 md.setflag(f, fl1)
2236 2238 return md
2237 2239
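# A minimal usage sketch (hypothetical treemanifestctx ``mctx``, not part of
# this change): the fast path applies only when the stored delta base is p1;
# otherwise readdelta() falls back to diffing against the actual delta parent.
changed = mctx.readdelta(shallow=True)  # manifestdict limited to this directory
changed_paths = list(changed)           # iterating a manifest yields file names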
2238 2240 def readfast(self, shallow=False):
2239 2241 """Calls either readdelta or read, based on which would be less work.
2240 2242 readdelta is called if the delta is against the p1, and therefore can be
2241 2243 read quickly.
2242 2244
2243 2245 If `shallow` is True, it only returns the entries from this manifest,
2244 2246 and not any submanifests.
2245 2247 """
2246 2248 store = self._storage()
2247 2249 r = store.rev(self._node)
2248 2250 deltaparent = store.deltaparent(r)
2249 2251 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2250 2252 return self.readdelta(shallow=shallow)
2251 2253
2252 2254 if shallow:
2253 2255 return manifestdict(store.revision(self._node))
2254 2256 else:
2255 2257 return self.read()
2256 2258
2257 2259 def find(self, key):
2258 2260 return self.read().find(key)
2259 2261
2260 2262
2261 2263 class excludeddir(treemanifest):
2262 2264 """Stand-in for a directory that is excluded from the repository.
2263 2265
2264 2266 With narrowing active on a repository that uses treemanifests,
2265 2267 some of the directory revlogs will be excluded from the resulting
2266 2268 clone. This is a huge storage win for clients, but means we need
2267 2269 some sort of pseudo-manifest to surface to internals so we can
2268 2270 detect a merge conflict outside the narrowspec. That's what this
2269 2271 class is: it stands in for a directory whose node is known, but
2270 2272 whose contents are unknown.
2271 2273 """
2272 2274
2273 2275 def __init__(self, dir, node):
2274 2276 super(excludeddir, self).__init__(dir)
2275 2277 self._node = node
2276 2278 # Add an empty file, which will be included by iterators and such,
2277 2279 # appearing as the directory itself (i.e. something like "dir/")
2278 2280 self._files[b''] = node
2279 2281 self._flags[b''] = b't'
2280 2282
2281 2283 # Manifests outside the narrowspec should never be modified, so avoid
2282 2284 # copying. This makes a noticeable difference when there are very many
2283 2285 # directories outside the narrowspec. Also, it makes sense for the copy to
2284 2286 # be of the same type as the original, which would not happen with the
2285 2287 # super type's copy().
2286 2288 def copy(self):
2287 2289 return self
2288 2290
2289 2291
2290 2292 class excludeddirmanifestctx(treemanifestctx):
2291 2293 """context wrapper for excludeddir - see that docstring for rationale"""
2292 2294
2293 2295 def __init__(self, dir, node):
2294 2296 self._dir = dir
2295 2297 self._node = node
2296 2298
2297 2299 def read(self):
2298 2300 return excludeddir(self._dir, self._node)
2299 2301
2300 2302 def readfast(self, shallow=False):
2301 2303 # special version of readfast since we don't have underlying storage
2302 2304 return self.read()
2303 2305
2304 2306 def write(self, *args):
2305 2307 raise error.ProgrammingError(
2306 2308 b'attempt to write manifest from excluded dir %s' % self._dir
2307 2309 )
2308 2310
2309 2311
2310 2312 class excludedmanifestrevlog(manifestrevlog):
2311 2313 """Stand-in for excluded treemanifest revlogs.
2312 2314
2313 2315 When narrowing is active on a treemanifest repository, we'll have
2314 2316 references to directories we can't see due to the revlog being
2315 2317 skipped. This class exists to conform to the manifestrevlog
2316 2318 interface for those directories and proactively prevent writes to
2317 2319 outside the narrowspec.
2318 2320 """
2319 2321
2320 2322 def __init__(self, dir):
2321 2323 self._dir = dir
2322 2324
2323 2325 def __len__(self):
2324 2326 raise error.ProgrammingError(
2325 2327 b'attempt to get length of excluded dir %s' % self._dir
2326 2328 )
2327 2329
2328 2330 def rev(self, node):
2329 2331 raise error.ProgrammingError(
2330 2332 b'attempt to get rev from excluded dir %s' % self._dir
2331 2333 )
2332 2334
2333 2335 def linkrev(self, node):
2334 2336 raise error.ProgrammingError(
2335 2337 b'attempt to get linkrev from excluded dir %s' % self._dir
2336 2338 )
2337 2339
2338 2340 def node(self, rev):
2339 2341 raise error.ProgrammingError(
2340 2342 b'attempt to get node from excluded dir %s' % self._dir
2341 2343 )
2342 2344
2343 2345 def add(self, *args, **kwargs):
2344 2346 # We should never write entries in dirlogs outside the narrow clone.
2345 2347 # However, the method still gets called from writesubtree() in
2346 2348 # _addtree(), so we need to handle it. We should possibly make that code
2347 2349 # avoid calling add() with a clean manifest (_dirty is always False
2348 2350 # in excludeddir instances).
2349 2351 pass
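# A minimal sketch of the contract above (hypothetical directory name): any
# read access on an excluded dirlog raises ProgrammingError, while add() is
# deliberately a no-op so that writesubtree() can still pass through it.
excluded = excludedmanifestrevlog(b'outside/narrowspec/')
excluded.add()  # silently ignored
try:
    excluded.rev(nullid)
except error.ProgrammingError:
    pass  # expected: reads outside the narrowspec are programming errors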
@@ -1,3089 +1,3090 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_HASCOPIESINFO,
57 57 REVIDX_ISCENSORED,
58 58 REVIDX_RAWTEXT_CHANGING_FLAGS,
59 59 REVIDX_SIDEDATA,
60 60 )
61 61 from .thirdparty import attr
62 62 from . import (
63 63 ancestor,
64 64 dagop,
65 65 error,
66 66 mdiff,
67 67 policy,
68 68 pycompat,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 nodemap as nodemaputil,
80 80 sidedata as sidedatautil,
81 81 )
82 82 from .utils import (
83 83 storageutil,
84 84 stringutil,
85 85 )
86 86
87 87 # blanked usage of all the names to silence pyflakes warnings
88 88 # We need these names available in the module for extensions.
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_SIDEDATA
102 102 REVIDX_HASCOPIESINFO
103 103 REVIDX_EXTSTORED
104 104 REVIDX_DEFAULT_FLAGS
105 105 REVIDX_FLAGS_ORDER
106 106 REVIDX_RAWTEXT_CHANGING_FLAGS
107 107
108 108 parsers = policy.importmod('parsers')
109 109 rustancestor = policy.importrust('ancestor')
110 110 rustdagop = policy.importrust('dagop')
111 111 rustrevlog = policy.importrust('revlog')
112 112
113 113 # Aliased for performance.
114 114 _zlibdecompress = zlib.decompress
115 115
116 116 # max size of revlog with inline data
117 117 _maxinline = 131072
118 118 _chunksize = 1048576
119 119
120 120 # Flag processors for REVIDX_ELLIPSIS.
121 121 def ellipsisreadprocessor(rl, text):
122 122 return text, False, {}
123 123
124 124
125 125 def ellipsiswriteprocessor(rl, text, sidedata):
126 126 return text, False
127 127
128 128
129 129 def ellipsisrawprocessor(rl, text):
130 130 return False
131 131
132 132
133 133 ellipsisprocessor = (
134 134 ellipsisreadprocessor,
135 135 ellipsiswriteprocessor,
136 136 ellipsisrawprocessor,
137 137 )
138 138
139 139
140 140 def getoffset(q):
141 141 return int(q >> 16)
142 142
143 143
144 144 def gettype(q):
145 145 return int(q & 0xFFFF)
146 146
147 147
148 148 def offset_type(offset, type):
149 149 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
150 150 raise ValueError(b'unknown revlog index flags')
151 151 return int(int(offset) << 16 | type)
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the "pure" python implementation to be "fast" because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @attr.s(slots=True, frozen=True)
175 175 class _revisioninfo(object):
176 176 """Information about a revision that allows building its fulltext
177 177 node: expected hash of the revision
178 178 p1, p2: parent revs of the revision
179 179 btext: built text cache consisting of a one-element list
180 180 cachedelta: (baserev, uncompressed_delta) or None
181 181 flags: flags associated to the revision storage
182 182
183 183 One of btext[0] or cachedelta must be set.
184 184 """
185 185
186 186 node = attr.ib()
187 187 p1 = attr.ib()
188 188 p2 = attr.ib()
189 189 btext = attr.ib()
190 190 textlen = attr.ib()
191 191 cachedelta = attr.ib()
192 192 flags = attr.ib()
193 193
194 194
195 195 @interfaceutil.implementer(repository.irevisiondelta)
196 196 @attr.s(slots=True)
197 197 class revlogrevisiondelta(object):
198 198 node = attr.ib()
199 199 p1node = attr.ib()
200 200 p2node = attr.ib()
201 201 basenode = attr.ib()
202 202 flags = attr.ib()
203 203 baserevisionsize = attr.ib()
204 204 revision = attr.ib()
205 205 delta = attr.ib()
206 206 linknode = attr.ib(default=None)
207 207
208 208
209 209 @interfaceutil.implementer(repository.iverifyproblem)
210 210 @attr.s(frozen=True)
211 211 class revlogproblem(object):
212 212 warning = attr.ib(default=None)
213 213 error = attr.ib(default=None)
214 214 node = attr.ib(default=None)
215 215
216 216
217 217 # index v0:
218 218 # 4 bytes: offset
219 219 # 4 bytes: compressed length
220 220 # 4 bytes: base rev
221 221 # 4 bytes: link rev
222 222 # 20 bytes: parent 1 nodeid
223 223 # 20 bytes: parent 2 nodeid
224 224 # 20 bytes: nodeid
225 225 indexformatv0 = struct.Struct(b">4l20s20s20s")
226 226 indexformatv0_pack = indexformatv0.pack
227 227 indexformatv0_unpack = indexformatv0.unpack
228 228
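# A minimal sketch using the struct defined above: a v0 index record is a
# fixed 76-byte entry (four 4-byte big-endian integers followed by three
# 20-byte nodeids), matching the field list in the comment block.
record = indexformatv0_pack(0, 0, 0, 0, b'\0' * 20, b'\0' * 20, b'\0' * 20)
assert len(record) == indexformatv0.size == 76
offset, length, baserev, linkrev, p1, p2, node = indexformatv0_unpack(record)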
229 229
230 230 class revlogoldindex(list):
231 231 @property
232 232 def nodemap(self):
233 233 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
234 234 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
235 235 return self._nodemap
236 236
237 237 @util.propertycache
238 238 def _nodemap(self):
239 239 nodemap = nodemaputil.NodeMap({nullid: nullrev})
240 240 for r in range(0, len(self)):
241 241 n = self[r][7]
242 242 nodemap[n] = r
243 243 return nodemap
244 244
245 245 def has_node(self, node):
246 246 """return True if the node exist in the index"""
247 247 return node in self._nodemap
248 248
249 249 def rev(self, node):
250 250 """return a revision for a node
251 251
252 252 If the node is unknown, raise a RevlogError"""
253 253 return self._nodemap[node]
254 254
255 255 def get_rev(self, node):
256 256 """return a revision for a node
257 257
258 258 If the node is unknown, return None"""
259 259 return self._nodemap.get(node)
260 260
261 261 def append(self, tup):
262 262 self._nodemap[tup[7]] = len(self)
263 263 super(revlogoldindex, self).append(tup)
264 264
265 265 def __delitem__(self, i):
266 266 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
267 267 raise ValueError(b"deleting slices only supports a:-1 with step 1")
268 268 for r in pycompat.xrange(i.start, len(self)):
269 269 del self._nodemap[self[r][7]]
270 270 super(revlogoldindex, self).__delitem__(i)
271 271
272 272 def clearcaches(self):
273 273 self.__dict__.pop('_nodemap', None)
274 274
275 275 def __getitem__(self, i):
276 276 if i == -1:
277 277 return (0, 0, 0, -1, -1, -1, -1, nullid)
278 278 return list.__getitem__(self, i)
279 279
280 280
281 281 class revlogoldio(object):
282 282 def __init__(self):
283 283 self.size = indexformatv0.size
284 284
285 285 def parseindex(self, data, inline):
286 286 s = self.size
287 287 index = []
288 288 nodemap = nodemaputil.NodeMap({nullid: nullrev})
289 289 n = off = 0
290 290 l = len(data)
291 291 while off + s <= l:
292 292 cur = data[off : off + s]
293 293 off += s
294 294 e = indexformatv0_unpack(cur)
295 295 # transform to revlogv1 format
296 296 e2 = (
297 297 offset_type(e[0], 0),
298 298 e[1],
299 299 -1,
300 300 e[2],
301 301 e[3],
302 302 nodemap.get(e[4], nullrev),
303 303 nodemap.get(e[5], nullrev),
304 304 e[6],
305 305 )
306 306 index.append(e2)
307 307 nodemap[e[6]] = n
308 308 n += 1
309 309
310 310 index = revlogoldindex(index)
311 311 return index, None
312 312
313 313 def packentry(self, entry, node, version, rev):
314 314 if gettype(entry[0]):
315 315 raise error.RevlogError(
316 316 _(b'index entry flags need revlog version 1')
317 317 )
318 318 e2 = (
319 319 getoffset(entry[0]),
320 320 entry[1],
321 321 entry[3],
322 322 entry[4],
323 323 node(entry[5]),
324 324 node(entry[6]),
325 325 entry[7],
326 326 )
327 327 return indexformatv0_pack(*e2)
328 328
329 329
330 330 # index ng:
331 331 # 6 bytes: offset
332 332 # 2 bytes: flags
333 333 # 4 bytes: compressed length
334 334 # 4 bytes: uncompressed length
335 335 # 4 bytes: base rev
336 336 # 4 bytes: link rev
337 337 # 4 bytes: parent 1 rev
338 338 # 4 bytes: parent 2 rev
339 339 # 32 bytes: nodeid
340 340 indexformatng = struct.Struct(b">Qiiiiii20s12x")
341 341 indexformatng_pack = indexformatng.pack
342 342 versionformat = struct.Struct(b">I")
343 343 versionformat_pack = versionformat.pack
344 344 versionformat_unpack = versionformat.unpack
345 345
346 346 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
347 347 # signed integer)
348 348 _maxentrysize = 0x7FFFFFFF
349 349
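# A minimal sketch: a v1 ("ng") index entry is a fixed 64-byte record, so
# entry N of a non-inline index starts at byte N * 64; its first field is
# the packed offset/flags value produced by offset_type() above.
assert indexformatng.size == 64
entry = indexformatng_pack(offset_type(0, 0), 10, 25, 0, 0, -1, -1, b'\0' * 20)
assert len(entry) == 64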
350 350
351 351 class revlogio(object):
352 352 def __init__(self):
353 353 self.size = indexformatng.size
354 354
355 355 def parseindex(self, data, inline):
356 356 # call the C implementation to parse the index data
357 357 index, cache = parsers.parse_index2(data, inline)
358 358 return index, cache
359 359
360 360 def packentry(self, entry, node, version, rev):
361 361 p = indexformatng_pack(*entry)
362 362 if rev == 0:
363 363 p = versionformat_pack(version) + p[4:]
364 364 return p
365 365
366 366
367 367 NodemapRevlogIO = None
368 368
369 369 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
370 370
371 371 class NodemapRevlogIO(revlogio):
372 372 """A debug oriented IO class that return a PersistentNodeMapIndexObject
373 373
374 374 The PersistentNodeMapIndexObject is meant to test the persistent nodemap feature.
375 375 """
376 376
377 377 def parseindex(self, data, inline):
378 378 index, cache = parsers.parse_index_devel_nodemap(data, inline)
379 379 return index, cache
380 380
381 381
382 382 class rustrevlogio(revlogio):
383 383 def parseindex(self, data, inline):
384 384 index, cache = super(rustrevlogio, self).parseindex(data, inline)
385 385 return rustrevlog.MixedIndex(index), cache
386 386
387 387
388 388 class revlog(object):
389 389 """
390 390 the underlying revision storage object
391 391
392 392 A revlog consists of two parts, an index and the revision data.
393 393
394 394 The index is a file with a fixed record size containing
395 395 information on each revision, including its nodeid (hash), the
396 396 nodeids of its parents, the position and offset of its data within
397 397 the data file, and the revision it's based on. Finally, each entry
398 398 contains a linkrev entry that can serve as a pointer to external
399 399 data.
400 400
401 401 The revision data itself is a linear collection of data chunks.
402 402 Each chunk represents a revision and is usually represented as a
403 403 delta against the previous chunk. To bound lookup time, runs of
404 404 deltas are limited to about 2 times the length of the original
405 405 version data. This makes retrieval of a version proportional to
406 406 its size, or O(1) relative to the number of revisions.
407 407
408 408 Both pieces of the revlog are written to in an append-only
409 409 fashion, which means we never need to rewrite a file to insert or
410 410 remove data, and can use some simple techniques to avoid the need
411 411 for locking while reading.
412 412
413 413 If checkambig, indexfile is opened with checkambig=True at
414 414 writing, to avoid file stat ambiguity.
415 415
416 416 If mmaplargeindex is True, and an mmapindexthreshold is set, the
417 417 index will be mmapped rather than read if it is larger than the
418 418 configured threshold.
419 419
420 420 If censorable is True, the revlog can have censored revisions.
421 421
422 422 If `upperboundcomp` is not None, this is the expected maximal gain from
423 423 compression for the data content.
424 424 """
425 425
426 426 _flagserrorclass = error.RevlogError
427 427
428 428 def __init__(
429 429 self,
430 430 opener,
431 431 indexfile,
432 432 datafile=None,
433 433 checkambig=False,
434 434 mmaplargeindex=False,
435 435 censorable=False,
436 436 upperboundcomp=None,
437 437 persistentnodemap=False,
438 438 ):
439 439 """
440 440 create a revlog object
441 441
442 442 opener is a function that abstracts the file opening operation
443 443 and can be used to implement COW semantics or the like.
444 444
445 445 """
446 446 self.upperboundcomp = upperboundcomp
447 447 self.indexfile = indexfile
448 448 self.datafile = datafile or (indexfile[:-2] + b".d")
449 449 self.nodemap_file = None
450 450 if persistentnodemap:
451 451 if indexfile.endswith(b'.a'):
452 452 pending_path = indexfile[:-4] + b".n.a"
453 453 if opener.exists(pending_path):
454 454 self.nodemap_file = pending_path
455 455 else:
456 456 self.nodemap_file = indexfile[:-4] + b".n"
457 457 else:
458 458 self.nodemap_file = indexfile[:-2] + b".n"
459 459
460 460 self.opener = opener
461 461 # When True, indexfile is opened with checkambig=True at writing, to
462 462 # avoid file stat ambiguity.
463 463 self._checkambig = checkambig
464 464 self._mmaplargeindex = mmaplargeindex
465 465 self._censorable = censorable
466 466 # 3-tuple of (node, rev, text) for a raw revision.
467 467 self._revisioncache = None
468 468 # Maps rev to chain base rev.
469 469 self._chainbasecache = util.lrucachedict(100)
470 470 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
471 471 self._chunkcache = (0, b'')
472 472 # How much data to read and cache into the raw revlog data cache.
473 473 self._chunkcachesize = 65536
474 474 self._maxchainlen = None
475 475 self._deltabothparents = True
476 476 self.index = None
477 477 self._nodemap_docket = None
478 478 # Mapping of partial identifiers to full nodes.
479 479 self._pcache = {}
480 480 # Mapping of revision integer to full node.
481 481 self._compengine = b'zlib'
482 482 self._compengineopts = {}
483 483 self._maxdeltachainspan = -1
484 484 self._withsparseread = False
485 485 self._sparserevlog = False
486 486 self._srdensitythreshold = 0.50
487 487 self._srmingapsize = 262144
488 488
489 489 # Make copy of flag processors so each revlog instance can support
490 490 # custom flags.
491 491 self._flagprocessors = dict(flagutil.flagprocessors)
492 492
493 493 # 2-tuple of file handles being used for active writing.
494 494 self._writinghandles = None
495 495
496 496 self._loadindex()
497 497
498 498 def _loadindex(self):
499 499 mmapindexthreshold = None
500 500 opts = self.opener.options
501 501
502 502 if b'revlogv2' in opts:
503 503 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
504 504 elif b'revlogv1' in opts:
505 505 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
506 506 if b'generaldelta' in opts:
507 507 newversionflags |= FLAG_GENERALDELTA
508 508 elif b'revlogv0' in self.opener.options:
509 509 newversionflags = REVLOGV0
510 510 else:
511 511 newversionflags = REVLOG_DEFAULT_VERSION
512 512
513 513 if b'chunkcachesize' in opts:
514 514 self._chunkcachesize = opts[b'chunkcachesize']
515 515 if b'maxchainlen' in opts:
516 516 self._maxchainlen = opts[b'maxchainlen']
517 517 if b'deltabothparents' in opts:
518 518 self._deltabothparents = opts[b'deltabothparents']
519 519 self._lazydelta = bool(opts.get(b'lazydelta', True))
520 520 self._lazydeltabase = False
521 521 if self._lazydelta:
522 522 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
523 523 if b'compengine' in opts:
524 524 self._compengine = opts[b'compengine']
525 525 if b'zlib.level' in opts:
526 526 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
527 527 if b'zstd.level' in opts:
528 528 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
529 529 if b'maxdeltachainspan' in opts:
530 530 self._maxdeltachainspan = opts[b'maxdeltachainspan']
531 531 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
532 532 mmapindexthreshold = opts[b'mmapindexthreshold']
533 533 self.hassidedata = bool(opts.get(b'side-data', False))
534 534 if self.hassidedata:
535 535 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
536 536 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
537 537 withsparseread = bool(opts.get(b'with-sparse-read', False))
538 538 # sparse-revlog forces sparse-read
539 539 self._withsparseread = self._sparserevlog or withsparseread
540 540 if b'sparse-read-density-threshold' in opts:
541 541 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
542 542 if b'sparse-read-min-gap-size' in opts:
543 543 self._srmingapsize = opts[b'sparse-read-min-gap-size']
544 544 if opts.get(b'enableellipsis'):
545 545 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
546 546
547 547 # revlog v0 doesn't have flag processors
548 548 for flag, processor in pycompat.iteritems(
549 549 opts.get(b'flagprocessors', {})
550 550 ):
551 551 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
552 552
553 553 if self._chunkcachesize <= 0:
554 554 raise error.RevlogError(
555 555 _(b'revlog chunk cache size %r is not greater than 0')
556 556 % self._chunkcachesize
557 557 )
558 558 elif self._chunkcachesize & (self._chunkcachesize - 1):
559 559 raise error.RevlogError(
560 560 _(b'revlog chunk cache size %r is not a power of 2')
561 561 % self._chunkcachesize
562 562 )
563 563
564 564 indexdata = b''
565 565 self._initempty = True
566 566 try:
567 567 with self._indexfp() as f:
568 568 if (
569 569 mmapindexthreshold is not None
570 570 and self.opener.fstat(f).st_size >= mmapindexthreshold
571 571 ):
572 572 # TODO: should .close() to release resources without
573 573 # relying on Python GC
574 574 indexdata = util.buffer(util.mmapread(f))
575 575 else:
576 576 indexdata = f.read()
577 577 if len(indexdata) > 0:
578 578 versionflags = versionformat_unpack(indexdata[:4])[0]
579 579 self._initempty = False
580 580 else:
581 581 versionflags = newversionflags
582 582 except IOError as inst:
583 583 if inst.errno != errno.ENOENT:
584 584 raise
585 585
586 586 versionflags = newversionflags
587 587
588 588 self.version = versionflags
589 589
590 590 flags = versionflags & ~0xFFFF
591 591 fmt = versionflags & 0xFFFF
592 592
593 593 if fmt == REVLOGV0:
594 594 if flags:
595 595 raise error.RevlogError(
596 596 _(b'unknown flags (%#04x) in version %d revlog %s')
597 597 % (flags >> 16, fmt, self.indexfile)
598 598 )
599 599
600 600 self._inline = False
601 601 self._generaldelta = False
602 602
603 603 elif fmt == REVLOGV1:
604 604 if flags & ~REVLOGV1_FLAGS:
605 605 raise error.RevlogError(
606 606 _(b'unknown flags (%#04x) in version %d revlog %s')
607 607 % (flags >> 16, fmt, self.indexfile)
608 608 )
609 609
610 610 self._inline = versionflags & FLAG_INLINE_DATA
611 611 self._generaldelta = versionflags & FLAG_GENERALDELTA
612 612
613 613 elif fmt == REVLOGV2:
614 614 if flags & ~REVLOGV2_FLAGS:
615 615 raise error.RevlogError(
616 616 _(b'unknown flags (%#04x) in version %d revlog %s')
617 617 % (flags >> 16, fmt, self.indexfile)
618 618 )
619 619
620 620 self._inline = versionflags & FLAG_INLINE_DATA
621 621 # generaldelta implied by version 2 revlogs.
622 622 self._generaldelta = True
623 623
624 624 else:
625 625 raise error.RevlogError(
626 626 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
627 627 )
628 628 # sparse-revlog can't be on without general-delta (issue6056)
629 629 if not self._generaldelta:
630 630 self._sparserevlog = False
631 631
632 632 self._storedeltachains = True
633 633
634 634 devel_nodemap = (
635 635 self.nodemap_file
636 636 and opts.get(b'devel-force-nodemap', False)
637 637 and NodemapRevlogIO is not None
638 638 )
639 639
640 640 use_rust_index = False
641 641 if rustrevlog is not None:
642 642 if self.nodemap_file is not None:
643 643 use_rust_index = True
644 644 else:
645 645 use_rust_index = self.opener.options.get(b'rust.index')
646 646
647 647 self._io = revlogio()
648 648 if self.version == REVLOGV0:
649 649 self._io = revlogoldio()
650 650 elif devel_nodemap:
651 651 self._io = NodemapRevlogIO()
652 652 elif use_rust_index:
653 653 self._io = rustrevlogio()
654 654 try:
655 655 d = self._io.parseindex(indexdata, self._inline)
656 656 index, _chunkcache = d
657 657 use_nodemap = (
658 658 not self._inline
659 659 and self.nodemap_file is not None
660 660 and util.safehasattr(index, 'update_nodemap_data')
661 661 )
662 662 if use_nodemap:
663 663 nodemap_data = nodemaputil.persisted_data(self)
664 664 if nodemap_data is not None:
665 665 docket = nodemap_data[0]
666 666 if (
667 667 len(d[0]) > docket.tip_rev
668 668 and d[0][docket.tip_rev][7] == docket.tip_node
669 669 ):
670 670 # no changelog tampering
671 671 self._nodemap_docket = docket
672 672 index.update_nodemap_data(*nodemap_data)
673 673 except (ValueError, IndexError):
674 674 raise error.RevlogError(
675 675 _(b"index %s is corrupted") % self.indexfile
676 676 )
677 677 self.index, self._chunkcache = d
678 678 if not self._chunkcache:
679 679 self._chunkclear()
680 680 # revnum -> (chain-length, sum-delta-length)
681 681 self._chaininfocache = util.lrucachedict(500)
682 682 # revlog header -> revlog compressor
683 683 self._decompressors = {}
684 684
685 685 @util.propertycache
686 686 def _compressor(self):
687 687 engine = util.compengines[self._compengine]
688 688 return engine.revlogcompressor(self._compengineopts)
689 689
690 690 def _indexfp(self, mode=b'r'):
691 691 """file object for the revlog's index file"""
692 692 args = {'mode': mode}
693 693 if mode != b'r':
694 694 args['checkambig'] = self._checkambig
695 695 if mode == b'w':
696 696 args['atomictemp'] = True
697 697 return self.opener(self.indexfile, **args)
698 698
699 699 def _datafp(self, mode=b'r'):
700 700 """file object for the revlog's data file"""
701 701 return self.opener(self.datafile, mode=mode)
702 702
703 703 @contextlib.contextmanager
704 704 def _datareadfp(self, existingfp=None):
705 705 """file object suitable to read data"""
706 706 # Use explicit file handle, if given.
707 707 if existingfp is not None:
708 708 yield existingfp
709 709
710 710 # Use a file handle being actively used for writes, if available.
711 711 # There is some danger to doing this because reads will seek the
712 712 # file. However, _writeentry() performs a SEEK_END before all writes,
713 713 # so we should be safe.
714 714 elif self._writinghandles:
715 715 if self._inline:
716 716 yield self._writinghandles[0]
717 717 else:
718 718 yield self._writinghandles[1]
719 719
720 720 # Otherwise open a new file handle.
721 721 else:
722 722 if self._inline:
723 723 func = self._indexfp
724 724 else:
725 725 func = self._datafp
726 726 with func() as fp:
727 727 yield fp
728 728
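# A minimal usage sketch (hypothetical revlog instance ``rl``): callers wrap
# raw reads in this context manager so that an already-open write handle is
# reused while a transaction is writing, and a fresh handle is opened
# otherwise.
with rl._datareadfp() as fp:
    fp.seek(0)
    header = fp.read(4)  # e.g. the version/flags word of an inline revlog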
729 729 def tiprev(self):
730 730 return len(self.index) - 1
731 731
732 732 def tip(self):
733 733 return self.node(self.tiprev())
734 734
735 735 def __contains__(self, rev):
736 736 return 0 <= rev < len(self)
737 737
738 738 def __len__(self):
739 739 return len(self.index)
740 740
741 741 def __iter__(self):
742 742 return iter(pycompat.xrange(len(self)))
743 743
744 744 def revs(self, start=0, stop=None):
745 745 """iterate over all rev in this revlog (from start to stop)"""
746 746 return storageutil.iterrevs(len(self), start=start, stop=stop)
747 747
748 748 @property
749 749 def nodemap(self):
750 750 msg = (
751 751 b"revlog.nodemap is deprecated, "
752 752 b"use revlog.index.[has_node|rev|get_rev]"
753 753 )
754 754 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
755 755 return self.index.nodemap
756 756
757 757 @property
758 758 def _nodecache(self):
759 759 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
760 760 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
761 761 return self.index.nodemap
762 762
763 763 def hasnode(self, node):
764 764 try:
765 765 self.rev(node)
766 766 return True
767 767 except KeyError:
768 768 return False
769 769
770 770 def candelta(self, baserev, rev):
771 771 """whether two revisions (baserev, rev) can be delta-ed or not"""
772 772 # Disable delta if either rev requires a content-changing flag
773 773 # processor (ex. LFS). This is because such flag processor can alter
774 774 # the rawtext content that the delta will be based on, and two clients
775 775 # could have a same revlog node with different flags (i.e. different
776 776 # rawtext contents) and the delta could be incompatible.
777 777 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
778 778 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
779 779 ):
780 780 return False
781 781 return True
782 782
783 783 def update_caches(self, transaction):
784 784 if self.nodemap_file is not None:
785 785 if transaction is None:
786 786 nodemaputil.update_persistent_nodemap(self)
787 787 else:
788 788 nodemaputil.setup_persistent_nodemap(transaction, self)
789 789
790 790 def clearcaches(self):
791 791 self._revisioncache = None
792 792 self._chainbasecache.clear()
793 793 self._chunkcache = (0, b'')
794 794 self._pcache = {}
795 795 self._nodemap_docket = None
796 796 self.index.clearcaches()
797 797 # The Python code is responsible for validating the docket, so we
798 798 # end up having to refresh it here.
799 799 use_nodemap = (
800 800 not self._inline
801 801 and self.nodemap_file is not None
802 802 and util.safehasattr(self.index, 'update_nodemap_data')
803 803 )
804 804 if use_nodemap:
805 805 nodemap_data = nodemaputil.persisted_data(self)
806 806 if nodemap_data is not None:
807 807 self._nodemap_docket = nodemap_data[0]
808 808 self.index.update_nodemap_data(*nodemap_data)
809 809
810 810 def rev(self, node):
811 811 try:
812 812 return self.index.rev(node)
813 813 except TypeError:
814 814 raise
815 815 except error.RevlogError:
816 816 # parsers.c radix tree lookup failed
817 817 if node == wdirid or node in wdirfilenodeids:
818 818 raise error.WdirUnsupported
819 819 raise error.LookupError(node, self.indexfile, _(b'no node'))
820 820
821 821 # Accessors for index entries.
822 822
823 823 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
824 824 # are flags.
825 825 def start(self, rev):
826 826 return int(self.index[rev][0] >> 16)
827 827
828 828 def flags(self, rev):
829 829 return self.index[rev][0] & 0xFFFF
830 830
831 831 def length(self, rev):
832 832 return self.index[rev][1]
833 833
834 834 def rawsize(self, rev):
835 835 """return the length of the uncompressed text for a given revision"""
836 836 l = self.index[rev][2]
837 837 if l >= 0:
838 838 return l
839 839
840 840 t = self.rawdata(rev)
841 841 return len(t)
842 842
843 843 def size(self, rev):
844 844 """length of non-raw text (processed by a "read" flag processor)"""
845 845 # fast path: if no "read" flag processor could change the content,
846 846 # size is rawsize. note: ELLIPSIS is known to not change the content.
847 847 flags = self.flags(rev)
848 848 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
849 849 return self.rawsize(rev)
850 850
851 851 return len(self.revision(rev, raw=False))
852 852
853 853 def chainbase(self, rev):
854 854 base = self._chainbasecache.get(rev)
855 855 if base is not None:
856 856 return base
857 857
858 858 index = self.index
859 859 iterrev = rev
860 860 base = index[iterrev][3]
861 861 while base != iterrev:
862 862 iterrev = base
863 863 base = index[iterrev][3]
864 864
865 865 self._chainbasecache[rev] = base
866 866 return base
867 867
868 868 def linkrev(self, rev):
869 869 return self.index[rev][4]
870 870
871 871 def parentrevs(self, rev):
872 872 try:
873 873 entry = self.index[rev]
874 874 except IndexError:
875 875 if rev == wdirrev:
876 876 raise error.WdirUnsupported
877 877 raise
878 878
879 879 return entry[5], entry[6]
880 880
881 881 # fast parentrevs(rev) where rev isn't filtered
882 882 _uncheckedparentrevs = parentrevs
883 883
884 884 def node(self, rev):
885 885 try:
886 886 return self.index[rev][7]
887 887 except IndexError:
888 888 if rev == wdirrev:
889 889 raise error.WdirUnsupported
890 890 raise
891 891
892 892 # Derived from index values.
893 893
894 894 def end(self, rev):
895 895 return self.start(rev) + self.length(rev)
896 896
897 897 def parents(self, node):
898 898 i = self.index
899 899 d = i[self.rev(node)]
900 900 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
901 901
902 902 def chainlen(self, rev):
903 903 return self._chaininfo(rev)[0]
904 904
905 905 def _chaininfo(self, rev):
906 906 chaininfocache = self._chaininfocache
907 907 if rev in chaininfocache:
908 908 return chaininfocache[rev]
909 909 index = self.index
910 910 generaldelta = self._generaldelta
911 911 iterrev = rev
912 912 e = index[iterrev]
913 913 clen = 0
914 914 compresseddeltalen = 0
915 915 while iterrev != e[3]:
916 916 clen += 1
917 917 compresseddeltalen += e[1]
918 918 if generaldelta:
919 919 iterrev = e[3]
920 920 else:
921 921 iterrev -= 1
922 922 if iterrev in chaininfocache:
923 923 t = chaininfocache[iterrev]
924 924 clen += t[0]
925 925 compresseddeltalen += t[1]
926 926 break
927 927 e = index[iterrev]
928 928 else:
929 929 # Add text length of base since decompressing that also takes
930 930 # work. For cache hits the length is already included.
931 931 compresseddeltalen += e[1]
932 932 r = (clen, compresseddeltalen)
933 933 chaininfocache[rev] = r
934 934 return r
935 935
936 936 def _deltachain(self, rev, stoprev=None):
937 937 """Obtain the delta chain for a revision.
938 938
939 939 ``stoprev`` specifies a revision to stop at. If not specified, we
940 940 stop at the base of the chain.
941 941
942 942 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
943 943 revs in ascending order and ``stopped`` is a bool indicating whether
944 944 ``stoprev`` was hit.
945 945 """
946 946 # Try C implementation.
947 947 try:
948 948 return self.index.deltachain(rev, stoprev, self._generaldelta)
949 949 except AttributeError:
950 950 pass
951 951
952 952 chain = []
953 953
954 954 # Alias to prevent attribute lookup in tight loop.
955 955 index = self.index
956 956 generaldelta = self._generaldelta
957 957
958 958 iterrev = rev
959 959 e = index[iterrev]
960 960 while iterrev != e[3] and iterrev != stoprev:
961 961 chain.append(iterrev)
962 962 if generaldelta:
963 963 iterrev = e[3]
964 964 else:
965 965 iterrev -= 1
966 966 e = index[iterrev]
967 967
968 968 if iterrev == stoprev:
969 969 stopped = True
970 970 else:
971 971 chain.append(iterrev)
972 972 stopped = False
973 973
974 974 chain.reverse()
975 975 return chain, stopped
976 976
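# A minimal usage sketch (hypothetical revlog ``rl`` and revision ``rev``):
# with the default stoprev=None, the chain lists, base first, every revision
# whose delta must be applied to rebuild ``rev``, ending with ``rev`` itself.
chain, stopped = rl._deltachain(rev)
assert chain[-1] == rev and not stopped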
977 977 def ancestors(self, revs, stoprev=0, inclusive=False):
978 978 """Generate the ancestors of 'revs' in reverse revision order.
979 979 Does not generate revs lower than stoprev.
980 980
981 981 See the documentation for ancestor.lazyancestors for more details."""
982 982
983 983 # first, make sure start revisions aren't filtered
984 984 revs = list(revs)
985 985 checkrev = self.node
986 986 for r in revs:
987 987 checkrev(r)
988 988 # and we're sure ancestors aren't filtered as well
989 989
990 990 if rustancestor is not None:
991 991 lazyancestors = rustancestor.LazyAncestors
992 992 arg = self.index
993 993 else:
994 994 lazyancestors = ancestor.lazyancestors
995 995 arg = self._uncheckedparentrevs
996 996 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
997 997
998 998 def descendants(self, revs):
999 999 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1000 1000
1001 1001 def findcommonmissing(self, common=None, heads=None):
1002 1002 """Return a tuple of the ancestors of common and the ancestors of heads
1003 1003 that are not ancestors of common. In revset terminology, we return the
1004 1004 tuple:
1005 1005
1006 1006 ::common, (::heads) - (::common)
1007 1007
1008 1008 The list is sorted by revision number, meaning it is
1009 1009 topologically sorted.
1010 1010
1011 1011 'heads' and 'common' are both lists of node IDs. If heads is
1012 1012 not supplied, uses all of the revlog's heads. If common is not
1013 1013 supplied, uses nullid."""
1014 1014 if common is None:
1015 1015 common = [nullid]
1016 1016 if heads is None:
1017 1017 heads = self.heads()
1018 1018
1019 1019 common = [self.rev(n) for n in common]
1020 1020 heads = [self.rev(n) for n in heads]
1021 1021
1022 1022 # we want the ancestors, but inclusive
1023 1023 class lazyset(object):
1024 1024 def __init__(self, lazyvalues):
1025 1025 self.addedvalues = set()
1026 1026 self.lazyvalues = lazyvalues
1027 1027
1028 1028 def __contains__(self, value):
1029 1029 return value in self.addedvalues or value in self.lazyvalues
1030 1030
1031 1031 def __iter__(self):
1032 1032 added = self.addedvalues
1033 1033 for r in added:
1034 1034 yield r
1035 1035 for r in self.lazyvalues:
1036 1036 if not r in added:
1037 1037 yield r
1038 1038
1039 1039 def add(self, value):
1040 1040 self.addedvalues.add(value)
1041 1041
1042 1042 def update(self, values):
1043 1043 self.addedvalues.update(values)
1044 1044
1045 1045 has = lazyset(self.ancestors(common))
1046 1046 has.add(nullrev)
1047 1047 has.update(common)
1048 1048
1049 1049 # take all ancestors from heads that aren't in has
1050 1050 missing = set()
1051 1051 visit = collections.deque(r for r in heads if r not in has)
1052 1052 while visit:
1053 1053 r = visit.popleft()
1054 1054 if r in missing:
1055 1055 continue
1056 1056 else:
1057 1057 missing.add(r)
1058 1058 for p in self.parentrevs(r):
1059 1059 if p not in has:
1060 1060 visit.append(p)
1061 1061 missing = list(missing)
1062 1062 missing.sort()
1063 1063 return has, [self.node(miss) for miss in missing]
1064 1064
1065 1065 def incrementalmissingrevs(self, common=None):
1066 1066 """Return an object that can be used to incrementally compute the
1067 1067 revision numbers of the ancestors of arbitrary sets that are not
1068 1068 ancestors of common. This is an ancestor.incrementalmissingancestors
1069 1069 object.
1070 1070
1071 1071 'common' is a list of revision numbers. If common is not supplied, uses
1072 1072 nullrev.
1073 1073 """
1074 1074 if common is None:
1075 1075 common = [nullrev]
1076 1076
1077 1077 if rustancestor is not None:
1078 1078 return rustancestor.MissingAncestors(self.index, common)
1079 1079 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1080 1080
1081 1081 def findmissingrevs(self, common=None, heads=None):
1082 1082 """Return the revision numbers of the ancestors of heads that
1083 1083 are not ancestors of common.
1084 1084
1085 1085 More specifically, return a list of revision numbers corresponding to
1086 1086 nodes N such that every N satisfies the following constraints:
1087 1087
1088 1088 1. N is an ancestor of some node in 'heads'
1089 1089 2. N is not an ancestor of any node in 'common'
1090 1090
1091 1091 The list is sorted by revision number, meaning it is
1092 1092 topologically sorted.
1093 1093
1094 1094 'heads' and 'common' are both lists of revision numbers. If heads is
1095 1095 not supplied, uses all of the revlog's heads. If common is not
1096 1096 supplied, uses nullid."""
1097 1097 if common is None:
1098 1098 common = [nullrev]
1099 1099 if heads is None:
1100 1100 heads = self.headrevs()
1101 1101
1102 1102 inc = self.incrementalmissingrevs(common=common)
1103 1103 return inc.missingancestors(heads)
1104 1104
1105 1105 def findmissing(self, common=None, heads=None):
1106 1106 """Return the ancestors of heads that are not ancestors of common.
1107 1107
1108 1108 More specifically, return a list of nodes N such that every N
1109 1109 satisfies the following constraints:
1110 1110
1111 1111 1. N is an ancestor of some node in 'heads'
1112 1112 2. N is not an ancestor of any node in 'common'
1113 1113
1114 1114 The list is sorted by revision number, meaning it is
1115 1115 topologically sorted.
1116 1116
1117 1117 'heads' and 'common' are both lists of node IDs. If heads is
1118 1118 not supplied, uses all of the revlog's heads. If common is not
1119 1119 supplied, uses nullid."""
1120 1120 if common is None:
1121 1121 common = [nullid]
1122 1122 if heads is None:
1123 1123 heads = self.heads()
1124 1124
1125 1125 common = [self.rev(n) for n in common]
1126 1126 heads = [self.rev(n) for n in heads]
1127 1127
1128 1128 inc = self.incrementalmissingrevs(common=common)
1129 1129 return [self.node(r) for r in inc.missingancestors(heads)]
1130 1130
1131 1131 def nodesbetween(self, roots=None, heads=None):
1132 1132 """Return a topological path from 'roots' to 'heads'.
1133 1133
1134 1134 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1135 1135 topologically sorted list of all nodes N that satisfy both of
1136 1136 these constraints:
1137 1137
1138 1138 1. N is a descendant of some node in 'roots'
1139 1139 2. N is an ancestor of some node in 'heads'
1140 1140
1141 1141 Every node is considered to be both a descendant and an ancestor
1142 1142 of itself, so every reachable node in 'roots' and 'heads' will be
1143 1143 included in 'nodes'.
1144 1144
1145 1145 'outroots' is the list of reachable nodes in 'roots', i.e., the
1146 1146 subset of 'roots' that is returned in 'nodes'. Likewise,
1147 1147 'outheads' is the subset of 'heads' that is also in 'nodes'.
1148 1148
1149 1149 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1150 1150 unspecified, uses nullid as the only root. If 'heads' is
1151 1151 unspecified, uses list of all of the revlog's heads."""
1152 1152 nonodes = ([], [], [])
1153 1153 if roots is not None:
1154 1154 roots = list(roots)
1155 1155 if not roots:
1156 1156 return nonodes
1157 1157 lowestrev = min([self.rev(n) for n in roots])
1158 1158 else:
1159 1159 roots = [nullid] # Everybody's a descendant of nullid
1160 1160 lowestrev = nullrev
1161 1161 if (lowestrev == nullrev) and (heads is None):
1162 1162 # We want _all_ the nodes!
1163 1163 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1164 1164 if heads is None:
1165 1165 # All nodes are ancestors, so the latest ancestor is the last
1166 1166 # node.
1167 1167 highestrev = len(self) - 1
1168 1168 # Set ancestors to None to signal that every node is an ancestor.
1169 1169 ancestors = None
1170 1170 # Set heads to an empty dictionary for later discovery of heads
1171 1171 heads = {}
1172 1172 else:
1173 1173 heads = list(heads)
1174 1174 if not heads:
1175 1175 return nonodes
1176 1176 ancestors = set()
1177 1177 # Turn heads into a dictionary so we can remove 'fake' heads.
1178 1178 # Also, later we will be using it to filter out the heads we can't
1179 1179 # find from roots.
1180 1180 heads = dict.fromkeys(heads, False)
1181 1181 # Start at the top and keep marking parents until we're done.
1182 1182 nodestotag = set(heads)
1183 1183 # Remember where the top was so we can use it as a limit later.
1184 1184 highestrev = max([self.rev(n) for n in nodestotag])
1185 1185 while nodestotag:
1186 1186 # grab a node to tag
1187 1187 n = nodestotag.pop()
1188 1188 # Never tag nullid
1189 1189 if n == nullid:
1190 1190 continue
1191 1191 # A node's revision number represents its place in a
1192 1192 # topologically sorted list of nodes.
1193 1193 r = self.rev(n)
1194 1194 if r >= lowestrev:
1195 1195 if n not in ancestors:
1196 1196 # If we are possibly a descendant of one of the roots
1197 1197 # and we haven't already been marked as an ancestor
1198 1198 ancestors.add(n) # Mark as ancestor
1199 1199 # Add non-nullid parents to list of nodes to tag.
1200 1200 nodestotag.update(
1201 1201 [p for p in self.parents(n) if p != nullid]
1202 1202 )
1203 1203 elif n in heads: # We've seen it before, is it a fake head?
1204 1204 # So it is, real heads should not be the ancestors of
1205 1205 # any other heads.
1206 1206 heads.pop(n)
1207 1207 if not ancestors:
1208 1208 return nonodes
1209 1209 # Now that we have our set of ancestors, we want to remove any
1210 1210 # roots that are not ancestors.
1211 1211
1212 1212 # If one of the roots was nullid, everything is included anyway.
1213 1213 if lowestrev > nullrev:
1214 1214 # But, since we weren't, let's recompute the lowest rev to not
1215 1215 # include roots that aren't ancestors.
1216 1216
1217 1217 # Filter out roots that aren't ancestors of heads
1218 1218 roots = [root for root in roots if root in ancestors]
1219 1219 # Recompute the lowest revision
1220 1220 if roots:
1221 1221 lowestrev = min([self.rev(root) for root in roots])
1222 1222 else:
1223 1223 # No more roots? Return empty list
1224 1224 return nonodes
1225 1225 else:
1226 1226 # We are descending from nullid, and don't need to care about
1227 1227 # any other roots.
1228 1228 lowestrev = nullrev
1229 1229 roots = [nullid]
1230 1230 # Transform our roots list into a set.
1231 1231 descendants = set(roots)
1232 1232 # Also, keep the original roots so we can filter out roots that aren't
1233 1233 # 'real' roots (i.e. are descended from other roots).
1234 1234 roots = descendants.copy()
1235 1235 # Our topologically sorted list of output nodes.
1236 1236 orderedout = []
1237 1237 # Don't start at nullid since we don't want nullid in our output list,
1238 1238 # and if nullid shows up in descendants, empty parents will look like
1239 1239 # they're descendants.
1240 1240 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1241 1241 n = self.node(r)
1242 1242 isdescendant = False
1243 1243 if lowestrev == nullrev: # Everybody is a descendant of nullid
1244 1244 isdescendant = True
1245 1245 elif n in descendants:
1246 1246 # n is already a descendant
1247 1247 isdescendant = True
1248 1248 # This check only needs to be done here because all the roots
1249 1249 # will start being marked as descendants before the loop.
1250 1250 if n in roots:
1251 1251 # If n was a root, check if it's a 'real' root.
1252 1252 p = tuple(self.parents(n))
1253 1253 # If any of its parents are descendants, it's not a root.
1254 1254 if (p[0] in descendants) or (p[1] in descendants):
1255 1255 roots.remove(n)
1256 1256 else:
1257 1257 p = tuple(self.parents(n))
1258 1258 # A node is a descendant if either of its parents is a
1259 1259 # descendant. (We seeded the descendants set with the roots
1260 1260 # up there, remember?)
1261 1261 if (p[0] in descendants) or (p[1] in descendants):
1262 1262 descendants.add(n)
1263 1263 isdescendant = True
1264 1264 if isdescendant and ((ancestors is None) or (n in ancestors)):
1265 1265 # Only include nodes that are both descendants and ancestors.
1266 1266 orderedout.append(n)
1267 1267 if (ancestors is not None) and (n in heads):
1268 1268 # We're trying to figure out which heads are reachable
1269 1269 # from roots.
1270 1270 # Mark this head as having been reached
1271 1271 heads[n] = True
1272 1272 elif ancestors is None:
1273 1273 # Otherwise, we're trying to discover the heads.
1274 1274 # Assume this is a head because if it isn't, the next step
1275 1275 # will eventually remove it.
1276 1276 heads[n] = True
1277 1277 # But, obviously its parents aren't.
1278 1278 for p in self.parents(n):
1279 1279 heads.pop(p, None)
1280 1280 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1281 1281 roots = list(roots)
1282 1282 assert orderedout
1283 1283 assert roots
1284 1284 assert heads
1285 1285 return (orderedout, roots, heads)
1286 1286
1287 1287 def headrevs(self, revs=None):
1288 1288 if revs is None:
1289 1289 try:
1290 1290 return self.index.headrevs()
1291 1291 except AttributeError:
1292 1292 return self._headrevs()
1293 1293 if rustdagop is not None:
1294 1294 return rustdagop.headrevs(self.index, revs)
1295 1295 return dagop.headrevs(revs, self._uncheckedparentrevs)
1296 1296
1297 1297 def computephases(self, roots):
1298 1298 return self.index.computephasesmapsets(roots)
1299 1299
1300 1300 def _headrevs(self):
1301 1301 count = len(self)
1302 1302 if not count:
1303 1303 return [nullrev]
1304 1304 # we won't iterate over filtered revs, so nobody is a head at the start
1305 1305 ishead = [0] * (count + 1)
1306 1306 index = self.index
1307 1307 for r in self:
1308 1308 ishead[r] = 1 # I may be a head
1309 1309 e = index[r]
1310 1310 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1311 1311 return [r for r, val in enumerate(ishead) if val]
1312 1312
1313 1313 def heads(self, start=None, stop=None):
1314 1314 """return the list of all nodes that have no children
1315 1315
1316 1316 if start is specified, only heads that are descendants of
1317 1317 start will be returned
1318 1318 if stop is specified, it will consider all the revs from stop
1319 1319 as if they had no children
1320 1320 """
1321 1321 if start is None and stop is None:
1322 1322 if not len(self):
1323 1323 return [nullid]
1324 1324 return [self.node(r) for r in self.headrevs()]
1325 1325
1326 1326 if start is None:
1327 1327 start = nullrev
1328 1328 else:
1329 1329 start = self.rev(start)
1330 1330
1331 1331 stoprevs = {self.rev(n) for n in stop or []}
1332 1332
1333 1333 revs = dagop.headrevssubset(
1334 1334 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1335 1335 )
1336 1336
1337 1337 return [self.node(rev) for rev in revs]
1338 1338
1339 1339 def children(self, node):
1340 1340 """find the children of a given node"""
1341 1341 c = []
1342 1342 p = self.rev(node)
1343 1343 for r in self.revs(start=p + 1):
1344 1344 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1345 1345 if prevs:
1346 1346 for pr in prevs:
1347 1347 if pr == p:
1348 1348 c.append(self.node(r))
1349 1349 elif p == nullrev:
1350 1350 c.append(self.node(r))
1351 1351 return c
1352 1352
1353 1353 def commonancestorsheads(self, a, b):
1354 1354 """calculate all the heads of the common ancestors of nodes a and b"""
1355 1355 a, b = self.rev(a), self.rev(b)
1356 1356 ancs = self._commonancestorsheads(a, b)
1357 1357 return pycompat.maplist(self.node, ancs)
1358 1358
1359 1359 def _commonancestorsheads(self, *revs):
1360 1360 """calculate all the heads of the common ancestors of revs"""
1361 1361 try:
1362 1362 ancs = self.index.commonancestorsheads(*revs)
1363 1363 except (AttributeError, OverflowError): # C implementation failed
1364 1364 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1365 1365 return ancs
1366 1366
1367 1367 def isancestor(self, a, b):
1368 1368 """return True if node a is an ancestor of node b
1369 1369
1370 1370 A revision is considered an ancestor of itself."""
1371 1371 a, b = self.rev(a), self.rev(b)
1372 1372 return self.isancestorrev(a, b)
1373 1373
1374 1374 def isancestorrev(self, a, b):
1375 1375 """return True if revision a is an ancestor of revision b
1376 1376
1377 1377 A revision is considered an ancestor of itself.
1378 1378
1379 1379 The implementation of this is trivial but the use of
1380 1380 reachableroots is not."""
1381 1381 if a == nullrev:
1382 1382 return True
1383 1383 elif a == b:
1384 1384 return True
1385 1385 elif a > b:
1386 1386 return False
1387 1387 return bool(self.reachableroots(a, [b], [a], includepath=False))
1388 1388
1389 1389 def reachableroots(self, minroot, heads, roots, includepath=False):
1390 1390 """return (heads(::(<roots> and <roots>::<heads>)))
1391 1391
1392 1392 If includepath is True, return (<roots>::<heads>)."""
1393 1393 try:
1394 1394 return self.index.reachableroots2(
1395 1395 minroot, heads, roots, includepath
1396 1396 )
1397 1397 except AttributeError:
1398 1398 return dagop._reachablerootspure(
1399 1399 self.parentrevs, minroot, roots, heads, includepath
1400 1400 )
1401 1401
1402 1402 def ancestor(self, a, b):
1403 1403 """calculate the "best" common ancestor of nodes a and b"""
1404 1404
1405 1405 a, b = self.rev(a), self.rev(b)
1406 1406 try:
1407 1407 ancs = self.index.ancestors(a, b)
1408 1408 except (AttributeError, OverflowError):
1409 1409 ancs = ancestor.ancestors(self.parentrevs, a, b)
1410 1410 if ancs:
1411 1411 # choose a consistent winner when there's a tie
1412 1412 return min(map(self.node, ancs))
1413 1413 return nullid
1414 1414
1415 1415 def _match(self, id):
1416 1416 if isinstance(id, int):
1417 1417 # rev
1418 1418 return self.node(id)
1419 1419 if len(id) == 20:
1420 1420 # possibly a binary node
1421 1421 # odds of a binary node being all hex in ASCII are 1 in 10**25
1422 1422 try:
1423 1423 node = id
1424 1424 self.rev(node) # quick search the index
1425 1425 return node
1426 1426 except error.LookupError:
1427 1427 pass # may be partial hex id
1428 1428 try:
1429 1429 # str(rev)
1430 1430 rev = int(id)
1431 1431 if b"%d" % rev != id:
1432 1432 raise ValueError
1433 1433 if rev < 0:
1434 1434 rev = len(self) + rev
1435 1435 if rev < 0 or rev >= len(self):
1436 1436 raise ValueError
1437 1437 return self.node(rev)
1438 1438 except (ValueError, OverflowError):
1439 1439 pass
1440 1440 if len(id) == 40:
1441 1441 try:
1442 1442 # a full hex nodeid?
1443 1443 node = bin(id)
1444 1444 self.rev(node)
1445 1445 return node
1446 1446 except (TypeError, error.LookupError):
1447 1447 pass
1448 1448
1449 1449 def _partialmatch(self, id):
1450 1450 # we don't care about wdirfilenodeids as they should always be full hashes
1451 1451 maybewdir = wdirhex.startswith(id)
1452 1452 try:
1453 1453 partial = self.index.partialmatch(id)
1454 1454 if partial and self.hasnode(partial):
1455 1455 if maybewdir:
1456 1456 # single 'ff...' match in radix tree, ambiguous with wdir
1457 1457 raise error.RevlogError
1458 1458 return partial
1459 1459 if maybewdir:
1460 1460 # no 'ff...' match in radix tree, wdir identified
1461 1461 raise error.WdirUnsupported
1462 1462 return None
1463 1463 except error.RevlogError:
1464 1464 # parsers.c radix tree lookup gave multiple matches
1465 1465 # fast path: for unfiltered changelog, radix tree is accurate
1466 1466 if not getattr(self, 'filteredrevs', None):
1467 1467 raise error.AmbiguousPrefixLookupError(
1468 1468 id, self.indexfile, _(b'ambiguous identifier')
1469 1469 )
1470 1470 # fall through to slow path that filters hidden revisions
1471 1471 except (AttributeError, ValueError):
1472 1472 # we are pure python, or key was too short to search radix tree
1473 1473 pass
1474 1474
1475 1475 if id in self._pcache:
1476 1476 return self._pcache[id]
1477 1477
1478 1478 if len(id) <= 40:
1479 1479 try:
1480 1480 # hex(node)[:...]
1481 1481 l = len(id) // 2 # grab an even number of digits
1482 1482 prefix = bin(id[: l * 2])
1483 1483 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1484 1484 nl = [
1485 1485 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1486 1486 ]
1487 1487 if nullhex.startswith(id):
1488 1488 nl.append(nullid)
1489 1489 if len(nl) > 0:
1490 1490 if len(nl) == 1 and not maybewdir:
1491 1491 self._pcache[id] = nl[0]
1492 1492 return nl[0]
1493 1493 raise error.AmbiguousPrefixLookupError(
1494 1494 id, self.indexfile, _(b'ambiguous identifier')
1495 1495 )
1496 1496 if maybewdir:
1497 1497 raise error.WdirUnsupported
1498 1498 return None
1499 1499 except TypeError:
1500 1500 pass
1501 1501
1502 1502 def lookup(self, id):
1503 1503 """locate a node based on:
1504 1504 - revision number or str(revision number)
1505 1505 - nodeid or subset of hex nodeid
1506 1506 """
1507 1507 n = self._match(id)
1508 1508 if n is not None:
1509 1509 return n
1510 1510 n = self._partialmatch(id)
1511 1511 if n:
1512 1512 return n
1513 1513
1514 1514 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1515 1515
1516 1516 def shortest(self, node, minlength=1):
1517 1517 """Find the shortest unambiguous prefix that matches node."""
1518 1518
1519 1519 def isvalid(prefix):
1520 1520 try:
1521 1521 matchednode = self._partialmatch(prefix)
1522 1522 except error.AmbiguousPrefixLookupError:
1523 1523 return False
1524 1524 except error.WdirUnsupported:
1525 1525 # single 'ff...' match
1526 1526 return True
1527 1527 if matchednode is None:
1528 1528 raise error.LookupError(node, self.indexfile, _(b'no node'))
1529 1529 return True
1530 1530
1531 1531 def maybewdir(prefix):
1532 1532 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1533 1533
1534 1534 hexnode = hex(node)
1535 1535
1536 1536 def disambiguate(hexnode, minlength):
1537 1537 """Disambiguate against wdirid."""
1538 1538 for length in range(minlength, len(hexnode) + 1):
1539 1539 prefix = hexnode[:length]
1540 1540 if not maybewdir(prefix):
1541 1541 return prefix
1542 1542
1543 1543 if not getattr(self, 'filteredrevs', None):
1544 1544 try:
1545 1545 length = max(self.index.shortest(node), minlength)
1546 1546 return disambiguate(hexnode, length)
1547 1547 except error.RevlogError:
1548 1548 if node != wdirid:
1549 1549 raise error.LookupError(node, self.indexfile, _(b'no node'))
1550 1550 except AttributeError:
1551 1551 # Fall through to pure code
1552 1552 pass
1553 1553
1554 1554 if node == wdirid:
1555 1555 for length in range(minlength, len(hexnode) + 1):
1556 1556 prefix = hexnode[:length]
1557 1557 if isvalid(prefix):
1558 1558 return prefix
1559 1559
1560 1560 for length in range(minlength, len(hexnode) + 1):
1561 1561 prefix = hexnode[:length]
1562 1562 if isvalid(prefix):
1563 1563 return disambiguate(hexnode, length)
1564 1564
1565 1565 def cmp(self, node, text):
1566 1566 """compare text with a given file revision
1567 1567
1568 1568 returns True if text is different from what is stored.
1569 1569 """
1570 1570 p1, p2 = self.parents(node)
1571 1571 return storageutil.hashrevisionsha1(text, p1, p2) != node
1572 1572
1573 1573 def _cachesegment(self, offset, data):
1574 1574 """Add a segment to the revlog cache.
1575 1575
1576 1576 Accepts an absolute offset and the data that is at that location.
1577 1577 """
1578 1578 o, d = self._chunkcache
1579 1579 # try to add to existing cache
1580 1580 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1581 1581 self._chunkcache = o, d + data
1582 1582 else:
1583 1583 self._chunkcache = offset, data
1584 1584
1585 1585 def _readsegment(self, offset, length, df=None):
1586 1586 """Load a segment of raw data from the revlog.
1587 1587
1588 1588 Accepts an absolute offset, length to read, and an optional existing
1589 1589 file handle to read from.
1590 1590
1591 1591 If an existing file handle is passed, it will be seeked and the
1592 1592 original seek position will NOT be restored.
1593 1593
1594 1594 Returns a str or buffer of raw byte data.
1595 1595
1596 1596 Raises if the requested number of bytes could not be read.
1597 1597 """
1598 1598 # Cache data both forward and backward around the requested
1599 1599 # data, in a fixed size window. This helps speed up operations
1600 1600 # involving reading the revlog backwards.
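# Worked example with hypothetical numbers: with a chunk cache size of
# 65536 bytes, a request for offset=70000, length=100 becomes
#   realoffset = 70000 & ~65535                            -> 65536
#   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  -> 65536
# so one full 64 KiB window starting at 65536 is read and cached, and the
# requested 100 bytes are sliced out of it further down.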
1601 1601 cachesize = self._chunkcachesize
1602 1602 realoffset = offset & ~(cachesize - 1)
1603 1603 reallength = (
1604 1604 (offset + length + cachesize) & ~(cachesize - 1)
1605 1605 ) - realoffset
1606 1606 with self._datareadfp(df) as df:
1607 1607 df.seek(realoffset)
1608 1608 d = df.read(reallength)
1609 1609
1610 1610 self._cachesegment(realoffset, d)
1611 1611 if offset != realoffset or reallength != length:
1612 1612 startoffset = offset - realoffset
1613 1613 if len(d) - startoffset < length:
1614 1614 raise error.RevlogError(
1615 1615 _(
1616 1616 b'partial read of revlog %s; expected %d bytes from '
1617 1617 b'offset %d, got %d'
1618 1618 )
1619 1619 % (
1620 1620 self.indexfile if self._inline else self.datafile,
1621 1621 length,
1622 1622 realoffset,
1623 1623 len(d) - startoffset,
1624 1624 )
1625 1625 )
1626 1626
1627 1627 return util.buffer(d, startoffset, length)
1628 1628
1629 1629 if len(d) < length:
1630 1630 raise error.RevlogError(
1631 1631 _(
1632 1632 b'partial read of revlog %s; expected %d bytes from offset '
1633 1633 b'%d, got %d'
1634 1634 )
1635 1635 % (
1636 1636 self.indexfile if self._inline else self.datafile,
1637 1637 length,
1638 1638 offset,
1639 1639 len(d),
1640 1640 )
1641 1641 )
1642 1642
1643 1643 return d
1644 1644
1645 1645 def _getsegment(self, offset, length, df=None):
1646 1646 """Obtain a segment of raw data from the revlog.
1647 1647
1648 1648 Accepts an absolute offset, length of bytes to obtain, and an
1649 1649 optional file handle to the already-opened revlog. If the file
1650 1650 handle is used, its original seek position will not be preserved.
1651 1651
1652 1652 Requests for data may be returned from a cache.
1653 1653
1654 1654 Returns a str or a buffer instance of raw byte data.
1655 1655 """
1656 1656 o, d = self._chunkcache
1657 1657 l = len(d)
1658 1658
1659 1659 # is it in the cache?
1660 1660 cachestart = offset - o
1661 1661 cacheend = cachestart + length
1662 1662 if cachestart >= 0 and cacheend <= l:
1663 1663 if cachestart == 0 and cacheend == l:
1664 1664 return d # avoid a copy
1665 1665 return util.buffer(d, cachestart, cacheend - cachestart)
1666 1666
1667 1667 return self._readsegment(offset, length, df=df)
1668 1668
1669 1669 def _getsegmentforrevs(self, startrev, endrev, df=None):
1670 1670 """Obtain a segment of raw data corresponding to a range of revisions.
1671 1671
1672 1672 Accepts the start and end revisions and an optional already-open
1673 1673 file handle to be used for reading. If the file handle is used, its
1674 1674 seek position will not be preserved.
1675 1675
1676 1676 Requests for data may be satisfied by a cache.
1677 1677
1678 1678 Returns a 2-tuple of (offset, data) for the requested range of
1679 1679 revisions. Offset is the integer offset from the beginning of the
1680 1680 revlog and data is a str or buffer of the raw byte data.
1681 1681
1682 1682 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1683 1683 to determine where each revision's data begins and ends.
1684 1684 """
1685 1685 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1686 1686 # (functions are expensive).
1687 1687 index = self.index
1688 1688 istart = index[startrev]
1689 1689 start = int(istart[0] >> 16)
1690 1690 if startrev == endrev:
1691 1691 end = start + istart[1]
1692 1692 else:
1693 1693 iend = index[endrev]
1694 1694 end = int(iend[0] >> 16) + iend[1]
1695 1695
1696 1696 if self._inline:
1697 1697 start += (startrev + 1) * self._io.size
1698 1698 end += (endrev + 1) * self._io.size
1699 1699 length = end - start
1700 1700
1701 1701 return start, self._getsegment(start, length, df=df)
1702 1702
1703 1703 def _chunk(self, rev, df=None):
1704 1704 """Obtain a single decompressed chunk for a revision.
1705 1705
1706 1706 Accepts an integer revision and an optional already-open file handle
1707 1707 to be used for reading. If used, the seek position of the file will not
1708 1708 be preserved.
1709 1709
1710 1710 Returns a str holding uncompressed data for the requested revision.
1711 1711 """
1712 1712 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1713 1713
1714 1714 def _chunks(self, revs, df=None, targetsize=None):
1715 1715 """Obtain decompressed chunks for the specified revisions.
1716 1716
1717 1717 Accepts an iterable of numeric revisions that are assumed to be in
1718 1718 ascending order. Also accepts an optional already-open file handle
1719 1719 to be used for reading. If used, the seek position of the file will
1720 1720 not be preserved.
1721 1721
1722 1722 This function is similar to calling ``self._chunk()`` multiple times,
1723 1723 but is faster.
1724 1724
1725 1725 Returns a list with decompressed data for each requested revision.
1726 1726 """
1727 1727 if not revs:
1728 1728 return []
1729 1729 start = self.start
1730 1730 length = self.length
1731 1731 inline = self._inline
1732 1732 iosize = self._io.size
1733 1733 buffer = util.buffer
1734 1734
1735 1735 l = []
1736 1736 ladd = l.append
1737 1737
1738 1738 if not self._withsparseread:
1739 1739 slicedchunks = (revs,)
1740 1740 else:
1741 1741 slicedchunks = deltautil.slicechunk(
1742 1742 self, revs, targetsize=targetsize
1743 1743 )
1744 1744
1745 1745 for revschunk in slicedchunks:
1746 1746 firstrev = revschunk[0]
1747 1747 # Skip trailing revisions with empty diff
1748 1748 for lastrev in revschunk[::-1]:
1749 1749 if length(lastrev) != 0:
1750 1750 break
1751 1751
1752 1752 try:
1753 1753 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1754 1754 except OverflowError:
1755 1755 # issue4215 - we can't cache a run of chunks greater than
1756 1756 # 2G on Windows
1757 1757 return [self._chunk(rev, df=df) for rev in revschunk]
1758 1758
1759 1759 decomp = self.decompress
1760 1760 for rev in revschunk:
1761 1761 chunkstart = start(rev)
1762 1762 if inline:
1763 1763 chunkstart += (rev + 1) * iosize
1764 1764 chunklength = length(rev)
1765 1765 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1766 1766
1767 1767 return l
1768 1768
1769 1769 def _chunkclear(self):
1770 1770 """Clear the raw chunk cache."""
1771 1771 self._chunkcache = (0, b'')
1772 1772
1773 1773 def deltaparent(self, rev):
1774 1774 """return deltaparent of the given revision"""
1775 1775 base = self.index[rev][3]
1776 1776 if base == rev:
1777 1777 return nullrev
1778 1778 elif self._generaldelta:
1779 1779 return base
1780 1780 else:
1781 1781 return rev - 1
1782 1782
1783 1783 def issnapshot(self, rev):
1784 1784 """tells whether rev is a snapshot"""
1785 1785 if not self._sparserevlog:
1786 1786 return self.deltaparent(rev) == nullrev
1787 1787 elif util.safehasattr(self.index, b'issnapshot'):
1788 1788 # directly assign the method to cache the testing and access
1789 1789 self.issnapshot = self.index.issnapshot
1790 1790 return self.issnapshot(rev)
1791 1791 if rev == nullrev:
1792 1792 return True
1793 1793 entry = self.index[rev]
1794 1794 base = entry[3]
1795 1795 if base == rev:
1796 1796 return True
1797 1797 if base == nullrev:
1798 1798 return True
1799 1799 p1 = entry[5]
1800 1800 p2 = entry[6]
1801 1801 if base == p1 or base == p2:
1802 1802 return False
1803 1803 return self.issnapshot(base)
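# Illustrative sketch (revision numbers are hypothetical): with
# sparse-revlog a revision is a snapshot when it is stored as a full text
# (base is nullrev or itself) or when it is an intermediate snapshot whose
# delta base is not one of its parents and is itself a snapshot. For a
# chain like full(0) <- intermediate snapshot(5) <- delta-vs-parent(9),
# issnapshot() is True for 0 and 5 and False for 9.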
1804 1804
1805 1805 def snapshotdepth(self, rev):
1806 1806 """number of snapshot in the chain before this one"""
1807 1807 if not self.issnapshot(rev):
1808 1808 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1809 1809 return len(self._deltachain(rev)[0]) - 1
1810 1810
1811 1811 def revdiff(self, rev1, rev2):
1812 1812 """return or calculate a delta between two revisions
1813 1813
1814 1814 The delta calculated is in binary form and is intended to be written to
1815 1815 revlog data directly. So this function needs raw revision data.
1816 1816 """
1817 1817 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1818 1818 return bytes(self._chunk(rev2))
1819 1819
1820 1820 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1821 1821
1822 1822 def _processflags(self, text, flags, operation, raw=False):
1823 1823 """deprecated entry point to access flag processors"""
1824 1824 msg = b'_processflag(...) use the specialized variant'
1825 1825 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1826 1826 if raw:
1827 1827 return text, flagutil.processflagsraw(self, text, flags)
1828 1828 elif operation == b'read':
1829 1829 return flagutil.processflagsread(self, text, flags)
1830 1830 else: # write operation
1831 1831 return flagutil.processflagswrite(self, text, flags, None)
1832 1832
1833 1833 def revision(self, nodeorrev, _df=None, raw=False):
1834 1834 """return an uncompressed revision of a given node or revision
1835 1835 number.
1836 1836
1837 1837 _df - an existing file handle to read from. (internal-only)
1838 1838 raw - an optional argument specifying if the revision data is to be
1839 1839 treated as raw data when applying flag transforms. 'raw' should be set
1840 1840 to True when generating changegroups or in debug commands.
1841 1841 """
1842 1842 if raw:
1843 1843 msg = (
1844 1844 b'revlog.revision(..., raw=True) is deprecated, '
1845 1845 b'use revlog.rawdata(...)'
1846 1846 )
1847 1847 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1848 1848 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1849 1849
1850 1850 def sidedata(self, nodeorrev, _df=None):
1851 1851 """a map of extra data related to the changeset but not part of the hash
1852 1852
1853 1853 This function currently returns a dictionary. However, a more advanced
1854 1854 mapping object will likely be used in the future for more
1855 1855 efficient/lazy code.
1856 1856 """
1857 1857 return self._revisiondata(nodeorrev, _df)[1]
1858 1858
1859 1859 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1860 1860 # deal with <nodeorrev> argument type
1861 1861 if isinstance(nodeorrev, int):
1862 1862 rev = nodeorrev
1863 1863 node = self.node(rev)
1864 1864 else:
1865 1865 node = nodeorrev
1866 1866 rev = None
1867 1867
1868 1868 # fast path the special `nullid` rev
1869 1869 if node == nullid:
1870 1870 return b"", {}
1871 1871
1872 1872 # ``rawtext`` is the text as stored inside the revlog. Might be the
1873 1873 # revision or might need to be processed to retrieve the revision.
1874 1874 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1875 1875
1876 1876 if raw and validated:
1877 1877 # if we don't want to process the raw text and that raw
1878 1878 # text is cached, we can exit early.
1879 1879 return rawtext, {}
1880 1880 if rev is None:
1881 1881 rev = self.rev(node)
1882 1882 # the revlog's flag for this revision
1883 1883 # (usually alter its state or content)
1884 1884 flags = self.flags(rev)
1885 1885
1886 1886 if validated and flags == REVIDX_DEFAULT_FLAGS:
1887 1887 # no extra flags set, no flag processor runs, text = rawtext
1888 1888 return rawtext, {}
1889 1889
1890 1890 sidedata = {}
1891 1891 if raw:
1892 1892 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1893 1893 text = rawtext
1894 1894 else:
1895 1895 try:
1896 1896 r = flagutil.processflagsread(self, rawtext, flags)
1897 1897 except error.SidedataHashError as exc:
1898 1898 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1899 1899 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1900 1900 raise error.RevlogError(msg)
1901 1901 text, validatehash, sidedata = r
1902 1902 if validatehash:
1903 1903 self.checkhash(text, node, rev=rev)
1904 1904 if not validated:
1905 1905 self._revisioncache = (node, rev, rawtext)
1906 1906
1907 1907 return text, sidedata
1908 1908
1909 1909 def _rawtext(self, node, rev, _df=None):
1910 1910 """return the possibly unvalidated rawtext for a revision
1911 1911
1912 1912 returns (rev, rawtext, validated)
1913 1913 """
1914 1914
1915 1915 # revision in the cache (could be useful to apply delta)
1916 1916 cachedrev = None
1917 1917 # An intermediate text to apply deltas to
1918 1918 basetext = None
1919 1919
1920 1920 # Check if we have the entry in cache
1921 1921 # The cache entry looks like (node, rev, rawtext)
1922 1922 if self._revisioncache:
1923 1923 if self._revisioncache[0] == node:
1924 1924 return (rev, self._revisioncache[2], True)
1925 1925 cachedrev = self._revisioncache[1]
1926 1926
1927 1927 if rev is None:
1928 1928 rev = self.rev(node)
1929 1929
1930 1930 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1931 1931 if stopped:
1932 1932 basetext = self._revisioncache[2]
1933 1933
1934 1934 # drop cache to save memory, the caller is expected to
1935 1935 # update self._revisioncache after validating the text
1936 1936 self._revisioncache = None
1937 1937
1938 1938 targetsize = None
1939 1939 rawsize = self.index[rev][2]
1940 1940 if 0 <= rawsize:
1941 1941 targetsize = 4 * rawsize
1942 1942
1943 1943 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1944 1944 if basetext is None:
1945 1945 basetext = bytes(bins[0])
1946 1946 bins = bins[1:]
1947 1947
1948 1948 rawtext = mdiff.patches(basetext, bins)
1949 1949 del basetext # let us have a chance to free memory early
1950 1950 return (rev, rawtext, False)
1951 1951
1952 1952 def rawdata(self, nodeorrev, _df=None):
1953 1953 """return an uncompressed raw data of a given node or revision number.
1954 1954
1955 1955 _df - an existing file handle to read from. (internal-only)
1956 1956 """
1957 1957 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1958 1958
1959 1959 def hash(self, text, p1, p2):
1960 1960 """Compute a node hash.
1961 1961
1962 1962 Available as a function so that subclasses can replace the hash
1963 1963 as needed.
1964 1964 """
1965 1965 return storageutil.hashrevisionsha1(text, p1, p2)
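# For reference, a sketch of the default scheme (see storageutil, not
# redefined here): hashrevisionsha1 computes
#   sha1(min(p1, p2) + max(p1, p2) + text)
# i.e. the parent nodes are concatenated in sorted order before the
# revision text, so the node commits to both content and parentage.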
1966 1966
1967 1967 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1968 1968 """Check node hash integrity.
1969 1969
1970 1970 Available as a function so that subclasses can extend hash mismatch
1971 1971 behaviors as needed.
1972 1972 """
1973 1973 try:
1974 1974 if p1 is None and p2 is None:
1975 1975 p1, p2 = self.parents(node)
1976 1976 if node != self.hash(text, p1, p2):
1977 1977 # Clear the revision cache on hash failure. The revision cache
1978 1978 # only stores the raw revision and clearing the cache does have
1979 1979 # the side-effect that we won't have a cache hit when the raw
1980 1980 # revision data is accessed. But this case should be rare and
1981 1981 # it is extra work to teach the cache about the hash
1982 1982 # verification state.
1983 1983 if self._revisioncache and self._revisioncache[0] == node:
1984 1984 self._revisioncache = None
1985 1985
1986 1986 revornode = rev
1987 1987 if revornode is None:
1988 1988 revornode = templatefilters.short(hex(node))
1989 1989 raise error.RevlogError(
1990 1990 _(b"integrity check failed on %s:%s")
1991 1991 % (self.indexfile, pycompat.bytestr(revornode))
1992 1992 )
1993 1993 except error.RevlogError:
1994 1994 if self._censorable and storageutil.iscensoredtext(text):
1995 1995 raise error.CensoredNodeError(self.indexfile, node, text)
1996 1996 raise
1997 1997
1998 1998 def _enforceinlinesize(self, tr, fp=None):
1999 1999 """Check if the revlog is too big for inline and convert if so.
2000 2000
2001 2001 This should be called after revisions are added to the revlog. If the
2002 2002 revlog has grown too large to be an inline revlog, it will convert it
2003 2003 to use multiple index and data files.
2004 2004 """
2005 2005 tiprev = len(self) - 1
2006 2006 if (
2007 2007 not self._inline
2008 2008 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2009 2009 ):
2010 2010 return
2011 2011
2012 2012 troffset = tr.findoffset(self.indexfile)
2013 2013 if troffset is None:
2014 2014 raise error.RevlogError(
2015 2015 _(b"%s not found in the transaction") % self.indexfile
2016 2016 )
2017 2017 trindex = 0
2018 2018 tr.add(self.datafile, 0)
2019 2019
2020 2020 if fp:
2021 2021 fp.flush()
2022 2022 fp.close()
2023 2023 # We can't use the cached file handle after close(). So prevent
2024 2024 # its usage.
2025 2025 self._writinghandles = None
2026 2026
2027 2027 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2028 2028 for r in self:
2029 2029 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2030 2030 if troffset <= self.start(r):
2031 2031 trindex = r
2032 2032
2033 2033 with self._indexfp(b'w') as fp:
2034 2034 self.version &= ~FLAG_INLINE_DATA
2035 2035 self._inline = False
2036 2036 io = self._io
2037 2037 for i in self:
2038 2038 e = io.packentry(self.index[i], self.node, self.version, i)
2039 2039 fp.write(e)
2040 2040
2041 2041 # the temp file replaces the real index when we exit the context
2042 2042 # manager
2043 2043
2044 2044 tr.replace(self.indexfile, trindex * self._io.size)
2045 2045 nodemaputil.setup_persistent_nodemap(tr, self)
2046 2046 self._chunkclear()
2047 2047
2048 2048 def _nodeduplicatecallback(self, transaction, node):
2049 2049 """called when trying to add a node already stored."""
2050 2050
2051 2051 def addrevision(
2052 2052 self,
2053 2053 text,
2054 2054 transaction,
2055 2055 link,
2056 2056 p1,
2057 2057 p2,
2058 2058 cachedelta=None,
2059 2059 node=None,
2060 2060 flags=REVIDX_DEFAULT_FLAGS,
2061 2061 deltacomputer=None,
2062 2062 sidedata=None,
2063 2063 ):
2064 2064 """add a revision to the log
2065 2065
2066 2066 text - the revision data to add
2067 2067 transaction - the transaction object used for rollback
2068 2068 link - the linkrev data to add
2069 2069 p1, p2 - the parent nodeids of the revision
2070 2070 cachedelta - an optional precomputed delta
2071 2071 node - nodeid of revision; typically node is not specified, and it is
2072 2072 computed by default as hash(text, p1, p2); however, subclasses might
2073 2073 use a different hashing method (and override checkhash() in that case)
2074 2074 flags - the known flags to set on the revision
2075 2075 deltacomputer - an optional deltacomputer instance shared between
2076 2076 multiple calls
2077 2077 """
2078 2078 if link == nullrev:
2079 2079 raise error.RevlogError(
2080 2080 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2081 2081 )
2082 2082
2083 2083 if sidedata is None:
2084 2084 sidedata = {}
2085 2085 flags = flags & ~REVIDX_SIDEDATA
2086 2086 elif not self.hassidedata:
2087 2087 raise error.ProgrammingError(
2088 2088 _(b"trying to add sidedata to a revlog who don't support them")
2089 2089 )
2090 2090 else:
2091 2091 flags |= REVIDX_SIDEDATA
2092 2092
2093 2093 if flags:
2094 2094 node = node or self.hash(text, p1, p2)
2095 2095
2096 2096 rawtext, validatehash = flagutil.processflagswrite(
2097 2097 self, text, flags, sidedata=sidedata
2098 2098 )
2099 2099
2100 2100 # If the flag processor modifies the revision data, ignore any provided
2101 2101 # cachedelta.
2102 2102 if rawtext != text:
2103 2103 cachedelta = None
2104 2104
2105 2105 if len(rawtext) > _maxentrysize:
2106 2106 raise error.RevlogError(
2107 2107 _(
2108 2108 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2109 2109 )
2110 2110 % (self.indexfile, len(rawtext))
2111 2111 )
2112 2112
2113 2113 node = node or self.hash(rawtext, p1, p2)
2114 2114 if self.index.has_node(node):
2115 2115 return node
2116 2116
2117 2117 if validatehash:
2118 2118 self.checkhash(rawtext, node, p1=p1, p2=p2)
2119 2119
2120 2120 return self.addrawrevision(
2121 2121 rawtext,
2122 2122 transaction,
2123 2123 link,
2124 2124 p1,
2125 2125 p2,
2126 2126 node,
2127 2127 flags,
2128 2128 cachedelta=cachedelta,
2129 2129 deltacomputer=deltacomputer,
2130 2130 )
2131 2131
2132 2132 def addrawrevision(
2133 2133 self,
2134 2134 rawtext,
2135 2135 transaction,
2136 2136 link,
2137 2137 p1,
2138 2138 p2,
2139 2139 node,
2140 2140 flags,
2141 2141 cachedelta=None,
2142 2142 deltacomputer=None,
2143 2143 ):
2144 2144 """add a raw revision with known flags, node and parents
2145 2145 useful when reusing a revision not stored in this revlog (e.g. received
2146 2146 over the wire, or read from an external bundle).
2147 2147 """
2148 2148 dfh = None
2149 2149 if not self._inline:
2150 2150 dfh = self._datafp(b"a+")
2151 2151 ifh = self._indexfp(b"a+")
2152 2152 try:
2153 2153 return self._addrevision(
2154 2154 node,
2155 2155 rawtext,
2156 2156 transaction,
2157 2157 link,
2158 2158 p1,
2159 2159 p2,
2160 2160 flags,
2161 2161 cachedelta,
2162 2162 ifh,
2163 2163 dfh,
2164 2164 deltacomputer=deltacomputer,
2165 2165 )
2166 2166 finally:
2167 2167 if dfh:
2168 2168 dfh.close()
2169 2169 ifh.close()
2170 2170
2171 2171 def compress(self, data):
2172 2172 """Generate a possibly-compressed representation of data."""
2173 2173 if not data:
2174 2174 return b'', data
2175 2175
2176 2176 compressed = self._compressor.compress(data)
2177 2177
2178 2178 if compressed:
2179 2179 # The revlog compressor added the header in the returned data.
2180 2180 return b'', compressed
2181 2181
2182 2182 if data[0:1] == b'\0':
2183 2183 return b'', data
2184 2184 return b'u', data
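# Summary of the header convention shared by compress() above and
# decompress() below (values are illustrative):
#   (b'', <engine output>)  - compressed chunk; the engine output is
#                             self-identifying (e.g. b'x' for zlib)
#   (b'', data)             - raw data that already starts with b'\0'
#   (b'u', data)            - uncompressed data, prefixed with b'u' so it
#                             can be told apart from compressed chunks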
2185 2185
2186 2186 def decompress(self, data):
2187 2187 """Decompress a revlog chunk.
2188 2188
2189 2189 The chunk is expected to begin with a header identifying the
2190 2190 format type so it can be routed to an appropriate decompressor.
2191 2191 """
2192 2192 if not data:
2193 2193 return data
2194 2194
2195 2195 # Revlogs are read much more frequently than they are written and many
2196 2196 # chunks only take microseconds to decompress, so performance is
2197 2197 # important here.
2198 2198 #
2199 2199 # We can make a few assumptions about revlogs:
2200 2200 #
2201 2201 # 1) the majority of chunks will be compressed (as opposed to inline
2202 2202 # raw data).
2203 2203 # 2) decompressing *any* data will likely be at least 10x slower than
2204 2204 # returning raw inline data.
2205 2205 # 3) we want to prioritize common and officially supported compression
2206 2206 # engines
2207 2207 #
2208 2208 # It follows that we want to optimize for "decompress compressed data
2209 2209 # when encoded with common and officially supported compression engines"
2210 2210 # case over "raw data" and "data encoded by less common or non-official
2211 2211 # compression engines." That is why we have the inline lookup first
2212 2212 # followed by the compengines lookup.
2213 2213 #
2214 2214 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2215 2215 # compressed chunks. And this matters for changelog and manifest reads.
2216 2216 t = data[0:1]
2217 2217
2218 2218 if t == b'x':
2219 2219 try:
2220 2220 return _zlibdecompress(data)
2221 2221 except zlib.error as e:
2222 2222 raise error.RevlogError(
2223 2223 _(b'revlog decompress error: %s')
2224 2224 % stringutil.forcebytestr(e)
2225 2225 )
2226 2226 # '\0' is more common than 'u' so it goes first.
2227 2227 elif t == b'\0':
2228 2228 return data
2229 2229 elif t == b'u':
2230 2230 return util.buffer(data, 1)
2231 2231
2232 2232 try:
2233 2233 compressor = self._decompressors[t]
2234 2234 except KeyError:
2235 2235 try:
2236 2236 engine = util.compengines.forrevlogheader(t)
2237 2237 compressor = engine.revlogcompressor(self._compengineopts)
2238 2238 self._decompressors[t] = compressor
2239 2239 except KeyError:
2240 2240 raise error.RevlogError(_(b'unknown compression type %r') % t)
2241 2241
2242 2242 return compressor.decompress(data)
2243 2243
2244 2244 def _addrevision(
2245 2245 self,
2246 2246 node,
2247 2247 rawtext,
2248 2248 transaction,
2249 2249 link,
2250 2250 p1,
2251 2251 p2,
2252 2252 flags,
2253 2253 cachedelta,
2254 2254 ifh,
2255 2255 dfh,
2256 2256 alwayscache=False,
2257 2257 deltacomputer=None,
2258 2258 ):
2259 2259 """internal function to add revisions to the log
2260 2260
2261 2261 see addrevision for argument descriptions.
2262 2262
2263 2263 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2264 2264
2265 2265 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2266 2266 be used.
2267 2267
2268 2268 invariants:
2269 2269 - rawtext is optional (can be None); if not set, cachedelta must be set.
2270 2270 if both are set, they must correspond to each other.
2271 2271 """
2272 2272 if node == nullid:
2273 2273 raise error.RevlogError(
2274 2274 _(b"%s: attempt to add null revision") % self.indexfile
2275 2275 )
2276 2276 if node == wdirid or node in wdirfilenodeids:
2277 2277 raise error.RevlogError(
2278 2278 _(b"%s: attempt to add wdir revision") % self.indexfile
2279 2279 )
2280 2280
2281 2281 if self._inline:
2282 2282 fh = ifh
2283 2283 else:
2284 2284 fh = dfh
2285 2285
2286 2286 btext = [rawtext]
2287 2287
2288 2288 curr = len(self)
2289 2289 prev = curr - 1
2290 2290 offset = self.end(prev)
2291 2291 p1r, p2r = self.rev(p1), self.rev(p2)
2292 2292
2293 2293 # full versions are inserted when the needed deltas
2294 2294 # become comparable to the uncompressed text
2295 2295 if rawtext is None:
2296 2296 # need rawtext size, before changed by flag processors, which is
2297 2297 # the non-raw size. use revlog explicitly to avoid filelog's extra
2298 2298 # logic that might remove metadata size.
2299 2299 textlen = mdiff.patchedsize(
2300 2300 revlog.size(self, cachedelta[0]), cachedelta[1]
2301 2301 )
2302 2302 else:
2303 2303 textlen = len(rawtext)
2304 2304
2305 2305 if deltacomputer is None:
2306 2306 deltacomputer = deltautil.deltacomputer(self)
2307 2307
2308 2308 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2309 2309
2310 2310 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2311 2311
2312 2312 e = (
2313 2313 offset_type(offset, flags),
2314 2314 deltainfo.deltalen,
2315 2315 textlen,
2316 2316 deltainfo.base,
2317 2317 link,
2318 2318 p1r,
2319 2319 p2r,
2320 2320 node,
2321 2321 )
2322 2322 self.index.append(e)
2323 2323
2324 2324 entry = self._io.packentry(e, self.node, self.version, curr)
2325 2325 self._writeentry(
2326 2326 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2327 2327 )
2328 2328
2329 2329 rawtext = btext[0]
2330 2330
2331 2331 if alwayscache and rawtext is None:
2332 2332 rawtext = deltacomputer.buildtext(revinfo, fh)
2333 2333
2334 2334 if type(rawtext) == bytes: # only accept immutable objects
2335 2335 self._revisioncache = (node, curr, rawtext)
2336 2336 self._chainbasecache[curr] = deltainfo.chainbase
2337 2337 return node
2338 2338
2339 2339 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2340 2340 # Files opened in a+ mode have inconsistent behavior on various
2341 2341 # platforms. Windows requires that a file positioning call be made
2342 2342 # when the file handle transitions between reads and writes. See
2343 2343 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2344 2344 # platforms, Python or the platform itself can be buggy. Some versions
2345 2345 # of Solaris have been observed to not append at the end of the file
2346 2346 # if the file was seeked to before the end. See issue4943 for more.
2347 2347 #
2348 2348 # We work around this issue by inserting a seek() before writing.
2349 2349 # Note: This is likely not necessary on Python 3. However, because
2350 2350 # the file handle is reused for reads and may be seeked there, we need
2351 2351 # to be careful before changing this.
2352 2352 ifh.seek(0, os.SEEK_END)
2353 2353 if dfh:
2354 2354 dfh.seek(0, os.SEEK_END)
2355 2355
2356 2356 curr = len(self) - 1
2357 2357 if not self._inline:
2358 2358 transaction.add(self.datafile, offset)
2359 2359 transaction.add(self.indexfile, curr * len(entry))
2360 2360 if data[0]:
2361 2361 dfh.write(data[0])
2362 2362 dfh.write(data[1])
2363 2363 ifh.write(entry)
2364 2364 else:
2365 2365 offset += curr * self._io.size
2366 2366 transaction.add(self.indexfile, offset)
2367 2367 ifh.write(entry)
2368 2368 ifh.write(data[0])
2369 2369 ifh.write(data[1])
2370 2370 self._enforceinlinesize(transaction, ifh)
2371 2371 nodemaputil.setup_persistent_nodemap(transaction, self)
2372 2372
2373 2373 def addgroup(
2374 2374 self,
2375 2375 deltas,
2376 2376 linkmapper,
2377 2377 transaction,
2378 alwayscache=False,
2378 2379 addrevisioncb=None,
2379 2380 duplicaterevisioncb=None,
2380 2381 ):
2381 2382 """
2382 2383 add a delta group
2383 2384
2384 2385 Given a set of deltas, add them to the revision log. The
2385 2386 first delta is against its parent, which should be in our
2386 2387 log, the rest are against the previous delta.
2387 2388
2388 2389 If ``addrevisioncb`` is defined, it will be called with arguments of
2389 2390 this revlog and the node that was added.
2390 2391 """
2391 2392
2392 2393 if self._writinghandles:
2393 2394 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2394 2395
2395 2396 r = len(self)
2396 2397 end = 0
2397 2398 if r:
2398 2399 end = self.end(r - 1)
2399 2400 ifh = self._indexfp(b"a+")
2400 2401 isize = r * self._io.size
2401 2402 if self._inline:
2402 2403 transaction.add(self.indexfile, end + isize)
2403 2404 dfh = None
2404 2405 else:
2405 2406 transaction.add(self.indexfile, isize)
2406 2407 transaction.add(self.datafile, end)
2407 2408 dfh = self._datafp(b"a+")
2408 2409
2409 2410 def flush():
2410 2411 if dfh:
2411 2412 dfh.flush()
2412 2413 ifh.flush()
2413 2414
2414 2415 self._writinghandles = (ifh, dfh)
2415 2416 empty = True
2416 2417
2417 2418 try:
2418 2419 deltacomputer = deltautil.deltacomputer(self)
2419 2420 # loop through our set of deltas
2420 2421 for data in deltas:
2421 2422 node, p1, p2, linknode, deltabase, delta, flags = data
2422 2423 link = linkmapper(linknode)
2423 2424 flags = flags or REVIDX_DEFAULT_FLAGS
2424 2425
2425 2426 if self.index.has_node(node):
2426 2427 # this can happen if two branches make the same change
2427 2428 self._nodeduplicatecallback(transaction, node)
2428 2429 if duplicaterevisioncb:
2429 2430 duplicaterevisioncb(self, node)
2430 2431 empty = False
2431 2432 continue
2432 2433
2433 2434 for p in (p1, p2):
2434 2435 if not self.index.has_node(p):
2435 2436 raise error.LookupError(
2436 2437 p, self.indexfile, _(b'unknown parent')
2437 2438 )
2438 2439
2439 2440 if not self.index.has_node(deltabase):
2440 2441 raise error.LookupError(
2441 2442 deltabase, self.indexfile, _(b'unknown delta base')
2442 2443 )
2443 2444
2444 2445 baserev = self.rev(deltabase)
2445 2446
2446 2447 if baserev != nullrev and self.iscensored(baserev):
2447 2448 # if base is censored, delta must be full replacement in a
2448 2449 # single patch operation
2449 2450 hlen = struct.calcsize(b">lll")
2450 2451 oldlen = self.rawsize(baserev)
2451 2452 newlen = len(delta) - hlen
2452 2453 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2453 2454 raise error.CensoredBaseError(
2454 2455 self.indexfile, self.node(baserev)
2455 2456 )
2456 2457
2457 2458 if not flags and self._peek_iscensored(baserev, delta, flush):
2458 2459 flags |= REVIDX_ISCENSORED
2459 2460
2460 2461 # We assume consumers of addrevisioncb will want to retrieve
2461 2462 # the added revision, which will require a call to
2462 2463 # revision(). revision() will fast path if there is a cache
2463 2464 # hit. So, we tell _addrevision() to always cache in this case.
2464 2465 # We're only using addgroup() in the context of changegroup
2465 2466 # generation so the revision data can always be handled as raw
2466 2467 # by the flagprocessor.
2467 2468 self._addrevision(
2468 2469 node,
2469 2470 None,
2470 2471 transaction,
2471 2472 link,
2472 2473 p1,
2473 2474 p2,
2474 2475 flags,
2475 2476 (baserev, delta),
2476 2477 ifh,
2477 2478 dfh,
2478 alwayscache=bool(addrevisioncb),
2479 alwayscache=alwayscache,
2479 2480 deltacomputer=deltacomputer,
2480 2481 )
2481 2482
2482 2483 if addrevisioncb:
2483 2484 addrevisioncb(self, node)
2484 2485 empty = False
2485 2486
2486 2487 if not dfh and not self._inline:
2487 2488 # addrevision switched from inline to conventional
2488 2489 # reopen the index
2489 2490 ifh.close()
2490 2491 dfh = self._datafp(b"a+")
2491 2492 ifh = self._indexfp(b"a+")
2492 2493 self._writinghandles = (ifh, dfh)
2493 2494 finally:
2494 2495 self._writinghandles = None
2495 2496
2496 2497 if dfh:
2497 2498 dfh.close()
2498 2499 ifh.close()
2499 2500 return not empty
2500 2501
2501 2502 def iscensored(self, rev):
2502 2503 """Check if a file revision is censored."""
2503 2504 if not self._censorable:
2504 2505 return False
2505 2506
2506 2507 return self.flags(rev) & REVIDX_ISCENSORED
2507 2508
2508 2509 def _peek_iscensored(self, baserev, delta, flush):
2509 2510 """Quickly check if a delta produces a censored revision."""
2510 2511 if not self._censorable:
2511 2512 return False
2512 2513
2513 2514 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2514 2515
2515 2516 def getstrippoint(self, minlink):
2516 2517 """find the minimum rev that must be stripped to strip the linkrev
2517 2518
2518 2519 Returns a tuple containing the minimum rev and a set of all revs that
2519 2520 have linkrevs that will be broken by this strip.
2520 2521 """
2521 2522 return storageutil.resolvestripinfo(
2522 2523 minlink,
2523 2524 len(self) - 1,
2524 2525 self.headrevs(),
2525 2526 self.linkrev,
2526 2527 self.parentrevs,
2527 2528 )
2528 2529
2529 2530 def strip(self, minlink, transaction):
2530 2531 """truncate the revlog on the first revision with a linkrev >= minlink
2531 2532
2532 2533 This function is called when we're stripping revision minlink and
2533 2534 its descendants from the repository.
2534 2535
2535 2536 We have to remove all revisions with linkrev >= minlink, because
2536 2537 the equivalent changelog revisions will be renumbered after the
2537 2538 strip.
2538 2539
2539 2540 So we truncate the revlog on the first of these revisions, and
2540 2541 trust that the caller has saved the revisions that shouldn't be
2541 2542 removed and that it'll re-add them after this truncation.
2542 2543 """
2543 2544 if len(self) == 0:
2544 2545 return
2545 2546
2546 2547 rev, _ = self.getstrippoint(minlink)
2547 2548 if rev == len(self):
2548 2549 return
2549 2550
2550 2551 # first truncate the files on disk
2551 2552 end = self.start(rev)
2552 2553 if not self._inline:
2553 2554 transaction.add(self.datafile, end)
2554 2555 end = rev * self._io.size
2555 2556 else:
2556 2557 end += rev * self._io.size
2557 2558
2558 2559 transaction.add(self.indexfile, end)
2559 2560
2560 2561 # then reset internal state in memory to forget those revisions
2561 2562 self._revisioncache = None
2562 2563 self._chaininfocache = util.lrucachedict(500)
2563 2564 self._chunkclear()
2564 2565
2565 2566 del self.index[rev:-1]
2566 2567
2567 2568 def checksize(self):
2568 2569 """Check size of index and data files
2569 2570
2570 2571 return a (dd, di) tuple.
2571 2572 - dd: extra bytes for the "data" file
2572 2573 - di: extra bytes for the "index" file
2573 2574
2574 2575 A healthy revlog will return (0, 0).
2575 2576 """
2576 2577 expected = 0
2577 2578 if len(self):
2578 2579 expected = max(0, self.end(len(self) - 1))
2579 2580
2580 2581 try:
2581 2582 with self._datafp() as f:
2582 2583 f.seek(0, io.SEEK_END)
2583 2584 actual = f.tell()
2584 2585 dd = actual - expected
2585 2586 except IOError as inst:
2586 2587 if inst.errno != errno.ENOENT:
2587 2588 raise
2588 2589 dd = 0
2589 2590
2590 2591 try:
2591 2592 f = self.opener(self.indexfile)
2592 2593 f.seek(0, io.SEEK_END)
2593 2594 actual = f.tell()
2594 2595 f.close()
2595 2596 s = self._io.size
2596 2597 i = max(0, actual // s)
2597 2598 di = actual - (i * s)
2598 2599 if self._inline:
2599 2600 databytes = 0
2600 2601 for r in self:
2601 2602 databytes += max(0, self.length(r))
2602 2603 dd = 0
2603 2604 di = actual - len(self) * s - databytes
2604 2605 except IOError as inst:
2605 2606 if inst.errno != errno.ENOENT:
2606 2607 raise
2607 2608 di = 0
2608 2609
2609 2610 return (dd, di)
2610 2611
2611 2612 def files(self):
2612 2613 res = [self.indexfile]
2613 2614 if not self._inline:
2614 2615 res.append(self.datafile)
2615 2616 return res
2616 2617
2617 2618 def emitrevisions(
2618 2619 self,
2619 2620 nodes,
2620 2621 nodesorder=None,
2621 2622 revisiondata=False,
2622 2623 assumehaveparentrevisions=False,
2623 2624 deltamode=repository.CG_DELTAMODE_STD,
2624 2625 ):
2625 2626 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2626 2627 raise error.ProgrammingError(
2627 2628 b'unhandled value for nodesorder: %s' % nodesorder
2628 2629 )
2629 2630
2630 2631 if nodesorder is None and not self._generaldelta:
2631 2632 nodesorder = b'storage'
2632 2633
2633 2634 if (
2634 2635 not self._storedeltachains
2635 2636 and deltamode != repository.CG_DELTAMODE_PREV
2636 2637 ):
2637 2638 deltamode = repository.CG_DELTAMODE_FULL
2638 2639
2639 2640 return storageutil.emitrevisions(
2640 2641 self,
2641 2642 nodes,
2642 2643 nodesorder,
2643 2644 revlogrevisiondelta,
2644 2645 deltaparentfn=self.deltaparent,
2645 2646 candeltafn=self.candelta,
2646 2647 rawsizefn=self.rawsize,
2647 2648 revdifffn=self.revdiff,
2648 2649 flagsfn=self.flags,
2649 2650 deltamode=deltamode,
2650 2651 revisiondata=revisiondata,
2651 2652 assumehaveparentrevisions=assumehaveparentrevisions,
2652 2653 )
2653 2654
2654 2655 DELTAREUSEALWAYS = b'always'
2655 2656 DELTAREUSESAMEREVS = b'samerevs'
2656 2657 DELTAREUSENEVER = b'never'
2657 2658
2658 2659 DELTAREUSEFULLADD = b'fulladd'
2659 2660
2660 2661 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2661 2662
2662 2663 def clone(
2663 2664 self,
2664 2665 tr,
2665 2666 destrevlog,
2666 2667 addrevisioncb=None,
2667 2668 deltareuse=DELTAREUSESAMEREVS,
2668 2669 forcedeltabothparents=None,
2669 2670 sidedatacompanion=None,
2670 2671 ):
2671 2672 """Copy this revlog to another, possibly with format changes.
2672 2673
2673 2674 The destination revlog will contain the same revisions and nodes.
2674 2675 However, it may not be bit-for-bit identical due to e.g. delta encoding
2675 2676 differences.
2676 2677
2677 2678 The ``deltareuse`` argument controls how deltas from the existing revlog
2678 2679 are preserved in the destination revlog. The argument can have the
2679 2680 following values:
2680 2681
2681 2682 DELTAREUSEALWAYS
2682 2683 Deltas will always be reused (if possible), even if the destination
2683 2684 revlog would not select the same revisions for the delta. This is the
2684 2685 fastest mode of operation.
2685 2686 DELTAREUSESAMEREVS
2686 2687 Deltas will be reused if the destination revlog would pick the same
2687 2688 revisions for the delta. This mode strikes a balance between speed
2688 2689 and optimization.
2689 2690 DELTAREUSENEVER
2690 2691 Deltas will never be reused. This is the slowest mode of execution.
2691 2692 This mode can be used to recompute deltas (e.g. if the diff/delta
2692 2693 algorithm changes).
2693 2694 DELTAREUSEFULLADD
2694 2695 Revisions will be re-added as if they were new content. This is
2695 2696 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2696 2697 e.g. large file detection and handling.
2697 2698
2698 2699 Delta computation can be slow, so the choice of delta reuse policy can
2699 2700 significantly affect run time.
2700 2701
2701 2702 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2702 2703 two extremes. Deltas will be reused if they are appropriate. But if the
2703 2704 delta could choose a better revision, it will do so. This means if you
2704 2705 are converting a non-generaldelta revlog to a generaldelta revlog,
2705 2706 deltas will be recomputed if the delta's parent isn't a parent of the
2706 2707 revision.
2707 2708
2708 2709 In addition to the delta policy, the ``forcedeltabothparents``
2709 2710 argument controls whether to force computing deltas against both parents
2710 2711 for merges. If None, the destination revlog's existing setting is kept.
2711 2712
2712 2713 If not None, `sidedatacompanion` is a callable that accepts two
2713 2714 arguments:
2714 2715
2715 2716 (srcrevlog, rev)
2716 2717
2717 2718 and returns a quintuple that controls changes to sidedata content from the
2718 2719 old revision to the new clone result:
2719 2720
2720 2721 (dropall, filterout, update, new_flags, dropped_flags)
2721 2722
2722 2723 * if `dropall` is True, all sidedata should be dropped
2723 2724 * `filterout` is a set of sidedata keys that should be dropped
2724 2725 * `update` is a mapping of additional/new key -> value
2725 2726 * `new_flags` is a bitfield of new flags that the revision should get
2726 2727 * `dropped_flags` is a bitfield of flags that the revision should no longer have
2727 2728 """
2728 2729 if deltareuse not in self.DELTAREUSEALL:
2729 2730 raise ValueError(
2730 2731 _(b'value for deltareuse invalid: %s') % deltareuse
2731 2732 )
2732 2733
2733 2734 if len(destrevlog):
2734 2735 raise ValueError(_(b'destination revlog is not empty'))
2735 2736
2736 2737 if getattr(self, 'filteredrevs', None):
2737 2738 raise ValueError(_(b'source revlog has filtered revisions'))
2738 2739 if getattr(destrevlog, 'filteredrevs', None):
2739 2740 raise ValueError(_(b'destination revlog has filtered revisions'))
2740 2741
2741 2742 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2742 2743 # if possible.
2743 2744 oldlazydelta = destrevlog._lazydelta
2744 2745 oldlazydeltabase = destrevlog._lazydeltabase
2745 2746 oldamd = destrevlog._deltabothparents
2746 2747
2747 2748 try:
2748 2749 if deltareuse == self.DELTAREUSEALWAYS:
2749 2750 destrevlog._lazydeltabase = True
2750 2751 destrevlog._lazydelta = True
2751 2752 elif deltareuse == self.DELTAREUSESAMEREVS:
2752 2753 destrevlog._lazydeltabase = False
2753 2754 destrevlog._lazydelta = True
2754 2755 elif deltareuse == self.DELTAREUSENEVER:
2755 2756 destrevlog._lazydeltabase = False
2756 2757 destrevlog._lazydelta = False
2757 2758
2758 2759 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2759 2760
2760 2761 self._clone(
2761 2762 tr,
2762 2763 destrevlog,
2763 2764 addrevisioncb,
2764 2765 deltareuse,
2765 2766 forcedeltabothparents,
2766 2767 sidedatacompanion,
2767 2768 )
2768 2769
2769 2770 finally:
2770 2771 destrevlog._lazydelta = oldlazydelta
2771 2772 destrevlog._lazydeltabase = oldlazydeltabase
2772 2773 destrevlog._deltabothparents = oldamd
2773 2774
2774 2775 def _clone(
2775 2776 self,
2776 2777 tr,
2777 2778 destrevlog,
2778 2779 addrevisioncb,
2779 2780 deltareuse,
2780 2781 forcedeltabothparents,
2781 2782 sidedatacompanion,
2782 2783 ):
2783 2784 """perform the core duty of `revlog.clone` after parameter processing"""
2784 2785 deltacomputer = deltautil.deltacomputer(destrevlog)
2785 2786 index = self.index
2786 2787 for rev in self:
2787 2788 entry = index[rev]
2788 2789
2789 2790 # Some classes override linkrev to take filtered revs into
2790 2791 # account. Use raw entry from index.
2791 2792 flags = entry[0] & 0xFFFF
2792 2793 linkrev = entry[4]
2793 2794 p1 = index[entry[5]][7]
2794 2795 p2 = index[entry[6]][7]
2795 2796 node = entry[7]
2796 2797
2797 2798 sidedataactions = (False, [], {}, 0, 0)
2798 2799 if sidedatacompanion is not None:
2799 2800 sidedataactions = sidedatacompanion(self, rev)
2800 2801
2801 2802 # (Possibly) reuse the delta from the revlog if allowed and
2802 2803 # the revlog chunk is a delta.
2803 2804 cachedelta = None
2804 2805 rawtext = None
2805 2806 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2806 2807 dropall = sidedataactions[0]
2807 2808 filterout = sidedataactions[1]
2808 2809 update = sidedataactions[2]
2809 2810 new_flags = sidedataactions[3]
2810 2811 dropped_flags = sidedataactions[4]
2811 2812 text, sidedata = self._revisiondata(rev)
2812 2813 if dropall:
2813 2814 sidedata = {}
2814 2815 for key in filterout:
2815 2816 sidedata.pop(key, None)
2816 2817 sidedata.update(update)
2817 2818 if not sidedata:
2818 2819 sidedata = None
2819 2820
2820 2821 flags |= new_flags
2821 2822 flags &= ~dropped_flags
2822 2823
2823 2824 destrevlog.addrevision(
2824 2825 text,
2825 2826 tr,
2826 2827 linkrev,
2827 2828 p1,
2828 2829 p2,
2829 2830 cachedelta=cachedelta,
2830 2831 node=node,
2831 2832 flags=flags,
2832 2833 deltacomputer=deltacomputer,
2833 2834 sidedata=sidedata,
2834 2835 )
2835 2836 else:
2836 2837 if destrevlog._lazydelta:
2837 2838 dp = self.deltaparent(rev)
2838 2839 if dp != nullrev:
2839 2840 cachedelta = (dp, bytes(self._chunk(rev)))
2840 2841
2841 2842 if not cachedelta:
2842 2843 rawtext = self.rawdata(rev)
2843 2844
2844 2845 ifh = destrevlog.opener(
2845 2846 destrevlog.indexfile, b'a+', checkambig=False
2846 2847 )
2847 2848 dfh = None
2848 2849 if not destrevlog._inline:
2849 2850 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2850 2851 try:
2851 2852 destrevlog._addrevision(
2852 2853 node,
2853 2854 rawtext,
2854 2855 tr,
2855 2856 linkrev,
2856 2857 p1,
2857 2858 p2,
2858 2859 flags,
2859 2860 cachedelta,
2860 2861 ifh,
2861 2862 dfh,
2862 2863 deltacomputer=deltacomputer,
2863 2864 )
2864 2865 finally:
2865 2866 if dfh:
2866 2867 dfh.close()
2867 2868 ifh.close()
2868 2869
2869 2870 if addrevisioncb:
2870 2871 addrevisioncb(self, rev, node)
2871 2872
2872 2873 def censorrevision(self, tr, censornode, tombstone=b''):
2873 2874 if (self.version & 0xFFFF) == REVLOGV0:
2874 2875 raise error.RevlogError(
2875 2876 _(b'cannot censor with version %d revlogs') % self.version
2876 2877 )
2877 2878
2878 2879 censorrev = self.rev(censornode)
2879 2880 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2880 2881
2881 2882 if len(tombstone) > self.rawsize(censorrev):
2882 2883 raise error.Abort(
2883 2884 _(b'censor tombstone must be no longer than censored data')
2884 2885 )
2885 2886
2886 2887 # Rewriting the revlog in place is hard. Our strategy for censoring is
2887 2888 # to create a new revlog, copy all revisions to it, then replace the
2888 2889 # revlogs on transaction close.
2889 2890
2890 2891 newindexfile = self.indexfile + b'.tmpcensored'
2891 2892 newdatafile = self.datafile + b'.tmpcensored'
2892 2893
2893 2894 # This is a bit dangerous. We could easily have a mismatch of state.
2894 2895 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2895 2896 newrl.version = self.version
2896 2897 newrl._generaldelta = self._generaldelta
2897 2898 newrl._io = self._io
2898 2899
2899 2900 for rev in self.revs():
2900 2901 node = self.node(rev)
2901 2902 p1, p2 = self.parents(node)
2902 2903
2903 2904 if rev == censorrev:
2904 2905 newrl.addrawrevision(
2905 2906 tombstone,
2906 2907 tr,
2907 2908 self.linkrev(censorrev),
2908 2909 p1,
2909 2910 p2,
2910 2911 censornode,
2911 2912 REVIDX_ISCENSORED,
2912 2913 )
2913 2914
2914 2915 if newrl.deltaparent(rev) != nullrev:
2915 2916 raise error.Abort(
2916 2917 _(
2917 2918 b'censored revision stored as delta; '
2918 2919 b'cannot censor'
2919 2920 ),
2920 2921 hint=_(
2921 2922 b'censoring of revlogs is not '
2922 2923 b'fully implemented; please report '
2923 2924 b'this bug'
2924 2925 ),
2925 2926 )
2926 2927 continue
2927 2928
2928 2929 if self.iscensored(rev):
2929 2930 if self.deltaparent(rev) != nullrev:
2930 2931 raise error.Abort(
2931 2932 _(
2932 2933 b'cannot censor due to censored '
2933 2934 b'revision having delta stored'
2934 2935 )
2935 2936 )
2936 2937 rawtext = self._chunk(rev)
2937 2938 else:
2938 2939 rawtext = self.rawdata(rev)
2939 2940
2940 2941 newrl.addrawrevision(
2941 2942 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2942 2943 )
2943 2944
2944 2945 tr.addbackup(self.indexfile, location=b'store')
2945 2946 if not self._inline:
2946 2947 tr.addbackup(self.datafile, location=b'store')
2947 2948
2948 2949 self.opener.rename(newrl.indexfile, self.indexfile)
2949 2950 if not self._inline:
2950 2951 self.opener.rename(newrl.datafile, self.datafile)
2951 2952
2952 2953 self.clearcaches()
2953 2954 self._loadindex()
2954 2955
2955 2956 def verifyintegrity(self, state):
2956 2957 """Verifies the integrity of the revlog.
2957 2958
2958 2959 Yields ``revlogproblem`` instances describing problems that are
2959 2960 found.
2960 2961 """
2961 2962 dd, di = self.checksize()
2962 2963 if dd:
2963 2964 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2964 2965 if di:
2965 2966 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2966 2967
2967 2968 version = self.version & 0xFFFF
2968 2969
2969 2970 # The verifier tells us what version revlog we should be.
2970 2971 if version != state[b'expectedversion']:
2971 2972 yield revlogproblem(
2972 2973 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2973 2974 % (self.indexfile, version, state[b'expectedversion'])
2974 2975 )
2975 2976
2976 2977 state[b'skipread'] = set()
2977 2978 state[b'safe_renamed'] = set()
2978 2979
2979 2980 for rev in self:
2980 2981 node = self.node(rev)
2981 2982
2982 2983 # Verify contents. 4 cases to care about:
2983 2984 #
2984 2985 # common: the most common case
2985 2986 # rename: with a rename
2986 2987 # meta: file content starts with b'\1\n', the metadata
2987 2988 # header defined in filelog.py, but without a rename
2988 2989 # ext: content stored externally
2989 2990 #
2990 2991 # More formally, their differences are shown below:
2991 2992 #
2992 2993 # | common | rename | meta | ext
2993 2994 # -------------------------------------------------------
2994 2995 # flags() | 0 | 0 | 0 | not 0
2995 2996 # renamed() | False | True | False | ?
2996 2997 # rawtext[0:2]=='\1\n'| False | True | True | ?
2997 2998 #
2998 2999 # "rawtext" means the raw text stored in revlog data, which
2999 3000 # could be retrieved by "rawdata(rev)". "text"
3000 3001 # mentioned below is "revision(rev)".
3001 3002 #
3002 3003 # There are 3 different lengths stored physically:
3003 3004 # 1. L1: rawsize, stored in revlog index
3004 3005 # 2. L2: len(rawtext), stored in revlog data
3005 3006 # 3. L3: len(text), stored in revlog data if flags==0, or
3006 3007 # possibly somewhere else if flags!=0
3007 3008 #
3008 3009 # L1 should be equal to L2. L3 could be different from them.
3009 3010 # "text" may or may not affect commit hash depending on flag
3010 3011 # processors (see flagutil.addflagprocessor).
3011 3012 #
3012 3013 # | common | rename | meta | ext
3013 3014 # -------------------------------------------------
3014 3015 # rawsize() | L1 | L1 | L1 | L1
3015 3016 # size() | L1 | L2-LM | L1(*) | L1 (?)
3016 3017 # len(rawtext) | L2 | L2 | L2 | L2
3017 3018 # len(text) | L2 | L2 | L2 | L3
3018 3019 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3019 3020 #
3020 3021 # LM: length of metadata, depending on rawtext
3021 3022 # (*): not ideal, see comment in filelog.size
3022 3023 # (?): could be "- len(meta)" if the resolved content has
3023 3024 # rename metadata
3024 3025 #
3025 3026 # Checks needed to be done:
3026 3027 # 1. length check: L1 == L2, in all cases.
3027 3028 # 2. hash check: depending on flag processor, we may need to
3028 3029 # use either "text" (external), or "rawtext" (in revlog).
3029 3030
3030 3031 try:
3031 3032 skipflags = state.get(b'skipflags', 0)
3032 3033 if skipflags:
3033 3034 skipflags &= self.flags(rev)
3034 3035
3035 3036 _verify_revision(self, skipflags, state, node)
3036 3037
3037 3038 l1 = self.rawsize(rev)
3038 3039 l2 = len(self.rawdata(node))
3039 3040
3040 3041 if l1 != l2:
3041 3042 yield revlogproblem(
3042 3043 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3043 3044 node=node,
3044 3045 )
3045 3046
3046 3047 except error.CensoredNodeError:
3047 3048 if state[b'erroroncensored']:
3048 3049 yield revlogproblem(
3049 3050 error=_(b'censored file data'), node=node
3050 3051 )
3051 3052 state[b'skipread'].add(node)
3052 3053 except Exception as e:
3053 3054 yield revlogproblem(
3054 3055 error=_(b'unpacking %s: %s')
3055 3056 % (short(node), stringutil.forcebytestr(e)),
3056 3057 node=node,
3057 3058 )
3058 3059 state[b'skipread'].add(node)
3059 3060
3060 3061 def storageinfo(
3061 3062 self,
3062 3063 exclusivefiles=False,
3063 3064 sharedfiles=False,
3064 3065 revisionscount=False,
3065 3066 trackedsize=False,
3066 3067 storedsize=False,
3067 3068 ):
3068 3069 d = {}
3069 3070
3070 3071 if exclusivefiles:
3071 3072 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3072 3073 if not self._inline:
3073 3074 d[b'exclusivefiles'].append((self.opener, self.datafile))
3074 3075
3075 3076 if sharedfiles:
3076 3077 d[b'sharedfiles'] = []
3077 3078
3078 3079 if revisionscount:
3079 3080 d[b'revisionscount'] = len(self)
3080 3081
3081 3082 if trackedsize:
3082 3083 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3083 3084
3084 3085 if storedsize:
3085 3086 d[b'storedsize'] = sum(
3086 3087 self.opener.stat(path).st_size for path in self.files()
3087 3088 )
3088 3089
3089 3090 return d
@@ -1,287 +1,288 b''
1 1 # unionrepo.py - repository class for viewing union of repository changesets
2 2 #
3 3 # Derived from bundlerepo.py
4 4 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
5 5 # Copyright 2013 Unity Technologies, Mads Kiilerich <madski@unity3d.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Repository class for "in-memory pull" of one local repository to another,
11 11 allowing operations like diff and log with revsets.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 from .i18n import _
17 17 from .pycompat import getattr
18 18
19 19 from . import (
20 20 changelog,
21 21 cmdutil,
22 22 encoding,
23 23 error,
24 24 filelog,
25 25 localrepo,
26 26 manifest,
27 27 mdiff,
28 28 pathutil,
29 29 revlog,
30 30 util,
31 31 vfs as vfsmod,
32 32 )
33 33
34 34
35 35 class unionrevlog(revlog.revlog):
36 36 def __init__(self, opener, indexfile, revlog2, linkmapper):
37 37 # How it works:
38 38 # To retrieve a revision, we just need to know the node id so we can
39 39 # look it up in revlog2.
40 40 #
41 41 # To differentiate a rev in the second revlog from a rev in the revlog,
42 42 # we check revision against repotiprev.
43 43 opener = vfsmod.readonlyvfs(opener)
44 44 revlog.revlog.__init__(self, opener, indexfile)
45 45 self.revlog2 = revlog2
46 46
47 47 n = len(self)
48 48 self.repotiprev = n - 1
49 49 self.bundlerevs = set() # used by 'bundle()' revset expression
50 50 for rev2 in self.revlog2:
51 51 rev = self.revlog2.index[rev2]
52 52 # the rev numbers in this index entry are revlog2 revs, not revs of self
53 53 _start, _csize, rsize, base, linkrev, p1rev, p2rev, node = rev
54 54 flags = _start & 0xFFFF
55 55
56 56 if linkmapper is None: # link is to same revlog
57 57 assert linkrev == rev2 # we never link back
58 58 link = n
59 59 else: # rev must be mapped from repo2 cl to unified cl by linkmapper
60 60 link = linkmapper(linkrev)
61 61
62 62 if linkmapper is not None: # the delta base rev must be mapped too
63 63 base = linkmapper(base)
64 64
65 65 this_rev = self.index.get_rev(node)
66 66 if this_rev is not None:
67 67 # this happens for the common revlog revisions
68 68 self.bundlerevs.add(this_rev)
69 69 continue
70 70
71 71 p1node = self.revlog2.node(p1rev)
72 72 p2node = self.revlog2.node(p2rev)
73 73
74 74 # TODO: it's probably wrong to set compressed length to -1, but
75 75 # I have no idea if csize is valid in the base revlog context.
76 76 e = (
77 77 flags,
78 78 -1,
79 79 rsize,
80 80 base,
81 81 link,
82 82 self.rev(p1node),
83 83 self.rev(p2node),
84 84 node,
85 85 )
86 86 self.index.append(e)
87 87 self.bundlerevs.add(n)
88 88 n += 1
89 89
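The lookup rule from the "How it works" comment above — revisions at or below repotiprev belong to the base revlog, anything newer is translated to a node and resolved in revlog2 — amounts to the following dispatch. resolve_rev is a hypothetical helper written for illustration only, not a method of unionrevlog; it uses the node() / rev() accessors the class already relies on.

    def resolve_rev(union, rev):
        # revisions inherited from the base revlog are served directly
        if rev <= union.repotiprev:
            return ('base', rev)
        # later revisions only exist in revlog2; go through the node id
        node = union.node(rev)
        return ('revlog2', union.revlog2.rev(node))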
90 90 def _chunk(self, rev):
91 91 if rev <= self.repotiprev:
92 92 return revlog.revlog._chunk(self, rev)
93 93 return self.revlog2._chunk(self.node(rev))
94 94
95 95 def revdiff(self, rev1, rev2):
96 96 """return or calculate a delta between two revisions"""
97 97 if rev1 > self.repotiprev and rev2 > self.repotiprev:
98 98 return self.revlog2.revdiff(
99 99 self.revlog2.rev(self.node(rev1)),
100 100 self.revlog2.rev(self.node(rev2)),
101 101 )
102 102 elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
103 103 return super(unionrevlog, self).revdiff(rev1, rev2)
104 104
105 105 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
106 106
107 107 def _revisiondata(self, nodeorrev, _df=None, raw=False):
108 108 if isinstance(nodeorrev, int):
109 109 rev = nodeorrev
110 110 node = self.node(rev)
111 111 else:
112 112 node = nodeorrev
113 113 rev = self.rev(node)
114 114
115 115 if rev > self.repotiprev:
116 116 # work around manifestrevlog NOT being a revlog
117 117 revlog2 = getattr(self.revlog2, '_revlog', self.revlog2)
118 118 func = revlog2._revisiondata
119 119 else:
120 120 func = super(unionrevlog, self)._revisiondata
121 121 return func(node, _df=_df, raw=raw)
122 122
123 123 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
124 124 raise NotImplementedError
125 125
126 126 def addgroup(
127 127 self,
128 128 deltas,
129 129 linkmapper,
130 130 transaction,
131 alwayscache=False,
131 132 addrevisioncb=None,
132 133 duplicaterevisioncb=None,
133 134 maybemissingparents=False,
134 135 ):
135 136 raise NotImplementedError
136 137
137 138 def strip(self, minlink, transaction):
138 139 raise NotImplementedError
139 140
140 141 def checksize(self):
141 142 raise NotImplementedError
142 143
143 144
144 145 class unionchangelog(unionrevlog, changelog.changelog):
145 146 def __init__(self, opener, opener2):
146 147 changelog.changelog.__init__(self, opener)
147 148 linkmapper = None
148 149 changelog2 = changelog.changelog(opener2)
149 150 unionrevlog.__init__(
150 151 self, opener, self.indexfile, changelog2, linkmapper
151 152 )
152 153
153 154
154 155 class unionmanifest(unionrevlog, manifest.manifestrevlog):
155 156 def __init__(self, opener, opener2, linkmapper):
156 157 manifest.manifestrevlog.__init__(self, opener)
157 158 manifest2 = manifest.manifestrevlog(opener2)
158 159 unionrevlog.__init__(
159 160 self, opener, self.indexfile, manifest2, linkmapper
160 161 )
161 162
162 163
163 164 class unionfilelog(filelog.filelog):
164 165 def __init__(self, opener, path, opener2, linkmapper, repo):
165 166 filelog.filelog.__init__(self, opener, path)
166 167 filelog2 = filelog.filelog(opener2, path)
167 168 self._revlog = unionrevlog(
168 169 opener, self.indexfile, filelog2._revlog, linkmapper
169 170 )
170 171 self._repo = repo
171 172 self.repotiprev = self._revlog.repotiprev
172 173 self.revlog2 = self._revlog.revlog2
173 174
174 175 def iscensored(self, rev):
175 176 """Check if a revision is censored."""
176 177 if rev <= self.repotiprev:
177 178 return filelog.filelog.iscensored(self, rev)
178 179 node = self.node(rev)
179 180 return self.revlog2.iscensored(self.revlog2.rev(node))
180 181
181 182
182 183 class unionpeer(localrepo.localpeer):
183 184 def canpush(self):
184 185 return False
185 186
186 187
187 188 class unionrepository(object):
188 189 """Represents the union of data in 2 repositories.
189 190
190 191 Instances are not usable if constructed directly. Use ``instance()``
191 192 or ``makeunionrepository()`` to create a usable instance.
192 193 """
193 194
194 195 def __init__(self, repo2, url):
195 196 self.repo2 = repo2
196 197 self._url = url
197 198
198 199 self.ui.setconfig(b'phases', b'publish', False, b'unionrepo')
199 200
200 201 @localrepo.unfilteredpropertycache
201 202 def changelog(self):
202 203 return unionchangelog(self.svfs, self.repo2.svfs)
203 204
204 205 @localrepo.unfilteredpropertycache
205 206 def manifestlog(self):
206 207 rootstore = unionmanifest(
207 208 self.svfs, self.repo2.svfs, self.unfiltered()._clrev
208 209 )
209 210 return manifest.manifestlog(
210 211 self.svfs, self, rootstore, self.narrowmatch()
211 212 )
212 213
213 214 def _clrev(self, rev2):
214 215 """map from repo2 changelog rev to temporary rev in self.changelog"""
215 216 node = self.repo2.changelog.node(rev2)
216 217 return self.changelog.rev(node)
217 218
218 219 def url(self):
219 220 return self._url
220 221
221 222 def file(self, f):
222 223 return unionfilelog(
223 224 self.svfs, f, self.repo2.svfs, self.unfiltered()._clrev, self
224 225 )
225 226
226 227 def close(self):
227 228 self.repo2.close()
228 229
229 230 def cancopy(self):
230 231 return False
231 232
232 233 def peer(self):
233 234 return unionpeer(self)
234 235
235 236 def getcwd(self):
236 237 return encoding.getcwd() # always outside the repo
237 238
238 239
239 240 def instance(ui, path, create, intents=None, createopts=None):
240 241 if create:
241 242 raise error.Abort(_(b'cannot create new union repository'))
242 243 parentpath = ui.config(b"bundle", b"mainreporoot")
243 244 if not parentpath:
244 245 # try to find the correct path to the working directory repo
245 246 parentpath = cmdutil.findrepo(encoding.getcwd())
246 247 if parentpath is None:
247 248 parentpath = b''
248 249 if parentpath:
249 250 # Try to make the full path relative so we get a nice, short URL.
250 251 # In particular, we don't want temp dir names in test outputs.
251 252 cwd = encoding.getcwd()
252 253 if parentpath == cwd:
253 254 parentpath = b''
254 255 else:
255 256 cwd = pathutil.normasprefix(cwd)
256 257 if parentpath.startswith(cwd):
257 258 parentpath = parentpath[len(cwd) :]
258 259 if path.startswith(b'union:'):
259 260 s = path.split(b":", 1)[1].split(b"+", 1)
260 261 if len(s) == 1:
261 262 repopath, repopath2 = parentpath, s[0]
262 263 else:
263 264 repopath, repopath2 = s
264 265 else:
265 266 repopath, repopath2 = parentpath, path
266 267
267 268 return makeunionrepository(ui, repopath, repopath2)
268 269
269 270
270 271 def makeunionrepository(ui, repopath1, repopath2):
271 272 """Make a union repository object from 2 local repo paths."""
272 273 repo1 = localrepo.instance(ui, repopath1, create=False)
273 274 repo2 = localrepo.instance(ui, repopath2, create=False)
274 275
275 276 url = b'union:%s+%s' % (
276 277 util.expandpath(repopath1),
277 278 util.expandpath(repopath2),
278 279 )
279 280
280 281 class derivedunionrepository(unionrepository, repo1.__class__):
281 282 pass
282 283
283 284 repo = repo1
284 285 repo.__class__ = derivedunionrepository
285 286 unionrepository.__init__(repo1, repo2, url)
286 287
287 288 return repo
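For reference, a union view over two local repositories can be obtained either through a 'union:repo1+repo2' path handled by instance() or directly via makeunionrepository(). The sketch below uses made-up paths and assumes ui.load() is available, so treat it as an illustration rather than documented usage.

    from mercurial import ui as uimod, unionrepo

    myui = uimod.ui.load()
    # build the in-memory union of two local repositories (paths are examples)
    repo = unionrepo.makeunionrepository(myui, b'/path/to/repo1', b'/path/to/repo2')
    print(len(repo.changelog))  # changesets visible through the union view
    repo.close()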