repository: establish API for emitting revision deltas...
Gregory Szorc
r39267:b41d023a default
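
This changeset moves changegroup delta computation behind the storage interface: rather than resolving each delta itself in `_handlerevisiondeltarequest()`, changegroup.py now builds a batch of `revisiondeltarequest` objects and hands them to the store through a new `emitrevisiondeltas()` method (see the removed and added hunks below). A minimal sketch of the consumer side of the new API, assuming `store` is any object implementing the interface; the scaffolding and the `wanted`/`process` names are illustrative, not part of the patch:

    # `wanted` is an illustrative list of (node, p1, p2, linknode) tuples.
    requests = [
        revisiondeltarequest(
            node=node,            # revision whose data we want emitted
            p1node=p1, p2node=p2,
            linknode=linknode,    # changelog node that introduced it
            basenode=None,        # None lets the store pick a delta base
        )
        for node, p1, p2, linknode in wanted
    ]

    for delta in store.emitrevisiondeltas(requests):
        # Each emitted revisiondelta carries either a full `revision` or a
        # `delta` against `basenode`, plus `flags` and `baserevisionsize`.
        process(delta)
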
diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
@@ -1,1486 +1,1396 @@
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagop,
28 28 error,
29 29 match as matchmod,
30 30 mdiff,
31 31 phases,
32 32 pycompat,
33 33 repository,
34 revlog,
35 34 util,
36 35 )
37 36
38 37 from .utils import (
39 38 interfaceutil,
40 39 stringutil,
41 40 )
42 41
43 42 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
44 43 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
45 44 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
46 45
47 46 LFS_REQUIREMENT = 'lfs'
48 47
49 48 readexactly = util.readexactly
50 49
51 50 def getchunk(stream):
52 51 """return the next chunk from stream as a string"""
53 52 d = readexactly(stream, 4)
54 53 l = struct.unpack(">l", d)[0]
55 54 if l <= 4:
56 55 if l:
57 56 raise error.Abort(_("invalid chunk length %d") % l)
58 57 return ""
59 58 return readexactly(stream, l - 4)
60 59
61 60 def chunkheader(length):
62 61 """return a changegroup chunk header (string)"""
63 62 return struct.pack(">l", length + 4)
64 63
65 64 def closechunk():
66 65 """return a changegroup chunk header (string) for a zero-length chunk"""
67 66 return struct.pack(">l", 0)
68 67
69 68 def _fileheader(path):
70 69 """Obtain a changegroup chunk header for a named path."""
71 70 return chunkheader(len(path)) + path
72 71
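# Editorial note, not part of the patch: the helpers above frame data with
# a 4-byte big-endian length prefix that counts itself, so a zero-length
# (terminating) chunk is just struct.pack(">l", 0). A minimal sketch:
#
#     import struct
#     payload = b'hello'
#     framed = struct.pack(">l", len(payload) + 4) + payload  # chunkheader
#     terminator = struct.pack(">l", 0)                       # closechunk
#     # getchunk() inverts this: read 4 bytes, unpack the length l, then
#     # read l - 4 more bytes; l == 0 signals the end of the group.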
73 72 def writechunks(ui, chunks, filename, vfs=None):
74 73 """Write chunks to a file and return its filename.
75 74
76 75 The stream is assumed to be a bundle file.
77 76 Existing files will not be overwritten.
78 77 If no filename is specified, a temporary file is created.
79 78 """
80 79 fh = None
81 80 cleanup = None
82 81 try:
83 82 if filename:
84 83 if vfs:
85 84 fh = vfs.open(filename, "wb")
86 85 else:
87 86 # Increase default buffer size because default is usually
88 87 # small (4k is common on Linux).
89 88 fh = open(filename, "wb", 131072)
90 89 else:
91 90 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
92 91 fh = os.fdopen(fd, r"wb")
93 92 cleanup = filename
94 93 for c in chunks:
95 94 fh.write(c)
96 95 cleanup = None
97 96 return filename
98 97 finally:
99 98 if fh is not None:
100 99 fh.close()
101 100 if cleanup is not None:
102 101 if filename and vfs:
103 102 vfs.unlink(cleanup)
104 103 else:
105 104 os.unlink(cleanup)
106 105
107 106 class cg1unpacker(object):
108 107 """Unpacker for cg1 changegroup streams.
109 108
110 109 A changegroup unpacker handles the framing of the revision data in
111 110 the wire format. Most consumers will want to use the apply()
112 111 method to add the changes from the changegroup to a repository.
113 112
114 113 If you're forwarding a changegroup unmodified to another consumer,
115 114 use getchunks(), which returns an iterator of changegroup
116 115 chunks. This is mostly useful for cases where you need to know the
117 116 data stream has ended by observing the end of the changegroup.
118 117
119 118 deltachunk() is useful only if you're applying delta data. Most
120 119 consumers should prefer apply() instead.
121 120
122 121 A few other public methods exist. Those are used only for
123 122 bundlerepo and some debug commands - their use is discouraged.
124 123 """
125 124 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
126 125 deltaheadersize = deltaheader.size
127 126 version = '01'
128 127 _grouplistcount = 1 # One list of files after the manifests
129 128
130 129 def __init__(self, fh, alg, extras=None):
131 130 if alg is None:
132 131 alg = 'UN'
133 132 if alg not in util.compengines.supportedbundletypes:
134 133 raise error.Abort(_('unknown stream compression type: %s')
135 134 % alg)
136 135 if alg == 'BZ':
137 136 alg = '_truncatedBZ'
138 137
139 138 compengine = util.compengines.forbundletype(alg)
140 139 self._stream = compengine.decompressorreader(fh)
141 140 self._type = alg
142 141 self.extras = extras or {}
143 142 self.callback = None
144 143
145 144 # These methods (compressed, read, seek, tell) all appear to only
146 145 # be used by bundlerepo, but it's a little hard to tell.
147 146 def compressed(self):
148 147 return self._type is not None and self._type != 'UN'
149 148 def read(self, l):
150 149 return self._stream.read(l)
151 150 def seek(self, pos):
152 151 return self._stream.seek(pos)
153 152 def tell(self):
154 153 return self._stream.tell()
155 154 def close(self):
156 155 return self._stream.close()
157 156
158 157 def _chunklength(self):
159 158 d = readexactly(self._stream, 4)
160 159 l = struct.unpack(">l", d)[0]
161 160 if l <= 4:
162 161 if l:
163 162 raise error.Abort(_("invalid chunk length %d") % l)
164 163 return 0
165 164 if self.callback:
166 165 self.callback()
167 166 return l - 4
168 167
169 168 def changelogheader(self):
170 169 """v10 does not have a changelog header chunk"""
171 170 return {}
172 171
173 172 def manifestheader(self):
174 173 """v10 does not have a manifest header chunk"""
175 174 return {}
176 175
177 176 def filelogheader(self):
178 177 """return the header of the filelogs chunk, v10 only has the filename"""
179 178 l = self._chunklength()
180 179 if not l:
181 180 return {}
182 181 fname = readexactly(self._stream, l)
183 182 return {'filename': fname}
184 183
185 184 def _deltaheader(self, headertuple, prevnode):
186 185 node, p1, p2, cs = headertuple
187 186 if prevnode is None:
188 187 deltabase = p1
189 188 else:
190 189 deltabase = prevnode
191 190 flags = 0
192 191 return node, p1, p2, deltabase, cs, flags
193 192
194 193 def deltachunk(self, prevnode):
195 194 l = self._chunklength()
196 195 if not l:
197 196 return {}
198 197 headerdata = readexactly(self._stream, self.deltaheadersize)
199 198 header = self.deltaheader.unpack(headerdata)
200 199 delta = readexactly(self._stream, l - self.deltaheadersize)
201 200 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
202 201 return (node, p1, p2, cs, deltabase, delta, flags)
203 202
204 203 def getchunks(self):
205 204 """returns all the chunks contained in the bundle
206 205
207 206 Used when you need to forward the binary stream to a file or another
208 207 network API. To do so, it parses the changegroup data; otherwise it would
209 208 block on an sshrepo because it doesn't know where the stream ends.
210 209 """
211 210 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
212 211 # and a list of filelogs. For changegroup 3, we expect 4 parts:
213 212 # changelog, manifestlog, a list of tree manifestlogs, and a list of
214 213 # filelogs.
215 214 #
216 215 # Changelog and manifestlog parts are terminated with empty chunks. The
217 216 # tree and file parts are a list of entry sections. Each entry section
218 217 # is a series of chunks terminating in an empty chunk. The list of these
219 218 # entry sections is terminated in yet another empty chunk, so we know
220 219 # we've reached the end of the tree/file list when we reach an empty
221 220 # chunk that was preceded by no non-empty chunks.
222 221
223 222 parts = 0
224 223 while parts < 2 + self._grouplistcount:
225 224 noentries = True
226 225 while True:
227 226 chunk = getchunk(self)
228 227 if not chunk:
229 228 # The first two empty chunks represent the end of the
230 229 # changelog and the manifestlog portions. The remaining
231 230 # empty chunks represent either A) the end of individual
232 231 # tree or file entries in the file list, or B) the end of
233 232 # the entire list. It's the end of the entire list if there
234 233 # were no entries (i.e. noentries is True).
235 234 if parts < 2:
236 235 parts += 1
237 236 elif noentries:
238 237 parts += 1
239 238 break
240 239 noentries = False
241 240 yield chunkheader(len(chunk))
242 241 pos = 0
243 242 while pos < len(chunk):
244 243 next = pos + 2**20
245 244 yield chunk[pos:next]
246 245 pos = next
247 246 yield closechunk()
248 247
249 248 def _unpackmanifests(self, repo, revmap, trp, prog):
250 249 self.callback = prog.increment
251 250 # no need to check for empty manifest group here:
252 251 # if the result of the merge of 1 and 2 is the same in 3 and 4,
253 252 # no new manifest will be created and the manifest group will
254 253 # be empty during the pull
255 254 self.manifestheader()
256 255 deltas = self.deltaiter()
257 256 repo.manifestlog.addgroup(deltas, revmap, trp)
258 257 prog.complete()
259 258 self.callback = None
260 259
261 260 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
262 261 expectedtotal=None):
263 262 """Add the changegroup returned by source.read() to this repo.
264 263 srctype is a string like 'push', 'pull', or 'unbundle'. url is
265 264 the URL of the repo where this changegroup is coming from.
266 265
267 266 Return an integer summarizing the change to this repo:
268 267 - nothing changed or no source: 0
269 268 - more heads than before: 1+added heads (2..n)
270 269 - fewer heads than before: -1-removed heads (-2..-n)
271 270 - number of heads stays the same: 1
272 271 """
273 272 repo = repo.unfiltered()
274 273 def csmap(x):
275 274 repo.ui.debug("add changeset %s\n" % short(x))
276 275 return len(cl)
277 276
278 277 def revmap(x):
279 278 return cl.rev(x)
280 279
281 280 changesets = files = revisions = 0
282 281
283 282 try:
284 283 # The transaction may already carry source information. In this
285 284 # case we use the top level data. We overwrite the argument
286 285 # because we need to use the top level value (if they exist)
287 286 # in this function.
288 287 srctype = tr.hookargs.setdefault('source', srctype)
289 288 url = tr.hookargs.setdefault('url', url)
290 289 repo.hook('prechangegroup',
291 290 throw=True, **pycompat.strkwargs(tr.hookargs))
292 291
293 292 # write changelog data to temp files so concurrent readers
294 293 # will not see an inconsistent view
295 294 cl = repo.changelog
296 295 cl.delayupdate(tr)
297 296 oldheads = set(cl.heads())
298 297
299 298 trp = weakref.proxy(tr)
300 299 # pull off the changeset group
301 300 repo.ui.status(_("adding changesets\n"))
302 301 clstart = len(cl)
303 302 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
304 303 total=expectedtotal)
305 304 self.callback = progress.increment
306 305
307 306 efiles = set()
308 307 def onchangelog(cl, node):
309 308 efiles.update(cl.readfiles(node))
310 309
311 310 self.changelogheader()
312 311 deltas = self.deltaiter()
313 312 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
314 313 efiles = len(efiles)
315 314
316 315 if not cgnodes:
317 316 repo.ui.develwarn('applied empty changegroup',
318 317 config='warn-empty-changegroup')
319 318 clend = len(cl)
320 319 changesets = clend - clstart
321 320 progress.complete()
322 321 self.callback = None
323 322
324 323 # pull off the manifest group
325 324 repo.ui.status(_("adding manifests\n"))
326 325 # We know that we'll never have more manifests than we had
327 326 # changesets.
328 327 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
329 328 total=changesets)
330 329 self._unpackmanifests(repo, revmap, trp, progress)
331 330
332 331 needfiles = {}
333 332 if repo.ui.configbool('server', 'validate'):
334 333 cl = repo.changelog
335 334 ml = repo.manifestlog
336 335 # validate incoming csets have their manifests
337 336 for cset in pycompat.xrange(clstart, clend):
338 337 mfnode = cl.changelogrevision(cset).manifest
339 338 mfest = ml[mfnode].readdelta()
340 339 # store file cgnodes we must see
341 340 for f, n in mfest.iteritems():
342 341 needfiles.setdefault(f, set()).add(n)
343 342
344 343 # process the files
345 344 repo.ui.status(_("adding file changes\n"))
346 345 newrevs, newfiles = _addchangegroupfiles(
347 346 repo, self, revmap, trp, efiles, needfiles)
348 347 revisions += newrevs
349 348 files += newfiles
350 349
351 350 deltaheads = 0
352 351 if oldheads:
353 352 heads = cl.heads()
354 353 deltaheads = len(heads) - len(oldheads)
355 354 for h in heads:
356 355 if h not in oldheads and repo[h].closesbranch():
357 356 deltaheads -= 1
358 357 htext = ""
359 358 if deltaheads:
360 359 htext = _(" (%+d heads)") % deltaheads
361 360
362 361 repo.ui.status(_("added %d changesets"
363 362 " with %d changes to %d files%s\n")
364 363 % (changesets, revisions, files, htext))
365 364 repo.invalidatevolatilesets()
366 365
367 366 if changesets > 0:
368 367 if 'node' not in tr.hookargs:
369 368 tr.hookargs['node'] = hex(cl.node(clstart))
370 369 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
371 370 hookargs = dict(tr.hookargs)
372 371 else:
373 372 hookargs = dict(tr.hookargs)
374 373 hookargs['node'] = hex(cl.node(clstart))
375 374 hookargs['node_last'] = hex(cl.node(clend - 1))
376 375 repo.hook('pretxnchangegroup',
377 376 throw=True, **pycompat.strkwargs(hookargs))
378 377
379 378 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
380 379 phaseall = None
381 380 if srctype in ('push', 'serve'):
382 381 # Old servers can not push the boundary themselves.
383 382 # New servers won't push the boundary if changeset already
384 383 # exists locally as secret
385 384 #
386 385 # We should not use added here but the list of all changes in
387 386 # the bundle
388 387 if repo.publishing():
389 388 targetphase = phaseall = phases.public
390 389 else:
391 390 # closer target phase computation
392 391
393 392 # Those changesets have been pushed from the
394 393 # outside, their phases are going to be pushed
395 394 # alongside. Therefore `targetphase` is
396 395 # ignored.
397 396 targetphase = phaseall = phases.draft
398 397 if added:
399 398 phases.registernew(repo, tr, targetphase, added)
400 399 if phaseall is not None:
401 400 phases.advanceboundary(repo, tr, phaseall, cgnodes)
402 401
403 402 if changesets > 0:
404 403
405 404 def runhooks():
406 405 # These hooks run when the lock releases, not when the
407 406 # transaction closes. So it's possible for the changelog
408 407 # to have changed since we last saw it.
409 408 if clstart >= len(repo):
410 409 return
411 410
412 411 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
413 412
414 413 for n in added:
415 414 args = hookargs.copy()
416 415 args['node'] = hex(n)
417 416 del args['node_last']
418 417 repo.hook("incoming", **pycompat.strkwargs(args))
419 418
420 419 newheads = [h for h in repo.heads()
421 420 if h not in oldheads]
422 421 repo.ui.log("incoming",
423 422 "%d incoming changes - new heads: %s\n",
424 423 len(added),
425 424 ', '.join([hex(c[:6]) for c in newheads]))
426 425
427 426 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
428 427 lambda tr: repo._afterlock(runhooks))
429 428 finally:
430 429 repo.ui.flush()
431 430 # never return 0 here:
432 431 if deltaheads < 0:
433 432 ret = deltaheads - 1
434 433 else:
435 434 ret = deltaheads + 1
436 435 return ret
437 436
438 437 def deltaiter(self):
439 438 """
440 439 returns an iterator of the deltas in this changegroup
441 440
442 441 Useful for passing to the underlying storage system to be stored.
443 442 """
444 443 chain = None
445 444 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
446 445 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
447 446 yield chunkdata
448 447 chain = chunkdata[0]
449 448
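# Editorial note, not part of the patch: deltaiter() threads `chain`
# through deltachunk() because the cg1 header carries no explicit delta
# base; _deltaheader() defaults it to the previously seen node. A sketch
# of the consumption pattern, with `unpacker` and apply_delta() purely
# illustrative:
#
#     for node, p1, p2, cs, deltabase, delta, flags in unpacker.deltaiter():
#         apply_delta(node, deltabase, delta)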
450 449 class cg2unpacker(cg1unpacker):
451 450 """Unpacker for cg2 streams.
452 451
453 452 cg2 streams add support for generaldelta, so the delta header
454 453 format is slightly different. All other features about the data
455 454 remain the same.
456 455 """
457 456 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
458 457 deltaheadersize = deltaheader.size
459 458 version = '02'
460 459
461 460 def _deltaheader(self, headertuple, prevnode):
462 461 node, p1, p2, deltabase, cs = headertuple
463 462 flags = 0
464 463 return node, p1, p2, deltabase, cs, flags
465 464
466 465 class cg3unpacker(cg2unpacker):
467 466 """Unpacker for cg3 streams.
468 467
469 468 cg3 streams add support for exchanging treemanifests and revlog
470 469 flags. It adds the revlog flags to the delta header and an empty chunk
471 470 separating manifests and files.
472 471 """
473 472 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
474 473 deltaheadersize = deltaheader.size
475 474 version = '03'
476 475 _grouplistcount = 2 # One list of manifests and one list of files
477 476
478 477 def _deltaheader(self, headertuple, prevnode):
479 478 node, p1, p2, deltabase, cs, flags = headertuple
480 479 return node, p1, p2, deltabase, cs, flags
481 480
482 481 def _unpackmanifests(self, repo, revmap, trp, prog):
483 482 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
484 483 for chunkdata in iter(self.filelogheader, {}):
485 484 # If we get here, there are directory manifests in the changegroup
486 485 d = chunkdata["filename"]
487 486 repo.ui.debug("adding %s revisions\n" % d)
488 487 dirlog = repo.manifestlog._revlog.dirlog(d)
489 488 deltas = self.deltaiter()
490 489 if not dirlog.addgroup(deltas, revmap, trp):
491 490 raise error.Abort(_("received dir revlog group is empty"))
492 491
493 492 class headerlessfixup(object):
494 493 def __init__(self, fh, h):
495 494 self._h = h
496 495 self._fh = fh
497 496 def read(self, n):
498 497 if self._h:
499 498 d, self._h = self._h[:n], self._h[n:]
500 499 if len(d) < n:
501 500 d += readexactly(self._fh, n - len(d))
502 501 return d
503 502 return readexactly(self._fh, n)
504 503
505 504 @interfaceutil.implementer(repository.irevisiondeltarequest)
506 505 @attr.s(slots=True, frozen=True)
507 506 class revisiondeltarequest(object):
508 507 node = attr.ib()
509 508 linknode = attr.ib()
510 509 p1node = attr.ib()
511 510 p2node = attr.ib()
512 511 basenode = attr.ib()
513 512 ellipsis = attr.ib(default=False)
514 513
515 @interfaceutil.implementer(repository.irevisiondelta)
516 @attr.s(slots=True, frozen=True)
517 class revisiondelta(object):
518 node = attr.ib()
519 p1node = attr.ib()
520 p2node = attr.ib()
521 basenode = attr.ib()
522 linknode = attr.ib()
523 flags = attr.ib()
524 baserevisionsize = attr.ib()
525 revision = attr.ib()
526 delta = attr.ib()
527
528 514 def _revisiondeltatochunks(delta, headerfn):
529 515 """Serialize a revisiondelta to changegroup chunks."""
530 516
531 517 # The captured revision delta may be encoded as a delta against
532 518 # a base revision or as a full revision. The changegroup format
533 519 # requires that everything on the wire be deltas. So for full
534 520 # revisions, we need to invent a header that says to rewrite
535 521 # data.
536 522
537 523 if delta.delta is not None:
538 524 prefix, data = b'', delta.delta
539 525 elif delta.basenode == nullid:
540 526 data = delta.revision
541 527 prefix = mdiff.trivialdiffheader(len(data))
542 528 else:
543 529 data = delta.revision
544 530 prefix = mdiff.replacediffheader(delta.baserevisionsize,
545 531 len(data))
546 532
547 533 meta = headerfn(delta)
548 534
549 535 yield chunkheader(len(meta) + len(prefix) + len(data))
550 536 yield meta
551 537 if prefix:
552 538 yield prefix
553 539 yield data
554 540
555 541 def _sortnodesnormal(store, nodes, reorder):
556 542 """Sort nodes for changegroup generation and turn into revnums."""
557 543 # for generaldelta revlogs, we linearize the revs; this will both be
558 544 # much quicker and generate a much smaller bundle
559 545 if (store._generaldelta and reorder is None) or reorder:
560 546 revs = set(store.rev(n) for n in nodes)
561 547 return dagop.linearize(revs, store.parentrevs)
562 548 else:
563 549 return sorted([store.rev(n) for n in nodes])
564 550
565 551 def _sortnodesellipsis(store, nodes, cl, lookup):
566 552 """Sort nodes for changegroup generation and turn into revnums."""
567 553 # Ellipses serving mode.
568 554 #
569 555 # In a perfect world, we'd generate better ellipsis-ified graphs
570 556 # for non-changelog revlogs. In practice, we haven't started doing
571 557 # that yet, so the resulting DAGs for the manifestlog and filelogs
572 558 # are actually full of bogus parentage on all the ellipsis
573 559 # nodes. This has the side effect that, while the contents are
574 560 # correct, the individual DAGs might be completely out of whack in
575 561 # a case like 882681bc3166 and its ancestors (back about 10
576 562 # revisions or so) in the main hg repo.
577 563 #
578 564 # The one invariant we *know* holds is that the new (potentially
579 565 # bogus) DAG shape will be valid if we order the nodes in the
580 566 # order that they're introduced in dramatis personae by the
581 567 # changelog, so what we do is we sort the non-changelog histories
582 568 # by the order in which they are used by the changelog.
583 569 key = lambda n: cl.rev(lookup(n))
584 570 return [store.rev(n) for n in sorted(nodes, key=key)]
585 571
586 def _handlerevisiondeltarequest(store, request, prevnode):
587 """Obtain a revisiondelta from a revisiondeltarequest"""
588
589 node = request.node
590 rev = store.rev(node)
591
592 # Requesting a full revision.
593 if request.basenode == nullid:
594 baserev = nullrev
595 # Requesting an explicit revision.
596 elif request.basenode is not None:
597 baserev = store.rev(request.basenode)
598 # Allowing us to choose.
599 else:
600 p1, p2 = store.parentrevs(rev)
601 dp = store.deltaparent(rev)
602
603 if dp == nullrev and store.storedeltachains:
604 # Avoid sending full revisions when delta parent is null. Pick prev
605 # in that case. It's tempting to pick p1 in this case, as p1 will
606 # be smaller in the common case. However, computing a delta against
607 # p1 may require resolving the raw text of p1, which could be
608 # expensive. The revlog caches should have prev cached, meaning
609 # less CPU for changegroup generation. There is likely room to add
610 # a flag and/or config option to control this behavior.
611 baserev = store.rev(prevnode)
612 elif dp == nullrev:
613 # revlog is configured to use full snapshot for a reason,
614 # stick to full snapshot.
615 baserev = nullrev
616 elif dp not in (p1, p2, store.rev(prevnode)):
617 # Pick prev when we can't be sure remote has the base revision.
618 baserev = store.rev(prevnode)
619 else:
620 baserev = dp
621
622 if baserev != nullrev and not store.candelta(baserev, rev):
623 baserev = nullrev
624
625 revision = None
626 delta = None
627 baserevisionsize = None
628
629 if store.iscensored(baserev) or store.iscensored(rev):
630 try:
631 revision = store.revision(node, raw=True)
632 except error.CensoredNodeError as e:
633 revision = e.tombstone
634
635 if baserev != nullrev:
636 baserevisionsize = store.rawsize(baserev)
637
638 elif baserev == nullrev:
639 revision = store.revision(node, raw=True)
640 else:
641 delta = store.revdiff(baserev, rev)
642
643 extraflags = revlog.REVIDX_ELLIPSIS if request.ellipsis else 0
644
645 return revisiondelta(
646 node=node,
647 p1node=request.p1node,
648 p2node=request.p2node,
649 linknode=request.linknode,
650 basenode=store.node(baserev),
651 flags=store.flags(rev) | extraflags,
652 baserevisionsize=baserevisionsize,
653 revision=revision,
654 delta=delta,
655 )
656
657 572 def _makenarrowdeltarequest(cl, store, ischangelog, rev, node, linkrev,
658 573 linknode, clrevtolocalrev, fullclnodes,
659 574 precomputedellipsis):
660 575 linkparents = precomputedellipsis[linkrev]
661 576 def local(clrev):
662 577 """Turn a changelog revnum into a local revnum.
663 578
664 579 The ellipsis dag is stored as revnums on the changelog,
665 580 but when we're producing ellipsis entries for
666 581 non-changelog revlogs, we need to turn those numbers into
667 582 something local. This does that for us, and during the
668 583 changelog sending phase will also expand the stored
669 584 mappings as needed.
670 585 """
671 586 if clrev == nullrev:
672 587 return nullrev
673 588
674 589 if ischangelog:
675 590 return clrev
676 591
677 592 # Walk the ellipsis-ized changelog breadth-first looking for a
678 593 # change that has been linked from the current revlog.
679 594 #
680 595 # For a flat manifest revlog only a single step should be necessary
681 596 # as all relevant changelog entries are relevant to the flat
682 597 # manifest.
683 598 #
684 599 # For a filelog or tree manifest dirlog however not every changelog
685 600 # entry will have been relevant, so we need to skip some changelog
686 601 # nodes even after ellipsis-izing.
687 602 walk = [clrev]
688 603 while walk:
689 604 p = walk[0]
690 605 walk = walk[1:]
691 606 if p in clrevtolocalrev:
692 607 return clrevtolocalrev[p]
693 608 elif p in fullclnodes:
694 609 walk.extend([pp for pp in cl.parentrevs(p)
695 610 if pp != nullrev])
696 611 elif p in precomputedellipsis:
697 612 walk.extend([pp for pp in precomputedellipsis[p]
698 613 if pp != nullrev])
699 614 else:
700 615 # In this case, we've got an ellipsis with parents
701 616 # outside the current bundle (likely an
702 617 # incremental pull). We "know" that we can use the
703 618 # value of this same revlog at whatever revision
704 619 # is pointed to by linknode. "Know" is in scare
705 620 # quotes because I haven't done enough examination
706 621 # of edge cases to convince myself this is really
707 622 # a fact - it works for all the (admittedly
708 623 # thorough) cases in our testsuite, but I would be
709 624 # somewhat unsurprised to find a case in the wild
710 625 # where this breaks down a bit. That said, I don't
711 626 # know if it would hurt anything.
712 627 for i in pycompat.xrange(rev, 0, -1):
713 628 if store.linkrev(i) == clrev:
714 629 return i
715 630 # We failed to resolve a parent for this node, so
716 631 # we crash the changegroup construction.
717 632 raise error.Abort(
718 633 'unable to resolve parent while packing %r %r'
719 634 ' for changeset %r' % (store.indexfile, rev, clrev))
720 635
721 636 return nullrev
722 637
723 638 if not linkparents or (
724 639 store.parentrevs(rev) == (nullrev, nullrev)):
725 640 p1, p2 = nullrev, nullrev
726 641 elif len(linkparents) == 1:
727 642 p1, = sorted(local(p) for p in linkparents)
728 643 p2 = nullrev
729 644 else:
730 645 p1, p2 = sorted(local(p) for p in linkparents)
731 646
732 647 p1node, p2node = store.node(p1), store.node(p2)
733 648
734 649 # TODO: try and actually send deltas for ellipsis data blocks
735 650 return revisiondeltarequest(
736 651 node=node,
737 652 p1node=p1node,
738 653 p2node=p2node,
739 654 linknode=linknode,
740 655 basenode=nullid,
741 656 ellipsis=True,
742 657 )
743 658
744 659 def deltagroup(repo, store, nodes, ischangelog, lookup, forcedeltaparentprev,
745 660 allowreorder,
746 661 units=None,
747 662 ellipses=False, clrevtolocalrev=None, fullclnodes=None,
748 663 precomputedellipsis=None):
749 664 """Calculate deltas for a set of revisions.
750 665
751 666 Is a generator of ``revisiondelta`` instances.
752 667
753 668 If units is not None, progress detail will be generated, units specifies
754 669 the type of revlog that is touched (changelog, manifest, etc.).
755 670 """
756 671 if not nodes:
757 672 return
758 673
759 674 # We perform two passes over the revisions whose data we will emit.
760 675 #
761 676 # In the first pass, we obtain information about the deltas that will
762 677 # be generated. This involves computing linknodes and adjusting the
763 678 # request to take shallow fetching into account. The end result of
764 679 # this pass is a list of "request" objects stating which deltas
765 680 # to obtain.
766 681 #
767 682 # The second pass is simply resolving the requested deltas.
768 683
769 684 cl = repo.changelog
770 685
771 686 if ischangelog:
772 687 # Changelog doesn't benefit from reordering revisions. So send
773 688 # out revisions in store order.
774 689 # TODO the API would be cleaner if this were controlled by the
775 690 # store producing the deltas.
776 691 revs = sorted(cl.rev(n) for n in nodes)
777 692 elif ellipses:
778 693 revs = _sortnodesellipsis(store, nodes, cl, lookup)
779 694 else:
780 695 revs = _sortnodesnormal(store, nodes, allowreorder)
781 696
782 697 # In the first pass, collect info about the deltas we'll be
783 698 # generating.
784 699 requests = []
785 700
786 701 # Add the parent of the first rev.
787 702 revs.insert(0, store.parentrevs(revs[0])[0])
788 703
789 704 for i in pycompat.xrange(len(revs) - 1):
790 705 prev = revs[i]
791 706 curr = revs[i + 1]
792 707
793 708 node = store.node(curr)
794 709 linknode = lookup(node)
795 710 p1node, p2node = store.parents(node)
796 711
797 712 if ellipses:
798 713 linkrev = cl.rev(linknode)
799 714 clrevtolocalrev[linkrev] = curr
800 715
801 716 # This is a node to send in full, because the changeset it
802 717 # corresponds to was a full changeset.
803 718 if linknode in fullclnodes:
804 719 requests.append(revisiondeltarequest(
805 720 node=node,
806 721 p1node=p1node,
807 722 p2node=p2node,
808 723 linknode=linknode,
809 724 basenode=None,
810 725 ))
811 726
812 727 elif linkrev not in precomputedellipsis:
813 728 pass
814 729 else:
815 730 requests.append(_makenarrowdeltarequest(
816 731 cl, store, ischangelog, curr, node, linkrev, linknode,
817 732 clrevtolocalrev, fullclnodes,
818 733 precomputedellipsis))
819 734 else:
820 735 requests.append(revisiondeltarequest(
821 736 node=node,
822 737 p1node=p1node,
823 738 p2node=p2node,
824 739 linknode=linknode,
825 740 basenode=store.node(prev) if forcedeltaparentprev else None,
826 741 ))
827 742
828 743 # We expect the first pass to be fast, so we only engage the progress
829 744 # meter for constructing the revision deltas.
830 745 progress = None
831 746 if units is not None:
832 747 progress = repo.ui.makeprogress(_('bundling'), unit=units,
833 748 total=len(requests))
834 749
835 prevnode = store.node(revs[0])
836 for i, request in enumerate(requests):
750 for i, delta in enumerate(store.emitrevisiondeltas(requests)):
837 751 if progress:
838 752 progress.update(i + 1)
839 753
840 delta = _handlerevisiondeltarequest(store, request, prevnode)
841
842 754 yield delta
843 755
844 prevnode = request.node
845
846 756 if progress:
847 757 progress.complete()
848 758
849 759 class cgpacker(object):
850 760 def __init__(self, repo, filematcher, version, allowreorder,
851 761 builddeltaheader, manifestsend,
852 762 forcedeltaparentprev=False,
853 763 bundlecaps=None, ellipses=False,
854 764 shallow=False, ellipsisroots=None, fullnodes=None):
855 765 """Given a source repo, construct a bundler.
856 766
857 767 filematcher is a matcher that matches on files to include in the
858 768 changegroup. Used to facilitate sparse changegroups.
859 769
860 770 allowreorder controls whether reordering of revisions is allowed.
861 771 This value is used when ``bundle.reorder`` is ``auto`` or isn't
862 772 set.
863 773
864 774 forcedeltaparentprev indicates whether delta parents must be against
865 775 the previous revision in a delta group. This should only be used for
866 776 compatibility with changegroup version 1.
867 777
868 778 builddeltaheader is a callable that constructs the header for a group
869 779 delta.
870 780
871 781 manifestsend is a chunk to send after manifests have been fully emitted.
872 782
873 783 ellipses indicates whether ellipsis serving mode is enabled.
874 784
875 785 bundlecaps is optional and can be used to specify the set of
876 786 capabilities which can be used to build the bundle. While bundlecaps is
877 787 unused in core Mercurial, extensions rely on this feature to communicate
878 788 capabilities to customize the changegroup packer.
879 789
880 790 shallow indicates whether shallow data might be sent. The packer may
881 791 need to pack file contents not introduced by the changes being packed.
882 792
883 793 fullnodes is the set of changelog nodes which should not be ellipsis
884 794 nodes. We store this rather than the set of nodes that should be
885 795 ellipsis because for very large histories we expect this to be
886 796 significantly smaller.
887 797 """
888 798 assert filematcher
889 799 self._filematcher = filematcher
890 800
891 801 self.version = version
892 802 self._forcedeltaparentprev = forcedeltaparentprev
893 803 self._builddeltaheader = builddeltaheader
894 804 self._manifestsend = manifestsend
895 805 self._ellipses = ellipses
896 806
897 807 # Set of capabilities we can use to build the bundle.
898 808 if bundlecaps is None:
899 809 bundlecaps = set()
900 810 self._bundlecaps = bundlecaps
901 811 self._isshallow = shallow
902 812 self._fullclnodes = fullnodes
903 813
904 814 # Maps ellipsis revs to their roots at the changelog level.
905 815 self._precomputedellipsis = ellipsisroots
906 816
907 817 # experimental config: bundle.reorder
908 818 reorder = repo.ui.config('bundle', 'reorder')
909 819 if reorder == 'auto':
910 820 self._reorder = allowreorder
911 821 else:
912 822 self._reorder = stringutil.parsebool(reorder)
913 823
914 824 self._repo = repo
915 825
916 826 if self._repo.ui.verbose and not self._repo.ui.debugflag:
917 827 self._verbosenote = self._repo.ui.note
918 828 else:
919 829 self._verbosenote = lambda s: None
920 830
921 831 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
922 832 """Yield a sequence of changegroup byte chunks."""
923 833
924 834 repo = self._repo
925 835 cl = repo.changelog
926 836
927 837 self._verbosenote(_('uncompressed size of bundle content:\n'))
928 838 size = 0
929 839
930 840 clstate, deltas = self._generatechangelog(cl, clnodes)
931 841 for delta in deltas:
932 842 for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
933 843 size += len(chunk)
934 844 yield chunk
935 845
936 846 close = closechunk()
937 847 size += len(close)
938 848 yield closechunk()
939 849
940 850 self._verbosenote(_('%8.i (changelog)\n') % size)
941 851
942 852 clrevorder = clstate['clrevorder']
943 853 mfs = clstate['mfs']
944 854 changedfiles = clstate['changedfiles']
945 855
946 856 # We need to make sure that the linkrev in the changegroup refers to
947 857 # the first changeset that introduced the manifest or file revision.
948 858 # The fastpath is usually safer than the slowpath, because the filelogs
949 859 # are walked in revlog order.
950 860 #
951 861 # When taking the slowpath with reorder=None and the manifest revlog
952 862 # uses generaldelta, the manifest may be walked in the "wrong" order.
953 863 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
954 864 # cc0ff93d0c0c).
955 865 #
956 866 # When taking the fastpath, we are only vulnerable to reordering
957 867 # of the changelog itself. The changelog never uses generaldelta, so
958 868 # it is only reordered when reorder=True. To handle this case, we
959 869 # simply take the slowpath, which already has the 'clrevorder' logic.
960 870 # This was also fixed in cc0ff93d0c0c.
961 871 fastpathlinkrev = fastpathlinkrev and not self._reorder
962 872 # Treemanifests don't work correctly with fastpathlinkrev
963 873 # either, because we don't discover which directory nodes to
964 874 # send along with files. This could probably be fixed.
965 875 fastpathlinkrev = fastpathlinkrev and (
966 876 'treemanifest' not in repo.requirements)
967 877
968 878 fnodes = {} # needed file nodes
969 879
970 880 size = 0
971 881 it = self.generatemanifests(
972 882 commonrevs, clrevorder, fastpathlinkrev, mfs, fnodes, source,
973 883 clstate['clrevtomanifestrev'])
974 884
975 885 for dir, deltas in it:
976 886 if dir:
977 887 assert self.version == b'03'
978 888 chunk = _fileheader(dir)
979 889 size += len(chunk)
980 890 yield chunk
981 891
982 892 for delta in deltas:
983 893 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
984 894 for chunk in chunks:
985 895 size += len(chunk)
986 896 yield chunk
987 897
988 898 close = closechunk()
989 899 size += len(close)
990 900 yield close
991 901
992 902 self._verbosenote(_('%8.i (manifests)\n') % size)
993 903 yield self._manifestsend
994 904
995 905 mfdicts = None
996 906 if self._ellipses and self._isshallow:
997 907 mfdicts = [(self._repo.manifestlog[n].read(), lr)
998 908 for (n, lr) in mfs.iteritems()]
999 909
1000 910 mfs.clear()
1001 911 clrevs = set(cl.rev(x) for x in clnodes)
1002 912
1003 913 it = self.generatefiles(changedfiles, commonrevs,
1004 914 source, mfdicts, fastpathlinkrev,
1005 915 fnodes, clrevs)
1006 916
1007 917 for path, deltas in it:
1008 918 h = _fileheader(path)
1009 919 size = len(h)
1010 920 yield h
1011 921
1012 922 for delta in deltas:
1013 923 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1014 924 for chunk in chunks:
1015 925 size += len(chunk)
1016 926 yield chunk
1017 927
1018 928 close = closechunk()
1019 929 size += len(close)
1020 930 yield close
1021 931
1022 932 self._verbosenote(_('%8.i %s\n') % (size, path))
1023 933
1024 934 yield closechunk()
1025 935
1026 936 if clnodes:
1027 937 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
1028 938
1029 939 def _generatechangelog(self, cl, nodes):
1030 940 """Generate data for changelog chunks.
1031 941
1032 942 Returns a 2-tuple of a dict containing state and an iterable of
1033 943 byte chunks. The state will not be fully populated until the
1034 944 chunk stream has been fully consumed.
1035 945 """
1036 946 clrevorder = {}
1037 947 mfs = {} # needed manifests
1038 948 mfl = self._repo.manifestlog
1039 949 # TODO violates storage abstraction.
1040 950 mfrevlog = mfl._revlog
1041 951 changedfiles = set()
1042 952 clrevtomanifestrev = {}
1043 953
1044 954 # Callback for the changelog, used to collect changed files and
1045 955 # manifest nodes.
1046 956 # Returns the linkrev node (identity in the changelog case).
1047 957 def lookupcl(x):
1048 958 c = cl.read(x)
1049 959 clrevorder[x] = len(clrevorder)
1050 960
1051 961 if self._ellipses:
1052 962 # Only update mfs if x is going to be sent. Otherwise we
1053 963 # end up with bogus linkrevs specified for manifests and
1054 964 # we skip some manifest nodes that we should otherwise
1055 965 # have sent.
1056 966 if (x in self._fullclnodes
1057 967 or cl.rev(x) in self._precomputedellipsis):
1058 968 n = c[0]
1059 969 # Record the first changeset introducing this manifest
1060 970 # version.
1061 971 mfs.setdefault(n, x)
1062 972 # Set this narrow-specific dict so we have the lowest
1063 973 # manifest revnum to look up for this cl revnum. (Part of
1064 974 # mapping changelog ellipsis parents to manifest ellipsis
1065 975 # parents)
1066 976 clrevtomanifestrev.setdefault(cl.rev(x), mfrevlog.rev(n))
1067 977 # We can't trust the changed files list in the changeset if the
1068 978 # client requested a shallow clone.
1069 979 if self._isshallow:
1070 980 changedfiles.update(mfl[c[0]].read().keys())
1071 981 else:
1072 982 changedfiles.update(c[3])
1073 983 else:
1074 984
1075 985 n = c[0]
1076 986 # record the first changeset introducing this manifest version
1077 987 mfs.setdefault(n, x)
1078 988 # Record a complete list of potentially-changed files in
1079 989 # this manifest.
1080 990 changedfiles.update(c[3])
1081 991
1082 992 return x
1083 993
1084 994 state = {
1085 995 'clrevorder': clrevorder,
1086 996 'mfs': mfs,
1087 997 'changedfiles': changedfiles,
1088 998 'clrevtomanifestrev': clrevtomanifestrev,
1089 999 }
1090 1000
1091 1001 gen = deltagroup(
1092 1002 self._repo, cl, nodes, True, lookupcl,
1093 1003 self._forcedeltaparentprev,
1094 1004 # Reorder settings are currently ignored for changelog.
1095 1005 True,
1096 1006 ellipses=self._ellipses,
1097 1007 units=_('changesets'),
1098 1008 clrevtolocalrev={},
1099 1009 fullclnodes=self._fullclnodes,
1100 1010 precomputedellipsis=self._precomputedellipsis)
1101 1011
1102 1012 return state, gen
1103 1013
1104 1014 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
1105 1015 fnodes, source, clrevtolocalrev):
1106 1016 """Returns an iterator of changegroup chunks containing manifests.
1107 1017
1108 1018 `source` is unused here, but is used by extensions like remotefilelog to
1109 1019 change what is sent based on pulls vs pushes, etc.
1110 1020 """
1111 1021 repo = self._repo
1112 1022 mfl = repo.manifestlog
1113 1023 dirlog = mfl._revlog.dirlog
1114 1024 tmfnodes = {'': mfs}
1115 1025
1116 1026 # Callback for the manifest, used to collect linkrevs for filelog
1117 1027 # revisions.
1118 1028 # Returns the linkrev node (collected in lookupcl).
1119 1029 def makelookupmflinknode(dir, nodes):
1120 1030 if fastpathlinkrev:
1121 1031 assert not dir
1122 1032 return mfs.__getitem__
1123 1033
1124 1034 def lookupmflinknode(x):
1125 1035 """Callback for looking up the linknode for manifests.
1126 1036
1127 1037 Returns the linkrev node for the specified manifest.
1128 1038
1129 1039 SIDE EFFECT:
1130 1040
1131 1041 1) fclnodes gets populated with the list of relevant
1132 1042 file nodes if we're not using fastpathlinkrev
1133 1043 2) When treemanifests are in use, collects treemanifest nodes
1134 1044 to send
1135 1045
1136 1046 Note that this means manifests must be completely sent to
1137 1047 the client before you can trust the list of files and
1138 1048 treemanifests to send.
1139 1049 """
1140 1050 clnode = nodes[x]
1141 1051 mdata = mfl.get(dir, x).readfast(shallow=True)
1142 1052 for p, n, fl in mdata.iterentries():
1143 1053 if fl == 't': # subdirectory manifest
1144 1054 subdir = dir + p + '/'
1145 1055 tmfclnodes = tmfnodes.setdefault(subdir, {})
1146 1056 tmfclnode = tmfclnodes.setdefault(n, clnode)
1147 1057 if clrevorder[clnode] < clrevorder[tmfclnode]:
1148 1058 tmfclnodes[n] = clnode
1149 1059 else:
1150 1060 f = dir + p
1151 1061 fclnodes = fnodes.setdefault(f, {})
1152 1062 fclnode = fclnodes.setdefault(n, clnode)
1153 1063 if clrevorder[clnode] < clrevorder[fclnode]:
1154 1064 fclnodes[n] = clnode
1155 1065 return clnode
1156 1066 return lookupmflinknode
1157 1067
1158 1068 while tmfnodes:
1159 1069 dir, nodes = tmfnodes.popitem()
1160 1070 store = dirlog(dir)
1161 1071
1162 1072 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
1163 1073 prunednodes = []
1164 1074 else:
1165 1075 frev, flr = store.rev, store.linkrev
1166 1076 prunednodes = [n for n in nodes
1167 1077 if flr(frev(n)) not in commonrevs]
1168 1078
1169 1079 if dir and not prunednodes:
1170 1080 continue
1171 1081
1172 1082 lookupfn = makelookupmflinknode(dir, nodes)
1173 1083
1174 1084 deltas = deltagroup(
1175 1085 self._repo, store, prunednodes, False, lookupfn,
1176 1086 self._forcedeltaparentprev, self._reorder,
1177 1087 ellipses=self._ellipses,
1178 1088 units=_('manifests'),
1179 1089 clrevtolocalrev=clrevtolocalrev,
1180 1090 fullclnodes=self._fullclnodes,
1181 1091 precomputedellipsis=self._precomputedellipsis)
1182 1092
1183 1093 yield dir, deltas
1184 1094
1185 1095 # The 'source' parameter is useful for extensions
1186 1096 def generatefiles(self, changedfiles, commonrevs, source,
1187 1097 mfdicts, fastpathlinkrev, fnodes, clrevs):
1188 1098 changedfiles = list(filter(self._filematcher, changedfiles))
1189 1099
1190 1100 if not fastpathlinkrev:
1191 1101 def normallinknodes(unused, fname):
1192 1102 return fnodes.get(fname, {})
1193 1103 else:
1194 1104 cln = self._repo.changelog.node
1195 1105
1196 1106 def normallinknodes(store, fname):
1197 1107 flinkrev = store.linkrev
1198 1108 fnode = store.node
1199 1109 revs = ((r, flinkrev(r)) for r in store)
1200 1110 return dict((fnode(r), cln(lr))
1201 1111 for r, lr in revs if lr in clrevs)
1202 1112
1203 1113 clrevtolocalrev = {}
1204 1114
1205 1115 if self._isshallow:
1206 1116 # In a shallow clone, the linknodes callback needs to also include
1207 1117 # those file nodes that are in the manifests we sent but weren't
1208 1118 # introduced by those manifests.
1209 1119 commonctxs = [self._repo[c] for c in commonrevs]
1210 1120 clrev = self._repo.changelog.rev
1211 1121
1212 1122 # Defining this function has a side-effect of overriding the
1213 1123 # function of the same name that was passed in as an argument.
1214 1124 # TODO have caller pass in appropriate function.
1215 1125 def linknodes(flog, fname):
1216 1126 for c in commonctxs:
1217 1127 try:
1218 1128 fnode = c.filenode(fname)
1219 1129 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1220 1130 except error.ManifestLookupError:
1221 1131 pass
1222 1132 links = normallinknodes(flog, fname)
1223 1133 if len(links) != len(mfdicts):
1224 1134 for mf, lr in mfdicts:
1225 1135 fnode = mf.get(fname, None)
1226 1136 if fnode in links:
1227 1137 links[fnode] = min(links[fnode], lr, key=clrev)
1228 1138 elif fnode:
1229 1139 links[fnode] = lr
1230 1140 return links
1231 1141 else:
1232 1142 linknodes = normallinknodes
1233 1143
1234 1144 repo = self._repo
1235 1145 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
1236 1146 total=len(changedfiles))
1237 1147 for i, fname in enumerate(sorted(changedfiles)):
1238 1148 filerevlog = repo.file(fname)
1239 1149 if not filerevlog:
1240 1150 raise error.Abort(_("empty or missing file data for %s") %
1241 1151 fname)
1242 1152
1243 1153 clrevtolocalrev.clear()
1244 1154
1245 1155 linkrevnodes = linknodes(filerevlog, fname)
1246 1156 # Lookup for filenodes, we collected the linkrev nodes above in the
1247 1157 # fastpath case and with lookupmf in the slowpath case.
1248 1158 def lookupfilelog(x):
1249 1159 return linkrevnodes[x]
1250 1160
1251 1161 frev, flr = filerevlog.rev, filerevlog.linkrev
1252 1162 filenodes = [n for n in linkrevnodes
1253 1163 if flr(frev(n)) not in commonrevs]
1254 1164
1255 1165 if not filenodes:
1256 1166 continue
1257 1167
1258 1168 progress.update(i + 1, item=fname)
1259 1169
1260 1170 deltas = deltagroup(
1261 1171 self._repo, filerevlog, filenodes, False, lookupfilelog,
1262 1172 self._forcedeltaparentprev, self._reorder,
1263 1173 ellipses=self._ellipses,
1264 1174 clrevtolocalrev=clrevtolocalrev,
1265 1175 fullclnodes=self._fullclnodes,
1266 1176 precomputedellipsis=self._precomputedellipsis)
1267 1177
1268 1178 yield fname, deltas
1269 1179
1270 1180 progress.complete()
1271 1181
1272 1182 def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
1273 1183 shallow=False, ellipsisroots=None, fullnodes=None):
1274 1184 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1275 1185 d.node, d.p1node, d.p2node, d.linknode)
1276 1186
1277 1187 return cgpacker(repo, filematcher, b'01',
1278 1188 allowreorder=None,
1279 1189 builddeltaheader=builddeltaheader,
1280 1190 manifestsend=b'',
1281 1191 forcedeltaparentprev=True,
1282 1192 bundlecaps=bundlecaps,
1283 1193 ellipses=ellipses,
1284 1194 shallow=shallow,
1285 1195 ellipsisroots=ellipsisroots,
1286 1196 fullnodes=fullnodes)
1287 1197
1288 1198 def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
1289 1199 shallow=False, ellipsisroots=None, fullnodes=None):
1290 1200 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1291 1201 d.node, d.p1node, d.p2node, d.basenode, d.linknode)
1292 1202
1293 1203 # Since generaldelta is directly supported by cg2, reordering
1294 1204 # generally doesn't help, so we disable it by default (treating
1295 1205 # bundle.reorder=auto just like bundle.reorder=False).
1296 1206 return cgpacker(repo, filematcher, b'02',
1297 1207 allowreorder=False,
1298 1208 builddeltaheader=builddeltaheader,
1299 1209 manifestsend=b'',
1300 1210 bundlecaps=bundlecaps,
1301 1211 ellipses=ellipses,
1302 1212 shallow=shallow,
1303 1213 ellipsisroots=ellipsisroots,
1304 1214 fullnodes=fullnodes)
1305 1215
1306 1216 def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
1307 1217 shallow=False, ellipsisroots=None, fullnodes=None):
1308 1218 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1309 1219 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
1310 1220
1311 1221 return cgpacker(repo, filematcher, b'03',
1312 1222 allowreorder=False,
1313 1223 builddeltaheader=builddeltaheader,
1314 1224 manifestsend=closechunk(),
1315 1225 bundlecaps=bundlecaps,
1316 1226 ellipses=ellipses,
1317 1227 shallow=shallow,
1318 1228 ellipsisroots=ellipsisroots,
1319 1229 fullnodes=fullnodes)
1320 1230
1321 1231 _packermap = {'01': (_makecg1packer, cg1unpacker),
1322 1232 # cg2 adds support for exchanging generaldelta
1323 1233 '02': (_makecg2packer, cg2unpacker),
1324 1234 # cg3 adds support for exchanging revlog flags and treemanifests
1325 1235 '03': (_makecg3packer, cg3unpacker),
1326 1236 }
1327 1237
1328 1238 def allsupportedversions(repo):
1329 1239 versions = set(_packermap.keys())
1330 1240 if not (repo.ui.configbool('experimental', 'changegroup3') or
1331 1241 repo.ui.configbool('experimental', 'treemanifest') or
1332 1242 'treemanifest' in repo.requirements):
1333 1243 versions.discard('03')
1334 1244 return versions
1335 1245
1336 1246 # Changegroup versions that can be applied to the repo
1337 1247 def supportedincomingversions(repo):
1338 1248 return allsupportedversions(repo)
1339 1249
1340 1250 # Changegroup versions that can be created from the repo
1341 1251 def supportedoutgoingversions(repo):
1342 1252 versions = allsupportedversions(repo)
1343 1253 if 'treemanifest' in repo.requirements:
1344 1254 # Versions 01 and 02 support only flat manifests and it's just too
1345 1255 # expensive to convert between the flat manifest and tree manifest on
1346 1256 # the fly. Since tree manifests are hashed differently, all of history
1347 1257 # would have to be converted. Instead, we simply don't even pretend to
1348 1258 # support versions 01 and 02.
1349 1259 versions.discard('01')
1350 1260 versions.discard('02')
1351 1261 if repository.NARROW_REQUIREMENT in repo.requirements:
1352 1262 # Versions 01 and 02 don't support revlog flags, and we need to
1353 1263 # support that for stripping and unbundling to work.
1354 1264 versions.discard('01')
1355 1265 versions.discard('02')
1356 1266 if LFS_REQUIREMENT in repo.requirements:
1357 1267 # Versions 01 and 02 don't support revlog flags, and we need to
1358 1268 # mark LFS entries with REVIDX_EXTSTORED.
1359 1269 versions.discard('01')
1360 1270 versions.discard('02')
1361 1271
1362 1272 return versions
1363 1273
1364 1274 def localversion(repo):
1365 1275 # Finds the best version to use for bundles that are meant to be used
1366 1276 # locally, such as those from strip and shelve, and temporary bundles.
1367 1277 return max(supportedoutgoingversions(repo))
1368 1278
1369 1279 def safeversion(repo):
1370 1280 # Finds the smallest version that it's safe to assume clients of the repo
1371 1281 # will support. For example, all hg versions that support generaldelta also
1372 1282 # support changegroup 02.
1373 1283 versions = supportedoutgoingversions(repo)
1374 1284 if 'generaldelta' in repo.requirements:
1375 1285 versions.discard('01')
1376 1286 assert versions
1377 1287 return min(versions)
1378 1288
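# Editorial note, not part of the patch: the version negotiation above is
# plain set arithmetic over _packermap. For example, on a repository with
# the 'generaldelta' requirement and no tree manifests:
#
#     versions = supportedoutgoingversions(repo)  # {'01', '02'}
#     safeversion(repo)    # '02' (generaldelta discards '01', min wins)
#     localversion(repo)   # '02' (max of the supported set)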
1379 1289 def getbundler(version, repo, bundlecaps=None, filematcher=None,
1380 1290 ellipses=False, shallow=False, ellipsisroots=None,
1381 1291 fullnodes=None):
1382 1292 assert version in supportedoutgoingversions(repo)
1383 1293
1384 1294 if filematcher is None:
1385 1295 filematcher = matchmod.alwaysmatcher(repo.root, '')
1386 1296
1387 1297 if version == '01' and not filematcher.always():
1388 1298 raise error.ProgrammingError('version 01 changegroups do not support '
1389 1299 'sparse file matchers')
1390 1300
1391 1301 if ellipses and version in (b'01', b'02'):
1392 1302 raise error.Abort(
1393 1303 _('ellipsis nodes require at least cg3 on client and server, '
1394 1304 'but negotiated version %s') % version)
1395 1305
1396 1306 # Requested files could include files not in the local store. So
1397 1307 # filter those out.
1398 1308 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
1399 1309 filematcher)
1400 1310
1401 1311 fn = _packermap[version][0]
1402 1312 return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
1403 1313 shallow=shallow, ellipsisroots=ellipsisroots,
1404 1314 fullnodes=fullnodes)
1405 1315
1406 1316 def getunbundler(version, fh, alg, extras=None):
1407 1317 return _packermap[version][1](fh, alg, extras=extras)
1408 1318
1409 1319 def _changegroupinfo(repo, nodes, source):
1410 1320 if repo.ui.verbose or source == 'bundle':
1411 1321 repo.ui.status(_("%d changesets found\n") % len(nodes))
1412 1322 if repo.ui.debugflag:
1413 1323 repo.ui.debug("list of changesets:\n")
1414 1324 for node in nodes:
1415 1325 repo.ui.debug("%s\n" % hex(node))
1416 1326
1417 1327 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1418 1328 bundlecaps=None):
1419 1329 cgstream = makestream(repo, outgoing, version, source,
1420 1330 fastpath=fastpath, bundlecaps=bundlecaps)
1421 1331 return getunbundler(version, util.chunkbuffer(cgstream), None,
1422 1332 {'clcount': len(outgoing.missing) })
1423 1333
1424 1334 def makestream(repo, outgoing, version, source, fastpath=False,
1425 1335 bundlecaps=None, filematcher=None):
1426 1336 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1427 1337 filematcher=filematcher)
1428 1338
1429 1339 repo = repo.unfiltered()
1430 1340 commonrevs = outgoing.common
1431 1341 csets = outgoing.missing
1432 1342 heads = outgoing.missingheads
1433 1343 # We go through the fast path if we get told to, or if all (unfiltered)
1434 1344 # heads have been requested (since we then know all linkrevs will
1435 1345 # be pulled by the client).
1436 1346 heads.sort()
1437 1347 fastpathlinkrev = fastpath or (
1438 1348 repo.filtername is None and heads == sorted(repo.heads()))
1439 1349
1440 1350 repo.hook('preoutgoing', throw=True, source=source)
1441 1351 _changegroupinfo(repo, csets, source)
1442 1352 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1443 1353
1444 1354 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1445 1355 revisions = 0
1446 1356 files = 0
1447 1357 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1448 1358 total=expectedfiles)
1449 1359 for chunkdata in iter(source.filelogheader, {}):
1450 1360 files += 1
1451 1361 f = chunkdata["filename"]
1452 1362 repo.ui.debug("adding %s revisions\n" % f)
1453 1363 progress.increment()
1454 1364 fl = repo.file(f)
1455 1365 o = len(fl)
1456 1366 try:
1457 1367 deltas = source.deltaiter()
1458 1368 if not fl.addgroup(deltas, revmap, trp):
1459 1369 raise error.Abort(_("received file revlog group is empty"))
1460 1370 except error.CensoredBaseError as e:
1461 1371 raise error.Abort(_("received delta base is censored: %s") % e)
1462 1372 revisions += len(fl) - o
1463 1373 if f in needfiles:
1464 1374 needs = needfiles[f]
1465 1375 for new in pycompat.xrange(o, len(fl)):
1466 1376 n = fl.node(new)
1467 1377 if n in needs:
1468 1378 needs.remove(n)
1469 1379 else:
1470 1380 raise error.Abort(
1471 1381 _("received spurious file revlog entry"))
1472 1382 if not needs:
1473 1383 del needfiles[f]
1474 1384 progress.complete()
1475 1385
1476 1386 for f, needs in needfiles.iteritems():
1477 1387 fl = repo.file(f)
1478 1388 for n in needs:
1479 1389 try:
1480 1390 fl.rev(n)
1481 1391 except error.LookupError:
1482 1392 raise error.Abort(
1483 1393 _('missing file data for %s:%s - run hg verify') %
1484 1394 (f, hex(n)))
1485 1395
1486 1396 return revisions, files
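
The second file in the changeset wires the new method into file storage: filelog gains an `emitrevisiondeltas()` that simply forwards to its underlying revlog, where the delta-base selection logic deleted from changegroup.py above presumably now lives. With that in place, changegroup code can treat any store uniformly; a brief illustrative usage, assuming `requests` is built as sketched earlier:

    fl = repo.file('foo.txt')        # a filelog implementing ifilestorage
    for delta in fl.emitrevisiondeltas(requests):
        ...
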
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
@@ -1,269 +1,272 @@
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from . import (
11 11 error,
12 12 repository,
13 13 revlog,
14 14 )
15 15 from .utils import (
16 16 interfaceutil,
17 17 )
18 18
19 19 @interfaceutil.implementer(repository.ifilestorage)
20 20 class filelog(object):
21 21 def __init__(self, opener, path):
22 22 self._revlog = revlog.revlog(opener,
23 23 '/'.join(('data', path + '.i')),
24 24 censorable=True)
25 25 # full name of the user visible file, relative to the repository root
26 26 self.filename = path
27 27 self.index = self._revlog.index
28 28 self.version = self._revlog.version
29 29 self.storedeltachains = self._revlog.storedeltachains
30 30 self._generaldelta = self._revlog._generaldelta
31 31
32 32 def __len__(self):
33 33 return len(self._revlog)
34 34
35 35 def __iter__(self):
36 36 return self._revlog.__iter__()
37 37
38 38 def revs(self, start=0, stop=None):
39 39 return self._revlog.revs(start=start, stop=stop)
40 40
41 41 def parents(self, node):
42 42 return self._revlog.parents(node)
43 43
44 44 def parentrevs(self, rev):
45 45 return self._revlog.parentrevs(rev)
46 46
47 47 def rev(self, node):
48 48 return self._revlog.rev(node)
49 49
50 50 def node(self, rev):
51 51 return self._revlog.node(rev)
52 52
53 53 def lookup(self, node):
54 54 return self._revlog.lookup(node)
55 55
56 56 def linkrev(self, rev):
57 57 return self._revlog.linkrev(rev)
58 58
59 59 def flags(self, rev):
60 60 return self._revlog.flags(rev)
61 61
62 62 def commonancestorsheads(self, node1, node2):
63 63 return self._revlog.commonancestorsheads(node1, node2)
64 64
65 65 def descendants(self, revs):
66 66 return self._revlog.descendants(revs)
67 67
68 68 def headrevs(self):
69 69 return self._revlog.headrevs()
70 70
71 71 def heads(self, start=None, stop=None):
72 72 return self._revlog.heads(start, stop)
73 73
74 74 def children(self, node):
75 75 return self._revlog.children(node)
76 76
77 77 def deltaparent(self, rev):
78 78 return self._revlog.deltaparent(rev)
79 79
80 80 def candelta(self, baserev, rev):
81 81 return self._revlog.candelta(baserev, rev)
82 82
83 83 def iscensored(self, rev):
84 84 return self._revlog.iscensored(rev)
85 85
86 86 def rawsize(self, rev):
87 87 return self._revlog.rawsize(rev)
88 88
89 89 def checkhash(self, text, node, p1=None, p2=None, rev=None):
90 90 return self._revlog.checkhash(text, node, p1=p1, p2=p2, rev=rev)
91 91
92 92 def revision(self, node, _df=None, raw=False):
93 93 return self._revlog.revision(node, _df=_df, raw=raw)
94 94
95 95 def revdiff(self, rev1, rev2):
96 96 return self._revlog.revdiff(rev1, rev2)
97 97
98 def emitrevisiondeltas(self, requests):
99 return self._revlog.emitrevisiondeltas(requests)
100
98 101 def addrevision(self, revisiondata, transaction, linkrev, p1, p2,
99 102 node=None, flags=revlog.REVIDX_DEFAULT_FLAGS,
100 103 cachedelta=None):
101 104 return self._revlog.addrevision(revisiondata, transaction, linkrev,
102 105 p1, p2, node=node, flags=flags,
103 106 cachedelta=cachedelta)
104 107
105 108 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
106 109 return self._revlog.addgroup(deltas, linkmapper, transaction,
107 110 addrevisioncb=addrevisioncb)
108 111
109 112 def getstrippoint(self, minlink):
110 113 return self._revlog.getstrippoint(minlink)
111 114
112 115 def strip(self, minlink, transaction):
113 116 return self._revlog.strip(minlink, transaction)
114 117
115 118 def files(self):
116 119 return self._revlog.files()
117 120
118 121 def checksize(self):
119 122 return self._revlog.checksize()
120 123
121 124 def read(self, node):
122 125 t = self.revision(node)
123 126 if not t.startswith('\1\n'):
124 127 return t
125 128 s = t.index('\1\n', 2)
126 129 return t[s + 2:]
127 130
128 131 def add(self, text, meta, transaction, link, p1=None, p2=None):
129 132 if meta or text.startswith('\1\n'):
130 133 text = revlog.packmeta(meta, text)
131 134 return self.addrevision(text, transaction, link, p1, p2)
132 135
133 136 def renamed(self, node):
134 137 if self.parents(node)[0] != revlog.nullid:
135 138 return False
136 139 t = self.revision(node)
137 140 m = revlog.parsemeta(t)[0]
138 141 # copy and copyrev occur in pairs. In rare cases due to bugs,
139 142 # one can occur without the other.
140 143 if m and "copy" in m and "copyrev" in m:
141 144 return (m["copy"], revlog.bin(m["copyrev"]))
142 145 return False
143 146
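
The ``\1\n`` framing that ``add()``, ``read()`` and ``renamed()`` handle above is simple enough to demonstrate standalone. A minimal sketch of the metadata header, mirroring ``revlog.packmeta``/``parsemeta`` (the path and node below are made up):

    # Copy metadata sits between two '\1\n' markers at the head of the
    # stored fulltext; each entry is 'key: value\n'.
    meta = {'copy': 'old/path.txt', 'copyrev': 'a' * 40}  # hypothetical
    header = ''.join('%s: %s\n' % (k, meta[k]) for k in sorted(meta))
    stored = '\1\n' + header + '\1\n' + 'file contents\n'

    # read() strips the header by finding the closing marker:
    s = stored.index('\1\n', 2)
    assert stored[s + 2:] == 'file contents\n'
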
144 147 def size(self, rev):
145 148 """return the size of a given revision"""
146 149
147 150 # for revisions with renames, we have to go the slow way
148 151 node = self.node(rev)
149 152 if self.renamed(node):
150 153 return len(self.read(node))
151 154 if self.iscensored(rev):
152 155 return 0
153 156
154 157 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
155 158 return self._revlog.size(rev)
156 159
157 160 def cmp(self, node, text):
158 161 """compare text with a given file revision
159 162
160 163 returns True if text is different than what is stored.
161 164 """
162 165
163 166 t = text
164 167 if text.startswith('\1\n'):
165 168 t = '\1\n\1\n' + text
166 169
167 170 samehashes = not self._revlog.cmp(node, t)
168 171 if samehashes:
169 172 return False
170 173
171 174 # censored files compare against the empty file
172 175 if self.iscensored(self.rev(node)):
173 176 return text != ''
174 177
175 178 # renaming a file produces a different hash, even if the data
176 179 # remains unchanged. Check if it's the case (slow):
177 180 if self.renamed(node):
178 181 t2 = self.read(node)
179 182 return t2 != text
180 183
181 184 return True
182 185
183 186 @property
184 187 def filename(self):
185 188 return self._revlog.filename
186 189
187 190 @filename.setter
188 191 def filename(self, value):
189 192 self._revlog.filename = value
190 193
191 194 # TODO these aren't part of the interface and aren't internal methods.
192 195 # Callers should be fixed to not use them.
193 196 @property
194 197 def indexfile(self):
195 198 return self._revlog.indexfile
196 199
197 200 @indexfile.setter
198 201 def indexfile(self, value):
199 202 self._revlog.indexfile = value
200 203
201 204 @property
202 205 def datafile(self):
203 206 return self._revlog.datafile
204 207
205 208 @property
206 209 def opener(self):
207 210 return self._revlog.opener
208 211
209 212 @property
210 213 def _lazydeltabase(self):
211 214 return self._revlog._lazydeltabase
212 215
213 216 @_lazydeltabase.setter
214 217 def _lazydeltabase(self, value):
215 218 self._revlog._lazydeltabase = value
216 219
217 220 @property
218 221 def _deltabothparents(self):
219 222 return self._revlog._deltabothparents
220 223
221 224 @_deltabothparents.setter
222 225 def _deltabothparents(self, value):
223 226 self._revlog._deltabothparents = value
224 227
225 228 @property
226 229 def _inline(self):
227 230 return self._revlog._inline
228 231
229 232 @property
230 233 def _withsparseread(self):
231 234 return getattr(self._revlog, '_withsparseread', False)
232 235
233 236 @property
234 237 def _srmingapsize(self):
235 238 return self._revlog._srmingapsize
236 239
237 240 @property
238 241 def _srdensitythreshold(self):
239 242 return self._revlog._srdensitythreshold
240 243
241 244 def _deltachain(self, rev, stoprev=None):
242 245 return self._revlog._deltachain(rev, stoprev)
243 246
244 247 def chainbase(self, rev):
245 248 return self._revlog.chainbase(rev)
246 249
247 250 def chainlen(self, rev):
248 251 return self._revlog.chainlen(rev)
249 252
250 253 def clone(self, tr, destrevlog, **kwargs):
251 254 if not isinstance(destrevlog, filelog):
252 255 raise error.ProgrammingError('expected filelog to clone()')
253 256
254 257 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
255 258
256 259 def start(self, rev):
257 260 return self._revlog.start(rev)
258 261
259 262 def end(self, rev):
260 263 return self._revlog.end(rev)
261 264
262 265 def length(self, rev):
263 266 return self._revlog.length(rev)
264 267
265 268 def compress(self, data):
266 269 return self._revlog.compress(data)
267 270
268 271 def _addrevision(self, *args, **kwargs):
269 272 return self._revlog._addrevision(*args, **kwargs)
@@ -1,1387 +1,1411 b''
1 1 # repository.py - Interfaces and base classes for repositories and peers.
2 2 #
3 3 # Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from . import (
12 12 error,
13 13 )
14 14 from .utils import (
15 15 interfaceutil,
16 16 )
17 17
18 18 # When narrowing is finalized and no longer subject to format changes,
19 19 # we should move this to just "narrow" or similar.
20 20 NARROW_REQUIREMENT = 'narrowhg-experimental'
21 21
22 22 class ipeerconnection(interfaceutil.Interface):
23 23 """Represents a "connection" to a repository.
24 24
25 25 This is the base interface for representing a connection to a repository.
26 26 It holds basic properties and methods applicable to all peer types.
27 27
28 28 This is not a complete interface definition and should not be used
29 29 outside of this module.
30 30 """
31 31 ui = interfaceutil.Attribute("""ui.ui instance""")
32 32
33 33 def url():
34 34 """Returns a URL string representing this peer.
35 35
36 36 Currently, implementations expose the raw URL used to construct the
37 37 instance. It may contain credentials as part of the URL. The
38 38 expectations of the value aren't well-defined and this could lead to
39 39 data leakage.
40 40
41 41 TODO audit/clean consumers and more clearly define the contents of this
42 42 value.
43 43 """
44 44
45 45 def local():
46 46 """Returns a local repository instance.
47 47
48 48 If the peer represents a local repository, returns an object that
49 49 can be used to interface with it. Otherwise returns ``None``.
50 50 """
51 51
52 52 def peer():
53 53 """Returns an object conforming to this interface.
54 54
55 55 Most implementations will ``return self``.
56 56 """
57 57
58 58 def canpush():
59 59 """Returns a boolean indicating if this peer can be pushed to."""
60 60
61 61 def close():
62 62 """Close the connection to this peer.
63 63
64 64 This is called when the peer will no longer be used. Resources
65 65 associated with the peer should be cleaned up.
66 66 """
67 67
68 68 class ipeercapabilities(interfaceutil.Interface):
69 69 """Peer sub-interface related to capabilities."""
70 70
71 71 def capable(name):
72 72 """Determine support for a named capability.
73 73
74 74 Returns ``False`` if capability not supported.
75 75
76 76 Returns ``True`` if boolean capability is supported. Returns a string
77 77 if capability support is non-boolean.
78 78
79 79 Capability strings may or may not map to wire protocol capabilities.
80 80 """
81 81
82 82 def requirecap(name, purpose):
83 83 """Require a capability to be present.
84 84
85 85 Raises a ``CapabilityError`` if the capability isn't present.
86 86 """
87 87
88 88 class ipeercommands(interfaceutil.Interface):
89 89 """Client-side interface for communicating over the wire protocol.
90 90
91 91 This interface is used as a gateway to the Mercurial wire protocol.
92 92 Methods commonly call wire protocol commands of the same name.
93 93 """
94 94
95 95 def branchmap():
96 96 """Obtain heads in named branches.
97 97
98 98 Returns a dict mapping branch name to an iterable of nodes that are
99 99 heads on that branch.
100 100 """
101 101
102 102 def capabilities():
103 103 """Obtain capabilities of the peer.
104 104
105 105 Returns a set of string capabilities.
106 106 """
107 107
108 108 def clonebundles():
109 109 """Obtains the clone bundles manifest for the repo.
110 110
111 111 Returns the manifest as unparsed bytes.
112 112 """
113 113
114 114 def debugwireargs(one, two, three=None, four=None, five=None):
115 115 """Used to facilitate debugging of arguments passed over the wire."""
116 116
117 117 def getbundle(source, **kwargs):
118 118 """Obtain remote repository data as a bundle.
119 119
120 120 This command is how the bulk of repository data is transferred from
121 121 the peer to the local repository
122 122
123 123 Returns a generator of bundle data.
124 124 """
125 125
126 126 def heads():
127 127 """Determine all known head revisions in the peer.
128 128
129 129 Returns an iterable of binary nodes.
130 130 """
131 131
132 132 def known(nodes):
133 133 """Determine whether multiple nodes are known.
134 134
135 135 Accepts an iterable of nodes whose presence to check for.
136 136
137 137 Returns an iterable of booleans indicating whether the corresponding node
138 138 at that index is known to the peer.
139 139 """
140 140
141 141 def listkeys(namespace):
142 142 """Obtain all keys in a pushkey namespace.
143 143
144 144 Returns an iterable of key names.
145 145 """
146 146
147 147 def lookup(key):
148 148 """Resolve a value to a known revision.
149 149
150 150 Returns a binary node of the resolved revision on success.
151 151 """
152 152
153 153 def pushkey(namespace, key, old, new):
154 154 """Set a value using the ``pushkey`` protocol.
155 155
156 156 Arguments correspond to the pushkey namespace and key to operate on and
157 157 the old and new values for that key.
158 158
159 159 Returns a string with the peer result. The value inside varies by the
160 160 namespace.
161 161 """
162 162
163 163 def stream_out():
164 164 """Obtain streaming clone data.
165 165
166 166 Successful result should be a generator of data chunks.
167 167 """
168 168
169 169 def unbundle(bundle, heads, url):
170 170 """Transfer repository data to the peer.
171 171
172 172 This is how the bulk of data during a push is transferred.
173 173
174 174 Returns the integer number of heads added to the peer.
175 175 """
176 176
177 177 class ipeerlegacycommands(interfaceutil.Interface):
178 178 """Interface for implementing support for legacy wire protocol commands.
179 179
180 180 Wire protocol commands transition to legacy status when they are no longer
181 181 used by modern clients. To facilitate identifying which commands are
182 182 legacy, the interfaces are split.
183 183 """
184 184
185 185 def between(pairs):
186 186 """Obtain nodes between pairs of nodes.
187 187
188 188 ``pairs`` is an iterable of node pairs.
189 189
190 190 Returns an iterable of iterables of nodes corresponding to each
191 191 requested pair.
192 192 """
193 193
194 194 def branches(nodes):
195 195 """Obtain ancestor changesets of specific nodes back to a branch point.
196 196
197 197 For each requested node, the peer finds the first ancestor node that is
198 198 a DAG root or is a merge.
199 199
200 200 Returns an iterable of iterables with the resolved values for each node.
201 201 """
202 202
203 203 def changegroup(nodes, source):
204 204 """Obtain a changegroup with data for descendants of specified nodes."""
205 205
206 206 def changegroupsubset(bases, heads, source):
207 207 pass
208 208
209 209 class ipeercommandexecutor(interfaceutil.Interface):
210 210 """Represents a mechanism to execute remote commands.
211 211
212 212 This is the primary interface for requesting that wire protocol commands
213 213 be executed. Instances of this interface are active in a context manager
214 214 and have a well-defined lifetime. When the context manager exits, all
215 215 outstanding requests are waited on.
216 216 """
217 217
218 218 def callcommand(name, args):
219 219 """Request that a named command be executed.
220 220
221 221 Receives the command name and a dictionary of command arguments.
222 222
223 223 Returns a ``concurrent.futures.Future`` that will resolve to the
224 224 result of that command request. That exact value is left up to
225 225 the implementation and possibly varies by command.
226 226
227 227 Not all commands can coexist with other commands in an executor
228 228 instance: it depends on the underlying wire protocol transport being
229 229 used and the command itself.
230 230
231 231 Implementations MAY call ``sendcommands()`` automatically if the
232 232 requested command cannot coexist with other commands in this executor.
233 233
234 234 Implementations MAY call ``sendcommands()`` automatically when the
235 235 future's ``result()`` is called. So, consumers using multiple
236 236 commands with an executor MUST ensure that ``result()`` is not called
237 237 until all command requests have been issued.
238 238 """
239 239
240 240 def sendcommands():
241 241 """Trigger submission of queued command requests.
242 242
243 243 Not all transports submit commands as soon as they are requested to
244 244 run. When called, this method forces queued command requests to be
245 245 issued. It will no-op if all commands have already been sent.
246 246
247 247 When called, no more new commands may be issued with this executor.
248 248 """
249 249
250 250 def close():
251 251 """Signal that this command request is finished.
252 252
253 253 When called, no more new commands may be issued. All outstanding
254 254 commands that have previously been issued are waited on before
255 255 returning. This not only includes waiting for the futures to resolve,
256 256 but also waiting for all response data to arrive. In other words,
257 257 calling this waits for all on-wire state for issued command requests
258 258 to finish.
259 259
260 260 When used as a context manager, this method is called when exiting the
261 261 context manager.
262 262
263 263 This method may call ``sendcommands()`` if there are buffered commands.
264 264 """
265 265
266 266 class ipeerrequests(interfaceutil.Interface):
267 267 """Interface for executing commands on a peer."""
268 268
269 269 def commandexecutor():
270 270 """A context manager that resolves to an ipeercommandexecutor.
271 271
272 272 The object this resolves to can be used to issue command requests
273 273 to the peer.
274 274
275 275 Callers should call its ``callcommand`` method to issue command
276 276 requests.
277 277
278 278 A new executor should be obtained for each distinct set of commands
279 279 (possibly just a single command) that the consumer wants to execute
280 280 as part of a single operation or round trip. This is because some
281 281 peers are half-duplex and/or don't support persistent connections.
282 282 e.g. in the case of HTTP peers, commands sent to an executor represent
283 283 a single HTTP request. While some peers may support multiple command
284 284 sends over the wire per executor, consumers need to code to the least
285 285 capable peer. So it should be assumed that command executors buffer
286 286 called commands until they are told to send them and that each
287 287 command executor could result in a new connection or wire-level request
288 288 being issued.
289 289 """
290 290
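
Concretely, the intended calling pattern looks something like this (a sketch assuming ``peer`` implements ``ipeerrequests``; the command names and ``nodes`` list are illustrative):

    nodes = []  # list of binary nodes to test (placeholder)

    with peer.commandexecutor() as executor:
        fheads = executor.callcommand('heads', {})
        fknown = executor.callcommand('known', {'nodes': nodes})
        # Per callcommand()'s contract, only read results after every
        # command has been requested: result() may force submission.
        heads = fheads.result()
        known = fknown.result()
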
291 291 class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
292 292 """Unified interface for peer repositories.
293 293
294 294 All peer instances must conform to this interface.
295 295 """
296 296
297 297 @interfaceutil.implementer(ipeerbase)
298 298 class peer(object):
299 299 """Base class for peer repositories."""
300 300
301 301 def capable(self, name):
302 302 caps = self.capabilities()
303 303 if name in caps:
304 304 return True
305 305
306 306 name = '%s=' % name
307 307 for cap in caps:
308 308 if cap.startswith(name):
309 309 return cap[len(name):]
310 310
311 311 return False
312 312
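
The lookup logic above is self-contained enough to exercise directly. A runnable sketch (the capability strings are hypothetical):

    caps = {'branchmap', 'bundle2=HG20'}   # pretend peer capabilities

    def capable(name):
        # Mirrors peer.capable(): True for boolean capabilities, the text
        # after 'name=' for parameterized ones, False when unsupported.
        if name in caps:
            return True
        prefix = name + '='
        for cap in caps:
            if cap.startswith(prefix):
                return cap[len(prefix):]
        return False

    assert capable('branchmap') is True
    assert capable('bundle2') == 'HG20'
    assert capable('unknown') is False
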
313 313 def requirecap(self, name, purpose):
314 314 if self.capable(name):
315 315 return
316 316
317 317 raise error.CapabilityError(
318 318 _('cannot %s; remote repository does not support the %r '
319 319 'capability') % (purpose, name))
320 320
321 321 class irevisiondelta(interfaceutil.Interface):
322 322 """Represents a delta between one revision and another.
323 323
324 324 Instances convey enough information to allow a revision to be exchanged
325 325 with another repository.
326 326
327 327 Instances represent the fulltext revision data or a delta against
328 328 another revision. Therefore the ``revision`` and ``delta`` attributes
329 329 are mutually exclusive.
330 330
331 331 Typically used for changegroup generation.
332 332 """
333 333
334 334 node = interfaceutil.Attribute(
335 335 """20 byte node of this revision.""")
336 336
337 337 p1node = interfaceutil.Attribute(
338 338 """20 byte node of 1st parent of this revision.""")
339 339
340 340 p2node = interfaceutil.Attribute(
341 341 """20 byte node of 2nd parent of this revision.""")
342 342
343 343 linknode = interfaceutil.Attribute(
344 344 """20 byte node of the changelog revision this node is linked to.""")
345 345
346 346 flags = interfaceutil.Attribute(
347 347 """2 bytes of integer flags that apply to this revision.""")
348 348
349 349 basenode = interfaceutil.Attribute(
350 350 """20 byte node of the revision this data is a delta against.
351 351
352 352 ``nullid`` indicates that the revision is a full revision and not
353 353 a delta.
354 354 """)
355 355
356 356 baserevisionsize = interfaceutil.Attribute(
357 357 """Size of base revision this delta is against.
358 358
359 359 May be ``None`` if ``basenode`` is ``nullid``.
360 360 """)
361 361
362 362 revision = interfaceutil.Attribute(
363 363 """Raw fulltext of revision data for this node.""")
364 364
365 365 delta = interfaceutil.Attribute(
366 366 """Delta between ``basenode`` and ``node``.
367 367
368 368 Stored in the bdiff delta format.
369 369 """)
370 370
371 371 class irevisiondeltarequest(interfaceutil.Interface):
372 372 """Represents a request to generate an ``irevisiondelta``."""
373 373
374 374 node = interfaceutil.Attribute(
375 375 """20 byte node of revision being requested.""")
376 376
377 377 p1node = interfaceutil.Attribute(
378 378 """20 byte node of 1st parent of revision.""")
379 379
380 380 p2node = interfaceutil.Attribute(
381 381 """20 byte node of 2nd parent of revision.""")
382 382
383 383 linknode = interfaceutil.Attribute(
384 384 """20 byte node to store in ``linknode`` attribute.""")
385 385
386 386 basenode = interfaceutil.Attribute(
387 387 """Base revision that delta should be generated against.
388 388
389 389 If ``nullid``, the derived ``irevisiondelta`` should have its
390 390 ``revision`` field populated and no delta should be generated.
391 391
392 392 If ``None``, the delta may be generated against any revision that
393 393 is an ancestor of this revision. Or a full revision may be used.
394 394
395 395 If any other value, the delta should be produced against that
396 396 revision.
397 397 """)
398 398
399 399 ellipsis = interfaceutil.Attribute(
400 400 """Boolean on whether the ellipsis flag should be set.""")
401 401
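
Since this interface only names attributes, a request object can be as small as an attrs class. A hypothetical sketch (any object exposing these attributes would satisfy the interface):

    import attr  # stands in for mercurial.thirdparty.attr

    @attr.s(slots=True, frozen=True)
    class revisiondeltarequest(object):
        node = attr.ib()      # 20 byte node being requested
        p1node = attr.ib()
        p2node = attr.ib()
        linknode = attr.ib()  # changelog node to record on the delta
        basenode = attr.ib()  # nullid, None, or an explicit base node
        ellipsis = attr.ib(default=False)
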
402 402 class ifilerevisionssequence(interfaceutil.Interface):
403 403 """Contains index data for all revisions of a file.
404 404
405 405 Types implementing this behave like lists of tuples. The index
406 406 in the list corresponds to the revision number. The values contain
407 407 index metadata.
408 408
409 409 The *null* revision (revision number -1) is always the last item
410 410 in the index.
411 411 """
412 412
413 413 def __len__():
414 414 """The total number of revisions."""
415 415
416 416 def __getitem__(rev):
417 417 """Returns the object having a specific revision number.
418 418
419 419 Returns an 8-tuple with the following fields:
420 420
421 421 offset+flags
422 422 Contains the offset and flags for the revision. 64-bit unsigned
423 423 integer where the first 6 bytes are the offset and the next 2 bytes
424 424 are flags. The offset can be 0 if it is not used by the store.
425 425 compressed size
426 426 Size of the revision data in the store. It can be 0 if it isn't
427 427 needed by the store.
428 428 uncompressed size
429 429 Fulltext size. It can be 0 if it isn't needed by the store.
430 430 base revision
431 431 Revision number of revision the delta for storage is encoded
432 432 against. -1 indicates not encoded against a base revision.
433 433 link revision
434 434 Revision number of changelog revision this entry is related to.
435 435 p1 revision
436 436 Revision number of 1st parent. -1 if no 1st parent.
437 437 p2 revision
438 438 Revision number of 2nd parent. -1 if no 2nd parent.
439 439 node
440 440 Binary node value for this revision number.
441 441
442 442 Negative values should index off the end of the sequence. ``-1``
443 443 should return the null revision. ``-2`` should return the most
444 444 recent revision.
445 445 """
446 446
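
Decoding an entry is plain tuple unpacking plus bit arithmetic. A sketch (the helper name is made up):

    def parseindexentry(entry):
        # entry is the 8-tuple documented above.
        (offsetflags, compressedsize, fullsize, baserev,
         linkrev, p1rev, p2rev, node) = entry
        offset = offsetflags >> 16     # upper 6 bytes
        flags = offsetflags & 0xFFFF   # lower 2 bytes
        return offset, flags, node
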
447 447 def __contains__(rev):
448 448 """Whether a revision number exists."""
449 449
450 450 def insert(i, entry):
451 451 """Add an item to the index at specific revision."""
452 452
453 453 class ifileindex(interfaceutil.Interface):
454 454 """Storage interface for index data of a single file.
455 455
456 456 File storage data is divided into index metadata and data storage.
457 457 This interface defines the index portion of the interface.
458 458
459 459 The index logically consists of:
460 460
461 461 * A mapping between revision numbers and nodes.
462 462 * DAG data (storing and querying the relationship between nodes).
463 463 * Metadata to facilitate storage.
464 464 """
465 465 index = interfaceutil.Attribute(
466 466 """An ``ifilerevisionssequence`` instance.""")
467 467
468 468 def __len__():
469 469 """Obtain the number of revisions stored for this file."""
470 470
471 471 def __iter__():
472 472 """Iterate over revision numbers for this file."""
473 473
474 474 def revs(start=0, stop=None):
475 475 """Iterate over revision numbers for this file, with control."""
476 476
477 477 def parents(node):
478 478 """Returns a 2-tuple of parent nodes for a revision.
479 479
480 480 Values will be ``nullid`` if the parent is empty.
481 481 """
482 482
483 483 def parentrevs(rev):
484 484 """Like parents() but operates on revision numbers."""
485 485
486 486 def rev(node):
487 487 """Obtain the revision number given a node.
488 488
489 489 Raises ``error.LookupError`` if the node is not known.
490 490 """
491 491
492 492 def node(rev):
493 493 """Obtain the node value given a revision number.
494 494
495 495 Raises ``IndexError`` if the node is not known.
496 496 """
497 497
498 498 def lookup(node):
499 499 """Attempt to resolve a value to a node.
500 500
501 501 Value can be a binary node, hex node, revision number, or a string
502 502 that can be converted to an integer.
503 503
504 504 Raises ``error.LookupError`` if a node could not be resolved.
505 505 """
506 506
507 507 def linkrev(rev):
508 508 """Obtain the changeset revision number a revision is linked to."""
509 509
510 510 def flags(rev):
511 511 """Obtain flags used to affect storage of a revision."""
512 512
513 513 def iscensored(rev):
514 514 """Return whether a revision's content has been censored."""
515 515
516 516 def commonancestorsheads(node1, node2):
517 517 """Obtain an iterable of nodes containing heads of common ancestors.
518 518
519 519 See ``ancestor.commonancestorsheads()``.
520 520 """
521 521
522 522 def descendants(revs):
523 523 """Obtain descendant revision numbers for a set of revision numbers.
524 524
525 525 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
526 526 """
527 527
528 528 def headrevs():
529 529 """Obtain a list of revision numbers that are DAG heads.
530 530
531 531 The list is sorted oldest to newest.
532 532
533 533 TODO determine if sorting is required.
534 534 """
535 535
536 536 def heads(start=None, stop=None):
537 537 """Obtain a list of nodes that are DAG heads, with control.
538 538
539 539 The set of revisions examined can be limited by specifying
540 540 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
541 541 iterable of nodes. DAG traversal starts at earlier revision
542 542 ``start`` and iterates forward until any node in ``stop`` is
543 543 encountered.
544 544 """
545 545
546 546 def children(node):
547 547 """Obtain nodes that are children of a node.
548 548
549 549 Returns a list of nodes.
550 550 """
551 551
552 552 def deltaparent(rev):
553 553 """"Return the revision that is a suitable parent to delta against."""
554 554
555 555 def candelta(baserev, rev):
556 556 """"Whether a delta can be generated between two revisions."""
557 557
558 558 class ifiledata(interfaceutil.Interface):
559 559 """Storage interface for data storage of a specific file.
560 560
561 561 This complements ``ifileindex`` and provides an interface for accessing
562 562 data for a tracked file.
563 563 """
564 564 def rawsize(rev):
565 565 """The size of the fulltext data for a revision as stored."""
566 566
567 567 def size(rev):
568 568 """Obtain the fulltext size of file data.
569 569
570 570 Any metadata is excluded from size measurements. Use ``rawsize()`` if
571 571 metadata size is important.
572 572 """
573 573
574 574 def checkhash(fulltext, node, p1=None, p2=None, rev=None):
575 575 """Validate the stored hash of a given fulltext and node.
576 576
577 577 Raises ``error.RevlogError`` if hash validation fails.
578 578 """
579 579
580 580 def revision(node, raw=False):
581 581 """"Obtain fulltext data for a node.
582 582
583 583 By default, any storage transformations are applied before the data
584 584 is returned. If ``raw`` is True, non-raw storage transformations
585 585 are not applied.
586 586
587 587 The fulltext data may contain a header containing metadata. Most
588 588 consumers should use ``read()`` to obtain the actual file data.
589 589 """
590 590
591 591 def read(node):
592 592 """Resolve file fulltext data.
593 593
594 594 This is similar to ``revision()`` except any metadata in the data
595 595 headers is stripped.
596 596 """
597 597
598 598 def renamed(node):
599 599 """Obtain copy metadata for a node.
600 600
601 601 Returns ``False`` if no copy metadata is stored or a 2-tuple of
602 602 (path, node) from which this revision was copied.
603 603 """
604 604
605 605 def cmp(node, fulltext):
606 606 """Compare fulltext to another revision.
607 607
608 608 Returns True if the fulltext is different from what is stored.
609 609
610 610 This takes copy metadata into account.
611 611
612 612 TODO better document the copy metadata and censoring logic.
613 613 """
614 614
615 615 def revdiff(rev1, rev2):
616 616 """Obtain a delta between two revision numbers.
617 617
618 618 Operates on raw data in the store (``revision(node, raw=True)``).
619 619
620 620 The returned data is the result of ``bdiff.bdiff`` on the raw
621 621 revision data.
622 622 """
623 623
624 def emitrevisiondeltas(requests):
625 """Produce ``irevisiondelta`` from ``irevisiondeltarequest``s.
626
627 Given an iterable of objects conforming to the ``irevisiondeltarequest``
628 interface, emits objects conforming to the ``irevisiondelta``
629 interface.
630
631 This method is a generator.
632
633 ``irevisiondelta`` objects should be emitted in the same order as the
634 ``irevisiondeltarequest`` objects that were passed in.
635
636 The emitted objects MUST conform to their corresponding
637 ``irevisiondeltarequest``. Namely, they must respect any requests
638 for building a delta from a specific ``basenode`` if defined.
639
640 When sending deltas, implementations must take into account whether
641 the client has the base revision before encoding a delta against that
642 revision. A revision encountered previously in ``requests`` is
643 always a suitable base revision. An example of a bad delta is a delta
644 against a non-ancestor revision. Another example of a bad delta is a
645 delta against a censored revision.
646 """
647
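
Putting the request and delta interfaces together, a consumer drives this method roughly as follows. This is a schematic sketch reusing the hypothetical ``revisiondeltarequest`` class above; ``store`` is any ``ifilestorage`` implementer (e.g. a filelog) and ``linknodes`` is assumed to map nodes to changelog nodes:

    from mercurial.node import nullid

    def emitfulltexts(store, nodes, linknodes):
        # One request per node; basenode=nullid asks for full revisions.
        requests = [revisiondeltarequest(node=n,
                                         p1node=store.parents(n)[0],
                                         p2node=store.parents(n)[1],
                                         linknode=linknodes[n],
                                         basenode=nullid)
                    for n in nodes]
        # Emission order matches request order, so zip() pairs them up.
        for req, delta in zip(requests, store.emitrevisiondeltas(requests)):
            assert delta.node == req.node
            # revision and delta are mutually exclusive on irevisiondelta.
            yield delta.revision if delta.delta is None else delta.delta
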
624 648 class ifilemutation(interfaceutil.Interface):
625 649 """Storage interface for mutation events of a tracked file."""
626 650
627 651 def add(filedata, meta, transaction, linkrev, p1, p2):
628 652 """Add a new revision to the store.
629 653
630 654 Takes file data, dictionary of metadata, a transaction, linkrev,
631 655 and parent nodes.
632 656
633 657 Returns the node that was added.
634 658
635 659 May no-op if a revision matching the supplied data is already stored.
636 660 """
637 661
638 662 def addrevision(revisiondata, transaction, linkrev, p1, p2, node=None,
639 663 flags=0, cachedelta=None):
640 664 """Add a new revision to the store.
641 665
642 666 This is similar to ``add()`` except it operates at a lower level.
643 667
644 668 The data passed in already contains a metadata header, if any.
645 669
646 670 ``node`` and ``flags`` can be used to define the expected node and
647 671 the flags to use with storage.
648 672
649 673 ``add()`` is usually called when adding files from e.g. the working
650 674 directory. ``addrevision()`` is often called by ``add()`` and for
651 675 scenarios where revision data has already been computed, such as when
652 676 applying raw data from a peer repo.
653 677 """
654 678
655 679 def addgroup(deltas, linkmapper, transaction, addrevisioncb=None):
656 680 """Process a series of deltas for storage.
657 681
658 682 ``deltas`` is an iterable of 7-tuples of
659 683 (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
660 684 to add.
661 685
662 686 The ``delta`` field contains ``mpatch`` data to apply to a base
663 687 revision, identified by ``deltabase``. The base node can be
664 688 ``nullid``, in which case the header from the delta can be ignored
665 689 and the delta used as the fulltext.
666 690
667 691 ``addrevisioncb`` should be called for each node as it is committed.
668 692
669 693 Returns a list of nodes that were processed. A node will be in the list
670 694 even if it existed in the store previously.
671 695 """
672 696
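
A minimal ``deltas`` iterable for a single full revision might look like this. A sketch only: with a ``nullid`` base the payload here is a trivial patch that replaces the empty string, which is one way to satisfy the "delta used as the fulltext" case described above:

    from mercurial import mdiff
    from mercurial.node import nullid

    def fulltextdeltas(node, p1, p2, linknode, fulltext):
        # flags=0: no special storage processing requested.
        delta = mdiff.trivialdiffheader(len(fulltext)) + fulltext
        yield (node, p1, p2, linknode, nullid, delta, 0)

    # fl.addgroup(fulltextdeltas(node, p1, p2, linknode, text),
    #             linkmapper, transaction)
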
673 697 def getstrippoint(minlink):
674 698 """Find the minimum revision that must be stripped to strip a linkrev.
675 699
676 700 Returns a 2-tuple containing the minimum revision number and a set
677 701 of all revisions numbers that would be broken by this strip.
678 702
679 703 TODO this is highly revlog centric and should be abstracted into
680 704 a higher-level deletion API. ``repair.strip()`` relies on this.
681 705 """
682 706
683 707 def strip(minlink, transaction):
684 708 """Remove storage of items starting at a linkrev.
685 709
686 710 This uses ``getstrippoint()`` to determine the first node to remove.
687 711 Then it effectively truncates storage for all revisions after that.
688 712
689 713 TODO this is highly revlog centric and should be abstracted into a
690 714 higher-level deletion API.
691 715 """
692 716
693 717 class ifilestorage(ifileindex, ifiledata, ifilemutation):
694 718 """Complete storage interface for a single tracked file."""
695 719
696 720 version = interfaceutil.Attribute(
697 721 """Version number of storage.
698 722
699 723 TODO this feels revlog centric and could likely be removed.
700 724 """)
701 725
702 726 storedeltachains = interfaceutil.Attribute(
703 727 """Whether the store stores deltas.
704 728
705 729 TODO deltachains are revlog centric. This can probably be removed
706 730 once there are better abstractions for obtaining/writing
707 731 data.
708 732 """)
709 733
710 734 _generaldelta = interfaceutil.Attribute(
711 735 """Whether deltas can be against any parent revision.
712 736
713 737 TODO this is used by changegroup code and it could probably be
714 738 folded into another API.
715 739 """)
716 740
717 741 def files():
718 742 """Obtain paths that are backing storage for this file.
719 743
720 744 TODO this is used heavily by verify code and there should probably
721 745 be a better API for that.
722 746 """
723 747
724 748 def checksize():
725 749 """Obtain the expected sizes of backing files.
726 750
727 751 TODO this is used by verify and it should not be part of the interface.
728 752 """
729 753
730 754 class idirs(interfaceutil.Interface):
731 755 """Interface representing a collection of directories from paths.
732 756
733 757 This interface is essentially a derived data structure representing
734 758 directories from a collection of paths.
735 759 """
736 760
737 761 def addpath(path):
738 762 """Add a path to the collection.
739 763
740 764 All directories in the path will be added to the collection.
741 765 """
742 766
743 767 def delpath(path):
744 768 """Remove a path from the collection.
745 769
746 770 If the removal was the last path in a particular directory, the
747 771 directory is removed from the collection.
748 772 """
749 773
750 774 def __iter__():
751 775 """Iterate over the directories in this collection of paths."""
752 776
753 777 def __contains__(path):
754 778 """Whether a specific directory is in this collection."""
755 779
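
A tiny illustration of the derived structure (a sketch; it assumes directories are reference-counted the way Mercurial's own ``dirs`` helpers behave, including the root prefix ``''``):

    from collections import Counter

    class dirs(object):
        def __init__(self):
            self._dirs = Counter()
        def _prefixes(self, path):
            # Yields 'a/b', 'a', '' for a path like 'a/b/c.txt'.
            while True:
                path = path.rpartition('/')[0]
                yield path
                if not path:
                    return
        def addpath(self, path):
            for d in self._prefixes(path):
                self._dirs[d] += 1
        def delpath(self, path):
            for d in self._prefixes(path):
                self._dirs[d] -= 1
                if not self._dirs[d]:
                    del self._dirs[d]
        def __iter__(self):
            return iter(self._dirs)
        def __contains__(self, d):
            return d in self._dirs

    d = dirs()
    d.addpath('a/b/c.txt')
    assert 'a/b' in d and 'a' in d and '' in d
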
756 780 class imanifestdict(interfaceutil.Interface):
757 781 """Interface representing a manifest data structure.
758 782
759 783 A manifest is effectively a dict mapping paths to entries. Each entry
760 784 consists of a binary node and extra flags affecting that entry.
761 785 """
762 786
763 787 def __getitem__(path):
764 788 """Returns the binary node value for a path in the manifest.
765 789
766 790 Raises ``KeyError`` if the path does not exist in the manifest.
767 791
768 792 Equivalent to ``self.find(path)[0]``.
769 793 """
770 794
771 795 def find(path):
772 796 """Returns the entry for a path in the manifest.
773 797
774 798 Returns a 2-tuple of (node, flags).
775 799
776 800 Raises ``KeyError`` if the path does not exist in the manifest.
777 801 """
778 802
779 803 def __len__():
780 804 """Return the number of entries in the manifest."""
781 805
782 806 def __nonzero__():
783 807 """Returns True if the manifest has entries, False otherwise."""
784 808
785 809 __bool__ = __nonzero__
786 810
787 811 def __setitem__(path, node):
788 812 """Define the node value for a path in the manifest.
789 813
790 814 If the path is already in the manifest, its flags will be copied to
791 815 the new entry.
792 816 """
793 817
794 818 def __contains__(path):
795 819 """Whether a path exists in the manifest."""
796 820
797 821 def __delitem__(path):
798 822 """Remove a path from the manifest.
799 823
800 824 Raises ``KeyError`` if the path is not in the manifest.
801 825 """
802 826
803 827 def __iter__():
804 828 """Iterate over paths in the manifest."""
805 829
806 830 def iterkeys():
807 831 """Iterate over paths in the manifest."""
808 832
809 833 def keys():
810 834 """Obtain a list of paths in the manifest."""
811 835
812 836 def filesnotin(other, match=None):
813 837 """Obtain the set of paths in this manifest but not in another.
814 838
815 839 ``match`` is an optional matcher function to be applied to both
816 840 manifests.
817 841
818 842 Returns a set of paths.
819 843 """
820 844
821 845 def dirs():
822 846 """Returns an object implementing the ``idirs`` interface."""
823 847
824 848 def hasdir(dir):
825 849 """Returns a bool indicating if a directory is in this manifest."""
826 850
827 851 def matches(match):
828 852 """Generate a new manifest filtered through a matcher.
829 853
830 854 Returns an object conforming to the ``imanifestdict`` interface.
831 855 """
832 856
833 857 def walk(match):
834 858 """Generator of paths in manifest satisfying a matcher.
835 859
836 860 This is equivalent to ``self.matches(match).iterkeys()`` except a new
837 861 manifest object is not created.
838 862
839 863 If the matcher has explicit files listed and they don't exist in
840 864 the manifest, ``match.bad()`` is called for each missing file.
841 865 """
842 866
843 867 def diff(other, match=None, clean=False):
844 868 """Find differences between this manifest and another.
845 869
846 870 This manifest is compared to ``other``.
847 871
848 872 If ``match`` is provided, the two manifests are filtered against this
849 873 matcher and only entries satisfying the matcher are compared.
850 874
851 875 If ``clean`` is True, unchanged files are included in the returned
852 876 object.
853 877
854 878 Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
855 879 the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
856 880 represents the node and flags for this manifest and ``(node2, flag2)``
857 881 are the same for the other manifest.
858 882 """
859 883
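
Interpreting the returned mapping (a sketch; a side on which the path is missing typically comes back as ``(None, '')``):

    d = m1.diff(m2)   # m1, m2: imanifestdict instances
    for path, ((n1, fl1), (n2, fl2)) in d.iteritems():
        if n1 is None:
            pass      # path exists only in m2 (added there)
        elif n2 is None:
            pass      # path exists only in m1 (removed in m2)
        else:
            pass      # node and/or flags differ between the two
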
860 884 def setflag(path, flag):
861 885 """Set the flag value for a given path.
862 886
863 887 Raises ``KeyError`` if the path is not already in the manifest.
864 888 """
865 889
866 890 def get(path, default=None):
867 891 """Obtain the node value for a path or a default value if missing."""
868 892
869 893 def flags(path, default=''):
870 894 """Return the flags value for a path or a default value if missing."""
871 895
872 896 def copy():
873 897 """Return a copy of this manifest."""
874 898
875 899 def items():
876 900 """Returns an iterable of (path, node) for items in this manifest."""
877 901
878 902 def iteritems():
879 903 """Identical to items()."""
880 904
881 905 def iterentries():
882 906 """Returns an iterable of (path, node, flags) for this manifest.
883 907
884 908 Similar to ``iteritems()`` except items are a 3-tuple and include
885 909 flags.
886 910 """
887 911
888 912 def text():
889 913 """Obtain the raw data representation for this manifest.
890 914
891 915 Result is used to create a manifest revision.
892 916 """
893 917
894 918 def fastdelta(base, changes):
895 919 """Obtain a delta between this manifest and another given changes.
896 920
897 921 ``base`` is the raw data representation of another manifest.
898 922
899 923 ``changes`` is an iterable of ``(path, to_delete)``.
900 924
901 925 Returns a 2-tuple containing ``bytearray(self.text())`` and the
902 926 delta between ``base`` and this manifest.
903 927 """
904 928
905 929 class imanifestrevisionbase(interfaceutil.Interface):
906 930 """Base interface representing a single revision of a manifest.
907 931
908 932 Should not be used as a primary interface: should always be inherited
909 933 as part of a larger interface.
910 934 """
911 935
912 936 def new():
913 937 """Obtain a new manifest instance.
914 938
915 939 Returns an object conforming to the ``imanifestrevisionwritable``
916 940 interface. The instance will be associated with the same
917 941 ``imanifestlog`` collection as this instance.
918 942 """
919 943
920 944 def copy():
921 945 """Obtain a copy of this manifest instance.
922 946
923 947 Returns an object conforming to the ``imanifestrevisionwritable``
924 948 interface. The instance will be associated with the same
925 949 ``imanifestlog`` collection as this instance.
926 950 """
927 951
928 952 def read():
929 953 """Obtain the parsed manifest data structure.
930 954
931 955 The returned object conforms to the ``imanifestdict`` interface.
932 956 """
933 957
934 958 class imanifestrevisionstored(imanifestrevisionbase):
935 959 """Interface representing a manifest revision committed to storage."""
936 960
937 961 def node():
938 962 """The binary node for this manifest."""
939 963
940 964 parents = interfaceutil.Attribute(
941 965 """List of binary nodes that are parents for this manifest revision."""
942 966 )
943 967
944 968 def readdelta(shallow=False):
945 969 """Obtain the manifest data structure representing changes from parent.
946 970
947 971 This manifest is compared to its 1st parent. A new manifest representing
948 972 those differences is constructed.
949 973
950 974 The returned object conforms to the ``imanifestdict`` interface.
951 975 """
952 976
953 977 def readfast(shallow=False):
954 978 """Calls either ``read()`` or ``readdelta()``.
955 979
956 980 The faster of the two options is called.
957 981 """
958 982
959 983 def find(key):
960 984 """Calls self.read().find(key)``.
961 985
962 986 Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
963 987 """
964 988
965 989 class imanifestrevisionwritable(imanifestrevisionbase):
966 990 """Interface representing a manifest revision that can be committed."""
967 991
968 992 def write(transaction, linkrev, p1node, p2node, added, removed):
969 993 """Add this revision to storage.
970 994
971 995 Takes a transaction object, the changeset revision number it will
972 996 be associated with, its parent nodes, and lists of added and
973 997 removed paths.
974 998
975 999 Returns the binary node of the created revision.
976 1000 """
977 1001
978 1002 class imanifestlog(interfaceutil.Interface):
979 1003 """Interface representing a collection of manifest snapshots."""
980 1004
981 1005 def __getitem__(node):
982 1006 """Obtain a manifest instance for a given binary node.
983 1007
984 1008 Equivalent to calling ``self.get('', node)``.
985 1009
986 1010 The returned object conforms to the ``imanifestrevisionstored``
987 1011 interface.
988 1012 """
989 1013
990 1014 def get(dir, node, verify=True):
991 1015 """Retrieve the manifest instance for a given directory and binary node.
992 1016
993 1017 ``node`` always refers to the node of the root manifest (which will be
994 1018 the only manifest if flat manifests are being used).
995 1019
996 1020 If ``dir`` is the empty string, the root manifest is returned. Otherwise
997 1021 the manifest for the specified directory will be returned (requires
998 1022 tree manifests).
999 1023
1000 1024 If ``verify`` is True, ``LookupError`` is raised if the node is not
1001 1025 known.
1002 1026
1003 1027 The returned object conforms to the ``imanifestrevisionstored``
1004 1028 interface.
1005 1029 """
1006 1030
1007 1031 def clearcaches():
1008 1032 """Clear caches associated with this collection."""
1009 1033
1010 1034 def rev(node):
1011 1035 """Obtain the revision number for a binary node.
1012 1036
1013 1037 Raises ``error.LookupError`` if the node is not known.
1014 1038 """
1015 1039
1016 1040 def addgroup(deltas, linkmapper, transaction):
1017 1041 """Process a series of deltas for storage.
1018 1042
1019 1043 ``deltas`` is an iterable of 7-tuples of
1020 1044 (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
1021 1045 to add.
1022 1046
1023 1047 The ``delta`` field contains ``mpatch`` data to apply to a base
1024 1048 revision, identified by ``deltabase``. The base node can be
1025 1049 ``nullid``, in which case the header from the delta can be ignored
1026 1050 and the delta used as the fulltext.
1027 1051
1028 1052 Returns a list of nodes that were processed. A node will be in the list
1029 1053 even if it existed in the store previously.
1030 1054 """
1031 1055
1032 1056 class completelocalrepository(interfaceutil.Interface):
1033 1057 """Monolithic interface for local repositories.
1034 1058
1035 1059 This currently captures the reality of things - not how things should be.
1036 1060 """
1037 1061
1038 1062 supportedformats = interfaceutil.Attribute(
1039 1063 """Set of requirements that apply to stream clone.
1040 1064
1041 1065 This is actually a class attribute and is shared among all instances.
1042 1066 """)
1043 1067
1044 1068 openerreqs = interfaceutil.Attribute(
1045 1069 """Set of requirements that are passed to the opener.
1046 1070
1047 1071 This is actually a class attribute and is shared among all instances.
1048 1072 """)
1049 1073
1050 1074 supported = interfaceutil.Attribute(
1051 1075 """Set of requirements that this repo is capable of opening.""")
1052 1076
1053 1077 requirements = interfaceutil.Attribute(
1054 1078 """Set of requirements this repo uses.""")
1055 1079
1056 1080 filtername = interfaceutil.Attribute(
1057 1081 """Name of the repoview that is active on this repo.""")
1058 1082
1059 1083 wvfs = interfaceutil.Attribute(
1060 1084 """VFS used to access the working directory.""")
1061 1085
1062 1086 vfs = interfaceutil.Attribute(
1063 1087 """VFS rooted at the .hg directory.
1064 1088
1065 1089 Used to access repository data not in the store.
1066 1090 """)
1067 1091
1068 1092 svfs = interfaceutil.Attribute(
1069 1093 """VFS rooted at the store.
1070 1094
1071 1095 Used to access repository data in the store. Typically .hg/store.
1072 1096 But can point elsewhere if the store is shared.
1073 1097 """)
1074 1098
1075 1099 root = interfaceutil.Attribute(
1076 1100 """Path to the root of the working directory.""")
1077 1101
1078 1102 path = interfaceutil.Attribute(
1079 1103 """Path to the .hg directory.""")
1080 1104
1081 1105 origroot = interfaceutil.Attribute(
1082 1106 """The filesystem path that was used to construct the repo.""")
1083 1107
1084 1108 auditor = interfaceutil.Attribute(
1085 1109 """A pathauditor for the working directory.
1086 1110
1087 1111 This checks if a path refers to a nested repository.
1088 1112
1089 1113 Operates on the filesystem.
1090 1114 """)
1091 1115
1092 1116 nofsauditor = interfaceutil.Attribute(
1093 1117 """A pathauditor for the working directory.
1094 1118
1095 1119 This is like ``auditor`` except it doesn't do filesystem checks.
1096 1120 """)
1097 1121
1098 1122 baseui = interfaceutil.Attribute(
1099 1123 """Original ui instance passed into constructor.""")
1100 1124
1101 1125 ui = interfaceutil.Attribute(
1102 1126 """Main ui instance for this instance.""")
1103 1127
1104 1128 sharedpath = interfaceutil.Attribute(
1105 1129 """Path to the .hg directory of the repo this repo was shared from.""")
1106 1130
1107 1131 store = interfaceutil.Attribute(
1108 1132 """A store instance.""")
1109 1133
1110 1134 spath = interfaceutil.Attribute(
1111 1135 """Path to the store.""")
1112 1136
1113 1137 sjoin = interfaceutil.Attribute(
1114 1138 """Alias to self.store.join.""")
1115 1139
1116 1140 cachevfs = interfaceutil.Attribute(
1117 1141 """A VFS used to access the cache directory.
1118 1142
1119 1143 Typically .hg/cache.
1120 1144 """)
1121 1145
1122 1146 filteredrevcache = interfaceutil.Attribute(
1123 1147 """Holds sets of revisions to be filtered.""")
1124 1148
1125 1149 names = interfaceutil.Attribute(
1126 1150 """A ``namespaces`` instance.""")
1127 1151
1128 1152 def close():
1129 1153 """Close the handle on this repository."""
1130 1154
1131 1155 def peer():
1132 1156 """Obtain an object conforming to the ``peer`` interface."""
1133 1157
1134 1158 def unfiltered():
1135 1159 """Obtain an unfiltered/raw view of this repo."""
1136 1160
1137 1161 def filtered(name, visibilityexceptions=None):
1138 1162 """Obtain a named view of this repository."""
1139 1163
1140 1164 obsstore = interfaceutil.Attribute(
1141 1165 """A store of obsolescence data.""")
1142 1166
1143 1167 changelog = interfaceutil.Attribute(
1144 1168 """A handle on the changelog revlog.""")
1145 1169
1146 1170 manifestlog = interfaceutil.Attribute(
1147 1171 """An instance conforming to the ``imanifestlog`` interface.
1148 1172
1149 1173 Provides access to manifests for the repository.
1150 1174 """)
1151 1175
1152 1176 dirstate = interfaceutil.Attribute(
1153 1177 """Working directory state.""")
1154 1178
1155 1179 narrowpats = interfaceutil.Attribute(
1156 1180 """Matcher patterns for this repository's narrowspec.""")
1157 1181
1158 1182 def narrowmatch():
1159 1183 """Obtain a matcher for the narrowspec."""
1160 1184
1161 1185 def setnarrowpats(newincludes, newexcludes):
1162 1186 """Define the narrowspec for this repository."""
1163 1187
1164 1188 def __getitem__(changeid):
1165 1189 """Try to resolve a changectx."""
1166 1190
1167 1191 def __contains__(changeid):
1168 1192 """Whether a changeset exists."""
1169 1193
1170 1194 def __nonzero__():
1171 1195 """Always returns True."""
1172 1196 return True
1173 1197
1174 1198 __bool__ = __nonzero__
1175 1199
1176 1200 def __len__():
1177 1201 """Returns the number of changesets in the repo."""
1178 1202
1179 1203 def __iter__():
1180 1204 """Iterate over revisions in the changelog."""
1181 1205
1182 1206 def revs(expr, *args):
1183 1207 """Evaluate a revset.
1184 1208
1185 1209 Emits revisions.
1186 1210 """
1187 1211
1188 1212 def set(expr, *args):
1189 1213 """Evaluate a revset.
1190 1214
1191 1215 Emits changectx instances.
1192 1216 """
1193 1217
1194 1218 def anyrevs(specs, user=False, localalias=None):
1195 1219 """Find revisions matching one of the given revsets."""
1196 1220
1197 1221 def url():
1198 1222 """Returns a string representing the location of this repo."""
1199 1223
1200 1224 def hook(name, throw=False, **args):
1201 1225 """Call a hook."""
1202 1226
1203 1227 def tags():
1204 1228 """Return a mapping of tag to node."""
1205 1229
1206 1230 def tagtype(tagname):
1207 1231 """Return the type of a given tag."""
1208 1232
1209 1233 def tagslist():
1210 1234 """Return a list of tags ordered by revision."""
1211 1235
1212 1236 def nodetags(node):
1213 1237 """Return the tags associated with a node."""
1214 1238
1215 1239 def nodebookmarks(node):
1216 1240 """Return the list of bookmarks pointing to the specified node."""
1217 1241
1218 1242 def branchmap():
1219 1243 """Return a mapping of branch to heads in that branch."""
1220 1244
1221 1245 def revbranchcache():
1222 1246 pass
1223 1247
1224 1248 def branchtip(branchtip, ignoremissing=False):
1225 1249 """Return the tip node for a given branch."""
1226 1250
1227 1251 def lookup(key):
1228 1252 """Resolve the node for a revision."""
1229 1253
1230 1254 def lookupbranch(key):
1231 1255 """Look up the branch name of the given revision or branch name."""
1232 1256
1233 1257 def known(nodes):
1234 1258 """Determine whether a series of nodes is known.
1235 1259
1236 1260 Returns a list of bools.
1237 1261 """
1238 1262
1239 1263 def local():
1240 1264 """Whether the repository is local."""
1241 1265 return True
1242 1266
1243 1267 def publishing():
1244 1268 """Whether the repository is a publishing repository."""
1245 1269
1246 1270 def cancopy():
1247 1271 pass
1248 1272
1249 1273 def shared():
1250 1274 """The type of shared repository or None."""
1251 1275
1252 1276 def wjoin(f, *insidef):
1253 1277 """Calls self.vfs.reljoin(self.root, f, *insidef)"""
1254 1278
1255 1279 def file(f):
1256 1280 """Obtain a filelog for a tracked path.
1257 1281
1258 1282 The returned type conforms to the ``ifilestorage`` interface.
1259 1283 """
1260 1284
1261 1285 def setparents(p1, p2):
1262 1286 """Set the parent nodes of the working directory."""
1263 1287
1264 1288 def filectx(path, changeid=None, fileid=None):
1265 1289 """Obtain a filectx for the given file revision."""
1266 1290
1267 1291 def getcwd():
1268 1292 """Obtain the current working directory from the dirstate."""
1269 1293
1270 1294 def pathto(f, cwd=None):
1271 1295 """Obtain the relative path to a file."""
1272 1296
1273 1297 def adddatafilter(name, fltr):
1274 1298 pass
1275 1299
1276 1300 def wread(filename):
1277 1301 """Read a file from wvfs, using data filters."""
1278 1302
1279 1303 def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
1280 1304 """Write data to a file in the wvfs, using data filters."""
1281 1305
1282 1306 def wwritedata(filename, data):
1283 1307 """Resolve data for writing to the wvfs, using data filters."""
1284 1308
1285 1309 def currenttransaction():
1286 1310 """Obtain the current transaction instance or None."""
1287 1311
1288 1312 def transaction(desc, report=None):
1289 1313 """Open a new transaction to write to the repository."""
1290 1314
1291 1315 def undofiles():
1292 1316 """Returns a list of (vfs, path) for files to undo transactions."""
1293 1317
1294 1318 def recover():
1295 1319 """Roll back an interrupted transaction."""
1296 1320
1297 1321 def rollback(dryrun=False, force=False):
1298 1322 """Undo the last transaction.
1299 1323
1300 1324 DANGEROUS.
1301 1325 """
1302 1326
1303 1327 def updatecaches(tr=None, full=False):
1304 1328 """Warm repo caches."""
1305 1329
1306 1330 def invalidatecaches():
1307 1331 """Invalidate cached data due to the repository mutating."""
1308 1332
1309 1333 def invalidatevolatilesets():
1310 1334 pass
1311 1335
1312 1336 def invalidatedirstate():
1313 1337 """Invalidate the dirstate."""
1314 1338
1315 1339 def invalidate(clearfilecache=False):
1316 1340 pass
1317 1341
1318 1342 def invalidateall():
1319 1343 pass
1320 1344
1321 1345 def lock(wait=True):
1322 1346 """Lock the repository store and return a lock instance."""
1323 1347
1324 1348 def wlock(wait=True):
1325 1349 """Lock the non-store parts of the repository."""
1326 1350
1327 1351 def currentwlock():
1328 1352 """Return the wlock if it's held or None."""
1329 1353
1330 1354 def checkcommitpatterns(wctx, vdirs, match, status, fail):
1331 1355 pass
1332 1356
1333 1357 def commit(text='', user=None, date=None, match=None, force=False,
1334 1358 editor=False, extra=None):
1335 1359 """Add a new revision to the repository."""
1336 1360
1337 1361 def commitctx(ctx, error=False):
1338 1362 """Commit a commitctx instance to the repository."""
1339 1363
1340 1364 def destroying():
1341 1365 """Inform the repository that nodes are about to be destroyed."""
1342 1366
1343 1367 def destroyed():
1344 1368 """Inform the repository that nodes have been destroyed."""
1345 1369
1346 1370 def status(node1='.', node2=None, match=None, ignored=False,
1347 1371 clean=False, unknown=False, listsubrepos=False):
1348 1372 """Convenience method to call repo[x].status()."""
1349 1373
1350 1374 def addpostdsstatus(ps):
1351 1375 pass
1352 1376
1353 1377 def postdsstatus():
1354 1378 pass
1355 1379
1356 1380 def clearpostdsstatus():
1357 1381 pass
1358 1382
1359 1383 def heads(start=None):
1360 1384 """Obtain list of nodes that are DAG heads."""
1361 1385
1362 1386 def branchheads(branch=None, start=None, closed=False):
1363 1387 pass
1364 1388
1365 1389 def branches(nodes):
1366 1390 pass
1367 1391
1368 1392 def between(pairs):
1369 1393 pass
1370 1394
1371 1395 def checkpush(pushop):
1372 1396 pass
1373 1397
1374 1398 prepushoutgoinghooks = interfaceutil.Attribute(
1375 1399 """util.hooks instance.""")
1376 1400
1377 1401 def pushkey(namespace, key, old, new):
1378 1402 pass
1379 1403
1380 1404 def listkeys(namespace):
1381 1405 pass
1382 1406
1383 1407 def debugwireargs(one, two, three=None, four=None, five=None):
1384 1408 pass
1385 1409
1386 1410 def savecommitmessage(text):
1387 1411 pass
@@ -1,3076 +1,3172 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullhex,
31 31 nullid,
32 32 nullrev,
33 33 wdirfilenodeids,
34 34 wdirhex,
35 35 wdirid,
36 36 wdirrev,
37 37 )
38 38 from .i18n import _
39 39 from .thirdparty import (
40 40 attr,
41 41 )
42 42 from . import (
43 43 ancestor,
44 44 error,
45 45 mdiff,
46 46 policy,
47 47 pycompat,
48 repository,
48 49 templatefilters,
49 50 util,
50 51 )
51 52 from .utils import (
53 interfaceutil,
52 54 stringutil,
53 55 )
54 56
55 57 parsers = policy.importmod(r'parsers')
56 58
57 59 # Aliased for performance.
58 60 _zlibdecompress = zlib.decompress
59 61
60 62 # revlog header flags
61 63 REVLOGV0 = 0
62 64 REVLOGV1 = 1
63 65 # Dummy value until file format is finalized.
64 66 # Reminder: change the bounds check in revlog.__init__ when this is changed.
65 67 REVLOGV2 = 0xDEAD
66 68 FLAG_INLINE_DATA = (1 << 16)
67 69 FLAG_GENERALDELTA = (1 << 17)
68 70 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
69 71 REVLOG_DEFAULT_FORMAT = REVLOGV1
70 72 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
71 73 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
72 74 REVLOGV2_FLAGS = REVLOGV1_FLAGS
73 75
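# A minimal sketch of how the version word decomposes; it mirrors the
# masking done in revlog.__init__ below.
assert REVLOG_DEFAULT_VERSION == REVLOGV1 | FLAG_INLINE_DATA
assert REVLOG_DEFAULT_VERSION & 0xFFFF == REVLOGV1
assert REVLOG_DEFAULT_VERSION & ~0xFFFF == FLAG_INLINE_DATA
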
74 76 # revlog index flags
75 77 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
76 78 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
77 79 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
78 80 REVIDX_DEFAULT_FLAGS = 0
79 81 # stable order in which flags need to be processed and their processors applied
80 82 REVIDX_FLAGS_ORDER = [
81 83 REVIDX_ISCENSORED,
82 84 REVIDX_ELLIPSIS,
83 85 REVIDX_EXTSTORED,
84 86 ]
85 87 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
86 88 # bitmask for flags that could cause rawdata content change
87 89 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
88 90
89 91 # max size of revlog with inline data
90 92 _maxinline = 131072
91 93 _chunksize = 1048576
92 94
93 95 RevlogError = error.RevlogError
94 96 LookupError = error.LookupError
95 97 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
96 98 CensoredNodeError = error.CensoredNodeError
97 99 ProgrammingError = error.ProgrammingError
98 100
99 101 # Store flag processors (cf. 'addflagprocessor()' to register)
100 102 _flagprocessors = {
101 103 REVIDX_ISCENSORED: None,
102 104 }
103 105
104 106 _mdre = re.compile('\1\n')
105 107 def parsemeta(text):
106 108 """return (metadatadict, metadatasize)"""
107 109 # text can be buffer, so we can't use .startswith or .index
108 110 if text[:2] != '\1\n':
109 111 return None, None
110 112 s = _mdre.search(text, 2).start()
111 113 mtext = text[2:s]
112 114 meta = {}
113 115 for l in mtext.splitlines():
114 116 k, v = l.split(": ", 1)
115 117 meta[k] = v
116 118 return meta, (s + 2)
117 119
118 120 def packmeta(meta, text):
119 121 keys = sorted(meta)
120 122 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
121 123 return "\1\n%s\1\n%s" % (metatext, text)
122 124
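# A small round-trip sketch (demo names invented): packmeta and parsemeta
# are inverses, and the offset returned by parsemeta points at the payload.
_demometa = {'copy': 'foo', 'copyrev': 'bar'}
_demoblob = packmeta(_demometa, 'file body')
_demoparsed, _demooff = parsemeta(_demoblob)
assert _demoparsed == _demometa
assert _demoblob[_demooff:] == 'file body'
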
123 125 def _censoredtext(text):
124 126 m, offs = parsemeta(text)
125 127 return m and "censored" in m
126 128
127 129 def addflagprocessor(flag, processor):
128 130 """Register a flag processor on a revision data flag.
129 131
130 132 Invariant:
131 133 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
132 134 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
133 135 - Only one flag processor can be registered on a specific flag.
134 136 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
135 137 following signatures:
136 138 - (read) f(self, rawtext) -> text, bool
137 139 - (write) f(self, text) -> rawtext, bool
138 140 - (raw) f(self, rawtext) -> bool
139 141 "text" is presented to the user. "rawtext" is stored in revlog data, not
140 142 directly visible to the user.
141 143 The boolean returned by these transforms is used to determine whether
142 144 the returned text can be used for hash integrity checking. For example,
143 145 if "write" returns False, then "text" is used to generate the hash. If
144 146 "write" returns True, the "rawtext" returned by "write" should be used
145 147 to generate the hash. Usually, "write" and "read" return
146 148 different booleans, and "raw" returns the same boolean as "write".
147 149
148 150 Note: The 'raw' transform is used for changegroup generation and in some
149 151 debug commands. In this case the transform only indicates whether the
150 152 contents can be used for hash integrity checks.
151 153 """
152 154 if not flag & REVIDX_KNOWN_FLAGS:
153 155 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
154 156 raise ProgrammingError(msg)
155 157 if flag not in REVIDX_FLAGS_ORDER:
156 158 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
157 159 raise ProgrammingError(msg)
158 160 if flag in _flagprocessors:
159 161 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
160 162 raise error.Abort(msg)
161 163 _flagprocessors[flag] = processor
162 164
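# A hypothetical sketch of the contract above: a passthrough processor for
# REVIDX_EXTSTORED whose booleans mark the returned text as safe for hash
# verification. An extension would register it as shown in the comment.
def _extstoredread(rl, rawtext):
    return rawtext, True

def _extstoredwrite(rl, text):
    return text, True

def _extstoredraw(rl, rawtext):
    return True

# addflagprocessor(REVIDX_EXTSTORED,
#                  (_extstoredread, _extstoredwrite, _extstoredraw))
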
163 165 def getoffset(q):
164 166 return int(q >> 16)
165 167
166 168 def gettype(q):
167 169 return int(q & 0xFFFF)
168 170
169 171 def offset_type(offset, type):
170 172 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
171 173 raise ValueError('unknown revlog index flags')
172 174 return int(int(offset) << 16 | type)
173 175
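# Sketch: the first index field packs a byte offset into the high bits and
# the 16 flag bits into the low bits; getoffset/gettype invert offset_type.
_packed = offset_type(4096, REVIDX_ISCENSORED)
assert getoffset(_packed) == 4096
assert gettype(_packed) == REVIDX_ISCENSORED
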
174 176 _nullhash = hashlib.sha1(nullid)
175 177
176 178 def hash(text, p1, p2):
177 179 """generate a hash from the given text and its parent hashes
178 180
179 181 This hash combines both the current file contents and its history
180 182 in a manner that makes it easy to distinguish nodes with the same
181 183 content in the revision graph.
182 184 """
183 185 # As of now, if one of the parent nodes is null, p2 is null
184 186 if p2 == nullid:
185 187 # deep copy of a hash is faster than creating one
186 188 s = _nullhash.copy()
187 189 s.update(p1)
188 190 else:
189 191 # none of the parent nodes are nullid
190 192 if p1 < p2:
191 193 a = p1
192 194 b = p2
193 195 else:
194 196 a = p2
195 197 b = p1
196 198 s = hashlib.sha1(a)
197 199 s.update(b)
198 200 s.update(text)
199 201 return s.digest()
200 202
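# Sketch: for a root revision (both parents null) the node is simply
# sha1(nullid + nullid + text), and parents are sorted before hashing, so
# the result does not depend on their order.
assert hash('text', nullid, nullid) == \
    hashlib.sha1(nullid + nullid + 'text').digest()
assert (hash('text', '\x01' * 20, '\x02' * 20) ==
        hash('text', '\x02' * 20, '\x01' * 20))
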
201 203 class _testrevlog(object):
202 204 """minimalist fake revlog to use in doctests"""
203 205
204 206 def __init__(self, data, density=0.5, mingap=0):
205 207 """data is an list of revision payload boundaries"""
206 208 self._data = data
207 209 self._srdensitythreshold = density
208 210 self._srmingapsize = mingap
209 211
210 212 def start(self, rev):
211 213 if rev == 0:
212 214 return 0
213 215 return self._data[rev - 1]
214 216
215 217 def end(self, rev):
216 218 return self._data[rev]
217 219
218 220 def length(self, rev):
219 221 return self.end(rev) - self.start(rev)
220 222
221 223 def __len__(self):
222 224 return len(self._data)
223 225
224 226 def _trimchunk(revlog, revs, startidx, endidx=None):
225 227 """returns revs[startidx:endidx] without empty trailing revs
226 228
227 229 Doctest Setup
228 230 >>> revlog = _testrevlog([
229 231 ... 5, #0
230 232 ... 10, #1
231 233 ... 12, #2
232 234 ... 12, #3 (empty)
233 235 ... 17, #4
234 236 ... 21, #5
235 237 ... 21, #6 (empty)
236 238 ... ])
237 239
238 240 Contiguous cases:
239 241 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
240 242 [0, 1, 2, 3, 4, 5]
241 243 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
242 244 [0, 1, 2, 3, 4]
243 245 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
244 246 [0, 1, 2]
245 247 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
246 248 [2]
247 249 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
248 250 [3, 4, 5]
249 251 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
250 252 [3, 4]
251 253
252 254 Discontiguous cases:
253 255 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
254 256 [1, 3, 5]
255 257 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
256 258 [1]
257 259 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
258 260 [3, 5]
259 261 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
260 262 [3, 5]
261 263 """
262 264 length = revlog.length
263 265
264 266 if endidx is None:
265 267 endidx = len(revs)
266 268
267 269 # If we have a non-empty delta candidate, there is nothing to trim
268 270 if revs[endidx - 1] < len(revlog):
269 271 # Trim empty revs at the end, except the very first revision of a chain
270 272 while (endidx > 1
271 273 and endidx > startidx
272 274 and length(revs[endidx - 1]) == 0):
273 275 endidx -= 1
274 276
275 277 return revs[startidx:endidx]
276 278
277 279 def _segmentspan(revlog, revs, deltainfo=None):
278 280 """Get the byte span of a segment of revisions
279 281
280 282 revs is a sorted array of revision numbers
281 283
282 284 >>> revlog = _testrevlog([
283 285 ... 5, #0
284 286 ... 10, #1
285 287 ... 12, #2
286 288 ... 12, #3 (empty)
287 289 ... 17, #4
288 290 ... ])
289 291
290 292 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
291 293 17
292 294 >>> _segmentspan(revlog, [0, 4])
293 295 17
294 296 >>> _segmentspan(revlog, [3, 4])
295 297 5
296 298 >>> _segmentspan(revlog, [1, 2, 3,])
297 299 7
298 300 >>> _segmentspan(revlog, [1, 3])
299 301 7
300 302 """
301 303 if not revs:
302 304 return 0
303 305 if deltainfo is not None and len(revlog) <= revs[-1]:
304 306 if len(revs) == 1:
305 307 return deltainfo.deltalen
306 308 offset = revlog.end(len(revlog) - 1)
307 309 end = deltainfo.deltalen + offset
308 310 else:
309 311 end = revlog.end(revs[-1])
310 312 return end - revlog.start(revs[0])
311 313
312 314 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
313 315 """slice revs to reduce the amount of unrelated data to be read from disk.
314 316
315 317 ``revs`` is sliced into groups that should be read in one go.
316 318 Assume that revs are sorted.
317 319
318 320 The initial chunk is sliced until the overall density (payload/chunks-span
319 321 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
320 322 `revlog._srmingapsize` is skipped.
321 323
322 324 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
323 325 For consistency with other slicing choices, this limit won't go lower than
324 326 `revlog._srmingapsize`.
325 327
326 328 If individual revision chunks are larger than this limit, they will still
327 329 be yielded individually.
328 330
329 331 >>> revlog = _testrevlog([
330 332 ... 5, #00 (5)
331 333 ... 10, #01 (5)
332 334 ... 12, #02 (2)
333 335 ... 12, #03 (empty)
334 336 ... 27, #04 (15)
335 337 ... 31, #05 (4)
336 338 ... 31, #06 (empty)
337 339 ... 42, #07 (11)
338 340 ... 47, #08 (5)
339 341 ... 47, #09 (empty)
340 342 ... 48, #10 (1)
341 343 ... 51, #11 (3)
342 344 ... 74, #12 (23)
343 345 ... 85, #13 (11)
344 346 ... 86, #14 (1)
345 347 ... 91, #15 (5)
346 348 ... ])
347 349
348 350 >>> list(_slicechunk(revlog, list(range(16))))
349 351 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
350 352 >>> list(_slicechunk(revlog, [0, 15]))
351 353 [[0], [15]]
352 354 >>> list(_slicechunk(revlog, [0, 11, 15]))
353 355 [[0], [11], [15]]
354 356 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
355 357 [[0], [11, 13, 15]]
356 358 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
357 359 [[1, 2], [5, 8, 10, 11], [14]]
358 360
359 361 Slicing with a maximum chunk size
360 362 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
361 363 [[0], [11], [13], [15]]
362 364 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
363 365 [[0], [11], [13, 15]]
364 366 """
365 367 if targetsize is not None:
366 368 targetsize = max(targetsize, revlog._srmingapsize)
367 369 # targetsize should not be specified when evaluating delta candidates:
368 370 # * targetsize is used to ensure we stay within specification when reading,
369 371 # * deltainfo is used to pick a good delta chain when writing.
370 372 if not (deltainfo is None or targetsize is None):
371 373 msg = 'cannot use `targetsize` with a `deltainfo`'
372 374 raise error.ProgrammingError(msg)
373 375 for chunk in _slicechunktodensity(revlog, revs,
374 376 deltainfo,
375 377 revlog._srdensitythreshold,
376 378 revlog._srmingapsize):
377 379 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
378 380 yield subchunk
379 381
380 382 def _slicechunktosize(revlog, revs, targetsize=None):
381 383 """slice revs to match the target size
382 384
383 385 This is intended to be used on chunks that density slicing selected but that
384 386 are still too large compared to the read guarantee of revlog. This might
385 387 happen when the "minimal gap size" interrupted the slicing or when chains are
386 388 built in a way that creates large blocks next to each other.
387 389
388 390 >>> revlog = _testrevlog([
389 391 ... 3, #0 (3)
390 392 ... 5, #1 (2)
391 393 ... 6, #2 (1)
392 394 ... 8, #3 (2)
393 395 ... 8, #4 (empty)
394 396 ... 11, #5 (3)
395 397 ... 12, #6 (1)
396 398 ... 13, #7 (1)
397 399 ... 14, #8 (1)
398 400 ... ])
399 401
400 402 Cases where chunk is already small enough
401 403 >>> list(_slicechunktosize(revlog, [0], 3))
402 404 [[0]]
403 405 >>> list(_slicechunktosize(revlog, [6, 7], 3))
404 406 [[6, 7]]
405 407 >>> list(_slicechunktosize(revlog, [0], None))
406 408 [[0]]
407 409 >>> list(_slicechunktosize(revlog, [6, 7], None))
408 410 [[6, 7]]
409 411
410 412 Cases where we need actual slicing
411 413 >>> list(_slicechunktosize(revlog, [0, 1], 3))
412 414 [[0], [1]]
413 415 >>> list(_slicechunktosize(revlog, [1, 3], 3))
414 416 [[1], [3]]
415 417 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
416 418 [[1, 2], [3]]
417 419 >>> list(_slicechunktosize(revlog, [3, 5], 3))
418 420 [[3], [5]]
419 421 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
420 422 [[3], [5]]
421 423 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
422 424 [[5], [6, 7, 8]]
423 425 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
424 426 [[0], [1, 2], [3], [5], [6, 7, 8]]
425 427
426 428 Case with too large individual chunk (must return valid chunk)
427 429 >>> list(_slicechunktosize(revlog, [0, 1], 2))
428 430 [[0], [1]]
429 431 >>> list(_slicechunktosize(revlog, [1, 3], 1))
430 432 [[1], [3]]
431 433 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
432 434 [[3], [5]]
433 435 """
434 436 assert targetsize is None or 0 <= targetsize
435 437 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
436 438 yield revs
437 439 return
438 440
439 441 startrevidx = 0
440 442 startdata = revlog.start(revs[0])
441 443 endrevidx = 0
442 444 iterrevs = enumerate(revs)
443 445 next(iterrevs) # skip first rev.
444 446 for idx, r in iterrevs:
445 447 span = revlog.end(r) - startdata
446 448 if span <= targetsize:
447 449 endrevidx = idx
448 450 else:
449 451 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
450 452 if chunk:
451 453 yield chunk
452 454 startrevidx = idx
453 455 startdata = revlog.start(r)
454 456 endrevidx = idx
455 457 yield _trimchunk(revlog, revs, startrevidx)
456 458
457 459 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
458 460 mingapsize=0):
459 461 """slice revs to reduce the amount of unrelated data to be read from disk.
460 462
461 463 ``revs`` is sliced into groups that should be read in one go.
462 464 Assume that revs are sorted.
463 465
464 466 ``deltainfo`` is a _deltainfo instance of a revision that we would append
465 467 to the top of the revlog.
466 468
467 469 The initial chunk is sliced until the overall density (payload/chunks-span
468 470 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
469 471 skipped.
470 472
471 473 >>> revlog = _testrevlog([
472 474 ... 5, #00 (5)
473 475 ... 10, #01 (5)
474 476 ... 12, #02 (2)
475 477 ... 12, #03 (empty)
476 478 ... 27, #04 (15)
477 479 ... 31, #05 (4)
478 480 ... 31, #06 (empty)
479 481 ... 42, #07 (11)
480 482 ... 47, #08 (5)
481 483 ... 47, #09 (empty)
482 484 ... 48, #10 (1)
483 485 ... 51, #11 (3)
484 486 ... 74, #12 (23)
485 487 ... 85, #13 (11)
486 488 ... 86, #14 (1)
487 489 ... 91, #15 (5)
488 490 ... ])
489 491
490 492 >>> list(_slicechunktodensity(revlog, list(range(16))))
491 493 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
492 494 >>> list(_slicechunktodensity(revlog, [0, 15]))
493 495 [[0], [15]]
494 496 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
495 497 [[0], [11], [15]]
496 498 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
497 499 [[0], [11, 13, 15]]
498 500 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
499 501 [[1, 2], [5, 8, 10, 11], [14]]
500 502 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
501 503 ... mingapsize=20))
502 504 [[1, 2, 3, 5, 8, 10, 11], [14]]
503 505 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
504 506 ... targetdensity=0.95))
505 507 [[1, 2], [5], [8, 10, 11], [14]]
506 508 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
507 509 ... targetdensity=0.95, mingapsize=12))
508 510 [[1, 2], [5, 8, 10, 11], [14]]
509 511 """
510 512 start = revlog.start
511 513 length = revlog.length
512 514
513 515 if len(revs) <= 1:
514 516 yield revs
515 517 return
516 518
517 519 nextrev = len(revlog)
518 520 nextoffset = revlog.end(nextrev - 1)
519 521
520 522 if deltainfo is None:
521 523 deltachainspan = _segmentspan(revlog, revs)
522 524 chainpayload = sum(length(r) for r in revs)
523 525 else:
524 526 deltachainspan = deltainfo.distance
525 527 chainpayload = deltainfo.compresseddeltalen
526 528
527 529 if deltachainspan < mingapsize:
528 530 yield revs
529 531 return
530 532
531 533 readdata = deltachainspan
532 534
533 535 if deltachainspan:
534 536 density = chainpayload / float(deltachainspan)
535 537 else:
536 538 density = 1.0
537 539
538 540 if density >= targetdensity:
539 541 yield revs
540 542 return
541 543
542 544 if deltainfo is not None and deltainfo.deltalen:
543 545 revs = list(revs)
544 546 revs.append(nextrev)
545 547
546 548 # Store the gaps in a heap to have them sorted by decreasing size
547 549 gapsheap = []
548 550 heapq.heapify(gapsheap)
549 551 prevend = None
550 552 for i, rev in enumerate(revs):
551 553 if rev < nextrev:
552 554 revstart = start(rev)
553 555 revlen = length(rev)
554 556 else:
555 557 revstart = nextoffset
556 558 revlen = deltainfo.deltalen
557 559
558 560 # Skip empty revisions to form larger holes
559 561 if revlen == 0:
560 562 continue
561 563
562 564 if prevend is not None:
563 565 gapsize = revstart - prevend
564 566 # only consider holes that are large enough
565 567 if gapsize > mingapsize:
566 568 heapq.heappush(gapsheap, (-gapsize, i))
567 569
568 570 prevend = revstart + revlen
569 571
570 572 # Collect the indices of the largest holes until the density is acceptable
571 573 indicesheap = []
572 574 heapq.heapify(indicesheap)
573 575 while gapsheap and density < targetdensity:
574 576 oppgapsize, gapidx = heapq.heappop(gapsheap)
575 577
576 578 heapq.heappush(indicesheap, gapidx)
577 579
578 580 # the gap sizes are stored as negatives to be sorted decreasingly
579 581 # by the heap
580 582 readdata -= (-oppgapsize)
581 583 if readdata > 0:
582 584 density = chainpayload / float(readdata)
583 585 else:
584 586 density = 1.0
585 587
586 588 # Cut the revs at collected indices
587 589 previdx = 0
588 590 while indicesheap:
589 591 idx = heapq.heappop(indicesheap)
590 592
591 593 chunk = _trimchunk(revlog, revs, previdx, idx)
592 594 if chunk:
593 595 yield chunk
594 596
595 597 previdx = idx
596 598
597 599 chunk = _trimchunk(revlog, revs, previdx)
598 600 if chunk:
599 601 yield chunk
600 602
601 603 @attr.s(slots=True, frozen=True)
602 604 class _deltainfo(object):
603 605 distance = attr.ib()
604 606 deltalen = attr.ib()
605 607 data = attr.ib()
606 608 base = attr.ib()
607 609 chainbase = attr.ib()
608 610 chainlen = attr.ib()
609 611 compresseddeltalen = attr.ib()
610 612 snapshotdepth = attr.ib()
611 613
612 614 class _deltacomputer(object):
613 615 def __init__(self, revlog):
614 616 self.revlog = revlog
615 617
616 618 def _getcandidaterevs(self, p1, p2, cachedelta):
617 619 """
618 620 Provides revisions that are of interest to be diffed against,
619 621 grouped by level of easiness.
620 622 """
621 623 revlog = self.revlog
622 624 gdelta = revlog._generaldelta
623 625 curr = len(revlog)
624 626 prev = curr - 1
625 627 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
626 628
627 629 # should we try to build a delta?
628 630 if prev != nullrev and revlog.storedeltachains:
629 631 tested = set()
630 632 # This condition is true most of the time when processing
631 633 # changegroup data into a generaldelta repo. The only time it
632 634 # isn't true is if this is the first revision in a delta chain
633 635 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
634 636 if cachedelta and gdelta and revlog._lazydeltabase:
635 637 # Assume what we received from the server is a good choice
636 638 # build delta will reuse the cache
637 639 yield (cachedelta[0],)
638 640 tested.add(cachedelta[0])
639 641
640 642 if gdelta:
641 643 # exclude already lazy tested base if any
642 644 parents = [p for p in (p1r, p2r)
643 645 if p != nullrev and p not in tested]
644 646
645 647 if not revlog._deltabothparents and len(parents) == 2:
646 648 parents.sort()
647 649 # To minimize the chance of having to build a fulltext,
648 650 # pick first whichever parent is closest to us (max rev)
649 651 yield (parents[1],)
650 652 # then the other one (min rev) if the first did not fit
651 653 yield (parents[0],)
652 654 tested.update(parents)
653 655 elif len(parents) > 0:
654 656 # Test all parents (1 or 2), and keep the best candidate
655 657 yield parents
656 658 tested.update(parents)
657 659
658 660 if prev not in tested:
659 661 # other approaches failed; try against prev to hopefully save us a
660 662 # fulltext.
661 663 yield (prev,)
662 664 tested.add(prev)
663 665
664 666 def buildtext(self, revinfo, fh):
665 667 """Builds a fulltext version of a revision
666 668
667 669 revinfo: _revisioninfo instance that contains all needed info
668 670 fh: file handle to either the .i or the .d revlog file,
669 671 depending on whether it is inlined or not
670 672 """
671 673 btext = revinfo.btext
672 674 if btext[0] is not None:
673 675 return btext[0]
674 676
675 677 revlog = self.revlog
676 678 cachedelta = revinfo.cachedelta
677 679 flags = revinfo.flags
678 680 node = revinfo.node
679 681
680 682 baserev = cachedelta[0]
681 683 delta = cachedelta[1]
682 684 # special case deltas which replace entire base; no need to decode
683 685 # base revision. this neatly avoids censored bases, which throw when
684 686 # they're decoded.
685 687 hlen = struct.calcsize(">lll")
686 688 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
687 689 len(delta) - hlen):
688 690 btext[0] = delta[hlen:]
689 691 else:
690 692 # deltabase is the rawtext before being changed by flag processors, which is
691 693 # equivalent to non-raw text
692 694 basetext = revlog.revision(baserev, _df=fh, raw=False)
693 695 btext[0] = mdiff.patch(basetext, delta)
694 696
695 697 try:
696 698 res = revlog._processflags(btext[0], flags, 'read', raw=True)
697 699 btext[0], validatehash = res
698 700 if validatehash:
699 701 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
700 702 if flags & REVIDX_ISCENSORED:
701 703 raise RevlogError(_('node %s is not censored') % node)
702 704 except CensoredNodeError:
703 705 # must pass the censored index flag to add censored revisions
704 706 if not flags & REVIDX_ISCENSORED:
705 707 raise
706 708 return btext[0]
707 709
708 710 def _builddeltadiff(self, base, revinfo, fh):
709 711 revlog = self.revlog
710 712 t = self.buildtext(revinfo, fh)
711 713 if revlog.iscensored(base):
712 714 # deltas based on a censored revision must replace the
713 715 # full content in one patch, so delta works everywhere
714 716 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
715 717 delta = header + t
716 718 else:
717 719 ptext = revlog.revision(base, _df=fh, raw=True)
718 720 delta = mdiff.textdiff(ptext, t)
719 721
720 722 return delta
721 723
722 724 def _builddeltainfo(self, revinfo, base, fh):
723 725 # can we use the cached delta?
724 726 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
725 727 delta = revinfo.cachedelta[1]
726 728 else:
727 729 delta = self._builddeltadiff(base, revinfo, fh)
728 730 revlog = self.revlog
729 731 header, data = revlog.compress(delta)
730 732 deltalen = len(header) + len(data)
731 733 chainbase = revlog.chainbase(base)
732 734 offset = revlog.end(len(revlog) - 1)
733 735 dist = deltalen + offset - revlog.start(chainbase)
734 736 if revlog._generaldelta:
735 737 deltabase = base
736 738 else:
737 739 deltabase = chainbase
738 740 chainlen, compresseddeltalen = revlog._chaininfo(base)
739 741 chainlen += 1
740 742 compresseddeltalen += deltalen
741 743
742 744 revlog = self.revlog
743 745 snapshotdepth = None
744 746 if deltabase == nullrev:
745 747 snapshotdepth = 0
746 748 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
747 749 # A delta chain should always be one full snapshot,
748 750 # zero or more semi-snapshots, and zero or more deltas
749 751 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
750 752 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
751 753 snapshotdepth = len(revlog._deltachain(deltabase)[0])
752 754
753 755 return _deltainfo(dist, deltalen, (header, data), deltabase,
754 756 chainbase, chainlen, compresseddeltalen,
755 757 snapshotdepth)
756 758
757 759 def finddeltainfo(self, revinfo, fh):
758 760 """Find an acceptable delta against a candidate revision
759 761
760 762 revinfo: information about the revision (instance of _revisioninfo)
761 763 fh: file handle to either the .i or the .d revlog file,
762 764 depending on whether it is inlined or not
763 765
764 766 Returns the first acceptable candidate revision, as ordered by
765 767 _getcandidaterevs
766 768 """
767 769 if not revinfo.textlen:
768 770 return None # empty files do not need deltas
769 771
770 772 cachedelta = revinfo.cachedelta
771 773 p1 = revinfo.p1
772 774 p2 = revinfo.p2
773 775 revlog = self.revlog
774 776
775 777 deltalength = self.revlog.length
776 778 deltaparent = self.revlog.deltaparent
777 779
778 780 deltainfo = None
779 781 deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
780 782 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
781 783 # filter out delta base that will never produce good delta
782 784 candidaterevs = [r for r in candidaterevs
783 785 if self.revlog.length(r) <= deltas_limit]
784 786 nominateddeltas = []
785 787 for candidaterev in candidaterevs:
786 788 # skip over empty deltas (no need to include them in a chain)
787 789 while candidaterev != nullrev and not deltalength(candidaterev):
788 790 candidaterev = deltaparent(candidaterev)
789 791 # no need to try a delta against nullid, this will be handled
790 792 # by fulltext later.
791 793 if candidaterev == nullrev:
792 794 continue
793 795 # no delta for rawtext-changing revs (see "candelta" for why)
794 796 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
795 797 continue
796 798 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
797 799 if revlog._isgooddeltainfo(candidatedelta, revinfo):
798 800 nominateddeltas.append(candidatedelta)
799 801 if nominateddeltas:
800 802 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
801 803 break
802 804
803 805 return deltainfo
804 806
805 807 @attr.s(slots=True, frozen=True)
806 808 class _revisioninfo(object):
807 809 """Information about a revision that allows building its fulltext
808 810 node: expected hash of the revision
809 811 p1, p2: parent revs of the revision
810 812 btext: built text cache consisting of a one-element list
811 813 cachedelta: (baserev, uncompressed_delta) or None
812 814 flags: flags associated to the revision storage
813 815
814 816 One of btext[0] or cachedelta must be set.
815 817 """
816 818 node = attr.ib()
817 819 p1 = attr.ib()
818 820 p2 = attr.ib()
819 821 btext = attr.ib()
820 822 textlen = attr.ib()
821 823 cachedelta = attr.ib()
822 824 flags = attr.ib()
823 825
826 @interfaceutil.implementer(repository.irevisiondelta)
827 @attr.s(slots=True, frozen=True)
828 class revlogrevisiondelta(object):
829 node = attr.ib()
830 p1node = attr.ib()
831 p2node = attr.ib()
832 basenode = attr.ib()
833 linknode = attr.ib()
834 flags = attr.ib()
835 baserevisionsize = attr.ib()
836 revision = attr.ib()
837 delta = attr.ib()
838
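# Sketch with dummy values: typically exactly one of ``revision`` (a
# fulltext) or ``delta`` is populated, with ``basenode`` naming the
# revision a delta applies against.
_fullrecord = revlogrevisiondelta(
    node='\xaa' * 20, p1node=nullid, p2node=nullid, basenode=nullid,
    linknode='\xbb' * 20, flags=0, baserevisionsize=None,
    revision='full text', delta=None)
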
824 839 # index v0:
825 840 # 4 bytes: offset
826 841 # 4 bytes: compressed length
827 842 # 4 bytes: base rev
828 843 # 4 bytes: link rev
829 844 # 20 bytes: parent 1 nodeid
830 845 # 20 bytes: parent 2 nodeid
831 846 # 20 bytes: nodeid
832 847 indexformatv0 = struct.Struct(">4l20s20s20s")
833 848 indexformatv0_pack = indexformatv0.pack
834 849 indexformatv0_unpack = indexformatv0.unpack
835 850
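# Sketch: a v0 entry is indexformatv0.size (76) bytes and round-trips
# through pack/unpack; the node values here are dummies.
_v0entry = (0, 12, 0, 0, nullid, nullid, '\x11' * 20)
assert indexformatv0.size == 76
assert indexformatv0_unpack(indexformatv0_pack(*_v0entry)) == _v0entry
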
836 851 class revlogoldindex(list):
837 852 def __getitem__(self, i):
838 853 if i == -1:
839 854 return (0, 0, 0, -1, -1, -1, -1, nullid)
840 855 return list.__getitem__(self, i)
841 856
842 857 # maximum <delta-chain-data>/<revision-text-length> ratio
843 858 LIMIT_DELTA2TEXT = 2
844 859
845 860 class revlogoldio(object):
846 861 def __init__(self):
847 862 self.size = indexformatv0.size
848 863
849 864 def parseindex(self, data, inline):
850 865 s = self.size
851 866 index = []
852 867 nodemap = {nullid: nullrev}
853 868 n = off = 0
854 869 l = len(data)
855 870 while off + s <= l:
856 871 cur = data[off:off + s]
857 872 off += s
858 873 e = indexformatv0_unpack(cur)
859 874 # transform to revlogv1 format
860 875 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
861 876 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
862 877 index.append(e2)
863 878 nodemap[e[6]] = n
864 879 n += 1
865 880
866 881 return revlogoldindex(index), nodemap, None
867 882
868 883 def packentry(self, entry, node, version, rev):
869 884 if gettype(entry[0]):
870 885 raise RevlogError(_('index entry flags need revlog version 1'))
871 886 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
872 887 node(entry[5]), node(entry[6]), entry[7])
873 888 return indexformatv0_pack(*e2)
874 889
875 890 # index ng:
876 891 # 6 bytes: offset
877 892 # 2 bytes: flags
878 893 # 4 bytes: compressed length
879 894 # 4 bytes: uncompressed length
880 895 # 4 bytes: base rev
881 896 # 4 bytes: link rev
882 897 # 4 bytes: parent 1 rev
883 898 # 4 bytes: parent 2 rev
884 899 # 32 bytes: nodeid
885 900 indexformatng = struct.Struct(">Qiiiiii20s12x")
886 901 indexformatng_pack = indexformatng.pack
887 902 versionformat = struct.Struct(">I")
888 903 versionformat_pack = versionformat.pack
889 904 versionformat_unpack = versionformat.unpack
890 905
891 906 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
892 907 # signed integer)
893 908 _maxentrysize = 0x7fffffff
894 909
895 910 class revlogio(object):
896 911 def __init__(self):
897 912 self.size = indexformatng.size
898 913
899 914 def parseindex(self, data, inline):
900 915 # call the C implementation to parse the index data
901 916 index, cache = parsers.parse_index2(data, inline)
902 917 return index, getattr(index, 'nodemap', None), cache
903 918
904 919 def packentry(self, entry, node, version, rev):
905 920 p = indexformatng_pack(*entry)
906 921 if rev == 0:
907 922 p = versionformat_pack(version) + p[4:]
908 923 return p
909 924
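# Sketch with dummy entry values: for rev 0 the version header is spliced
# over the first four bytes of the entry (the high bytes of the offset,
# which are always zero for the first revision).
_io = revlogio()
_entry = (offset_type(0, 0), 10, 10, -1, 0, -1, -1, '\x22' * 20)
_first = _io.packentry(_entry, None, REVLOG_DEFAULT_VERSION, 0)
assert versionformat_unpack(_first[:4])[0] == REVLOG_DEFAULT_VERSION
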
910 925 class revlog(object):
911 926 """
912 927 the underlying revision storage object
913 928
914 929 A revlog consists of two parts, an index and the revision data.
915 930
916 931 The index is a file with a fixed record size containing
917 932 information on each revision, including its nodeid (hash), the
918 933 nodeids of its parents, the position and offset of its data within
919 934 the data file, and the revision it's based on. Finally, each entry
920 935 contains a linkrev entry that can serve as a pointer to external
921 936 data.
922 937
923 938 The revision data itself is a linear collection of data chunks.
924 939 Each chunk represents a revision and is usually represented as a
925 940 delta against the previous chunk. To bound lookup time, runs of
926 941 deltas are limited to about 2 times the length of the original
927 942 version data. This makes retrieval of a version proportional to
928 943 its size, or O(1) relative to the number of revisions.
929 944
930 945 Both pieces of the revlog are written to in an append-only
931 946 fashion, which means we never need to rewrite a file to insert or
932 947 remove data, and can use some simple techniques to avoid the need
933 948 for locking while reading.
934 949
935 950 If checkambig, indexfile is opened with checkambig=True at
936 951 writing, to avoid file stat ambiguity.
937 952
938 953 If mmaplargeindex is True, and an mmapindexthreshold is set, the
939 954 index will be mmapped rather than read if it is larger than the
940 955 configured threshold.
941 956
942 957 If censorable is True, the revlog can have censored revisions.
943 958 """
944 959 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
945 960 mmaplargeindex=False, censorable=False):
946 961 """
947 962 create a revlog object
948 963
949 964 opener is a function that abstracts the file opening operation
950 965 and can be used to implement COW semantics or the like.
951 966 """
952 967 self.indexfile = indexfile
953 968 self.datafile = datafile or (indexfile[:-2] + ".d")
954 969 self.opener = opener
955 970 # When True, indexfile is opened with checkambig=True at writing, to
956 971 # avoid file stat ambiguity.
957 972 self._checkambig = checkambig
958 973 self._censorable = censorable
959 974 # 3-tuple of (node, rev, text) for a raw revision.
960 975 self._cache = None
961 976 # Maps rev to chain base rev.
962 977 self._chainbasecache = util.lrucachedict(100)
963 978 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
964 979 self._chunkcache = (0, '')
965 980 # How much data to read and cache into the raw revlog data cache.
966 981 self._chunkcachesize = 65536
967 982 self._maxchainlen = None
968 983 self._deltabothparents = True
969 984 self.index = []
970 985 # Mapping of partial identifiers to full nodes.
971 986 self._pcache = {}
972 987 # Mapping of revision integer to full node.
973 988 self._nodecache = {nullid: nullrev}
974 989 self._nodepos = None
975 990 self._compengine = 'zlib'
976 991 self._maxdeltachainspan = -1
977 992 self._withsparseread = False
978 993 self._sparserevlog = False
979 994 self._srdensitythreshold = 0.50
980 995 self._srmingapsize = 262144
981 996
982 997 mmapindexthreshold = None
983 998 v = REVLOG_DEFAULT_VERSION
984 999 opts = getattr(opener, 'options', None)
985 1000 if opts is not None:
986 1001 if 'revlogv2' in opts:
987 1002 # version 2 revlogs always use generaldelta.
988 1003 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
989 1004 elif 'revlogv1' in opts:
990 1005 if 'generaldelta' in opts:
991 1006 v |= FLAG_GENERALDELTA
992 1007 else:
993 1008 v = 0
994 1009 if 'chunkcachesize' in opts:
995 1010 self._chunkcachesize = opts['chunkcachesize']
996 1011 if 'maxchainlen' in opts:
997 1012 self._maxchainlen = opts['maxchainlen']
998 1013 if 'deltabothparents' in opts:
999 1014 self._deltabothparents = opts['deltabothparents']
1000 1015 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
1001 1016 if 'compengine' in opts:
1002 1017 self._compengine = opts['compengine']
1003 1018 if 'maxdeltachainspan' in opts:
1004 1019 self._maxdeltachainspan = opts['maxdeltachainspan']
1005 1020 if mmaplargeindex and 'mmapindexthreshold' in opts:
1006 1021 mmapindexthreshold = opts['mmapindexthreshold']
1007 1022 self._sparserevlog = bool(opts.get('sparse-revlog', False))
1008 1023 withsparseread = bool(opts.get('with-sparse-read', False))
1009 1024 # sparse-revlog forces sparse-read
1010 1025 self._withsparseread = self._sparserevlog or withsparseread
1011 1026 if 'sparse-read-density-threshold' in opts:
1012 1027 self._srdensitythreshold = opts['sparse-read-density-threshold']
1013 1028 if 'sparse-read-min-gap-size' in opts:
1014 1029 self._srmingapsize = opts['sparse-read-min-gap-size']
1015 1030
1016 1031 if self._chunkcachesize <= 0:
1017 1032 raise RevlogError(_('revlog chunk cache size %r is not greater '
1018 1033 'than 0') % self._chunkcachesize)
1019 1034 elif self._chunkcachesize & (self._chunkcachesize - 1):
1020 1035 raise RevlogError(_('revlog chunk cache size %r is not a power '
1021 1036 'of 2') % self._chunkcachesize)
1022 1037
1023 1038 indexdata = ''
1024 1039 self._initempty = True
1025 1040 try:
1026 1041 with self._indexfp() as f:
1027 1042 if (mmapindexthreshold is not None and
1028 1043 self.opener.fstat(f).st_size >= mmapindexthreshold):
1029 1044 indexdata = util.buffer(util.mmapread(f))
1030 1045 else:
1031 1046 indexdata = f.read()
1032 1047 if len(indexdata) > 0:
1033 1048 v = versionformat_unpack(indexdata[:4])[0]
1034 1049 self._initempty = False
1035 1050 except IOError as inst:
1036 1051 if inst.errno != errno.ENOENT:
1037 1052 raise
1038 1053
1039 1054 self.version = v
1040 1055 self._inline = v & FLAG_INLINE_DATA
1041 1056 self._generaldelta = v & FLAG_GENERALDELTA
1042 1057 flags = v & ~0xFFFF
1043 1058 fmt = v & 0xFFFF
1044 1059 if fmt == REVLOGV0:
1045 1060 if flags:
1046 1061 raise RevlogError(_('unknown flags (%#04x) in version %d '
1047 1062 'revlog %s') %
1048 1063 (flags >> 16, fmt, self.indexfile))
1049 1064 elif fmt == REVLOGV1:
1050 1065 if flags & ~REVLOGV1_FLAGS:
1051 1066 raise RevlogError(_('unknown flags (%#04x) in version %d '
1052 1067 'revlog %s') %
1053 1068 (flags >> 16, fmt, self.indexfile))
1054 1069 elif fmt == REVLOGV2:
1055 1070 if flags & ~REVLOGV2_FLAGS:
1056 1071 raise RevlogError(_('unknown flags (%#04x) in version %d '
1057 1072 'revlog %s') %
1058 1073 (flags >> 16, fmt, self.indexfile))
1059 1074 else:
1060 1075 raise RevlogError(_('unknown version (%d) in revlog %s') %
1061 1076 (fmt, self.indexfile))
1062 1077
1063 1078 self.storedeltachains = True
1064 1079
1065 1080 self._io = revlogio()
1066 1081 if self.version == REVLOGV0:
1067 1082 self._io = revlogoldio()
1068 1083 try:
1069 1084 d = self._io.parseindex(indexdata, self._inline)
1070 1085 except (ValueError, IndexError):
1071 1086 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1072 1087 self.index, nodemap, self._chunkcache = d
1073 1088 if nodemap is not None:
1074 1089 self.nodemap = self._nodecache = nodemap
1075 1090 if not self._chunkcache:
1076 1091 self._chunkclear()
1077 1092 # revnum -> (chain-length, sum-delta-length)
1078 1093 self._chaininfocache = {}
1079 1094 # revlog header -> revlog compressor
1080 1095 self._decompressors = {}
1081 1096
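    # A hypothetical usage sketch (path and filename invented): a revlog is
    # normally opened through a store vfs, e.g.
    #
    #     from mercurial import vfs as vfsmod
    #     opener = vfsmod.vfs('/path/to/repo/.hg/store')
    #     rl = revlog(opener, 'data/foo.txt.i')
    #     node = rl.tip()
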
1082 1097 @util.propertycache
1083 1098 def _compressor(self):
1084 1099 return util.compengines[self._compengine].revlogcompressor()
1085 1100
1086 1101 def _indexfp(self, mode='r'):
1087 1102 """file object for the revlog's index file"""
1088 1103 args = {r'mode': mode}
1089 1104 if mode != 'r':
1090 1105 args[r'checkambig'] = self._checkambig
1091 1106 if mode == 'w':
1092 1107 args[r'atomictemp'] = True
1093 1108 return self.opener(self.indexfile, **args)
1094 1109
1095 1110 def _datafp(self, mode='r'):
1096 1111 """file object for the revlog's data file"""
1097 1112 return self.opener(self.datafile, mode=mode)
1098 1113
1099 1114 @contextlib.contextmanager
1100 1115 def _datareadfp(self, existingfp=None):
1101 1116 """file object suitable to read data"""
1102 1117 if existingfp is not None:
1103 1118 yield existingfp
1104 1119 else:
1105 1120 if self._inline:
1106 1121 func = self._indexfp
1107 1122 else:
1108 1123 func = self._datafp
1109 1124 with func() as fp:
1110 1125 yield fp
1111 1126
1112 1127 def tip(self):
1113 1128 return self.node(len(self.index) - 1)
1114 1129 def __contains__(self, rev):
1115 1130 return 0 <= rev < len(self)
1116 1131 def __len__(self):
1117 1132 return len(self.index)
1118 1133 def __iter__(self):
1119 1134 return iter(pycompat.xrange(len(self)))
1120 1135 def revs(self, start=0, stop=None):
1121 1136 """iterate over all rev in this revlog (from start to stop)"""
1122 1137 step = 1
1123 1138 length = len(self)
1124 1139 if stop is not None:
1125 1140 if start > stop:
1126 1141 step = -1
1127 1142 stop += step
1128 1143 if stop > length:
1129 1144 stop = length
1130 1145 else:
1131 1146 stop = length
1132 1147 return pycompat.xrange(start, stop, step)
1133 1148
1134 1149 @util.propertycache
1135 1150 def nodemap(self):
1136 1151 if self.index:
1137 1152 # populate mapping down to the initial node
1138 1153 node0 = self.index[0][7] # get around changelog filtering
1139 1154 self.rev(node0)
1140 1155 return self._nodecache
1141 1156
1142 1157 def hasnode(self, node):
1143 1158 try:
1144 1159 self.rev(node)
1145 1160 return True
1146 1161 except KeyError:
1147 1162 return False
1148 1163
1149 1164 def candelta(self, baserev, rev):
1150 1165 """whether two revisions (baserev, rev) can be delta-ed or not"""
1151 1166 # Disable delta if either rev requires a content-changing flag
1152 1167 # processor (ex. LFS). This is because such flag processor can alter
1153 1168 # the rawtext content that the delta will be based on, and two clients
1154 1169 # could have a same revlog node with different flags (i.e. different
1155 1170 # rawtext contents) and the delta could be incompatible.
1156 1171 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1157 1172 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1158 1173 return False
1159 1174 return True
1160 1175
1161 1176 def clearcaches(self):
1162 1177 self._cache = None
1163 1178 self._chainbasecache.clear()
1164 1179 self._chunkcache = (0, '')
1165 1180 self._pcache = {}
1166 1181
1167 1182 try:
1168 1183 self._nodecache.clearcaches()
1169 1184 except AttributeError:
1170 1185 self._nodecache = {nullid: nullrev}
1171 1186 self._nodepos = None
1172 1187
1173 1188 def rev(self, node):
1174 1189 try:
1175 1190 return self._nodecache[node]
1176 1191 except TypeError:
1177 1192 raise
1178 1193 except RevlogError:
1179 1194 # parsers.c radix tree lookup failed
1180 1195 if node == wdirid or node in wdirfilenodeids:
1181 1196 raise error.WdirUnsupported
1182 1197 raise LookupError(node, self.indexfile, _('no node'))
1183 1198 except KeyError:
1184 1199 # pure python cache lookup failed
1185 1200 n = self._nodecache
1186 1201 i = self.index
1187 1202 p = self._nodepos
1188 1203 if p is None:
1189 1204 p = len(i) - 1
1190 1205 else:
1191 1206 assert p < len(i)
1192 1207 for r in pycompat.xrange(p, -1, -1):
1193 1208 v = i[r][7]
1194 1209 n[v] = r
1195 1210 if v == node:
1196 1211 self._nodepos = r - 1
1197 1212 return r
1198 1213 if node == wdirid or node in wdirfilenodeids:
1199 1214 raise error.WdirUnsupported
1200 1215 raise LookupError(node, self.indexfile, _('no node'))
1201 1216
1202 1217 # Accessors for index entries.
1203 1218
1204 1219 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1205 1220 # are flags.
1206 1221 def start(self, rev):
1207 1222 return int(self.index[rev][0] >> 16)
1208 1223
1209 1224 def flags(self, rev):
1210 1225 return self.index[rev][0] & 0xFFFF
1211 1226
1212 1227 def length(self, rev):
1213 1228 return self.index[rev][1]
1214 1229
1215 1230 def rawsize(self, rev):
1216 1231 """return the length of the uncompressed text for a given revision"""
1217 1232 l = self.index[rev][2]
1218 1233 if l >= 0:
1219 1234 return l
1220 1235
1221 1236 t = self.revision(rev, raw=True)
1222 1237 return len(t)
1223 1238
1224 1239 def size(self, rev):
1225 1240 """length of non-raw text (processed by a "read" flag processor)"""
1226 1241 # fast path: if no "read" flag processor could change the content,
1227 1242 # size is rawsize. note: ELLIPSIS is known to not change the content.
1228 1243 flags = self.flags(rev)
1229 1244 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1230 1245 return self.rawsize(rev)
1231 1246
1232 1247 return len(self.revision(rev, raw=False))
1233 1248
1234 1249 def chainbase(self, rev):
1235 1250 base = self._chainbasecache.get(rev)
1236 1251 if base is not None:
1237 1252 return base
1238 1253
1239 1254 index = self.index
1240 1255 iterrev = rev
1241 1256 base = index[iterrev][3]
1242 1257 while base != iterrev:
1243 1258 iterrev = base
1244 1259 base = index[iterrev][3]
1245 1260
1246 1261 self._chainbasecache[rev] = base
1247 1262 return base
1248 1263
1249 1264 def linkrev(self, rev):
1250 1265 return self.index[rev][4]
1251 1266
1252 1267 def parentrevs(self, rev):
1253 1268 try:
1254 1269 entry = self.index[rev]
1255 1270 except IndexError:
1256 1271 if rev == wdirrev:
1257 1272 raise error.WdirUnsupported
1258 1273 raise
1259 1274
1260 1275 return entry[5], entry[6]
1261 1276
1262 1277 def node(self, rev):
1263 1278 try:
1264 1279 return self.index[rev][7]
1265 1280 except IndexError:
1266 1281 if rev == wdirrev:
1267 1282 raise error.WdirUnsupported
1268 1283 raise
1269 1284
1270 1285 # Derived from index values.
1271 1286
1272 1287 def end(self, rev):
1273 1288 return self.start(rev) + self.length(rev)
1274 1289
1275 1290 def parents(self, node):
1276 1291 i = self.index
1277 1292 d = i[self.rev(node)]
1278 1293 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1279 1294
1280 1295 def chainlen(self, rev):
1281 1296 return self._chaininfo(rev)[0]
1282 1297
1283 1298 def _chaininfo(self, rev):
1284 1299 chaininfocache = self._chaininfocache
1285 1300 if rev in chaininfocache:
1286 1301 return chaininfocache[rev]
1287 1302 index = self.index
1288 1303 generaldelta = self._generaldelta
1289 1304 iterrev = rev
1290 1305 e = index[iterrev]
1291 1306 clen = 0
1292 1307 compresseddeltalen = 0
1293 1308 while iterrev != e[3]:
1294 1309 clen += 1
1295 1310 compresseddeltalen += e[1]
1296 1311 if generaldelta:
1297 1312 iterrev = e[3]
1298 1313 else:
1299 1314 iterrev -= 1
1300 1315 if iterrev in chaininfocache:
1301 1316 t = chaininfocache[iterrev]
1302 1317 clen += t[0]
1303 1318 compresseddeltalen += t[1]
1304 1319 break
1305 1320 e = index[iterrev]
1306 1321 else:
1307 1322 # Add text length of base since decompressing that also takes
1308 1323 # work. For cache hits the length is already included.
1309 1324 compresseddeltalen += e[1]
1310 1325 r = (clen, compresseddeltalen)
1311 1326 chaininfocache[rev] = r
1312 1327 return r
1313 1328
1314 1329 def _deltachain(self, rev, stoprev=None):
1315 1330 """Obtain the delta chain for a revision.
1316 1331
1317 1332 ``stoprev`` specifies a revision to stop at. If not specified, we
1318 1333 stop at the base of the chain.
1319 1334
1320 1335 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1321 1336 revs in ascending order and ``stopped`` is a bool indicating whether
1322 1337 ``stoprev`` was hit.
1323 1338 """
1324 1339 # Try C implementation.
1325 1340 try:
1326 1341 return self.index.deltachain(rev, stoprev, self._generaldelta)
1327 1342 except AttributeError:
1328 1343 pass
1329 1344
1330 1345 chain = []
1331 1346
1332 1347 # Alias to prevent attribute lookup in tight loop.
1333 1348 index = self.index
1334 1349 generaldelta = self._generaldelta
1335 1350
1336 1351 iterrev = rev
1337 1352 e = index[iterrev]
1338 1353 while iterrev != e[3] and iterrev != stoprev:
1339 1354 chain.append(iterrev)
1340 1355 if generaldelta:
1341 1356 iterrev = e[3]
1342 1357 else:
1343 1358 iterrev -= 1
1344 1359 e = index[iterrev]
1345 1360
1346 1361 if iterrev == stoprev:
1347 1362 stopped = True
1348 1363 else:
1349 1364 chain.append(iterrev)
1350 1365 stopped = False
1351 1366
1352 1367 chain.reverse()
1353 1368 return chain, stopped
1354 1369
1355 1370 def ancestors(self, revs, stoprev=0, inclusive=False):
1356 1371 """Generate the ancestors of 'revs' in reverse topological order.
1357 1372 Does not generate revs lower than stoprev.
1358 1373
1359 1374 See the documentation for ancestor.lazyancestors for more details."""
1360 1375
1361 1376 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1362 1377 inclusive=inclusive)
1363 1378
1364 1379 def descendants(self, revs):
1365 1380 """Generate the descendants of 'revs' in revision order.
1366 1381
1367 1382 Yield a sequence of revision numbers starting with a child of
1368 1383 some rev in revs, i.e., each revision is *not* considered a
1369 1384 descendant of itself. Results are ordered by revision number (a
1370 1385 topological sort)."""
1371 1386 first = min(revs)
1372 1387 if first == nullrev:
1373 1388 for i in self:
1374 1389 yield i
1375 1390 return
1376 1391
1377 1392 seen = set(revs)
1378 1393 for i in self.revs(start=first + 1):
1379 1394 for x in self.parentrevs(i):
1380 1395 if x != nullrev and x in seen:
1381 1396 seen.add(i)
1382 1397 yield i
1383 1398 break
1384 1399
1385 1400 def findcommonmissing(self, common=None, heads=None):
1386 1401 """Return a tuple of the ancestors of common and the ancestors of heads
1387 1402 that are not ancestors of common. In revset terminology, we return the
1388 1403 tuple:
1389 1404
1390 1405 ::common, (::heads) - (::common)
1391 1406
1392 1407 The list is sorted by revision number, meaning it is
1393 1408 topologically sorted.
1394 1409
1395 1410 'heads' and 'common' are both lists of node IDs. If heads is
1396 1411 not supplied, uses all of the revlog's heads. If common is not
1397 1412 supplied, uses nullid."""
1398 1413 if common is None:
1399 1414 common = [nullid]
1400 1415 if heads is None:
1401 1416 heads = self.heads()
1402 1417
1403 1418 common = [self.rev(n) for n in common]
1404 1419 heads = [self.rev(n) for n in heads]
1405 1420
1406 1421 # we want the ancestors, but inclusive
1407 1422 class lazyset(object):
1408 1423 def __init__(self, lazyvalues):
1409 1424 self.addedvalues = set()
1410 1425 self.lazyvalues = lazyvalues
1411 1426
1412 1427 def __contains__(self, value):
1413 1428 return value in self.addedvalues or value in self.lazyvalues
1414 1429
1415 1430 def __iter__(self):
1416 1431 added = self.addedvalues
1417 1432 for r in added:
1418 1433 yield r
1419 1434 for r in self.lazyvalues:
1420 1435 if not r in added:
1421 1436 yield r
1422 1437
1423 1438 def add(self, value):
1424 1439 self.addedvalues.add(value)
1425 1440
1426 1441 def update(self, values):
1427 1442 self.addedvalues.update(values)
1428 1443
1429 1444 has = lazyset(self.ancestors(common))
1430 1445 has.add(nullrev)
1431 1446 has.update(common)
1432 1447
1433 1448 # take all ancestors from heads that aren't in has
1434 1449 missing = set()
1435 1450 visit = collections.deque(r for r in heads if r not in has)
1436 1451 while visit:
1437 1452 r = visit.popleft()
1438 1453 if r in missing:
1439 1454 continue
1440 1455 else:
1441 1456 missing.add(r)
1442 1457 for p in self.parentrevs(r):
1443 1458 if p not in has:
1444 1459 visit.append(p)
1445 1460 missing = list(missing)
1446 1461 missing.sort()
1447 1462 return has, [self.node(miss) for miss in missing]
1448 1463
1449 1464 def incrementalmissingrevs(self, common=None):
1450 1465 """Return an object that can be used to incrementally compute the
1451 1466 revision numbers of the ancestors of arbitrary sets that are not
1452 1467 ancestors of common. This is an ancestor.incrementalmissingancestors
1453 1468 object.
1454 1469
1455 1470 'common' is a list of revision numbers. If common is not supplied, uses
1456 1471 nullrev.
1457 1472 """
1458 1473 if common is None:
1459 1474 common = [nullrev]
1460 1475
1461 1476 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1462 1477
1463 1478 def findmissingrevs(self, common=None, heads=None):
1464 1479 """Return the revision numbers of the ancestors of heads that
1465 1480 are not ancestors of common.
1466 1481
1467 1482 More specifically, return a list of revision numbers corresponding to
1468 1483 nodes N such that every N satisfies the following constraints:
1469 1484
1470 1485 1. N is an ancestor of some node in 'heads'
1471 1486 2. N is not an ancestor of any node in 'common'
1472 1487
1473 1488 The list is sorted by revision number, meaning it is
1474 1489 topologically sorted.
1475 1490
1476 1491 'heads' and 'common' are both lists of revision numbers. If heads is
1477 1492 not supplied, uses all of the revlog's heads. If common is not
1478 1493 supplied, uses nullid."""
1479 1494 if common is None:
1480 1495 common = [nullrev]
1481 1496 if heads is None:
1482 1497 heads = self.headrevs()
1483 1498
1484 1499 inc = self.incrementalmissingrevs(common=common)
1485 1500 return inc.missingancestors(heads)
1486 1501
1487 1502 def findmissing(self, common=None, heads=None):
1488 1503 """Return the ancestors of heads that are not ancestors of common.
1489 1504
1490 1505 More specifically, return a list of nodes N such that every N
1491 1506 satisfies the following constraints:
1492 1507
1493 1508 1. N is an ancestor of some node in 'heads'
1494 1509 2. N is not an ancestor of any node in 'common'
1495 1510
1496 1511 The list is sorted by revision number, meaning it is
1497 1512 topologically sorted.
1498 1513
1499 1514 'heads' and 'common' are both lists of node IDs. If heads is
1500 1515 not supplied, uses all of the revlog's heads. If common is not
1501 1516 supplied, uses nullid."""
1502 1517 if common is None:
1503 1518 common = [nullid]
1504 1519 if heads is None:
1505 1520 heads = self.heads()
1506 1521
1507 1522 common = [self.rev(n) for n in common]
1508 1523 heads = [self.rev(n) for n in heads]
1509 1524
1510 1525 inc = self.incrementalmissingrevs(common=common)
1511 1526 return [self.node(r) for r in inc.missingancestors(heads)]
1512 1527
1513 1528 def nodesbetween(self, roots=None, heads=None):
1514 1529 """Return a topological path from 'roots' to 'heads'.
1515 1530
1516 1531 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1517 1532 topologically sorted list of all nodes N that satisfy both of
1518 1533 these constraints:
1519 1534
1520 1535 1. N is a descendant of some node in 'roots'
1521 1536 2. N is an ancestor of some node in 'heads'
1522 1537
1523 1538 Every node is considered to be both a descendant and an ancestor
1524 1539 of itself, so every reachable node in 'roots' and 'heads' will be
1525 1540 included in 'nodes'.
1526 1541
1527 1542 'outroots' is the list of reachable nodes in 'roots', i.e., the
1528 1543 subset of 'roots' that is returned in 'nodes'. Likewise,
1529 1544 'outheads' is the subset of 'heads' that is also in 'nodes'.
1530 1545
1531 1546 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1532 1547 unspecified, uses nullid as the only root. If 'heads' is
1533 1548 unspecified, uses list of all of the revlog's heads."""
1534 1549 nonodes = ([], [], [])
1535 1550 if roots is not None:
1536 1551 roots = list(roots)
1537 1552 if not roots:
1538 1553 return nonodes
1539 1554 lowestrev = min([self.rev(n) for n in roots])
1540 1555 else:
1541 1556 roots = [nullid] # Everybody's a descendant of nullid
1542 1557 lowestrev = nullrev
1543 1558 if (lowestrev == nullrev) and (heads is None):
1544 1559 # We want _all_ the nodes!
1545 1560 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1546 1561 if heads is None:
1547 1562 # All nodes are ancestors, so the latest ancestor is the last
1548 1563 # node.
1549 1564 highestrev = len(self) - 1
1550 1565 # Set ancestors to None to signal that every node is an ancestor.
1551 1566 ancestors = None
1552 1567 # Set heads to an empty dictionary for later discovery of heads
1553 1568 heads = {}
1554 1569 else:
1555 1570 heads = list(heads)
1556 1571 if not heads:
1557 1572 return nonodes
1558 1573 ancestors = set()
1559 1574 # Turn heads into a dictionary so we can remove 'fake' heads.
1560 1575 # Also, later we will be using it to filter out the heads we can't
1561 1576 # find from roots.
1562 1577 heads = dict.fromkeys(heads, False)
1563 1578 # Start at the top and keep marking parents until we're done.
1564 1579 nodestotag = set(heads)
1565 1580 # Remember where the top was so we can use it as a limit later.
1566 1581 highestrev = max([self.rev(n) for n in nodestotag])
1567 1582 while nodestotag:
1568 1583 # grab a node to tag
1569 1584 n = nodestotag.pop()
1570 1585 # Never tag nullid
1571 1586 if n == nullid:
1572 1587 continue
1573 1588 # A node's revision number represents its place in a
1574 1589 # topologically sorted list of nodes.
1575 1590 r = self.rev(n)
1576 1591 if r >= lowestrev:
1577 1592 if n not in ancestors:
1578 1593 # If we are possibly a descendant of one of the roots
1579 1594 # and we haven't already been marked as an ancestor
1580 1595 ancestors.add(n) # Mark as ancestor
1581 1596 # Add non-nullid parents to list of nodes to tag.
1582 1597 nodestotag.update([p for p in self.parents(n) if
1583 1598 p != nullid])
1584 1599 elif n in heads: # We've seen it before, is it a fake head?
1585 1600 # So it is, real heads should not be the ancestors of
1586 1601 # any other heads.
1587 1602 heads.pop(n)
1588 1603 if not ancestors:
1589 1604 return nonodes
1590 1605 # Now that we have our set of ancestors, we want to remove any
1591 1606 # roots that are not ancestors.
1592 1607
1593 1608 # If one of the roots was nullid, everything is included anyway.
1594 1609 if lowestrev > nullrev:
1595 1610 # But, since we weren't, let's recompute the lowest rev to not
1596 1611 # include roots that aren't ancestors.
1597 1612
1598 1613 # Filter out roots that aren't ancestors of heads
1599 1614 roots = [root for root in roots if root in ancestors]
1600 1615 # Recompute the lowest revision
1601 1616 if roots:
1602 1617 lowestrev = min([self.rev(root) for root in roots])
1603 1618 else:
1604 1619 # No more roots? Return empty list
1605 1620 return nonodes
1606 1621 else:
1607 1622 # We are descending from nullid, and don't need to care about
1608 1623 # any other roots.
1609 1624 lowestrev = nullrev
1610 1625 roots = [nullid]
1611 1626 # Transform our roots list into a set.
1612 1627 descendants = set(roots)
1613 1628 # Also, keep the original roots so we can filter out roots that aren't
1614 1629 # 'real' roots (i.e. are descended from other roots).
1615 1630 roots = descendants.copy()
1616 1631 # Our topologically sorted list of output nodes.
1617 1632 orderedout = []
1618 1633 # Don't start at nullid since we don't want nullid in our output list,
1619 1634 # and if nullid shows up in descendants, empty parents will look like
1620 1635 # they're descendants.
1621 1636 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1622 1637 n = self.node(r)
1623 1638 isdescendant = False
1624 1639 if lowestrev == nullrev: # Everybody is a descendant of nullid
1625 1640 isdescendant = True
1626 1641 elif n in descendants:
1627 1642 # n is already a descendant
1628 1643 isdescendant = True
1629 1644 # This check only needs to be done here because all the roots
1630 1645 # will start being marked is descendants before the loop.
1631 1646 if n in roots:
1632 1647 # If n was a root, check if it's a 'real' root.
1633 1648 p = tuple(self.parents(n))
1634 1649 # If any of its parents are descendants, it's not a root.
1635 1650 if (p[0] in descendants) or (p[1] in descendants):
1636 1651 roots.remove(n)
1637 1652 else:
1638 1653 p = tuple(self.parents(n))
1639 1654 # A node is a descendant if either of its parents are
1640 1655 # descendants. (We seeded the descendants set with the roots
1641 1656 # up there, remember?)
1642 1657 if (p[0] in descendants) or (p[1] in descendants):
1643 1658 descendants.add(n)
1644 1659 isdescendant = True
1645 1660 if isdescendant and ((ancestors is None) or (n in ancestors)):
1646 1661 # Only include nodes that are both descendants and ancestors.
1647 1662 orderedout.append(n)
1648 1663 if (ancestors is not None) and (n in heads):
1649 1664 # We're trying to figure out which heads are reachable
1650 1665 # from roots.
1651 1666 # Mark this head as having been reached
1652 1667 heads[n] = True
1653 1668 elif ancestors is None:
1654 1669 # Otherwise, we're trying to discover the heads.
1655 1670 # Assume this is a head because if it isn't, the next step
1656 1671 # will eventually remove it.
1657 1672 heads[n] = True
1658 1673 # But, obviously its parents aren't.
1659 1674 for p in self.parents(n):
1660 1675 heads.pop(p, None)
1661 1676 heads = [head for head, flag in heads.iteritems() if flag]
1662 1677 roots = list(roots)
1663 1678 assert orderedout
1664 1679 assert roots
1665 1680 assert heads
1666 1681 return (orderedout, roots, heads)
1667 1682
1668 1683 def headrevs(self):
1669 1684 try:
1670 1685 return self.index.headrevs()
1671 1686 except AttributeError:
1672 1687 return self._headrevs()
1673 1688
1674 1689 def computephases(self, roots):
1675 1690 return self.index.computephasesmapsets(roots)
1676 1691
1677 1692 def _headrevs(self):
1678 1693 count = len(self)
1679 1694 if not count:
1680 1695 return [nullrev]
1681 1696 # we won't iterate over filtered revs, so nobody is a head at the start
1682 1697 ishead = [0] * (count + 1)
1683 1698 index = self.index
1684 1699 for r in self:
1685 1700 ishead[r] = 1 # I may be a head
1686 1701 e = index[r]
1687 1702 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1688 1703 return [r for r, val in enumerate(ishead) if val]
1689 1704
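Restated outside the revlog, the ``_headrevs`` scan above is just "every revision is a head until it is seen as a parent". A toy version with illustrative names, using -1 in place of nullrev:

```python
def toy_headrevs(parentrevs, nrevs):
    ishead = [True] * nrevs            # everyone may be a head...
    for r in range(nrevs):
        for p in parentrevs(r):
            if p != -1:
                ishead[p] = False      # ...until seen as a parent
    return [r for r in range(nrevs) if ishead[r]]

# Revisions 1 and 2 both have parent 0, so both are heads.
parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}
assert toy_headrevs(lambda r: parents[r], 3) == [1, 2]
```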
1690 1705 def heads(self, start=None, stop=None):
1691 1706 """return the list of all nodes that have no children
1692 1707
1693 1708 if start is specified, only heads that are descendants of
1694 1709 start will be returned
1695 1710 if stop is specified, it will consider all the revs from stop
1696 1711 as if they had no children
1697 1712 """
1698 1713 if start is None and stop is None:
1699 1714 if not len(self):
1700 1715 return [nullid]
1701 1716 return [self.node(r) for r in self.headrevs()]
1702 1717
1703 1718 if start is None:
1704 1719 start = nullid
1705 1720 if stop is None:
1706 1721 stop = []
1707 1722 stoprevs = set([self.rev(n) for n in stop])
1708 1723 startrev = self.rev(start)
1709 1724 reachable = {startrev}
1710 1725 heads = {startrev}
1711 1726
1712 1727 parentrevs = self.parentrevs
1713 1728 for r in self.revs(start=startrev + 1):
1714 1729 for p in parentrevs(r):
1715 1730 if p in reachable:
1716 1731 if r not in stoprevs:
1717 1732 reachable.add(r)
1718 1733 heads.add(r)
1719 1734 if p in heads and p not in stoprevs:
1720 1735 heads.remove(p)
1721 1736
1722 1737 return [self.node(r) for r in heads]
1723 1738
1724 1739 def children(self, node):
1725 1740 """find the children of a given node"""
1726 1741 c = []
1727 1742 p = self.rev(node)
1728 1743 for r in self.revs(start=p + 1):
1729 1744 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1730 1745 if prevs:
1731 1746 for pr in prevs:
1732 1747 if pr == p:
1733 1748 c.append(self.node(r))
1734 1749 elif p == nullrev:
1735 1750 c.append(self.node(r))
1736 1751 return c
1737 1752
1738 1753 def commonancestorsheads(self, a, b):
1739 1754 """calculate all the heads of the common ancestors of nodes a and b"""
1740 1755 a, b = self.rev(a), self.rev(b)
1741 1756 ancs = self._commonancestorsheads(a, b)
1742 1757 return pycompat.maplist(self.node, ancs)
1743 1758
1744 1759 def _commonancestorsheads(self, *revs):
1745 1760 """calculate all the heads of the common ancestors of revs"""
1746 1761 try:
1747 1762 ancs = self.index.commonancestorsheads(*revs)
1748 1763 except (AttributeError, OverflowError): # C implementation failed
1749 1764 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1750 1765 return ancs
1751 1766
1752 1767 def isancestor(self, a, b):
1753 1768 """return True if node a is an ancestor of node b
1754 1769
1755 1770 A revision is considered an ancestor of itself."""
1756 1771 a, b = self.rev(a), self.rev(b)
1757 1772 return self.isancestorrev(a, b)
1758 1773
1759 1774 def isancestorrev(self, a, b):
1760 1775 """return True if revision a is an ancestor of revision b
1761 1776
1762 1777 A revision is considered an ancestor of itself.
1763 1778
1764 1779 The implementation of this is trivial but the use of
1765 1780 commonancestorsheads is not."""
1766 1781 if a == nullrev:
1767 1782 return True
1768 1783 elif a == b:
1769 1784 return True
1770 1785 elif a > b:
1771 1786 return False
1772 1787 return a in self._commonancestorsheads(a, b)
1773 1788
1774 1789 def ancestor(self, a, b):
1775 1790 """calculate the "best" common ancestor of nodes a and b"""
1776 1791
1777 1792 a, b = self.rev(a), self.rev(b)
1778 1793 try:
1779 1794 ancs = self.index.ancestors(a, b)
1780 1795 except (AttributeError, OverflowError):
1781 1796 ancs = ancestor.ancestors(self.parentrevs, a, b)
1782 1797 if ancs:
1783 1798 # choose a consistent winner when there's a tie
1784 1799 return min(map(self.node, ancs))
1785 1800 return nullid
1786 1801
1787 1802 def _match(self, id):
1788 1803 if isinstance(id, int):
1789 1804 # rev
1790 1805 return self.node(id)
1791 1806 if len(id) == 20:
1792 1807 # possibly a binary node
1793 1808 # odds of a binary node being all hex in ASCII are 1 in 10**25
1794 1809 try:
1795 1810 node = id
1796 1811 self.rev(node) # quick search the index
1797 1812 return node
1798 1813 except LookupError:
1799 1814 pass # may be partial hex id
1800 1815 try:
1801 1816 # str(rev)
1802 1817 rev = int(id)
1803 1818 if "%d" % rev != id:
1804 1819 raise ValueError
1805 1820 if rev < 0:
1806 1821 rev = len(self) + rev
1807 1822 if rev < 0 or rev >= len(self):
1808 1823 raise ValueError
1809 1824 return self.node(rev)
1810 1825 except (ValueError, OverflowError):
1811 1826 pass
1812 1827 if len(id) == 40:
1813 1828 try:
1814 1829 # a full hex nodeid?
1815 1830 node = bin(id)
1816 1831 self.rev(node)
1817 1832 return node
1818 1833 except (TypeError, LookupError):
1819 1834 pass
1820 1835
1821 1836 def _partialmatch(self, id):
1822 1837 # we don't care about wdirfilenodeids as they should always be full hashes
1823 1838 maybewdir = wdirhex.startswith(id)
1824 1839 try:
1825 1840 partial = self.index.partialmatch(id)
1826 1841 if partial and self.hasnode(partial):
1827 1842 if maybewdir:
1828 1843 # single 'ff...' match in radix tree, ambiguous with wdir
1829 1844 raise RevlogError
1830 1845 return partial
1831 1846 if maybewdir:
1832 1847 # no 'ff...' match in radix tree, wdir identified
1833 1848 raise error.WdirUnsupported
1834 1849 return None
1835 1850 except RevlogError:
1836 1851 # parsers.c radix tree lookup gave multiple matches
1837 1852 # fast path: for unfiltered changelog, radix tree is accurate
1838 1853 if not getattr(self, 'filteredrevs', None):
1839 1854 raise AmbiguousPrefixLookupError(id, self.indexfile,
1840 1855 _('ambiguous identifier'))
1841 1856 # fall through to slow path that filters hidden revisions
1842 1857 except (AttributeError, ValueError):
1843 1858 # we are pure python, or key was too short to search radix tree
1844 1859 pass
1845 1860
1846 1861 if id in self._pcache:
1847 1862 return self._pcache[id]
1848 1863
1849 1864 if len(id) <= 40:
1850 1865 try:
1851 1866 # hex(node)[:...]
1852 1867 l = len(id) // 2 # grab an even number of digits
1853 1868 prefix = bin(id[:l * 2])
1854 1869 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1855 1870 nl = [n for n in nl if hex(n).startswith(id) and
1856 1871 self.hasnode(n)]
1857 1872 if nullhex.startswith(id):
1858 1873 nl.append(nullid)
1859 1874 if len(nl) > 0:
1860 1875 if len(nl) == 1 and not maybewdir:
1861 1876 self._pcache[id] = nl[0]
1862 1877 return nl[0]
1863 1878 raise AmbiguousPrefixLookupError(id, self.indexfile,
1864 1879 _('ambiguous identifier'))
1865 1880 if maybewdir:
1866 1881 raise error.WdirUnsupported
1867 1882 return None
1868 1883 except TypeError:
1869 1884 pass
1870 1885
1871 1886 def lookup(self, id):
1872 1887 """locate a node based on:
1873 1888 - revision number or str(revision number)
1874 1889 - nodeid or subset of hex nodeid
1875 1890 """
1876 1891 n = self._match(id)
1877 1892 if n is not None:
1878 1893 return n
1879 1894 n = self._partialmatch(id)
1880 1895 if n:
1881 1896 return n
1882 1897
1883 1898 raise LookupError(id, self.indexfile, _('no match found'))
1884 1899
1885 1900 def shortest(self, node, minlength=1):
1886 1901 """Find the shortest unambiguous prefix that matches node."""
1887 1902 def isvalid(prefix):
1888 1903 try:
1889 1904 node = self._partialmatch(prefix)
1890 1905 except error.RevlogError:
1891 1906 return False
1892 1907 except error.WdirUnsupported:
1893 1908 # single 'ff...' match
1894 1909 return True
1895 1910 if node is None:
1896 1911 raise LookupError(node, self.indexfile, _('no node'))
1897 1912 return True
1898 1913
1899 1914 def maybewdir(prefix):
1900 1915 return all(c == 'f' for c in prefix)
1901 1916
1902 1917 hexnode = hex(node)
1903 1918
1904 1919 def disambiguate(hexnode, minlength):
1905 1920 """Disambiguate against wdirid."""
1906 1921 for length in range(minlength, 41):
1907 1922 prefix = hexnode[:length]
1908 1923 if not maybewdir(prefix):
1909 1924 return prefix
1910 1925
1911 1926 if not getattr(self, 'filteredrevs', None):
1912 1927 try:
1913 1928 length = max(self.index.shortest(node), minlength)
1914 1929 return disambiguate(hexnode, length)
1915 1930 except RevlogError:
1916 1931 if node != wdirid:
1917 1932 raise LookupError(node, self.indexfile, _('no node'))
1918 1933 except AttributeError:
1919 1934 # Fall through to pure code
1920 1935 pass
1921 1936
1922 1937 if node == wdirid:
1923 1938 for length in range(minlength, 41):
1924 1939 prefix = hexnode[:length]
1925 1940 if isvalid(prefix):
1926 1941 return prefix
1927 1942
1928 1943 for length in range(minlength, 41):
1929 1944 prefix = hexnode[:length]
1930 1945 if isvalid(prefix):
1931 1946 return disambiguate(hexnode, length)
1932 1947
1933 1948 def cmp(self, node, text):
1934 1949 """compare text with a given file revision
1935 1950
1936 1951 returns True if text is different from what is stored.
1937 1952 """
1938 1953 p1, p2 = self.parents(node)
1939 1954 return hash(text, p1, p2) != node
1940 1955
1941 1956 def _cachesegment(self, offset, data):
1942 1957 """Add a segment to the revlog cache.
1943 1958
1944 1959 Accepts an absolute offset and the data that is at that location.
1945 1960 """
1946 1961 o, d = self._chunkcache
1947 1962 # try to add to existing cache
1948 1963 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1949 1964 self._chunkcache = o, d + data
1950 1965 else:
1951 1966 self._chunkcache = offset, data
1952 1967
1953 1968 def _readsegment(self, offset, length, df=None):
1954 1969 """Load a segment of raw data from the revlog.
1955 1970
1956 1971 Accepts an absolute offset, length to read, and an optional existing
1957 1972 file handle to read from.
1958 1973
1959 1974 If an existing file handle is passed, it will be seeked and the
1960 1975 original seek position will NOT be restored.
1961 1976
1962 1977 Returns a str or buffer of raw byte data.
1963 1978 """
1964 1979 # Cache data both forward and backward around the requested
1965 1980 # data, in a fixed size window. This helps speed up operations
1966 1981 # involving reading the revlog backwards.
1967 1982 cachesize = self._chunkcachesize
1968 1983 realoffset = offset & ~(cachesize - 1)
1969 1984 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1970 1985 - realoffset)
1971 1986 with self._datareadfp(df) as df:
1972 1987 df.seek(realoffset)
1973 1988 d = df.read(reallength)
1974 1989 self._cachesegment(realoffset, d)
1975 1990 if offset != realoffset or reallength != length:
1976 1991 return util.buffer(d, offset - realoffset, length)
1977 1992 return d
1978 1993
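The two masked expressions above round the requested byte range out to cache-window boundaries, which is what lets adjacent reads hit the cache. A worked example, assuming (as the mask trick requires) a power-of-two cache size:

```python
cachesize = 65536                 # assumed power-of-two window
offset, length = 70000, 1000      # an arbitrary request

realoffset = offset & ~(cachesize - 1)       # round start down
realend = (offset + length + cachesize) & ~(cachesize - 1)
reallength = realend - realoffset            # padded read size

assert realoffset == 65536 and reallength == 65536
assert realoffset <= offset
assert realoffset + reallength >= offset + length
```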
1979 1994 def _getsegment(self, offset, length, df=None):
1980 1995 """Obtain a segment of raw data from the revlog.
1981 1996
1982 1997 Accepts an absolute offset, length of bytes to obtain, and an
1983 1998 optional file handle to the already-opened revlog. If the file
1984 1999 handle is used, its original seek position will not be preserved.
1985 2000
1986 2001 Requests for data may be returned from a cache.
1987 2002
1988 2003 Returns a str or a buffer instance of raw byte data.
1989 2004 """
1990 2005 o, d = self._chunkcache
1991 2006 l = len(d)
1992 2007
1993 2008 # is it in the cache?
1994 2009 cachestart = offset - o
1995 2010 cacheend = cachestart + length
1996 2011 if cachestart >= 0 and cacheend <= l:
1997 2012 if cachestart == 0 and cacheend == l:
1998 2013 return d # avoid a copy
1999 2014 return util.buffer(d, cachestart, cacheend - cachestart)
2000 2015
2001 2016 return self._readsegment(offset, length, df=df)
2002 2017
2003 2018 def _getsegmentforrevs(self, startrev, endrev, df=None):
2004 2019 """Obtain a segment of raw data corresponding to a range of revisions.
2005 2020
2006 2021 Accepts the start and end revisions and an optional already-open
2007 2022 file handle to be used for reading. If the file handle is read, its
2008 2023 seek position will not be preserved.
2009 2024
2010 2025 Requests for data may be satisfied by a cache.
2011 2026
2012 2027 Returns a 2-tuple of (offset, data) for the requested range of
2013 2028 revisions. Offset is the integer offset from the beginning of the
2014 2029 revlog and data is a str or buffer of the raw byte data.
2015 2030
2016 2031 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2017 2032 to determine where each revision's data begins and ends.
2018 2033 """
2019 2034 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2020 2035 # (functions are expensive).
2021 2036 index = self.index
2022 2037 istart = index[startrev]
2023 2038 start = int(istart[0] >> 16)
2024 2039 if startrev == endrev:
2025 2040 end = start + istart[1]
2026 2041 else:
2027 2042 iend = index[endrev]
2028 2043 end = int(iend[0] >> 16) + iend[1]
2029 2044
2030 2045 if self._inline:
2031 2046 start += (startrev + 1) * self._io.size
2032 2047 end += (endrev + 1) * self._io.size
2033 2048 length = end - start
2034 2049
2035 2050 return start, self._getsegment(start, length, df=df)
2036 2051
2037 2052 def _chunk(self, rev, df=None):
2038 2053 """Obtain a single decompressed chunk for a revision.
2039 2054
2040 2055 Accepts an integer revision and an optional already-open file handle
2041 2056 to be used for reading. If used, the seek position of the file will not
2042 2057 be preserved.
2043 2058
2044 2059 Returns a str holding uncompressed data for the requested revision.
2045 2060 """
2046 2061 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2047 2062
2048 2063 def _chunks(self, revs, df=None, targetsize=None):
2049 2064 """Obtain decompressed chunks for the specified revisions.
2050 2065
2051 2066 Accepts an iterable of numeric revisions that are assumed to be in
2052 2067 ascending order. Also accepts an optional already-open file handle
2053 2068 to be used for reading. If used, the seek position of the file will
2054 2069 not be preserved.
2055 2070
2056 2071 This function is similar to calling ``self._chunk()`` multiple times,
2057 2072 but is faster.
2058 2073
2059 2074 Returns a list with decompressed data for each requested revision.
2060 2075 """
2061 2076 if not revs:
2062 2077 return []
2063 2078 start = self.start
2064 2079 length = self.length
2065 2080 inline = self._inline
2066 2081 iosize = self._io.size
2067 2082 buffer = util.buffer
2068 2083
2069 2084 l = []
2070 2085 ladd = l.append
2071 2086
2072 2087 if not self._withsparseread:
2073 2088 slicedchunks = (revs,)
2074 2089 else:
2075 2090 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2076 2091
2077 2092 for revschunk in slicedchunks:
2078 2093 firstrev = revschunk[0]
2079 2094 # Skip trailing revisions with empty diff
2080 2095 for lastrev in revschunk[::-1]:
2081 2096 if length(lastrev) != 0:
2082 2097 break
2083 2098
2084 2099 try:
2085 2100 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2086 2101 except OverflowError:
2087 2102 # issue4215 - we can't cache a run of chunks greater than
2088 2103 # 2G on Windows
2089 2104 return [self._chunk(rev, df=df) for rev in revschunk]
2090 2105
2091 2106 decomp = self.decompress
2092 2107 for rev in revschunk:
2093 2108 chunkstart = start(rev)
2094 2109 if inline:
2095 2110 chunkstart += (rev + 1) * iosize
2096 2111 chunklength = length(rev)
2097 2112 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2098 2113
2099 2114 return l
2100 2115
2101 2116 def _chunkclear(self):
2102 2117 """Clear the raw chunk cache."""
2103 2118 self._chunkcache = (0, '')
2104 2119
2105 2120 def deltaparent(self, rev):
2106 2121 """return deltaparent of the given revision"""
2107 2122 base = self.index[rev][3]
2108 2123 if base == rev:
2109 2124 return nullrev
2110 2125 elif self._generaldelta:
2111 2126 return base
2112 2127 else:
2113 2128 return rev - 1
2114 2129
2115 2130 def issnapshot(self, rev):
2116 2131 """tells whether rev is a snapshot
2117 2132 """
2118 2133 if rev == nullrev:
2119 2134 return True
2120 2135 deltap = self.deltaparent(rev)
2121 2136 if deltap == nullrev:
2122 2137 return True
2123 2138 p1, p2 = self.parentrevs(rev)
2124 2139 if deltap in (p1, p2):
2125 2140 return False
2126 2141 return self.issnapshot(deltap)
2127 2142
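Put another way: a revision is a snapshot exactly when following its delta chain reaches nullrev without ever deltaing against one of the revision's own parents. A toy restatement (illustrative names and data only):

```python
NULLREV = -1

def toy_issnapshot(deltaparent, parents, rev):
    if rev == NULLREV:
        return True
    dp = deltaparent(rev)
    if dp == NULLREV:
        return True                    # full text stored
    if dp in parents(rev):
        return False                   # ordinary parent delta
    return toy_issnapshot(deltaparent, parents, dp)

# rev 2 deltas against rev 0, which is not one of its parents and is
# itself a full text, so rev 2 is an intermediate snapshot; rev 1
# deltas against its own parent, so it is not.
dparents = {0: NULLREV, 1: 0, 2: 0}
parentmap = {0: (NULLREV, NULLREV), 1: (0, NULLREV), 2: (1, NULLREV)}
assert toy_issnapshot(lambda r: dparents[r], lambda r: parentmap[r], 2)
assert not toy_issnapshot(lambda r: dparents[r], lambda r: parentmap[r], 1)
```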
2128 2143 def snapshotdepth(self, rev):
2129 2144 """number of snapshot in the chain before this one"""
2130 2145 if not self.issnapshot(rev):
2131 2146 raise ProgrammingError('revision %d not a snapshot')
2132 2147 return len(self._deltachain(rev)[0]) - 1
2133 2148
2134 2149 def revdiff(self, rev1, rev2):
2135 2150 """return or calculate a delta between two revisions
2136 2151
2137 2152 The delta calculated is in binary form and is intended to be written to
2138 2153 revlog data directly. So this function needs raw revision data.
2139 2154 """
2140 2155 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2141 2156 return bytes(self._chunk(rev2))
2142 2157
2143 2158 return mdiff.textdiff(self.revision(rev1, raw=True),
2144 2159 self.revision(rev2, raw=True))
2145 2160
2146 2161 def revision(self, nodeorrev, _df=None, raw=False):
2147 2162 """return an uncompressed revision of a given node or revision
2148 2163 number.
2149 2164
2150 2165 _df - an existing file handle to read from. (internal-only)
2151 2166 raw - an optional argument specifying if the revision data is to be
2152 2167 treated as raw data when applying flag transforms. 'raw' should be set
2153 2168 to True when generating changegroups or in debug commands.
2154 2169 """
2155 2170 if isinstance(nodeorrev, int):
2156 2171 rev = nodeorrev
2157 2172 node = self.node(rev)
2158 2173 else:
2159 2174 node = nodeorrev
2160 2175 rev = None
2161 2176
2162 2177 cachedrev = None
2163 2178 flags = None
2164 2179 rawtext = None
2165 2180 if node == nullid:
2166 2181 return ""
2167 2182 if self._cache:
2168 2183 if self._cache[0] == node:
2169 2184 # _cache only stores rawtext
2170 2185 if raw:
2171 2186 return self._cache[2]
2172 2187 # duplicated, but good for perf
2173 2188 if rev is None:
2174 2189 rev = self.rev(node)
2175 2190 if flags is None:
2176 2191 flags = self.flags(rev)
2177 2192 # no extra flags set, no flag processor runs, text = rawtext
2178 2193 if flags == REVIDX_DEFAULT_FLAGS:
2179 2194 return self._cache[2]
2180 2195 # rawtext is reusable. need to run flag processor
2181 2196 rawtext = self._cache[2]
2182 2197
2183 2198 cachedrev = self._cache[1]
2184 2199
2185 2200 # look up what we need to read
2186 2201 if rawtext is None:
2187 2202 if rev is None:
2188 2203 rev = self.rev(node)
2189 2204
2190 2205 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2191 2206 if stopped:
2192 2207 rawtext = self._cache[2]
2193 2208
2194 2209 # drop cache to save memory
2195 2210 self._cache = None
2196 2211
2197 2212 targetsize = None
2198 2213 rawsize = self.index[rev][2]
2199 2214 if 0 <= rawsize:
2200 2215 targetsize = 4 * rawsize
2201 2216
2202 2217 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2203 2218 if rawtext is None:
2204 2219 rawtext = bytes(bins[0])
2205 2220 bins = bins[1:]
2206 2221
2207 2222 rawtext = mdiff.patches(rawtext, bins)
2208 2223 self._cache = (node, rev, rawtext)
2209 2224
2210 2225 if flags is None:
2211 2226 if rev is None:
2212 2227 rev = self.rev(node)
2213 2228 flags = self.flags(rev)
2214 2229
2215 2230 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2216 2231 if validatehash:
2217 2232 self.checkhash(text, node, rev=rev)
2218 2233
2219 2234 return text
2220 2235
2221 2236 def hash(self, text, p1, p2):
2222 2237 """Compute a node hash.
2223 2238
2224 2239 Available as a function so that subclasses can replace the hash
2225 2240 as needed.
2226 2241 """
2227 2242 return hash(text, p1, p2)
2228 2243
2229 2244 def _processflags(self, text, flags, operation, raw=False):
2230 2245 """Inspect revision data flags and applies transforms defined by
2231 2246 registered flag processors.
2232 2247
2233 2248 ``text`` - the revision data to process
2234 2249 ``flags`` - the revision flags
2235 2250 ``operation`` - the operation being performed (read or write)
2236 2251 ``raw`` - an optional argument describing if the raw transform should be
2237 2252 applied.
2238 2253
2239 2254 This method processes the flags in the order (or reverse order if
2240 2255 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2241 2256 flag processors registered for present flags. The order of flags defined
2242 2257 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2243 2258
2244 2259 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2245 2260 processed text and ``validatehash`` is a bool indicating whether the
2246 2261 returned text should be checked for hash integrity.
2247 2262
2248 2263 Note: If the ``raw`` argument is set, it has precedence over the
2249 2264 operation and will only update the value of ``validatehash``.
2250 2265 """
2251 2266 # fast path: no flag processors will run
2252 2267 if flags == 0:
2253 2268 return text, True
2254 2269 if operation not in ('read', 'write'):
2255 2270 raise ProgrammingError(_("invalid '%s' operation") % operation)
2256 2271 # Check all flags are known.
2257 2272 if flags & ~REVIDX_KNOWN_FLAGS:
2258 2273 raise RevlogError(_("incompatible revision flag '%#x'") %
2259 2274 (flags & ~REVIDX_KNOWN_FLAGS))
2260 2275 validatehash = True
2261 2276 # Depending on the operation (read or write), the order might be
2262 2277 # reversed due to non-commutative transforms.
2263 2278 orderedflags = REVIDX_FLAGS_ORDER
2264 2279 if operation == 'write':
2265 2280 orderedflags = reversed(orderedflags)
2266 2281
2267 2282 for flag in orderedflags:
2268 2283 # If a flagprocessor has been registered for a known flag, apply the
2269 2284 # related operation transform and update result tuple.
2270 2285 if flag & flags:
2271 2286 vhash = True
2272 2287
2273 2288 if flag not in _flagprocessors:
2274 2289 message = _("missing processor for flag '%#x'") % (flag)
2275 2290 raise RevlogError(message)
2276 2291
2277 2292 processor = _flagprocessors[flag]
2278 2293 if processor is not None:
2279 2294 readtransform, writetransform, rawtransform = processor
2280 2295
2281 2296 if raw:
2282 2297 vhash = rawtransform(self, text)
2283 2298 elif operation == 'read':
2284 2299 text, vhash = readtransform(self, text)
2285 2300 else: # write operation
2286 2301 text, vhash = writetransform(self, text)
2287 2302 validatehash = validatehash and vhash
2288 2303
2289 2304 return text, validatehash
2290 2305
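A pared-down model of that dispatch. The flag bit, the registry layout, and the base64 transform here are all hypothetical; real processors are applied in REVIDX_FLAGS_ORDER, which matters once several flags are set (this toy has only one):

```python
import base64

FLAG_B64 = 1 << 0  # hypothetical flag bit

# flag -> (readtransform, writetransform); each returns (text, vhash)
processors = {
    FLAG_B64: (
        lambda text: (base64.b64decode(text), True),   # read
        lambda text: (base64.b64encode(text), False),  # write
    ),
}

def process(text, flags, operation):
    validatehash = True
    for flag, (read, write) in processors.items():
        if flag & flags:
            transform = read if operation == 'read' else write
            text, vhash = transform(text)
            validatehash = validatehash and vhash
    return text, validatehash

stored, _vhash = process(b'hello', FLAG_B64, 'write')
assert process(stored, FLAG_B64, 'read') == (b'hello', True)
```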
2291 2306 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2292 2307 """Check node hash integrity.
2293 2308
2294 2309 Available as a function so that subclasses can extend hash mismatch
2295 2310 behaviors as needed.
2296 2311 """
2297 2312 try:
2298 2313 if p1 is None and p2 is None:
2299 2314 p1, p2 = self.parents(node)
2300 2315 if node != self.hash(text, p1, p2):
2301 2316 revornode = rev
2302 2317 if revornode is None:
2303 2318 revornode = templatefilters.short(hex(node))
2304 2319 raise RevlogError(_("integrity check failed on %s:%s")
2305 2320 % (self.indexfile, pycompat.bytestr(revornode)))
2306 2321 except RevlogError:
2307 2322 if self._censorable and _censoredtext(text):
2308 2323 raise error.CensoredNodeError(self.indexfile, node, text)
2309 2324 raise
2310 2325
2311 2326 def _enforceinlinesize(self, tr, fp=None):
2312 2327 """Check if the revlog is too big for inline and convert if so.
2313 2328
2314 2329 This should be called after revisions are added to the revlog. If the
2315 2330 revlog has grown too large to be an inline revlog, it will convert it
2316 2331 to use multiple index and data files.
2317 2332 """
2318 2333 tiprev = len(self) - 1
2319 2334 if (not self._inline or
2320 2335 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2321 2336 return
2322 2337
2323 2338 trinfo = tr.find(self.indexfile)
2324 2339 if trinfo is None:
2325 2340 raise RevlogError(_("%s not found in the transaction")
2326 2341 % self.indexfile)
2327 2342
2328 2343 trindex = trinfo[2]
2329 2344 if trindex is not None:
2330 2345 dataoff = self.start(trindex)
2331 2346 else:
2332 2347 # revlog was stripped at start of transaction, use all leftover data
2333 2348 trindex = len(self) - 1
2334 2349 dataoff = self.end(tiprev)
2335 2350
2336 2351 tr.add(self.datafile, dataoff)
2337 2352
2338 2353 if fp:
2339 2354 fp.flush()
2340 2355 fp.close()
2341 2356
2342 2357 with self._datafp('w') as df:
2343 2358 for r in self:
2344 2359 df.write(self._getsegmentforrevs(r, r)[1])
2345 2360
2346 2361 with self._indexfp('w') as fp:
2347 2362 self.version &= ~FLAG_INLINE_DATA
2348 2363 self._inline = False
2349 2364 io = self._io
2350 2365 for i in self:
2351 2366 e = io.packentry(self.index[i], self.node, self.version, i)
2352 2367 fp.write(e)
2353 2368
2354 2369 # the temp file replaces the real index when we exit the context
2355 2370 # manager
2356 2371
2357 2372 tr.replace(self.indexfile, trindex * self._io.size)
2358 2373 self._chunkclear()
2359 2374
2360 2375 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2361 2376 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2362 2377 """add a revision to the log
2363 2378
2364 2379 text - the revision data to add
2365 2380 transaction - the transaction object used for rollback
2366 2381 link - the linkrev data to add
2367 2382 p1, p2 - the parent nodeids of the revision
2368 2383 cachedelta - an optional precomputed delta
2369 2384 node - nodeid of revision; typically node is not specified, and it is
2370 2385 computed by default as hash(text, p1, p2), however subclasses might
2371 2386 use a different hashing method (and override checkhash() in that case)
2372 2387 flags - the known flags to set on the revision
2373 2388 deltacomputer - an optional _deltacomputer instance shared between
2374 2389 multiple calls
2375 2390 """
2376 2391 if link == nullrev:
2377 2392 raise RevlogError(_("attempted to add linkrev -1 to %s")
2378 2393 % self.indexfile)
2379 2394
2380 2395 if flags:
2381 2396 node = node or self.hash(text, p1, p2)
2382 2397
2383 2398 rawtext, validatehash = self._processflags(text, flags, 'write')
2384 2399
2385 2400 # If the flag processor modifies the revision data, ignore any provided
2386 2401 # cachedelta.
2387 2402 if rawtext != text:
2388 2403 cachedelta = None
2389 2404
2390 2405 if len(rawtext) > _maxentrysize:
2391 2406 raise RevlogError(
2392 2407 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2393 2408 % (self.indexfile, len(rawtext)))
2394 2409
2395 2410 node = node or self.hash(rawtext, p1, p2)
2396 2411 if node in self.nodemap:
2397 2412 return node
2398 2413
2399 2414 if validatehash:
2400 2415 self.checkhash(rawtext, node, p1=p1, p2=p2)
2401 2416
2402 2417 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2403 2418 flags, cachedelta=cachedelta,
2404 2419 deltacomputer=deltacomputer)
2405 2420
2406 2421 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2407 2422 cachedelta=None, deltacomputer=None):
2408 2423 """add a raw revision with known flags, node and parents
2409 2424 useful when reusing a revision not stored in this revlog (e.g. received
2410 2425 over the wire, or read from an external bundle).
2411 2426 """
2412 2427 dfh = None
2413 2428 if not self._inline:
2414 2429 dfh = self._datafp("a+")
2415 2430 ifh = self._indexfp("a+")
2416 2431 try:
2417 2432 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2418 2433 flags, cachedelta, ifh, dfh,
2419 2434 deltacomputer=deltacomputer)
2420 2435 finally:
2421 2436 if dfh:
2422 2437 dfh.close()
2423 2438 ifh.close()
2424 2439
2425 2440 def compress(self, data):
2426 2441 """Generate a possibly-compressed representation of data."""
2427 2442 if not data:
2428 2443 return '', data
2429 2444
2430 2445 compressed = self._compressor.compress(data)
2431 2446
2432 2447 if compressed:
2433 2448 # The revlog compressor added the header in the returned data.
2434 2449 return '', compressed
2435 2450
2436 2451 if data[0:1] == '\0':
2437 2452 return '', data
2438 2453 return 'u', data
2439 2454
2440 2455 def decompress(self, data):
2441 2456 """Decompress a revlog chunk.
2442 2457
2443 2458 The chunk is expected to begin with a header identifying the
2444 2459 format type so it can be routed to an appropriate decompressor.
2445 2460 """
2446 2461 if not data:
2447 2462 return data
2448 2463
2449 2464 # Revlogs are read much more frequently than they are written and many
2450 2465 # chunks only take microseconds to decompress, so performance is
2451 2466 # important here.
2452 2467 #
2453 2468 # We can make a few assumptions about revlogs:
2454 2469 #
2455 2470 # 1) the majority of chunks will be compressed (as opposed to inline
2456 2471 # raw data).
2457 2472 # 2) decompressing *any* data will likely be at least 10x slower than
2458 2473 # returning raw inline data.
2459 2474 # 3) we want to prioritize common and officially supported compression
2460 2475 # engines
2461 2476 #
2462 2477 # It follows that we want to optimize for "decompress compressed data
2463 2478 # when encoded with common and officially supported compression engines"
2464 2479 # case over "raw data" and "data encoded by less common or non-official
2465 2480 # compression engines." That is why we have the inline lookup first
2466 2481 # followed by the compengines lookup.
2467 2482 #
2468 2483 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2469 2484 # compressed chunks. And this matters for changelog and manifest reads.
2470 2485 t = data[0:1]
2471 2486
2472 2487 if t == 'x':
2473 2488 try:
2474 2489 return _zlibdecompress(data)
2475 2490 except zlib.error as e:
2476 2491 raise RevlogError(_('revlog decompress error: %s') %
2477 2492 stringutil.forcebytestr(e))
2478 2493 # '\0' is more common than 'u' so it goes first.
2479 2494 elif t == '\0':
2480 2495 return data
2481 2496 elif t == 'u':
2482 2497 return util.buffer(data, 1)
2483 2498
2484 2499 try:
2485 2500 compressor = self._decompressors[t]
2486 2501 except KeyError:
2487 2502 try:
2488 2503 engine = util.compengines.forrevlogheader(t)
2489 2504 compressor = engine.revlogcompressor()
2490 2505 self._decompressors[t] = compressor
2491 2506 except KeyError:
2492 2507 raise RevlogError(_('unknown compression type %r') % t)
2493 2508
2494 2509 return compressor.decompress(data)
2495 2510
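The dispatch on the first byte, in isolation: 'u' marks data stored uncompressed behind an explicit marker, '\0' raw data needing no marker at all, and any other byte names a compression engine ('x' being zlib's header byte):

```python
import zlib

def toy_decompress(data):
    if not data:
        return data
    t = data[0:1]
    if t == b'\0':           # raw data; the byte is part of the text
        return data
    if t == b'u':            # explicit "stored uncompressed" marker
        return data[1:]
    if t == b'x':            # zlib stream (its header starts with 'x')
        return zlib.decompress(data)
    raise ValueError('unknown compression type %r' % t)

assert toy_decompress(zlib.compress(b'spam')) == b'spam'
assert toy_decompress(b'uspam') == b'spam'
assert toy_decompress(b'\0spam') == b'\0spam'
```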
2496 2511 def _isgooddeltainfo(self, deltainfo, revinfo):
2497 2512 """Returns True if the given delta is good. Good means that it is within
2498 2513 the disk span, disk size, and chain length bounds that we know to be
2499 2514 performant."""
2500 2515 if deltainfo is None:
2501 2516 return False
2502 2517
2503 2518 # - 'deltainfo.distance' is the distance from the base revision --
2504 2519 # bounding it limits the amount of I/O we need to do.
2505 2520 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2506 2521 # deltas we need to apply -- bounding it limits the amount of CPU
2507 2522 # we consume.
2508 2523
2509 2524 if self._sparserevlog:
2510 2525 # As sparse-read will be used, we can consider that the distance,
2511 2526 # instead of being the span of the whole chunk,
2512 2527 # is the span of the largest read chunk
2513 2528 base = deltainfo.base
2514 2529
2515 2530 if base != nullrev:
2516 2531 deltachain = self._deltachain(base)[0]
2517 2532 else:
2518 2533 deltachain = []
2519 2534
2520 2535 # search for the first non-snapshot revision
2521 2536 for idx, r in enumerate(deltachain):
2522 2537 if not self.issnapshot(r):
2523 2538 break
2524 2539 deltachain = deltachain[idx:]
2525 2540 chunks = _slicechunk(self, deltachain, deltainfo)
2526 2541 all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
2527 2542 distance = max(all_span)
2528 2543 else:
2529 2544 distance = deltainfo.distance
2530 2545
2531 2546 textlen = revinfo.textlen
2532 2547 defaultmax = textlen * 4
2533 2548 maxdist = self._maxdeltachainspan
2534 2549 if not maxdist:
2535 2550 maxdist = distance # ensure the conditional pass
2536 2551 maxdist = max(maxdist, defaultmax)
2537 2552 if self._sparserevlog and maxdist < self._srmingapsize:
2538 2553 # In multiple places, we ignore irrelevant data ranges below a
2539 2554 # certain size. We also apply this tradeoff here and relax the span
2540 2555 # constraint for small enough content.
2541 2556 maxdist = self._srmingapsize
2542 2557
2543 2558 # Bad delta from read span:
2544 2559 #
2545 2560 # If the span of data read is larger than the maximum allowed.
2546 2561 if maxdist < distance:
2547 2562 return False
2548 2563
2549 2564 # Bad delta from new delta size:
2550 2565 #
2551 2566 # If the delta size is larger than the target text, storing the
2552 2567 # delta will be inefficient.
2553 2568 if textlen < deltainfo.deltalen:
2554 2569 return False
2555 2570
2556 2571 # Bad delta from cumulated payload size:
2557 2572 #
2558 2573 # If the sum of the deltas gets larger than K * the target text length.
2559 2574 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
2560 2575 return False
2561 2576
2562 2577 # Bad delta from chain length:
2563 2578 #
2564 2579 # If the number of deltas in the chain gets too high.
2565 2580 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2566 2581 return False
2567 2582
2568 2583 # bad delta from intermediate snapshot size limit
2569 2584 #
2570 2585 # If an intermediate snapshot size is higher than the limit, the
2571 2586 # delta is bad. The limit exists to prevent endless chains of
2572 2587 # intermediate deltas from being created.
2573 2588 if (deltainfo.snapshotdepth is not None and
2574 2589 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
2575 2590 return False
2576 2591
2577 2592 # bad delta if new intermediate snapshot is larger than the previous
2578 2593 # snapshot
2579 2594 if (deltainfo.snapshotdepth
2580 2595 and self.length(deltainfo.base) < deltainfo.deltalen):
2581 2596 return False
2582 2597
2583 2598 return True
2584 2599
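Condensed, the rules above are a handful of comparisons. A sketch assuming a deltainfo-like object with the same attribute names; the sparse-revlog and snapshot refinements are omitted, and the value of LIMIT_DELTA2TEXT is an assumption here:

```python
LIMIT_DELTA2TEXT = 2  # assumed value of the module-level constant

def toy_isgooddelta(d, textlen, maxdist, maxchainlen):
    if d is None:
        return False
    if maxdist < d.distance:                      # read span too wide
        return False
    if textlen < d.deltalen:                      # delta beats full text
        return False
    if textlen * LIMIT_DELTA2TEXT < d.compresseddeltalen:
        return False                              # chain payload too big
    if maxchainlen and maxchainlen < d.chainlen:  # chain too long
        return False
    return True
```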
2585 2600 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2586 2601 cachedelta, ifh, dfh, alwayscache=False,
2587 2602 deltacomputer=None):
2588 2603 """internal function to add revisions to the log
2589 2604
2590 2605 see addrevision for argument descriptions.
2591 2606
2592 2607 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2593 2608
2594 2609 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2595 2610 be used.
2596 2611
2597 2612 invariants:
2598 2613 - rawtext is optional (can be None); if not set, cachedelta must be set.
2599 2614 if both are set, they must correspond to each other.
2600 2615 """
2601 2616 if node == nullid:
2602 2617 raise RevlogError(_("%s: attempt to add null revision") %
2603 2618 (self.indexfile))
2604 2619 if node == wdirid or node in wdirfilenodeids:
2605 2620 raise RevlogError(_("%s: attempt to add wdir revision") %
2606 2621 (self.indexfile))
2607 2622
2608 2623 if self._inline:
2609 2624 fh = ifh
2610 2625 else:
2611 2626 fh = dfh
2612 2627
2613 2628 btext = [rawtext]
2614 2629
2615 2630 curr = len(self)
2616 2631 prev = curr - 1
2617 2632 offset = self.end(prev)
2618 2633 p1r, p2r = self.rev(p1), self.rev(p2)
2619 2634
2620 2635 # full versions are inserted when the needed deltas
2621 2636 # become comparable to the uncompressed text
2622 2637 if rawtext is None:
2623 2638 # need the rawtext size before it is changed by flag processors,
2624 2639 # which is the non-raw size. use revlog explicitly to avoid filelog's
2625 2640 # extra logic that might remove metadata size.
2626 2641 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2627 2642 cachedelta[1])
2628 2643 else:
2629 2644 textlen = len(rawtext)
2630 2645
2631 2646 if deltacomputer is None:
2632 2647 deltacomputer = _deltacomputer(self)
2633 2648
2634 2649 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2635 2650
2636 2651 # no delta for flag processor revision (see "candelta" for why)
2637 2652 # not calling candelta since only one revision needs test, also to
2638 2653 # avoid overhead fetching flags again.
2639 2654 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2640 2655 deltainfo = None
2641 2656 else:
2642 2657 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2643 2658
2644 2659 if deltainfo is not None:
2645 2660 base = deltainfo.base
2646 2661 chainbase = deltainfo.chainbase
2647 2662 data = deltainfo.data
2648 2663 l = deltainfo.deltalen
2649 2664 else:
2650 2665 rawtext = deltacomputer.buildtext(revinfo, fh)
2651 2666 data = self.compress(rawtext)
2652 2667 l = len(data[1]) + len(data[0])
2653 2668 base = chainbase = curr
2654 2669
2655 2670 e = (offset_type(offset, flags), l, textlen,
2656 2671 base, link, p1r, p2r, node)
2657 2672 self.index.append(e)
2658 2673 self.nodemap[node] = curr
2659 2674
2660 2675 entry = self._io.packentry(e, self.node, self.version, curr)
2661 2676 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2662 2677
2663 2678 if alwayscache and rawtext is None:
2664 2679 rawtext = deltacomputer.buildtext(revinfo, fh)
2665 2680
2666 2681 if type(rawtext) == bytes: # only accept immutable objects
2667 2682 self._cache = (node, curr, rawtext)
2668 2683 self._chainbasecache[curr] = chainbase
2669 2684 return node
2670 2685
2671 2686 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2672 2687 # Files opened in a+ mode have inconsistent behavior on various
2673 2688 # platforms. Windows requires that a file positioning call be made
2674 2689 # when the file handle transitions between reads and writes. See
2675 2690 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2676 2691 # platforms, Python or the platform itself can be buggy. Some versions
2677 2692 # of Solaris have been observed to not append at the end of the file
2678 2693 # if the file was seeked to before the end. See issue4943 for more.
2679 2694 #
2680 2695 # We work around this issue by inserting a seek() before writing.
2681 2696 # Note: This is likely not necessary on Python 3.
2682 2697 ifh.seek(0, os.SEEK_END)
2683 2698 if dfh:
2684 2699 dfh.seek(0, os.SEEK_END)
2685 2700
2686 2701 curr = len(self) - 1
2687 2702 if not self._inline:
2688 2703 transaction.add(self.datafile, offset)
2689 2704 transaction.add(self.indexfile, curr * len(entry))
2690 2705 if data[0]:
2691 2706 dfh.write(data[0])
2692 2707 dfh.write(data[1])
2693 2708 ifh.write(entry)
2694 2709 else:
2695 2710 offset += curr * self._io.size
2696 2711 transaction.add(self.indexfile, offset, curr)
2697 2712 ifh.write(entry)
2698 2713 ifh.write(data[0])
2699 2714 ifh.write(data[1])
2700 2715 self._enforceinlinesize(transaction, ifh)
2701 2716
2702 2717 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2703 2718 """
2704 2719 add a delta group
2705 2720
2706 2721 given a set of deltas, add them to the revision log. the
2707 2722 first delta is against its parent, which should be in our
2708 2723 log, the rest are against the previous delta.
2709 2724
2710 2725 If ``addrevisioncb`` is defined, it will be called with arguments of
2711 2726 this revlog and the node that was added.
2712 2727 """
2713 2728
2714 2729 nodes = []
2715 2730
2716 2731 r = len(self)
2717 2732 end = 0
2718 2733 if r:
2719 2734 end = self.end(r - 1)
2720 2735 ifh = self._indexfp("a+")
2721 2736 isize = r * self._io.size
2722 2737 if self._inline:
2723 2738 transaction.add(self.indexfile, end + isize, r)
2724 2739 dfh = None
2725 2740 else:
2726 2741 transaction.add(self.indexfile, isize, r)
2727 2742 transaction.add(self.datafile, end)
2728 2743 dfh = self._datafp("a+")
2729 2744 def flush():
2730 2745 if dfh:
2731 2746 dfh.flush()
2732 2747 ifh.flush()
2733 2748 try:
2734 2749 deltacomputer = _deltacomputer(self)
2735 2750 # loop through our set of deltas
2736 2751 for data in deltas:
2737 2752 node, p1, p2, linknode, deltabase, delta, flags = data
2738 2753 link = linkmapper(linknode)
2739 2754 flags = flags or REVIDX_DEFAULT_FLAGS
2740 2755
2741 2756 nodes.append(node)
2742 2757
2743 2758 if node in self.nodemap:
2744 2759 # this can happen if two branches make the same change
2745 2760 continue
2746 2761
2747 2762 for p in (p1, p2):
2748 2763 if p not in self.nodemap:
2749 2764 raise LookupError(p, self.indexfile,
2750 2765 _('unknown parent'))
2751 2766
2752 2767 if deltabase not in self.nodemap:
2753 2768 raise LookupError(deltabase, self.indexfile,
2754 2769 _('unknown delta base'))
2755 2770
2756 2771 baserev = self.rev(deltabase)
2757 2772
2758 2773 if baserev != nullrev and self.iscensored(baserev):
2759 2774 # if base is censored, delta must be full replacement in a
2760 2775 # single patch operation
2761 2776 hlen = struct.calcsize(">lll")
2762 2777 oldlen = self.rawsize(baserev)
2763 2778 newlen = len(delta) - hlen
2764 2779 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2765 2780 raise error.CensoredBaseError(self.indexfile,
2766 2781 self.node(baserev))
2767 2782
2768 2783 if not flags and self._peek_iscensored(baserev, delta, flush):
2769 2784 flags |= REVIDX_ISCENSORED
2770 2785
2771 2786 # We assume consumers of addrevisioncb will want to retrieve
2772 2787 # the added revision, which will require a call to
2773 2788 # revision(). revision() will fast path if there is a cache
2774 2789 # hit. So, we tell _addrevision() to always cache in this case.
2775 2790 # We're only using addgroup() in the context of changegroup
2776 2791 # generation so the revision data can always be handled as raw
2777 2792 # by the flagprocessor.
2778 2793 self._addrevision(node, None, transaction, link,
2779 2794 p1, p2, flags, (baserev, delta),
2780 2795 ifh, dfh,
2781 2796 alwayscache=bool(addrevisioncb),
2782 2797 deltacomputer=deltacomputer)
2783 2798
2784 2799 if addrevisioncb:
2785 2800 addrevisioncb(self, node)
2786 2801
2787 2802 if not dfh and not self._inline:
2788 2803 # addrevision switched from inline to conventional
2789 2804 # reopen the index
2790 2805 ifh.close()
2791 2806 dfh = self._datafp("a+")
2792 2807 ifh = self._indexfp("a+")
2793 2808 finally:
2794 2809 if dfh:
2795 2810 dfh.close()
2796 2811 ifh.close()
2797 2812
2798 2813 return nodes
2799 2814
2800 2815 def iscensored(self, rev):
2801 2816 """Check if a file revision is censored."""
2802 2817 if not self._censorable:
2803 2818 return False
2804 2819
2805 2820 return self.flags(rev) & REVIDX_ISCENSORED
2806 2821
2807 2822 def _peek_iscensored(self, baserev, delta, flush):
2808 2823 """Quickly check if a delta produces a censored revision."""
2809 2824 if not self._censorable:
2810 2825 return False
2811 2826
2812 2827 # Fragile heuristic: unless new file meta keys are added alphabetically
2813 2828 # preceding "censored", all censored revisions are prefixed by
2814 2829 # "\1\ncensored:". A delta producing such a censored revision must be a
2815 2830 # full-replacement delta, so we inspect the first and only patch in the
2816 2831 # delta for this prefix.
2817 2832 hlen = struct.calcsize(">lll")
2818 2833 if len(delta) <= hlen:
2819 2834 return False
2820 2835
2821 2836 oldlen = self.rawsize(baserev)
2822 2837 newlen = len(delta) - hlen
2823 2838 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2824 2839 return False
2825 2840
2826 2841 add = "\1\ncensored:"
2827 2842 addlen = len(add)
2828 2843 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2829 2844
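A worked example of the heuristic: build a full-replacement delta whose payload is a (hypothetical) censorship tombstone and check that both tests above would fire. The >lll header encodes start, end, and replacement length:

```python
import struct

hlen = struct.calcsize(">lll")
oldlen = 10                                  # rawsize of the base
tombstone = b"\1\ncensored: example\n\1\n"   # hypothetical metadata
delta = struct.pack(">lll", 0, oldlen, len(tombstone)) + tombstone

newlen = len(delta) - hlen
assert delta[:hlen] == struct.pack(">lll", 0, oldlen, newlen)
add = b"\1\ncensored:"
assert newlen >= len(add) and delta[hlen:hlen + len(add)] == add
```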
2830 2845 def getstrippoint(self, minlink):
2831 2846 """find the minimum rev that must be stripped to strip the linkrev
2832 2847
2833 2848 Returns a tuple containing the minimum rev and a set of all revs that
2834 2849 have linkrevs that will be broken by this strip.
2835 2850 """
2836 2851 brokenrevs = set()
2837 2852 strippoint = len(self)
2838 2853
2839 2854 heads = {}
2840 2855 futurelargelinkrevs = set()
2841 2856 for head in self.headrevs():
2842 2857 headlinkrev = self.linkrev(head)
2843 2858 heads[head] = headlinkrev
2844 2859 if headlinkrev >= minlink:
2845 2860 futurelargelinkrevs.add(headlinkrev)
2846 2861
2847 2862 # This algorithm involves walking down the rev graph, starting at the
2848 2863 # heads. Since the revs are topologically sorted according to linkrev,
2849 2864 # once all head linkrevs are below the minlink, we know there are
2850 2865 # no more revs that could have a linkrev greater than minlink.
2851 2866 # So we can stop walking.
2852 2867 while futurelargelinkrevs:
2853 2868 strippoint -= 1
2854 2869 linkrev = heads.pop(strippoint)
2855 2870
2856 2871 if linkrev < minlink:
2857 2872 brokenrevs.add(strippoint)
2858 2873 else:
2859 2874 futurelargelinkrevs.remove(linkrev)
2860 2875
2861 2876 for p in self.parentrevs(strippoint):
2862 2877 if p != nullrev:
2863 2878 plinkrev = self.linkrev(p)
2864 2879 heads[p] = plinkrev
2865 2880 if plinkrev >= minlink:
2866 2881 futurelargelinkrevs.add(plinkrev)
2867 2882
2868 2883 return strippoint, brokenrevs
2869 2884
2870 2885 def strip(self, minlink, transaction):
2871 2886 """truncate the revlog on the first revision with a linkrev >= minlink
2872 2887
2873 2888 This function is called when we're stripping revision minlink and
2874 2889 its descendants from the repository.
2875 2890
2876 2891 We have to remove all revisions with linkrev >= minlink, because
2877 2892 the equivalent changelog revisions will be renumbered after the
2878 2893 strip.
2879 2894
2880 2895 So we truncate the revlog on the first of these revisions, and
2881 2896 trust that the caller has saved the revisions that shouldn't be
2882 2897 removed and that it'll re-add them after this truncation.
2883 2898 """
2884 2899 if len(self) == 0:
2885 2900 return
2886 2901
2887 2902 rev, _ = self.getstrippoint(minlink)
2888 2903 if rev == len(self):
2889 2904 return
2890 2905
2891 2906 # first truncate the files on disk
2892 2907 end = self.start(rev)
2893 2908 if not self._inline:
2894 2909 transaction.add(self.datafile, end)
2895 2910 end = rev * self._io.size
2896 2911 else:
2897 2912 end += rev * self._io.size
2898 2913
2899 2914 transaction.add(self.indexfile, end)
2900 2915
2901 2916 # then reset internal state in memory to forget those revisions
2902 2917 self._cache = None
2903 2918 self._chaininfocache = {}
2904 2919 self._chunkclear()
2905 2920 for x in pycompat.xrange(rev, len(self)):
2906 2921 del self.nodemap[self.node(x)]
2907 2922
2908 2923 del self.index[rev:-1]
2909 2924 self._nodepos = None
2910 2925
2911 2926 def checksize(self):
2912 2927 expected = 0
2913 2928 if len(self):
2914 2929 expected = max(0, self.end(len(self) - 1))
2915 2930
2916 2931 try:
2917 2932 with self._datafp() as f:
2918 2933 f.seek(0, 2)
2919 2934 actual = f.tell()
2920 2935 dd = actual - expected
2921 2936 except IOError as inst:
2922 2937 if inst.errno != errno.ENOENT:
2923 2938 raise
2924 2939 dd = 0
2925 2940
2926 2941 try:
2927 2942 f = self.opener(self.indexfile)
2928 2943 f.seek(0, 2)
2929 2944 actual = f.tell()
2930 2945 f.close()
2931 2946 s = self._io.size
2932 2947 i = max(0, actual // s)
2933 2948 di = actual - (i * s)
2934 2949 if self._inline:
2935 2950 databytes = 0
2936 2951 for r in self:
2937 2952 databytes += max(0, self.length(r))
2938 2953 dd = 0
2939 2954 di = actual - len(self) * s - databytes
2940 2955 except IOError as inst:
2941 2956 if inst.errno != errno.ENOENT:
2942 2957 raise
2943 2958 di = 0
2944 2959
2945 2960 return (dd, di)
2946 2961
2947 2962 def files(self):
2948 2963 res = [self.indexfile]
2949 2964 if not self._inline:
2950 2965 res.append(self.datafile)
2951 2966 return res
2952 2967
2968 def emitrevisiondeltas(self, requests):
2969 frev = self.rev
2970
2971 prevrev = None
2972 for request in requests:
2973 node = request.node
2974 rev = frev(node)
2975
2976 if prevrev is None:
2977 prevrev = self.index[rev][5]
2978
2979 # Requesting a full revision.
2980 if request.basenode == nullid:
2981 baserev = nullrev
2982 # Requesting an explicit revision.
2983 elif request.basenode is not None:
2984 baserev = frev(request.basenode)
2985 # Allowing us to choose.
2986 else:
2987 p1rev, p2rev = self.parentrevs(rev)
2988 deltaparentrev = self.deltaparent(rev)
2989
2990 # Avoid sending full revisions when delta parent is null. Pick
2991 # prev in that case. It's tempting to pick p1 in this case, as
2992 # p1 will be smaller in the common case. However, computing a
2993 # delta against p1 may require resolving the raw text of p1,
2994 # which could be expensive. The revlog caches should have prev
2995 # cached, meaning less CPU for delta generation. There is
2996 # likely room to add a flag and/or config option to control this
2997 # behavior.
2998 if deltaparentrev == nullrev and self.storedeltachains:
2999 baserev = prevrev
3000
3001 # Revlog is configured to use full snapshot for a reason.
3002 # Stick to full snapshot.
3003 elif deltaparentrev == nullrev:
3004 baserev = nullrev
3005
3006 # Pick the previous revision when we can't be sure the base is
3007 # available on the consumer.
3008 elif deltaparentrev not in (p1rev, p2rev, prevrev):
3009 baserev = prevrev
3010 else:
3011 baserev = deltaparentrev
3012
3013 if baserev != nullrev and not self.candelta(baserev, rev):
3014 baserev = nullrev
3015
3016 revision = None
3017 delta = None
3018 baserevisionsize = None
3019
3020 if self.iscensored(baserev) or self.iscensored(rev):
3021 try:
3022 revision = self.revision(node, raw=True)
3023 except error.CensoredNodeError as e:
3024 revision = e.tombstone
3025
3026 if baserev != nullrev:
3027 baserevisionsize = self.rawsize(baserev)
3028
3029 elif baserev == nullrev:
3030 revision = self.revision(node, raw=True)
3031 else:
3032 delta = self.revdiff(baserev, rev)
3033
3034 extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0
3035
3036 yield revlogrevisiondelta(
3037 node=node,
3038 p1node=request.p1node,
3039 p2node=request.p2node,
3040 linknode=request.linknode,
3041 basenode=self.node(baserev),
3042 flags=self.flags(rev) | extraflags,
3043 baserevisionsize=baserevisionsize,
3044 revision=revision,
3045 delta=delta)
3046
3047 prevrev = rev
3048
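A hypothetical caller of the new API. The request type is assumed to carry exactly the attributes read above (node, basenode, p1node, p2node, linknode, ellipsis); basenode=None takes the "allowing us to choose" branch, while nullid would force a full revision:

```python
import collections

# Assumed shape only; the real request type is defined by the new
# repository API, not here.
deltarequest = collections.namedtuple(
    'deltarequest',
    ['node', 'basenode', 'p1node', 'p2node', 'linknode', 'ellipsis'])

def emitall(rl, nodes, linknode):
    requests = []
    for node in nodes:
        p1node, p2node = rl.parents(node)
        requests.append(deltarequest(node, None, p1node, p2node,
                                     linknode, False))
    # Each yielded object carries either .delta (against .basenode)
    # or a full .revision, never both.
    return list(rl.emitrevisiondeltas(requests))
```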
2953 3049 DELTAREUSEALWAYS = 'always'
2954 3050 DELTAREUSESAMEREVS = 'samerevs'
2955 3051 DELTAREUSENEVER = 'never'
2956 3052
2957 3053 DELTAREUSEFULLADD = 'fulladd'
2958 3054
2959 3055 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2960 3056
2961 3057 def clone(self, tr, destrevlog, addrevisioncb=None,
2962 3058 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2963 3059 """Copy this revlog to another, possibly with format changes.
2964 3060
2965 3061 The destination revlog will contain the same revisions and nodes.
2966 3062 However, it may not be bit-for-bit identical due to e.g. delta encoding
2967 3063 differences.
2968 3064
2969 3065 The ``deltareuse`` argument controls how deltas from the existing revlog
2970 3066 are preserved in the destination revlog. The argument can have the
2971 3067 following values:
2972 3068
2973 3069 DELTAREUSEALWAYS
2974 3070 Deltas will always be reused (if possible), even if the destination
2975 3071 revlog would not select the same revisions for the delta. This is the
2976 3072 fastest mode of operation.
2977 3073 DELTAREUSESAMEREVS
2978 3074 Deltas will be reused if the destination revlog would pick the same
2979 3075 revisions for the delta. This mode strikes a balance between speed
2980 3076 and optimization.
2981 3077 DELTAREUSENEVER
2982 3078 Deltas will never be reused. This is the slowest mode of execution.
2983 3079 This mode can be used to recompute deltas (e.g. if the diff/delta
2984 3080 algorithm changes).
2985 3081
2986 3082 Delta computation can be slow, so the choice of delta reuse policy can
2987 3083 significantly affect run time.
2988 3084
2989 3085 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2990 3086 two extremes. Deltas will be reused if they are appropriate. But if the
2991 3087 delta could choose a better revision, it will do so. This means if you
2992 3088 are converting a non-generaldelta revlog to a generaldelta revlog,
2993 3089 deltas will be recomputed if the delta's parent isn't a parent of the
2994 3090 revision.
2995 3091
2996 3092 In addition to the delta policy, the ``deltabothparents`` argument
2997 3093 controls whether to compute deltas against both parents for merges.
2998 3094 If ``deltabothparents`` is None, the current default behavior is used.
2999 3095 """
3000 3096 if deltareuse not in self.DELTAREUSEALL:
3001 3097 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
3002 3098
3003 3099 if len(destrevlog):
3004 3100 raise ValueError(_('destination revlog is not empty'))
3005 3101
3006 3102 if getattr(self, 'filteredrevs', None):
3007 3103 raise ValueError(_('source revlog has filtered revisions'))
3008 3104 if getattr(destrevlog, 'filteredrevs', None):
3009 3105 raise ValueError(_('destination revlog has filtered revisions'))
3010 3106
3011 3107 # lazydeltabase controls whether to reuse a cached delta, if possible.
3012 3108 oldlazydeltabase = destrevlog._lazydeltabase
3013 3109 oldamd = destrevlog._deltabothparents
3014 3110
3015 3111 try:
3016 3112 if deltareuse == self.DELTAREUSEALWAYS:
3017 3113 destrevlog._lazydeltabase = True
3018 3114 elif deltareuse == self.DELTAREUSESAMEREVS:
3019 3115 destrevlog._lazydeltabase = False
3020 3116
3021 3117 destrevlog._deltabothparents = deltabothparents or oldamd
3022 3118
3023 3119 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
3024 3120 self.DELTAREUSESAMEREVS)
3025 3121
3026 3122 deltacomputer = _deltacomputer(destrevlog)
3027 3123 index = self.index
3028 3124 for rev in self:
3029 3125 entry = index[rev]
3030 3126
3031 3127 # Some classes override linkrev to take filtered revs into
3032 3128 # account. Use raw entry from index.
3033 3129 flags = entry[0] & 0xffff
3034 3130 linkrev = entry[4]
3035 3131 p1 = index[entry[5]][7]
3036 3132 p2 = index[entry[6]][7]
3037 3133 node = entry[7]
3038 3134
3039 3135 # (Possibly) reuse the delta from the revlog if allowed and
3040 3136 # the revlog chunk is a delta.
3041 3137 cachedelta = None
3042 3138 rawtext = None
3043 3139 if populatecachedelta:
3044 3140 dp = self.deltaparent(rev)
3045 3141 if dp != nullrev:
3046 3142 cachedelta = (dp, bytes(self._chunk(rev)))
3047 3143
3048 3144 if not cachedelta:
3049 3145 rawtext = self.revision(rev, raw=True)
3050 3146
3052 3148 if deltareuse == self.DELTAREUSEFULLADD:
3053 3149 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
3054 3150 cachedelta=cachedelta,
3055 3151 node=node, flags=flags,
3056 3152 deltacomputer=deltacomputer)
3057 3153 else:
3058 3154 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
3059 3155 checkambig=False)
3060 3156 dfh = None
3061 3157 if not destrevlog._inline:
3062 3158 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
3063 3159 try:
3064 3160 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
3065 3161 p2, flags, cachedelta, ifh, dfh,
3066 3162 deltacomputer=deltacomputer)
3067 3163 finally:
3068 3164 if dfh:
3069 3165 dfh.close()
3070 3166 ifh.close()
3071 3167
3072 3168 if addrevisioncb:
3073 3169 addrevisioncb(self, rev, node)
3074 3170 finally:
3075 3171 destrevlog._lazydeltabase = oldlazydeltabase
3076 3172 destrevlog._deltabothparents = oldamd
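As a hedged usage sketch of ``clone()`` (``srcrevlog``, ``destrevlog``, ``tr``, and ``ui`` are placeholders, not defined by this change), the following forces every delta to be recomputed, which the docstring above suggests for delta algorithm changes, and reports progress through ``addrevisioncb``:

# Illustrative only; assumes an open transaction and an empty
# destination revlog, as clone() requires.
def copyrecomputingdeltas(srcrevlog, destrevlog, tr, ui):
    def onrevision(rl, rev, node):
        # clone() invokes the callback as addrevisioncb(self, rev, node).
        ui.debug(b'cloned revision %d\n' % rev)

    srcrevlog.clone(tr, destrevlog,
                    addrevisioncb=onrevision,
                    deltareuse=srcrevlog.DELTAREUSENEVER)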
@@ -1,689 +1,751 b''
1 1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # To use this with the test suite:
9 9 #
10 10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12 12
13 13 from __future__ import absolute_import
14 14
15 15 import stat
16 16
17 17 from mercurial.i18n import _
18 18 from mercurial.node import (
19 19 bin,
20 20 hex,
21 21 nullid,
22 22 nullrev,
23 23 )
24 24 from mercurial.thirdparty import (
25 attr,
25 26 cbor,
26 27 )
27 28 from mercurial import (
28 29 ancestor,
29 30 bundlerepo,
30 31 error,
31 32 extensions,
32 33 localrepo,
33 34 mdiff,
34 35 pycompat,
35 36 repository,
36 37 revlog,
37 38 store,
38 39 verify,
39 40 )
40 41 from mercurial.utils import (
41 42 interfaceutil,
42 43 )
43 44
44 45 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
45 46 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
46 47 # be specifying the version(s) of Mercurial they are tested with, or
47 48 # leave the attribute unspecified.
48 49 testedwith = 'ships-with-hg-core'
49 50
50 51 REQUIREMENT = 'testonly-simplestore'
51 52
52 53 def validatenode(node):
53 54 if isinstance(node, int):
54 55 raise ValueError('expected node; got int')
55 56
56 57 if len(node) != 20:
57 58 raise ValueError('expected 20 byte node')
58 59
59 60 def validaterev(rev):
60 61 if not isinstance(rev, int):
61 62 raise ValueError('expected int')
62 63
64 @interfaceutil.implementer(repository.irevisiondelta)
65 @attr.s(slots=True, frozen=True)
66 class simplestorerevisiondelta(object):
67 node = attr.ib()
68 p1node = attr.ib()
69 p2node = attr.ib()
70 basenode = attr.ib()
71 linknode = attr.ib()
72 flags = attr.ib()
73 baserevisionsize = attr.ib()
74 revision = attr.ib()
75 delta = attr.ib()
76
63 77 @interfaceutil.implementer(repository.ifilestorage)
64 78 class filestorage(object):
65 79 """Implements storage for a tracked path.
66 80
67 81 Data is stored in the VFS in a directory corresponding to the tracked
68 82 path.
69 83
70 84 Index data is stored in an ``index`` file using CBOR.
71 85
72 86 Fulltext data is stored in files named after the hex of the node.
73 87 """
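# Hypothetical on-disk layout for a tracked path ``foo/bar.txt``
# (file names are hex nodes and depend on the revisions stored):
#
#   data/foo/bar.txt/index       <- CBOR-encoded list of index entries
#   data/foo/bar.txt/<hexnode>   <- raw fulltext keyed by node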
74 88
75 89 def __init__(self, svfs, path):
76 90 self._svfs = svfs
77 91 self._path = path
78 92
79 93 self._storepath = b'/'.join([b'data', path])
80 94 self._indexpath = b'/'.join([self._storepath, b'index'])
81 95
82 96 indexdata = self._svfs.tryread(self._indexpath)
83 97 if indexdata:
84 98 indexdata = cbor.loads(indexdata)
85 99
86 100 self._indexdata = indexdata or []
87 101 self._indexbynode = {}
88 102 self._indexbyrev = {}
89 103 self.index = []
90 104 self._refreshindex()
91 105
92 106 # This is used by changegroup code :/
93 107 self._generaldelta = True
94 108 self.storedeltachains = False
95 109
96 110 self.version = 1
97 111
98 112 def _refreshindex(self):
99 113 self._indexbynode.clear()
100 114 self._indexbyrev.clear()
101 115 self.index = []
102 116
103 117 for i, entry in enumerate(self._indexdata):
104 118 self._indexbynode[entry[b'node']] = entry
105 119 self._indexbyrev[i] = entry
106 120
107 121 self._indexbynode[nullid] = {
108 122 b'node': nullid,
109 123 b'p1': nullid,
110 124 b'p2': nullid,
111 125 b'linkrev': nullrev,
112 126 b'flags': 0,
113 127 }
114 128
115 129 self._indexbyrev[nullrev] = {
116 130 b'node': nullid,
117 131 b'p1': nullid,
118 132 b'p2': nullid,
119 133 b'linkrev': nullrev,
120 134 b'flags': 0,
121 135 }
122 136
123 137 for i, entry in enumerate(self._indexdata):
124 138 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
125 139
126 140 # start, length, rawsize, chainbase, linkrev, p1, p2, node
127 141 self.index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
128 142 entry[b'node']))
129 143
130 144 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
131 145
132 146 def __len__(self):
133 147 return len(self._indexdata)
134 148
135 149 def __iter__(self):
136 150 return iter(range(len(self)))
137 151
138 152 def revs(self, start=0, stop=None):
139 153 step = 1
140 154 if stop is not None:
141 155 if start > stop:
142 156 step = -1
143 157
144 158 stop += step
145 159 else:
146 160 stop = len(self)
147 161
148 162 return range(start, stop, step)
149 163
150 164 def parents(self, node):
151 165 validatenode(node)
152 166
153 167 if node not in self._indexbynode:
154 168 raise KeyError('unknown node')
155 169
156 170 entry = self._indexbynode[node]
157 171
158 172 return entry[b'p1'], entry[b'p2']
159 173
160 174 def parentrevs(self, rev):
161 175 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
162 176 return self.rev(p1), self.rev(p2)
163 177
164 178 def rev(self, node):
165 179 validatenode(node)
166 180
167 181 try:
168 182 self._indexbynode[node]
169 183 except KeyError:
170 184 raise error.LookupError(node, self._indexpath, _('no node'))
171 185
172 186 for rev, entry in self._indexbyrev.items():
173 187 if entry[b'node'] == node:
174 188 return rev
175 189
176 190 raise error.ProgrammingError('this should not occur')
177 191
178 192 def node(self, rev):
179 193 validaterev(rev)
180 194
181 195 return self._indexbyrev[rev][b'node']
182 196
183 197 def lookup(self, node):
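# Accept, in order: a revision number (int), a 20-byte binary node, a
# decimal revision string (possibly negative), or a 40-character hex
# node. Anything else raises LookupError.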
184 198 if isinstance(node, int):
185 199 return self.node(node)
186 200
187 201 if len(node) == 20:
188 202 self.rev(node)
189 203 return node
190 204
191 205 try:
192 206 rev = int(node)
193 207 if '%d' % rev != node:
194 208 raise ValueError
195 209
196 210 if rev < 0:
197 211 rev = len(self) + rev
198 212 if rev < 0 or rev >= len(self):
199 213 raise ValueError
200 214
201 215 return self.node(rev)
202 216 except (ValueError, OverflowError):
203 217 pass
204 218
205 219 if len(node) == 40:
206 220 try:
207 221 rawnode = bin(node)
208 222 self.rev(rawnode)
209 223 return rawnode
210 224 except TypeError:
211 225 pass
212 226
213 227 raise error.LookupError(node, self._path, _('invalid lookup input'))
214 228
215 229 def linkrev(self, rev):
216 230 validaterev(rev)
217 231
218 232 return self._indexbyrev[rev][b'linkrev']
219 233
220 234 def flags(self, rev):
221 235 validaterev(rev)
222 236
223 237 return self._indexbyrev[rev][b'flags']
224 238
225 239 def deltaparent(self, rev):
226 240 validaterev(rev)
227 241
228 242 p1node = self.parents(self.node(rev))[0]
229 243 return self.rev(p1node)
230 244
231 245 def candelta(self, baserev, rev):
232 246 validaterev(baserev)
233 247 validaterev(rev)
234 248
235 249 if ((self.flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
236 250 or (self.flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
237 251 return False
238 252
239 253 return True
240 254
241 255 def rawsize(self, rev):
242 256 validaterev(rev)
243 257 node = self.node(rev)
244 258 return len(self.revision(node, raw=True))
245 259
246 260 def _processflags(self, text, flags, operation, raw=False):
247 261 if flags == 0:
248 262 return text, True
249 263
250 264 if flags & ~revlog.REVIDX_KNOWN_FLAGS:
251 265 raise error.RevlogError(_("incompatible revision flag '%#x'") %
252 266 (flags & ~revlog.REVIDX_KNOWN_FLAGS))
253 267
254 268 validatehash = True
255 269 # Depending on the operation (read or write), the order might be
256 270 # reversed due to non-commutative transforms.
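# (Hypothetical example: if transform A must run before transform B
# when reading, then B must run before A when writing, hence the
# reversal for the 'write' operation below.)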
257 271 orderedflags = revlog.REVIDX_FLAGS_ORDER
258 272 if operation == 'write':
259 273 orderedflags = reversed(orderedflags)
260 274
261 275 for flag in orderedflags:
262 276 # If a flagprocessor has been registered for a known flag, apply the
263 277 # related operation transform and update result tuple.
264 278 if flag & flags:
265 279 vhash = True
266 280
267 281 if flag not in revlog._flagprocessors:
268 282 message = _("missing processor for flag '%#x'") % (flag)
269 283 raise revlog.RevlogError(message)
270 284
271 285 processor = revlog._flagprocessors[flag]
272 286 if processor is not None:
273 287 readtransform, writetransform, rawtransform = processor
274 288
275 289 if raw:
276 290 vhash = rawtransform(self, text)
277 291 elif operation == 'read':
278 292 text, vhash = readtransform(self, text)
279 293 else: # write operation
280 294 text, vhash = writetransform(self, text)
281 295 validatehash = validatehash and vhash
282 296
283 297 return text, validatehash
284 298
285 299 def checkhash(self, text, node, p1=None, p2=None, rev=None):
286 300 if p1 is None and p2 is None:
287 301 p1, p2 = self.parents(node)
288 302 if node != revlog.hash(text, p1, p2):
289 303 raise error.RevlogError(_("integrity check failed on %s") %
290 304 self._path)
291 305
292 306 def revision(self, node, raw=False):
293 307 validatenode(node)
294 308
295 309 if node == nullid:
296 310 return b''
297 311
298 312 rev = self.rev(node)
299 313 flags = self.flags(rev)
300 314
301 315 path = b'/'.join([self._storepath, hex(node)])
302 316 rawtext = self._svfs.read(path)
303 317
304 318 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
305 319 if validatehash:
306 320 self.checkhash(text, node, rev=rev)
307 321
308 322 return text
309 323
310 324 def read(self, node):
311 325 validatenode(node)
312 326
313 327 revision = self.revision(node)
314 328
315 329 if not revision.startswith(b'\1\n'):
316 330 return revision
317 331
318 332 start = revision.index(b'\1\n', 2)
319 333 return revision[start + 2:]
320 334
321 335 def renamed(self, node):
322 336 validatenode(node)
323 337
324 338 if self.parents(node)[0] != nullid:
325 339 return False
326 340
327 341 fulltext = self.revision(node)
328 342 m = revlog.parsemeta(fulltext)[0]
329 343
330 344 if m and 'copy' in m:
331 345 return m['copy'], bin(m['copyrev'])
332 346
333 347 return False
334 348
335 349 def cmp(self, node, text):
336 350 validatenode(node)
337 351
338 352 t = text
339 353
340 354 if text.startswith(b'\1\n'):
341 355 t = b'\1\n\1\n' + text
342 356
343 357 p1, p2 = self.parents(node)
344 358
345 359 if revlog.hash(t, p1, p2) == node:
346 360 return False
347 361
348 362 if self.iscensored(self.rev(node)):
349 363 return text != b''
350 364
351 365 if self.renamed(node):
352 366 t2 = self.read(node)
353 367 return t2 != text
354 368
355 369 return True
356 370
357 371 def size(self, rev):
358 372 validaterev(rev)
359 373
360 374 node = self._indexbyrev[rev][b'node']
361 375
362 376 if self.renamed(node):
363 377 return len(self.read(node))
364 378
365 379 if self.iscensored(rev):
366 380 return 0
367 381
368 382 return len(self.revision(node))
369 383
370 384 def iscensored(self, rev):
371 385 validaterev(rev)
372 386
373 387 return self.flags(rev) & revlog.REVIDX_ISCENSORED
374 388
375 389 def commonancestorsheads(self, a, b):
376 390 validatenode(a)
377 391 validatenode(b)
378 392
379 393 a = self.rev(a)
380 394 b = self.rev(b)
381 395
382 396 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
383 397 return pycompat.maplist(self.node, ancestors)
384 398
385 399 def descendants(self, revs):
386 400 # This is a copy of revlog.descendants()
387 401 first = min(revs)
388 402 if first == nullrev:
389 403 for i in self:
390 404 yield i
391 405 return
392 406
393 407 seen = set(revs)
394 408 for i in self.revs(start=first + 1):
395 409 for x in self.parentrevs(i):
396 410 if x != nullrev and x in seen:
397 411 seen.add(i)
398 412 yield i
399 413 break
400 414
401 415 # Required by verify.
402 416 def files(self):
403 417 entries = self._svfs.listdir(self._storepath)
404 418
405 419 # Strip out undo.backup.* files created as part of transaction
406 420 # recording.
407 421 entries = [f for f in entries if not f.startswith('undo.backup.')]
408 422
409 423 return [b'/'.join((self._storepath, f)) for f in entries]
410 424
411 425 # Required by verify.
412 426 def checksize(self):
413 427 return 0, 0
414 428
415 429 def add(self, text, meta, transaction, linkrev, p1, p2):
416 430 if meta or text.startswith(b'\1\n'):
417 431 text = revlog.packmeta(meta, text)
418 432
419 433 return self.addrevision(text, transaction, linkrev, p1, p2)
420 434
421 435 def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
422 436 flags=revlog.REVIDX_DEFAULT_FLAGS, cachedelta=None):
423 437 validatenode(p1)
424 438 validatenode(p2)
425 439
426 440 if flags:
427 441 node = node or revlog.hash(text, p1, p2)
428 442
429 443 rawtext, validatehash = self._processflags(text, flags, 'write')
430 444
431 445 node = node or revlog.hash(text, p1, p2)
432 446
433 447 if node in self._indexbynode:
434 448 return node
435 449
436 450 if validatehash:
437 451 self.checkhash(rawtext, node, p1=p1, p2=p2)
438 452
439 453 return self._addrawrevision(node, rawtext, transaction, linkrev, p1, p2,
440 454 flags)
441 455
442 456 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
443 457 transaction.addbackup(self._indexpath)
444 458
445 459 path = b'/'.join([self._storepath, hex(node)])
446 460
447 461 self._svfs.write(path, rawtext)
448 462
449 463 self._indexdata.append({
450 464 b'node': node,
451 465 b'p1': p1,
452 466 b'p2': p2,
453 467 b'linkrev': link,
454 468 b'flags': flags,
455 469 })
456 470
457 471 self._reflectindexupdate()
458 472
459 473 return node
460 474
461 475 def _reflectindexupdate(self):
462 476 self._refreshindex()
463 477 self._svfs.write(self._indexpath, cbor.dumps(self._indexdata))
464 478
465 479 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
466 480 nodes = []
467 481
468 482 transaction.addbackup(self._indexpath)
469 483
470 484 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
471 485 linkrev = linkmapper(linknode)
472 486 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
473 487
474 488 nodes.append(node)
475 489
476 490 if node in self._indexbynode:
477 491 continue
478 492
479 493 # Need to resolve the fulltext from the delta base.
480 494 if deltabase == nullid:
481 495 text = mdiff.patch(b'', delta)
482 496 else:
483 497 text = mdiff.patch(self.revision(deltabase), delta)
484 498
485 499 self._addrawrevision(node, text, transaction, linkrev, p1, p2,
486 500 flags)
487 501
488 502 if addrevisioncb:
489 503 addrevisioncb(self, node)
490 504
491 505 return nodes
492 506
493 507 def revdiff(self, rev1, rev2):
494 508 validaterev(rev1)
495 509 validaterev(rev2)
496 510
497 511 node1 = self.node(rev1)
498 512 node2 = self.node(rev2)
499 513
500 514 return mdiff.textdiff(self.revision(node1, raw=True),
501 515 self.revision(node2, raw=True))
502 516
517 def emitrevisiondeltas(self, requests):
518 for request in requests:
519 node = request.node
520 rev = self.rev(node)
521
522 if request.basenode == nullid:
523 baserev = nullrev
524 elif request.basenode is not None:
525 baserev = self.rev(request.basenode)
526 else:
527 # This is a test extension and we can do simple things
528 # for choosing a delta parent.
529 baserev = self.deltaparent(rev)
530
531 if baserev != nullrev and not self.candelta(baserev, rev):
532 baserev = nullrev
533
534 revision = None
535 delta = None
536 baserevisionsize = None
537
538 if self.iscensored(baserev) or self.iscensored(rev):
539 try:
540 revision = self.revision(node, raw=True)
541 except error.CensoredNodeError as e:
542 revision = e.tombstone
543
544 if baserev != nullrev:
545 baserevisionsize = self.rawsize(baserev)
546
547 elif baserev == nullrev:
548 revision = self.revision(node, raw=True)
549 else:
550 delta = self.revdiff(baserev, rev)
551
552 extraflags = revlog.REVIDX_ELLIPSIS if request.ellipsis else 0
553
554 yield simplestorerevisiondelta(
555 node=node,
556 p1node=request.p1node,
557 p2node=request.p2node,
558 linknode=request.linknode,
559 basenode=self.node(baserev),
560 flags=self.flags(rev) | extraflags,
561 baserevisionsize=baserevisionsize,
562 revision=revision,
563 delta=delta)
564
503 565 def headrevs(self):
504 566 # Assume all revisions are heads by default.
505 567 revishead = {rev: True for rev in self._indexbyrev}
506 568
507 569 for rev, entry in self._indexbyrev.items():
508 570 # Unset head flag for all seen parents.
509 571 revishead[self.rev(entry[b'p1'])] = False
510 572 revishead[self.rev(entry[b'p2'])] = False
511 573
512 574 return [rev for rev, ishead in sorted(revishead.items())
513 575 if ishead]
514 576
515 577 def heads(self, start=None, stop=None):
516 578 # This is copied from revlog.py.
517 579 if start is None and stop is None:
518 580 if not len(self):
519 581 return [nullid]
520 582 return [self.node(r) for r in self.headrevs()]
521 583
522 584 if start is None:
523 585 start = nullid
524 586 if stop is None:
525 587 stop = []
526 588 stoprevs = set([self.rev(n) for n in stop])
527 589 startrev = self.rev(start)
528 590 reachable = {startrev}
529 591 heads = {startrev}
530 592
531 593 parentrevs = self.parentrevs
532 594 for r in self.revs(start=startrev + 1):
533 595 for p in parentrevs(r):
534 596 if p in reachable:
535 597 if r not in stoprevs:
536 598 reachable.add(r)
537 599 heads.add(r)
538 600 if p in heads and p not in stoprevs:
539 601 heads.remove(p)
540 602
541 603 return [self.node(r) for r in heads]
542 604
543 605 def children(self, node):
544 606 validatenode(node)
545 607
546 608 # This is a copy of revlog.children().
547 609 c = []
548 610 p = self.rev(node)
549 611 for r in self.revs(start=p + 1):
550 612 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
551 613 if prevs:
552 614 for pr in prevs:
553 615 if pr == p:
554 616 c.append(self.node(r))
555 617 elif p == nullrev:
556 618 c.append(self.node(r))
557 619 return c
558 620
559 621 def getstrippoint(self, minlink):
561 623 # This is largely a copy of revlog.getstrippoint().
562 624 brokenrevs = set()
563 625 strippoint = len(self)
564 626
565 627 heads = {}
566 628 futurelargelinkrevs = set()
567 629 for head in self.headrevs():
568 630 headlinkrev = self.linkrev(head)
569 631 heads[head] = headlinkrev
570 632 if headlinkrev >= minlink:
571 633 futurelargelinkrevs.add(headlinkrev)
572 634
573 635 # This algorithm involves walking down the rev graph, starting at the
574 636 # heads. Since the revs are topologically sorted according to linkrev,
575 637 # once all head linkrevs are below the minlink, we know there are
576 638 # no more revs that could have a linkrev greater than minlink.
577 639 # So we can stop walking.
578 640 while futurelargelinkrevs:
579 641 strippoint -= 1
580 642 linkrev = heads.pop(strippoint)
581 643
582 644 if linkrev < minlink:
583 645 brokenrevs.add(strippoint)
584 646 else:
585 647 futurelargelinkrevs.remove(linkrev)
586 648
587 649 for p in self.parentrevs(strippoint):
588 650 if p != nullrev:
589 651 plinkrev = self.linkrev(p)
590 652 heads[p] = plinkrev
591 653 if plinkrev >= minlink:
592 654 futurelargelinkrevs.add(plinkrev)
593 655
594 656 return strippoint, brokenrevs
595 657
596 658 def strip(self, minlink, transaction):
597 659 if not len(self):
598 660 return
599 661
600 662 rev, _ignored = self.getstrippoint(minlink)
601 663 if rev == len(self):
602 664 return
603 665
604 666 # Purge index data starting at the requested revision.
605 667 self._indexdata[rev:] = []
606 668 self._reflectindexupdate()
607 669
608 670 def issimplestorefile(f, kind, st):
609 671 if kind != stat.S_IFREG:
610 672 return False
611 673
612 674 if store.isrevlog(f, kind, st):
613 675 return False
614 676
615 677 # Ignore transaction undo files.
616 678 if f.startswith('undo.'):
617 679 return False
618 680
619 681 # Otherwise assume it belongs to the simple store.
620 682 return True
621 683
622 684 class simplestore(store.encodedstore):
623 685 def datafiles(self):
624 686 for x in super(simplestore, self).datafiles():
625 687 yield x
626 688
627 689 # Supplement with non-revlog files.
628 690 extrafiles = self._walk('data', True, filefilter=issimplestorefile)
629 691
630 692 for unencoded, encoded, size in extrafiles:
631 693 try:
632 694 unencoded = store.decodefilename(unencoded)
633 695 except KeyError:
634 696 unencoded = None
635 697
636 698 yield unencoded, encoded, size
637 699
638 700 def reposetup(ui, repo):
639 701 if not repo.local():
640 702 return
641 703
642 704 if isinstance(repo, bundlerepo.bundlerepository):
643 705 raise error.Abort(_('cannot use simple store with bundlerepo'))
644 706
645 707 class simplestorerepo(repo.__class__):
646 708 def file(self, f):
647 709 return filestorage(self.svfs, f)
648 710
649 711 repo.__class__ = simplestorerepo
650 712
651 713 def featuresetup(ui, supported):
652 714 supported.add(REQUIREMENT)
653 715
654 716 def newreporequirements(orig, repo):
655 717 """Modifies default requirements for new repos to use the simple store."""
656 718 requirements = orig(repo)
657 719
658 720 # These requirements are only used to affect creation of the store
659 721 # object. We have our own store. So we can remove them.
660 722 # TODO do this once we feel like taking the test hit.
661 723 #if 'fncache' in requirements:
662 724 # requirements.remove('fncache')
663 725 #if 'dotencode' in requirements:
664 726 # requirements.remove('dotencode')
665 727
666 728 requirements.add(REQUIREMENT)
667 729
668 730 return requirements
669 731
670 732 def makestore(orig, requirements, path, vfstype):
671 733 if REQUIREMENT not in requirements:
672 734 return orig(requirements, path, vfstype)
673 735
674 736 return simplestore(path, vfstype)
675 737
676 738 def verifierinit(orig, self, *args, **kwargs):
677 739 orig(self, *args, **kwargs)
678 740
679 741 # We don't care that files in the store don't align with what is
680 742 # advertised. So suppress these warnings.
681 743 self.warnorphanstorefiles = False
682 744
683 745 def extsetup(ui):
684 746 localrepo.featuresetupfuncs.add(featuresetup)
685 747
686 748 extensions.wrapfunction(localrepo, 'newreporequirements',
687 749 newreporequirements)
688 750 extensions.wrapfunction(store, 'store', makestore)
689 751 extensions.wrapfunction(verify.verifier, '__init__', verifierinit)
@@ -1,225 +1,226 b''
1 1 # Test that certain objects conform to well-defined interfaces.
2 2
3 3 from __future__ import absolute_import, print_function
4 4
5 5 from mercurial import encoding
6 6 encoding.environ[b'HGREALINTERFACES'] = b'1'
7 7
8 8 import os
9 9 import subprocess
10 10 import sys
11 11
12 12 # Only run if tests are run in a repo
13 13 if subprocess.call(['python', '%s/hghave' % os.environ['TESTDIR'],
14 14 'test-repo']):
15 15 sys.exit(80)
16 16
17 17 from mercurial.thirdparty.zope import (
18 18 interface as zi,
19 19 )
20 20 from mercurial.thirdparty.zope.interface import (
21 21 verify as ziverify,
22 22 )
23 23 from mercurial import (
24 24 bundlerepo,
25 25 changegroup,
26 26 filelog,
27 27 httppeer,
28 28 localrepo,
29 29 manifest,
30 30 pycompat,
31 31 repository,
32 revlog,
32 33 sshpeer,
33 34 statichttprepo,
34 35 ui as uimod,
35 36 unionrepo,
36 37 vfs as vfsmod,
37 38 wireprotoserver,
38 39 wireprototypes,
39 40 wireprotov1peer,
40 41 wireprotov2server,
41 42 )
42 43
43 44 rootdir = pycompat.fsencode(
44 45 os.path.normpath(os.path.join(os.path.dirname(__file__), '..')))
45 46
46 47 def checkzobject(o, allowextra=False):
47 48 """Verify an object with a zope interface."""
48 49 ifaces = zi.providedBy(o)
49 50 if not ifaces:
50 51 print('%r does not provide any zope interfaces' % o)
51 52 return
52 53
53 54 # Run zope.interface's built-in verification routine. This verifies that
54 55 # everything that is supposed to be present is present.
55 56 for iface in ifaces:
56 57 ziverify.verifyObject(iface, o)
57 58
58 59 if allowextra:
59 60 return
60 61
61 62 # Now verify that the object provides no extra public attributes that
62 63 # aren't declared as part of interfaces.
63 64 allowed = set()
64 65 for iface in ifaces:
65 66 allowed |= set(iface.names(all=True))
66 67
67 68 public = {a for a in dir(o) if not a.startswith('_')}
68 69
69 70 for attr in sorted(public - allowed):
70 71 print('public attribute not declared in interfaces: %s.%s' % (
71 72 o.__class__.__name__, attr))
72 73
73 74 # Facilitates testing localpeer.
74 75 class dummyrepo(object):
75 76 def __init__(self):
76 77 self.ui = uimod.ui()
77 78 def filtered(self, name):
78 79 pass
79 80 def _restrictcapabilities(self, caps):
80 81 pass
81 82
82 83 class dummyopener(object):
83 84 handlers = []
84 85
85 86 # Facilitates testing sshpeer without requiring a server.
86 87 class badpeer(httppeer.httppeer):
87 88 def __init__(self):
88 89 super(badpeer, self).__init__(None, None, None, dummyopener(), None,
89 90 None)
90 91 self.badattribute = True
91 92
92 93 def badmethod(self):
93 94 pass
94 95
95 96 class dummypipe(object):
96 97 def close(self):
97 98 pass
98 99
99 100 def main():
100 101 ui = uimod.ui()
101 102 # Needed so we can open a local repo with obsstore without a warning.
102 103 ui.setconfig('experimental', 'evolution.createmarkers', True)
103 104
104 105 checkzobject(badpeer())
105 106
106 107 ziverify.verifyClass(repository.ipeerbase, httppeer.httppeer)
107 108 checkzobject(httppeer.httppeer(None, None, None, dummyopener(), None, None))
108 109
109 110 ziverify.verifyClass(repository.ipeerconnection,
110 111 httppeer.httpv2peer)
111 112 ziverify.verifyClass(repository.ipeercapabilities,
112 113 httppeer.httpv2peer)
113 114 checkzobject(httppeer.httpv2peer(None, b'', b'', None, None, None))
114 115
115 116 ziverify.verifyClass(repository.ipeerbase,
116 117 localrepo.localpeer)
117 118 checkzobject(localrepo.localpeer(dummyrepo()))
118 119
119 120 ziverify.verifyClass(repository.ipeercommandexecutor,
120 121 localrepo.localcommandexecutor)
121 122 checkzobject(localrepo.localcommandexecutor(None))
122 123
123 124 ziverify.verifyClass(repository.ipeercommandexecutor,
124 125 wireprotov1peer.peerexecutor)
125 126 checkzobject(wireprotov1peer.peerexecutor(None))
126 127
127 128 ziverify.verifyClass(repository.ipeerbase, sshpeer.sshv1peer)
128 129 checkzobject(sshpeer.sshv1peer(ui, b'ssh://localhost/foo', b'', dummypipe(),
129 130 dummypipe(), None, None))
130 131
131 132 ziverify.verifyClass(repository.ipeerbase, sshpeer.sshv2peer)
132 133 checkzobject(sshpeer.sshv2peer(ui, b'ssh://localhost/foo', b'', dummypipe(),
133 134 dummypipe(), None, None))
134 135
135 136 ziverify.verifyClass(repository.ipeerbase, bundlerepo.bundlepeer)
136 137 checkzobject(bundlerepo.bundlepeer(dummyrepo()))
137 138
138 139 ziverify.verifyClass(repository.ipeerbase, statichttprepo.statichttppeer)
139 140 checkzobject(statichttprepo.statichttppeer(dummyrepo()))
140 141
141 142 ziverify.verifyClass(repository.ipeerbase, unionrepo.unionpeer)
142 143 checkzobject(unionrepo.unionpeer(dummyrepo()))
143 144
144 145 ziverify.verifyClass(repository.completelocalrepository,
145 146 localrepo.localrepository)
146 147 repo = localrepo.localrepository(ui, rootdir)
147 148 checkzobject(repo)
148 149
149 150 ziverify.verifyClass(wireprototypes.baseprotocolhandler,
150 151 wireprotoserver.sshv1protocolhandler)
151 152 ziverify.verifyClass(wireprototypes.baseprotocolhandler,
152 153 wireprotoserver.sshv2protocolhandler)
153 154 ziverify.verifyClass(wireprototypes.baseprotocolhandler,
154 155 wireprotoserver.httpv1protocolhandler)
155 156 ziverify.verifyClass(wireprototypes.baseprotocolhandler,
156 157 wireprotov2server.httpv2protocolhandler)
157 158
158 159 sshv1 = wireprotoserver.sshv1protocolhandler(None, None, None)
159 160 checkzobject(sshv1)
160 161 sshv2 = wireprotoserver.sshv2protocolhandler(None, None, None)
161 162 checkzobject(sshv2)
162 163
163 164 httpv1 = wireprotoserver.httpv1protocolhandler(None, None, None)
164 165 checkzobject(httpv1)
165 166 httpv2 = wireprotov2server.httpv2protocolhandler(None, None)
166 167 checkzobject(httpv2)
167 168
168 169 ziverify.verifyClass(repository.ifilestorage, filelog.filelog)
169 170 ziverify.verifyClass(repository.imanifestdict, manifest.manifestdict)
170 171 ziverify.verifyClass(repository.imanifestrevisionstored,
171 172 manifest.manifestctx)
172 173 ziverify.verifyClass(repository.imanifestrevisionwritable,
173 174 manifest.memmanifestctx)
174 175 ziverify.verifyClass(repository.imanifestrevisionstored,
175 176 manifest.treemanifestctx)
176 177 ziverify.verifyClass(repository.imanifestrevisionwritable,
177 178 manifest.memtreemanifestctx)
178 179 ziverify.verifyClass(repository.imanifestlog, manifest.manifestlog)
179 180
180 181 vfs = vfsmod.vfs(b'.')
181 182 fl = filelog.filelog(vfs, b'dummy.i')
182 183 checkzobject(fl, allowextra=True)
183 184
184 185 # Conforms to imanifestlog.
185 186 ml = manifest.manifestlog(vfs, repo)
186 187 checkzobject(ml)
187 188 checkzobject(repo.manifestlog)
188 189
189 190 # Conforms to imanifestrevision.
190 191 mctx = ml[repo[0].manifestnode()]
191 192 checkzobject(mctx)
192 193
193 194 # Conforms to imanifestrevisionwritable.
194 195 checkzobject(mctx.new())
195 196 checkzobject(mctx.copy())
196 197
197 198 # Conforms to imanifestdict.
198 199 checkzobject(mctx.read())
199 200
200 201 ziverify.verifyClass(repository.irevisiondelta,
201 changegroup.revisiondelta)
202 revlog.revlogrevisiondelta)
202 203 ziverify.verifyClass(repository.irevisiondeltarequest,
203 204 changegroup.revisiondeltarequest)
204 205
205 rd = changegroup.revisiondelta(
206 rd = revlog.revlogrevisiondelta(
206 207 node=b'',
207 208 p1node=b'',
208 209 p2node=b'',
209 210 basenode=b'',
210 211 linknode=b'',
211 212 flags=b'',
212 213 baserevisionsize=None,
213 214 revision=b'',
214 215 delta=None)
215 216 checkzobject(rd)
216 217
217 218 rdr = changegroup.revisiondeltarequest(
218 219 node=b'',
219 220 linknode=b'',
220 221 p1node=b'',
221 222 p2node=b'',
222 223 basenode=b'')
223 224 checkzobject(rdr)
224 225
225 226 main()