changegroup: make delta header struct formatters actual structs...
Gregory Szorc
r38932:271854ad default
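This commit mechanically converts the three delta header formats from bare struct format strings into precompiled struct.Struct instances, so each format is parsed once at import time and calcsize/pack/unpack become an attribute lookup and method calls (deltaheader.size, .pack(), .unpack()) on a hot serialization path. A minimal standalone sketch of the pattern, for illustration only (the FMT/HEADER names are hypothetical, not Mercurial code):

import struct

# Before: a bare format string; every calcsize/pack/unpack call hands
# the format to the module-level struct functions.
FMT = ">20s20s20s20s20sH"            # the cg3 delta header layout
header_size = struct.calcsize(FMT)   # 5 * 20 + 2 = 102 bytes

# After: a precompiled Struct parses the format once and exposes the
# same operations as an attribute and methods.
HEADER = struct.Struct(">20s20s20s20s20sH")
assert HEADER.size == header_size

fields = (b"n" * 20, b"p" * 20, b"q" * 20, b"b" * 20, b"l" * 20, 0)
packed = HEADER.pack(*fields)
assert packed == struct.pack(FMT, *fields)
assert HEADER.unpack(packed) == fields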
@@ -1,1377 +1,1376 @@
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
43 _CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
44 _CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
45 _CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"
43 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
44 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
45 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
46 46
47 47 LFS_REQUIREMENT = 'lfs'
48 48
49 49 readexactly = util.readexactly
50 50
51 51 def getchunk(stream):
52 52 """return the next chunk from stream as a string"""
53 53 d = readexactly(stream, 4)
54 54 l = struct.unpack(">l", d)[0]
55 55 if l <= 4:
56 56 if l:
57 57 raise error.Abort(_("invalid chunk length %d") % l)
58 58 return ""
59 59 return readexactly(stream, l - 4)
60 60
61 61 def chunkheader(length):
62 62 """return a changegroup chunk header (string)"""
63 63 return struct.pack(">l", length + 4)
64 64
65 65 def closechunk():
66 66 """return a changegroup chunk header (string) for a zero-length chunk"""
67 67 return struct.pack(">l", 0)
68 68
69 69 def writechunks(ui, chunks, filename, vfs=None):
70 70 """Write chunks to a file and return its filename.
71 71
72 72 The stream is assumed to be a bundle file.
73 73 Existing files will not be overwritten.
74 74 If no filename is specified, a temporary file is created.
75 75 """
76 76 fh = None
77 77 cleanup = None
78 78 try:
79 79 if filename:
80 80 if vfs:
81 81 fh = vfs.open(filename, "wb")
82 82 else:
83 83 # Increase default buffer size because default is usually
84 84 # small (4k is common on Linux).
85 85 fh = open(filename, "wb", 131072)
86 86 else:
87 87 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
88 88 fh = os.fdopen(fd, r"wb")
89 89 cleanup = filename
90 90 for c in chunks:
91 91 fh.write(c)
92 92 cleanup = None
93 93 return filename
94 94 finally:
95 95 if fh is not None:
96 96 fh.close()
97 97 if cleanup is not None:
98 98 if filename and vfs:
99 99 vfs.unlink(cleanup)
100 100 else:
101 101 os.unlink(cleanup)
102 102
103 103 class cg1unpacker(object):
104 104 """Unpacker for cg1 changegroup streams.
105 105
106 106 A changegroup unpacker handles the framing of the revision data in
107 107 the wire format. Most consumers will want to use the apply()
108 108 method to add the changes from the changegroup to a repository.
109 109
110 110 If you're forwarding a changegroup unmodified to another consumer,
111 111 use getchunks(), which returns an iterator of changegroup
112 112 chunks. This is mostly useful for cases where you need to know the
113 113 data stream has ended by observing the end of the changegroup.
114 114
115 115 deltachunk() is useful only if you're applying delta data. Most
116 116 consumers should prefer apply() instead.
117 117
118 118 A few other public methods exist. Those are used only for
119 119 bundlerepo and some debug commands - their use is discouraged.
120 120 """
121 121 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
122 deltaheadersize = struct.calcsize(deltaheader)
122 deltaheadersize = deltaheader.size
123 123 version = '01'
124 124 _grouplistcount = 1 # One list of files after the manifests
125 125
126 126 def __init__(self, fh, alg, extras=None):
127 127 if alg is None:
128 128 alg = 'UN'
129 129 if alg not in util.compengines.supportedbundletypes:
130 130 raise error.Abort(_('unknown stream compression type: %s')
131 131 % alg)
132 132 if alg == 'BZ':
133 133 alg = '_truncatedBZ'
134 134
135 135 compengine = util.compengines.forbundletype(alg)
136 136 self._stream = compengine.decompressorreader(fh)
137 137 self._type = alg
138 138 self.extras = extras or {}
139 139 self.callback = None
140 140
141 141 # These methods (compressed, read, seek, tell) all appear to only
142 142 # be used by bundlerepo, but it's a little hard to tell.
143 143 def compressed(self):
144 144 return self._type is not None and self._type != 'UN'
145 145 def read(self, l):
146 146 return self._stream.read(l)
147 147 def seek(self, pos):
148 148 return self._stream.seek(pos)
149 149 def tell(self):
150 150 return self._stream.tell()
151 151 def close(self):
152 152 return self._stream.close()
153 153
154 154 def _chunklength(self):
155 155 d = readexactly(self._stream, 4)
156 156 l = struct.unpack(">l", d)[0]
157 157 if l <= 4:
158 158 if l:
159 159 raise error.Abort(_("invalid chunk length %d") % l)
160 160 return 0
161 161 if self.callback:
162 162 self.callback()
163 163 return l - 4
164 164
165 165 def changelogheader(self):
166 166 """v10 does not have a changelog header chunk"""
167 167 return {}
168 168
169 169 def manifestheader(self):
170 170 """v10 does not have a manifest header chunk"""
171 171 return {}
172 172
173 173 def filelogheader(self):
174 174 """return the header of the filelogs chunk, v10 only has the filename"""
175 175 l = self._chunklength()
176 176 if not l:
177 177 return {}
178 178 fname = readexactly(self._stream, l)
179 179 return {'filename': fname}
180 180
181 181 def _deltaheader(self, headertuple, prevnode):
182 182 node, p1, p2, cs = headertuple
183 183 if prevnode is None:
184 184 deltabase = p1
185 185 else:
186 186 deltabase = prevnode
187 187 flags = 0
188 188 return node, p1, p2, deltabase, cs, flags
189 189
190 190 def deltachunk(self, prevnode):
191 191 l = self._chunklength()
192 192 if not l:
193 193 return {}
194 194 headerdata = readexactly(self._stream, self.deltaheadersize)
195 header = struct.unpack(self.deltaheader, headerdata)
195 header = self.deltaheader.unpack(headerdata)
196 196 delta = readexactly(self._stream, l - self.deltaheadersize)
197 197 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
198 198 return (node, p1, p2, cs, deltabase, delta, flags)
199 199
200 200 def getchunks(self):
201 201 """returns all the chunks contains in the bundle
202 202
203 203 Used when you need to forward the binary stream to a file or another
204 204 network API. To do so, it parse the changegroup data, otherwise it will
205 205 block in case of sshrepo because it don't know the end of the stream.
206 206 """
207 207 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
208 208 # and a list of filelogs. For changegroup 3, we expect 4 parts:
209 209 # changelog, manifestlog, a list of tree manifestlogs, and a list of
210 210 # filelogs.
211 211 #
212 212 # Changelog and manifestlog parts are terminated with empty chunks. The
213 213 # tree and file parts are a list of entry sections. Each entry section
214 214 # is a series of chunks terminating in an empty chunk. The list of these
215 215 # entry sections is terminated in yet another empty chunk, so we know
216 216 # we've reached the end of the tree/file list when we reach an empty
217 217 # chunk that was preceded by no non-empty chunks.
218 218
219 219 parts = 0
220 220 while parts < 2 + self._grouplistcount:
221 221 noentries = True
222 222 while True:
223 223 chunk = getchunk(self)
224 224 if not chunk:
225 225 # The first two empty chunks represent the end of the
226 226 # changelog and the manifestlog portions. The remaining
227 227 # empty chunks represent either A) the end of individual
228 228 # tree or file entries in the file list, or B) the end of
229 229 # the entire list. It's the end of the entire list if there
230 230 # were no entries (i.e. noentries is True).
231 231 if parts < 2:
232 232 parts += 1
233 233 elif noentries:
234 234 parts += 1
235 235 break
236 236 noentries = False
237 237 yield chunkheader(len(chunk))
238 238 pos = 0
239 239 while pos < len(chunk):
240 240 next = pos + 2**20
241 241 yield chunk[pos:next]
242 242 pos = next
243 243 yield closechunk()
244 244
245 245 def _unpackmanifests(self, repo, revmap, trp, prog):
246 246 self.callback = prog.increment
247 247 # no need to check for empty manifest group here:
248 248 # if the result of the merge of 1 and 2 is the same in 3 and 4,
249 249 # no new manifest will be created and the manifest group will
250 250 # be empty during the pull
251 251 self.manifestheader()
252 252 deltas = self.deltaiter()
253 253 repo.manifestlog.addgroup(deltas, revmap, trp)
254 254 prog.complete()
255 255 self.callback = None
256 256
257 257 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
258 258 expectedtotal=None):
259 259 """Add the changegroup returned by source.read() to this repo.
260 260 srctype is a string like 'push', 'pull', or 'unbundle'. url is
261 261 the URL of the repo where this changegroup is coming from.
262 262
263 263 Return an integer summarizing the change to this repo:
264 264 - nothing changed or no source: 0
265 265 - more heads than before: 1+added heads (2..n)
266 266 - fewer heads than before: -1-removed heads (-2..-n)
267 267 - number of heads stays the same: 1
268 268 """
269 269 repo = repo.unfiltered()
270 270 def csmap(x):
271 271 repo.ui.debug("add changeset %s\n" % short(x))
272 272 return len(cl)
273 273
274 274 def revmap(x):
275 275 return cl.rev(x)
276 276
277 277 changesets = files = revisions = 0
278 278
279 279 try:
280 280 # The transaction may already carry source information. In this
281 281 # case we use the top level data. We overwrite the argument
282 282 # because we need to use the top level value (if it exists)
283 283 # in this function.
284 284 srctype = tr.hookargs.setdefault('source', srctype)
285 285 url = tr.hookargs.setdefault('url', url)
286 286 repo.hook('prechangegroup',
287 287 throw=True, **pycompat.strkwargs(tr.hookargs))
288 288
289 289 # write changelog data to temp files so concurrent readers
290 290 # will not see an inconsistent view
291 291 cl = repo.changelog
292 292 cl.delayupdate(tr)
293 293 oldheads = set(cl.heads())
294 294
295 295 trp = weakref.proxy(tr)
296 296 # pull off the changeset group
297 297 repo.ui.status(_("adding changesets\n"))
298 298 clstart = len(cl)
299 299 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
300 300 total=expectedtotal)
301 301 self.callback = progress.increment
302 302
303 303 efiles = set()
304 304 def onchangelog(cl, node):
305 305 efiles.update(cl.readfiles(node))
306 306
307 307 self.changelogheader()
308 308 deltas = self.deltaiter()
309 309 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
310 310 efiles = len(efiles)
311 311
312 312 if not cgnodes:
313 313 repo.ui.develwarn('applied empty changegroup',
314 314 config='warn-empty-changegroup')
315 315 clend = len(cl)
316 316 changesets = clend - clstart
317 317 progress.complete()
318 318 self.callback = None
319 319
320 320 # pull off the manifest group
321 321 repo.ui.status(_("adding manifests\n"))
322 322 # We know that we'll never have more manifests than we had
323 323 # changesets.
324 324 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
325 325 total=changesets)
326 326 self._unpackmanifests(repo, revmap, trp, progress)
327 327
328 328 needfiles = {}
329 329 if repo.ui.configbool('server', 'validate'):
330 330 cl = repo.changelog
331 331 ml = repo.manifestlog
332 332 # validate incoming csets have their manifests
333 333 for cset in pycompat.xrange(clstart, clend):
334 334 mfnode = cl.changelogrevision(cset).manifest
335 335 mfest = ml[mfnode].readdelta()
336 336 # store file cgnodes we must see
337 337 for f, n in mfest.iteritems():
338 338 needfiles.setdefault(f, set()).add(n)
339 339
340 340 # process the files
341 341 repo.ui.status(_("adding file changes\n"))
342 342 newrevs, newfiles = _addchangegroupfiles(
343 343 repo, self, revmap, trp, efiles, needfiles)
344 344 revisions += newrevs
345 345 files += newfiles
346 346
347 347 deltaheads = 0
348 348 if oldheads:
349 349 heads = cl.heads()
350 350 deltaheads = len(heads) - len(oldheads)
351 351 for h in heads:
352 352 if h not in oldheads and repo[h].closesbranch():
353 353 deltaheads -= 1
354 354 htext = ""
355 355 if deltaheads:
356 356 htext = _(" (%+d heads)") % deltaheads
357 357
358 358 repo.ui.status(_("added %d changesets"
359 359 " with %d changes to %d files%s\n")
360 360 % (changesets, revisions, files, htext))
361 361 repo.invalidatevolatilesets()
362 362
363 363 if changesets > 0:
364 364 if 'node' not in tr.hookargs:
365 365 tr.hookargs['node'] = hex(cl.node(clstart))
366 366 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
367 367 hookargs = dict(tr.hookargs)
368 368 else:
369 369 hookargs = dict(tr.hookargs)
370 370 hookargs['node'] = hex(cl.node(clstart))
371 371 hookargs['node_last'] = hex(cl.node(clend - 1))
372 372 repo.hook('pretxnchangegroup',
373 373 throw=True, **pycompat.strkwargs(hookargs))
374 374
375 375 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
376 376 phaseall = None
377 377 if srctype in ('push', 'serve'):
378 378 # Old servers can not push the boundary themselves.
379 379 # New servers won't push the boundary if the changeset already
380 380 # exists locally as secret
381 381 #
382 382 # We should not use added here but the list of all changes in
383 383 # the bundle
384 384 if repo.publishing():
385 385 targetphase = phaseall = phases.public
386 386 else:
387 387 # closer target phase computation
388 388
389 389 # Those changesets have been pushed from the
390 390 # outside, their phases are going to be pushed
391 391 # alongside. Therefore `targetphase` is
392 392 # ignored.
393 393 targetphase = phaseall = phases.draft
394 394 if added:
395 395 phases.registernew(repo, tr, targetphase, added)
396 396 if phaseall is not None:
397 397 phases.advanceboundary(repo, tr, phaseall, cgnodes)
398 398
399 399 if changesets > 0:
400 400
401 401 def runhooks():
402 402 # These hooks run when the lock releases, not when the
403 403 # transaction closes. So it's possible for the changelog
404 404 # to have changed since we last saw it.
405 405 if clstart >= len(repo):
406 406 return
407 407
408 408 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
409 409
410 410 for n in added:
411 411 args = hookargs.copy()
412 412 args['node'] = hex(n)
413 413 del args['node_last']
414 414 repo.hook("incoming", **pycompat.strkwargs(args))
415 415
416 416 newheads = [h for h in repo.heads()
417 417 if h not in oldheads]
418 418 repo.ui.log("incoming",
419 419 "%d incoming changes - new heads: %s\n",
420 420 len(added),
421 421 ', '.join([hex(c[:6]) for c in newheads]))
422 422
423 423 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
424 424 lambda tr: repo._afterlock(runhooks))
425 425 finally:
426 426 repo.ui.flush()
427 427 # never return 0 here:
428 428 if deltaheads < 0:
429 429 ret = deltaheads - 1
430 430 else:
431 431 ret = deltaheads + 1
432 432 return ret
433 433
434 434 def deltaiter(self):
435 435 """
436 436 returns an iterator of the deltas in this changegroup
437 437
438 438 Useful for passing to the underlying storage system to be stored.
439 439 """
440 440 chain = None
441 441 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
442 442 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
443 443 yield chunkdata
444 444 chain = chunkdata[0]
445 445
446 446 class cg2unpacker(cg1unpacker):
447 447 """Unpacker for cg2 streams.
448 448
449 449 cg2 streams add support for generaldelta, so the delta header
450 450 format is slightly different. All other features about the data
451 451 remain the same.
452 452 """
453 453 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
454 deltaheadersize = struct.calcsize(deltaheader)
454 deltaheadersize = deltaheader.size
455 455 version = '02'
456 456
457 457 def _deltaheader(self, headertuple, prevnode):
458 458 node, p1, p2, deltabase, cs = headertuple
459 459 flags = 0
460 460 return node, p1, p2, deltabase, cs, flags
461 461
462 462 class cg3unpacker(cg2unpacker):
463 463 """Unpacker for cg3 streams.
464 464
465 465 cg3 streams add support for exchanging treemanifests and revlog
466 466 flags. It adds the revlog flags to the delta header and an empty chunk
467 467 separating manifests and files.
468 468 """
469 469 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
470 deltaheadersize = struct.calcsize(deltaheader)
470 deltaheadersize = deltaheader.size
471 471 version = '03'
472 472 _grouplistcount = 2 # One list of manifests and one list of files
473 473
474 474 def _deltaheader(self, headertuple, prevnode):
475 475 node, p1, p2, deltabase, cs, flags = headertuple
476 476 return node, p1, p2, deltabase, cs, flags
477 477
478 478 def _unpackmanifests(self, repo, revmap, trp, prog):
479 479 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
480 480 for chunkdata in iter(self.filelogheader, {}):
481 481 # If we get here, there are directory manifests in the changegroup
482 482 d = chunkdata["filename"]
483 483 repo.ui.debug("adding %s revisions\n" % d)
484 484 dirlog = repo.manifestlog._revlog.dirlog(d)
485 485 deltas = self.deltaiter()
486 486 if not dirlog.addgroup(deltas, revmap, trp):
487 487 raise error.Abort(_("received dir revlog group is empty"))
488 488
489 489 class headerlessfixup(object):
490 490 def __init__(self, fh, h):
491 491 self._h = h
492 492 self._fh = fh
493 493 def read(self, n):
494 494 if self._h:
495 495 d, self._h = self._h[:n], self._h[n:]
496 496 if len(d) < n:
497 497 d += readexactly(self._fh, n - len(d))
498 498 return d
499 499 return readexactly(self._fh, n)
500 500
501 501 @attr.s(slots=True, frozen=True)
502 502 class revisiondelta(object):
503 503 """Describes a delta entry in a changegroup.
504 504
505 505 Captured data is sufficient to serialize the delta into multiple
506 506 formats.
507 507 """
508 508 # 20 byte node of this revision.
509 509 node = attr.ib()
510 510 # 20 byte nodes of parent revisions.
511 511 p1node = attr.ib()
512 512 p2node = attr.ib()
513 513 # 20 byte node of node this delta is against.
514 514 basenode = attr.ib()
515 515 # 20 byte node of changeset revision this delta is associated with.
516 516 linknode = attr.ib()
517 517 # 2 bytes of flags to apply to revision data.
518 518 flags = attr.ib()
519 519 # Iterable of chunks holding raw delta data.
520 520 deltachunks = attr.ib()
521 521
522 522 class cg1packer(object):
523 523 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
524 524
525 525 def __init__(self, repo, filematcher, version, bundlecaps=None):
526 526 """Given a source repo, construct a bundler.
527 527
528 528 filematcher is a matcher that matches on files to include in the
529 529 changegroup. Used to facilitate sparse changegroups.
530 530
531 531 bundlecaps is optional and can be used to specify the set of
532 532 capabilities which can be used to build the bundle. While bundlecaps is
533 533 unused in core Mercurial, extensions rely on this feature to communicate
534 534 capabilities to customize the changegroup packer.
535 535 """
536 536 assert filematcher
537 537 self._filematcher = filematcher
538 538
539 539 self.version = version
540 540
541 541 # Set of capabilities we can use to build the bundle.
542 542 if bundlecaps is None:
543 543 bundlecaps = set()
544 544 self._bundlecaps = bundlecaps
545 545 # experimental config: bundle.reorder
546 546 reorder = repo.ui.config('bundle', 'reorder')
547 547 if reorder == 'auto':
548 548 reorder = None
549 549 else:
550 550 reorder = stringutil.parsebool(reorder)
551 551 self._repo = repo
552 552 self._reorder = reorder
553 553 if self._repo.ui.verbose and not self._repo.ui.debugflag:
554 554 self._verbosenote = self._repo.ui.note
555 555 else:
556 556 self._verbosenote = lambda s: None
557 557
558 558 def close(self):
559 559 # Ellipses serving mode.
560 560 getattr(self, 'clrev_to_localrev', {}).clear()
561 561 if getattr(self, 'next_clrev_to_localrev', {}):
562 562 self.clrev_to_localrev = self.next_clrev_to_localrev
563 563 del self.next_clrev_to_localrev
564 564 self.changelog_done = True
565 565
566 566 return closechunk()
567 567
568 568 def fileheader(self, fname):
569 569 return chunkheader(len(fname)) + fname
570 570
571 571 # Extracted both for clarity and for overriding in extensions.
572 572 def _sortgroup(self, store, nodelist, lookup):
573 573 """Sort nodes for change group and turn them into revnums."""
574 574 # Ellipses serving mode.
575 575 #
576 576 # In a perfect world, we'd generate better ellipsis-ified graphs
577 577 # for non-changelog revlogs. In practice, we haven't started doing
578 578 # that yet, so the resulting DAGs for the manifestlog and filelogs
579 579 # are actually full of bogus parentage on all the ellipsis
580 580 # nodes. This has the side effect that, while the contents are
581 581 # correct, the individual DAGs might be completely out of whack in
582 582 # a case like 882681bc3166 and its ancestors (back about 10
583 583 # revisions or so) in the main hg repo.
584 584 #
585 585 # The one invariant we *know* holds is that the new (potentially
586 586 # bogus) DAG shape will be valid if we order the nodes in the
587 587 # order that they're introduced in dramatis personae by the
588 588 # changelog, so what we do is we sort the non-changelog histories
589 589 # by the order in which they are used by the changelog.
590 590 if util.safehasattr(self, 'full_nodes') and self.clnode_to_rev:
591 591 key = lambda n: self.clnode_to_rev[lookup(n)]
592 592 return [store.rev(n) for n in sorted(nodelist, key=key)]
593 593
594 594 # for generaldelta revlogs, we linearize the revs; this will both be
595 595 # much quicker and generate a much smaller bundle
596 596 if (store._generaldelta and self._reorder is None) or self._reorder:
597 597 dag = dagutil.revlogdag(store)
598 598 return dag.linearize(set(store.rev(n) for n in nodelist))
599 599 else:
600 600 return sorted([store.rev(n) for n in nodelist])
601 601
602 602 def group(self, nodelist, store, lookup, units=None):
603 603 """Calculate a delta group, yielding a sequence of changegroup chunks
604 604 (strings).
605 605
606 606 Given a list of changeset revs, return a set of deltas and
607 607 metadata corresponding to nodes. The first delta is
608 608 first parent(nodelist[0]) -> nodelist[0], the receiver is
609 609 guaranteed to have this parent as it has all history before
610 610 these changesets. In the case firstparent is nullrev the
611 611 changegroup starts with a full revision.
612 612
613 613 If units is not None, progress detail will be generated; units specifies
614 614 the type of revlog that is touched (changelog, manifest, etc.).
615 615 """
616 616 # if we don't have any revisions touched by these changesets, bail
617 617 if len(nodelist) == 0:
618 618 yield self.close()
619 619 return
620 620
621 621 revs = self._sortgroup(store, nodelist, lookup)
622 622
623 623 # add the parent of the first rev
624 624 p = store.parentrevs(revs[0])[0]
625 625 revs.insert(0, p)
626 626
627 627 # build deltas
628 628 progress = None
629 629 if units is not None:
630 630 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
631 631 total=(len(revs) - 1))
632 632 for r in pycompat.xrange(len(revs) - 1):
633 633 if progress:
634 634 progress.update(r + 1)
635 635 prev, curr = revs[r], revs[r + 1]
636 636 linknode = lookup(store.node(curr))
637 637 for c in self.revchunk(store, curr, prev, linknode):
638 638 yield c
639 639
640 640 if progress:
641 641 progress.complete()
642 642 yield self.close()
643 643
644 644 # filter any nodes that claim to be part of the known set
645 645 def prune(self, store, missing, commonrevs):
646 646 # TODO this violates storage abstraction for manifests.
647 647 if isinstance(store, manifest.manifestrevlog):
648 648 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
649 649 return []
650 650
651 651 rr, rl = store.rev, store.linkrev
652 652 return [n for n in missing if rl(rr(n)) not in commonrevs]
653 653
654 654 def _packmanifests(self, dir, mfnodes, lookuplinknode):
655 655 """Pack flat manifests into a changegroup stream."""
656 656 assert not dir
657 657 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
658 658 lookuplinknode, units=_('manifests')):
659 659 yield chunk
660 660
661 661 def _manifestsdone(self):
662 662 return ''
663 663
664 664 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
665 665 '''yield a sequence of changegroup chunks (strings)'''
666 666 repo = self._repo
667 667 cl = repo.changelog
668 668
669 669 clrevorder = {}
670 670 mfs = {} # needed manifests
671 671 fnodes = {} # needed file nodes
672 672 mfl = repo.manifestlog
673 673 # TODO violates storage abstraction.
674 674 mfrevlog = mfl._revlog
675 675 changedfiles = set()
676 676
677 677 ellipsesmode = util.safehasattr(self, 'full_nodes')
678 678
679 679 # Callback for the changelog, used to collect changed files and
680 680 # manifest nodes.
681 681 # Returns the linkrev node (identity in the changelog case).
682 682 def lookupcl(x):
683 683 c = cl.read(x)
684 684 clrevorder[x] = len(clrevorder)
685 685
686 686 if ellipsesmode:
687 687 # Only update mfs if x is going to be sent. Otherwise we
688 688 # end up with bogus linkrevs specified for manifests and
689 689 # we skip some manifest nodes that we should otherwise
690 690 # have sent.
691 691 if (x in self.full_nodes
692 692 or cl.rev(x) in self.precomputed_ellipsis):
693 693 n = c[0]
694 694 # Record the first changeset introducing this manifest
695 695 # version.
696 696 mfs.setdefault(n, x)
697 697 # Set this narrow-specific dict so we have the lowest
698 698 # manifest revnum to look up for this cl revnum. (Part of
699 699 # mapping changelog ellipsis parents to manifest ellipsis
700 700 # parents)
701 701 self.next_clrev_to_localrev.setdefault(cl.rev(x),
702 702 mfrevlog.rev(n))
703 703 # We can't trust the changed files list in the changeset if the
704 704 # client requested a shallow clone.
705 705 if self.is_shallow:
706 706 changedfiles.update(mfl[c[0]].read().keys())
707 707 else:
708 708 changedfiles.update(c[3])
709 709 else:
710 710
711 711 n = c[0]
712 712 # record the first changeset introducing this manifest version
713 713 mfs.setdefault(n, x)
714 714 # Record a complete list of potentially-changed files in
715 715 # this manifest.
716 716 changedfiles.update(c[3])
717 717
718 718 return x
719 719
720 720 self._verbosenote(_('uncompressed size of bundle content:\n'))
721 721 size = 0
722 722 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
723 723 size += len(chunk)
724 724 yield chunk
725 725 self._verbosenote(_('%8.i (changelog)\n') % size)
726 726
727 727 # We need to make sure that the linkrev in the changegroup refers to
728 728 # the first changeset that introduced the manifest or file revision.
729 729 # The fastpath is usually safer than the slowpath, because the filelogs
730 730 # are walked in revlog order.
731 731 #
732 732 # When taking the slowpath with reorder=None and the manifest revlog
733 733 # uses generaldelta, the manifest may be walked in the "wrong" order.
734 734 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
735 735 # cc0ff93d0c0c).
736 736 #
737 737 # When taking the fastpath, we are only vulnerable to reordering
738 738 # of the changelog itself. The changelog never uses generaldelta, so
739 739 # it is only reordered when reorder=True. To handle this case, we
740 740 # simply take the slowpath, which already has the 'clrevorder' logic.
741 741 # This was also fixed in cc0ff93d0c0c.
742 742 fastpathlinkrev = fastpathlinkrev and not self._reorder
743 743 # Treemanifests don't work correctly with fastpathlinkrev
744 744 # either, because we don't discover which directory nodes to
745 745 # send along with files. This could probably be fixed.
746 746 fastpathlinkrev = fastpathlinkrev and (
747 747 'treemanifest' not in repo.requirements)
748 748
749 749 for chunk in self.generatemanifests(commonrevs, clrevorder,
750 750 fastpathlinkrev, mfs, fnodes, source):
751 751 yield chunk
752 752
753 753 if ellipsesmode:
754 754 mfdicts = None
755 755 if self.is_shallow:
756 756 mfdicts = [(self._repo.manifestlog[n].read(), lr)
757 757 for (n, lr) in mfs.iteritems()]
758 758
759 759 mfs.clear()
760 760 clrevs = set(cl.rev(x) for x in clnodes)
761 761
762 762 if not fastpathlinkrev:
763 763 def linknodes(unused, fname):
764 764 return fnodes.get(fname, {})
765 765 else:
766 766 cln = cl.node
767 767 def linknodes(filerevlog, fname):
768 768 llr = filerevlog.linkrev
769 769 fln = filerevlog.node
770 770 revs = ((r, llr(r)) for r in filerevlog)
771 771 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
772 772
773 773 if ellipsesmode:
774 774 # We need to pass the mfdicts variable down into
775 775 # generatefiles(), but more than one command might have
776 776 # wrapped generatefiles so we can't modify the function
777 777 # signature. Instead, we pass the data to ourselves using an
778 778 # instance attribute. I'm sorry.
779 779 self._mfdicts = mfdicts
780 780
781 781 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
782 782 source):
783 783 yield chunk
784 784
785 785 yield self.close()
786 786
787 787 if clnodes:
788 788 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
789 789
790 790 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
791 791 fnodes, source):
792 792 """Returns an iterator of changegroup chunks containing manifests.
793 793
794 794 `source` is unused here, but is used by extensions like remotefilelog to
795 795 change what is sent based on pulls vs pushes, etc.
796 796 """
797 797 repo = self._repo
798 798 mfl = repo.manifestlog
799 799 dirlog = mfl._revlog.dirlog
800 800 tmfnodes = {'': mfs}
801 801
802 802 # Callback for the manifest, used to collect linkrevs for filelog
803 803 # revisions.
804 804 # Returns the linkrev node (collected in lookupcl).
805 805 def makelookupmflinknode(dir, nodes):
806 806 if fastpathlinkrev:
807 807 assert not dir
808 808 return mfs.__getitem__
809 809
810 810 def lookupmflinknode(x):
811 811 """Callback for looking up the linknode for manifests.
812 812
813 813 Returns the linkrev node for the specified manifest.
814 814
815 815 SIDE EFFECT:
816 816
817 817 1) fclnodes gets populated with the list of relevant
818 818 file nodes if we're not using fastpathlinkrev
819 819 2) When treemanifests are in use, collects treemanifest nodes
820 820 to send
821 821
822 822 Note that this means manifests must be completely sent to
823 823 the client before you can trust the list of files and
824 824 treemanifests to send.
825 825 """
826 826 clnode = nodes[x]
827 827 mdata = mfl.get(dir, x).readfast(shallow=True)
828 828 for p, n, fl in mdata.iterentries():
829 829 if fl == 't': # subdirectory manifest
830 830 subdir = dir + p + '/'
831 831 tmfclnodes = tmfnodes.setdefault(subdir, {})
832 832 tmfclnode = tmfclnodes.setdefault(n, clnode)
833 833 if clrevorder[clnode] < clrevorder[tmfclnode]:
834 834 tmfclnodes[n] = clnode
835 835 else:
836 836 f = dir + p
837 837 fclnodes = fnodes.setdefault(f, {})
838 838 fclnode = fclnodes.setdefault(n, clnode)
839 839 if clrevorder[clnode] < clrevorder[fclnode]:
840 840 fclnodes[n] = clnode
841 841 return clnode
842 842 return lookupmflinknode
843 843
844 844 size = 0
845 845 while tmfnodes:
846 846 dir, nodes = tmfnodes.popitem()
847 847 prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
848 848 if not dir or prunednodes:
849 849 for x in self._packmanifests(dir, prunednodes,
850 850 makelookupmflinknode(dir, nodes)):
851 851 size += len(x)
852 852 yield x
853 853 self._verbosenote(_('%8.i (manifests)\n') % size)
854 854 yield self._manifestsdone()
855 855
856 856 # The 'source' parameter is useful for extensions
857 857 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
858 858 changedfiles = list(filter(self._filematcher, changedfiles))
859 859
860 860 if getattr(self, 'is_shallow', False):
861 861 # See comment in generate() for why this sadness is a thing.
862 862 mfdicts = self._mfdicts
863 863 del self._mfdicts
864 864 # In a shallow clone, the linknodes callback needs to also include
865 865 # those file nodes that are in the manifests we sent but weren't
866 866 # introduced by those manifests.
867 867 commonctxs = [self._repo[c] for c in commonrevs]
868 868 oldlinknodes = linknodes
869 869 clrev = self._repo.changelog.rev
870 870
871 871 # Defining this function has a side-effect of overriding the
872 872 # function of the same name that was passed in as an argument.
873 873 # TODO have caller pass in appropriate function.
874 874 def linknodes(flog, fname):
875 875 for c in commonctxs:
876 876 try:
877 877 fnode = c.filenode(fname)
878 878 self.clrev_to_localrev[c.rev()] = flog.rev(fnode)
879 879 except error.ManifestLookupError:
880 880 pass
881 881 links = oldlinknodes(flog, fname)
882 882 if len(links) != len(mfdicts):
883 883 for mf, lr in mfdicts:
884 884 fnode = mf.get(fname, None)
885 885 if fnode in links:
886 886 links[fnode] = min(links[fnode], lr, key=clrev)
887 887 elif fnode:
888 888 links[fnode] = lr
889 889 return links
890 890
891 891 return self._generatefiles(changedfiles, linknodes, commonrevs, source)
892 892
893 893 def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
894 894 repo = self._repo
895 895 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
896 896 total=len(changedfiles))
897 897 for i, fname in enumerate(sorted(changedfiles)):
898 898 filerevlog = repo.file(fname)
899 899 if not filerevlog:
900 900 raise error.Abort(_("empty or missing file data for %s") %
901 901 fname)
902 902
903 903 linkrevnodes = linknodes(filerevlog, fname)
904 904 # Lookup for filenodes, we collected the linkrev nodes above in the
905 905 # fastpath case and with lookupmf in the slowpath case.
906 906 def lookupfilelog(x):
907 907 return linkrevnodes[x]
908 908
909 909 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
910 910 if filenodes:
911 911 progress.update(i + 1, item=fname)
912 912 h = self.fileheader(fname)
913 913 size = len(h)
914 914 yield h
915 915 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
916 916 size += len(chunk)
917 917 yield chunk
918 918 self._verbosenote(_('%8.i %s\n') % (size, fname))
919 919 progress.complete()
920 920
921 921 def deltaparent(self, store, rev, p1, p2, prev):
922 922 if not store.candelta(prev, rev):
923 923 raise error.ProgrammingError('cg1 should not be used in this case')
924 924 return prev
925 925
926 926 def revchunk(self, store, rev, prev, linknode):
927 927 if util.safehasattr(self, 'full_nodes'):
928 928 fn = self._revisiondeltanarrow
929 929 else:
930 930 fn = self._revisiondeltanormal
931 931
932 932 delta = fn(store, rev, prev, linknode)
933 933 if not delta:
934 934 return
935 935
936 936 meta = self.builddeltaheader(delta.node, delta.p1node, delta.p2node,
937 937 delta.basenode, delta.linknode,
938 938 delta.flags)
939 939 l = len(meta) + sum(len(x) for x in delta.deltachunks)
940 940
941 941 yield chunkheader(l)
942 942 yield meta
943 943 for x in delta.deltachunks:
944 944 yield x
945 945
946 946 def _revisiondeltanormal(self, store, rev, prev, linknode):
947 947 node = store.node(rev)
948 948 p1, p2 = store.parentrevs(rev)
949 949 base = self.deltaparent(store, rev, p1, p2, prev)
950 950
951 951 prefix = ''
952 952 if store.iscensored(base) or store.iscensored(rev):
953 953 try:
954 954 delta = store.revision(node, raw=True)
955 955 except error.CensoredNodeError as e:
956 956 delta = e.tombstone
957 957 if base == nullrev:
958 958 prefix = mdiff.trivialdiffheader(len(delta))
959 959 else:
960 960 baselen = store.rawsize(base)
961 961 prefix = mdiff.replacediffheader(baselen, len(delta))
962 962 elif base == nullrev:
963 963 delta = store.revision(node, raw=True)
964 964 prefix = mdiff.trivialdiffheader(len(delta))
965 965 else:
966 966 delta = store.revdiff(base, rev)
967 967 p1n, p2n = store.parents(node)
968 968
969 969 return revisiondelta(
970 970 node=node,
971 971 p1node=p1n,
972 972 p2node=p2n,
973 973 basenode=store.node(base),
974 974 linknode=linknode,
975 975 flags=store.flags(rev),
976 976 deltachunks=(prefix, delta),
977 977 )
978 978
979 979 def _revisiondeltanarrow(self, store, rev, prev, linknode):
980 980 # build up some mapping information that's useful later. See
981 981 # the local() nested function below.
982 982 if not self.changelog_done:
983 983 self.clnode_to_rev[linknode] = rev
984 984 linkrev = rev
985 985 self.clrev_to_localrev[linkrev] = rev
986 986 else:
987 987 linkrev = self.clnode_to_rev[linknode]
988 988 self.clrev_to_localrev[linkrev] = rev
989 989
990 990 # This is a node to send in full, because the changeset it
991 991 # corresponds to was a full changeset.
992 992 if linknode in self.full_nodes:
993 993 return self._revisiondeltanormal(store, rev, prev, linknode)
994 994
995 995 # At this point, a node can either be one we should skip or an
996 996 # ellipsis. If it's not an ellipsis, bail immediately.
997 997 if linkrev not in self.precomputed_ellipsis:
998 998 return
999 999
1000 1000 linkparents = self.precomputed_ellipsis[linkrev]
1001 1001 def local(clrev):
1002 1002 """Turn a changelog revnum into a local revnum.
1003 1003
1004 1004 The ellipsis dag is stored as revnums on the changelog,
1005 1005 but when we're producing ellipsis entries for
1006 1006 non-changelog revlogs, we need to turn those numbers into
1007 1007 something local. This does that for us, and during the
1008 1008 changelog sending phase will also expand the stored
1009 1009 mappings as needed.
1010 1010 """
1011 1011 if clrev == nullrev:
1012 1012 return nullrev
1013 1013
1014 1014 if not self.changelog_done:
1015 1015 # If we're doing the changelog, it's possible that we
1016 1016 # have a parent that is already on the client, and we
1017 1017 # need to store some extra mapping information so that
1018 1018 # our contained ellipsis nodes will be able to resolve
1019 1019 # their parents.
1020 1020 if clrev not in self.clrev_to_localrev:
1021 1021 clnode = store.node(clrev)
1022 1022 self.clnode_to_rev[clnode] = clrev
1023 1023 return clrev
1024 1024
1025 1025 # Walk the ellipsis-ized changelog breadth-first looking for a
1026 1026 # change that has been linked from the current revlog.
1027 1027 #
1028 1028 # For a flat manifest revlog only a single step should be necessary
1029 1029 # as all relevant changelog entries are relevant to the flat
1030 1030 # manifest.
1031 1031 #
1032 1032 # For a filelog or tree manifest dirlog however not every changelog
1033 1033 # entry will have been relevant, so we need to skip some changelog
1034 1034 # nodes even after ellipsis-izing.
1035 1035 walk = [clrev]
1036 1036 while walk:
1037 1037 p = walk[0]
1038 1038 walk = walk[1:]
1039 1039 if p in self.clrev_to_localrev:
1040 1040 return self.clrev_to_localrev[p]
1041 1041 elif p in self.full_nodes:
1042 1042 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
1043 1043 if pp != nullrev])
1044 1044 elif p in self.precomputed_ellipsis:
1045 1045 walk.extend([pp for pp in self.precomputed_ellipsis[p]
1046 1046 if pp != nullrev])
1047 1047 else:
1048 1048 # In this case, we've got an ellipsis with parents
1049 1049 # outside the current bundle (likely an
1050 1050 # incremental pull). We "know" that we can use the
1051 1051 # value of this same revlog at whatever revision
1052 1052 # is pointed to by linknode. "Know" is in scare
1053 1053 # quotes because I haven't done enough examination
1054 1054 # of edge cases to convince myself this is really
1055 1055 # a fact - it works for all the (admittedly
1056 1056 # thorough) cases in our testsuite, but I would be
1057 1057 # somewhat unsurprised to find a case in the wild
1058 1058 # where this breaks down a bit. That said, I don't
1059 1059 # know if it would hurt anything.
1060 1060 for i in pycompat.xrange(rev, 0, -1):
1061 1061 if store.linkrev(i) == clrev:
1062 1062 return i
1063 1063 # We failed to resolve a parent for this node, so
1064 1064 # we crash the changegroup construction.
1065 1065 raise error.Abort(
1066 1066 'unable to resolve parent while packing %r %r'
1067 1067 ' for changeset %r' % (store.indexfile, rev, clrev))
1068 1068
1069 1069 return nullrev
1070 1070
1071 1071 if not linkparents or (
1072 1072 store.parentrevs(rev) == (nullrev, nullrev)):
1073 1073 p1, p2 = nullrev, nullrev
1074 1074 elif len(linkparents) == 1:
1075 1075 p1, = sorted(local(p) for p in linkparents)
1076 1076 p2 = nullrev
1077 1077 else:
1078 1078 p1, p2 = sorted(local(p) for p in linkparents)
1079 1079
1080 1080 n = store.node(rev)
1081 1081 p1n, p2n = store.node(p1), store.node(p2)
1082 1082 flags = store.flags(rev)
1083 1083 flags |= revlog.REVIDX_ELLIPSIS
1084 1084
1085 1085 # TODO: try and actually send deltas for ellipsis data blocks
1086 1086 data = store.revision(n)
1087 1087 diffheader = mdiff.trivialdiffheader(len(data))
1088 1088
1089 1089 return revisiondelta(
1090 1090 node=n,
1091 1091 p1node=p1n,
1092 1092 p2node=p2n,
1093 1093 basenode=nullid,
1094 1094 linknode=linknode,
1095 1095 flags=flags,
1096 1096 deltachunks=(diffheader, data),
1097 1097 )
1098 1098
1099 1099 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
1100 1100 # do nothing with basenode, it is implicitly the previous one in HG10
1101 1101 # do nothing with flags, it is implicitly 0 for cg1 and cg2
1102 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
1102 return self.deltaheader.pack(node, p1n, p2n, linknode)
1103 1103
1104 1104 class cg2packer(cg1packer):
1105 1105 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
1106 1106
1107 1107 def __init__(self, repo, filematcher, version, bundlecaps=None):
1108 1108 super(cg2packer, self).__init__(repo, filematcher, version,
1109 1109 bundlecaps=bundlecaps)
1110 1110
1111 1111 if self._reorder is None:
1112 1112 # Since generaldelta is directly supported by cg2, reordering
1113 1113 # generally doesn't help, so we disable it by default (treating
1114 1114 # bundle.reorder=auto just like bundle.reorder=False).
1115 1115 self._reorder = False
1116 1116
1117 1117 def deltaparent(self, store, rev, p1, p2, prev):
1118 1118 # Narrow ellipses mode.
1119 1119 if util.safehasattr(self, 'full_nodes'):
1120 1120 # TODO: send better deltas when in narrow mode.
1121 1121 #
1122 1122 # changegroup.group() loops over revisions to send,
1123 1123 # including revisions we'll skip. What this means is that
1124 1124 # `prev` will be a potentially useless delta base for all
1125 1125 # ellipsis nodes, as the client likely won't have it. In
1126 1126 # the future we should do bookkeeping about which nodes
1127 1127 # have been sent to the client, and try to be
1128 1128 # significantly smarter about delta bases. This is
1129 1129 # slightly tricky because this same code has to work for
1130 1130 # all revlogs, and we don't have the linkrev/linknode here.
1131 1131 return p1
1132 1132
1133 1133 dp = store.deltaparent(rev)
1134 1134 if dp == nullrev and store.storedeltachains:
1135 1135 # Avoid sending full revisions when delta parent is null. Pick prev
1136 1136 # in that case. It's tempting to pick p1 in this case, as p1 will
1137 1137 # be smaller in the common case. However, computing a delta against
1138 1138 # p1 may require resolving the raw text of p1, which could be
1139 1139 # expensive. The revlog caches should have prev cached, meaning
1140 1140 # less CPU for changegroup generation. There is likely room to add
1141 1141 # a flag and/or config option to control this behavior.
1142 1142 base = prev
1143 1143 elif dp == nullrev:
1144 1144 # revlog is configured to use full snapshot for a reason,
1145 1145 # stick to full snapshot.
1146 1146 base = nullrev
1147 1147 elif dp not in (p1, p2, prev):
1148 1148 # Pick prev when we can't be sure remote has the base revision.
1149 1149 return prev
1150 1150 else:
1151 1151 base = dp
1152 1152 if base != nullrev and not store.candelta(base, rev):
1153 1153 base = nullrev
1154 1154 return base
1155 1155
1156 1156 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
1157 1157 # Do nothing with flags, it is implicitly 0 in cg1 and cg2
1158 return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
1158 return self.deltaheader.pack(node, p1n, p2n, basenode, linknode)
1159 1159
1160 1160 class cg3packer(cg2packer):
1161 1161 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
1162 1162
1163 1163 def _packmanifests(self, dir, mfnodes, lookuplinknode):
1164 1164 if dir:
1165 1165 yield self.fileheader(dir)
1166 1166
1167 1167 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
1168 1168 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
1169 1169 units=_('manifests')):
1170 1170 yield chunk
1171 1171
1172 1172 def _manifestsdone(self):
1173 1173 return self.close()
1174 1174
1175 1175 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
1176 return struct.pack(
1177 self.deltaheader, node, p1n, p2n, basenode, linknode, flags)
1176 return self.deltaheader.pack(node, p1n, p2n, basenode, linknode, flags)
1178 1177
1179 1178 def _makecg1packer(repo, filematcher, bundlecaps):
1180 1179 return cg1packer(repo, filematcher, b'01', bundlecaps=bundlecaps)
1181 1180
1182 1181 def _makecg2packer(repo, filematcher, bundlecaps):
1183 1182 return cg2packer(repo, filematcher, b'02', bundlecaps=bundlecaps)
1184 1183
1185 1184 def _makecg3packer(repo, filematcher, bundlecaps):
1186 1185 return cg3packer(repo, filematcher, b'03', bundlecaps=bundlecaps)
1187 1186
1188 1187 _packermap = {'01': (_makecg1packer, cg1unpacker),
1189 1188 # cg2 adds support for exchanging generaldelta
1190 1189 '02': (_makecg2packer, cg2unpacker),
1191 1190 # cg3 adds support for exchanging revlog flags and treemanifests
1192 1191 '03': (_makecg3packer, cg3unpacker),
1193 1192 }
1194 1193
1195 1194 def allsupportedversions(repo):
1196 1195 versions = set(_packermap.keys())
1197 1196 if not (repo.ui.configbool('experimental', 'changegroup3') or
1198 1197 repo.ui.configbool('experimental', 'treemanifest') or
1199 1198 'treemanifest' in repo.requirements):
1200 1199 versions.discard('03')
1201 1200 return versions
1202 1201
1203 1202 # Changegroup versions that can be applied to the repo
1204 1203 def supportedincomingversions(repo):
1205 1204 return allsupportedversions(repo)
1206 1205
1207 1206 # Changegroup versions that can be created from the repo
1208 1207 def supportedoutgoingversions(repo):
1209 1208 versions = allsupportedversions(repo)
1210 1209 if 'treemanifest' in repo.requirements:
1211 1210 # Versions 01 and 02 support only flat manifests and it's just too
1212 1211 # expensive to convert between the flat manifest and tree manifest on
1213 1212 # the fly. Since tree manifests are hashed differently, all of history
1214 1213 # would have to be converted. Instead, we simply don't even pretend to
1215 1214 # support versions 01 and 02.
1216 1215 versions.discard('01')
1217 1216 versions.discard('02')
1218 1217 if repository.NARROW_REQUIREMENT in repo.requirements:
1219 1218 # Versions 01 and 02 don't support revlog flags, and we need to
1220 1219 # support that for stripping and unbundling to work.
1221 1220 versions.discard('01')
1222 1221 versions.discard('02')
1223 1222 if LFS_REQUIREMENT in repo.requirements:
1224 1223 # Versions 01 and 02 don't support revlog flags, and we need to
1225 1224 # mark LFS entries with REVIDX_EXTSTORED.
1226 1225 versions.discard('01')
1227 1226 versions.discard('02')
1228 1227
1229 1228 return versions
1230 1229
1231 1230 def localversion(repo):
1232 1231 # Finds the best version to use for bundles that are meant to be used
1233 1232 # locally, such as those from strip and shelve, and temporary bundles.
1234 1233 return max(supportedoutgoingversions(repo))
1235 1234
1236 1235 def safeversion(repo):
1237 1236 # Finds the smallest version that it's safe to assume clients of the repo
1238 1237 # will support. For example, all hg versions that support generaldelta also
1239 1238 # support changegroup 02.
1240 1239 versions = supportedoutgoingversions(repo)
1241 1240 if 'generaldelta' in repo.requirements:
1242 1241 versions.discard('01')
1243 1242 assert versions
1244 1243 return min(versions)
1245 1244
1246 1245 def getbundler(version, repo, bundlecaps=None, filematcher=None):
1247 1246 assert version in supportedoutgoingversions(repo)
1248 1247
1249 1248 if filematcher is None:
1250 1249 filematcher = matchmod.alwaysmatcher(repo.root, '')
1251 1250
1252 1251 if version == '01' and not filematcher.always():
1253 1252 raise error.ProgrammingError('version 01 changegroups do not support '
1254 1253 'sparse file matchers')
1255 1254
1256 1255 # Requested files could include files not in the local store. So
1257 1256 # filter those out.
1258 1257 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
1259 1258 filematcher)
1260 1259
1261 1260 fn = _packermap[version][0]
1262 1261 return fn(repo, filematcher, bundlecaps)
1263 1262
1264 1263 def getunbundler(version, fh, alg, extras=None):
1265 1264 return _packermap[version][1](fh, alg, extras=extras)
1266 1265
1267 1266 def _changegroupinfo(repo, nodes, source):
1268 1267 if repo.ui.verbose or source == 'bundle':
1269 1268 repo.ui.status(_("%d changesets found\n") % len(nodes))
1270 1269 if repo.ui.debugflag:
1271 1270 repo.ui.debug("list of changesets:\n")
1272 1271 for node in nodes:
1273 1272 repo.ui.debug("%s\n" % hex(node))
1274 1273
1275 1274 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1276 1275 bundlecaps=None):
1277 1276 cgstream = makestream(repo, outgoing, version, source,
1278 1277 fastpath=fastpath, bundlecaps=bundlecaps)
1279 1278 return getunbundler(version, util.chunkbuffer(cgstream), None,
1280 1279 {'clcount': len(outgoing.missing) })
1281 1280
1282 1281 def makestream(repo, outgoing, version, source, fastpath=False,
1283 1282 bundlecaps=None, filematcher=None):
1284 1283 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1285 1284 filematcher=filematcher)
1286 1285
1287 1286 repo = repo.unfiltered()
1288 1287 commonrevs = outgoing.common
1289 1288 csets = outgoing.missing
1290 1289 heads = outgoing.missingheads
1291 1290 # We go through the fast path if we get told to, or if all unfiltered
1292 1291 # heads have been requested (since we then know all linkrevs will
1293 1292 # be pulled by the client).
1294 1293 heads.sort()
1295 1294 fastpathlinkrev = fastpath or (
1296 1295 repo.filtername is None and heads == sorted(repo.heads()))
1297 1296
1298 1297 repo.hook('preoutgoing', throw=True, source=source)
1299 1298 _changegroupinfo(repo, csets, source)
1300 1299 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1301 1300
1302 1301 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1303 1302 revisions = 0
1304 1303 files = 0
1305 1304 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1306 1305 total=expectedfiles)
1307 1306 for chunkdata in iter(source.filelogheader, {}):
1308 1307 files += 1
1309 1308 f = chunkdata["filename"]
1310 1309 repo.ui.debug("adding %s revisions\n" % f)
1311 1310 progress.increment()
1312 1311 fl = repo.file(f)
1313 1312 o = len(fl)
1314 1313 try:
1315 1314 deltas = source.deltaiter()
1316 1315 if not fl.addgroup(deltas, revmap, trp):
1317 1316 raise error.Abort(_("received file revlog group is empty"))
1318 1317 except error.CensoredBaseError as e:
1319 1318 raise error.Abort(_("received delta base is censored: %s") % e)
1320 1319 revisions += len(fl) - o
1321 1320 if f in needfiles:
1322 1321 needs = needfiles[f]
1323 1322 for new in pycompat.xrange(o, len(fl)):
1324 1323 n = fl.node(new)
1325 1324 if n in needs:
1326 1325 needs.remove(n)
1327 1326 else:
1328 1327 raise error.Abort(
1329 1328 _("received spurious file revlog entry"))
1330 1329 if not needs:
1331 1330 del needfiles[f]
1332 1331 progress.complete()
1333 1332
1334 1333 for f, needs in needfiles.iteritems():
1335 1334 fl = repo.file(f)
1336 1335 for n in needs:
1337 1336 try:
1338 1337 fl.rev(n)
1339 1338 except error.LookupError:
1340 1339 raise error.Abort(
1341 1340 _('missing file data for %s:%s - run hg verify') %
1342 1341 (f, hex(n)))
1343 1342
1344 1343 return revisions, files
1345 1344
1346 1345 def _packellipsischangegroup(repo, common, match, relevant_nodes,
1347 1346 ellipsisroots, visitnodes, depth, source, version):
1348 1347 if version in ('01', '02'):
1349 1348 raise error.Abort(
1350 1349 'ellipsis nodes require at least cg3 on client and server, '
1351 1350 'but negotiated version %s' % version)
1352 1351 # We wrap cg1packer.revchunk, using a side channel to pass
1353 1352 # relevant_nodes into that area. Then if linknode isn't in the
1354 1353 # set, we know we have an ellipsis node and we should defer
1355 1354 # sending that node's data. We override close() to detect
1356 1355 # pending ellipsis nodes and flush them.
1357 1356 packer = getbundler(version, repo, filematcher=match)
1358 1357 # Give the packer the list of nodes which should not be
1359 1358 # ellipsis nodes. We store this rather than the set of nodes
1360 1359 # that should be an ellipsis because for very large histories
1361 1360 # we expect this to be significantly smaller.
1362 1361 packer.full_nodes = relevant_nodes
1363 1362 # Maps ellipsis revs to their roots at the changelog level.
1364 1363 packer.precomputed_ellipsis = ellipsisroots
1365 1364 # Maps CL revs to per-revlog revisions. Cleared in close() at
1366 1365 # the end of each group.
1367 1366 packer.clrev_to_localrev = {}
1368 1367 packer.next_clrev_to_localrev = {}
1369 1368 # Maps changelog nodes to changelog revs. Filled in once
1370 1369 # during changelog stage and then left unmodified.
1371 1370 packer.clnode_to_rev = {}
1372 1371 packer.changelog_done = False
1373 1372 # If true, informs the packer that it is serving shallow content and might
1374 1373 # need to pack file contents not introduced by the changes being packed.
1375 1374 packer.is_shallow = depth is not None
1376 1375
1377 1376 return packer.generate(common, visitnodes, False, source)
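For readers tracing the wire format handled above: getchunk(), chunkheader() and closechunk() frame every chunk as a 4-byte big-endian length that counts the length field itself, followed by the payload, with a zero length closing a group. A minimal round-trip sketch of that framing (write_chunks/read_chunks are hypothetical helper names, not Mercurial APIs):

import io
import struct

def write_chunks(payloads):
    # Mirror chunkheader()/closechunk(): the length includes its own
    # 4 bytes, and a zero-length chunk terminates the group.
    out = io.BytesIO()
    for payload in payloads:
        out.write(struct.pack(">l", len(payload) + 4))
        out.write(payload)
    out.write(struct.pack(">l", 0))
    return out.getvalue()

def read_chunks(stream):
    # Mirror getchunk(): a length of 0 ends the group; getchunk() also
    # treats lengths 1-4 as corrupt and aborts, which this sketch skips.
    while True:
        length = struct.unpack(">l", stream.read(4))[0]
        if length <= 4:
            return
        yield stream.read(length - 4)

framed = write_chunks([b"first", b"second"])
assert list(read_chunks(io.BytesIO(framed))) == [b"first", b"second"]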