changegroup: inline ellipsisdata()...
Gregory Szorc
r38928:eb022ce9 default
@@ -1,1331 +1,1328 @@
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from . import (
23 23 dagutil,
24 24 error,
25 25 manifest,
26 26 match as matchmod,
27 27 mdiff,
28 28 phases,
29 29 pycompat,
30 30 repository,
31 31 revlog,
32 32 util,
33 33 )
34 34
35 35 from .utils import (
36 36 stringutil,
37 37 )
38 38
39 39 _CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
40 40 _CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
41 41 _CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"
42 42
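For reference, each delta header packs fixed-width 20-byte node ids (plus, in cg3, a big-endian 16-bit flags field), so the header sizes the unpackers later compute via struct.calcsize come out as follows. A standalone sanity check, illustrative and not part of the module:

    import struct

    assert struct.calcsize("20s20s20s20s") == 80        # cg1: node, p1, p2, linknode
    assert struct.calcsize("20s20s20s20s20s") == 100    # cg2: inserts deltabase before linknode
    assert struct.calcsize(">20s20s20s20s20sH") == 102  # cg3: appends a 16-bit flags field
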
43 43 LFS_REQUIREMENT = 'lfs'
44 44
45 45 readexactly = util.readexactly
46 46
47 47 def getchunk(stream):
48 48 """return the next chunk from stream as a string"""
49 49 d = readexactly(stream, 4)
50 50 l = struct.unpack(">l", d)[0]
51 51 if l <= 4:
52 52 if l:
53 53 raise error.Abort(_("invalid chunk length %d") % l)
54 54 return ""
55 55 return readexactly(stream, l - 4)
56 56
57 57 def chunkheader(length):
58 58 """return a changegroup chunk header (string)"""
59 59 return struct.pack(">l", length + 4)
60 60
61 61 def closechunk():
62 62 """return a changegroup chunk header (string) for a zero-length chunk"""
63 63 return struct.pack(">l", 0)
64 64
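The three helpers above define the whole chunk framing: a 4-byte big-endian signed length that counts itself, followed by length - 4 bytes of payload, with a bare zero length closing a group. A self-contained round-trip sketch of that framing (illustrative, not part of the module):

    import io
    import struct

    def frame(payload):
        # the length field counts its own 4 bytes plus the payload
        return struct.pack(">l", len(payload) + 4) + payload

    stream = io.BytesIO(frame(b"abc") + frame(b"de") + struct.pack(">l", 0))
    while True:
        length = struct.unpack(">l", stream.read(4))[0]
        if length == 0:                     # the zero-length terminator from closechunk()
            break
        print(stream.read(length - 4))      # b'abc', then b'de'
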
65 65 def writechunks(ui, chunks, filename, vfs=None):
66 66 """Write chunks to a file and return its filename.
67 67
68 68 The stream is assumed to be a bundle file.
69 69 Existing files will not be overwritten.
70 70 If no filename is specified, a temporary file is created.
71 71 """
72 72 fh = None
73 73 cleanup = None
74 74 try:
75 75 if filename:
76 76 if vfs:
77 77 fh = vfs.open(filename, "wb")
78 78 else:
79 79 # Increase default buffer size because default is usually
80 80 # small (4k is common on Linux).
81 81 fh = open(filename, "wb", 131072)
82 82 else:
83 83 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
84 84 fh = os.fdopen(fd, r"wb")
85 85 cleanup = filename
86 86 for c in chunks:
87 87 fh.write(c)
88 88 cleanup = None
89 89 return filename
90 90 finally:
91 91 if fh is not None:
92 92 fh.close()
93 93 if cleanup is not None:
94 94 if filename and vfs:
95 95 vfs.unlink(cleanup)
96 96 else:
97 97 os.unlink(cleanup)
98 98
99 99 class cg1unpacker(object):
100 100 """Unpacker for cg1 changegroup streams.
101 101
102 102 A changegroup unpacker handles the framing of the revision data in
103 103 the wire format. Most consumers will want to use the apply()
104 104 method to add the changes from the changegroup to a repository.
105 105
106 106 If you're forwarding a changegroup unmodified to another consumer,
107 107 use getchunks(), which returns an iterator of changegroup
108 108 chunks. This is mostly useful for cases where you need to know the
109 109 data stream has ended by observing the end of the changegroup.
110 110
111 111 deltachunk() is useful only if you're applying delta data. Most
112 112 consumers should prefer apply() instead.
113 113
114 114 A few other public methods exist. Those are used only for
115 115 bundlerepo and some debug commands - their use is discouraged.
116 116 """
117 117 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
118 118 deltaheadersize = struct.calcsize(deltaheader)
119 119 version = '01'
120 120 _grouplistcount = 1 # One list of files after the manifests
121 121
122 122 def __init__(self, fh, alg, extras=None):
123 123 if alg is None:
124 124 alg = 'UN'
125 125 if alg not in util.compengines.supportedbundletypes:
126 126 raise error.Abort(_('unknown stream compression type: %s')
127 127 % alg)
128 128 if alg == 'BZ':
129 129 alg = '_truncatedBZ'
130 130
131 131 compengine = util.compengines.forbundletype(alg)
132 132 self._stream = compengine.decompressorreader(fh)
133 133 self._type = alg
134 134 self.extras = extras or {}
135 135 self.callback = None
136 136
137 137 # These methods (compressed, read, seek, tell) all appear to only
138 138 # be used by bundlerepo, but it's a little hard to tell.
139 139 def compressed(self):
140 140 return self._type is not None and self._type != 'UN'
141 141 def read(self, l):
142 142 return self._stream.read(l)
143 143 def seek(self, pos):
144 144 return self._stream.seek(pos)
145 145 def tell(self):
146 146 return self._stream.tell()
147 147 def close(self):
148 148 return self._stream.close()
149 149
150 150 def _chunklength(self):
151 151 d = readexactly(self._stream, 4)
152 152 l = struct.unpack(">l", d)[0]
153 153 if l <= 4:
154 154 if l:
155 155 raise error.Abort(_("invalid chunk length %d") % l)
156 156 return 0
157 157 if self.callback:
158 158 self.callback()
159 159 return l - 4
160 160
161 161 def changelogheader(self):
162 162 """v10 does not have a changelog header chunk"""
163 163 return {}
164 164
165 165 def manifestheader(self):
166 166 """v10 does not have a manifest header chunk"""
167 167 return {}
168 168
169 169 def filelogheader(self):
170 170 """return the header of the filelogs chunk, v10 only has the filename"""
171 171 l = self._chunklength()
172 172 if not l:
173 173 return {}
174 174 fname = readexactly(self._stream, l)
175 175 return {'filename': fname}
176 176
177 177 def _deltaheader(self, headertuple, prevnode):
178 178 node, p1, p2, cs = headertuple
179 179 if prevnode is None:
180 180 deltabase = p1
181 181 else:
182 182 deltabase = prevnode
183 183 flags = 0
184 184 return node, p1, p2, deltabase, cs, flags
185 185
186 186 def deltachunk(self, prevnode):
187 187 l = self._chunklength()
188 188 if not l:
189 189 return {}
190 190 headerdata = readexactly(self._stream, self.deltaheadersize)
191 191 header = struct.unpack(self.deltaheader, headerdata)
192 192 delta = readexactly(self._stream, l - self.deltaheadersize)
193 193 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
194 194 return (node, p1, p2, cs, deltabase, delta, flags)
195 195
196 196 def getchunks(self):
197 197 """returns all the chunks contains in the bundle
198 198
199 199 Used when you need to forward the binary stream to a file or another
200 200 network API. To do so, it parse the changegroup data, otherwise it will
201 201 block in case of sshrepo because it don't know the end of the stream.
202 202 """
203 203 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
204 204 # and a list of filelogs. For changegroup 3, we expect 4 parts:
205 205 # changelog, manifestlog, a list of tree manifestlogs, and a list of
206 206 # filelogs.
207 207 #
208 208 # Changelog and manifestlog parts are terminated with empty chunks. The
209 209 # tree and file parts are a list of entry sections. Each entry section
210 210 # is a series of chunks terminating in an empty chunk. The list of these
211 211 # entry sections is terminated in yet another empty chunk, so we know
212 212 # we've reached the end of the tree/file list when we reach an empty
213 213 # chunk that was preceded by no non-empty chunks.
214 214
215 215 parts = 0
216 216 while parts < 2 + self._grouplistcount:
217 217 noentries = True
218 218 while True:
219 219 chunk = getchunk(self)
220 220 if not chunk:
221 221 # The first two empty chunks represent the end of the
222 222 # changelog and the manifestlog portions. The remaining
223 223 # empty chunks represent either A) the end of individual
224 224 # tree or file entries in the file list, or B) the end of
225 225 # the entire list. It's the end of the entire list if there
226 226 # were no entries (i.e. noentries is True).
227 227 if parts < 2:
228 228 parts += 1
229 229 elif noentries:
230 230 parts += 1
231 231 break
232 232 noentries = False
233 233 yield chunkheader(len(chunk))
234 234 pos = 0
235 235 while pos < len(chunk):
236 236 next = pos + 2**20
237 237 yield chunk[pos:next]
238 238 pos = next
239 239 yield closechunk()
240 240
241 241 def _unpackmanifests(self, repo, revmap, trp, prog):
242 242 self.callback = prog.increment
243 243 # no need to check for empty manifest group here:
244 244 # if the result of the merge of 1 and 2 is the same in 3 and 4,
245 245 # no new manifest will be created and the manifest group will
246 246 # be empty during the pull
247 247 self.manifestheader()
248 248 deltas = self.deltaiter()
249 249 repo.manifestlog.addgroup(deltas, revmap, trp)
250 250 prog.complete()
251 251 self.callback = None
252 252
253 253 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
254 254 expectedtotal=None):
255 255 """Add the changegroup returned by source.read() to this repo.
256 256 srctype is a string like 'push', 'pull', or 'unbundle'. url is
257 257 the URL of the repo where this changegroup is coming from.
258 258
259 259 Return an integer summarizing the change to this repo:
260 260 - nothing changed or no source: 0
261 261 - more heads than before: 1+added heads (2..n)
262 262 - fewer heads than before: -1-removed heads (-2..-n)
263 263 - number of heads stays the same: 1
264 264 """
265 265 repo = repo.unfiltered()
266 266 def csmap(x):
267 267 repo.ui.debug("add changeset %s\n" % short(x))
268 268 return len(cl)
269 269
270 270 def revmap(x):
271 271 return cl.rev(x)
272 272
273 273 changesets = files = revisions = 0
274 274
275 275 try:
276 276 # The transaction may already carry source information. In this
277 277 # case we use the top level data. We overwrite the argument
278 278 # because we need to use the top level values (if they exist)
279 279 # in this function.
280 280 srctype = tr.hookargs.setdefault('source', srctype)
281 281 url = tr.hookargs.setdefault('url', url)
282 282 repo.hook('prechangegroup',
283 283 throw=True, **pycompat.strkwargs(tr.hookargs))
284 284
285 285 # write changelog data to temp files so concurrent readers
286 286 # will not see an inconsistent view
287 287 cl = repo.changelog
288 288 cl.delayupdate(tr)
289 289 oldheads = set(cl.heads())
290 290
291 291 trp = weakref.proxy(tr)
292 292 # pull off the changeset group
293 293 repo.ui.status(_("adding changesets\n"))
294 294 clstart = len(cl)
295 295 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
296 296 total=expectedtotal)
297 297 self.callback = progress.increment
298 298
299 299 efiles = set()
300 300 def onchangelog(cl, node):
301 301 efiles.update(cl.readfiles(node))
302 302
303 303 self.changelogheader()
304 304 deltas = self.deltaiter()
305 305 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
306 306 efiles = len(efiles)
307 307
308 308 if not cgnodes:
309 309 repo.ui.develwarn('applied empty changegroup',
310 310 config='warn-empty-changegroup')
311 311 clend = len(cl)
312 312 changesets = clend - clstart
313 313 progress.complete()
314 314 self.callback = None
315 315
316 316 # pull off the manifest group
317 317 repo.ui.status(_("adding manifests\n"))
318 318 # We know that we'll never have more manifests than we had
319 319 # changesets.
320 320 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
321 321 total=changesets)
322 322 self._unpackmanifests(repo, revmap, trp, progress)
323 323
324 324 needfiles = {}
325 325 if repo.ui.configbool('server', 'validate'):
326 326 cl = repo.changelog
327 327 ml = repo.manifestlog
328 328 # validate incoming csets have their manifests
329 329 for cset in pycompat.xrange(clstart, clend):
330 330 mfnode = cl.changelogrevision(cset).manifest
331 331 mfest = ml[mfnode].readdelta()
332 332 # store file cgnodes we must see
333 333 for f, n in mfest.iteritems():
334 334 needfiles.setdefault(f, set()).add(n)
335 335
336 336 # process the files
337 337 repo.ui.status(_("adding file changes\n"))
338 338 newrevs, newfiles = _addchangegroupfiles(
339 339 repo, self, revmap, trp, efiles, needfiles)
340 340 revisions += newrevs
341 341 files += newfiles
342 342
343 343 deltaheads = 0
344 344 if oldheads:
345 345 heads = cl.heads()
346 346 deltaheads = len(heads) - len(oldheads)
347 347 for h in heads:
348 348 if h not in oldheads and repo[h].closesbranch():
349 349 deltaheads -= 1
350 350 htext = ""
351 351 if deltaheads:
352 352 htext = _(" (%+d heads)") % deltaheads
353 353
354 354 repo.ui.status(_("added %d changesets"
355 355 " with %d changes to %d files%s\n")
356 356 % (changesets, revisions, files, htext))
357 357 repo.invalidatevolatilesets()
358 358
359 359 if changesets > 0:
360 360 if 'node' not in tr.hookargs:
361 361 tr.hookargs['node'] = hex(cl.node(clstart))
362 362 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
363 363 hookargs = dict(tr.hookargs)
364 364 else:
365 365 hookargs = dict(tr.hookargs)
366 366 hookargs['node'] = hex(cl.node(clstart))
367 367 hookargs['node_last'] = hex(cl.node(clend - 1))
368 368 repo.hook('pretxnchangegroup',
369 369 throw=True, **pycompat.strkwargs(hookargs))
370 370
371 371 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
372 372 phaseall = None
373 373 if srctype in ('push', 'serve'):
374 374 # Old servers can not push the boundary themselves.
375 375 # New servers won't push the boundary if changeset already
376 376 # exists locally as secret
377 377 #
378 378 # We should not use added here but the list of all changes in
379 379 # the bundle
380 380 if repo.publishing():
381 381 targetphase = phaseall = phases.public
382 382 else:
383 383 # closer target phase computation
384 384
385 385 # Those changesets have been pushed from the
386 386 # outside, their phases are going to be pushed
387 387 # alongside. Therefore `targetphase` is
388 388 # ignored.
389 389 targetphase = phaseall = phases.draft
390 390 if added:
391 391 phases.registernew(repo, tr, targetphase, added)
392 392 if phaseall is not None:
393 393 phases.advanceboundary(repo, tr, phaseall, cgnodes)
394 394
395 395 if changesets > 0:
396 396
397 397 def runhooks():
398 398 # These hooks run when the lock releases, not when the
399 399 # transaction closes. So it's possible for the changelog
400 400 # to have changed since we last saw it.
401 401 if clstart >= len(repo):
402 402 return
403 403
404 404 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
405 405
406 406 for n in added:
407 407 args = hookargs.copy()
408 408 args['node'] = hex(n)
409 409 del args['node_last']
410 410 repo.hook("incoming", **pycompat.strkwargs(args))
411 411
412 412 newheads = [h for h in repo.heads()
413 413 if h not in oldheads]
414 414 repo.ui.log("incoming",
415 415 "%d incoming changes - new heads: %s\n",
416 416 len(added),
417 417 ', '.join([hex(c[:6]) for c in newheads]))
418 418
419 419 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
420 420 lambda tr: repo._afterlock(runhooks))
421 421 finally:
422 422 repo.ui.flush()
423 423 # never return 0 here:
424 424 if deltaheads < 0:
425 425 ret = deltaheads - 1
426 426 else:
427 427 ret = deltaheads + 1
428 428 return ret
429 429
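The finally block above maps the head delta onto the return codes promised by apply()'s docstring; restated as a tiny standalone function (illustrative only):

    def retcode(deltaheads):
        # mirrors the "never return 0 here" logic in the finally block
        return deltaheads - 1 if deltaheads < 0 else deltaheads + 1

    assert [retcode(d) for d in (-2, -1, 0, 1)] == [-3, -2, 1, 2]
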
430 430 def deltaiter(self):
431 431 """
432 432 returns an iterator of the deltas in this changegroup
433 433
434 434 Useful for passing to the underlying storage system to be stored.
435 435 """
436 436 chain = None
437 437 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
438 438 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
439 439 yield chunkdata
440 440 chain = chunkdata[0]
441 441
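deltaiter() threads each chunk's node back in as `chain`, which together with cg1unpacker._deltaheader() gives cg1 its implicit delta bases: the first chunk of a group deltas against its first parent, and each later chunk against the chunk delivered just before it. A restatement of that rule (node names invented):

    def cg1deltabase(prevnode, p1):
        # prevnode is None only for the first chunk of a group
        return p1 if prevnode is None else prevnode

    assert cg1deltabase(None, b"p1-node") == b"p1-node"
    assert cg1deltabase(b"prior-node", b"p1-node") == b"prior-node"
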
442 442 class cg2unpacker(cg1unpacker):
443 443 """Unpacker for cg2 streams.
444 444
445 445 cg2 streams add support for generaldelta, so the delta header
446 446 format is slightly different. All other features about the data
447 447 remain the same.
448 448 """
449 449 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
450 450 deltaheadersize = struct.calcsize(deltaheader)
451 451 version = '02'
452 452
453 453 def _deltaheader(self, headertuple, prevnode):
454 454 node, p1, p2, deltabase, cs = headertuple
455 455 flags = 0
456 456 return node, p1, p2, deltabase, cs, flags
457 457
458 458 class cg3unpacker(cg2unpacker):
459 459 """Unpacker for cg3 streams.
460 460
461 461 cg3 streams add support for exchanging treemanifests and revlog
462 462 flags. It adds the revlog flags to the delta header and an empty chunk
463 463 separating manifests and files.
464 464 """
465 465 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
466 466 deltaheadersize = struct.calcsize(deltaheader)
467 467 version = '03'
468 468 _grouplistcount = 2 # One list of manifests and one list of files
469 469
470 470 def _deltaheader(self, headertuple, prevnode):
471 471 node, p1, p2, deltabase, cs, flags = headertuple
472 472 return node, p1, p2, deltabase, cs, flags
473 473
474 474 def _unpackmanifests(self, repo, revmap, trp, prog):
475 475 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
476 476 for chunkdata in iter(self.filelogheader, {}):
477 477 # If we get here, there are directory manifests in the changegroup
478 478 d = chunkdata["filename"]
479 479 repo.ui.debug("adding %s revisions\n" % d)
480 480 dirlog = repo.manifestlog._revlog.dirlog(d)
481 481 deltas = self.deltaiter()
482 482 if not dirlog.addgroup(deltas, revmap, trp):
483 483 raise error.Abort(_("received dir revlog group is empty"))
484 484
485 485 class headerlessfixup(object):
486 486 def __init__(self, fh, h):
487 487 self._h = h
488 488 self._fh = fh
489 489 def read(self, n):
490 490 if self._h:
491 491 d, self._h = self._h[:n], self._h[n:]
492 492 if len(d) < n:
493 493 d += readexactly(self._fh, n - len(d))
494 494 return d
495 495 return readexactly(self._fh, n)
496 496
497 def ellipsisdata(packer, rev, revlog_, p1, p2, data, linknode):
498 n = revlog_.node(rev)
499 p1n, p2n = revlog_.node(p1), revlog_.node(p2)
500 flags = revlog_.flags(rev)
501 flags |= revlog.REVIDX_ELLIPSIS
502 meta = packer.builddeltaheader(
503 n, p1n, p2n, nullid, linknode, flags)
504 # TODO: try and actually send deltas for ellipsis data blocks
505 diffheader = mdiff.trivialdiffheader(len(data))
506 l = len(meta) + len(diffheader) + len(data)
507 return ''.join((chunkheader(l),
508 meta,
509 diffheader,
510 data))
511 497
512 498 class cg1packer(object):
513 499 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
514 500 version = '01'
515 501 def __init__(self, repo, filematcher, bundlecaps=None):
516 502 """Given a source repo, construct a bundler.
517 503
518 504 filematcher is a matcher that matches on files to include in the
519 505 changegroup. Used to facilitate sparse changegroups.
520 506
521 507 bundlecaps is optional and can be used to specify the set of
522 508 capabilities which can be used to build the bundle. While bundlecaps is
523 509 unused in core Mercurial, extensions rely on this feature to communicate
524 510 capabilities to customize the changegroup packer.
525 511 """
526 512 assert filematcher
527 513 self._filematcher = filematcher
528 514
529 515 # Set of capabilities we can use to build the bundle.
530 516 if bundlecaps is None:
531 517 bundlecaps = set()
532 518 self._bundlecaps = bundlecaps
533 519 # experimental config: bundle.reorder
534 520 reorder = repo.ui.config('bundle', 'reorder')
535 521 if reorder == 'auto':
536 522 reorder = None
537 523 else:
538 524 reorder = stringutil.parsebool(reorder)
539 525 self._repo = repo
540 526 self._reorder = reorder
541 527 if self._repo.ui.verbose and not self._repo.ui.debugflag:
542 528 self._verbosenote = self._repo.ui.note
543 529 else:
544 530 self._verbosenote = lambda s: None
545 531
546 532 def close(self):
547 533 # Ellipses serving mode.
548 534 getattr(self, 'clrev_to_localrev', {}).clear()
549 535 if getattr(self, 'next_clrev_to_localrev', {}):
550 536 self.clrev_to_localrev = self.next_clrev_to_localrev
551 537 del self.next_clrev_to_localrev
552 538 self.changelog_done = True
553 539
554 540 return closechunk()
555 541
556 542 def fileheader(self, fname):
557 543 return chunkheader(len(fname)) + fname
558 544
559 545 # Extracted both for clarity and for overriding in extensions.
560 546 def _sortgroup(self, store, nodelist, lookup):
561 547 """Sort nodes for change group and turn them into revnums."""
562 548 # Ellipses serving mode.
563 549 #
564 550 # In a perfect world, we'd generate better ellipsis-ified graphs
565 551 # for non-changelog revlogs. In practice, we haven't started doing
566 552 # that yet, so the resulting DAGs for the manifestlog and filelogs
567 553 # are actually full of bogus parentage on all the ellipsis
568 554 # nodes. This has the side effect that, while the contents are
569 555 # correct, the individual DAGs might be completely out of whack in
570 556 # a case like 882681bc3166 and its ancestors (back about 10
571 557 # revisions or so) in the main hg repo.
572 558 #
573 559 # The one invariant we *know* holds is that the new (potentially
574 560 # bogus) DAG shape will be valid if we order the nodes in the
575 561 # order that they're introduced in dramatis personae by the
576 562 # changelog, so what we do is we sort the non-changelog histories
577 563 # by the order in which they are used by the changelog.
578 564 if util.safehasattr(self, 'full_nodes') and self.clnode_to_rev:
579 565 key = lambda n: self.clnode_to_rev[lookup(n)]
580 566 return [store.rev(n) for n in sorted(nodelist, key=key)]
581 567
582 568 # for generaldelta revlogs, we linearize the revs; this will both be
583 569 # much quicker and generate a much smaller bundle
584 570 if (store._generaldelta and self._reorder is None) or self._reorder:
585 571 dag = dagutil.revlogdag(store)
586 572 return dag.linearize(set(store.rev(n) for n in nodelist))
587 573 else:
588 574 return sorted([store.rev(n) for n in nodelist])
589 575
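The ellipsis-mode branch above orders non-changelog nodes by the changelog revision that introduced them. Restated with hypothetical data (the dict shapes mirror clnode_to_rev and the lookup callback):

    clnode_to_rev = {b"clA": 5, b"clB": 9}
    lookup = {b"n1": b"clB", b"n2": b"clA"}.__getitem__
    order = sorted([b"n1", b"n2"], key=lambda n: clnode_to_rev[lookup(n)])
    assert order == [b"n2", b"n1"]   # n2 was introduced by the earlier changeset
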
590 576 def group(self, nodelist, store, lookup, units=None):
591 577 """Calculate a delta group, yielding a sequence of changegroup chunks
592 578 (strings).
593 579
594 580 Given a list of changeset revs, return a set of deltas and
595 581 metadata corresponding to nodes. The first delta is
596 582 first parent(nodelist[0]) -> nodelist[0]; the receiver is
597 583 guaranteed to have this parent, as it has all history before
598 584 these changesets. If the first parent is nullrev, the
599 585 changegroup starts with a full revision.
600 586
601 587 If units is not None, progress detail is generated; units specifies
602 588 the type of revlog that is touched (changelog, manifest, etc.).
603 589 """
604 590 # if we don't have any revisions touched by these changesets, bail
605 591 if len(nodelist) == 0:
606 592 yield self.close()
607 593 return
608 594
609 595 revs = self._sortgroup(store, nodelist, lookup)
610 596
611 597 # add the parent of the first rev
612 598 p = store.parentrevs(revs[0])[0]
613 599 revs.insert(0, p)
614 600
615 601 # build deltas
616 602 progress = None
617 603 if units is not None:
618 604 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
619 605 total=(len(revs) - 1))
620 606 for r in pycompat.xrange(len(revs) - 1):
621 607 if progress:
622 608 progress.update(r + 1)
623 609 prev, curr = revs[r], revs[r + 1]
624 610 linknode = lookup(store.node(curr))
625 611 for c in self.revchunk(store, curr, prev, linknode):
626 612 yield c
627 613
628 614 if progress:
629 615 progress.complete()
630 616 yield self.close()
631 617
632 618 # filter any nodes that claim to be part of the known set
633 619 def prune(self, store, missing, commonrevs):
634 620 # TODO this violates storage abstraction for manifests.
635 621 if isinstance(store, manifest.manifestrevlog):
636 622 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
637 623 return []
638 624
639 625 rr, rl = store.rev, store.linkrev
640 626 return [n for n in missing if rl(rr(n)) not in commonrevs]
641 627
642 628 def _packmanifests(self, dir, mfnodes, lookuplinknode):
643 629 """Pack flat manifests into a changegroup stream."""
644 630 assert not dir
645 631 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
646 632 lookuplinknode, units=_('manifests')):
647 633 yield chunk
648 634
649 635 def _manifestsdone(self):
650 636 return ''
651 637
652 638 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
653 639 '''yield a sequence of changegroup chunks (strings)'''
654 640 repo = self._repo
655 641 cl = repo.changelog
656 642
657 643 clrevorder = {}
658 644 mfs = {} # needed manifests
659 645 fnodes = {} # needed file nodes
660 646 mfl = repo.manifestlog
661 647 # TODO violates storage abstraction.
662 648 mfrevlog = mfl._revlog
663 649 changedfiles = set()
664 650
665 651 ellipsesmode = util.safehasattr(self, 'full_nodes')
666 652
667 653 # Callback for the changelog, used to collect changed files and
668 654 # manifest nodes.
669 655 # Returns the linkrev node (identity in the changelog case).
670 656 def lookupcl(x):
671 657 c = cl.read(x)
672 658 clrevorder[x] = len(clrevorder)
673 659
674 660 if ellipsesmode:
675 661 # Only update mfs if x is going to be sent. Otherwise we
676 662 # end up with bogus linkrevs specified for manifests and
677 663 # we skip some manifest nodes that we should otherwise
678 664 # have sent.
679 665 if (x in self.full_nodes
680 666 or cl.rev(x) in self.precomputed_ellipsis):
681 667 n = c[0]
682 668 # Record the first changeset introducing this manifest
683 669 # version.
684 670 mfs.setdefault(n, x)
685 671 # Set this narrow-specific dict so we have the lowest
686 672 # manifest revnum to look up for this cl revnum. (Part of
687 673 # mapping changelog ellipsis parents to manifest ellipsis
688 674 # parents)
689 675 self.next_clrev_to_localrev.setdefault(cl.rev(x),
690 676 mfrevlog.rev(n))
691 677 # We can't trust the changed files list in the changeset if the
692 678 # client requested a shallow clone.
693 679 if self.is_shallow:
694 680 changedfiles.update(mfl[c[0]].read().keys())
695 681 else:
696 682 changedfiles.update(c[3])
697 683 else:
698 684
699 685 n = c[0]
700 686 # record the first changeset introducing this manifest version
701 687 mfs.setdefault(n, x)
702 688 # Record a complete list of potentially-changed files in
703 689 # this manifest.
704 690 changedfiles.update(c[3])
705 691
706 692 return x
707 693
708 694 self._verbosenote(_('uncompressed size of bundle content:\n'))
709 695 size = 0
710 696 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
711 697 size += len(chunk)
712 698 yield chunk
713 699 self._verbosenote(_('%8.i (changelog)\n') % size)
714 700
715 701 # We need to make sure that the linkrev in the changegroup refers to
716 702 # the first changeset that introduced the manifest or file revision.
717 703 # The fastpath is usually safer than the slowpath, because the filelogs
718 704 # are walked in revlog order.
719 705 #
720 706 # When taking the slowpath with reorder=None and the manifest revlog
721 707 # uses generaldelta, the manifest may be walked in the "wrong" order.
722 708 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
723 709 # cc0ff93d0c0c).
724 710 #
725 711 # When taking the fastpath, we are only vulnerable to reordering
726 712 # of the changelog itself. The changelog never uses generaldelta, so
727 713 # it is only reordered when reorder=True. To handle this case, we
728 714 # simply take the slowpath, which already has the 'clrevorder' logic.
729 715 # This was also fixed in cc0ff93d0c0c.
730 716 fastpathlinkrev = fastpathlinkrev and not self._reorder
731 717 # Treemanifests don't work correctly with fastpathlinkrev
732 718 # either, because we don't discover which directory nodes to
733 719 # send along with files. This could probably be fixed.
734 720 fastpathlinkrev = fastpathlinkrev and (
735 721 'treemanifest' not in repo.requirements)
736 722
737 723 for chunk in self.generatemanifests(commonrevs, clrevorder,
738 724 fastpathlinkrev, mfs, fnodes, source):
739 725 yield chunk
740 726
741 727 if ellipsesmode:
742 728 mfdicts = None
743 729 if self.is_shallow:
744 730 mfdicts = [(self._repo.manifestlog[n].read(), lr)
745 731 for (n, lr) in mfs.iteritems()]
746 732
747 733 mfs.clear()
748 734 clrevs = set(cl.rev(x) for x in clnodes)
749 735
750 736 if not fastpathlinkrev:
751 737 def linknodes(unused, fname):
752 738 return fnodes.get(fname, {})
753 739 else:
754 740 cln = cl.node
755 741 def linknodes(filerevlog, fname):
756 742 llr = filerevlog.linkrev
757 743 fln = filerevlog.node
758 744 revs = ((r, llr(r)) for r in filerevlog)
759 745 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
760 746
761 747 if ellipsesmode:
762 748 # We need to pass the mfdicts variable down into
763 749 # generatefiles(), but more than one command might have
764 750 # wrapped generatefiles so we can't modify the function
765 751 # signature. Instead, we pass the data to ourselves using an
766 752 # instance attribute. I'm sorry.
767 753 self._mfdicts = mfdicts
768 754
769 755 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
770 756 source):
771 757 yield chunk
772 758
773 759 yield self.close()
774 760
775 761 if clnodes:
776 762 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
777 763
778 764 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
779 765 fnodes, source):
780 766 """Returns an iterator of changegroup chunks containing manifests.
781 767
782 768 `source` is unused here, but is used by extensions like remotefilelog to
783 769 change what is sent based in pulls vs pushes, etc.
784 770 """
785 771 repo = self._repo
786 772 mfl = repo.manifestlog
787 773 dirlog = mfl._revlog.dirlog
788 774 tmfnodes = {'': mfs}
789 775
790 776 # Callback for the manifest, used to collect linkrevs for filelog
791 777 # revisions.
792 778 # Returns the linkrev node (collected in lookupcl).
793 779 def makelookupmflinknode(dir, nodes):
794 780 if fastpathlinkrev:
795 781 assert not dir
796 782 return mfs.__getitem__
797 783
798 784 def lookupmflinknode(x):
799 785 """Callback for looking up the linknode for manifests.
800 786
801 787 Returns the linkrev node for the specified manifest.
802 788
803 789 SIDE EFFECT:
804 790
805 791 1) fclnodes gets populated with the list of relevant
806 792 file nodes if we're not using fastpathlinkrev
807 793 2) When treemanifests are in use, collects treemanifest nodes
808 794 to send
809 795
810 796 Note that this means manifests must be completely sent to
811 797 the client before you can trust the list of files and
812 798 treemanifests to send.
813 799 """
814 800 clnode = nodes[x]
815 801 mdata = mfl.get(dir, x).readfast(shallow=True)
816 802 for p, n, fl in mdata.iterentries():
817 803 if fl == 't': # subdirectory manifest
818 804 subdir = dir + p + '/'
819 805 tmfclnodes = tmfnodes.setdefault(subdir, {})
820 806 tmfclnode = tmfclnodes.setdefault(n, clnode)
821 807 if clrevorder[clnode] < clrevorder[tmfclnode]:
822 808 tmfclnodes[n] = clnode
823 809 else:
824 810 f = dir + p
825 811 fclnodes = fnodes.setdefault(f, {})
826 812 fclnode = fclnodes.setdefault(n, clnode)
827 813 if clrevorder[clnode] < clrevorder[fclnode]:
828 814 fclnodes[n] = clnode
829 815 return clnode
830 816 return lookupmflinknode
831 817
832 818 size = 0
833 819 while tmfnodes:
834 820 dir, nodes = tmfnodes.popitem()
835 821 prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
836 822 if not dir or prunednodes:
837 823 for x in self._packmanifests(dir, prunednodes,
838 824 makelookupmflinknode(dir, nodes)):
839 825 size += len(x)
840 826 yield x
841 827 self._verbosenote(_('%8.i (manifests)\n') % size)
842 828 yield self._manifestsdone()
843 829
844 830 # The 'source' parameter is useful for extensions
845 831 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
846 832 changedfiles = list(filter(self._filematcher, changedfiles))
847 833
848 834 if getattr(self, 'is_shallow', False):
849 835 # See comment in generate() for why this sadness is a thing.
850 836 mfdicts = self._mfdicts
851 837 del self._mfdicts
852 838 # In a shallow clone, the linknodes callback needs to also include
853 839 # those file nodes that are in the manifests we sent but weren't
854 840 # introduced by those manifests.
855 841 commonctxs = [self._repo[c] for c in commonrevs]
856 842 oldlinknodes = linknodes
857 843 clrev = self._repo.changelog.rev
858 844
859 845 # Defining this function has a side-effect of overriding the
860 846 # function of the same name that was passed in as an argument.
861 847 # TODO have caller pass in appropriate function.
862 848 def linknodes(flog, fname):
863 849 for c in commonctxs:
864 850 try:
865 851 fnode = c.filenode(fname)
866 852 self.clrev_to_localrev[c.rev()] = flog.rev(fnode)
867 853 except error.ManifestLookupError:
868 854 pass
869 855 links = oldlinknodes(flog, fname)
870 856 if len(links) != len(mfdicts):
871 857 for mf, lr in mfdicts:
872 858 fnode = mf.get(fname, None)
873 859 if fnode in links:
874 860 links[fnode] = min(links[fnode], lr, key=clrev)
875 861 elif fnode:
876 862 links[fnode] = lr
877 863 return links
878 864
879 865 return self._generatefiles(changedfiles, linknodes, commonrevs, source)
880 866
881 867 def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
882 868 repo = self._repo
883 869 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
884 870 total=len(changedfiles))
885 871 for i, fname in enumerate(sorted(changedfiles)):
886 872 filerevlog = repo.file(fname)
887 873 if not filerevlog:
888 874 raise error.Abort(_("empty or missing file data for %s") %
889 875 fname)
890 876
891 877 linkrevnodes = linknodes(filerevlog, fname)
892 878 # Lookup for filenodes, we collected the linkrev nodes above in the
893 879 # fastpath case and with lookupmf in the slowpath case.
894 880 def lookupfilelog(x):
895 881 return linkrevnodes[x]
896 882
897 883 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
898 884 if filenodes:
899 885 progress.update(i + 1, item=fname)
900 886 h = self.fileheader(fname)
901 887 size = len(h)
902 888 yield h
903 889 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
904 890 size += len(chunk)
905 891 yield chunk
906 892 self._verbosenote(_('%8.i %s\n') % (size, fname))
907 893 progress.complete()
908 894
909 895 def deltaparent(self, store, rev, p1, p2, prev):
910 896 if not store.candelta(prev, rev):
911 897 raise error.ProgrammingError('cg1 should not be used in this case')
912 898 return prev
913 899
914 900 def revchunk(self, store, rev, prev, linknode):
915 901 if util.safehasattr(self, 'full_nodes'):
916 902 fn = self._revchunknarrow
917 903 else:
918 904 fn = self._revchunknormal
919 905
920 906 return fn(store, rev, prev, linknode)
921 907
922 908 def _revchunknormal(self, store, rev, prev, linknode):
923 909 node = store.node(rev)
924 910 p1, p2 = store.parentrevs(rev)
925 911 base = self.deltaparent(store, rev, p1, p2, prev)
926 912
927 913 prefix = ''
928 914 if store.iscensored(base) or store.iscensored(rev):
929 915 try:
930 916 delta = store.revision(node, raw=True)
931 917 except error.CensoredNodeError as e:
932 918 delta = e.tombstone
933 919 if base == nullrev:
934 920 prefix = mdiff.trivialdiffheader(len(delta))
935 921 else:
936 922 baselen = store.rawsize(base)
937 923 prefix = mdiff.replacediffheader(baselen, len(delta))
938 924 elif base == nullrev:
939 925 delta = store.revision(node, raw=True)
940 926 prefix = mdiff.trivialdiffheader(len(delta))
941 927 else:
942 928 delta = store.revdiff(base, rev)
943 929 p1n, p2n = store.parents(node)
944 930 basenode = store.node(base)
945 931 flags = store.flags(rev)
946 932 meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
947 933 meta += prefix
948 934 l = len(meta) + len(delta)
949 935 yield chunkheader(l)
950 936 yield meta
951 937 yield delta
952 938
953 939 def _revchunknarrow(self, store, rev, prev, linknode):
954 940 # build up some mapping information that's useful later. See
955 941 # the local() nested function below.
956 942 if not self.changelog_done:
957 943 self.clnode_to_rev[linknode] = rev
958 944 linkrev = rev
959 945 self.clrev_to_localrev[linkrev] = rev
960 946 else:
961 947 linkrev = self.clnode_to_rev[linknode]
962 948 self.clrev_to_localrev[linkrev] = rev
963 949
964 950 # This is a node to send in full, because the changeset it
965 951 # corresponds to was a full changeset.
966 952 if linknode in self.full_nodes:
967 953 for x in self._revchunknormal(store, rev, prev, linknode):
968 954 yield x
969 955 return
970 956
971 957 # At this point, a node can either be one we should skip or an
972 958 # ellipsis. If it's not an ellipsis, bail immediately.
973 959 if linkrev not in self.precomputed_ellipsis:
974 960 return
975 961
976 962 linkparents = self.precomputed_ellipsis[linkrev]
977 963 def local(clrev):
978 964 """Turn a changelog revnum into a local revnum.
979 965
980 966 The ellipsis dag is stored as revnums on the changelog,
981 967 but when we're producing ellipsis entries for
982 968 non-changelog revlogs, we need to turn those numbers into
983 969 something local. This does that for us, and during the
984 970 changelog sending phase will also expand the stored
985 971 mappings as needed.
986 972 """
987 973 if clrev == nullrev:
988 974 return nullrev
989 975
990 976 if not self.changelog_done:
991 977 # If we're doing the changelog, it's possible that we
992 978 # have a parent that is already on the client, and we
993 979 # need to store some extra mapping information so that
994 980 # our contained ellipsis nodes will be able to resolve
995 981 # their parents.
996 982 if clrev not in self.clrev_to_localrev:
997 983 clnode = store.node(clrev)
998 984 self.clnode_to_rev[clnode] = clrev
999 985 return clrev
1000 986
1001 987 # Walk the ellipsis-ized changelog breadth-first looking for a
1002 988 # change that has been linked from the current revlog.
1003 989 #
1004 990 # For a flat manifest revlog only a single step should be necessary
1005 991 # as all relevant changelog entries are relevant to the flat
1006 992 # manifest.
1007 993 #
1008 994 # For a filelog or tree manifest dirlog however not every changelog
1009 995 # entry will have been relevant, so we need to skip some changelog
1010 996 # nodes even after ellipsis-izing.
1011 997 walk = [clrev]
1012 998 while walk:
1013 999 p = walk[0]
1014 1000 walk = walk[1:]
1015 1001 if p in self.clrev_to_localrev:
1016 1002 return self.clrev_to_localrev[p]
1017 1003 elif p in self.full_nodes:
1018 1004 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
1019 1005 if pp != nullrev])
1020 1006 elif p in self.precomputed_ellipsis:
1021 1007 walk.extend([pp for pp in self.precomputed_ellipsis[p]
1022 1008 if pp != nullrev])
1023 1009 else:
1024 1010 # In this case, we've got an ellipsis with parents
1025 1011 # outside the current bundle (likely an
1026 1012 # incremental pull). We "know" that we can use the
1027 1013 # value of this same revlog at whatever revision
1028 1014 # is pointed to by linknode. "Know" is in scare
1029 1015 # quotes because I haven't done enough examination
1030 1016 # of edge cases to convince myself this is really
1031 1017 # a fact - it works for all the (admittedly
1032 1018 # thorough) cases in our testsuite, but I would be
1033 1019 # somewhat unsurprised to find a case in the wild
1034 1020 # where this breaks down a bit. That said, I don't
1035 1021 # know if it would hurt anything.
1036 1022 for i in pycompat.xrange(rev, 0, -1):
1037 1023 if store.linkrev(i) == clrev:
1038 1024 return i
1039 1025 # We failed to resolve a parent for this node, so
1040 1026 # we crash the changegroup construction.
1041 1027 raise error.Abort(
1042 1028 'unable to resolve parent while packing %r %r'
1043 1029 ' for changeset %r' % (store.indexfile, rev, clrev))
1044 1030
1045 1031 return nullrev
1046 1032
1047 1033 if not linkparents or (
1048 1034 store.parentrevs(rev) == (nullrev, nullrev)):
1049 1035 p1, p2 = nullrev, nullrev
1050 1036 elif len(linkparents) == 1:
1051 1037 p1, = sorted(local(p) for p in linkparents)
1052 1038 p2 = nullrev
1053 1039 else:
1054 1040 p1, p2 = sorted(local(p) for p in linkparents)
1041
1055 1042 n = store.node(rev)
1056
1057 yield ellipsisdata(
1058 self, rev, store, p1, p2, store.revision(n), linknode)
1043 p1n, p2n = store.node(p1), store.node(p2)
1044 flags = store.flags(rev)
1045 flags |= revlog.REVIDX_ELLIPSIS
1046 meta = self.builddeltaheader(
1047 n, p1n, p2n, nullid, linknode, flags)
1048 # TODO: try and actually send deltas for ellipsis data blocks
1049 data = store.revision(n)
1050 diffheader = mdiff.trivialdiffheader(len(data))
1051 l = len(meta) + len(diffheader) + len(data)
1052 yield ''.join((chunkheader(l),
1053 meta,
1054 diffheader,
1055 data))
1059 1056
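The block added above is the body of the removed ellipsisdata() helper, inlined: an ellipsis revision is always emitted as its full text against nullid, wrapped in a trivial diff header. A sketch of that framing, assuming mdiff.trivialdiffheader emits a single hunk replacing the empty byte range with the whole text:

    import struct

    def trivialdiffheader(length):
        # assumed shape of mdiff.trivialdiffheader: replace bytes [0, 0)
        # with `length` new bytes, i.e. a "delta" that is the full revision
        return struct.pack(">lll", 0, 0, length)

    data = b"full revision text"
    payload = trivialdiffheader(len(data)) + data   # what the ellipsis chunk carries
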
1060 1057 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
1061 1058 # do nothing with basenode, it is implicitly the previous one in HG10
1062 1059 # do nothing with flags, it is implicitly 0 for cg1 and cg2
1063 1060 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
1064 1061
1065 1062 class cg2packer(cg1packer):
1066 1063 version = '02'
1067 1064 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
1068 1065
1069 1066 def __init__(self, repo, filematcher, bundlecaps=None):
1070 1067 super(cg2packer, self).__init__(repo, filematcher,
1071 1068 bundlecaps=bundlecaps)
1072 1069
1073 1070 if self._reorder is None:
1074 1071 # Since generaldelta is directly supported by cg2, reordering
1075 1072 # generally doesn't help, so we disable it by default (treating
1076 1073 # bundle.reorder=auto just like bundle.reorder=False).
1077 1074 self._reorder = False
1078 1075
1079 1076 def deltaparent(self, store, rev, p1, p2, prev):
1080 1077 # Narrow ellipses mode.
1081 1078 if util.safehasattr(self, 'full_nodes'):
1082 1079 # TODO: send better deltas when in narrow mode.
1083 1080 #
1084 1081 # changegroup.group() loops over revisions to send,
1085 1082 # including revisions we'll skip. What this means is that
1086 1083 # `prev` will be a potentially useless delta base for all
1087 1084 # ellipsis nodes, as the client likely won't have it. In
1088 1085 # the future we should do bookkeeping about which nodes
1089 1086 # have been sent to the client, and try to be
1090 1087 # significantly smarter about delta bases. This is
1091 1088 # slightly tricky because this same code has to work for
1092 1089 # all revlogs, and we don't have the linkrev/linknode here.
1093 1090 return p1
1094 1091
1095 1092 dp = store.deltaparent(rev)
1096 1093 if dp == nullrev and store.storedeltachains:
1097 1094 # Avoid sending full revisions when delta parent is null. Pick prev
1098 1095 # in that case. It's tempting to pick p1 in this case, as p1 will
1099 1096 # be smaller in the common case. However, computing a delta against
1100 1097 # p1 may require resolving the raw text of p1, which could be
1101 1098 # expensive. The revlog caches should have prev cached, meaning
1102 1099 # less CPU for changegroup generation. There is likely room to add
1103 1100 # a flag and/or config option to control this behavior.
1104 1101 base = prev
1105 1102 elif dp == nullrev:
1106 1103 # revlog is configured to use full snapshot for a reason,
1107 1104 # stick to full snapshot.
1108 1105 base = nullrev
1109 1106 elif dp not in (p1, p2, prev):
1110 1107 # Pick prev when we can't be sure remote has the base revision.
1111 1108 return prev
1112 1109 else:
1113 1110 base = dp
1114 1111 if base != nullrev and not store.candelta(base, rev):
1115 1112 base = nullrev
1116 1113 return base
1117 1114
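A decision table for the non-ellipsis delta base chosen above (a restatement of the branches, not new policy):

    # dp == nullrev and storedeltachains  -> prev     (prev is likely cached; cheap)
    # dp == nullrev otherwise             -> nullrev  (keep the full snapshot)
    # dp not in (p1, p2, prev)            -> prev     (remote may lack dp; returned early)
    # otherwise                           -> dp
    # a base from the first or last branch that fails candelta(base, rev)
    # is demoted to nullrev
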
1118 1115 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
1119 1116 # Do nothing with flags, it is implicitly 0 in cg1 and cg2
1120 1117 return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
1121 1118
1122 1119 class cg3packer(cg2packer):
1123 1120 version = '03'
1124 1121 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
1125 1122
1126 1123 def _packmanifests(self, dir, mfnodes, lookuplinknode):
1127 1124 if dir:
1128 1125 yield self.fileheader(dir)
1129 1126
1130 1127 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
1131 1128 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
1132 1129 units=_('manifests')):
1133 1130 yield chunk
1134 1131
1135 1132 def _manifestsdone(self):
1136 1133 return self.close()
1137 1134
1138 1135 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
1139 1136 return struct.pack(
1140 1137 self.deltaheader, node, p1n, p2n, basenode, linknode, flags)
1141 1138
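A round-trip check of the cg3 header format packed just above (dummy ids; illustrative only):

    import struct

    fmt = ">20s20s20s20s20sH"   # same layout as _CHANGEGROUPV3_DELTA_HEADER
    node, p1n, p2n, basenode, linknode = (b"\x11" * 20, b"\x22" * 20,
                                          b"\x33" * 20, b"\x44" * 20, b"\x55" * 20)
    header = struct.pack(fmt, node, p1n, p2n, basenode, linknode, 0)
    assert struct.unpack(fmt, header) == (node, p1n, p2n, basenode, linknode, 0)
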
1142 1139 _packermap = {'01': (cg1packer, cg1unpacker),
1143 1140 # cg2 adds support for exchanging generaldelta
1144 1141 '02': (cg2packer, cg2unpacker),
1145 1142 # cg3 adds support for exchanging revlog flags and treemanifests
1146 1143 '03': (cg3packer, cg3unpacker),
1147 1144 }
1148 1145
1149 1146 def allsupportedversions(repo):
1150 1147 versions = set(_packermap.keys())
1151 1148 if not (repo.ui.configbool('experimental', 'changegroup3') or
1152 1149 repo.ui.configbool('experimental', 'treemanifest') or
1153 1150 'treemanifest' in repo.requirements):
1154 1151 versions.discard('03')
1155 1152 return versions
1156 1153
1157 1154 # Changegroup versions that can be applied to the repo
1158 1155 def supportedincomingversions(repo):
1159 1156 return allsupportedversions(repo)
1160 1157
1161 1158 # Changegroup versions that can be created from the repo
1162 1159 def supportedoutgoingversions(repo):
1163 1160 versions = allsupportedversions(repo)
1164 1161 if 'treemanifest' in repo.requirements:
1165 1162 # Versions 01 and 02 support only flat manifests and it's just too
1166 1163 # expensive to convert between the flat manifest and tree manifest on
1167 1164 # the fly. Since tree manifests are hashed differently, all of history
1168 1165 # would have to be converted. Instead, we simply don't even pretend to
1169 1166 # support versions 01 and 02.
1170 1167 versions.discard('01')
1171 1168 versions.discard('02')
1172 1169 if repository.NARROW_REQUIREMENT in repo.requirements:
1173 1170 # Versions 01 and 02 don't support revlog flags, and we need to
1174 1171 # support that for stripping and unbundling to work.
1175 1172 versions.discard('01')
1176 1173 versions.discard('02')
1177 1174 if LFS_REQUIREMENT in repo.requirements:
1178 1175 # Versions 01 and 02 don't support revlog flags, and we need to
1179 1176 # mark LFS entries with REVIDX_EXTSTORED.
1180 1177 versions.discard('01')
1181 1178 versions.discard('02')
1182 1179
1183 1180 return versions
1184 1181
1185 1182 def localversion(repo):
1186 1183 # Finds the best version to use for bundles that are meant to be used
1187 1184 # locally, such as those from strip and shelve, and temporary bundles.
1188 1185 return max(supportedoutgoingversions(repo))
1189 1186
1190 1187 def safeversion(repo):
1191 1188 # Finds the smallest version that it's safe to assume clients of the repo
1192 1189 # will support. For example, all hg versions that support generaldelta also
1193 1190 # support changegroup 02.
1194 1191 versions = supportedoutgoingversions(repo)
1195 1192 if 'generaldelta' in repo.requirements:
1196 1193 versions.discard('01')
1197 1194 assert versions
1198 1195 return min(versions)
1199 1196
1200 1197 def getbundler(version, repo, bundlecaps=None, filematcher=None):
1201 1198 assert version in supportedoutgoingversions(repo)
1202 1199
1203 1200 if filematcher is None:
1204 1201 filematcher = matchmod.alwaysmatcher(repo.root, '')
1205 1202
1206 1203 if version == '01' and not filematcher.always():
1207 1204 raise error.ProgrammingError('version 01 changegroups do not support '
1208 1205 'sparse file matchers')
1209 1206
1210 1207 # Requested files could include files not in the local store. So
1211 1208 # filter those out.
1212 1209 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
1213 1210 filematcher)
1214 1211
1215 1212 return _packermap[version][0](repo, filematcher=filematcher,
1216 1213 bundlecaps=bundlecaps)
1217 1214
1218 1215 def getunbundler(version, fh, alg, extras=None):
1219 1216 return _packermap[version][1](fh, alg, extras=extras)
1220 1217
1221 1218 def _changegroupinfo(repo, nodes, source):
1222 1219 if repo.ui.verbose or source == 'bundle':
1223 1220 repo.ui.status(_("%d changesets found\n") % len(nodes))
1224 1221 if repo.ui.debugflag:
1225 1222 repo.ui.debug("list of changesets:\n")
1226 1223 for node in nodes:
1227 1224 repo.ui.debug("%s\n" % hex(node))
1228 1225
1229 1226 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1230 1227 bundlecaps=None):
1231 1228 cgstream = makestream(repo, outgoing, version, source,
1232 1229 fastpath=fastpath, bundlecaps=bundlecaps)
1233 1230 return getunbundler(version, util.chunkbuffer(cgstream), None,
1234 1231 {'clcount': len(outgoing.missing) })
1235 1232
1236 1233 def makestream(repo, outgoing, version, source, fastpath=False,
1237 1234 bundlecaps=None, filematcher=None):
1238 1235 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1239 1236 filematcher=filematcher)
1240 1237
1241 1238 repo = repo.unfiltered()
1242 1239 commonrevs = outgoing.common
1243 1240 csets = outgoing.missing
1244 1241 heads = outgoing.missingheads
1245 1242 # We go through the fast path if we get told to, or if all (unfiltered)
1246 1243 # heads have been requested (since we then know that all linkrevs will
1247 1244 # be pulled by the client).
1248 1245 heads.sort()
1249 1246 fastpathlinkrev = fastpath or (
1250 1247 repo.filtername is None and heads == sorted(repo.heads()))
1251 1248
1252 1249 repo.hook('preoutgoing', throw=True, source=source)
1253 1250 _changegroupinfo(repo, csets, source)
1254 1251 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1255 1252
1256 1253 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1257 1254 revisions = 0
1258 1255 files = 0
1259 1256 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1260 1257 total=expectedfiles)
1261 1258 for chunkdata in iter(source.filelogheader, {}):
1262 1259 files += 1
1263 1260 f = chunkdata["filename"]
1264 1261 repo.ui.debug("adding %s revisions\n" % f)
1265 1262 progress.increment()
1266 1263 fl = repo.file(f)
1267 1264 o = len(fl)
1268 1265 try:
1269 1266 deltas = source.deltaiter()
1270 1267 if not fl.addgroup(deltas, revmap, trp):
1271 1268 raise error.Abort(_("received file revlog group is empty"))
1272 1269 except error.CensoredBaseError as e:
1273 1270 raise error.Abort(_("received delta base is censored: %s") % e)
1274 1271 revisions += len(fl) - o
1275 1272 if f in needfiles:
1276 1273 needs = needfiles[f]
1277 1274 for new in pycompat.xrange(o, len(fl)):
1278 1275 n = fl.node(new)
1279 1276 if n in needs:
1280 1277 needs.remove(n)
1281 1278 else:
1282 1279 raise error.Abort(
1283 1280 _("received spurious file revlog entry"))
1284 1281 if not needs:
1285 1282 del needfiles[f]
1286 1283 progress.complete()
1287 1284
1288 1285 for f, needs in needfiles.iteritems():
1289 1286 fl = repo.file(f)
1290 1287 for n in needs:
1291 1288 try:
1292 1289 fl.rev(n)
1293 1290 except error.LookupError:
1294 1291 raise error.Abort(
1295 1292 _('missing file data for %s:%s - run hg verify') %
1296 1293 (f, hex(n)))
1297 1294
1298 1295 return revisions, files
1299 1296
1300 1297 def _packellipsischangegroup(repo, common, match, relevant_nodes,
1301 1298 ellipsisroots, visitnodes, depth, source, version):
1302 1299 if version in ('01', '02'):
1303 1300 raise error.Abort(
1304 1301 'ellipsis nodes require at least cg3 on client and server, '
1305 1302 'but negotiated version %s' % version)
1306 1303 # We wrap cg1packer.revchunk, using a side channel to pass
1307 1304 # relevant_nodes into that area. Then if linknode isn't in the
1308 1305 # set, we know we have an ellipsis node and we should defer
1309 1306 # sending that node's data. We override close() to detect
1310 1307 # pending ellipsis nodes and flush them.
1311 1308 packer = getbundler(version, repo, filematcher=match)
1312 1309 # Give the packer the list of nodes which should not be
1313 1310 # ellipsis nodes. We store this rather than the set of nodes
1314 1311 # that should be an ellipsis because for very large histories
1315 1312 # we expect this to be significantly smaller.
1316 1313 packer.full_nodes = relevant_nodes
1317 1314 # Maps ellipsis revs to their roots at the changelog level.
1318 1315 packer.precomputed_ellipsis = ellipsisroots
1319 1316 # Maps CL revs to per-revlog revisions. Cleared in close() at
1320 1317 # the end of each group.
1321 1318 packer.clrev_to_localrev = {}
1322 1319 packer.next_clrev_to_localrev = {}
1323 1320 # Maps changelog nodes to changelog revs. Filled in once
1324 1321 # during changelog stage and then left unmodified.
1325 1322 packer.clnode_to_rev = {}
1326 1323 packer.changelog_done = False
1327 1324 # If true, informs the packer that it is serving shallow content and might
1328 1325 # need to pack file contents not introduced by the changes being packed.
1329 1326 packer.is_shallow = depth is not None
1330 1327
1331 1328 return packer.generate(common, visitnodes, False, source)
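
For orientation, a hedged sketch of how the module-level entry points compose; `repo` and `outgoing` are assumed to come from Mercurial's localrepo and discovery machinery, outside this file:

    # hypothetical caller
    version = safeversion(repo)                  # most compatible usable version
    stream = makestream(repo, outgoing, version, 'push')
    for chunk in stream:                         # byte chunks of the changegroup
        pass                                     # e.g. hand them to a bundle writer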