##// END OF EJS Templates
py3: fix int formatting of "incoming changes" log
Yuya Nishihara -
r36755:2a502410 default
parent child Browse files
Show More
@@ -1,1005 +1,1005 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import tempfile
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 hex,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from . import (
23 23 dagutil,
24 24 error,
25 25 mdiff,
26 26 phases,
27 27 pycompat,
28 28 util,
29 29 )
30 30
# struct formats of the per-revision delta headers:
#   cg1: node, p1, p2, linknode (delta base is implicit)
#   cg2: adds an explicit delta base node
#   cg3: additionally carries 16-bit revlog flags
_CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
_CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
_CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"

# When narrowing is finalized and no longer subject to format changes,
# we should move this to just "narrow" or similar.
NARROW_REQUIREMENT = 'narrowhg-experimental'

# module-level alias, also shaves an attribute lookup on a hot path
readexactly = util.readexactly
def getchunk(stream):
    """return the next chunk from stream as a string"""
    # A chunk is a big-endian signed 4-byte length (which counts the
    # length field itself) followed by the payload.
    header = readexactly(stream, 4)
    length = struct.unpack(">l", header)[0]
    if length > 4:
        return readexactly(stream, length - 4)
    if length == 0:
        # a zero-length chunk terminates the current group
        return ""
    # 1..4 or negative can never be produced by a well-formed writer
    raise error.Abort(_("invalid chunk length %d") % length)
50 50
def chunkheader(length):
    """return a changegroup chunk header (string)"""
    # the on-the-wire length counts the 4-byte header itself
    total = length + 4
    return struct.pack(">l", total)
54 54
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # a stored length of 0 marks the end of a group; equivalent to
    # struct.pack(">l", 0)
    return b"\x00\x00\x00\x00"
58 58
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.

    On failure the partially written file is removed; ``vfs``, when
    given, is used for both opening and unlinking.
    """
    fh = None
    cleanup = None
    try:
        if filename:
            if vfs:
                fh = vfs.open(filename, "wb")
            else:
                # Increase default buffer size because default is usually
                # small (4k is common on Linux).
                fh = open(filename, "wb", 131072)
        else:
            fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, pycompat.sysstr("wb"))
        # arm cleanup: from here on any failure removes the file
        cleanup = filename
        for c in chunks:
            fh.write(c)
        # success: disarm cleanup so the finished file survives
        cleanup = None
        return filename
    finally:
        # close before unlinking so the removal works on all platforms
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
92 92
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    # struct format of the per-delta header and its size in bytes
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    # wire protocol version string
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests
115 115
    def __init__(self, fh, alg, extras=None):
        """fh is a file-like object with the (possibly compressed)
        changegroup data; alg is a bundle compression type (e.g. 'UN',
        'BZ') or None, which is treated as uncompressed."""
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            # use the bzip2 variant that does not expect a leading magic;
            # changegroup payloads don't carry one
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # per-chunk progress callback, installed by apply()
        self.callback = None
130 130
    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        # 'UN' marks an uncompressed stream
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        # read l bytes from the (decompressed) stream
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()
143 143
    def _chunklength(self):
        """Read a chunk header and return the payload length.

        Returns 0 for the empty chunk that terminates a group; raises
        Abort on an impossible length field (1..4 or negative).
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            # report per-chunk progress
            self.callback()
        # the stored length includes the 4-byte header itself
        return l - 4
154 154
    # NOTE(review): these stubs look like extension points for formats
    # with richer per-part headers — confirm before relying on that.
    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}
162 162
    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            # empty chunk: no more filelog groups
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}
170 170
171 171 def _deltaheader(self, headertuple, prevnode):
172 172 node, p1, p2, cs = headertuple
173 173 if prevnode is None:
174 174 deltabase = p1
175 175 else:
176 176 deltabase = prevnode
177 177 flags = 0
178 178 return node, p1, p2, deltabase, cs, flags
179 179
    def deltachunk(self, prevnode):
        """Read one delta from the stream.

        Returns {} at the end of the group, otherwise the tuple
        (node, p1, p2, cs, deltabase, delta, flags).
        """
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = struct.unpack(self.deltaheader, headerdata)
        # everything after the fixed-size header is the delta payload
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)
189 189
    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was preceded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                # re-emit the framing header, then the payload in 1MB
                # slices to bound peak memory use
                yield chunkheader(len(chunk))
                pos = 0
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
        yield closechunk()
234 234
    def _unpackmanifests(self, repo, revmap, trp, prog, numchanges):
        """Consume the manifest part of the stream and add it to the repo.

        prog is a progress-callback factory (see apply()); numchanges is
        used as the progress total.
        """
        # We know that we'll never have more manifests than we had
        # changesets.
        self.callback = prog(_('manifests'), numchanges)
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog._revlog.addgroup(deltas, revmap, trp)
        repo.ui.progress(_('manifests'), None)
        self.callback = None
248 248
249 249 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
250 250 expectedtotal=None):
251 251 """Add the changegroup returned by source.read() to this repo.
252 252 srctype is a string like 'push', 'pull', or 'unbundle'. url is
253 253 the URL of the repo where this changegroup is coming from.
254 254
255 255 Return an integer summarizing the change to this repo:
256 256 - nothing changed or no source: 0
257 257 - more heads than before: 1+added heads (2..n)
258 258 - fewer heads than before: -1-removed heads (-2..-n)
259 259 - number of heads stays the same: 1
260 260 """
261 261 repo = repo.unfiltered()
262 262 def csmap(x):
263 263 repo.ui.debug("add changeset %s\n" % short(x))
264 264 return len(cl)
265 265
266 266 def revmap(x):
267 267 return cl.rev(x)
268 268
269 269 changesets = files = revisions = 0
270 270
271 271 try:
272 272 # The transaction may already carry source information. In this
273 273 # case we use the top level data. We overwrite the argument
274 274 # because we need to use the top level value (if they exist)
275 275 # in this function.
276 276 srctype = tr.hookargs.setdefault('source', srctype)
277 277 url = tr.hookargs.setdefault('url', url)
278 278 repo.hook('prechangegroup',
279 279 throw=True, **pycompat.strkwargs(tr.hookargs))
280 280
281 281 # write changelog data to temp files so concurrent readers
282 282 # will not see an inconsistent view
283 283 cl = repo.changelog
284 284 cl.delayupdate(tr)
285 285 oldheads = set(cl.heads())
286 286
287 287 trp = weakref.proxy(tr)
288 288 # pull off the changeset group
289 289 repo.ui.status(_("adding changesets\n"))
290 290 clstart = len(cl)
291 291 class prog(object):
292 292 def __init__(self, step, total):
293 293 self._step = step
294 294 self._total = total
295 295 self._count = 1
296 296 def __call__(self):
297 297 repo.ui.progress(self._step, self._count, unit=_('chunks'),
298 298 total=self._total)
299 299 self._count += 1
300 300 self.callback = prog(_('changesets'), expectedtotal)
301 301
302 302 efiles = set()
303 303 def onchangelog(cl, node):
304 304 efiles.update(cl.readfiles(node))
305 305
306 306 self.changelogheader()
307 307 deltas = self.deltaiter()
308 308 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
309 309 efiles = len(efiles)
310 310
311 311 if not cgnodes:
312 312 repo.ui.develwarn('applied empty changegroup',
313 313 config='warn-empty-changegroup')
314 314 clend = len(cl)
315 315 changesets = clend - clstart
316 316 repo.ui.progress(_('changesets'), None)
317 317 self.callback = None
318 318
319 319 # pull off the manifest group
320 320 repo.ui.status(_("adding manifests\n"))
321 321 self._unpackmanifests(repo, revmap, trp, prog, changesets)
322 322
323 323 needfiles = {}
324 324 if repo.ui.configbool('server', 'validate'):
325 325 cl = repo.changelog
326 326 ml = repo.manifestlog
327 327 # validate incoming csets have their manifests
328 328 for cset in xrange(clstart, clend):
329 329 mfnode = cl.changelogrevision(cset).manifest
330 330 mfest = ml[mfnode].readdelta()
331 331 # store file cgnodes we must see
332 332 for f, n in mfest.iteritems():
333 333 needfiles.setdefault(f, set()).add(n)
334 334
335 335 # process the files
336 336 repo.ui.status(_("adding file changes\n"))
337 337 newrevs, newfiles = _addchangegroupfiles(
338 338 repo, self, revmap, trp, efiles, needfiles)
339 339 revisions += newrevs
340 340 files += newfiles
341 341
342 342 deltaheads = 0
343 343 if oldheads:
344 344 heads = cl.heads()
345 345 deltaheads = len(heads) - len(oldheads)
346 346 for h in heads:
347 347 if h not in oldheads and repo[h].closesbranch():
348 348 deltaheads -= 1
349 349 htext = ""
350 350 if deltaheads:
351 351 htext = _(" (%+d heads)") % deltaheads
352 352
353 353 repo.ui.status(_("added %d changesets"
354 354 " with %d changes to %d files%s\n")
355 355 % (changesets, revisions, files, htext))
356 356 repo.invalidatevolatilesets()
357 357
358 358 if changesets > 0:
359 359 if 'node' not in tr.hookargs:
360 360 tr.hookargs['node'] = hex(cl.node(clstart))
361 361 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
362 362 hookargs = dict(tr.hookargs)
363 363 else:
364 364 hookargs = dict(tr.hookargs)
365 365 hookargs['node'] = hex(cl.node(clstart))
366 366 hookargs['node_last'] = hex(cl.node(clend - 1))
367 367 repo.hook('pretxnchangegroup',
368 368 throw=True, **pycompat.strkwargs(hookargs))
369 369
370 370 added = [cl.node(r) for r in xrange(clstart, clend)]
371 371 phaseall = None
372 372 if srctype in ('push', 'serve'):
373 373 # Old servers can not push the boundary themselves.
374 374 # New servers won't push the boundary if changeset already
375 375 # exists locally as secret
376 376 #
377 377 # We should not use added here but the list of all change in
378 378 # the bundle
379 379 if repo.publishing():
380 380 targetphase = phaseall = phases.public
381 381 else:
382 382 # closer target phase computation
383 383
384 384 # Those changesets have been pushed from the
385 385 # outside, their phases are going to be pushed
386 386 # alongside. Therefor `targetphase` is
387 387 # ignored.
388 388 targetphase = phaseall = phases.draft
389 389 if added:
390 390 phases.registernew(repo, tr, targetphase, added)
391 391 if phaseall is not None:
392 392 phases.advanceboundary(repo, tr, phaseall, cgnodes)
393 393
394 394 if changesets > 0:
395 395
396 396 def runhooks():
397 397 # These hooks run when the lock releases, not when the
398 398 # transaction closes. So it's possible for the changelog
399 399 # to have changed since we last saw it.
400 400 if clstart >= len(repo):
401 401 return
402 402
403 403 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
404 404
405 405 for n in added:
406 406 args = hookargs.copy()
407 407 args['node'] = hex(n)
408 408 del args['node_last']
409 409 repo.hook("incoming", **pycompat.strkwargs(args))
410 410
411 411 newheads = [h for h in repo.heads()
412 412 if h not in oldheads]
413 413 repo.ui.log("incoming",
414 "%s incoming changes - new heads: %s\n",
414 "%d incoming changes - new heads: %s\n",
415 415 len(added),
416 416 ', '.join([hex(c[:6]) for c in newheads]))
417 417
418 418 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
419 419 lambda tr: repo._afterlock(runhooks))
420 420 finally:
421 421 repo.ui.flush()
422 422 # never return 0 here:
423 423 if deltaheads < 0:
424 424 ret = deltaheads - 1
425 425 else:
426 426 ret = deltaheads + 1
427 427 return ret
428 428
429 429 def deltaiter(self):
430 430 """
431 431 returns an iterator of the deltas in this changegroup
432 432
433 433 Useful for passing to the underlying storage system to be stored.
434 434 """
435 435 chain = None
436 436 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
437 437 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
438 438 yield chunkdata
439 439 chain = chunkdata[0]
440 440
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # the delta base travels on the wire in cg2, so prevnode is unused
        node, p1, p2, deltabase, cs = headertuple
        # cg2 cannot carry revlog flags, so they are always 0
        return node, p1, p2, deltabase, cs, 0
456 456
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # cg3 carries both an explicit delta base and revlog flags
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog, numchanges):
        # consume the root manifest group first, exactly as cg1/cg2 do
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog,
                                                  numchanges)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            dirlog = repo.manifestlog._revlog.dirlog(d)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
484 484
class headerlessfixup(object):
    """File-like wrapper that replays bytes already consumed from fh.

    h is the prefix that was read off the underlying stream before it
    was handed over; read() serves it back before touching fh again.
    """
    def __init__(self, fh, h):
        self._fh = fh
        self._h = h

    def read(self, n):
        if not self._h:
            # buffered prefix fully drained: read straight from the stream
            return readexactly(self._fh, n)
        d = self._h[:n]
        self._h = self._h[n:]
        if len(d) < n:
            # prefix exhausted mid-request: top up from the real stream
            d += readexactly(self._fh, n - len(d))
        return d
496 496
class cg1packer(object):
    # struct format of the per-delta header and the wire version string
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    version = '01'
    def __init__(self, repo, bundlecaps=None):
        """Given a source repo, construct a bundler.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.
        """
        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        # experimental config: bundle.reorder
        # tri-state: None = auto, otherwise an explicit boolean
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            reorder = None
        else:
            reorder = util.parsebool(reorder)
        self._repo = repo
        self._reorder = reorder
        self._progress = repo.ui.progress
        # per-part size notes are only emitted in verbose (non-debug) mode
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None
525 525
    def close(self):
        # a zero-length chunk terminates the current group
        return closechunk()

    def fileheader(self, fname):
        # a filelog group is introduced by a chunk holding the filename
        return chunkheader(len(fname)) + fname
531 531
    # Extracted both for clarity and for overriding in extensions.
    def _sortgroup(self, revlog, nodelist, lookup):
        """Sort nodes for change group and turn them into revnums."""
        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (revlog._generaldelta and self._reorder is None) or self._reorder:
            dag = dagutil.revlogdag(revlog)
            return dag.linearize(set(revlog.rev(n) for n in nodelist))
        else:
            # otherwise keep plain revlog (topological) order
            return sorted([revlog.rev(n) for n in nodelist])
542 542
    def group(self, nodelist, revlog, lookup, units=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield self.close()
            return

        revs = self._sortgroup(revlog, nodelist, lookup)

        # add the parent of the first rev
        p = revlog.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas; each delta is computed against the previous rev
        # in the sorted order
        total = len(revs) - 1
        msgbundling = _('bundling')
        for r in xrange(len(revs) - 1):
            if units is not None:
                self._progress(msgbundling, r + 1, unit=units, total=total)
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(revlog.node(curr))
            for c in self.revchunk(revlog, curr, prev, linknode):
                yield c

        if units is not None:
            self._progress(msgbundling, None)
        yield self.close()
582 582
583 583 # filter any nodes that claim to be part of the known set
584 584 def prune(self, revlog, missing, commonrevs):
585 585 rr, rl = revlog.rev, revlog.linkrev
586 586 return [n for n in missing if rl(rr(n)) not in commonrevs]
587 587
    def _packmanifests(self, dir, mfnodes, lookuplinknode):
        """Pack flat manifests into a changegroup stream."""
        # cg1 knows only the root manifest; tree manifests are cg3-only
        assert not dir
        for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
                                lookuplinknode, units=_('manifests')):
            yield chunk

    def _manifestsdone(self):
        # cg1/cg2 have no terminator after the manifest part
        return ''
597 597
    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)'''
        repo = self._repo
        cl = repo.changelog

        clrevorder = {}
        mfs = {} # needed manifests
        fnodes = {} # needed file nodes
        changedfiles = set()

        # Callback for the changelog, used to collect changed files and manifest
        # nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)
            n = c[0]
            # record the first changeset introducing this manifest version
            mfs.setdefault(n, x)
            # Record a complete list of potentially-changed files in
            # this manifest.
            changedfiles.update(c[3])
            return x

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0
        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (changelog)\n') % size)

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                fastpathlinkrev, mfs, fnodes, source):
            yield chunk
        # manifests are fully sent; free the memory before the file walk
        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        if not fastpathlinkrev:
            def linknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = cl.node
            def linknodes(filerevlog, fname):
                llr = filerevlog.linkrev
                fln = filerevlog.node
                revs = ((r, llr(r)) for r in filerevlog)
                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self.close()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
676 676
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        # worklist of {directory: {node: linknode}}; '' is the root manifest
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        size = 0
        # packing a directory may discover new subdirectories, which are
        # appended to tmfnodes and drained by this loop
        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
            if not dir or prunednodes:
                for x in self._packmanifests(dir, prunednodes,
                                             makelookupmflinknode(dir, nodes)):
                    size += len(x)
                    yield x
        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsdone()
742 742
    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """Yield changegroup chunks for all changed filelogs.

        linknodes(filerevlog, fname) maps a file node to its linkrev node;
        files whose every node is already known to the receiver are skipped.
        """
        repo = self._repo
        progress = self._progress
        msgbundling = _('bundling')

        total = len(changedfiles)
        # for progress output
        msgfiles = _('files')
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing revlog for %s") % fname)

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
            if filenodes:
                progress(msgbundling, i + 1, item=fname, unit=msgfiles,
                         total=total)
                h = self.fileheader(fname)
                size = len(h)
                yield h
                for chunk in self.group(filenodes, filerevlog, lookupfilelog):
                    size += len(chunk)
                    yield chunk
                self._verbosenote(_('%8.i %s\n') % (size, fname))
        progress(msgbundling, None)
775 775
    def deltaparent(self, revlog, rev, p1, p2, prev):
        # cg1 deltas are always against the previously sent revision
        return prev
778 778
    def revchunk(self, revlog, rev, prev, linknode):
        """Yield the chunks (header, delta header, delta) for one revision."""
        node = revlog.node(rev)
        p1, p2 = revlog.parentrevs(rev)
        base = self.deltaparent(revlog, rev, p1, p2, prev)

        prefix = ''
        if revlog.iscensored(base) or revlog.iscensored(rev):
            # censored revisions cannot be diffed normally; send the full
            # text (or the tombstone) wrapped in a synthetic diff header
            try:
                delta = revlog.revision(node, raw=True)
            except error.CensoredNodeError as e:
                delta = e.tombstone
            if base == nullrev:
                prefix = mdiff.trivialdiffheader(len(delta))
            else:
                baselen = revlog.rawsize(base)
                prefix = mdiff.replacediffheader(baselen, len(delta))
        elif base == nullrev:
            # no usable base: full revision with a trivial diff header
            delta = revlog.revision(node, raw=True)
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            delta = revlog.revdiff(base, rev)
        p1n, p2n = revlog.parents(node)
        basenode = revlog.node(base)
        flags = revlog.flags(rev)
        meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
        meta += prefix
        l = len(meta) + len(delta)
        yield chunkheader(l)
        yield meta
        yield delta
    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        # do nothing with basenode, it is implicitly the previous one in HG10
        # do nothing with flags, it is implicitly 0 for cg1 and cg2
        return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
813 813
class cg2packer(cg1packer):
    """Packer for cg2 streams: adds an explicit delta base to the header."""
    version = '02'
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER

    def __init__(self, repo, bundlecaps=None):
        super(cg2packer, self).__init__(repo, bundlecaps)
        if self._reorder is None:
            # Since generaldelta is directly supported by cg2, reordering
            # generally doesn't help, so we disable it by default (treating
            # bundle.reorder=auto just like bundle.reorder=False).
            self._reorder = False

    def deltaparent(self, revlog, rev, p1, p2, prev):
        dp = revlog.deltaparent(rev)
        if dp == nullrev and revlog.storedeltachains:
            # Avoid sending full revisions when delta parent is null. Pick prev
            # in that case. It's tempting to pick p1 in this case, as p1 will
            # be smaller in the common case. However, computing a delta against
            # p1 may require resolving the raw text of p1, which could be
            # expensive. The revlog caches should have prev cached, meaning
            # less CPU for changegroup generation. There is likely room to add
            # a flag and/or config option to control this behavior.
            return prev
        elif dp == nullrev:
            # revlog is configured to use full snapshot for a reason,
            # stick to full snapshot.
            return nullrev
        elif dp not in (p1, p2, prev):
            # Pick prev when we can't be sure remote has the base revision.
            return prev
        else:
            return dp

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        # Do nothing with flags, it is implicitly 0 in cg1 and cg2
        return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
850 850
class cg3packer(cg2packer):
    """Packer for version 03 changegroups.

    cg3 extends cg2 with per-delta revlog flags and with tree-manifest
    sub-directory groups.
    """

    version = '03'
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER

    def _packmanifests(self, dir, mfnodes, lookuplinknode):
        # A non-empty dir names a tree-manifest sub-directory; announce
        # it with a file-style header chunk before its deltas.
        if dir:
            yield self.fileheader(dir)

        dirlog = self._repo.manifestlog._revlog.dirlog(dir)
        for chunk in self.group(mfnodes, dirlog, lookuplinknode,
                                units=_('manifests')):
            yield chunk

    def _manifestsdone(self):
        # cg3 terminates the manifest section with an explicit close chunk.
        return self.close()

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        # Unlike cg1/cg2, cg3 transmits the revlog flags on the wire.
        fields = (node, p1n, p2n, basenode, linknode, flags)
        return struct.pack(self.deltaheader, *fields)
870 870
# Maps a changegroup version identifier to its (packer, unpacker) classes.
_packermap = {'01': (cg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (cg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (cg3packer, cg3unpacker),
}
877 877
def allsupportedversions(repo):
    """Return the set of all changegroup versions this code understands.

    '03' is kept only when one of the experimental knobs is enabled or
    the repo already carries the treemanifest requirement.
    """
    versions = set(_packermap)
    wantv03 = (repo.ui.configbool('experimental', 'changegroup3')
               or repo.ui.configbool('experimental', 'treemanifest')
               or 'treemanifest' in repo.requirements)
    if not wantv03:
        versions.discard('03')
    return versions
885 885
def supportedincomingversions(repo):
    """Return the changegroup versions that can be applied to the repo.

    Every supported version can currently be received.
    """
    return allsupportedversions(repo)
889 889
def supportedoutgoingversions(repo):
    """Return the changegroup versions that can be created from the repo."""
    versions = allsupportedversions(repo)
    if 'treemanifest' in repo.requirements:
        # Versions 01 and 02 support only flat manifests, and converting
        # between flat and tree manifests on the fly is just too
        # expensive: tree manifests are hashed differently, so all of
        # history would have to be converted. Instead, simply don't even
        # pretend to support versions 01 and 02.
        versions.difference_update(['01', '02'])
    if NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, which stripping
        # and unbundling need.
        versions.difference_update(['01', '02'])
    return versions
907 907
def localversion(repo):
    """Return the best version for locally-used bundles.

    Used for bundles meant to stay local, such as those from strip and
    shelve, and temporary bundles: simply the newest producible version.
    """
    return max(supportedoutgoingversions(repo))
912 912
def safeversion(repo):
    """Return the smallest version that clients of the repo can be assumed
    to support.

    For example, every hg version that supports generaldelta also supports
    changegroup 02, so '01' can be dropped on generaldelta repos.
    """
    versions = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        versions.discard('01')
    assert versions
    return min(versions)
922 922
def getbundler(version, repo, bundlecaps=None):
    """Instantiate the packer class for changegroup ``version``."""
    assert version in supportedoutgoingversions(repo)
    packercls = _packermap[version][0]
    return packercls(repo, bundlecaps)
926 926
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the unpacker class for changegroup ``version``."""
    unpackercls = _packermap[version][1]
    return unpackercls(fh, alg, extras=extras)
929 929
930 930 def _changegroupinfo(repo, nodes, source):
931 931 if repo.ui.verbose or source == 'bundle':
932 932 repo.ui.status(_("%d changesets found\n") % len(nodes))
933 933 if repo.ui.debugflag:
934 934 repo.ui.debug("list of changesets:\n")
935 935 for node in nodes:
936 936 repo.ui.debug("%s\n" % hex(node))
937 937
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Generate a changegroup for ``outgoing`` and return an unbundler
    over it, annotated with the changeset count."""
    stream = makestream(repo, outgoing, version, source,
                        fastpath=fastpath, bundlecaps=bundlecaps)
    buffered = util.chunkbuffer(stream)
    extras = {'clcount': len(outgoing.missing)}
    return getunbundler(version, buffered, None, extras)
944 944
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None):
    """Return an iterator over the raw chunks of a version ``version``
    changegroup covering the revisions in ``outgoing``."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps)

    repo = repo.unfiltered()
    common = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    heads.sort()
    # Take the fast path when told to, or when all (unfiltered) heads
    # have been requested -- we then know all linkrevs will be pulled
    # by the client.
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(common, csets, fastpathlinkrev, source)
963 963
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the file revlog groups from ``source`` to ``repo``.

    ``needfiles`` maps filename to the set of nodes that must arrive;
    entries are consumed as revisions are added, and any node still
    unresolved at the end aborts the transaction. Returns a
    ``(revisions, files)`` tuple of how much was added.
    """
    revisions = 0
    files = 0
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        fname = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % fname)
        repo.ui.progress(_('files'), files, unit=_('files'),
                         total=expectedfiles)
        flog = repo.file(fname)
        oldlen = len(flog)
        try:
            if not flog.addgroup(source.deltaiter(), revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(flog) - oldlen
        needs = needfiles.get(fname)
        if needs is not None:
            # Tick off every newly-added node that we were waiting for;
            # anything we were not waiting for is spurious.
            for added in xrange(oldlen, len(flog)):
                node = flog.node(added)
                if node not in needs:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
                needs.remove(node)
            if not needs:
                del needfiles[fname]
    repo.ui.progress(_('files'), None)

    # Everything still listed in needfiles must already resolve to a
    # revision, or the incoming data was incomplete.
    for fname, needs in needfiles.iteritems():
        flog = repo.file(fname)
        for node in needs:
            try:
                flog.rev(node)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (fname, hex(node)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now