##// END OF EJS Templates
changegroup: don't add a second trailing '/' in dir name...
Martin von Zweigbergk -
r27691:b0d23a55 default
parent child Browse files
Show More
@@ -1,1095 +1,1095 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import tempfile
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 hex,
18 18 nullid,
19 19 nullrev,
20 20 short,
21 21 )
22 22
23 23 from . import (
24 24 branchmap,
25 25 dagutil,
26 26 discovery,
27 27 error,
28 28 mdiff,
29 29 phases,
30 30 util,
31 31 )
32 32
33 33 _CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
34 34 _CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
35 35 _CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"
36 36
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    data = stream.read(n)
    got = len(data)
    if got < n:
        # a short read means the peer hung up or the bundle is truncated
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (got, n))
    return data
45 45
def getchunk(stream):
    """return the next chunk from stream as a string"""
    header = readexactly(stream, 4)
    length = struct.unpack(">l", header)[0]
    if length > 4:
        # the stored length counts the 4 header bytes themselves
        return readexactly(stream, length - 4)
    if length:
        # 1..4 (or negative) can never frame a valid chunk
        raise error.Abort(_("invalid chunk length %d") % length)
    # a zero length marks the end of the current chunk group
    return ""
55 55
def chunkheader(length):
    """return a changegroup chunk header (string)"""
    # the on-wire length includes the 4 bytes of the header itself
    return struct.pack(">l", 4 + length)
59 59
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # a zero-length chunk terminates the current chunk group on the wire
    return struct.pack('>l', 0)
63 63
def combineresults(results):
    """logic to combine 0 or more addchangegroup results into one

    Each input is an addchangegroup() return value:
    0 (nothing changed / no source), 1 (same head count),
    1+n (n heads added), or -1-n (n heads removed).

    BUG FIX: the original 'break'ed out of the loop on a 0 result but
    then fell through to the head-delta checks below, which could
    overwrite the 0 (e.g. [3, 0] returned 3). Return 0 immediately
    instead, as the comment always promised.
    """
    changedheads = 0
    for ret in results:
        # If any changegroup result is 0, return 0
        if ret == 0:
            return 0
        if ret < -1:
            changedheads += ret + 1
        elif ret > 1:
            changedheads += ret - 1
    if changedheads > 0:
        return 1 + changedheads
    if changedheads < 0:
        return -1 + changedheads
    # no net head change anywhere: plain success
    return 1
82 82
# Map of bundle type name -> (on-disk header magic, compression type).
bundletypes = {
    "": ("", None), # only when using unbundle on ssh and old http servers
                    # since the unification ssh accepts a header but there
                    # is no capability signaling it.
    "HG20": (), # special-cased below
    "HG10UN": ("HG10UN", None),
    # NOTE: header is only 'HG10' — presumably the bz2 stream's own 'BZ'
    # magic completes the 'HG10BZ' prefix on disk; verify before changing.
    "HG10BZ": ("HG10", 'BZ'),
    "HG10GZ": ("HG10GZ", 'GZ'),
}

# hgweb uses this list to communicate its preferred type
bundlepriority = ['HG10GZ', 'HG10BZ', 'HG10UN']
95 95
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if filename:
            fh = vfs.open(filename, "wb") if vfs else open(filename, "wb")
        else:
            fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, "wb")
        cleanup = filename
        for chunk in chunks:
            fh.write(chunk)
        # all chunks written: disarm the error-cleanup path below
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            # a write failed part-way: remove the incomplete bundle
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
127 127
def writebundle(ui, cg, filename, bundletype, vfs=None, compression=None):
    """Write a bundle file and return its filename.

    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    bz2 compression can be turned off.
    The bundle file will be deleted in case of errors.
    """

    if bundletype == "HG20":
        from . import bundle2
        bundle = bundle2.bundle20(ui)
        bundle.setcompression(compression)
        part = bundle.newpart('changegroup', data=cg.getchunks())
        part.addparam('version', cg.version)
        chunkiter = bundle.getchunks()
    else:
        # compression argument is only for the bundle2 case
        assert compression is None
        if cg.version != '01':
            raise error.Abort(_('old bundle types only supports v1 '
                                'changegroups'))
        header, comp = bundletypes[bundletype]
        if comp not in util.compressors:
            raise error.Abort(_('unknown stream compression type: %s')
                              % comp)
        compressor = util.compressors[comp]()
        payload = cg.getchunks()
        def gen():
            # magic header first, then the compressed changegroup body
            yield header
            for chunk in payload:
                yield compressor.compress(chunk)
            yield compressor.flush()
        chunkiter = gen()

    # parse the changegroup data, otherwise we will block
    # in case of sshrepo because we don't know the end of the stream
    #
    # an empty chunkgroup is the end of the changegroup
    # a changegroup has at least 2 chunkgroups (changelog and manifest).
    # after that, an empty chunkgroup is the end of the changegroup
    return writechunks(ui, chunkiter, filename, vfs=vfs)
170 170
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    # wire format of one delta header, and its packed byte size
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '01'
    def __init__(self, fh, alg):
        # fh: file-like object carrying the raw (possibly compressed) stream
        # alg: compression name ('UN', 'BZ', 'GZ', ...) used to pick a
        # decompressor from util.decompressors
        if alg == 'UN':
            alg = None # get more modern without breaking too much
        if not alg in util.decompressors:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            # NOTE(review): presumably a variant that tolerates the 'BZ'
            # magic having been consumed already (see headerlessfixup) —
            # confirm against util.decompressors.
            alg = '_truncatedBZ'
        self._stream = util.decompressors[alg](fh)
        self._type = alg
        # optional per-chunk progress callback, set by apply()
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read one chunk-length header; return the payload size.

        Returns 0 for the empty chunk that terminates a group. The
        stored length includes the 4 length bytes themselves.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Split a parsed delta header into its component fields.

        cg1 has no explicit delta base: it is the previous node in the
        stream, or p1 for the first delta of a group.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta chunk; return {} at the end of the group."""
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = struct.unpack(self.deltaheader, headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return {'node': node, 'p1': p1, 'p2': p2, 'cs': cs,
                'deltabase': deltabase, 'delta': delta, 'flags': flags}

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # an empty chunkgroup is the end of the changegroup
        # a changegroup has at least 2 chunkgroups (changelog and manifest).
        # after that, an empty chunkgroup is the end of the changegroup
        empty = False
        count = 0
        while not empty or count <= 2:
            empty = True
            count += 1
            while True:
                chunk = getchunk(self)
                if not chunk:
                    break
                empty = False
                yield chunkheader(len(chunk))
                pos = 0
                # re-emit large chunks in 1MB slices
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog, numchanges):
        """Consume the manifest group and add it to the repo."""
        # We know that we'll never have more manifests than we had
        # changesets.
        self.callback = prog(_('manifests'), numchanges)
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        repo.manifest.addgroup(self, revmap, trp)
        repo.ui.progress(_('manifests'), None)

    def apply(self, repo, srctype, url, emptyok=False,
              targetphase=phases.draft, expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        # remembered so _addchangegroupfiles can special-case initial clones
        wasempty = (len(repo.changelog) == 0)
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        tr = repo.transaction("\n".join([srctype, util.hidepassword(url)]))
        try:
            # The transaction could have been created before and already
            # carries source information. In this case we use the top
            # level data. We overwrite the argument because we need to use
            # the top level value (if they exist) in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup', throw=True, **tr.hookargs)

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = cl.heads()

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            # small callable used as the per-chunk progress callback
            class prog(object):
                def __init__(self, step, total):
                    self._step = step
                    self._total = total
                    self._count = 1
                def __call__(self):
                    repo.ui.progress(self._step, self._count, unit=_('chunks'),
                                     total=self._total)
                    self._count += 1
            self.callback = prog(_('changesets'), expectedtotal)

            # collect the set of files touched, to size the 'files' progress
            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.read(node)[3])

            self.changelogheader()
            srccontent = cl.addgroup(self, csmap, trp,
                                     addrevisioncb=onchangelog)
            efiles = len(efiles)

            if not (srccontent or emptyok):
                raise error.Abort(_("received changelog group is empty"))
            clend = len(cl)
            changesets = clend - clstart
            repo.ui.progress(_('changesets'), None)

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            self._unpackmanifests(repo, revmap, trp, prog, changesets)

            needfiles = {}
            if repo.ui.configbool('server', 'validate', default=False):
                # validate incoming csets have their manifests
                for cset in xrange(clstart, clend):
                    mfnode = repo.changelog.read(repo.changelog.node(cset))[0]
                    mfest = repo.manifest.readdelta(mfnode)
                    # store file nodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            self.callback = None
            pr = prog(_('files'), efiles)
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, pr, needfiles, wasempty)
            revisions += newrevs
            files += newfiles

            # dh = net change in head count, ignoring closed branch heads
            dh = 0
            if oldheads:
                heads = cl.heads()
                dh = len(heads) - len(oldheads)
                for h in heads:
                    if h not in oldheads and repo[h].closesbranch():
                        dh -= 1
            htext = ""
            if dh:
                htext = _(" (%+d heads)") % dh

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                # expose the first new node to hooks, without clobbering a
                # 'node' a surrounding transaction may already have set
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                repo.hook('pretxnchangegroup', throw=True, **hookargs)

            added = [cl.node(r) for r in xrange(clstart, clend)]
            publishing = repo.publishing()
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if publishing:
                    phases.advanceboundary(repo, tr, phases.public, srccontent)
                else:
                    # Those changesets have been pushed from the outside, their
                    # phases are going to be pushed alongside. Therefor
                    # `targetphase` is ignored.
                    phases.advanceboundary(repo, tr, phases.draft, srccontent)
                    phases.retractboundary(repo, tr, phases.draft, added)
            elif srctype != 'strip':
                # publishing only alter behavior during push
                #
                # strip should not touch boundary at all
                phases.retractboundary(repo, tr, targetphase, added)

            if changesets > 0:
                if srctype != 'strip':
                    # During strip, branchcache is invalid but coming call to
                    # `destroyed` will repair it.
                    # In other case we can safely update cache on disk.
                    branchmap.updatecache(repo.filtered('served'))

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    # forcefully update the on-disk branch cache
                    repo.ui.debug("updating the branch cache\n")
                    repo.hook("changegroup", **hookargs)

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        repo.hook("incoming", **args)

                    newheads = [h for h in repo.heads() if h not in oldheads]
                    repo.ui.log("incoming",
                                "%s incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))

            tr.close()

        finally:
            tr.release()
            repo.ui.flush()
        # never return 0 here:
        if dh < 0:
            return dh - 1
        else:
            return dh + 1
486 486
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # cg2 carries the delta base explicitly in the header, so
        # prevnode is not needed; revlog flags are always 0 here
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
502 502
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags, so the only changes from cg2 are the delta header and
    version number.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '03'

    def _deltaheader(self, headertuple, prevnode):
        # the v3 header already carries all six fields, including flags
        return headertuple
517 517
class headerlessfixup(object):
    """File-like wrapper re-prepending bytes already read from a stream.

    'h' holds header bytes that were consumed from 'fh' (e.g. to sniff
    the bundle type); reads drain 'h' first, then fall through to the
    underlying stream.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh
    def read(self, n):
        if not self._h:
            return readexactly(self._fh, n)
        d, self._h = self._h[:n], self._h[n:]
        if len(d) < n:
            # buffered header exhausted mid-read: top up from the stream
            d += readexactly(self._fh, n - len(d))
        return d
529 529
530 530 def _moddirs(files):
531 531 """Given a set of modified files, find the list of modified directories.
532 532
533 533 This returns a list of (path to changed dir, changed dir) tuples,
534 534 as that's what the one client needs anyway.
535 535
536 536 >>> _moddirs(['a/b/c.py', 'a/b/c.txt', 'a/d/e/f/g.txt', 'i.txt', ])
537 537 [('/', 'a/'), ('a/', 'b/'), ('a/', 'd/'), ('a/d/', 'e/'), ('a/d/e/', 'f/')]
538 538
539 539 """
540 540 alldirs = set()
541 541 for f in files:
542 542 path = f.split('/')[:-1]
543 543 for i in xrange(len(path) - 1, -1, -1):
544 544 dn = '/'.join(path[:i])
545 545 current = dn + '/', path[i] + '/'
546 546 if current in alldirs:
547 547 break
548 548 alldirs.add(current)
549 549 return sorted(alldirs)
550 550
class cg1packer(object):
    """Packer producing version '01' changegroup streams from a repo."""
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    version = '01'
    def __init__(self, repo, bundlecaps=None):
        """Given a source repo, construct a bundler.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle.
        """
        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        # experimental config: bundle.reorder
        # None = auto (decided per revlog), otherwise a forced bool
        reorder = repo.ui.config('bundle', 'reorder', 'auto')
        if reorder == 'auto':
            reorder = None
        else:
            reorder = util.parsebool(reorder)
        self._repo = repo
        self._reorder = reorder
        self._progress = repo.ui.progress
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None

    def close(self):
        """Return the empty chunk that terminates a chunk group."""
        return closechunk()

    def fileheader(self, fname):
        """Return the chunk announcing the start of 'fname's deltas."""
        return chunkheader(len(fname)) + fname

    def group(self, nodelist, revlog, lookup, units=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield self.close()
            return

        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (revlog._generaldelta and self._reorder is None) or self._reorder:
            dag = dagutil.revlogdag(revlog)
            revs = set(revlog.rev(n) for n in nodelist)
            revs = dag.linearize(revs)
        else:
            revs = sorted([revlog.rev(n) for n in nodelist])

        # add the parent of the first rev
        p = revlog.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        total = len(revs) - 1
        msgbundling = _('bundling')
        for r in xrange(len(revs) - 1):
            if units is not None:
                self._progress(msgbundling, r + 1, unit=units, total=total)
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(revlog.node(curr))
            for c in self.revchunk(revlog, curr, prev, linknode):
                yield c

        if units is not None:
            self._progress(msgbundling, None)
        yield self.close()

    # filter any nodes that claim to be part of the known set
    def prune(self, revlog, missing, commonrevs):
        """Drop nodes whose introducing changeset the receiver has."""
        rr, rl = revlog.rev, revlog.linkrev
        return [n for n in missing if rl(rr(n)) not in commonrevs]

    def _packmanifests(self, mfnodes, tmfnodes, lookuplinknode):
        """Pack flat manifests into a changegroup stream."""
        ml = self._repo.manifest
        size = 0
        for chunk in self.group(
                mfnodes, ml, lookuplinknode, units=_('manifests')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (manifests)\n') % size)
        # It looks odd to assert this here, but tmfnodes doesn't get
        # filled in until after we've called lookuplinknode for
        # sending root manifests, so the only way to tell the streams
        # got crossed is to check after we've done all the work.
        assert not tmfnodes

    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)'''
        repo = self._repo
        cl = repo.changelog
        ml = repo.manifest

        clrevorder = {}
        mfs = {} # needed manifests
        tmfnodes = {}
        fnodes = {} # needed file nodes
        # maps manifest node id -> set(changed files)
        mfchangedfiles = {}

        # Callback for the changelog, used to collect changed files and manifest
        # nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)
            n = c[0]
            # record the first changeset introducing this manifest version
            mfs.setdefault(n, x)
            # Record a complete list of potentially-changed files in
            # this manifest.
            mfchangedfiles.setdefault(n, set()).update(c[3])
            return x

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0
        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (changelog)\n') % size)

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)
        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        if fastpathlinkrev:
            lookupmflinknode = mfs.__getitem__
        else:
            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = mfs[x]
                # We no longer actually care about reading deltas of
                # the manifest here, because we already know the list
                # of changed files, so for treemanifests (which
                # lazily-load anyway to *generate* a readdelta) we can
                # just load them with read() and then we'll actually
                # be able to correctly load node IDs from the
                # submanifest entries.
                if 'treemanifest' in repo.requirements:
                    mdata = ml.read(x)
                else:
                    mdata = ml.readfast(x)
                for f in mfchangedfiles[x]:
                    try:
                        n = mdata[f]
                    except KeyError:
                        continue
                    # record the first changeset introducing this filelog
                    # version
                    fclnodes = fnodes.setdefault(f, {})
                    fclnode = fclnodes.setdefault(n, clnode)
                    if clrevorder[clnode] < clrevorder[fclnode]:
                        fclnodes[n] = clnode
                # gather list of changed treemanifest nodes
                if 'treemanifest' in repo.requirements:
                    submfs = {'/': mdata}
                    for dn, bn in _moddirs(mfchangedfiles[x]):
                        submf = submfs[dn]
                        submf = submf._dirs[bn]
                        submfs[submf.dir()] = submf
                        tmfclnodes = tmfnodes.setdefault(submf.dir(), {})
                        tmfclnodes.setdefault(submf._node, clnode)
                        # NOTE(review): 'fclnode' and 'n' below are leftovers
                        # from the file loop above — it looks like this was
                        # meant to compare/update the treemanifest node's
                        # entry (submf._node) instead; verify upstream.
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            tmfclnodes[n] = clnode
                return clnode

        mfnodes = self.prune(ml, mfs, commonrevs)
        for x in self._packmanifests(
            mfnodes, tmfnodes, lookupmflinknode):
            yield x

        # free memory before generating the (large) filelog part
        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        if not fastpathlinkrev:
            def linknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = cl.node
            def linknodes(filerevlog, fname):
                llr = filerevlog.linkrev
                fln = filerevlog.node
                revs = ((r, llr(r)) for r in filerevlog)
                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

        changedfiles = set()
        for x in mfchangedfiles.itervalues():
            changedfiles.update(x)
        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self.close()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)

    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """Yield file header + delta-group chunks for every changed file."""
        repo = self._repo
        progress = self._progress
        msgbundling = _('bundling')

        total = len(changedfiles)
        # for progress output
        msgfiles = _('files')
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing revlog for %s") % fname)

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
            if filenodes:
                progress(msgbundling, i + 1, item=fname, unit=msgfiles,
                         total=total)
                h = self.fileheader(fname)
                size = len(h)
                yield h
                for chunk in self.group(filenodes, filerevlog, lookupfilelog):
                    size += len(chunk)
                    yield chunk
                self._verbosenote(_('%8.i %s\n') % (size, fname))
        progress(msgbundling, None)

    def deltaparent(self, revlog, rev, p1, p2, prev):
        """Pick the delta base: cg1 always deltas against the previous rev."""
        return prev

    def revchunk(self, revlog, rev, prev, linknode):
        """Yield the header+delta chunks for one revision."""
        node = revlog.node(rev)
        p1, p2 = revlog.parentrevs(rev)
        base = self.deltaparent(revlog, rev, p1, p2, prev)

        prefix = ''
        if revlog.iscensored(base) or revlog.iscensored(rev):
            try:
                delta = revlog.revision(node)
            except error.CensoredNodeError as e:
                # ship the censor tombstone as a full-text replacement
                delta = e.tombstone
            if base == nullrev:
                prefix = mdiff.trivialdiffheader(len(delta))
            else:
                baselen = revlog.rawsize(base)
                prefix = mdiff.replacediffheader(baselen, len(delta))
        elif base == nullrev:
            # no base: send the full text framed as a trivial diff
            delta = revlog.revision(node)
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            delta = revlog.revdiff(base, rev)
        p1n, p2n = revlog.parents(node)
        basenode = revlog.node(base)
        flags = revlog.flags(rev)
        meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
        meta += prefix
        l = len(meta) + len(delta)
        yield chunkheader(l)
        yield meta
        yield delta
    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        """Pack the per-delta wire header for this changegroup version."""
        # do nothing with basenode, it is implicitly the previous one in HG10
        # do nothing with flags, it is implicitly 0 for cg1 and cg2
        return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
865 865
class cg2packer(cg1packer):
    """Packer for version '02' changegroups, which add explicit delta
    bases (generaldelta) to the wire format."""
    version = '02'
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER

    def __init__(self, repo, bundlecaps=None):
        super(cg2packer, self).__init__(repo, bundlecaps)
        if self._reorder is None:
            # cg2 carries the delta base explicitly, so reordering rarely
            # pays off; treat bundle.reorder=auto like bundle.reorder=False.
            self._reorder = False

    def deltaparent(self, revlog, rev, p1, p2, prev):
        dp = revlog.deltaparent(rev)
        # Reuse the stored delta base only when the receiver certainly has
        # it (a parent or the previously sent revision); otherwise chain on
        # prev so we never end up shipping a full revision.
        if dp != nullrev and (dp == p1 or dp == p2 or dp == prev):
            return dp
        return prev

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        # Flags are implicitly 0 in cg1 and cg2, so they are not encoded.
        return struct.pack(self.deltaheader,
                           node, p1n, p2n, basenode, linknode)
889 889
class cg3packer(cg2packer):
    """Packer for version '03' changegroups, which add support for
    exchanging treemanifests (per-directory manifest revlogs)."""
    version = '03'
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER

    def _packmanifests(self, mfnodes, tmfnodes, lookuplinknode):
        # Note that debug prints are super confusing in this code, as
        # tmfnodes gets populated by the calls to lookuplinknode in
        # the superclass's manifest packer. In the future we should
        # probably see if we can refactor this somehow to be less
        # confusing.
        for x in super(cg3packer, self)._packmanifests(
            mfnodes, {}, lookuplinknode):
            yield x
        dirlog = self._repo.manifest.dirlog
        for name, nodes in tmfnodes.iteritems():
            # For now, directory headers are simply file headers with
            # a trailing '/' on the path (already in the name), so do NOT
            # append another '/' here.
            yield self.fileheader(name)
            for chunk in self.group(nodes, dirlog(name), nodes.get):
                yield chunk

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        # cg3 encodes revlog flags explicitly (needed for censored nodes
        # and other flagged revisions).
        return struct.pack(
            self.deltaheader, node, p1n, p2n, basenode, linknode, flags)
914 914
# Map a changegroup wire-format version string to its
# (packer class, unpacker class) pair.
packermap = {'01': (cg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (cg2packer, cg2unpacker),
             # cg3 adds support for exchanging treemanifests
             '03': (cg3packer, cg3unpacker),
            }
921 921
def _changegroupinfo(repo, nodes, source):
    """Report how many changesets are being bundled; in debug mode also
    list each changeset node."""
    if source == 'bundle' or repo.ui.verbose:
        repo.ui.status(_("%d changesets found\n") % len(nodes))
        if repo.ui.debugflag:
            repo.ui.debug("list of changesets:\n")
            for n in nodes:
                repo.ui.debug("%s\n" % hex(n))
929 929
def getsubsetraw(repo, outgoing, bundler, source, fastpath=False):
    """Run the outgoing hooks and return a raw changegroup generator for
    the revisions described by ``outgoing``."""
    repo = repo.unfiltered()
    common = outgoing.common
    missing = outgoing.missing
    heads = outgoing.missingheads
    heads.sort()
    # Take the fast path when told to, or when every (unfiltered) head has
    # been requested: then we know all linkrevs will be pulled by the
    # client anyway.
    usefastpath = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, missing, source)
    return bundler.generate(common, missing, usefastpath, source)
945 945
def getsubset(repo, outgoing, bundler, source, fastpath=False):
    """Like getsubsetraw, but wrap the chunk stream in the unpacker that
    matches the bundler's version."""
    raw = getsubsetraw(repo, outgoing, bundler, source, fastpath)
    unpackerclass = packermap[bundler.version][1]
    return unpackerclass(util.chunkbuffer(raw), None)
949 949
def changegroupsubset(repo, roots, heads, source, version='01'):
    """Compute a changegroup of all nodes that are descendants of any of
    the roots and ancestors of any of the heads.

    Returns a chunkbuffer object whose read() method will return
    successive changegroup chunks.

    It is fairly complex as determining which filenodes and which
    manifest nodes need to be included for the changeset to be complete
    is non-trivial.

    Another wrinkle is doing the reverse, figuring out which changeset in
    the changegroup a particular filenode or manifestnode belongs to.
    """
    cl = repo.changelog
    if not roots:
        roots = [nullid]
    # Candidate discovery bases: the parents of the roots that are not
    # themselves in the requested set.
    parents = [p for r in roots for p in cl.parents(r) if p != nullid]
    # TODO: remove call to nodesbetween.
    csets, roots, heads = cl.nodesbetween(roots, heads)
    included = set(csets)
    discbases = [n for n in parents if n not in included]
    outgoing = discovery.outgoing(cl, discbases, heads)
    bundler = packermap[version][0](repo)
    return getsubset(repo, outgoing, bundler, source)
976 976
def getlocalchangegroupraw(repo, source, outgoing, bundlecaps=None,
                           version='01'):
    """Like getbundle, but taking a discovery.outgoing as an argument.

    This is only implemented for local repos and reuses potentially
    precomputed sets in outgoing. Returns a raw changegroup generator.
    """
    if not outgoing.missing:
        return None
    packerclass = packermap[version][0]
    return getsubsetraw(repo, outgoing, packerclass(repo, bundlecaps), source)
987 987
def getlocalchangegroup(repo, source, outgoing, bundlecaps=None,
                        version='01'):
    """Like getbundle, but taking a discovery.outgoing as an argument.

    This is only implemented for local repos and reuses potentially
    precomputed sets in outgoing.
    """
    if not outgoing.missing:
        return None
    packerclass = packermap[version][0]
    return getsubset(repo, outgoing, packerclass(repo, bundlecaps), source)
998 998
def computeoutgoing(repo, heads, common):
    """Compute which revs are outgoing given a set of common and a set of
    heads.

    This is a separate function so extensions can have access to the
    logic.

    Returns a discovery.outgoing object.
    """
    cl = repo.changelog
    if not common:
        common = [nullid]
    else:
        # Drop common nodes the local repo does not actually know about.
        hasnode = cl.hasnode
        common = [n for n in common if hasnode(n)]
    return discovery.outgoing(cl, common, heads or cl.heads())
1017 1017
def getchangegroup(repo, source, heads=None, common=None, bundlecaps=None,
                   version='01'):
    """Like changegroupsubset, but returns the set difference between the
    ancestors of heads and the ancestors of common.

    If heads is None, use the local heads. If common is None, use [nullid].

    The nodes in common might not all be known locally due to the way the
    current discovery protocol works.
    """
    return getlocalchangegroup(repo, source,
                               computeoutgoing(repo, heads, common),
                               bundlecaps=bundlecaps, version=version)
1031 1031
def changegroup(repo, basenodes, source):
    """Return a changegroup from basenodes to the current heads."""
    # Going through changegroupsubset() avoids a race (issue1320).
    heads = repo.heads()
    return changegroupsubset(repo, basenodes, heads, source)
1035 1035
def _addchangegroupfiles(repo, source, revmap, trp, pr, needfiles, wasempty):
    """Apply the file (and treemanifest directory) parts of a changegroup.

    source - the unpacker to stream filelog headers/groups from
    revmap - maps changelog nodes to linkrevs for addgroup
    trp - transaction proxy used while adding revlog groups
    pr - progress callback invoked once per streamed revlog
    needfiles - dict of filename -> set of file nodes the changesets
        require; entries are consumed as revisions arrive and anything
        left over is verified against the local store at the end
    wasempty - True if the repo was empty before this changegroup;
        required before the 'treemanifest' requirement may be added

    Returns (revisions, files): the number of file revisions and the
    number of distinct files added (directories are not counted).
    """
    revisions = 0
    files = 0
    while True:
        chunkdata = source.filelogheader()
        if not chunkdata:
            # Empty header chunk terminates the sequence of revlog groups.
            break
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        pr()
        # Directory (tree manifest) groups are distinguished from plain
        # files by a trailing '/' in the name.
        directory = (f[-1] == '/')
        if directory:
            # a directory using treemanifests
            # TODO fixup repo requirements safely
            if 'treemanifest' not in repo.requirements:
                if not wasempty:
                    raise error.Abort(_(
                        "bundle contains tree manifests, but local repo is "
                        "non-empty and does not use tree manifests"))
                # Repo was empty: switch it to tree manifests on the fly.
                repo.requirements.add('treemanifest')
                repo._applyopenerreqs()
                repo._writerequirements()
                repo.manifest._treeondisk = True
                repo.manifest._treeinmem = True
            fl = repo.manifest.dirlog(f)
        else:
            fl = repo.file(f)
        o = len(fl)  # revlog length before the group, to count additions
        try:
            if not fl.addgroup(source, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        if not directory:
            revisions += len(fl) - o
            files += 1
            if f in needfiles:
                # Tick off each newly added node against what the
                # changesets said we need; extras are a protocol error.
                needs = needfiles[f]
                for new in xrange(o, len(fl)):
                    n = fl.node(new)
                    if n in needs:
                        needs.remove(n)
                    else:
                        raise error.Abort(
                            _("received spurious file revlog entry"))
                if not needs:
                    del needfiles[f]
    repo.ui.progress(_('files'), None)

    # Anything still listed in needfiles never arrived in this bundle;
    # it is only acceptable if the local store already has it.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now