##// END OF EJS Templates
changegroup: move generatefiles() from narrow...
Gregory Szorc
r38925:a06aab27 default
parent child Browse files
Show More
@@ -1,180 +1,145 b''
1 1 # narrowchangegroup.py - narrow clone changegroup creation and consumption
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from mercurial.i18n import _
11 11 from mercurial import (
12 12 changegroup,
13 error,
14 13 extensions,
15 14 node,
16 15 util,
17 16 )
18 17
19 18 def setup():
20 def generatefiles(orig, self, changedfiles, linknodes, commonrevs,
21 source):
22 changedfiles = list(filter(self._filematcher, changedfiles))
23
24 if getattr(self, 'is_shallow', False):
25 # See comment in generate() for why this sadness is a thing.
26 mfdicts = self._mfdicts
27 del self._mfdicts
28 # In a shallow clone, the linknodes callback needs to also include
29 # those file nodes that are in the manifests we sent but weren't
30 # introduced by those manifests.
31 commonctxs = [self._repo[c] for c in commonrevs]
32 oldlinknodes = linknodes
33 clrev = self._repo.changelog.rev
34 def linknodes(flog, fname):
35 for c in commonctxs:
36 try:
37 fnode = c.filenode(fname)
38 self.clrev_to_localrev[c.rev()] = flog.rev(fnode)
39 except error.ManifestLookupError:
40 pass
41 links = oldlinknodes(flog, fname)
42 if len(links) != len(mfdicts):
43 for mf, lr in mfdicts:
44 fnode = mf.get(fname, None)
45 if fnode in links:
46 links[fnode] = min(links[fnode], lr, key=clrev)
47 elif fnode:
48 links[fnode] = lr
49 return links
50 return orig(self, changedfiles, linknodes, commonrevs, source)
51 extensions.wrapfunction(
52 changegroup.cg1packer, 'generatefiles', generatefiles)
53
    def generate(orig, self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)

        Wrapper around changegroup.cg1packer.generate that implements
        narrow/ellipsis-aware changelog traversal.  Other than delegating
        to orig, the only deviation in logic from normal hg's generate is
        marked with BEGIN/END NARROW HACK.
        '''
        if not util.safehasattr(self, 'full_nodes'):
            # not sending a narrow bundle; defer entirely to the wrapped
            # implementation.
            for x in orig(self, commonrevs, clnodes, fastpathlinkrev, source):
                yield x
            return

        repo = self._repo
        cl = repo.changelog
        mfl = repo.manifestlog
        mfrevlog = mfl._revlog

        clrevorder = {}
        mfs = {} # needed manifests
        fnodes = {} # needed file nodes
        changedfiles = set()

        # Callback for the changelog, used to collect changed files and manifest
        # nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)
            # BEGIN NARROW HACK
            #
            # Only update mfs if x is going to be sent. Otherwise we
            # end up with bogus linkrevs specified for manifests and
            # we skip some manifest nodes that we should otherwise
            # have sent.
            if x in self.full_nodes or cl.rev(x) in self.precomputed_ellipsis:
                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Set this narrow-specific dict so we have the lowest manifest
                # revnum to look up for this cl revnum. (Part of mapping
                # changelog ellipsis parents to manifest ellipsis parents)
                self.next_clrev_to_localrev.setdefault(cl.rev(x),
                                                       mfrevlog.rev(n))
            # We can't trust the changed files list in the changeset if the
            # client requested a shallow clone.
            if self.is_shallow:
                changedfiles.update(mfl[c[0]].read().keys())
            else:
                changedfiles.update(c[3])
            # END NARROW HACK
            # Record a complete list of potentially-changed files in
            # this manifest.
            return x

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0
        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (changelog)\n') % size)

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)
        # Shallow clones also don't work correctly with fastpathlinkrev
        # because file nodes may need to be sent for a manifest even if they
        # weren't introduced by that manifest.
        fastpathlinkrev = fastpathlinkrev and not self.is_shallow

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                fastpathlinkrev, mfs, fnodes, source):
            yield chunk
        # BEGIN NARROW HACK
        mfdicts = None
        if self.is_shallow:
            # Preserve (manifest-dict, linkrev-node) pairs before mfs is
            # cleared; generatefiles() needs them to repair linknodes for
            # file nodes not introduced by a sent manifest.
            mfdicts = [(self._repo.manifestlog[n].read(), lr)
                       for (n, lr) in mfs.iteritems()]
        # END NARROW HACK
        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        if not fastpathlinkrev:
            def linknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = cl.node
            def linknodes(filerevlog, fname):
                llr = filerevlog.linkrev
                fln = filerevlog.node
                revs = ((r, llr(r)) for r in filerevlog)
                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

        # BEGIN NARROW HACK
        #
        # We need to pass the mfdicts variable down into
        # generatefiles(), but more than one command might have
        # wrapped generatefiles so we can't modify the function
        # signature. Instead, we pass the data to ourselves using an
        # instance attribute. I'm sorry.
        self._mfdicts = mfdicts
        # END NARROW HACK
        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self.close()

        if clnodes:
            repo.hook('outgoing', node=node.hex(clnodes[0]), source=source)
    extensions.wrapfunction(changegroup.cg1packer, 'generate', generate)
@@ -1,1248 +1,1284 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from . import (
23 23 dagutil,
24 24 error,
25 25 manifest,
26 26 match as matchmod,
27 27 mdiff,
28 28 phases,
29 29 pycompat,
30 30 repository,
31 31 revlog,
32 32 util,
33 33 )
34 34
35 35 from .utils import (
36 36 stringutil,
37 37 )
38 38
39 39 _CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
40 40 _CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
41 41 _CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"
42 42
43 43 LFS_REQUIREMENT = 'lfs'
44 44
45 45 readexactly = util.readexactly
46 46
def getchunk(stream):
    """Read the next length-prefixed chunk from stream and return its payload.

    A chunk is a 4-byte big-endian length (which includes the header itself)
    followed by the payload.  A zero length marks a terminator and yields "".
    """
    header = readexactly(stream, 4)
    length = struct.unpack(">l", header)[0]
    if length > 4:
        return readexactly(stream, length - 4)
    if length:
        # Lengths 1-4 (or negative) cannot describe a valid chunk.
        raise error.Abort(_("invalid chunk length %d") % length)
    return ""
56 56
def chunkheader(length):
    """Return the 4-byte header for a chunk whose payload is *length* bytes.

    The on-wire length field counts the header itself, hence the +4.
    """
    total = length + 4
    return struct.pack(">l", total)
60 60
def closechunk():
    """Return the header for a zero-length chunk (the stream terminator)."""
    zero = 0
    return struct.pack(">l", zero)
64 64
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    outfh = None
    # Path of a partially-written temp file to remove on failure.
    pending = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            outfh = os.fdopen(fd, r"wb")
            pending = filename
        elif vfs:
            outfh = vfs.open(filename, "wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            outfh = open(filename, "wb", 131072)
        for chunk in chunks:
            outfh.write(chunk)
        # Everything was written; keep the file.
        pending = None
        return filename
    finally:
        if outfh is not None:
            outfh.close()
        if pending is not None:
            if filename and vfs:
                vfs.unlink(pending)
            else:
                os.unlink(pending)
98 98
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    # Wire format of one delta header; subclasses override for cg2/cg3.
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """Wrap stream *fh*, decompressing with bundle compression *alg*."""
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            # Bundle BZ streams lack the normal bzip2 header; use the
            # engine variant that tolerates that.
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Per-chunk progress callback; set by consumers such as apply().
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk header and return the payload length (0 = end)."""
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Decode one cg1 delta header into (node, p1, p2, deltabase, cs, flags).

        cg1 has no explicit delta base: deltas chain against the previous
        node in the group, or p1 for the first entry.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta chunk; return {} at the end-of-group marker."""
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = struct.unpack(self.deltaheader, headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit the payload in 1MB slices to bound memory use.
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
        yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Apply the manifest portion of the stream to *repo*."""
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            # New changesets are appended, so the next rev is the current
            # changelog length.
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            # Union of files touched by incoming changesets, used later to
            # size the filelog progress.
            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    # Closed branch heads don't count toward the head delta.
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 adds generaldelta support: the delta base is carried explicitly in
    the header rather than being inferred from chain position. Everything
    else about the data is identical to cg1.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # The explicit deltabase makes prevnode irrelevant here.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
457 457
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # cg3 carries flags on the wire, so nothing needs deriving here.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Unpack root manifests, then any directory (tree) manifest groups."""
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            dirlog = repo.manifestlog._revlog.dirlog(d)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
484 484
class headerlessfixup(object):
    """Wrap a stream, replaying bytes *h* already consumed from its front.

    Reads are satisfied from the saved header first, then fall through to
    the underlying stream.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh
    def read(self, n):
        if not self._h:
            return readexactly(self._fh, n)
        buffered, self._h = self._h[:n], self._h[n:]
        if len(buffered) < n:
            # Header exhausted mid-read; top up from the real stream.
            buffered += readexactly(self._fh, n - len(buffered))
        return buffered
496 496
def ellipsisdata(packer, rev, revlog_, p1, p2, data, linknode):
    """Serialize one ellipsis revision as a full-text changegroup chunk.

    Builds a delta header (via packer.builddeltaheader) with the
    REVIDX_ELLIPSIS flag set and a trivial diff header, so the receiver
    stores *data* as a full snapshot rather than a delta.
    """
    n = revlog_.node(rev)
    p1n, p2n = revlog_.node(p1), revlog_.node(p2)
    flags = revlog_.flags(rev)
    flags |= revlog.REVIDX_ELLIPSIS
    meta = packer.builddeltaheader(
        n, p1n, p2n, nullid, linknode, flags)
    # TODO: try and actually send deltas for ellipsis data blocks
    diffheader = mdiff.trivialdiffheader(len(data))
    l = len(meta) + len(diffheader) + len(data)
    return ''.join((chunkheader(l),
                    meta,
                    diffheader,
                    data))
511 511
512 512 class cg1packer(object):
513 513 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
514 514 version = '01'
    def __init__(self, repo, filematcher, bundlecaps=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.
        """
        assert filematcher
        self._filematcher = filematcher

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        # experimental config: bundle.reorder
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            # None lets _sortgroup() decide based on the revlog layout.
            reorder = None
        else:
            reorder = stringutil.parsebool(reorder)
        self._repo = repo
        self._reorder = reorder
        # Only emit size notes when verbose but not debug (debug output
        # already covers it).
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None
545 545
    def close(self):
        """Emit the group terminator, rotating ellipsis-serving state.

        When serving ellipsis (narrow) clones, the per-group changelog-rev
        to local-rev mapping is promoted from next_clrev_to_localrev here,
        between groups.
        """
        # Ellipses serving mode.
        getattr(self, 'clrev_to_localrev', {}).clear()
        if getattr(self, 'next_clrev_to_localrev', {}):
            self.clrev_to_localrev = self.next_clrev_to_localrev
            del self.next_clrev_to_localrev
            self.changelog_done = True

        return closechunk()
555 555
556 556 def fileheader(self, fname):
557 557 return chunkheader(len(fname)) + fname
558 558
559 559 # Extracted both for clarity and for overriding in extensions.
560 560 def _sortgroup(self, revlog, nodelist, lookup):
561 561 """Sort nodes for change group and turn them into revnums."""
562 562 # Ellipses serving mode.
563 563 #
564 564 # In a perfect world, we'd generate better ellipsis-ified graphs
565 565 # for non-changelog revlogs. In practice, we haven't started doing
566 566 # that yet, so the resulting DAGs for the manifestlog and filelogs
567 567 # are actually full of bogus parentage on all the ellipsis
568 568 # nodes. This has the side effect that, while the contents are
569 569 # correct, the individual DAGs might be completely out of whack in
570 570 # a case like 882681bc3166 and its ancestors (back about 10
571 571 # revisions or so) in the main hg repo.
572 572 #
573 573 # The one invariant we *know* holds is that the new (potentially
574 574 # bogus) DAG shape will be valid if we order the nodes in the
575 575 # order that they're introduced in dramatis personae by the
576 576 # changelog, so what we do is we sort the non-changelog histories
577 577 # by the order in which they are used by the changelog.
578 578 if util.safehasattr(self, 'full_nodes') and self.clnode_to_rev:
579 579 key = lambda n: self.clnode_to_rev[lookup(n)]
580 580 return [revlog.rev(n) for n in sorted(nodelist, key=key)]
581 581
582 582 # for generaldelta revlogs, we linearize the revs; this will both be
583 583 # much quicker and generate a much smaller bundle
584 584 if (revlog._generaldelta and self._reorder is None) or self._reorder:
585 585 dag = dagutil.revlogdag(revlog)
586 586 return dag.linearize(set(revlog.rev(n) for n in nodelist))
587 587 else:
588 588 return sorted([revlog.rev(n) for n in nodelist])
589 589
    def group(self, nodelist, revlog, lookup, units=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield self.close()
            return

        revs = self._sortgroup(revlog, nodelist, lookup)

        # add the parent of the first rev; it serves as the base of the
        # first delta and is never itself emitted
        p = revlog.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        progress = None
        if units is not None:
            progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
                                                  total=(len(revs) - 1))
        for r in pycompat.xrange(len(revs) - 1):
            if progress:
                progress.update(r + 1)
            # each rev is delta'd against its predecessor in the sorted order
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(revlog.node(curr))
            for c in self.revchunk(revlog, curr, prev, linknode):
                yield c

        if progress:
            progress.complete()
        yield self.close()
631 631
    # filter any nodes that claim to be part of the known set
    def prune(self, revlog, missing, commonrevs):
        """Drop nodes from *missing* whose linkrev is already common.

        Also drops everything for tree-manifest revlogs whose directory the
        file matcher excludes.
        """
        # TODO this violates storage abstraction for manifests.
        if isinstance(revlog, manifest.manifestrevlog):
            if not self._filematcher.visitdir(revlog._dir[:-1] or '.'):
                return []

        rr, rl = revlog.rev, revlog.linkrev
        return [n for n in missing if rl(rr(n)) not in commonrevs]
641 641
    def _packmanifests(self, dir, mfnodes, lookuplinknode):
        """Pack flat manifests into a changegroup stream."""
        # cg1 has no tree manifests, so dir must always be the root ('').
        assert not dir
        for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
                                lookuplinknode, units=_('manifests')):
            yield chunk
648 648
    def _manifestsdone(self):
        """cg1 has no terminator after the manifest section; emit nothing."""
        return ''
651 651
    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)

        Emits the changelog chunks first (collecting changed files and
        needed manifest nodes along the way), then delegates to
        generatemanifests() and generatefiles(), yields the closing
        chunk, and finally fires the 'outgoing' hook.
        '''
        repo = self._repo
        cl = repo.changelog

        clrevorder = {}
        mfs = {} # needed manifests
        fnodes = {} # needed file nodes
        changedfiles = set()

        # Callback for the changelog, used to collect changed files and manifest
        # nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)
            n = c[0]
            # record the first changeset introducing this manifest version
            mfs.setdefault(n, x)
            # Record a complete list of potentially-changed files in
            # this manifest.
            changedfiles.update(c[3])
            return x

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0
        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (changelog)\n') % size)

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                                            fastpathlinkrev, mfs, fnodes, source):
            yield chunk
        # mfs is no longer needed once manifests are emitted; free it.
        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        if not fastpathlinkrev:
            # Slowpath: linknodes were collected per-file in fnodes while
            # walking manifests above.
            def linknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            # Fastpath: derive linknodes directly from each filelog's
            # stored linkrevs, restricted to the changesets being sent.
            cln = cl.node
            def linknodes(filerevlog, fname):
                llr = filerevlog.linkrev
                fln = filerevlog.node
                revs = ((r, llr(r)) for r in filerevlog)
                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self.close()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
730 730
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        # Work queue mapping directory -> {manifest node: clnode}; '' is the
        # root manifest. Tree sub-manifests discovered while reading are
        # appended here and drained by the loop below.
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Keep the earliest introducing changeset as linknode.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        # Keep the earliest introducing changeset as linknode.
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        size = 0
        # Drain the queue; packing one directory may enqueue more (trees).
        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
            if not dir or prunednodes:
                for x in self._packmanifests(dir, prunednodes,
                                             makelookupmflinknode(dir, nodes)):
                    size += len(x)
                    yield x
        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsdone()
796 796
    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, linknodes, commonrevs, source):
        # Restrict to files selected by the narrow matcher.
        changedfiles = list(filter(self._filematcher, changedfiles))

        if getattr(self, 'is_shallow', False):
            # See comment in generate() for why this sadness is a thing.
            mfdicts = self._mfdicts
            del self._mfdicts
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            oldlinknodes = linknodes
            clrev = self._repo.changelog.rev

            # Defining this function has a side-effect of overriding the
            # function of the same name that was passed in as an argument.
            # TODO have caller pass in appropriate function.
            def linknodes(flog, fname):
                # Record local revs for file nodes present in common
                # changesets, so ellipsis parents can be resolved later.
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        self.clrev_to_localrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = oldlinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    # Fill in nodes carried by sent manifests but not
                    # introduced by them, preferring the earliest linkrev.
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        return self._generatefiles(changedfiles, linknodes, commonrevs, source)
833
834 def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
799 835 repo = self._repo
800 836 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
801 837 total=len(changedfiles))
802 838 for i, fname in enumerate(sorted(changedfiles)):
803 839 filerevlog = repo.file(fname)
804 840 if not filerevlog:
805 841 raise error.Abort(_("empty or missing file data for %s") %
806 842 fname)
807 843
808 844 linkrevnodes = linknodes(filerevlog, fname)
809 845 # Lookup for filenodes, we collected the linkrev nodes above in the
810 846 # fastpath case and with lookupmf in the slowpath case.
811 847 def lookupfilelog(x):
812 848 return linkrevnodes[x]
813 849
814 850 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
815 851 if filenodes:
816 852 progress.update(i + 1, item=fname)
817 853 h = self.fileheader(fname)
818 854 size = len(h)
819 855 yield h
820 856 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
821 857 size += len(chunk)
822 858 yield chunk
823 859 self._verbosenote(_('%8.i %s\n') % (size, fname))
824 860 progress.complete()
825 861
826 862 def deltaparent(self, revlog, rev, p1, p2, prev):
827 863 if not revlog.candelta(prev, rev):
828 864 raise error.ProgrammingError('cg1 should not be used in this case')
829 865 return prev
830 866
831 867 def revchunk(self, revlog, rev, prev, linknode):
832 868 if util.safehasattr(self, 'full_nodes'):
833 869 fn = self._revchunknarrow
834 870 else:
835 871 fn = self._revchunknormal
836 872
837 873 return fn(revlog, rev, prev, linknode)
838 874
    def _revchunknormal(self, revlog, rev, prev, linknode):
        """Yield the wire chunks (header, delta header, delta) for one
        revision, choosing a delta base via deltaparent()."""
        node = revlog.node(rev)
        p1, p2 = revlog.parentrevs(rev)
        base = self.deltaparent(revlog, rev, p1, p2, prev)

        prefix = ''
        if revlog.iscensored(base) or revlog.iscensored(rev):
            # Censored revisions can't be diffed; send the full (raw) text
            # (or the tombstone) framed as a whole-text replacement diff.
            try:
                delta = revlog.revision(node, raw=True)
            except error.CensoredNodeError as e:
                delta = e.tombstone
            if base == nullrev:
                prefix = mdiff.trivialdiffheader(len(delta))
            else:
                baselen = revlog.rawsize(base)
                prefix = mdiff.replacediffheader(baselen, len(delta))
        elif base == nullrev:
            # No usable base: send the full raw text as a trivial diff.
            delta = revlog.revision(node, raw=True)
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            delta = revlog.revdiff(base, rev)
        p1n, p2n = revlog.parents(node)
        basenode = revlog.node(base)
        flags = revlog.flags(rev)
        meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
        meta += prefix
        l = len(meta) + len(delta)
        yield chunkheader(l)
        yield meta
        yield delta
869 905
    def _revchunknarrow(self, revlog, rev, prev, linknode):
        """Narrow/ellipsis-mode counterpart of _revchunknormal().

        Sends the revision in full when its changeset is in full_nodes,
        skips it when it is neither full nor a precomputed ellipsis, and
        otherwise emits ellipsis data with remapped parents.
        """
        # build up some mapping information that's useful later. See
        # the local() nested function below.
        if not self.changelog_done:
            self.clnode_to_rev[linknode] = rev
            linkrev = rev
            self.clrev_to_localrev[linkrev] = rev
        else:
            linkrev = self.clnode_to_rev[linknode]
            self.clrev_to_localrev[linkrev] = rev

        # This is a node to send in full, because the changeset it
        # corresponds to was a full changeset.
        if linknode in self.full_nodes:
            for x in self._revchunknormal(revlog, rev, prev, linknode):
                yield x
            return

        # At this point, a node can either be one we should skip or an
        # ellipsis. If it's not an ellipsis, bail immediately.
        if linkrev not in self.precomputed_ellipsis:
            return

        linkparents = self.precomputed_ellipsis[linkrev]
        def local(clrev):
            """Turn a changelog revnum into a local revnum.

            The ellipsis dag is stored as revnums on the changelog,
            but when we're producing ellipsis entries for
            non-changelog revlogs, we need to turn those numbers into
            something local. This does that for us, and during the
            changelog sending phase will also expand the stored
            mappings as needed.
            """
            if clrev == nullrev:
                return nullrev

            if not self.changelog_done:
                # If we're doing the changelog, it's possible that we
                # have a parent that is already on the client, and we
                # need to store some extra mapping information so that
                # our contained ellipsis nodes will be able to resolve
                # their parents.
                if clrev not in self.clrev_to_localrev:
                    clnode = revlog.node(clrev)
                    self.clnode_to_rev[clnode] = clrev
                return clrev

            # Walk the ellipsis-ized changelog breadth-first looking for a
            # change that has been linked from the current revlog.
            #
            # For a flat manifest revlog only a single step should be necessary
            # as all relevant changelog entries are relevant to the flat
            # manifest.
            #
            # For a filelog or tree manifest dirlog however not every changelog
            # entry will have been relevant, so we need to skip some changelog
            # nodes even after ellipsis-izing.
            walk = [clrev]
            while walk:
                p = walk[0]
                walk = walk[1:]
                if p in self.clrev_to_localrev:
                    return self.clrev_to_localrev[p]
                elif p in self.full_nodes:
                    walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
                                 if pp != nullrev])
                elif p in self.precomputed_ellipsis:
                    walk.extend([pp for pp in self.precomputed_ellipsis[p]
                                 if pp != nullrev])
                else:
                    # In this case, we've got an ellipsis with parents
                    # outside the current bundle (likely an
                    # incremental pull). We "know" that we can use the
                    # value of this same revlog at whatever revision
                    # is pointed to by linknode. "Know" is in scare
                    # quotes because I haven't done enough examination
                    # of edge cases to convince myself this is really
                    # a fact - it works for all the (admittedly
                    # thorough) cases in our testsuite, but I would be
                    # somewhat unsurprised to find a case in the wild
                    # where this breaks down a bit. That said, I don't
                    # know if it would hurt anything.
                    for i in pycompat.xrange(rev, 0, -1):
                        if revlog.linkrev(i) == clrev:
                            return i
                    # We failed to resolve a parent for this node, so
                    # we crash the changegroup construction.
                    raise error.Abort(
                        'unable to resolve parent while packing %r %r'
                        ' for changeset %r' % (revlog.indexfile, rev, clrev))

            return nullrev

        if not linkparents or (
            revlog.parentrevs(rev) == (nullrev, nullrev)):
            p1, p2 = nullrev, nullrev
        elif len(linkparents) == 1:
            p1, = sorted(local(p) for p in linkparents)
            p2 = nullrev
        else:
            p1, p2 = sorted(local(p) for p in linkparents)
        n = revlog.node(rev)

        yield ellipsisdata(
            self, rev, revlog, p1, p2, revlog.revision(n), linknode)
976 1012
977 1013 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
978 1014 # do nothing with basenode, it is implicitly the previous one in HG10
979 1015 # do nothing with flags, it is implicitly 0 for cg1 and cg2
980 1016 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
981 1017
class cg2packer(cg1packer):
    """Changegroup version 02 packer.

    Extends cg1 by encoding an explicit delta base node in each chunk
    header, enabling generaldelta-style deltas on the wire.
    """
    version = '02'
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER

    def __init__(self, repo, filematcher, bundlecaps=None):
        super(cg2packer, self).__init__(repo, filematcher,
                                        bundlecaps=bundlecaps)

        if self._reorder is None:
            # Since generaldelta is directly supported by cg2, reordering
            # generally doesn't help, so we disable it by default (treating
            # bundle.reorder=auto just like bundle.reorder=False).
            self._reorder = False

    def deltaparent(self, revlog, rev, p1, p2, prev):
        # Narrow ellipses mode.
        if util.safehasattr(self, 'full_nodes'):
            # TODO: send better deltas when in narrow mode.
            #
            # changegroup.group() loops over revisions to send,
            # including revisions we'll skip. What this means is that
            # `prev` will be a potentially useless delta base for all
            # ellipsis nodes, as the client likely won't have it. In
            # the future we should do bookkeeping about which nodes
            # have been sent to the client, and try to be
            # significantly smarter about delta bases. This is
            # slightly tricky because this same code has to work for
            # all revlogs, and we don't have the linkrev/linknode here.
            return p1

        dp = revlog.deltaparent(rev)
        if dp == nullrev and revlog.storedeltachains:
            # Avoid sending full revisions when delta parent is null. Pick prev
            # in that case. It's tempting to pick p1 in this case, as p1 will
            # be smaller in the common case. However, computing a delta against
            # p1 may require resolving the raw text of p1, which could be
            # expensive. The revlog caches should have prev cached, meaning
            # less CPU for changegroup generation. There is likely room to add
            # a flag and/or config option to control this behavior.
            base = prev
        elif dp == nullrev:
            # revlog is configured to use full snapshot for a reason,
            # stick to full snapshot.
            base = nullrev
        elif dp not in (p1, p2, prev):
            # Pick prev when we can't be sure remote has the base revision.
            return prev
        else:
            base = dp
        # Fall back to a full snapshot if the chosen base can't be deltaed.
        if base != nullrev and not revlog.candelta(base, rev):
            base = nullrev
        return base

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        # Do nothing with flags, it is implicitly 0 in cg1 and cg2
        return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
1038 1074
class cg3packer(cg2packer):
    """Changegroup version 03 packer.

    Extends cg2 with revlog flags on the wire and with tree (directory)
    manifest support.
    """
    version = '03'
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER

    def _packmanifests(self, dir, mfnodes, lookuplinknode):
        # A sub-directory manifest group is introduced by a header naming
        # the directory; the root manifest ('') gets no header.
        if dir:
            yield self.fileheader(dir)

        treelog = self._repo.manifestlog._revlog.dirlog(dir)
        for piece in self.group(mfnodes, treelog, lookuplinknode,
                                units=_('manifests')):
            yield piece

    def _manifestsdone(self):
        # cg3 terminates the manifest section with an explicit close chunk.
        return self.close()

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        # Unlike cg1/cg2, cg3 carries the revlog flags on the wire.
        return struct.pack(
            self.deltaheader, node, p1n, p2n, basenode, linknode, flags)
1058 1094
# Maps changegroup wire-format version -> (packer class, unpacker class).
_packermap = {'01': (cg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (cg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (cg3packer, cg3unpacker),
}
1065 1101
def allsupportedversions(repo):
    """Return the set of changegroup versions potentially usable with repo."""
    versions = set(_packermap.keys())
    # cg3 is only offered when something actually requires or requests it.
    wantv03 = (repo.ui.configbool('experimental', 'changegroup3')
               or repo.ui.configbool('experimental', 'treemanifest')
               or 'treemanifest' in repo.requirements)
    if not wantv03:
        versions.discard('03')
    return versions
1073 1109
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    # Anything the repo can work with may also be applied to it.
    return allsupportedversions(repo)
1077 1113
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the changegroup versions this repo is able to produce."""
    versions = allsupportedversions(repo)
    # Several requirements rule out the old formats 01 and 02:
    #  - treemanifest: 01/02 support only flat manifests, and converting
    #    between flat and tree manifests on the fly is just too expensive
    #    (they are hashed differently, so all of history would have to be
    #    converted); we simply don't even pretend to support them.
    #  - narrow: 01/02 don't support revlog flags, which stripping and
    #    unbundling need.
    #  - lfs: LFS entries must be marked with REVIDX_EXTSTORED, which also
    #    requires revlog flags.
    if ('treemanifest' in repo.requirements
            or repository.NARROW_REQUIREMENT in repo.requirements
            or LFS_REQUIREMENT in repo.requirements):
        versions.discard('01')
        versions.discard('02')

    return versions
1101 1137
def localversion(repo):
    """Return the best version for bundles meant for local use.

    Used by strip, shelve and temporary bundles, where the consumer is the
    repo itself, so the richest supported format wins.
    """
    return max(supportedoutgoingversions(repo))
1106 1142
def safeversion(repo):
    """Return the smallest version all clients of this repo should support.

    For example, every hg version that supports generaldelta also supports
    changegroup 02, so 01 can be dropped in that case.
    """
    versions = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        versions.discard('01')
    assert versions
    return min(versions)
1116 1152
def getbundler(version, repo, bundlecaps=None, filematcher=None):
    """Instantiate the packer registered for *version*, wired to *repo*."""
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(), filematcher)

    packercls = _packermap[version][0]
    return packercls(repo, filematcher=filematcher, bundlecaps=bundlecaps)
1134 1170
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the unpacker registered for *version*."""
    unpackercls = _packermap[version][1]
    return unpackercls(fh, alg, extras=extras)
1137 1173
def _changegroupinfo(repo, nodes, source):
    # Announce how many changesets are bundled; list them under --debug.
    if repo.ui.verbose or source == 'bundle':
        repo.ui.status(_("%d changesets found\n") % len(nodes))
    if repo.ui.debugflag:
        repo.ui.debug("list of changesets:\n")
        for n in nodes:
            repo.ui.debug("%s\n" % hex(n))
1145 1181
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup for *outgoing* and wrap it in an unbundler."""
    stream = makestream(repo, outgoing, version, source, fastpath=fastpath,
                        bundlecaps=bundlecaps)
    extras = {'clcount': len(outgoing.missing)}
    return getunbundler(version, util.chunkbuffer(stream), None, extras)
1152 1188
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return an iterator of changegroup chunks for *outgoing*."""
    packer = getbundler(version, repo, bundlecaps=bundlecaps,
                        filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    changesets = outgoing.missing
    heads = outgoing.missingheads
    heads.sort()
    # Take the fast path when told to, or when all (unfiltered) heads were
    # requested, since then we know the client will pull all linkrevs.
    if fastpath:
        fastpathlinkrev = True
    else:
        fastpathlinkrev = (repo.filtername is None
                           and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, changesets, source)
    return packer.generate(commonrevs, changesets, fastpathlinkrev, source)
1172 1208
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the filelog portion of an incoming changegroup.

    Reads filelog groups from *source* until the empty terminator, adds
    them to the repo under transaction *trp*, and checks off the nodes
    listed in *needfiles*. Returns (revisions added, files touched).
    Raises error.Abort on empty/spurious/missing file revisions.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # iter(..., {}) stops at the empty header that terminates the section.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Tick off each newly-added node; anything unexpected is an error.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Whatever is still listed in needfiles must already exist locally,
    # otherwise the changegroup was incomplete.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
1216 1252
def _packellipsischangegroup(repo, common, match, relevant_nodes,
                             ellipsisroots, visitnodes, depth, source, version):
    """Generate a narrow (ellipsis) changegroup stream.

    Configures a cg3+ packer, via attributes that revchunk() and friends
    look for, to emit ellipsis nodes for changesets outside
    *relevant_nodes*, then returns the chunk iterator.
    """
    if version in ('01', '02'):
        raise error.Abort(
            'ellipsis nodes require at least cg3 on client and server, '
            'but negotiated version %s' % version)
    # We wrap cg1packer.revchunk, using a side channel to pass
    # relevant_nodes into that area. Then if linknode isn't in the
    # set, we know we have an ellipsis node and we should defer
    # sending that node's data. We override close() to detect
    # pending ellipsis nodes and flush them.
    packer = getbundler(version, repo, filematcher=match)
    # Give the packer the list of nodes which should not be
    # ellipsis nodes. We store this rather than the set of nodes
    # that should be an ellipsis because for very large histories
    # we expect this to be significantly smaller.
    packer.full_nodes = relevant_nodes
    # Maps ellipsis revs to their roots at the changelog level.
    packer.precomputed_ellipsis = ellipsisroots
    # Maps CL revs to per-revlog revisions. Cleared in close() at
    # the end of each group.
    packer.clrev_to_localrev = {}
    packer.next_clrev_to_localrev = {}
    # Maps changelog nodes to changelog revs. Filled in once
    # during changelog stage and then left unmodified.
    packer.clnode_to_rev = {}
    packer.changelog_done = False
    # If true, informs the packer that it is serving shallow content and might
    # need to pack file contents not introduced by the changes being packed.
    packer.is_shallow = depth is not None

    return packer.generate(common, visitnodes, False, source)
General Comments 0
You need to be logged in to leave comments. Login now