##// END OF EJS Templates
changegroup: move close() from narrow...
Gregory Szorc -
r38923:75d6139e default
parent child Browse files
Show More
@@ -1,211 +1,202 b''
1 1 # narrowchangegroup.py - narrow clone changegroup creation and consumption
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from mercurial.i18n import _
11 11 from mercurial import (
12 12 changegroup,
13 13 error,
14 14 extensions,
15 15 node,
16 16 util,
17 17 )
18 18
def setup():
    """Install narrow-clone wrappers on changegroup.cg1packer.

    Each wrapper detects whether the packer is in "narrow" mode by
    probing for narrow-specific attributes (e.g. ``full_nodes``) and
    falls through to the original implementation otherwise.
    """
    # Restrict the set of bundled files to the narrowspec matcher, and,
    # for shallow clones, widen the linknodes callback so file nodes
    # reachable only via already-common manifests still get linkrevs.
    def generatefiles(orig, self, changedfiles, linknodes, commonrevs,
                      source):
        changedfiles = list(filter(self._filematcher, changedfiles))

        if getattr(self, 'is_shallow', False):
            # See comment in generate() for why this sadness is a thing.
            mfdicts = self._mfdicts
            del self._mfdicts
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            oldlinknodes = linknodes
            clrev = self._repo.changelog.rev
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        self.clrev_to_localrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = oldlinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            # keep the earliest introducing changeset
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links
        return orig(self, changedfiles, linknodes, commonrevs, source)
    extensions.wrapfunction(
        changegroup.cg1packer, 'generatefiles', generatefiles)

    # In a perfect world, we'd generate better ellipsis-ified graphs
    # for non-changelog revlogs. In practice, we haven't started doing
    # that yet, so the resulting DAGs for the manifestlog and filelogs
    # are actually full of bogus parentage on all the ellipsis
    # nodes. This has the side effect that, while the contents are
    # correct, the individual DAGs might be completely out of whack in
    # a case like 882681bc3166 and its ancestors (back about 10
    # revisions or so) in the main hg repo.
    #
    # The one invariant we *know* holds is that the new (potentially
    # bogus) DAG shape will be valid if we order the nodes in the
    # order that they're introduced in dramatis personae by the
    # changelog, so what we do is we sort the non-changelog histories
    # by the order in which they are used by the changelog.
    def _sortgroup(orig, self, revlog, nodelist, lookup):
        if not util.safehasattr(self, 'full_nodes') or not self.clnode_to_rev:
            return orig(self, revlog, nodelist, lookup)
        key = lambda n: self.clnode_to_rev[lookup(n)]
        return [revlog.rev(n) for n in sorted(nodelist, key=key)]

    extensions.wrapfunction(changegroup.cg1packer, '_sortgroup', _sortgroup)

    def generate(orig, self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)'''
        # Note: other than delegating to orig, the only deviation in
        # logic from normal hg's generate is marked with BEGIN/END
        # NARROW HACK.
        if not util.safehasattr(self, 'full_nodes'):
            # not sending a narrow bundle
            for x in orig(self, commonrevs, clnodes, fastpathlinkrev, source):
                yield x
            return

        repo = self._repo
        cl = repo.changelog
        mfl = repo.manifestlog
        mfrevlog = mfl._revlog

        clrevorder = {}
        mfs = {} # needed manifests
        fnodes = {} # needed file nodes
        changedfiles = set()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)
            # BEGIN NARROW HACK
            #
            # Only update mfs if x is going to be sent. Otherwise we
            # end up with bogus linkrevs specified for manifests and
            # we skip some manifest nodes that we should otherwise
            # have sent.
            if x in self.full_nodes or cl.rev(x) in self.precomputed_ellipsis:
                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Set this narrow-specific dict so we have the lowest manifest
                # revnum to look up for this cl revnum. (Part of mapping
                # changelog ellipsis parents to manifest ellipsis parents)
                self.next_clrev_to_localrev.setdefault(cl.rev(x),
                                                       mfrevlog.rev(n))
            # We can't trust the changed files list in the changeset if the
            # client requested a shallow clone.
            if self.is_shallow:
                changedfiles.update(mfl[c[0]].read().keys())
            else:
                changedfiles.update(c[3])
            # END NARROW HACK
            # Record a complete list of potentially-changed files in
            # this manifest.
            return x

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0
        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (changelog)\n') % size)

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)
        # Shallow clones also don't work correctly with fastpathlinkrev
        # because file nodes may need to be sent for a manifest even if they
        # weren't introduced by that manifest.
        fastpathlinkrev = fastpathlinkrev and not self.is_shallow

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                                            fastpathlinkrev, mfs, fnodes,
                                            source):
            yield chunk
        # BEGIN NARROW HACK
        mfdicts = None
        if self.is_shallow:
            mfdicts = [(self._repo.manifestlog[n].read(), lr)
                       for (n, lr) in mfs.iteritems()]
        # END NARROW HACK
        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        if not fastpathlinkrev:
            def linknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = cl.node
            def linknodes(filerevlog, fname):
                llr = filerevlog.linkrev
                fln = filerevlog.node
                revs = ((r, llr(r)) for r in filerevlog)
                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

        # BEGIN NARROW HACK
        #
        # We need to pass the mfdicts variable down into
        # generatefiles(), but more than one command might have
        # wrapped generatefiles so we can't modify the function
        # signature. Instead, we pass the data to ourselves using an
        # instance attribute. I'm sorry.
        self._mfdicts = mfdicts
        # END NARROW HACK
        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self.close()

        if clnodes:
            repo.hook('outgoing', node=node.hex(clnodes[0]), source=source)
    extensions.wrapfunction(changegroup.cg1packer, 'generate', generate)
@@ -1,1221 +1,1228 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from . import (
23 23 dagutil,
24 24 error,
25 25 manifest,
26 26 match as matchmod,
27 27 mdiff,
28 28 phases,
29 29 pycompat,
30 30 repository,
31 31 revlog,
32 32 util,
33 33 )
34 34
35 35 from .utils import (
36 36 stringutil,
37 37 )
38 38
39 39 _CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
40 40 _CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
41 41 _CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"
42 42
43 43 LFS_REQUIREMENT = 'lfs'
44 44
45 45 readexactly = util.readexactly
46 46
def getchunk(stream):
    """return the next chunk from stream as a string"""
    header = readexactly(stream, 4)
    (length,) = struct.unpack(">l", header)
    if length > 4:
        # The 4-byte length field counts itself; read the payload only.
        return readexactly(stream, length - 4)
    if length:
        # 1..4 (or negative) can never frame a valid chunk.
        raise error.Abort(_("invalid chunk length %d") % length)
    return ""
56 56
def chunkheader(length):
    """return a changegroup chunk header (string)"""
    # On-wire length includes the 4-byte length field itself.
    return struct.pack(">l", 4 + length)
60 60
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # A zero length field terminates a sequence of chunks on the wire.
    return struct.pack(">l", 0)
64 64
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    out = None
    cleanup = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            out = os.fdopen(fd, r"wb")
        elif vfs:
            out = vfs.open(filename, "wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            out = open(filename, "wb", 131072)
        # Arm cleanup: if anything below raises, the partial file is removed.
        cleanup = filename
        for chunk in chunks:
            out.write(chunk)
        # Success: disarm cleanup so the finished bundle survives.
        cleanup = None
        return filename
    finally:
        if out is not None:
            out.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
98 98
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        # alg is the bundle compression type ('UN', 'BZ', 'GZ', ...);
        # None is treated as uncompressed.
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # callback is invoked once per chunk read (progress reporting).
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        # Read the 4-byte framing header and return the payload length
        # (0 signals an end-of-group chunk).
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        # cg1 has no explicit delta base: deltas chain against the
        # previous node in the stream, or p1 for the first one.
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        # Returns {} at end of group, else the parsed 7-tuple.
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = struct.unpack(self.deltaheader, headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit the payload in 1MB slices to bound memory use.
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
        yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            # NOTE: efiles is rebound from a set to its cardinality here.
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    # a new head that closes a branch doesn't count
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
441 441
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # cg2 carries the delta base explicitly; prevnode is unused and
        # revlog flags are not supported in this version.
        node, p1, p2, deltabase, cs = headertuple
        flags = 0
        return node, p1, p2, deltabase, cs, flags
457 457
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # cg3 headers carry both an explicit delta base and revlog flags.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        # Consume the root manifests, then any directory (tree) manifest
        # groups that follow before the file list.
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            dirlog = repo.manifestlog._revlog.dirlog(d)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
484 484
class headerlessfixup(object):
    """File-like wrapper that replays an already-consumed header.

    Serves the bytes of ``h`` first, then falls through to reading
    from the underlying stream ``fh``.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        buffered = self._h
        if not buffered:
            # Header fully drained: read straight from the stream.
            return readexactly(self._fh, n)
        out, self._h = buffered[:n], buffered[n:]
        if len(out) < n:
            # Header satisfied only part of the request; top up.
            out += readexactly(self._fh, n - len(out))
        return out
496 496
def ellipsisdata(packer, rev, revlog_, p1, p2, data, linknode):
    # Build one complete changegroup chunk for an ellipsis revision: the
    # content is sent as a full snapshot (trivial diff header) with the
    # ELLIPSIS revlog flag set so the receiver knows the parentage may
    # be synthetic.
    n = revlog_.node(rev)
    p1n, p2n = revlog_.node(p1), revlog_.node(p2)
    flags = revlog_.flags(rev)
    flags |= revlog.REVIDX_ELLIPSIS
    meta = packer.builddeltaheader(
        n, p1n, p2n, nullid, linknode, flags)
    # TODO: try and actually send deltas for ellipsis data blocks
    diffheader = mdiff.trivialdiffheader(len(data))
    l = len(meta) + len(diffheader) + len(data)
    return ''.join((chunkheader(l),
                    meta,
                    diffheader,
                    data))
511 511
512 512 class cg1packer(object):
513 513 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
514 514 version = '01'
515 515 def __init__(self, repo, filematcher, bundlecaps=None):
516 516 """Given a source repo, construct a bundler.
517 517
518 518 filematcher is a matcher that matches on files to include in the
519 519 changegroup. Used to facilitate sparse changegroups.
520 520
521 521 bundlecaps is optional and can be used to specify the set of
522 522 capabilities which can be used to build the bundle. While bundlecaps is
523 523 unused in core Mercurial, extensions rely on this feature to communicate
524 524 capabilities to customize the changegroup packer.
525 525 """
526 526 assert filematcher
527 527 self._filematcher = filematcher
528 528
529 529 # Set of capabilities we can use to build the bundle.
530 530 if bundlecaps is None:
531 531 bundlecaps = set()
532 532 self._bundlecaps = bundlecaps
533 533 # experimental config: bundle.reorder
534 534 reorder = repo.ui.config('bundle', 'reorder')
535 535 if reorder == 'auto':
536 536 reorder = None
537 537 else:
538 538 reorder = stringutil.parsebool(reorder)
539 539 self._repo = repo
540 540 self._reorder = reorder
541 541 if self._repo.ui.verbose and not self._repo.ui.debugflag:
542 542 self._verbosenote = self._repo.ui.note
543 543 else:
544 544 self._verbosenote = lambda s: None
545 545
546 546 def close(self):
547 # Ellipses serving mode.
548 getattr(self, 'clrev_to_localrev', {}).clear()
549 if getattr(self, 'next_clrev_to_localrev', {}):
550 self.clrev_to_localrev = self.next_clrev_to_localrev
551 del self.next_clrev_to_localrev
552 self.changelog_done = True
553
547 554 return closechunk()
548 555
549 556 def fileheader(self, fname):
550 557 return chunkheader(len(fname)) + fname
551 558
552 559 # Extracted both for clarity and for overriding in extensions.
553 560 def _sortgroup(self, revlog, nodelist, lookup):
554 561 """Sort nodes for change group and turn them into revnums."""
555 562 # for generaldelta revlogs, we linearize the revs; this will both be
556 563 # much quicker and generate a much smaller bundle
557 564 if (revlog._generaldelta and self._reorder is None) or self._reorder:
558 565 dag = dagutil.revlogdag(revlog)
559 566 return dag.linearize(set(revlog.rev(n) for n in nodelist))
560 567 else:
561 568 return sorted([revlog.rev(n) for n in nodelist])
562 569
563 570 def group(self, nodelist, revlog, lookup, units=None):
564 571 """Calculate a delta group, yielding a sequence of changegroup chunks
565 572 (strings).
566 573
567 574 Given a list of changeset revs, return a set of deltas and
568 575 metadata corresponding to nodes. The first delta is
569 576 first parent(nodelist[0]) -> nodelist[0], the receiver is
570 577 guaranteed to have this parent as it has all history before
571 578 these changesets. In the case firstparent is nullrev the
572 579 changegroup starts with a full revision.
573 580
574 581 If units is not None, progress detail will be generated, units specifies
575 582 the type of revlog that is touched (changelog, manifest, etc.).
576 583 """
577 584 # if we don't have any revisions touched by these changesets, bail
578 585 if len(nodelist) == 0:
579 586 yield self.close()
580 587 return
581 588
582 589 revs = self._sortgroup(revlog, nodelist, lookup)
583 590
584 591 # add the parent of the first rev
585 592 p = revlog.parentrevs(revs[0])[0]
586 593 revs.insert(0, p)
587 594
588 595 # build deltas
589 596 progress = None
590 597 if units is not None:
591 598 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
592 599 total=(len(revs) - 1))
593 600 for r in pycompat.xrange(len(revs) - 1):
594 601 if progress:
595 602 progress.update(r + 1)
596 603 prev, curr = revs[r], revs[r + 1]
597 604 linknode = lookup(revlog.node(curr))
598 605 for c in self.revchunk(revlog, curr, prev, linknode):
599 606 yield c
600 607
601 608 if progress:
602 609 progress.complete()
603 610 yield self.close()
604 611
605 612 # filter any nodes that claim to be part of the known set
606 613 def prune(self, revlog, missing, commonrevs):
607 614 # TODO this violates storage abstraction for manifests.
608 615 if isinstance(revlog, manifest.manifestrevlog):
609 616 if not self._filematcher.visitdir(revlog._dir[:-1] or '.'):
610 617 return []
611 618
612 619 rr, rl = revlog.rev, revlog.linkrev
613 620 return [n for n in missing if rl(rr(n)) not in commonrevs]
614 621
615 622 def _packmanifests(self, dir, mfnodes, lookuplinknode):
616 623 """Pack flat manifests into a changegroup stream."""
617 624 assert not dir
618 625 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
619 626 lookuplinknode, units=_('manifests')):
620 627 yield chunk
621 628
622 629 def _manifestsdone(self):
623 630 return ''
624 631
    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)

        Wire order: changelog group, manifest group(s), one group per
        changed file, then a final closing chunk. ``commonrevs`` are
        revisions the receiver is assumed to already have; ``clnodes``
        are the changelog nodes to send.
        '''
        repo = self._repo
        cl = repo.changelog

        clrevorder = {}
        mfs = {} # needed manifests
        fnodes = {} # needed file nodes
        changedfiles = set()

        # Callback for the changelog, used to collect changed files and manifest
        # nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)
            n = c[0]
            # record the first changeset introducing this manifest version
            mfs.setdefault(n, x)
            # Record a complete list of potentially-changed files in
            # this manifest.
            changedfiles.update(c[3])
            return x

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0
        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (changelog)\n') % size)

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                                            fastpathlinkrev, mfs, fnodes,
                                            source):
            yield chunk
        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        # In the slow path the file linknodes were collected as a side
        # effect of sending manifests (see generatemanifests); in the fast
        # path derive them directly from each filelog's stored linkrevs.
        if not fastpathlinkrev:
            def linknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = cl.node
            def linknodes(filerevlog, fname):
                llr = filerevlog.linkrev
                fln = filerevlog.node
                revs = ((r, llr(r)) for r in filerevlog)
                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self.close()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
703 710
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.

        As a side effect this populates ``fnodes`` (and discovers
        subdirectory manifest nodes for tree manifests) while the chunks
        are consumed; callers must drain the iterator before trusting
        either.
        """
        repo = self._repo
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        # Map of directory name -> {node: linknode} still to be sent.
        # Starts with the root manifests; subdirectories are appended as
        # the root manifests are read (see lookupmflinknode below).
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        size = 0
        # tmfnodes can grow while we iterate (subdirectories discovered by
        # lookupmflinknode), hence while/popitem rather than a for loop.
        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
            if not dir or prunednodes:
                for x in self._packmanifests(dir, prunednodes,
                                             makelookupmflinknode(dir, nodes)):
                    size += len(x)
                    yield x
        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsdone()
769 776
770 777 # The 'source' parameter is useful for extensions
771 778 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
772 779 repo = self._repo
773 780 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
774 781 total=len(changedfiles))
775 782 for i, fname in enumerate(sorted(changedfiles)):
776 783 filerevlog = repo.file(fname)
777 784 if not filerevlog:
778 785 raise error.Abort(_("empty or missing file data for %s") %
779 786 fname)
780 787
781 788 linkrevnodes = linknodes(filerevlog, fname)
782 789 # Lookup for filenodes, we collected the linkrev nodes above in the
783 790 # fastpath case and with lookupmf in the slowpath case.
784 791 def lookupfilelog(x):
785 792 return linkrevnodes[x]
786 793
787 794 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
788 795 if filenodes:
789 796 progress.update(i + 1, item=fname)
790 797 h = self.fileheader(fname)
791 798 size = len(h)
792 799 yield h
793 800 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
794 801 size += len(chunk)
795 802 yield chunk
796 803 self._verbosenote(_('%8.i %s\n') % (size, fname))
797 804 progress.complete()
798 805
799 806 def deltaparent(self, revlog, rev, p1, p2, prev):
800 807 if not revlog.candelta(prev, rev):
801 808 raise error.ProgrammingError('cg1 should not be used in this case')
802 809 return prev
803 810
804 811 def revchunk(self, revlog, rev, prev, linknode):
805 812 if util.safehasattr(self, 'full_nodes'):
806 813 fn = self._revchunknarrow
807 814 else:
808 815 fn = self._revchunknormal
809 816
810 817 return fn(revlog, rev, prev, linknode)
811 818
    def _revchunknormal(self, revlog, rev, prev, linknode):
        """Yield the wire chunks (length header, delta header, delta) for
        one revision of ``revlog``."""
        node = revlog.node(rev)
        p1, p2 = revlog.parentrevs(rev)
        base = self.deltaparent(revlog, rev, p1, p2, prev)

        prefix = ''
        if revlog.iscensored(base) or revlog.iscensored(rev):
            # Censored revisions cannot be deltaed against; send the full
            # text (or the tombstone, if the text itself is censored),
            # framed as a diff replacing the whole base.
            try:
                delta = revlog.revision(node, raw=True)
            except error.CensoredNodeError as e:
                delta = e.tombstone
            if base == nullrev:
                prefix = mdiff.trivialdiffheader(len(delta))
            else:
                baselen = revlog.rawsize(base)
                prefix = mdiff.replacediffheader(baselen, len(delta))
        elif base == nullrev:
            # No delta base: send the full text framed as a trivial diff.
            delta = revlog.revision(node, raw=True)
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            delta = revlog.revdiff(base, rev)
        p1n, p2n = revlog.parents(node)
        basenode = revlog.node(base)
        flags = revlog.flags(rev)
        meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
        meta += prefix
        l = len(meta) + len(delta)
        yield chunkheader(l)
        yield meta
        yield delta
842 849
    def _revchunknarrow(self, revlog, rev, prev, linknode):
        """revchunk() for narrow ellipsis mode.

        Either defers to _revchunknormal (full nodes), emits nothing
        (nodes to skip), or emits an ellipsis entry whose parents are
        remapped through the precomputed ellipsis DAG.
        """
        # build up some mapping information that's useful later. See
        # the local() nested function below.
        if not self.changelog_done:
            self.clnode_to_rev[linknode] = rev
            linkrev = rev
            self.clrev_to_localrev[linkrev] = rev
        else:
            linkrev = self.clnode_to_rev[linknode]
            self.clrev_to_localrev[linkrev] = rev

        # This is a node to send in full, because the changeset it
        # corresponds to was a full changeset.
        if linknode in self.full_nodes:
            for x in self._revchunknormal(revlog, rev, prev, linknode):
                yield x
            return

        # At this point, a node can either be one we should skip or an
        # ellipsis. If it's not an ellipsis, bail immediately.
        if linkrev not in self.precomputed_ellipsis:
            return

        linkparents = self.precomputed_ellipsis[linkrev]
        def local(clrev):
            """Turn a changelog revnum into a local revnum.

            The ellipsis dag is stored as revnums on the changelog,
            but when we're producing ellipsis entries for
            non-changelog revlogs, we need to turn those numbers into
            something local. This does that for us, and during the
            changelog sending phase will also expand the stored
            mappings as needed.
            """
            if clrev == nullrev:
                return nullrev

            if not self.changelog_done:
                # If we're doing the changelog, it's possible that we
                # have a parent that is already on the client, and we
                # need to store some extra mapping information so that
                # our contained ellipsis nodes will be able to resolve
                # their parents.
                if clrev not in self.clrev_to_localrev:
                    clnode = revlog.node(clrev)
                    self.clnode_to_rev[clnode] = clrev
                return clrev

            # Walk the ellipsis-ized changelog breadth-first looking for a
            # change that has been linked from the current revlog.
            #
            # For a flat manifest revlog only a single step should be necessary
            # as all relevant changelog entries are relevant to the flat
            # manifest.
            #
            # For a filelog or tree manifest dirlog however not every changelog
            # entry will have been relevant, so we need to skip some changelog
            # nodes even after ellipsis-izing.
            walk = [clrev]
            while walk:
                p = walk[0]
                walk = walk[1:]
                if p in self.clrev_to_localrev:
                    return self.clrev_to_localrev[p]
                elif p in self.full_nodes:
                    walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
                                 if pp != nullrev])
                elif p in self.precomputed_ellipsis:
                    walk.extend([pp for pp in self.precomputed_ellipsis[p]
                                 if pp != nullrev])
                else:
                    # In this case, we've got an ellipsis with parents
                    # outside the current bundle (likely an
                    # incremental pull). We "know" that we can use the
                    # value of this same revlog at whatever revision
                    # is pointed to by linknode. "Know" is in scare
                    # quotes because I haven't done enough examination
                    # of edge cases to convince myself this is really
                    # a fact - it works for all the (admittedly
                    # thorough) cases in our testsuite, but I would be
                    # somewhat unsurprised to find a case in the wild
                    # where this breaks down a bit. That said, I don't
                    # know if it would hurt anything.
                    for i in pycompat.xrange(rev, 0, -1):
                        if revlog.linkrev(i) == clrev:
                            return i
                    # We failed to resolve a parent for this node, so
                    # we crash the changegroup construction.
                    raise error.Abort(
                        'unable to resolve parent while packing %r %r'
                        ' for changeset %r' % (revlog.indexfile, rev, clrev))

            return nullrev

        if not linkparents or (
            revlog.parentrevs(rev) == (nullrev, nullrev)):
            p1, p2 = nullrev, nullrev
        elif len(linkparents) == 1:
            p1, = sorted(local(p) for p in linkparents)
            p2 = nullrev
        else:
            p1, p2 = sorted(local(p) for p in linkparents)
        n = revlog.node(rev)

        yield ellipsisdata(
            self, rev, revlog, p1, p2, revlog.revision(n), linknode)
949 956
950 957 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
951 958 # do nothing with basenode, it is implicitly the previous one in HG10
952 959 # do nothing with flags, it is implicitly 0 for cg1 and cg2
953 960 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
954 961
class cg2packer(cg1packer):
    """Changegroup version 02: adds generaldelta-aware delta bases."""

    version = '02'
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER

    def __init__(self, repo, filematcher, bundlecaps=None):
        super(cg2packer, self).__init__(repo, filematcher,
                                        bundlecaps=bundlecaps)

        # Since generaldelta is directly supported by cg2, reordering
        # generally doesn't help, so we disable it by default (treating
        # bundle.reorder=auto just like bundle.reorder=False).
        if self._reorder is None:
            self._reorder = False

    def deltaparent(self, revlog, rev, p1, p2, prev):
        """Pick the delta base revision for *rev*."""
        # Narrow ellipses mode.
        if util.safehasattr(self, 'full_nodes'):
            # TODO: send better deltas when in narrow mode.
            #
            # changegroup.group() loops over revisions to send,
            # including revisions we'll skip. What this means is that
            # `prev` will be a potentially useless delta base for all
            # ellipsis nodes, as the client likely won't have it. In
            # the future we should do bookkeeping about which nodes
            # have been sent to the client, and try to be
            # significantly smarter about delta bases. This is
            # slightly tricky because this same code has to work for
            # all revlogs, and we don't have the linkrev/linknode here.
            return p1

        dp = revlog.deltaparent(rev)
        if dp == nullrev:
            if revlog.storedeltachains:
                # Avoid sending full revisions when delta parent is null.
                # Pick prev in that case. It's tempting to pick p1 in this
                # case, as p1 will be smaller in the common case. However,
                # computing a delta against p1 may require resolving the
                # raw text of p1, which could be expensive. The revlog
                # caches should have prev cached, meaning less CPU for
                # changegroup generation. There is likely room to add a
                # flag and/or config option to control this behavior.
                base = prev
            else:
                # revlog is configured to use full snapshot for a reason,
                # stick to full snapshot.
                base = nullrev
        elif dp not in (p1, p2, prev):
            # Pick prev when we can't be sure remote has the base revision.
            return prev
        else:
            base = dp

        if base != nullrev and not revlog.candelta(base, rev):
            base = nullrev
        return base

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        # Do nothing with flags, it is implicitly 0 in cg1 and cg2
        return struct.pack(self.deltaheader,
                           node, p1n, p2n, basenode, linknode)
1011 1018
class cg3packer(cg2packer):
    """Changegroup version 03: adds revlog flags and tree manifests."""

    version = '03'
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER

    def _packmanifests(self, dir, mfnodes, lookuplinknode):
        """Yield chunks for one manifest directory (trees supported)."""
        # Subdirectory manifests are announced with a file-style header;
        # the root manifest ('') has none.
        if dir:
            yield self.fileheader(dir)

        dirlog = self._repo.manifestlog._revlog.dirlog(dir)
        for piece in self.group(mfnodes, dirlog, lookuplinknode,
                                units=_('manifests')):
            yield piece

    def _manifestsdone(self):
        """Terminate the manifest section with an explicit close chunk."""
        return self.close()

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        # Unlike cg1/cg2, cg3 transmits the revlog flags on the wire.
        fields = (node, p1n, p2n, basenode, linknode, flags)
        return struct.pack(self.deltaheader, *fields)
1031 1038
# Maps bundle version identifiers to (packer, unpacker) class pairs.
_packermap = {'01': (cg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (cg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (cg3packer, cg3unpacker),
}
1038 1045
def allsupportedversions(repo):
    """Return every changegroup version this repo understands at all."""
    versions = set(_packermap)
    # '03' stays opt-in unless explicitly enabled or already required by
    # an existing tree manifest repository.
    wants03 = (repo.ui.configbool('experimental', 'changegroup3')
               or repo.ui.configbool('experimental', 'treemanifest')
               or 'treemanifest' in repo.requirements)
    if not wants03:
        versions.discard('03')
    return versions
1046 1053
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return the changegroup versions that can be applied to *repo*.

    Currently every version we understand can be applied.
    """
    return allsupportedversions(repo)
1050 1057
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the changegroup versions that can be produced for *repo*."""
    versions = allsupportedversions(repo)
    # Versions 01 and 02 support only flat manifests (tree manifests hash
    # differently, so supporting them would mean converting all of
    # history on the fly) and carry no revlog flags (narrow needs flags
    # for stripping/unbundling; LFS needs REVIDX_EXTSTORED). Any of these
    # requirements therefore restricts us to version 03.
    if ('treemanifest' in repo.requirements
        or repository.NARROW_REQUIREMENT in repo.requirements
        or LFS_REQUIREMENT in repo.requirements):
        versions.discard('01')
        versions.discard('02')

    return versions
1074 1081
def localversion(repo):
    """Return the best changegroup version for local-only bundles.

    Used for bundles that are meant to be used locally, such as those
    from strip and shelve, and temporary bundles.
    """
    return max(supportedoutgoingversions(repo))
1079 1086
def safeversion(repo):
    """Return the smallest version all clients of *repo* must support.

    For example, all hg versions that support generaldelta also support
    changegroup 02.
    """
    candidates = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        candidates.discard('01')
    assert candidates
    return min(candidates)
1089 1096
def getbundler(version, repo, bundlecaps=None, filematcher=None):
    """Instantiate the packer class for *version*.

    ``filematcher`` defaults to matching everything and is always
    intersected with the repo's narrow matcher so that files absent from
    the local store are never requested.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(), filematcher)

    packercls = _packermap[version][0]
    return packercls(repo, filematcher=filematcher, bundlecaps=bundlecaps)
1107 1114
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the unpacker class for *version* over stream *fh*."""
    unpackercls = _packermap[version][1]
    return unpackercls(fh, alg, extras=extras)
1110 1117
def _changegroupinfo(repo, nodes, source):
    """Report how many (and, when debugging, which) changesets are sent."""
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for n in nodes:
            ui.debug("%s\n" % hex(n))
1118 1125
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup stream for *outgoing* and wrap it in an
    unbundler, ready to be applied or written out."""
    cgstream = makestream(repo, outgoing, version, source,
                          fastpath=fastpath, bundlecaps=bundlecaps)
    buffered = util.chunkbuffer(cgstream)
    extras = {'clcount': len(outgoing.missing)}
    return getunbundler(version, buffered, None, extras)
1125 1132
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return a generator of changegroup chunks for *outgoing*."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    heads.sort()
    # Take the fast path when told to, or when all unfiltered heads have
    # been requested (since we then know all linkrevs will be pulled by
    # the client).
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1145 1152
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the filelog portion of an incoming changegroup.

    Reads file groups from ``source`` until the terminating empty header,
    adding each group to the matching filelog under transaction ``trp``.
    ``needfiles`` maps filename -> set of nodes the changelog said must
    arrive; received nodes are checked off, and unexpected ones abort.
    Returns (revisions added, files touched).
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything still listed in needfiles did not arrive in the bundle; it
    # must already exist locally or the repository is incomplete.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
1189 1196
def _packellipsischangegroup(repo, common, match, relevant_nodes,
                             ellipsisroots, visitnodes, depth, source, version):
    """Produce an ellipsis-aware changegroup stream for a narrow clone.

    Configures a cg3 packer with the narrow-mode side-channel attributes
    (consumed by revchunk/_revchunknarrow) and returns the resulting
    chunk generator.
    """
    if version in ('01', '02'):
        raise error.Abort(
            'ellipsis nodes require at least cg3 on client and server, '
            'but negotiated version %s' % version)
    # We wrap cg1packer.revchunk, using a side channel to pass
    # relevant_nodes into that area. Then if linknode isn't in the
    # set, we know we have an ellipsis node and we should defer
    # sending that node's data. We override close() to detect
    # pending ellipsis nodes and flush them.
    packer = getbundler(version, repo, filematcher=match)
    # Give the packer the list of nodes which should not be
    # ellipsis nodes. We store this rather than the set of nodes
    # that should be an ellipsis because for very large histories
    # we expect this to be significantly smaller.
    packer.full_nodes = relevant_nodes
    # Maps ellipsis revs to their roots at the changelog level.
    packer.precomputed_ellipsis = ellipsisroots
    # Maps CL revs to per-revlog revisions. Cleared in close() at
    # the end of each group.
    packer.clrev_to_localrev = {}
    packer.next_clrev_to_localrev = {}
    # Maps changelog nodes to changelog revs. Filled in once
    # during changelog stage and then left unmodified.
    packer.clnode_to_rev = {}
    packer.changelog_done = False
    # If true, informs the packer that it is serving shallow content and might
    # need to pack file contents not introduced by the changes being packed.
    packer.is_shallow = depth is not None

    return packer.generate(common, visitnodes, False, source)
General Comments 0
You need to be logged in to leave comments. Login now