changegroup: move revchunk() from narrow...
Gregory Szorc
r38922:66cf046e default
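The diff below removes the revchunk() wrapper from the narrow extension and moves its logic into core, where cg1packer.revchunk() now dispatches to _revchunknarrow() or _revchunknormal() depending on whether narrow ellipsis state has been attached to the packer instance. Here is a minimal, self-contained sketch of that dispatch pattern; the class and data are illustrative stand-ins, not Mercurial's real API (the real code keys full_nodes by node hash and uses util.safehasattr):

    class packer(object):
        def revchunk(self, rev):
            # Narrow ellipsis serving is detected by the presence of the
            # 'full_nodes' attribute, attached by the caller as a side
            # channel rather than passed through the signature.
            if hasattr(self, 'full_nodes'):
                return self._revchunknarrow(rev)
            return self._revchunknormal(rev)

        def _revchunknormal(self, rev):
            return ['full chunk for rev %d' % rev]

        def _revchunknarrow(self, rev):
            # The narrow path still delegates to the normal path for
            # revisions that must be sent in full.
            if rev in self.full_nodes:
                return self._revchunknormal(rev)
            return ['ellipsis chunk for rev %d' % rev]

    p = packer()
    print(p.revchunk(1))   # normal mode: full chunk
    p.full_nodes = {1}
    print(p.revchunk(1))   # narrow mode, rev sent in full
    print(p.revchunk(2))   # narrow mode, ellipsis chunk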
@@ -1,318 +1,211 b''
1 1 # narrowchangegroup.py - narrow clone changegroup creation and consumption
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from mercurial.i18n import _
11 11 from mercurial import (
12 12 changegroup,
13 13 error,
14 14 extensions,
15 15 node,
16 pycompat,
17 16 util,
18 17 )
19 18
20 19 def setup():
21 20 def generatefiles(orig, self, changedfiles, linknodes, commonrevs,
22 21 source):
23 22 changedfiles = list(filter(self._filematcher, changedfiles))
24 23
25 24 if getattr(self, 'is_shallow', False):
26 25 # See comment in generate() for why this sadness is a thing.
27 26 mfdicts = self._mfdicts
28 27 del self._mfdicts
29 28 # In a shallow clone, the linknodes callback needs to also include
30 29 # those file nodes that are in the manifests we sent but weren't
31 30 # introduced by those manifests.
32 31 commonctxs = [self._repo[c] for c in commonrevs]
33 32 oldlinknodes = linknodes
34 33 clrev = self._repo.changelog.rev
35 34 def linknodes(flog, fname):
36 35 for c in commonctxs:
37 36 try:
38 37 fnode = c.filenode(fname)
39 38 self.clrev_to_localrev[c.rev()] = flog.rev(fnode)
40 39 except error.ManifestLookupError:
41 40 pass
42 41 links = oldlinknodes(flog, fname)
43 42 if len(links) != len(mfdicts):
44 43 for mf, lr in mfdicts:
45 44 fnode = mf.get(fname, None)
46 45 if fnode in links:
47 46 links[fnode] = min(links[fnode], lr, key=clrev)
48 47 elif fnode:
49 48 links[fnode] = lr
50 49 return links
51 50 return orig(self, changedfiles, linknodes, commonrevs, source)
52 51 extensions.wrapfunction(
53 52 changegroup.cg1packer, 'generatefiles', generatefiles)
54 53
55 54 def close(orig, self):
56 55 getattr(self, 'clrev_to_localrev', {}).clear()
57 56 if getattr(self, 'next_clrev_to_localrev', {}):
58 57 self.clrev_to_localrev = self.next_clrev_to_localrev
59 58 del self.next_clrev_to_localrev
60 59 self.changelog_done = True
61 60 return orig(self)
62 61 extensions.wrapfunction(changegroup.cg1packer, 'close', close)
63 62
64 63 # In a perfect world, we'd generate better ellipsis-ified graphs
65 64 # for non-changelog revlogs. In practice, we haven't started doing
66 65 # that yet, so the resulting DAGs for the manifestlog and filelogs
67 66 # are actually full of bogus parentage on all the ellipsis
68 67 # nodes. This has the side effect that, while the contents are
69 68 # correct, the individual DAGs might be completely out of whack in
70 69 # a case like 882681bc3166 and its ancestors (back about 10
71 70 # revisions or so) in the main hg repo.
72 71 #
73 72 # The one invariant we *know* holds is that the new (potentially
74 73 # bogus) DAG shape will be valid if we order the nodes in the
75 74 # order that they're introduced in dramatis personae by the
76 75 # changelog, so what we do is we sort the non-changelog histories
77 76 # by the order in which they are used by the changelog.
78 77 def _sortgroup(orig, self, revlog, nodelist, lookup):
79 78 if not util.safehasattr(self, 'full_nodes') or not self.clnode_to_rev:
80 79 return orig(self, revlog, nodelist, lookup)
81 80 key = lambda n: self.clnode_to_rev[lookup(n)]
82 81 return [revlog.rev(n) for n in sorted(nodelist, key=key)]
83 82
84 83 extensions.wrapfunction(changegroup.cg1packer, '_sortgroup', _sortgroup)
85 84
86 85 def generate(orig, self, commonrevs, clnodes, fastpathlinkrev, source):
87 86 '''yield a sequence of changegroup chunks (strings)'''
88 87 # Note: other than delegating to orig, the only deviation in
89 88 # logic from normal hg's generate is marked with BEGIN/END
90 89 # NARROW HACK.
91 90 if not util.safehasattr(self, 'full_nodes'):
92 91 # not sending a narrow bundle
93 92 for x in orig(self, commonrevs, clnodes, fastpathlinkrev, source):
94 93 yield x
95 94 return
96 95
97 96 repo = self._repo
98 97 cl = repo.changelog
99 98 mfl = repo.manifestlog
100 99 mfrevlog = mfl._revlog
101 100
102 101 clrevorder = {}
103 102 mfs = {} # needed manifests
104 103 fnodes = {} # needed file nodes
105 104 changedfiles = set()
106 105
107 106 # Callback for the changelog, used to collect changed files and manifest
108 107 # nodes.
109 108 # Returns the linkrev node (identity in the changelog case).
110 109 def lookupcl(x):
111 110 c = cl.read(x)
112 111 clrevorder[x] = len(clrevorder)
113 112 # BEGIN NARROW HACK
114 113 #
115 114 # Only update mfs if x is going to be sent. Otherwise we
116 115 # end up with bogus linkrevs specified for manifests and
117 116 # we skip some manifest nodes that we should otherwise
118 117 # have sent.
119 118 if x in self.full_nodes or cl.rev(x) in self.precomputed_ellipsis:
120 119 n = c[0]
121 120 # record the first changeset introducing this manifest version
122 121 mfs.setdefault(n, x)
123 122 # Set this narrow-specific dict so we have the lowest manifest
124 123 # revnum to look up for this cl revnum. (Part of mapping
125 124 # changelog ellipsis parents to manifest ellipsis parents)
126 125 self.next_clrev_to_localrev.setdefault(cl.rev(x),
127 126 mfrevlog.rev(n))
128 127 # We can't trust the changed files list in the changeset if the
129 128 # client requested a shallow clone.
130 129 if self.is_shallow:
131 130 changedfiles.update(mfl[c[0]].read().keys())
132 131 else:
133 132 changedfiles.update(c[3])
134 133 # END NARROW HACK
135 134 # Record a complete list of potentially-changed files in
136 135 # this manifest.
137 136 return x
138 137
139 138 self._verbosenote(_('uncompressed size of bundle content:\n'))
140 139 size = 0
141 140 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
142 141 size += len(chunk)
143 142 yield chunk
144 143 self._verbosenote(_('%8.i (changelog)\n') % size)
145 144
146 145 # We need to make sure that the linkrev in the changegroup refers to
147 146 # the first changeset that introduced the manifest or file revision.
148 147 # The fastpath is usually safer than the slowpath, because the filelogs
149 148 # are walked in revlog order.
150 149 #
151 150 # When taking the slowpath with reorder=None and the manifest revlog
152 151 # uses generaldelta, the manifest may be walked in the "wrong" order.
153 152 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
154 153 # cc0ff93d0c0c).
155 154 #
156 155 # When taking the fastpath, we are only vulnerable to reordering
157 156 # of the changelog itself. The changelog never uses generaldelta, so
158 157 # it is only reordered when reorder=True. To handle this case, we
159 158 # simply take the slowpath, which already has the 'clrevorder' logic.
160 159 # This was also fixed in cc0ff93d0c0c.
161 160 fastpathlinkrev = fastpathlinkrev and not self._reorder
162 161 # Treemanifests don't work correctly with fastpathlinkrev
163 162 # either, because we don't discover which directory nodes to
164 163 # send along with files. This could probably be fixed.
165 164 fastpathlinkrev = fastpathlinkrev and (
166 165 'treemanifest' not in repo.requirements)
167 166 # Shallow clones also don't work correctly with fastpathlinkrev
168 167 # because file nodes may need to be sent for a manifest even if they
169 168 # weren't introduced by that manifest.
170 169 fastpathlinkrev = fastpathlinkrev and not self.is_shallow
171 170
172 171 for chunk in self.generatemanifests(commonrevs, clrevorder,
173 172 fastpathlinkrev, mfs, fnodes, source):
174 173 yield chunk
175 174 # BEGIN NARROW HACK
176 175 mfdicts = None
177 176 if self.is_shallow:
178 177 mfdicts = [(self._repo.manifestlog[n].read(), lr)
179 178 for (n, lr) in mfs.iteritems()]
180 179 # END NARROW HACK
181 180 mfs.clear()
182 181 clrevs = set(cl.rev(x) for x in clnodes)
183 182
184 183 if not fastpathlinkrev:
185 184 def linknodes(unused, fname):
186 185 return fnodes.get(fname, {})
187 186 else:
188 187 cln = cl.node
189 188 def linknodes(filerevlog, fname):
190 189 llr = filerevlog.linkrev
191 190 fln = filerevlog.node
192 191 revs = ((r, llr(r)) for r in filerevlog)
193 192 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
194 193
195 194 # BEGIN NARROW HACK
196 195 #
197 196 # We need to pass the mfdicts variable down into
198 197 # generatefiles(), but more than one command might have
199 198 # wrapped generatefiles so we can't modify the function
200 199 # signature. Instead, we pass the data to ourselves using an
201 200 # instance attribute. I'm sorry.
202 201 self._mfdicts = mfdicts
203 202 # END NARROW HACK
204 203 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
205 204 source):
206 205 yield chunk
207 206
208 207 yield self.close()
209 208
210 209 if clnodes:
211 210 repo.hook('outgoing', node=node.hex(clnodes[0]), source=source)
212 211 extensions.wrapfunction(changegroup.cg1packer, 'generate', generate)
213
214 def revchunk(orig, self, revlog, rev, prev, linknode):
215 if not util.safehasattr(self, 'full_nodes'):
216 # not sending a narrow changegroup
217 for x in orig(self, revlog, rev, prev, linknode):
218 yield x
219 return
220 # build up some mapping information that's useful later. See
221 # the local() nested function below.
222 if not self.changelog_done:
223 self.clnode_to_rev[linknode] = rev
224 linkrev = rev
225 self.clrev_to_localrev[linkrev] = rev
226 else:
227 linkrev = self.clnode_to_rev[linknode]
228 self.clrev_to_localrev[linkrev] = rev
229 # This is a node to send in full, because the changeset it
230 # corresponds to was a full changeset.
231 if linknode in self.full_nodes:
232 for x in orig(self, revlog, rev, prev, linknode):
233 yield x
234 return
235 # At this point, a node can either be one we should skip or an
236 # ellipsis. If it's not an ellipsis, bail immediately.
237 if linkrev not in self.precomputed_ellipsis:
238 return
239 linkparents = self.precomputed_ellipsis[linkrev]
240 def local(clrev):
241 """Turn a changelog revnum into a local revnum.
242
243 The ellipsis dag is stored as revnums on the changelog,
244 but when we're producing ellipsis entries for
245 non-changelog revlogs, we need to turn those numbers into
246 something local. This does that for us, and during the
247 changelog sending phase will also expand the stored
248 mappings as needed.
249 """
250 if clrev == node.nullrev:
251 return node.nullrev
252 if not self.changelog_done:
253 # If we're doing the changelog, it's possible that we
254 # have a parent that is already on the client, and we
255 # need to store some extra mapping information so that
256 # our contained ellipsis nodes will be able to resolve
257 # their parents.
258 if clrev not in self.clrev_to_localrev:
259 clnode = revlog.node(clrev)
260 self.clnode_to_rev[clnode] = clrev
261 return clrev
262 # Walk the ellipsis-ized changelog breadth-first looking for a
263 # change that has been linked from the current revlog.
264 #
265 # For a flat manifest revlog only a single step should be necessary
266 # as all relevant changelog entries are relevant to the flat
267 # manifest.
268 #
269 # For a filelog or tree manifest dirlog however not every changelog
270 # entry will have been relevant, so we need to skip some changelog
271 # nodes even after ellipsis-izing.
272 walk = [clrev]
273 while walk:
274 p = walk[0]
275 walk = walk[1:]
276 if p in self.clrev_to_localrev:
277 return self.clrev_to_localrev[p]
278 elif p in self.full_nodes:
279 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
280 if pp != node.nullrev])
281 elif p in self.precomputed_ellipsis:
282 walk.extend([pp for pp in self.precomputed_ellipsis[p]
283 if pp != node.nullrev])
284 else:
285 # In this case, we've got an ellipsis with parents
286 # outside the current bundle (likely an
287 # incremental pull). We "know" that we can use the
288 # value of this same revlog at whatever revision
289 # is pointed to by linknode. "Know" is in scare
290 # quotes because I haven't done enough examination
291 # of edge cases to convince myself this is really
292 # a fact - it works for all the (admittedly
293 # thorough) cases in our testsuite, but I would be
294 # somewhat unsurprised to find a case in the wild
295 # where this breaks down a bit. That said, I don't
296 # know if it would hurt anything.
297 for i in pycompat.xrange(rev, 0, -1):
298 if revlog.linkrev(i) == clrev:
299 return i
300 # We failed to resolve a parent for this node, so
301 # we crash the changegroup construction.
302 raise error.Abort(
303 'unable to resolve parent while packing %r %r'
304 ' for changeset %r' % (revlog.indexfile, rev, clrev))
305 return node.nullrev
306
307 if not linkparents or (
308 revlog.parentrevs(rev) == (node.nullrev, node.nullrev)):
309 p1, p2 = node.nullrev, node.nullrev
310 elif len(linkparents) == 1:
311 p1, = sorted(local(p) for p in linkparents)
312 p2 = node.nullrev
313 else:
314 p1, p2 = sorted(local(p) for p in linkparents)
315 n = revlog.node(rev)
316 yield changegroup.ellipsisdata(
317 self, rev, revlog, p1, p2, revlog.revision(n), linknode)
318 extensions.wrapfunction(changegroup.cg1packer, 'revchunk', revchunk)
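The local() helper above resolves ellipsis parents by walking the ellipsis-ized changelog breadth-first until it reaches a revision with a known local mapping. A hedged, runnable sketch of just that walk, with stand-in data structures in place of the packer's state:

    nullrev = -1
    # Stand-ins for the packer's side-channel state:
    # changelog rev -> local revlog rev, and ellipsis rev -> its parents.
    clrev_to_localrev = {2: 0}
    precomputed_ellipsis = {7: [5], 5: [2]}

    def local(clrev):
        # Breadth-first walk from clrev toward its ancestors until we hit
        # a changelog rev that is linked from the current revlog.
        walk = [clrev]
        while walk:
            p = walk.pop(0)
            if p in clrev_to_localrev:
                return clrev_to_localrev[p]
            if p in precomputed_ellipsis:
                walk.extend(pp for pp in precomputed_ellipsis[p]
                            if pp != nullrev)
        return nullrev

    print(local(7))  # 0: resolved via ellipsis parents 7 -> 5 -> 2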
@@ -1,1105 +1,1221 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from . import (
23 23 dagutil,
24 24 error,
25 25 manifest,
26 26 match as matchmod,
27 27 mdiff,
28 28 phases,
29 29 pycompat,
30 30 repository,
31 31 revlog,
32 32 util,
33 33 )
34 34
35 35 from .utils import (
36 36 stringutil,
37 37 )
38 38
39 39 _CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
40 40 _CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
41 41 _CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"
42 42
43 43 LFS_REQUIREMENT = 'lfs'
44 44
45 45 readexactly = util.readexactly
46 46
47 47 def getchunk(stream):
48 48 """return the next chunk from stream as a string"""
49 49 d = readexactly(stream, 4)
50 50 l = struct.unpack(">l", d)[0]
51 51 if l <= 4:
52 52 if l:
53 53 raise error.Abort(_("invalid chunk length %d") % l)
54 54 return ""
55 55 return readexactly(stream, l - 4)
56 56
57 57 def chunkheader(length):
58 58 """return a changegroup chunk header (string)"""
59 59 return struct.pack(">l", length + 4)
60 60
61 61 def closechunk():
62 62 """return a changegroup chunk header (string) for a zero-length chunk"""
63 63 return struct.pack(">l", 0)
64 64
65 65 def writechunks(ui, chunks, filename, vfs=None):
66 66 """Write chunks to a file and return its filename.
67 67
68 68 The stream is assumed to be a bundle file.
69 69 Existing files will not be overwritten.
70 70 If no filename is specified, a temporary file is created.
71 71 """
72 72 fh = None
73 73 cleanup = None
74 74 try:
75 75 if filename:
76 76 if vfs:
77 77 fh = vfs.open(filename, "wb")
78 78 else:
79 79 # Increase default buffer size because default is usually
80 80 # small (4k is common on Linux).
81 81 fh = open(filename, "wb", 131072)
82 82 else:
83 83 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
84 84 fh = os.fdopen(fd, r"wb")
85 85 cleanup = filename
86 86 for c in chunks:
87 87 fh.write(c)
88 88 cleanup = None
89 89 return filename
90 90 finally:
91 91 if fh is not None:
92 92 fh.close()
93 93 if cleanup is not None:
94 94 if filename and vfs:
95 95 vfs.unlink(cleanup)
96 96 else:
97 97 os.unlink(cleanup)
98 98
99 99 class cg1unpacker(object):
100 100 """Unpacker for cg1 changegroup streams.
101 101
102 102 A changegroup unpacker handles the framing of the revision data in
103 103 the wire format. Most consumers will want to use the apply()
104 104 method to add the changes from the changegroup to a repository.
105 105
106 106 If you're forwarding a changegroup unmodified to another consumer,
107 107 use getchunks(), which returns an iterator of changegroup
108 108 chunks. This is mostly useful for cases where you need to know the
109 109 data stream has ended by observing the end of the changegroup.
110 110
111 111 deltachunk() is useful only if you're applying delta data. Most
112 112 consumers should prefer apply() instead.
113 113
114 114 A few other public methods exist. Those are used only for
115 115 bundlerepo and some debug commands - their use is discouraged.
116 116 """
117 117 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
118 118 deltaheadersize = struct.calcsize(deltaheader)
119 119 version = '01'
120 120 _grouplistcount = 1 # One list of files after the manifests
121 121
122 122 def __init__(self, fh, alg, extras=None):
123 123 if alg is None:
124 124 alg = 'UN'
125 125 if alg not in util.compengines.supportedbundletypes:
126 126 raise error.Abort(_('unknown stream compression type: %s')
127 127 % alg)
128 128 if alg == 'BZ':
129 129 alg = '_truncatedBZ'
130 130
131 131 compengine = util.compengines.forbundletype(alg)
132 132 self._stream = compengine.decompressorreader(fh)
133 133 self._type = alg
134 134 self.extras = extras or {}
135 135 self.callback = None
136 136
137 137 # These methods (compressed, read, seek, tell) all appear to only
138 138 # be used by bundlerepo, but it's a little hard to tell.
139 139 def compressed(self):
140 140 return self._type is not None and self._type != 'UN'
141 141 def read(self, l):
142 142 return self._stream.read(l)
143 143 def seek(self, pos):
144 144 return self._stream.seek(pos)
145 145 def tell(self):
146 146 return self._stream.tell()
147 147 def close(self):
148 148 return self._stream.close()
149 149
150 150 def _chunklength(self):
151 151 d = readexactly(self._stream, 4)
152 152 l = struct.unpack(">l", d)[0]
153 153 if l <= 4:
154 154 if l:
155 155 raise error.Abort(_("invalid chunk length %d") % l)
156 156 return 0
157 157 if self.callback:
158 158 self.callback()
159 159 return l - 4
160 160
161 161 def changelogheader(self):
162 162 """v10 does not have a changelog header chunk"""
163 163 return {}
164 164
165 165 def manifestheader(self):
166 166 """v10 does not have a manifest header chunk"""
167 167 return {}
168 168
169 169 def filelogheader(self):
170 170 """return the header of the filelogs chunk, v10 only has the filename"""
171 171 l = self._chunklength()
172 172 if not l:
173 173 return {}
174 174 fname = readexactly(self._stream, l)
175 175 return {'filename': fname}
176 176
177 177 def _deltaheader(self, headertuple, prevnode):
178 178 node, p1, p2, cs = headertuple
179 179 if prevnode is None:
180 180 deltabase = p1
181 181 else:
182 182 deltabase = prevnode
183 183 flags = 0
184 184 return node, p1, p2, deltabase, cs, flags
185 185
186 186 def deltachunk(self, prevnode):
187 187 l = self._chunklength()
188 188 if not l:
189 189 return {}
190 190 headerdata = readexactly(self._stream, self.deltaheadersize)
191 191 header = struct.unpack(self.deltaheader, headerdata)
192 192 delta = readexactly(self._stream, l - self.deltaheadersize)
193 193 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
194 194 return (node, p1, p2, cs, deltabase, delta, flags)
195 195
196 196 def getchunks(self):
197 197 """returns all the chunks contained in the bundle
198 198
199 199 Used when you need to forward the binary stream to a file or another
200 200 network API. To do so, it parses the changegroup data; otherwise it would
201 201 block in the sshrepo case because it doesn't know the end of the stream.
202 202 """
203 203 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
204 204 # and a list of filelogs. For changegroup 3, we expect 4 parts:
205 205 # changelog, manifestlog, a list of tree manifestlogs, and a list of
206 206 # filelogs.
207 207 #
208 208 # Changelog and manifestlog parts are terminated with empty chunks. The
209 209 # tree and file parts are a list of entry sections. Each entry section
210 210 # is a series of chunks terminating in an empty chunk. The list of these
211 211 # entry sections is terminated in yet another empty chunk, so we know
212 212 # we've reached the end of the tree/file list when we reach an empty
213 213 # chunk that was preceded by no non-empty chunks.
214 214
215 215 parts = 0
216 216 while parts < 2 + self._grouplistcount:
217 217 noentries = True
218 218 while True:
219 219 chunk = getchunk(self)
220 220 if not chunk:
221 221 # The first two empty chunks represent the end of the
222 222 # changelog and the manifestlog portions. The remaining
223 223 # empty chunks represent either A) the end of individual
224 224 # tree or file entries in the file list, or B) the end of
225 225 # the entire list. It's the end of the entire list if there
226 226 # were no entries (i.e. noentries is True).
227 227 if parts < 2:
228 228 parts += 1
229 229 elif noentries:
230 230 parts += 1
231 231 break
232 232 noentries = False
233 233 yield chunkheader(len(chunk))
234 234 pos = 0
235 235 while pos < len(chunk):
236 236 next = pos + 2**20
237 237 yield chunk[pos:next]
238 238 pos = next
239 239 yield closechunk()
240 240
241 241 def _unpackmanifests(self, repo, revmap, trp, prog):
242 242 self.callback = prog.increment
243 243 # no need to check for empty manifest group here:
244 244 # if the result of the merge of 1 and 2 is the same in 3 and 4,
245 245 # no new manifest will be created and the manifest group will
246 246 # be empty during the pull
247 247 self.manifestheader()
248 248 deltas = self.deltaiter()
249 249 repo.manifestlog.addgroup(deltas, revmap, trp)
250 250 prog.complete()
251 251 self.callback = None
252 252
253 253 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
254 254 expectedtotal=None):
255 255 """Add the changegroup returned by source.read() to this repo.
256 256 srctype is a string like 'push', 'pull', or 'unbundle'. url is
257 257 the URL of the repo where this changegroup is coming from.
258 258
259 259 Return an integer summarizing the change to this repo:
260 260 - nothing changed or no source: 0
261 261 - more heads than before: 1+added heads (2..n)
262 262 - fewer heads than before: -1-removed heads (-2..-n)
263 263 - number of heads stays the same: 1
264 264 """
265 265 repo = repo.unfiltered()
266 266 def csmap(x):
267 267 repo.ui.debug("add changeset %s\n" % short(x))
268 268 return len(cl)
269 269
270 270 def revmap(x):
271 271 return cl.rev(x)
272 272
273 273 changesets = files = revisions = 0
274 274
275 275 try:
276 276 # The transaction may already carry source information. In this
277 277 # case we use the top level data. We overwrite the argument
278 278 # because we need to use the top level value (if they exist)
279 279 # in this function.
280 280 srctype = tr.hookargs.setdefault('source', srctype)
281 281 url = tr.hookargs.setdefault('url', url)
282 282 repo.hook('prechangegroup',
283 283 throw=True, **pycompat.strkwargs(tr.hookargs))
284 284
285 285 # write changelog data to temp files so concurrent readers
286 286 # will not see an inconsistent view
287 287 cl = repo.changelog
288 288 cl.delayupdate(tr)
289 289 oldheads = set(cl.heads())
290 290
291 291 trp = weakref.proxy(tr)
292 292 # pull off the changeset group
293 293 repo.ui.status(_("adding changesets\n"))
294 294 clstart = len(cl)
295 295 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
296 296 total=expectedtotal)
297 297 self.callback = progress.increment
298 298
299 299 efiles = set()
300 300 def onchangelog(cl, node):
301 301 efiles.update(cl.readfiles(node))
302 302
303 303 self.changelogheader()
304 304 deltas = self.deltaiter()
305 305 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
306 306 efiles = len(efiles)
307 307
308 308 if not cgnodes:
309 309 repo.ui.develwarn('applied empty changegroup',
310 310 config='warn-empty-changegroup')
311 311 clend = len(cl)
312 312 changesets = clend - clstart
313 313 progress.complete()
314 314 self.callback = None
315 315
316 316 # pull off the manifest group
317 317 repo.ui.status(_("adding manifests\n"))
318 318 # We know that we'll never have more manifests than we had
319 319 # changesets.
320 320 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
321 321 total=changesets)
322 322 self._unpackmanifests(repo, revmap, trp, progress)
323 323
324 324 needfiles = {}
325 325 if repo.ui.configbool('server', 'validate'):
326 326 cl = repo.changelog
327 327 ml = repo.manifestlog
328 328 # validate incoming csets have their manifests
329 329 for cset in pycompat.xrange(clstart, clend):
330 330 mfnode = cl.changelogrevision(cset).manifest
331 331 mfest = ml[mfnode].readdelta()
332 332 # store file cgnodes we must see
333 333 for f, n in mfest.iteritems():
334 334 needfiles.setdefault(f, set()).add(n)
335 335
336 336 # process the files
337 337 repo.ui.status(_("adding file changes\n"))
338 338 newrevs, newfiles = _addchangegroupfiles(
339 339 repo, self, revmap, trp, efiles, needfiles)
340 340 revisions += newrevs
341 341 files += newfiles
342 342
343 343 deltaheads = 0
344 344 if oldheads:
345 345 heads = cl.heads()
346 346 deltaheads = len(heads) - len(oldheads)
347 347 for h in heads:
348 348 if h not in oldheads and repo[h].closesbranch():
349 349 deltaheads -= 1
350 350 htext = ""
351 351 if deltaheads:
352 352 htext = _(" (%+d heads)") % deltaheads
353 353
354 354 repo.ui.status(_("added %d changesets"
355 355 " with %d changes to %d files%s\n")
356 356 % (changesets, revisions, files, htext))
357 357 repo.invalidatevolatilesets()
358 358
359 359 if changesets > 0:
360 360 if 'node' not in tr.hookargs:
361 361 tr.hookargs['node'] = hex(cl.node(clstart))
362 362 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
363 363 hookargs = dict(tr.hookargs)
364 364 else:
365 365 hookargs = dict(tr.hookargs)
366 366 hookargs['node'] = hex(cl.node(clstart))
367 367 hookargs['node_last'] = hex(cl.node(clend - 1))
368 368 repo.hook('pretxnchangegroup',
369 369 throw=True, **pycompat.strkwargs(hookargs))
370 370
371 371 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
372 372 phaseall = None
373 373 if srctype in ('push', 'serve'):
374 374 # Old servers can not push the boundary themselves.
375 375 # New servers won't push the boundary if changeset already
376 376 # exists locally as secret
377 377 #
378 378 # We should not use 'added' here but the list of all changes in
379 379 # the bundle.
380 380 if repo.publishing():
381 381 targetphase = phaseall = phases.public
382 382 else:
383 383 # closer target phase computation
384 384
385 385 # Those changesets have been pushed from the
386 386 # outside, their phases are going to be pushed
387 387 # alongside. Therefore `targetphase` is
388 388 # ignored.
389 389 targetphase = phaseall = phases.draft
390 390 if added:
391 391 phases.registernew(repo, tr, targetphase, added)
392 392 if phaseall is not None:
393 393 phases.advanceboundary(repo, tr, phaseall, cgnodes)
394 394
395 395 if changesets > 0:
396 396
397 397 def runhooks():
398 398 # These hooks run when the lock releases, not when the
399 399 # transaction closes. So it's possible for the changelog
400 400 # to have changed since we last saw it.
401 401 if clstart >= len(repo):
402 402 return
403 403
404 404 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
405 405
406 406 for n in added:
407 407 args = hookargs.copy()
408 408 args['node'] = hex(n)
409 409 del args['node_last']
410 410 repo.hook("incoming", **pycompat.strkwargs(args))
411 411
412 412 newheads = [h for h in repo.heads()
413 413 if h not in oldheads]
414 414 repo.ui.log("incoming",
415 415 "%d incoming changes - new heads: %s\n",
416 416 len(added),
417 417 ', '.join([hex(c[:6]) for c in newheads]))
418 418
419 419 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
420 420 lambda tr: repo._afterlock(runhooks))
421 421 finally:
422 422 repo.ui.flush()
423 423 # never return 0 here:
424 424 if deltaheads < 0:
425 425 ret = deltaheads - 1
426 426 else:
427 427 ret = deltaheads + 1
428 428 return ret
429 429
430 430 def deltaiter(self):
431 431 """
432 432 returns an iterator of the deltas in this changegroup
433 433
434 434 Useful for passing to the underlying storage system to be stored.
435 435 """
436 436 chain = None
437 437 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
438 438 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
439 439 yield chunkdata
440 440 chain = chunkdata[0]
441 441
442 442 class cg2unpacker(cg1unpacker):
443 443 """Unpacker for cg2 streams.
444 444
445 445 cg2 streams add support for generaldelta, so the delta header
446 446 format is slightly different. All other features about the data
447 447 remain the same.
448 448 """
449 449 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
450 450 deltaheadersize = struct.calcsize(deltaheader)
451 451 version = '02'
452 452
453 453 def _deltaheader(self, headertuple, prevnode):
454 454 node, p1, p2, deltabase, cs = headertuple
455 455 flags = 0
456 456 return node, p1, p2, deltabase, cs, flags
457 457
458 458 class cg3unpacker(cg2unpacker):
459 459 """Unpacker for cg3 streams.
460 460
461 461 cg3 streams add support for exchanging treemanifests and revlog
462 462 flags. It adds the revlog flags to the delta header and an empty chunk
463 463 separating manifests and files.
464 464 """
465 465 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
466 466 deltaheadersize = struct.calcsize(deltaheader)
467 467 version = '03'
468 468 _grouplistcount = 2 # One list of manifests and one list of files
469 469
470 470 def _deltaheader(self, headertuple, prevnode):
471 471 node, p1, p2, deltabase, cs, flags = headertuple
472 472 return node, p1, p2, deltabase, cs, flags
473 473
474 474 def _unpackmanifests(self, repo, revmap, trp, prog):
475 475 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
476 476 for chunkdata in iter(self.filelogheader, {}):
477 477 # If we get here, there are directory manifests in the changegroup
478 478 d = chunkdata["filename"]
479 479 repo.ui.debug("adding %s revisions\n" % d)
480 480 dirlog = repo.manifestlog._revlog.dirlog(d)
481 481 deltas = self.deltaiter()
482 482 if not dirlog.addgroup(deltas, revmap, trp):
483 483 raise error.Abort(_("received dir revlog group is empty"))
484 484
485 485 class headerlessfixup(object):
486 486 def __init__(self, fh, h):
487 487 self._h = h
488 488 self._fh = fh
489 489 def read(self, n):
490 490 if self._h:
491 491 d, self._h = self._h[:n], self._h[n:]
492 492 if len(d) < n:
493 493 d += readexactly(self._fh, n - len(d))
494 494 return d
495 495 return readexactly(self._fh, n)
496 496
497 497 def ellipsisdata(packer, rev, revlog_, p1, p2, data, linknode):
498 498 n = revlog_.node(rev)
499 499 p1n, p2n = revlog_.node(p1), revlog_.node(p2)
500 500 flags = revlog_.flags(rev)
501 501 flags |= revlog.REVIDX_ELLIPSIS
502 502 meta = packer.builddeltaheader(
503 503 n, p1n, p2n, nullid, linknode, flags)
504 504 # TODO: try and actually send deltas for ellipsis data blocks
505 505 diffheader = mdiff.trivialdiffheader(len(data))
506 506 l = len(meta) + len(diffheader) + len(data)
507 507 return ''.join((chunkheader(l),
508 508 meta,
509 509 diffheader,
510 510 data))
511 511
512 512 class cg1packer(object):
513 513 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
514 514 version = '01'
515 515 def __init__(self, repo, filematcher, bundlecaps=None):
516 516 """Given a source repo, construct a bundler.
517 517
518 518 filematcher is a matcher that matches on files to include in the
519 519 changegroup. Used to facilitate sparse changegroups.
520 520
521 521 bundlecaps is optional and can be used to specify the set of
522 522 capabilities which can be used to build the bundle. While bundlecaps is
523 523 unused in core Mercurial, extensions rely on this feature to communicate
524 524 capabilities to customize the changegroup packer.
525 525 """
526 526 assert filematcher
527 527 self._filematcher = filematcher
528 528
529 529 # Set of capabilities we can use to build the bundle.
530 530 if bundlecaps is None:
531 531 bundlecaps = set()
532 532 self._bundlecaps = bundlecaps
533 533 # experimental config: bundle.reorder
534 534 reorder = repo.ui.config('bundle', 'reorder')
535 535 if reorder == 'auto':
536 536 reorder = None
537 537 else:
538 538 reorder = stringutil.parsebool(reorder)
539 539 self._repo = repo
540 540 self._reorder = reorder
541 541 if self._repo.ui.verbose and not self._repo.ui.debugflag:
542 542 self._verbosenote = self._repo.ui.note
543 543 else:
544 544 self._verbosenote = lambda s: None
545 545
546 546 def close(self):
547 547 return closechunk()
548 548
549 549 def fileheader(self, fname):
550 550 return chunkheader(len(fname)) + fname
551 551
552 552 # Extracted both for clarity and for overriding in extensions.
553 553 def _sortgroup(self, revlog, nodelist, lookup):
554 554 """Sort nodes for change group and turn them into revnums."""
555 555 # for generaldelta revlogs, we linearize the revs; this will both be
556 556 # much quicker and generate a much smaller bundle
557 557 if (revlog._generaldelta and self._reorder is None) or self._reorder:
558 558 dag = dagutil.revlogdag(revlog)
559 559 return dag.linearize(set(revlog.rev(n) for n in nodelist))
560 560 else:
561 561 return sorted([revlog.rev(n) for n in nodelist])
562 562
563 563 def group(self, nodelist, revlog, lookup, units=None):
564 564 """Calculate a delta group, yielding a sequence of changegroup chunks
565 565 (strings).
566 566
567 567 Given a list of changeset revs, return a set of deltas and
568 568 metadata corresponding to nodes. The first delta is
569 569 first parent(nodelist[0]) -> nodelist[0], the receiver is
570 570 guaranteed to have this parent as it has all history before
571 571 these changesets. In the case firstparent is nullrev the
572 572 changegroup starts with a full revision.
573 573
574 574 If units is not None, progress detail will be generated, units specifies
575 575 the type of revlog that is touched (changelog, manifest, etc.).
576 576 """
577 577 # if we don't have any revisions touched by these changesets, bail
578 578 if len(nodelist) == 0:
579 579 yield self.close()
580 580 return
581 581
582 582 revs = self._sortgroup(revlog, nodelist, lookup)
583 583
584 584 # add the parent of the first rev
585 585 p = revlog.parentrevs(revs[0])[0]
586 586 revs.insert(0, p)
587 587
588 588 # build deltas
589 589 progress = None
590 590 if units is not None:
591 591 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
592 592 total=(len(revs) - 1))
593 593 for r in pycompat.xrange(len(revs) - 1):
594 594 if progress:
595 595 progress.update(r + 1)
596 596 prev, curr = revs[r], revs[r + 1]
597 597 linknode = lookup(revlog.node(curr))
598 598 for c in self.revchunk(revlog, curr, prev, linknode):
599 599 yield c
600 600
601 601 if progress:
602 602 progress.complete()
603 603 yield self.close()
604 604
605 605 # filter any nodes that claim to be part of the known set
606 606 def prune(self, revlog, missing, commonrevs):
607 607 # TODO this violates storage abstraction for manifests.
608 608 if isinstance(revlog, manifest.manifestrevlog):
609 609 if not self._filematcher.visitdir(revlog._dir[:-1] or '.'):
610 610 return []
611 611
612 612 rr, rl = revlog.rev, revlog.linkrev
613 613 return [n for n in missing if rl(rr(n)) not in commonrevs]
614 614
615 615 def _packmanifests(self, dir, mfnodes, lookuplinknode):
616 616 """Pack flat manifests into a changegroup stream."""
617 617 assert not dir
618 618 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
619 619 lookuplinknode, units=_('manifests')):
620 620 yield chunk
621 621
622 622 def _manifestsdone(self):
623 623 return ''
624 624
625 625 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
626 626 '''yield a sequence of changegroup chunks (strings)'''
627 627 repo = self._repo
628 628 cl = repo.changelog
629 629
630 630 clrevorder = {}
631 631 mfs = {} # needed manifests
632 632 fnodes = {} # needed file nodes
633 633 changedfiles = set()
634 634
635 635 # Callback for the changelog, used to collect changed files and manifest
636 636 # nodes.
637 637 # Returns the linkrev node (identity in the changelog case).
638 638 def lookupcl(x):
639 639 c = cl.read(x)
640 640 clrevorder[x] = len(clrevorder)
641 641 n = c[0]
642 642 # record the first changeset introducing this manifest version
643 643 mfs.setdefault(n, x)
644 644 # Record a complete list of potentially-changed files in
645 645 # this manifest.
646 646 changedfiles.update(c[3])
647 647 return x
648 648
649 649 self._verbosenote(_('uncompressed size of bundle content:\n'))
650 650 size = 0
651 651 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
652 652 size += len(chunk)
653 653 yield chunk
654 654 self._verbosenote(_('%8.i (changelog)\n') % size)
655 655
656 656 # We need to make sure that the linkrev in the changegroup refers to
657 657 # the first changeset that introduced the manifest or file revision.
658 658 # The fastpath is usually safer than the slowpath, because the filelogs
659 659 # are walked in revlog order.
660 660 #
661 661 # When taking the slowpath with reorder=None and the manifest revlog
662 662 # uses generaldelta, the manifest may be walked in the "wrong" order.
663 663 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
664 664 # cc0ff93d0c0c).
665 665 #
666 666 # When taking the fastpath, we are only vulnerable to reordering
667 667 # of the changelog itself. The changelog never uses generaldelta, so
668 668 # it is only reordered when reorder=True. To handle this case, we
669 669 # simply take the slowpath, which already has the 'clrevorder' logic.
670 670 # This was also fixed in cc0ff93d0c0c.
671 671 fastpathlinkrev = fastpathlinkrev and not self._reorder
672 672 # Treemanifests don't work correctly with fastpathlinkrev
673 673 # either, because we don't discover which directory nodes to
674 674 # send along with files. This could probably be fixed.
675 675 fastpathlinkrev = fastpathlinkrev and (
676 676 'treemanifest' not in repo.requirements)
677 677
678 678 for chunk in self.generatemanifests(commonrevs, clrevorder,
679 679 fastpathlinkrev, mfs, fnodes, source):
680 680 yield chunk
681 681 mfs.clear()
682 682 clrevs = set(cl.rev(x) for x in clnodes)
683 683
684 684 if not fastpathlinkrev:
685 685 def linknodes(unused, fname):
686 686 return fnodes.get(fname, {})
687 687 else:
688 688 cln = cl.node
689 689 def linknodes(filerevlog, fname):
690 690 llr = filerevlog.linkrev
691 691 fln = filerevlog.node
692 692 revs = ((r, llr(r)) for r in filerevlog)
693 693 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
694 694
695 695 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
696 696 source):
697 697 yield chunk
698 698
699 699 yield self.close()
700 700
701 701 if clnodes:
702 702 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
703 703
704 704 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
705 705 fnodes, source):
706 706 """Returns an iterator of changegroup chunks containing manifests.
707 707
708 708 `source` is unused here, but is used by extensions like remotefilelog to
709 709 change what is sent based on pulls vs pushes, etc.
710 710 """
711 711 repo = self._repo
712 712 mfl = repo.manifestlog
713 713 dirlog = mfl._revlog.dirlog
714 714 tmfnodes = {'': mfs}
715 715
716 716 # Callback for the manifest, used to collect linkrevs for filelog
717 717 # revisions.
718 718 # Returns the linkrev node (collected in lookupcl).
719 719 def makelookupmflinknode(dir, nodes):
720 720 if fastpathlinkrev:
721 721 assert not dir
722 722 return mfs.__getitem__
723 723
724 724 def lookupmflinknode(x):
725 725 """Callback for looking up the linknode for manifests.
726 726
727 727 Returns the linkrev node for the specified manifest.
728 728
729 729 SIDE EFFECT:
730 730
731 731 1) fclnodes gets populated with the list of relevant
732 732 file nodes if we're not using fastpathlinkrev
733 733 2) When treemanifests are in use, collects treemanifest nodes
734 734 to send
735 735
736 736 Note that this means manifests must be completely sent to
737 737 the client before you can trust the list of files and
738 738 treemanifests to send.
739 739 """
740 740 clnode = nodes[x]
741 741 mdata = mfl.get(dir, x).readfast(shallow=True)
742 742 for p, n, fl in mdata.iterentries():
743 743 if fl == 't': # subdirectory manifest
744 744 subdir = dir + p + '/'
745 745 tmfclnodes = tmfnodes.setdefault(subdir, {})
746 746 tmfclnode = tmfclnodes.setdefault(n, clnode)
747 747 if clrevorder[clnode] < clrevorder[tmfclnode]:
748 748 tmfclnodes[n] = clnode
749 749 else:
750 750 f = dir + p
751 751 fclnodes = fnodes.setdefault(f, {})
752 752 fclnode = fclnodes.setdefault(n, clnode)
753 753 if clrevorder[clnode] < clrevorder[fclnode]:
754 754 fclnodes[n] = clnode
755 755 return clnode
756 756 return lookupmflinknode
757 757
758 758 size = 0
759 759 while tmfnodes:
760 760 dir, nodes = tmfnodes.popitem()
761 761 prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
762 762 if not dir or prunednodes:
763 763 for x in self._packmanifests(dir, prunednodes,
764 764 makelookupmflinknode(dir, nodes)):
765 765 size += len(x)
766 766 yield x
767 767 self._verbosenote(_('%8.i (manifests)\n') % size)
768 768 yield self._manifestsdone()
769 769
770 770 # The 'source' parameter is useful for extensions
771 771 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
772 772 repo = self._repo
773 773 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
774 774 total=len(changedfiles))
775 775 for i, fname in enumerate(sorted(changedfiles)):
776 776 filerevlog = repo.file(fname)
777 777 if not filerevlog:
778 778 raise error.Abort(_("empty or missing file data for %s") %
779 779 fname)
780 780
781 781 linkrevnodes = linknodes(filerevlog, fname)
782 782 # Lookup for filenodes, we collected the linkrev nodes above in the
783 783 # fastpath case and with lookupmf in the slowpath case.
784 784 def lookupfilelog(x):
785 785 return linkrevnodes[x]
786 786
787 787 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
788 788 if filenodes:
789 789 progress.update(i + 1, item=fname)
790 790 h = self.fileheader(fname)
791 791 size = len(h)
792 792 yield h
793 793 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
794 794 size += len(chunk)
795 795 yield chunk
796 796 self._verbosenote(_('%8.i %s\n') % (size, fname))
797 797 progress.complete()
798 798
799 799 def deltaparent(self, revlog, rev, p1, p2, prev):
800 800 if not revlog.candelta(prev, rev):
801 801 raise error.ProgrammingError('cg1 should not be used in this case')
802 802 return prev
803 803
804 804 def revchunk(self, revlog, rev, prev, linknode):
805 if util.safehasattr(self, 'full_nodes'):
806 fn = self._revchunknarrow
807 else:
808 fn = self._revchunknormal
809
810 return fn(revlog, rev, prev, linknode)
811
812 def _revchunknormal(self, revlog, rev, prev, linknode):
805 813 node = revlog.node(rev)
806 814 p1, p2 = revlog.parentrevs(rev)
807 815 base = self.deltaparent(revlog, rev, p1, p2, prev)
808 816
809 817 prefix = ''
810 818 if revlog.iscensored(base) or revlog.iscensored(rev):
811 819 try:
812 820 delta = revlog.revision(node, raw=True)
813 821 except error.CensoredNodeError as e:
814 822 delta = e.tombstone
815 823 if base == nullrev:
816 824 prefix = mdiff.trivialdiffheader(len(delta))
817 825 else:
818 826 baselen = revlog.rawsize(base)
819 827 prefix = mdiff.replacediffheader(baselen, len(delta))
820 828 elif base == nullrev:
821 829 delta = revlog.revision(node, raw=True)
822 830 prefix = mdiff.trivialdiffheader(len(delta))
823 831 else:
824 832 delta = revlog.revdiff(base, rev)
825 833 p1n, p2n = revlog.parents(node)
826 834 basenode = revlog.node(base)
827 835 flags = revlog.flags(rev)
828 836 meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
829 837 meta += prefix
830 838 l = len(meta) + len(delta)
831 839 yield chunkheader(l)
832 840 yield meta
833 841 yield delta
842
843 def _revchunknarrow(self, revlog, rev, prev, linknode):
844 # build up some mapping information that's useful later. See
845 # the local() nested function below.
846 if not self.changelog_done:
847 self.clnode_to_rev[linknode] = rev
848 linkrev = rev
849 self.clrev_to_localrev[linkrev] = rev
850 else:
851 linkrev = self.clnode_to_rev[linknode]
852 self.clrev_to_localrev[linkrev] = rev
853
854 # This is a node to send in full, because the changeset it
855 # corresponds to was a full changeset.
856 if linknode in self.full_nodes:
857 for x in self._revchunknormal(revlog, rev, prev, linknode):
858 yield x
859 return
860
861 # At this point, a node can either be one we should skip or an
862 # ellipsis. If it's not an ellipsis, bail immediately.
863 if linkrev not in self.precomputed_ellipsis:
864 return
865
866 linkparents = self.precomputed_ellipsis[linkrev]
867 def local(clrev):
868 """Turn a changelog revnum into a local revnum.
869
870 The ellipsis dag is stored as revnums on the changelog,
871 but when we're producing ellipsis entries for
872 non-changelog revlogs, we need to turn those numbers into
873 something local. This does that for us, and during the
874 changelog sending phase will also expand the stored
875 mappings as needed.
876 """
877 if clrev == nullrev:
878 return nullrev
879
880 if not self.changelog_done:
881 # If we're doing the changelog, it's possible that we
882 # have a parent that is already on the client, and we
883 # need to store some extra mapping information so that
884 # our contained ellipsis nodes will be able to resolve
885 # their parents.
886 if clrev not in self.clrev_to_localrev:
887 clnode = revlog.node(clrev)
888 self.clnode_to_rev[clnode] = clrev
889 return clrev
890
891 # Walk the ellipsis-ized changelog breadth-first looking for a
892 # change that has been linked from the current revlog.
893 #
894 # For a flat manifest revlog only a single step should be necessary
895 # as all relevant changelog entries are relevant to the flat
896 # manifest.
897 #
898 # For a filelog or tree manifest dirlog however not every changelog
899 # entry will have been relevant, so we need to skip some changelog
900 # nodes even after ellipsis-izing.
901 walk = [clrev]
902 while walk:
903 p = walk[0]
904 walk = walk[1:]
905 if p in self.clrev_to_localrev:
906 return self.clrev_to_localrev[p]
907 elif p in self.full_nodes:
908 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
909 if pp != nullrev])
910 elif p in self.precomputed_ellipsis:
911 walk.extend([pp for pp in self.precomputed_ellipsis[p]
912 if pp != nullrev])
913 else:
914 # In this case, we've got an ellipsis with parents
915 # outside the current bundle (likely an
916 # incremental pull). We "know" that we can use the
917 # value of this same revlog at whatever revision
918 # is pointed to by linknode. "Know" is in scare
919 # quotes because I haven't done enough examination
920 # of edge cases to convince myself this is really
921 # a fact - it works for all the (admittedly
922 # thorough) cases in our testsuite, but I would be
923 # somewhat unsurprised to find a case in the wild
924 # where this breaks down a bit. That said, I don't
925 # know if it would hurt anything.
926 for i in pycompat.xrange(rev, 0, -1):
927 if revlog.linkrev(i) == clrev:
928 return i
929 # We failed to resolve a parent for this node, so
930 # we crash the changegroup construction.
931 raise error.Abort(
932 'unable to resolve parent while packing %r %r'
933 ' for changeset %r' % (revlog.indexfile, rev, clrev))
934
935 return nullrev
936
937 if not linkparents or (
938 revlog.parentrevs(rev) == (nullrev, nullrev)):
939 p1, p2 = nullrev, nullrev
940 elif len(linkparents) == 1:
941 p1, = sorted(local(p) for p in linkparents)
942 p2 = nullrev
943 else:
944 p1, p2 = sorted(local(p) for p in linkparents)
945 n = revlog.node(rev)
946
947 yield ellipsisdata(
948 self, rev, revlog, p1, p2, revlog.revision(n), linknode)
949
834 950 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
835 951 # do nothing with basenode, it is implicitly the previous one in HG10
836 952 # do nothing with flags, it is implicitly 0 for cg1 and cg2
837 953 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
838 954
839 955 class cg2packer(cg1packer):
840 956 version = '02'
841 957 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
842 958
843 959 def __init__(self, repo, filematcher, bundlecaps=None):
844 960 super(cg2packer, self).__init__(repo, filematcher,
845 961 bundlecaps=bundlecaps)
846 962
847 963 if self._reorder is None:
848 964 # Since generaldelta is directly supported by cg2, reordering
849 965 # generally doesn't help, so we disable it by default (treating
850 966 # bundle.reorder=auto just like bundle.reorder=False).
851 967 self._reorder = False
852 968
853 969 def deltaparent(self, revlog, rev, p1, p2, prev):
854 970 # Narrow ellipses mode.
855 971 if util.safehasattr(self, 'full_nodes'):
856 972 # TODO: send better deltas when in narrow mode.
857 973 #
858 974 # changegroup.group() loops over revisions to send,
859 975 # including revisions we'll skip. What this means is that
860 976 # `prev` will be a potentially useless delta base for all
861 977 # ellipsis nodes, as the client likely won't have it. In
862 978 # the future we should do bookkeeping about which nodes
863 979 # have been sent to the client, and try to be
864 980 # significantly smarter about delta bases. This is
865 981 # slightly tricky because this same code has to work for
866 982 # all revlogs, and we don't have the linkrev/linknode here.
867 983 return p1
868 984
869 985 dp = revlog.deltaparent(rev)
870 986 if dp == nullrev and revlog.storedeltachains:
871 987 # Avoid sending full revisions when delta parent is null. Pick prev
872 988 # in that case. It's tempting to pick p1 in this case, as p1 will
873 989 # be smaller in the common case. However, computing a delta against
874 990 # p1 may require resolving the raw text of p1, which could be
875 991 # expensive. The revlog caches should have prev cached, meaning
876 992 # less CPU for changegroup generation. There is likely room to add
877 993 # a flag and/or config option to control this behavior.
878 994 base = prev
879 995 elif dp == nullrev:
880 996 # revlog is configured to use full snapshot for a reason,
881 997 # stick to full snapshot.
882 998 base = nullrev
883 999 elif dp not in (p1, p2, prev):
884 1000 # Pick prev when we can't be sure remote has the base revision.
885 1001 return prev
886 1002 else:
887 1003 base = dp
888 1004 if base != nullrev and not revlog.candelta(base, rev):
889 1005 base = nullrev
890 1006 return base
891 1007
892 1008 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
893 1009 # Do nothing with flags, it is implicitly 0 in cg1 and cg2
894 1010 return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
895 1011
896 1012 class cg3packer(cg2packer):
897 1013 version = '03'
898 1014 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
899 1015
900 1016 def _packmanifests(self, dir, mfnodes, lookuplinknode):
901 1017 if dir:
902 1018 yield self.fileheader(dir)
903 1019
904 1020 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
905 1021 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
906 1022 units=_('manifests')):
907 1023 yield chunk
908 1024
909 1025 def _manifestsdone(self):
910 1026 return self.close()
911 1027
912 1028 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
913 1029 return struct.pack(
914 1030 self.deltaheader, node, p1n, p2n, basenode, linknode, flags)
915 1031
916 1032 _packermap = {'01': (cg1packer, cg1unpacker),
917 1033 # cg2 adds support for exchanging generaldelta
918 1034 '02': (cg2packer, cg2unpacker),
919 1035 # cg3 adds support for exchanging revlog flags and treemanifests
920 1036 '03': (cg3packer, cg3unpacker),
921 1037 }
922 1038
923 1039 def allsupportedversions(repo):
924 1040 versions = set(_packermap.keys())
925 1041 if not (repo.ui.configbool('experimental', 'changegroup3') or
926 1042 repo.ui.configbool('experimental', 'treemanifest') or
927 1043 'treemanifest' in repo.requirements):
928 1044 versions.discard('03')
929 1045 return versions
930 1046
931 1047 # Changegroup versions that can be applied to the repo
932 1048 def supportedincomingversions(repo):
933 1049 return allsupportedversions(repo)
934 1050
935 1051 # Changegroup versions that can be created from the repo
936 1052 def supportedoutgoingversions(repo):
937 1053 versions = allsupportedversions(repo)
938 1054 if 'treemanifest' in repo.requirements:
939 1055 # Versions 01 and 02 support only flat manifests and it's just too
940 1056 # expensive to convert between the flat manifest and tree manifest on
941 1057 # the fly. Since tree manifests are hashed differently, all of history
942 1058 # would have to be converted. Instead, we simply don't even pretend to
943 1059 # support versions 01 and 02.
944 1060 versions.discard('01')
945 1061 versions.discard('02')
946 1062 if repository.NARROW_REQUIREMENT in repo.requirements:
947 1063 # Versions 01 and 02 don't support revlog flags, and we need to
948 1064 # support that for stripping and unbundling to work.
949 1065 versions.discard('01')
950 1066 versions.discard('02')
951 1067 if LFS_REQUIREMENT in repo.requirements:
952 1068 # Versions 01 and 02 don't support revlog flags, and we need to
953 1069 # mark LFS entries with REVIDX_EXTSTORED.
954 1070 versions.discard('01')
955 1071 versions.discard('02')
956 1072
957 1073 return versions
958 1074
959 1075 def localversion(repo):
960 1076 # Finds the best version to use for bundles that are meant to be used
961 1077 # locally, such as those from strip and shelve, and temporary bundles.
962 1078 return max(supportedoutgoingversions(repo))
963 1079
964 1080 def safeversion(repo):
965 1081 # Finds the smallest version that it's safe to assume clients of the repo
966 1082 # will support. For example, all hg versions that support generaldelta also
967 1083 # support changegroup 02.
968 1084 versions = supportedoutgoingversions(repo)
969 1085 if 'generaldelta' in repo.requirements:
970 1086 versions.discard('01')
971 1087 assert versions
972 1088 return min(versions)
973 1089
974 1090 def getbundler(version, repo, bundlecaps=None, filematcher=None):
975 1091 assert version in supportedoutgoingversions(repo)
976 1092
977 1093 if filematcher is None:
978 1094 filematcher = matchmod.alwaysmatcher(repo.root, '')
979 1095
980 1096 if version == '01' and not filematcher.always():
981 1097 raise error.ProgrammingError('version 01 changegroups do not support '
982 1098 'sparse file matchers')
983 1099
984 1100 # Requested files could include files not in the local store. So
985 1101 # filter those out.
986 1102 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
987 1103 filematcher)
988 1104
989 1105 return _packermap[version][0](repo, filematcher=filematcher,
990 1106 bundlecaps=bundlecaps)
991 1107
992 1108 def getunbundler(version, fh, alg, extras=None):
993 1109 return _packermap[version][1](fh, alg, extras=extras)
994 1110
995 1111 def _changegroupinfo(repo, nodes, source):
996 1112 if repo.ui.verbose or source == 'bundle':
997 1113 repo.ui.status(_("%d changesets found\n") % len(nodes))
998 1114 if repo.ui.debugflag:
999 1115 repo.ui.debug("list of changesets:\n")
1000 1116 for node in nodes:
1001 1117 repo.ui.debug("%s\n" % hex(node))
1002 1118
1003 1119 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1004 1120 bundlecaps=None):
1005 1121 cgstream = makestream(repo, outgoing, version, source,
1006 1122 fastpath=fastpath, bundlecaps=bundlecaps)
1007 1123 return getunbundler(version, util.chunkbuffer(cgstream), None,
1008 1124 {'clcount': len(outgoing.missing) })
1009 1125
1010 1126 def makestream(repo, outgoing, version, source, fastpath=False,
1011 1127 bundlecaps=None, filematcher=None):
1012 1128 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1013 1129 filematcher=filematcher)
1014 1130
1015 1131 repo = repo.unfiltered()
1016 1132 commonrevs = outgoing.common
1017 1133 csets = outgoing.missing
1018 1134 heads = outgoing.missingheads
1019 1135 # We go through the fast path if we get told to, or if all (unfiltered
1020 1136 # heads have been requested (since we then know there all linkrevs will
1021 1137 # be pulled by the client).
1022 1138 heads.sort()
1023 1139 fastpathlinkrev = fastpath or (
1024 1140 repo.filtername is None and heads == sorted(repo.heads()))
1025 1141
1026 1142 repo.hook('preoutgoing', throw=True, source=source)
1027 1143 _changegroupinfo(repo, csets, source)
1028 1144 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1029 1145
1030 1146 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1031 1147 revisions = 0
1032 1148 files = 0
1033 1149 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1034 1150 total=expectedfiles)
1035 1151 for chunkdata in iter(source.filelogheader, {}):
1036 1152 files += 1
1037 1153 f = chunkdata["filename"]
1038 1154 repo.ui.debug("adding %s revisions\n" % f)
1039 1155 progress.increment()
1040 1156 fl = repo.file(f)
1041 1157 o = len(fl)
1042 1158 try:
1043 1159 deltas = source.deltaiter()
1044 1160 if not fl.addgroup(deltas, revmap, trp):
1045 1161 raise error.Abort(_("received file revlog group is empty"))
1046 1162 except error.CensoredBaseError as e:
1047 1163 raise error.Abort(_("received delta base is censored: %s") % e)
1048 1164 revisions += len(fl) - o
1049 1165 if f in needfiles:
1050 1166 needs = needfiles[f]
1051 1167 for new in pycompat.xrange(o, len(fl)):
1052 1168 n = fl.node(new)
1053 1169 if n in needs:
1054 1170 needs.remove(n)
1055 1171 else:
1056 1172 raise error.Abort(
1057 1173 _("received spurious file revlog entry"))
1058 1174 if not needs:
1059 1175 del needfiles[f]
1060 1176 progress.complete()
1061 1177
1062 1178 for f, needs in needfiles.iteritems():
1063 1179 fl = repo.file(f)
1064 1180 for n in needs:
1065 1181 try:
1066 1182 fl.rev(n)
1067 1183 except error.LookupError:
1068 1184 raise error.Abort(
1069 1185 _('missing file data for %s:%s - run hg verify') %
1070 1186 (f, hex(n)))
1071 1187
1072 1188 return revisions, files
1073 1189
1074 1190 def _packellipsischangegroup(repo, common, match, relevant_nodes,
1075 1191 ellipsisroots, visitnodes, depth, source, version):
1076 1192 if version in ('01', '02'):
1077 1193 raise error.Abort(
1078 1194 'ellipsis nodes require at least cg3 on client and server, '
1079 1195 'but negotiated version %s' % version)
1080 1196 # We wrap cg1packer.revchunk, using a side channel to pass
1081 1197 # relevant_nodes into that area. Then if linknode isn't in the
1082 1198 # set, we know we have an ellipsis node and we should defer
1083 1199 # sending that node's data. We override close() to detect
1084 1200 # pending ellipsis nodes and flush them.
1085 1201 packer = getbundler(version, repo, filematcher=match)
1086 1202 # Give the packer the list of nodes which should not be
1087 1203 # ellipsis nodes. We store this rather than the set of nodes
1088 1204 # that should be an ellipsis because for very large histories
1089 1205 # we expect this to be significantly smaller.
1090 1206 packer.full_nodes = relevant_nodes
1091 1207 # Maps ellipsis revs to their roots at the changelog level.
1092 1208 packer.precomputed_ellipsis = ellipsisroots
1093 1209 # Maps CL revs to per-revlog revisions. Cleared in close() at
1094 1210 # the end of each group.
1095 1211 packer.clrev_to_localrev = {}
1096 1212 packer.next_clrev_to_localrev = {}
1097 1213 # Maps changelog nodes to changelog revs. Filled in once
1098 1214 # during changelog stage and then left unmodified.
1099 1215 packer.clnode_to_rev = {}
1100 1216 packer.changelog_done = False
1101 1217 # If true, informs the packer that it is serving shallow content and might
1102 1218 # need to pack file contents not introduced by the changes being packed.
1103 1219 packer.is_shallow = depth is not None
1104 1220
1105 1221 return packer.generate(common, visitnodes, False, source)
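For reference, the chunk framing implemented by getchunk(), chunkheader(), and closechunk() above is a 4-byte big-endian length prefix that counts its own four bytes, with a length of zero marking an empty "close" chunk. A simplified, self-contained sketch mirroring those functions (error handling for invalid lengths omitted):

    import io
    import struct

    def chunkheader(length):
        # The length prefix counts its own four bytes.
        return struct.pack(">l", length + 4)

    def closechunk():
        # A zero-length chunk terminates a group.
        return struct.pack(">l", 0)

    def getchunk(stream):
        l = struct.unpack(">l", stream.read(4))[0]
        if l <= 4:
            return b""
        return stream.read(l - 4)

    stream = io.BytesIO(chunkheader(5) + b"hello" + closechunk())
    print(getchunk(stream))  # b'hello'
    print(getchunk(stream))  # b'' -> end of group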