##// END OF EJS Templates
changegroup: move deltaparent() from narrow...
Gregory Szorc -
r38921:5839a170 default
parent child Browse files
Show More
@@ -1,335 +1,318 b''
1 1 # narrowchangegroup.py - narrow clone changegroup creation and consumption
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from mercurial.i18n import _
11 11 from mercurial import (
12 12 changegroup,
13 13 error,
14 14 extensions,
15 15 node,
16 16 pycompat,
17 17 util,
18 18 )
19 19
20 20 def setup():
def generatefiles(orig, self, changedfiles, linknodes, commonrevs,
                  source):
    """Wrap the packer's generatefiles() for narrow/shallow changegroups.

    The list of changed files is first restricted to those accepted by
    ``self._filematcher``. When the packer is not marked ``is_shallow`` the
    call is then simply forwarded to ``orig``. For a shallow packer the
    ``linknodes`` callback is replaced by one that additionally reports
    file nodes found in the manifests stashed on ``self._mfdicts``.
    """
    changedfiles = list(filter(self._filematcher, changedfiles))

    if not getattr(self, 'is_shallow', False):
        return orig(self, changedfiles, linknodes, commonrevs, source)

    # See comment in generate() for why the manifests travel through an
    # instance attribute. Consume it so it does not outlive this call.
    mfdicts = self._mfdicts
    del self._mfdicts
    # In a shallow clone, the linknodes callback needs to also include
    # those file nodes that are in the manifests we sent but weren't
    # introduced by those manifests.
    commonctxs = [self._repo[c] for c in commonrevs]
    baselinknodes = linknodes
    clrev = self._repo.changelog.rev

    def shallowlinknodes(flog, fname):
        # Remember which local file revision each common changeset uses.
        for ctx in commonctxs:
            try:
                fnode = ctx.filenode(fname)
                self.clrev_to_localrev[ctx.rev()] = flog.rev(fnode)
            except error.ManifestLookupError:
                # The file does not exist in that changeset's manifest.
                pass
        links = baselinknodes(flog, fname)
        if len(links) != len(mfdicts):
            for mf, lr in mfdicts:
                fnode = mf.get(fname, None)
                if fnode in links:
                    # Keep whichever linknode has the lowest changelog rev.
                    links[fnode] = min(links[fnode], lr, key=clrev)
                elif fnode:
                    links[fnode] = lr
        return links

    return orig(self, changedfiles, shallowlinknodes, commonrevs, source)
52 52 extensions.wrapfunction(
53 53 changegroup.cg1packer, 'generatefiles', generatefiles)
54 54
def close(orig, self):
    """Wrap the packer's close() to rotate the narrow rev mappings.

    The current ``clrev_to_localrev`` mapping (if any) is emptied, a
    non-empty ``next_clrev_to_localrev`` is promoted in its place, and
    ``changelog_done`` is set before delegating to ``orig``.
    """
    current = getattr(self, 'clrev_to_localrev', {})
    current.clear()
    pending = getattr(self, 'next_clrev_to_localrev', {})
    if pending:
        self.clrev_to_localrev = pending
        del self.next_clrev_to_localrev
    self.changelog_done = True
    return orig(self)
62 62 extensions.wrapfunction(changegroup.cg1packer, 'close', close)
63 63
64 64 # In a perfect world, we'd generate better ellipsis-ified graphs
65 65 # for non-changelog revlogs. In practice, we haven't started doing
66 66 # that yet, so the resulting DAGs for the manifestlog and filelogs
67 67 # are actually full of bogus parentage on all the ellipsis
68 68 # nodes. This has the side effect that, while the contents are
69 69 # correct, the individual DAGs might be completely out of whack in
70 70 # a case like 882681bc3166 and its ancestors (back about 10
71 71 # revisions or so) in the main hg repo.
72 72 #
73 73 # The one invariant we *know* holds is that the new (potentially
74 74 # bogus) DAG shape will be valid if we order the nodes in the
75 75 # order that they're introduced in dramatis personae by the
76 76 # changelog, so what we do is we sort the non-changelog histories
77 77 # by the order in which they are used by the changelog.
def _sortgroup(orig, self, revlog, nodelist, lookup):
    """Order nodes by the changelog revision that references them.

    Falls back to ``orig`` when the packer has no ``full_nodes`` attribute
    or its ``clnode_to_rev`` mapping is empty. Otherwise each node is
    mapped through ``lookup`` to a changelog node, sorted by that node's
    entry in ``self.clnode_to_rev``, and returned as revision numbers of
    ``revlog``.
    """
    if not (util.safehasattr(self, 'full_nodes') and self.clnode_to_rev):
        return orig(self, revlog, nodelist, lookup)

    def clorder(n):
        return self.clnode_to_rev[lookup(n)]

    ordered = sorted(nodelist, key=clorder)
    return [revlog.rev(n) for n in ordered]
83 83
84 84 extensions.wrapfunction(changegroup.cg1packer, '_sortgroup', _sortgroup)
85 85
def generate(orig, self, commonrevs, clnodes, fastpathlinkrev, source):
    '''yield a sequence of changegroup chunks (strings)

    Wrapper around the packer's generate(). When the packer carries no
    ``full_nodes`` attribute the call is forwarded to ``orig`` unchanged.
    Otherwise the changelog, manifest and file groups are emitted in that
    order, followed by the result of ``self.close()``; if ``clnodes`` is
    non-empty the repository's 'outgoing' hook is run afterwards.
    '''
    # Note: other than delegating to orig, the only deviation in
    # logic from normal hg's generate is marked with BEGIN/END
    # NARROW HACK.
    if not util.safehasattr(self, 'full_nodes'):
        # not sending a narrow bundle
        for x in orig(self, commonrevs, clnodes, fastpathlinkrev, source):
            yield x
        return

    repo = self._repo
    cl = repo.changelog
    mfl = repo.manifestlog
    mfrevlog = mfl._revlog

    # Maps each changelog node to the order in which lookupcl() saw it.
    clrevorder = {}
    mfs = {} # needed manifests
    fnodes = {} # needed file nodes
    changedfiles = set()

    # Callback for the changelog, used to collect changed files and manifest
    # nodes.
    # Returns the linkrev node (identity in the changelog case).
    def lookupcl(x):
        c = cl.read(x)
        clrevorder[x] = len(clrevorder)
        # BEGIN NARROW HACK
        #
        # Only update mfs if x is going to be sent. Otherwise we
        # end up with bogus linkrevs specified for manifests and
        # we skip some manifest nodes that we should otherwise
        # have sent.
        if x in self.full_nodes or cl.rev(x) in self.precomputed_ellipsis:
            n = c[0]
            # record the first changeset introducing this manifest version
            mfs.setdefault(n, x)
            # Set this narrow-specific dict so we have the lowest manifest
            # revnum to look up for this cl revnum. (Part of mapping
            # changelog ellipsis parents to manifest ellipsis parents)
            self.next_clrev_to_localrev.setdefault(cl.rev(x),
                                                   mfrevlog.rev(n))
        # We can't trust the changed files list in the changeset if the
        # client requested a shallow clone.
        if self.is_shallow:
            changedfiles.update(mfl[c[0]].read().keys())
        else:
            changedfiles.update(c[3])
        # END NARROW HACK
        # Record a complete list of potentially-changed files in
        # this manifest.
        return x

    self._verbosenote(_('uncompressed size of bundle content:\n'))
    # Running total of changelog chunk bytes, reported via _verbosenote.
    size = 0
    for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
        size += len(chunk)
        yield chunk
    self._verbosenote(_('%8.i (changelog)\n') % size)

    # We need to make sure that the linkrev in the changegroup refers to
    # the first changeset that introduced the manifest or file revision.
    # The fastpath is usually safer than the slowpath, because the filelogs
    # are walked in revlog order.
    #
    # When taking the slowpath with reorder=None and the manifest revlog
    # uses generaldelta, the manifest may be walked in the "wrong" order.
    # Without 'clrevorder', we would get an incorrect linkrev (see fix in
    # cc0ff93d0c0c).
    #
    # When taking the fastpath, we are only vulnerable to reordering
    # of the changelog itself. The changelog never uses generaldelta, so
    # it is only reordered when reorder=True. To handle this case, we
    # simply take the slowpath, which already has the 'clrevorder' logic.
    # This was also fixed in cc0ff93d0c0c.
    fastpathlinkrev = fastpathlinkrev and not self._reorder
    # Treemanifests don't work correctly with fastpathlinkrev
    # either, because we don't discover which directory nodes to
    # send along with files. This could probably be fixed.
    fastpathlinkrev = fastpathlinkrev and (
        'treemanifest' not in repo.requirements)
    # Shallow clones also don't work correctly with fastpathlinkrev
    # because file nodes may need to be sent for a manifest even if they
    # weren't introduced by that manifest.
    fastpathlinkrev = fastpathlinkrev and not self.is_shallow

    for chunk in self.generatemanifests(commonrevs, clrevorder,
            fastpathlinkrev, mfs, fnodes, source):
        yield chunk
    # BEGIN NARROW HACK
    mfdicts = None
    if self.is_shallow:
        # Must be built before mfs is cleared just below.
        mfdicts = [(self._repo.manifestlog[n].read(), lr)
                   for (n, lr) in mfs.iteritems()]
    # END NARROW HACK
    mfs.clear()
    clrevs = set(cl.rev(x) for x in clnodes)

    if not fastpathlinkrev:
        # Slow path: use the file nodes collected in fnodes.
        def linknodes(unused, fname):
            return fnodes.get(fname, {})
    else:
        cln = cl.node
        # Fast path: derive linknodes from each filelog's linkrevs,
        # keeping only revisions linked to a changeset in clrevs.
        def linknodes(filerevlog, fname):
            llr = filerevlog.linkrev
            fln = filerevlog.node
            revs = ((r, llr(r)) for r in filerevlog)
            return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

    # BEGIN NARROW HACK
    #
    # We need to pass the mfdicts variable down into
    # generatefiles(), but more than one command might have
    # wrapped generatefiles so we can't modify the function
    # signature. Instead, we pass the data to ourselves using an
    # instance attribute. I'm sorry.
    self._mfdicts = mfdicts
    # END NARROW HACK
    for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                    source):
        yield chunk

    yield self.close()

    if clnodes:
        repo.hook('outgoing', node=node.hex(clnodes[0]), source=source)
212 212 extensions.wrapfunction(changegroup.cg1packer, 'generate', generate)
213 213
def revchunk(orig, self, revlog, rev, prev, linknode):
    """Yield the chunk(s) for one revision of a narrow changegroup.

    Delegates to ``orig`` when the packer has no ``full_nodes`` attribute
    or when ``linknode`` is in ``self.full_nodes``. A revision whose
    changelog rev is not in ``self.precomputed_ellipsis`` yields nothing.
    Otherwise a single ellipsis chunk built by changegroup.ellipsisdata()
    is yielded, with parents resolved by the nested local() helper.
    """
    if not util.safehasattr(self, 'full_nodes'):
        # not sending a narrow changegroup
        for x in orig(self, revlog, rev, prev, linknode):
            yield x
        return
    # build up some mapping information that's useful later. See
    # the local() nested function below.
    if not self.changelog_done:
        # While the changelog itself is being sent, rev is a changelog rev.
        self.clnode_to_rev[linknode] = rev
        linkrev = rev
        self.clrev_to_localrev[linkrev] = rev
    else:
        linkrev = self.clnode_to_rev[linknode]
        self.clrev_to_localrev[linkrev] = rev
    # This is a node to send in full, because the changeset it
    # corresponds to was a full changeset.
    if linknode in self.full_nodes:
        for x in orig(self, revlog, rev, prev, linknode):
            yield x
        return
    # At this point, a node can either be one we should skip or an
    # ellipsis. If it's not an ellipsis, bail immediately.
    if linkrev not in self.precomputed_ellipsis:
        return
    linkparents = self.precomputed_ellipsis[linkrev]
    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == node.nullrev:
            return node.nullrev
        if not self.changelog_done:
            # If we're doing the changelog, it's possible that we
            # have a parent that is already on the client, and we
            # need to store some extra mapping information so that
            # our contained ellipsis nodes will be able to resolve
            # their parents.
            if clrev not in self.clrev_to_localrev:
                clnode = revlog.node(clrev)
                self.clnode_to_rev[clnode] = clrev
            return clrev
        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            # Pop from the front so the list behaves as a FIFO queue.
            p = walk[0]
            walk = walk[1:]
            if p in self.clrev_to_localrev:
                return self.clrev_to_localrev[p]
            # NOTE(review): p is a changelog revnum here, while full_nodes
            # is tested against linknode above - confirm this membership
            # test is intended.
            elif p in self.full_nodes:
                walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
                             if pp != node.nullrev])
            elif p in self.precomputed_ellipsis:
                walk.extend([pp for pp in self.precomputed_ellipsis[p]
                             if pp != node.nullrev])
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                # NOTE(review): the range stops before revision 0 -
                # confirm that excluding rev 0 is intended.
                for i in pycompat.xrange(rev, 0, -1):
                    if revlog.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    'unable to resolve parent while packing %r %r'
                    ' for changeset %r' % (revlog.indexfile, rev, clrev))
        return node.nullrev

    if not linkparents or (
        revlog.parentrevs(rev) == (node.nullrev, node.nullrev)):
        p1, p2 = node.nullrev, node.nullrev
    elif len(linkparents) == 1:
        p1, = sorted(local(p) for p in linkparents)
        p2 = node.nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)
    n = revlog.node(rev)
    # The revision is sent as its full text (revlog.revision(n)).
    yield changegroup.ellipsisdata(
        self, rev, revlog, p1, p2, revlog.revision(n), linknode)
318 318 extensions.wrapfunction(changegroup.cg1packer, 'revchunk', revchunk)
319
def deltaparent(orig, self, revlog, rev, p1, p2, prev):
    """Choose the delta base for a revision; narrow packers always use p1.

    Packers without a ``full_nodes`` attribute defer to ``orig``.
    """
    if not util.safehasattr(self, 'full_nodes'):
        return orig(self, revlog, rev, p1, p2, prev)
    # TODO: send better deltas when in narrow mode.
    #
    # changegroup.group() loops over revisions to send,
    # including revisions we'll skip. What this means is that
    # `prev` will be a potentially useless delta base for all
    # ellipsis nodes, as the client likely won't have it. In
    # the future we should do bookkeeping about which nodes
    # have been sent to the client, and try to be
    # significantly smarter about delta bases. This is
    # slightly tricky because this same code has to work for
    # all revlogs, and we don't have the linkrev/linknode here.
    return p1
335 extensions.wrapfunction(changegroup.cg2packer, 'deltaparent', deltaparent)
@@ -1,1090 +1,1105 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from . import (
23 23 dagutil,
24 24 error,
25 25 manifest,
26 26 match as matchmod,
27 27 mdiff,
28 28 phases,
29 29 pycompat,
30 30 repository,
31 31 revlog,
32 32 util,
33 33 )
34 34
35 35 from .utils import (
36 36 stringutil,
37 37 )
38 38
39 39 _CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
40 40 _CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
41 41 _CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"
42 42
43 43 LFS_REQUIREMENT = 'lfs'
44 44
45 45 readexactly = util.readexactly
46 46
def getchunk(stream):
    """Read one length-prefixed chunk from stream and return its payload.

    The 4-byte big-endian length counts the prefix itself. A length of 0
    denotes an empty chunk and yields ""; any other length of 4 or less
    aborts.
    """
    prefix = readexactly(stream, 4)
    (framed,) = struct.unpack(">l", prefix)
    if framed > 4:
        return readexactly(stream, framed - 4)
    if framed != 0:
        raise error.Abort(_("invalid chunk length %d") % framed)
    return ""
56 56
def chunkheader(length):
    """Return the 4-byte header for a chunk carrying `length` payload bytes.

    The encoded value includes the 4 bytes of the header itself.
    """
    framed = length + 4
    return struct.pack(">l", framed)
60 60
def closechunk():
    """Return the header of a zero-length chunk (an encoded length of 0)."""
    empty = 0
    return struct.pack(">l", empty)
64 64
def writechunks(ui, chunks, filename, vfs=None):
    """Write every chunk to a file and return that file's name.

    The stream is assumed to be a bundle file.
    With a filename, the file is opened for binary writing through vfs
    when one is given, otherwise with the builtin open().
    If no filename is specified, a temporary file is created.
    If writing fails part-way, the file is removed again.
    """
    fh = None
    cleanup = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, r"wb")
        elif vfs:
            fh = vfs.open(filename, "wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            fh = open(filename, "wb", 131072)
        # From here on a failure must remove the partially written file.
        cleanup = filename
        for piece in chunks:
            fh.write(piece)
        # Everything was written; keep the file.
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
98 98
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    # struct format of the per-revision delta header, and its packed size.
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """Wrap file object fh, decompressing it with bundle type alg.

        An alg of None is treated as 'UN'. An alg that is not among
        util.compengines.supportedbundletypes aborts.
        """
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Optional callable invoked by _chunklength() for each
        # non-empty chunk.
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        """Return True unless the stream type is None or 'UN'."""
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk header and return the payload length that follows.

        Returns 0 for an empty chunk (encoded length 0) and aborts on any
        other encoded length of 4 or less. For a non-empty chunk,
        self.callback is invoked when set.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        # The encoded length includes the 4 header bytes just consumed.
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Expand a 4-field header into (node, p1, p2, deltabase, cs, flags).

        This header carries no delta base: it is p1 when prevnode is
        None, otherwise prevnode. flags is always 0.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read the next delta chunk from the stream.

        Returns {} when an empty chunk is read, otherwise the tuple
        (node, p1, p2, cs, deltabase, delta, flags).
        """
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = struct.unpack(self.deltaheader, headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        # Note the field order differs from what _deltaheader() returns.
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contained in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parses the changegroup data; otherwise it
        would block in case of sshrepo because it doesn't know the end of
        the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was preceded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                # Re-emit the payload in slices of at most 2**20 bytes.
                pos = 0
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Add the manifest group to repo.manifestlog.

        prog.increment is installed as the chunk callback for the
        duration of the call.
        """
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'.  url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        # Linkrev callback for changesets: the revision about to be added.
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        # Linkrev callback for manifests and files.
        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            # From here on efiles is a count, not the set itself.
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                # New heads that close a branch are not counted.
                for h in heads:
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                             % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                # Only record 'node'/'node_last' on the transaction when it
                # has no 'node' yet; otherwise they go into a private copy.
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefore `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        # Node of the previously yielded chunk, passed to deltachunk().
        chain = None
        # deltachunk() returns {} at the end of the group, which stops iter().
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
441 441
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    The cg2 delta header names its delta base explicitly (needed for
    generaldelta), so only the header layout and its decoding differ from
    cg1.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # The base comes straight from the header; prevnode is unused and
        # cg2 carries no revlog flags, so flags is always 0.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
457 457
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 adds revlog flags to the delta header and a list of tree
    (directory) manifest groups between the manifests and the files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # The header already has all six fields; the unpacking only
        # enforces its shape.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        while True:
            header = self.filelogheader()
            if header == {}:
                # An empty header terminates the list of directory groups.
                break
            # If we get here, there are directory manifests in the changegroup
            dirname = header["filename"]
            repo.ui.debug("adding %s revisions\n" % dirname)
            dirlog = repo.manifestlog._revlog.dirlog(dirname)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
484 484
class headerlessfixup(object):
    """File-like reader that serves the bytes in h before reading from fh."""

    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        if not self._h:
            return readexactly(self._fh, n)
        # Serve from the buffered header first, topping up from the
        # underlying file when the header runs short.
        head = self._h[:n]
        self._h = self._h[n:]
        missing = n - len(head)
        if missing > 0:
            head += readexactly(self._fh, missing)
        return head
496 496
def ellipsisdata(packer, rev, revlog_, p1, p2, data, linknode):
    """Build one changegroup chunk carrying ``data`` as a full revision.

    ``p1`` and ``p2`` are revision numbers in ``revlog_``. The delta header
    is built by ``packer`` with nullid as the delta base and with
    REVIDX_ELLIPSIS added to the stored flags of ``rev``. Returns the chunk
    header, delta header, trivial diff header and data joined together.
    """
    node = revlog_.node(rev)
    p1node = revlog_.node(p1)
    p2node = revlog_.node(p2)
    flags = revlog_.flags(rev) | revlog.REVIDX_ELLIPSIS
    meta = packer.builddeltaheader(
        node, p1node, p2node, nullid, linknode, flags)
    # TODO: try and actually send deltas for ellipsis data blocks
    diffheader = mdiff.trivialdiffheader(len(data))
    payload = [meta, diffheader, data]
    total = sum(len(piece) for piece in payload)
    return ''.join([chunkheader(total)] + payload)
511 511
class cg1packer(object):
    """Generator of version '01' changegroup streams.

    generate() is the entry point: it yields the changelog group, then the
    manifest group(s), then one group per changed file, each as a sequence
    of string chunks. Subclasses override deltaheader/version and individual
    hooks (deltaparent, builddeltaheader, _packmanifests, _manifestsdone).
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    version = '01'
    def __init__(self, repo, filematcher, bundlecaps=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.
        """
        assert filematcher
        self._filematcher = filematcher

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        # experimental config: bundle.reorder
        # 'auto' is stored as None; any other value goes through parsebool.
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            reorder = None
        else:
            reorder = stringutil.parsebool(reorder)
        self._repo = repo
        self._reorder = reorder
        # Size notes are only emitted in verbose (but not debug) mode;
        # otherwise _verbosenote is a no-op.
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None

    def close(self):
        """Return closechunk(); yielded as the last chunk of every group."""
        return closechunk()

    def fileheader(self, fname):
        """Return the chunk announcing ``fname``: chunk header plus name."""
        return chunkheader(len(fname)) + fname

    # Extracted both for clarity and for overriding in extensions.
    def _sortgroup(self, revlog, nodelist, lookup):
        """Sort nodes for change group and turn them into revnums."""
        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (revlog._generaldelta and self._reorder is None) or self._reorder:
            dag = dagutil.revlogdag(revlog)
            return dag.linearize(set(revlog.rev(n) for n in nodelist))
        else:
            return sorted([revlog.rev(n) for n in nodelist])

    def group(self, nodelist, revlog, lookup, units=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield self.close()
            return

        revs = self._sortgroup(revlog, nodelist, lookup)

        # add the parent of the first rev
        p = revlog.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        progress = None
        if units is not None:
            progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
                                                  total=(len(revs) - 1))
        # Walk consecutive (prev, curr) pairs; revs[0] is only ever a base.
        for r in pycompat.xrange(len(revs) - 1):
            if progress:
                progress.update(r + 1)
            prev, curr = revs[r], revs[r + 1]
            # lookup() may have side effects (see generate()); it is called
            # exactly once per emitted revision, in emission order.
            linknode = lookup(revlog.node(curr))
            for c in self.revchunk(revlog, curr, prev, linknode):
                yield c

        if progress:
            progress.complete()
        yield self.close()

    # filter any nodes that claim to be part of the known set
    def prune(self, revlog, missing, commonrevs):
        """Return the nodes of ``missing`` whose linkrev is not in commonrevs.

        For a manifest revlog whose directory is not visited by the file
        matcher, an empty list is returned instead.
        """
        # TODO this violates storage abstraction for manifests.
        if isinstance(revlog, manifest.manifestrevlog):
            # _dir[:-1] drops the last character of the directory name; an
            # empty result is passed to the matcher as '.'.
            if not self._filematcher.visitdir(revlog._dir[:-1] or '.'):
                return []

        rr, rl = revlog.rev, revlog.linkrev
        return [n for n in missing if rl(rr(n)) not in commonrevs]

    def _packmanifests(self, dir, mfnodes, lookuplinknode):
        """Pack flat manifests into a changegroup stream."""
        assert not dir
        for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
                                lookuplinknode, units=_('manifests')):
            yield chunk

    def _manifestsdone(self):
        """Return the chunk emitted after all manifests (empty string here)."""
        return ''

    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)'''
        repo = self._repo
        cl = repo.changelog

        clrevorder = {}
        mfs = {} # needed manifests
        fnodes = {} # needed file nodes
        changedfiles = set()

        # Callback for the changelog, used to collect changed files and manifest
        # nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            # Position of x in emission order; used later to pick the
            # earliest-sent changeset as linknode.
            clrevorder[x] = len(clrevorder)
            n = c[0]
            # record the first changeset introducing this manifest version
            mfs.setdefault(n, x)
            # Record a complete list of potentially-changed files in
            # this manifest.
            changedfiles.update(c[3])
            return x

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0
        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (changelog)\n') % size)

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                fastpathlinkrev, mfs, fnodes, source):
            yield chunk
        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        if not fastpathlinkrev:
            # Slow path: fnodes was filled in while the manifests were sent.
            def linknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = cl.node
            # Fast path: derive {filenode: changelog node} straight from the
            # filelog linkrevs, restricted to the changesets being sent.
            def linknodes(filerevlog, fname):
                llr = filerevlog.linkrev
                fln = filerevlog.node
                revs = ((r, llr(r)) for r in filerevlog)
                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self.close()

        # The hook runs only once the consumer has drained the generator.
        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)

    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        # Work list: directory -> {manifest node: linknode}. '' is the root.
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                   file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                   to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Keep whichever changeset was emitted first.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        # Keep whichever changeset was emitted first.
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        size = 0
        # tmfnodes may grow while it is being drained: sending one manifest
        # can register subdirectory manifests via lookupmflinknode.
        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
            # The root manifest group is always emitted, even when empty.
            if not dir or prunednodes:
                for x in self._packmanifests(dir, prunednodes,
                                             makelookupmflinknode(dir, nodes)):
                    size += len(x)
                    yield x
        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsdone()

    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """Yield the chunks of one file group per changed file, sorted by name.

        ``linknodes(filerevlog, fname)`` must return a mapping from file
        node to linknode. Files whose nodes are all pruned produce no
        output. Raises error.Abort when repo.file(fname) is falsy.
        """
        repo = self._repo
        progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
                                        total=len(changedfiles))
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing file data for %s") %
                                  fname)

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
            if filenodes:
                progress.update(i + 1, item=fname)
                h = self.fileheader(fname)
                size = len(h)
                yield h
                for chunk in self.group(filenodes, filerevlog, lookupfilelog):
                    size += len(chunk)
                    yield chunk
                self._verbosenote(_('%8.i %s\n') % (size, fname))
        progress.complete()

    def deltaparent(self, revlog, rev, p1, p2, prev):
        """Return the revision to delta ``rev`` against: always ``prev``.

        Raises error.ProgrammingError when revlog.candelta(prev, rev) is
        false.
        """
        if not revlog.candelta(prev, rev):
            raise error.ProgrammingError('cg1 should not be used in this case')
        return prev

    def revchunk(self, revlog, rev, prev, linknode):
        """Yield the three strings making up the chunk for ``rev``.

        In order: the chunk header, the delta header (with any diff prefix
        appended), and the delta payload.
        """
        node = revlog.node(rev)
        p1, p2 = revlog.parentrevs(rev)
        base = self.deltaparent(revlog, rev, p1, p2, prev)

        prefix = ''
        if revlog.iscensored(base) or revlog.iscensored(rev):
            # Censored data: send the raw text (or the tombstone) behind a
            # diff header instead of computing a real delta.
            try:
                delta = revlog.revision(node, raw=True)
            except error.CensoredNodeError as e:
                delta = e.tombstone
            if base == nullrev:
                prefix = mdiff.trivialdiffheader(len(delta))
            else:
                baselen = revlog.rawsize(base)
                prefix = mdiff.replacediffheader(baselen, len(delta))
        elif base == nullrev:
            # No base: send the full raw revision behind a trivial header.
            delta = revlog.revision(node, raw=True)
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            delta = revlog.revdiff(base, rev)
        p1n, p2n = revlog.parents(node)
        basenode = revlog.node(base)
        flags = revlog.flags(rev)
        meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
        meta += prefix
        l = len(meta) + len(delta)
        yield chunkheader(l)
        yield meta
        yield delta

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        """Pack the delta header; only node, parents and linknode are used."""
        # do nothing with basenode, it is implicitly the previous one in HG10
        # do nothing with flags, it is implicitly 0 for cg1 and cg2
        return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
838 838
class cg2packer(cg1packer):
    """Packer for version '02' changegroups.

    Differs from cg1packer in two ways visible here: the delta base node is
    written into the delta header (see builddeltaheader), and the delta
    base is chosen per revision rather than always being the previously
    sent revision (see deltaparent).
    """
    version = '02'
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER

    def __init__(self, repo, filematcher, bundlecaps=None):
        """Construct the packer; see cg1packer.__init__ for the arguments."""
        super(cg2packer, self).__init__(repo, filematcher,
                                        bundlecaps=bundlecaps)

        if self._reorder is None:
            # Since generaldelta is directly supported by cg2, reordering
            # generally doesn't help, so we disable it by default (treating
            # bundle.reorder=auto just like bundle.reorder=False).
            self._reorder = False

    def deltaparent(self, revlog, rev, p1, p2, prev):
        """Return the revision that ``rev`` should be sent as a delta against.

        ``p1``/``p2`` are the parent revisions of ``rev`` and ``prev`` is the
        revision emitted immediately before it. Returns nullrev when a full
        revision should be sent.
        """
        # Narrow ellipses mode.
        if util.safehasattr(self, 'full_nodes'):
            # TODO: send better deltas when in narrow mode.
            #
            # changegroup.group() loops over revisions to send,
            # including revisions we'll skip. What this means is that
            # `prev` will be a potentially useless delta base for all
            # ellipsis nodes, as the client likely won't have it. In
            # the future we should do bookkeeping about which nodes
            # have been sent to the client, and try to be
            # significantly smarter about delta bases. This is
            # slightly tricky because this same code has to work for
            # all revlogs, and we don't have the linkrev/linknode here.
            return p1

        dp = revlog.deltaparent(rev)
        if dp == nullrev and revlog.storedeltachains:
            # Avoid sending full revisions when delta parent is null. Pick prev
            # in that case. It's tempting to pick p1 in this case, as p1 will
            # be smaller in the common case. However, computing a delta against
            # p1 may require resolving the raw text of p1, which could be
            # expensive. The revlog caches should have prev cached, meaning
            # less CPU for changegroup generation. There is likely room to add
            # a flag and/or config option to control this behavior.
            base = prev
        elif dp == nullrev:
            # revlog is configured to use full snapshot for a reason,
            # stick to full snapshot.
            base = nullrev
        elif dp not in (p1, p2, prev):
            # Pick prev when we can't be sure remote has the base revision.
            # Do not return early: prev must pass the candelta() check
            # below like every other candidate base.
            base = prev
        else:
            base = dp
        # Fall back to a full revision when the revlog says a delta between
        # the chosen base and rev is not allowed.
        if base != nullrev and not revlog.candelta(base, rev):
            base = nullrev
        return base

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        """Pack the cg2 delta header, which includes the delta base node."""
        # Do nothing with flags, it is implicitly 0 in cg1 and cg2
        return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
880 895
class cg3packer(cg2packer):
    """Packer for version '03' changegroups.

    Adds revlog flags to the delta header, supports per-directory manifest
    groups, and terminates the manifest section with a close chunk.
    """
    version = '03'
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER

    def _packmanifests(self, dir, mfnodes, lookuplinknode):
        """Yield the manifest group for ``dir`` ('' is the root manifest)."""
        # Directory groups are announced with a header; the root is not.
        if dir:
            yield self.fileheader(dir)

        store = self._repo.manifestlog._revlog.dirlog(dir)
        chunks = self.group(mfnodes, store, lookuplinknode,
                            units=_('manifests'))
        for chunk in chunks:
            yield chunk

    def _manifestsdone(self):
        """Return the close chunk that ends the manifest section."""
        return self.close()

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        """Pack the cg3 delta header: all six fields, flags included."""
        fields = (node, p1n, p2n, basenode, linknode, flags)
        return struct.pack(self.deltaheader, *fields)
900 915
# Maps a changegroup version string to its (packer class, unpacker class).
_packermap = {'01': (cg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (cg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (cg3packer, cg3unpacker),
}
907 922
def allsupportedversions(repo):
    """Return the set of changegroup versions known for ``repo``.

    Version '03' is only kept when experimental.changegroup3 or
    experimental.treemanifest is enabled, or when the repository has the
    'treemanifest' requirement.
    """
    versions = set(_packermap)
    ui = repo.ui
    cg3enabled = (ui.configbool('experimental', 'changegroup3') or
                  ui.configbool('experimental', 'treemanifest') or
                  'treemanifest' in repo.requirements)
    if not cg3enabled:
        versions.discard('03')
    return versions
915 930
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return the changegroup versions that can be applied to ``repo``.

    This is exactly the set returned by allsupportedversions(repo).
    """
    return allsupportedversions(repo)
919 934
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the set of changegroup versions ``repo`` can produce.

    Starts from allsupportedversions(repo) and drops '01' and '02' when
    the repository has the treemanifest, narrow or LFS requirement.
    """
    versions = allsupportedversions(repo)
    requirements = repo.requirements
    # Each of these requirements rules out versions 01 and 02:
    #
    # - treemanifest: versions 01 and 02 support only flat manifests and
    #   it's just too expensive to convert between the flat manifest and
    #   tree manifest on the fly. Since tree manifests are hashed
    #   differently, all of history would have to be converted. Instead, we
    #   simply don't even pretend to support versions 01 and 02.
    # - narrow: versions 01 and 02 don't support revlog flags, and we need
    #   to support that for stripping and unbundling to work.
    # - LFS: versions 01 and 02 don't support revlog flags, and we need to
    #   mark LFS entries with REVIDX_EXTSTORED.
    needsflagsortrees = (
        'treemanifest' in requirements or
        repository.NARROW_REQUIREMENT in requirements or
        LFS_REQUIREMENT in requirements)
    if needsflagsortrees:
        versions.discard('01')
        versions.discard('02')

    return versions
943 958
def localversion(repo):
    """Return the highest changegroup version ``repo`` can produce."""
    # Finds the best version to use for bundles that are meant to be used
    # locally, such as those from strip and shelve, and temporary bundles.
    return max(supportedoutgoingversions(repo))
948 963
def safeversion(repo):
    """Return the lowest outgoing version that clients can be assumed to read.

    For example, all hg versions that support generaldelta also support
    changegroup 02, so '01' is not considered for generaldelta repos.
    """
    candidates = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        candidates.discard('01')
    assert candidates
    return min(candidates)
958 973
def getbundler(version, repo, bundlecaps=None, filematcher=None):
    """Instantiate the packer class registered for ``version``.

    ``filematcher`` defaults to a matcher accepting every file and is
    always intersected with the repository's narrow matcher. Raises
    error.ProgrammingError when a non-trivial matcher is combined with
    version '01'.
    """
    assert version in supportedoutgoingversions(repo)

    matcher = filematcher
    if matcher is None:
        matcher = matchmod.alwaysmatcher(repo.root, '')

    sparse = not matcher.always()
    if version == '01' and sparse:
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    # Requested files could include files not in the local store. So
    # filter those out.
    matcher = matchmod.intersectmatchers(repo.narrowmatch(), matcher)

    packercls = _packermap[version][0]
    return packercls(repo, filematcher=matcher, bundlecaps=bundlecaps)
976 991
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the unpacker class registered for ``version``.

    ``fh``, ``alg`` and ``extras`` are passed through to the unpacker
    constructor unchanged.
    """
    return _packermap[version][1](fh, alg, extras=extras)
979 994
def _changegroupinfo(repo, nodes, source):
    """Report how many changesets are bundled and, in debug mode, which.

    The count is shown when the ui is verbose or ``source`` is 'bundle'.
    """
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if not ui.debugflag:
        return
    ui.debug("list of changesets:\n")
    for n in nodes:
        ui.debug("%s\n" % hex(n))
987 1002
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup stream and wrap it in the matching unbundler.

    The unbundler receives the number of outgoing changesets as the
    'clcount' extra.
    """
    stream = makestream(repo, outgoing, version, source,
                        fastpath=fastpath, bundlecaps=bundlecaps)
    buffered = util.chunkbuffer(stream)
    extras = {'clcount': len(outgoing.missing)}
    return getunbundler(version, buffered, None, extras)
994 1009
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return the chunk generator for the changesets in ``outgoing``.

    Runs the 'preoutgoing' hook (which may abort) before generation starts.
    Note that ``outgoing.missingheads`` is sorted in place.
    """
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    unfi = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    missingheads = outgoing.missingheads
    missingheads.sort()
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know that all linkrevs will
    # be pulled by the client).
    if fastpath:
        fastpathlinkrev = fastpath
    else:
        fastpathlinkrev = (unfi.filtername is None and
                           missingheads == sorted(unfi.heads()))

    unfi.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(unfi, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1014 1029
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply every file group from ``source`` to the repository's filelogs.

    ``needfiles`` maps a file name to the set of file nodes that must
    arrive; entries are removed as they are satisfied. Returns a
    (revisions added, file groups seen) pair. Raises error.Abort on an
    empty group, a censored delta base, an unexpected revision for a
    tracked file, or a needed node that is still missing afterwards.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        fname = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % fname)
        progress.increment()
        flog = repo.file(fname)
        oldlen = len(flog)
        try:
            deltas = source.deltaiter()
            if not flog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(flog) - oldlen
        if fname not in needfiles:
            continue
        # Every revision just added to a tracked file must be expected.
        needs = needfiles[fname]
        for newrev in pycompat.xrange(oldlen, len(flog)):
            newnode = flog.node(newrev)
            if newnode not in needs:
                raise error.Abort(
                    _("received spurious file revlog entry"))
            needs.remove(newnode)
        if not needs:
            del needfiles[fname]
    progress.complete()

    # Whatever is still listed must already exist locally.
    for fname, needs in needfiles.iteritems():
        flog = repo.file(fname)
        for wanted in needs:
            try:
                flog.rev(wanted)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (fname, hex(wanted)))

    return revisions, files
1058 1073
def _packellipsischangegroup(repo, common, match, relevant_nodes,
                             ellipsisroots, visitnodes, depth, source, version):
    """Return a changegroup chunk generator configured for ellipsis nodes.

    Raises error.Abort for versions '01' and '02'. The packer is obtained
    from getbundler() and then given the extra attributes that the
    ellipsis-aware code paths read.
    """
    if version in ('01', '02'):
        raise error.Abort(
            'ellipsis nodes require at least cg3 on client and server, '
            'but negotiated version %s' % version)
    # We wrap cg1packer.revchunk, using a side channel to pass
    # relevant_nodes into that area. Then if linknode isn't in the
    # set, we know we have an ellipsis node and we should defer
    # sending that node's data. We override close() to detect
    # pending ellipsis nodes and flush them.
    bundler = getbundler(version, repo, filematcher=match)
    # Nodes which should not be ellipsis nodes. This is stored rather than
    # the set of ellipsis nodes because for very large histories it is
    # expected to be significantly smaller.
    bundler.full_nodes = relevant_nodes
    # Maps ellipsis revs to their roots at the changelog level.
    bundler.precomputed_ellipsis = ellipsisroots
    # Maps CL revs to per-revlog revisions. Cleared in close() at
    # the end of each group.
    bundler.clrev_to_localrev = {}
    bundler.next_clrev_to_localrev = {}
    # Maps changelog nodes to changelog revs. Filled in once
    # during changelog stage and then left unmodified.
    bundler.clnode_to_rev = {}
    bundler.changelog_done = False
    # If true, informs the packer that it is serving shallow content and might
    # need to pack file contents not introduced by the changes being packed.
    shallow = depth is not None
    bundler.is_shallow = shallow

    return bundler.generate(common, visitnodes, False, source)
General Comments 0
You need to be logged in to leave comments. Login now