##// END OF EJS Templates
changegroup: move ellipsisdata() from narrow...
Gregory Szorc -
r38919:ee1ea96c default
parent child Browse files
Show More
@@ -1,352 +1,335 b''
1 1 # narrowchangegroup.py - narrow clone changegroup creation and consumption
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from mercurial.i18n import _
11 11 from mercurial import (
12 12 changegroup,
13 13 error,
14 14 extensions,
15 mdiff,
16 15 node,
17 16 pycompat,
18 revlog,
19 17 util,
20 18 )
21 19
def setup():
    """Monkeypatch changegroup packing to support narrow (ellipsis) bundles.

    Wraps several cg1packer/cg2packer methods via extensions.wrapfunction
    so that, when the packer instance carries narrow-specific state
    (``full_nodes``, ``precomputed_ellipsis``, ...), changegroup
    generation filters files through the narrow matcher and emits
    ellipsis nodes instead of skipped revisions.  When that state is
    absent, every wrapper delegates straight to the original method.
    """
    def generatefiles(orig, self, changedfiles, linknodes, commonrevs,
                      source):
        # Only send files the narrow matcher accepts.
        changedfiles = list(filter(self._filematcher, changedfiles))

        if getattr(self, 'is_shallow', False):
            # See comment in generate() for why this sadness is a thing.
            mfdicts = self._mfdicts
            del self._mfdicts
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            oldlinknodes = linknodes
            clrev = self._repo.changelog.rev
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        self.clrev_to_localrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = oldlinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links
        return orig(self, changedfiles, linknodes, commonrevs, source)
    extensions.wrapfunction(
        changegroup.cg1packer, 'generatefiles', generatefiles)

    def close(orig, self):
        # Between revlog groups, roll the clrev->localrev mapping over:
        # the mapping built while the changelog was sent becomes the one
        # used for the following (manifest/file) groups.
        getattr(self, 'clrev_to_localrev', {}).clear()
        if getattr(self, 'next_clrev_to_localrev', {}):
            self.clrev_to_localrev = self.next_clrev_to_localrev
            del self.next_clrev_to_localrev
        self.changelog_done = True
        return orig(self)
    extensions.wrapfunction(changegroup.cg1packer, 'close', close)

    # In a perfect world, we'd generate better ellipsis-ified graphs
    # for non-changelog revlogs. In practice, we haven't started doing
    # that yet, so the resulting DAGs for the manifestlog and filelogs
    # are actually full of bogus parentage on all the ellipsis
    # nodes. This has the side effect that, while the contents are
    # correct, the individual DAGs might be completely out of whack in
    # a case like 882681bc3166 and its ancestors (back about 10
    # revisions or so) in the main hg repo.
    #
    # The one invariant we *know* holds is that the new (potentially
    # bogus) DAG shape will be valid if we order the nodes in the
    # order that they're introduced in dramatis personae by the
    # changelog, so what we do is we sort the non-changelog histories
    # by the order in which they are used by the changelog.
    def _sortgroup(orig, self, revlog, nodelist, lookup):
        if not util.safehasattr(self, 'full_nodes') or not self.clnode_to_rev:
            return orig(self, revlog, nodelist, lookup)
        key = lambda n: self.clnode_to_rev[lookup(n)]
        return [revlog.rev(n) for n in sorted(nodelist, key=key)]

    extensions.wrapfunction(changegroup.cg1packer, '_sortgroup', _sortgroup)

    def generate(orig, self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)'''
        # Note: other than delegating to orig, the only deviation in
        # logic from normal hg's generate is marked with BEGIN/END
        # NARROW HACK.
        if not util.safehasattr(self, 'full_nodes'):
            # not sending a narrow bundle
            for x in orig(self, commonrevs, clnodes, fastpathlinkrev, source):
                yield x
            return

        repo = self._repo
        cl = repo.changelog
        mfl = repo.manifestlog
        mfrevlog = mfl._revlog

        clrevorder = {}
        mfs = {}  # needed manifests
        fnodes = {}  # needed file nodes
        changedfiles = set()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)
            # BEGIN NARROW HACK
            #
            # Only update mfs if x is going to be sent. Otherwise we
            # end up with bogus linkrevs specified for manifests and
            # we skip some manifest nodes that we should otherwise
            # have sent.
            if x in self.full_nodes or cl.rev(x) in self.precomputed_ellipsis:
                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Set this narrow-specific dict so we have the lowest manifest
                # revnum to look up for this cl revnum. (Part of mapping
                # changelog ellipsis parents to manifest ellipsis parents)
                self.next_clrev_to_localrev.setdefault(cl.rev(x),
                                                       mfrevlog.rev(n))
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self.is_shallow:
                    changedfiles.update(mfl[c[0]].read().keys())
                else:
                    changedfiles.update(c[3])
            # END NARROW HACK
            # Record a complete list of potentially-changed files in
            # this manifest.
            return x

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0
        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (changelog)\n') % size)

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)
        # Shallow clones also don't work correctly with fastpathlinkrev
        # because file nodes may need to be sent for a manifest even if they
        # weren't introduced by that manifest.
        fastpathlinkrev = fastpathlinkrev and not self.is_shallow

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                fastpathlinkrev, mfs, fnodes, source):
            yield chunk
        # BEGIN NARROW HACK
        mfdicts = None
        if self.is_shallow:
            mfdicts = [(self._repo.manifestlog[n].read(), lr)
                       for (n, lr) in mfs.iteritems()]
        # END NARROW HACK
        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        if not fastpathlinkrev:
            def linknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = cl.node
            def linknodes(filerevlog, fname):
                llr = filerevlog.linkrev
                fln = filerevlog.node
                revs = ((r, llr(r)) for r in filerevlog)
                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

        # BEGIN NARROW HACK
        #
        # We need to pass the mfdicts variable down into
        # generatefiles(), but more than one command might have
        # wrapped generatefiles so we can't modify the function
        # signature. Instead, we pass the data to ourselves using an
        # instance attribute. I'm sorry.
        self._mfdicts = mfdicts
        # END NARROW HACK
        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self.close()

        if clnodes:
            repo.hook('outgoing', node=node.hex(clnodes[0]), source=source)
    extensions.wrapfunction(changegroup.cg1packer, 'generate', generate)

    def revchunk(orig, self, revlog, rev, prev, linknode):
        if not util.safehasattr(self, 'full_nodes'):
            # not sending a narrow changegroup
            for x in orig(self, revlog, rev, prev, linknode):
                yield x
            return
        # build up some mapping information that's useful later. See
        # the local() nested function below.
        if not self.changelog_done:
            self.clnode_to_rev[linknode] = rev
            linkrev = rev
            self.clrev_to_localrev[linkrev] = rev
        else:
            linkrev = self.clnode_to_rev[linknode]
            self.clrev_to_localrev[linkrev] = rev
        # This is a node to send in full, because the changeset it
        # corresponds to was a full changeset.
        if linknode in self.full_nodes:
            for x in orig(self, revlog, rev, prev, linknode):
                yield x
            return
        # At this point, a node can either be one we should skip or an
        # ellipsis. If it's not an ellipsis, bail immediately.
        if linkrev not in self.precomputed_ellipsis:
            return
        linkparents = self.precomputed_ellipsis[linkrev]
        def local(clrev):
            """Turn a changelog revnum into a local revnum.

            The ellipsis dag is stored as revnums on the changelog,
            but when we're producing ellipsis entries for
            non-changelog revlogs, we need to turn those numbers into
            something local. This does that for us, and during the
            changelog sending phase will also expand the stored
            mappings as needed.
            """
            if clrev == node.nullrev:
                return node.nullrev
            if not self.changelog_done:
                # If we're doing the changelog, it's possible that we
                # have a parent that is already on the client, and we
                # need to store some extra mapping information so that
                # our contained ellipsis nodes will be able to resolve
                # their parents.
                if clrev not in self.clrev_to_localrev:
                    clnode = revlog.node(clrev)
                    self.clnode_to_rev[clnode] = clrev
                return clrev
            # Walk the ellipsis-ized changelog breadth-first looking for a
            # change that has been linked from the current revlog.
            #
            # For a flat manifest revlog only a single step should be necessary
            # as all relevant changelog entries are relevant to the flat
            # manifest.
            #
            # For a filelog or tree manifest dirlog however not every changelog
            # entry will have been relevant, so we need to skip some changelog
            # nodes even after ellipsis-izing.
            walk = [clrev]
            while walk:
                p = walk[0]
                walk = walk[1:]
                if p in self.clrev_to_localrev:
                    return self.clrev_to_localrev[p]
                elif p in self.full_nodes:
                    walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
                                 if pp != node.nullrev])
                elif p in self.precomputed_ellipsis:
                    walk.extend([pp for pp in self.precomputed_ellipsis[p]
                                 if pp != node.nullrev])
                else:
                    # In this case, we've got an ellipsis with parents
                    # outside the current bundle (likely an
                    # incremental pull). We "know" that we can use the
                    # value of this same revlog at whatever revision
                    # is pointed to by linknode. "Know" is in scare
                    # quotes because I haven't done enough examination
                    # of edge cases to convince myself this is really
                    # a fact - it works for all the (admittedly
                    # thorough) cases in our testsuite, but I would be
                    # somewhat unsurprised to find a case in the wild
                    # where this breaks down a bit. That said, I don't
                    # know if it would hurt anything.
                    for i in pycompat.xrange(rev, 0, -1):
                        if revlog.linkrev(i) == clrev:
                            return i
                    # We failed to resolve a parent for this node, so
                    # we crash the changegroup construction.
                    raise error.Abort(
                        'unable to resolve parent while packing %r %r'
                        ' for changeset %r' % (revlog.indexfile, rev, clrev))
            return node.nullrev

        if not linkparents or (
            revlog.parentrevs(rev) == (node.nullrev, node.nullrev)):
            p1, p2 = node.nullrev, node.nullrev
        elif len(linkparents) == 1:
            p1, = sorted(local(p) for p in linkparents)
            p2 = node.nullrev
        else:
            p1, p2 = sorted(local(p) for p in linkparents)
        n = revlog.node(rev)
        yield changegroup.ellipsisdata(
            self, rev, revlog, p1, p2, revlog.revision(n), linknode)
    extensions.wrapfunction(changegroup.cg1packer, 'revchunk', revchunk)

    def deltaparent(orig, self, revlog, rev, p1, p2, prev):
        if util.safehasattr(self, 'full_nodes'):
            # TODO: send better deltas when in narrow mode.
            #
            # changegroup.group() loops over revisions to send,
            # including revisions we'll skip. What this means is that
            # `prev` will be a potentially useless delta base for all
            # ellipsis nodes, as the client likely won't have it. In
            # the future we should do bookkeeping about which nodes
            # have been sent to the client, and try to be
            # significantly smarter about delta bases. This is
            # slightly tricky because this same code has to work for
            # all revlogs, and we don't have the linkrev/linknode here.
            return p1
        return orig(self, revlog, rev, p1, p2, prev)
    extensions.wrapfunction(changegroup.cg2packer, 'deltaparent', deltaparent)
@@ -1,1040 +1,1057 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 nullid,
17 18 nullrev,
18 19 short,
19 20 )
20 21
21 22 from . import (
22 23 dagutil,
23 24 error,
24 25 manifest,
25 26 match as matchmod,
26 27 mdiff,
27 28 phases,
28 29 pycompat,
29 30 repository,
31 revlog,
30 32 util,
31 33 )
32 34
33 35 from .utils import (
34 36 stringutil,
35 37 )
36 38
37 39 _CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
38 40 _CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
39 41 _CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"
40 42
41 43 LFS_REQUIREMENT = 'lfs'
42 44
43 45 readexactly = util.readexactly
44 46
def getchunk(stream):
    """return the next chunk from stream as a string"""
    header = readexactly(stream, 4)
    length = struct.unpack(">l", header)[0]
    if length > 4:
        # Payload length excludes the 4-byte framing header.
        return readexactly(stream, length - 4)
    if length:
        # 1..4 and negative lengths cannot frame a valid chunk.
        raise error.Abort(_("invalid chunk length %d") % length)
    return ""
54 56
def chunkheader(length):
    """return a changegroup chunk header (string)"""
    # The on-the-wire length counts the 4-byte header itself.
    framed = length + 4
    return struct.pack(">l", framed)
58 60
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # A zero length terminates the current group in the stream.
    return struct.pack(">l", 0)
62 64
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    outfh = None
    pending = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            outfh = os.fdopen(fd, r"wb")
        elif vfs:
            outfh = vfs.open(filename, "wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            outfh = open(filename, "wb", 131072)
        # Until every chunk is on disk, remember the path so a failed
        # write leaves no partial file behind.
        pending = filename
        for piece in chunks:
            outfh.write(piece)
        pending = None
        return filename
    finally:
        if outfh is not None:
            outfh.close()
        if pending is not None:
            if filename and vfs:
                vfs.unlink(pending)
            else:
                os.unlink(pending)
96 98
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        # alg is the bundle compression type ('UN', 'BZ', 'GZ', ...);
        # None is treated as uncompressed.
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk's framing header and return its payload length.

        Returns 0 at an end-of-group marker; invokes self.callback (the
        progress hook) for every non-empty chunk seen.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Decode a cg1 delta header tuple.

        cg1 has no explicit delta base: the base is p1 for the first
        chunk of a group, then the previous chunk's node. Flags are
        always zero in cg1.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta chunk; return {} at the end-of-group marker."""
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = struct.unpack(self.deltaheader, headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit the payload in at most 1MB slices.
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
        yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Apply the manifest group of the stream to the repo."""
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
439 441
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # cg2 carries the delta base explicitly in the header, so
        # prevnode is unused; flags are always zero in this version.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
455 457
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # cg3 headers already include the revlog flags; pass them through.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        # First consume the root manifest group, then any per-directory
        # tree manifest groups, each introduced by a filelog-style header.
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            dirlog = repo.manifestlog._revlog.dirlog(d)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
482 484
class headerlessfixup(object):
    """File-like shim that replays already-consumed header bytes.

    Changegroup type probing reads a few bytes off ``fh``; this wrapper
    hands those bytes (``h``) back out first, then continues reading
    from the underlying stream.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        if not self._h:
            return readexactly(self._fh, n)
        d, self._h = self._h[:n], self._h[n:]
        if len(d) < n:
            # Buffered header exhausted mid-read; top up from the stream.
            d += readexactly(self._fh, n - len(d))
        return d
494 496
def ellipsisdata(packer, rev, revlog_, p1, p2, data, linknode):
    """Build a raw changegroup chunk for an ellipsis revision.

    ``rev`` of ``revlog_`` is emitted as a full snapshot (trivial diff)
    with REVIDX_ELLIPSIS set so the receiver knows its parents/linkrev
    were rewritten (narrow/shallow clone). ``p1``/``p2`` are the
    (possibly adjusted) parent revision numbers to advertise.
    """
    fullnode = revlog_.node(rev)
    p1n, p2n = revlog_.node(p1), revlog_.node(p2)
    flags = revlog_.flags(rev) | revlog.REVIDX_ELLIPSIS
    meta = packer.builddeltaheader(fullnode, p1n, p2n, nullid, linknode,
                                   flags)
    # TODO: try and actually send deltas for ellipsis data blocks
    diffheader = mdiff.trivialdiffheader(len(data))
    pieces = (meta, diffheader, data)
    total = sum(len(piece) for piece in pieces)
    return ''.join((chunkheader(total),) + pieces)
511
class cg1packer(object):
    """Changegroup version '01' packer.

    Emits the changelog group, then flat manifests, then one group per
    changed file. cg1 has no delta-base field in the wire header: every
    delta is implicitly against the previous revision in the stream.
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    version = '01'
    def __init__(self, repo, filematcher, bundlecaps=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.
        """
        assert filematcher
        self._filematcher = filematcher

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        # experimental config: bundle.reorder
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            reorder = None
        else:
            reorder = stringutil.parsebool(reorder)
        self._repo = repo
        self._reorder = reorder
        # Only emit the per-section size notes in verbose (non-debug) mode.
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None

    def close(self):
        # An empty chunk terminates the current group.
        return closechunk()

    def fileheader(self, fname):
        # Length-prefixed filename announcing the start of a file group.
        return chunkheader(len(fname)) + fname

    # Extracted both for clarity and for overriding in extensions.
    def _sortgroup(self, revlog, nodelist, lookup):
        """Sort nodes for change group and turn them into revnums."""
        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (revlog._generaldelta and self._reorder is None) or self._reorder:
            dag = dagutil.revlogdag(revlog)
            return dag.linearize(set(revlog.rev(n) for n in nodelist))
        else:
            return sorted([revlog.rev(n) for n in nodelist])

    def group(self, nodelist, revlog, lookup, units=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield self.close()
            return

        revs = self._sortgroup(revlog, nodelist, lookup)

        # add the parent of the first rev
        p = revlog.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        progress = None
        if units is not None:
            progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
                                                  total=(len(revs) - 1))
        for r in pycompat.xrange(len(revs) - 1):
            if progress:
                progress.update(r + 1)
            # Each revision is deltaed against its predecessor in the
            # linearized order (cg1 wire-format invariant).
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(revlog.node(curr))
            for c in self.revchunk(revlog, curr, prev, linknode):
                yield c

        if progress:
            progress.complete()
        yield self.close()

    # filter any nodes that claim to be part of the known set
    def prune(self, revlog, missing, commonrevs):
        # TODO this violates storage abstraction for manifests.
        if isinstance(revlog, manifest.manifestrevlog):
            # Skip directory manifests the sparse matcher does not visit.
            if not self._filematcher.visitdir(revlog._dir[:-1] or '.'):
                return []

        rr, rl = revlog.rev, revlog.linkrev
        return [n for n in missing if rl(rr(n)) not in commonrevs]

    def _packmanifests(self, dir, mfnodes, lookuplinknode):
        """Pack flat manifests into a changegroup stream."""
        # cg1 only knows flat (root) manifests; dir must be empty.
        assert not dir
        for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
                                lookuplinknode, units=_('manifests')):
            yield chunk

    def _manifestsdone(self):
        # cg1/cg2 have no explicit manifest-section terminator.
        return ''

    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)'''
        repo = self._repo
        cl = repo.changelog

        clrevorder = {}
        mfs = {} # needed manifests
        fnodes = {} # needed file nodes
        changedfiles = set()

        # Callback for the changelog, used to collect changed files and manifest
        # nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)
            n = c[0]
            # record the first changeset introducing this manifest version
            mfs.setdefault(n, x)
            # Record a complete list of potentially-changed files in
            # this manifest.
            changedfiles.update(c[3])
            return x

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0
        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (changelog)\n') % size)

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                                            fastpathlinkrev, mfs, fnodes,
                                            source):
            yield chunk
        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        if not fastpathlinkrev:
            def linknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = cl.node
            def linknodes(filerevlog, fname):
                llr = filerevlog.linkrev
                fln = filerevlog.node
                revs = ((r, llr(r)) for r in filerevlog)
                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self.close()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)

    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Keep the earliest introducing changeset as linknode.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        size = 0
        # tmfnodes grows as lookupmflinknode discovers subdirectory
        # manifests, so drain it with popitem until empty.
        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
            if not dir or prunednodes:
                for x in self._packmanifests(dir, prunednodes,
                                             makelookupmflinknode(dir, nodes)):
                    size += len(x)
                    yield x
        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsdone()

    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """Yield changegroup chunks for each changed file's revisions."""
        repo = self._repo
        progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
                                        total=len(changedfiles))
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing file data for %s") %
                                  fname)

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
            if filenodes:
                progress.update(i + 1, item=fname)
                h = self.fileheader(fname)
                size = len(h)
                yield h
                for chunk in self.group(filenodes, filerevlog, lookupfilelog):
                    size += len(chunk)
                    yield chunk
                self._verbosenote(_('%8.i %s\n') % (size, fname))
        progress.complete()

    def deltaparent(self, revlog, rev, p1, p2, prev):
        # cg1 always deltas against the previous revision in the stream.
        if not revlog.candelta(prev, rev):
            raise error.ProgrammingError('cg1 should not be used in this case')
        return prev

    def revchunk(self, revlog, rev, prev, linknode):
        """Yield the wire chunks (header, meta, delta) for one revision."""
        node = revlog.node(rev)
        p1, p2 = revlog.parentrevs(rev)
        base = self.deltaparent(revlog, rev, p1, p2, prev)

        prefix = ''
        if revlog.iscensored(base) or revlog.iscensored(rev):
            # Censored revisions are transmitted as full replacements
            # (tombstone text) rather than real deltas.
            try:
                delta = revlog.revision(node, raw=True)
            except error.CensoredNodeError as e:
                delta = e.tombstone
            if base == nullrev:
                prefix = mdiff.trivialdiffheader(len(delta))
            else:
                baselen = revlog.rawsize(base)
                prefix = mdiff.replacediffheader(baselen, len(delta))
        elif base == nullrev:
            # Full snapshot, framed as a trivial diff.
            delta = revlog.revision(node, raw=True)
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            delta = revlog.revdiff(base, rev)
        p1n, p2n = revlog.parents(node)
        basenode = revlog.node(base)
        flags = revlog.flags(rev)
        meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
        meta += prefix
        l = len(meta) + len(delta)
        yield chunkheader(l)
        yield meta
        yield delta

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        # do nothing with basenode, it is implicitly the previous one in HG10
        # do nothing with flags, it is implicitly 0 for cg1 and cg2
        return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
821 838
class cg2packer(cg1packer):
    """Changegroup version '02' packer (adds generaldelta support).

    The wire header gains an explicit delta base node, so deltas no
    longer have to be against the previous revision in the stream.
    """
    version = '02'
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER

    def __init__(self, repo, filematcher, bundlecaps=None):
        super(cg2packer, self).__init__(repo, filematcher,
                                        bundlecaps=bundlecaps)

        if self._reorder is None:
            # Since generaldelta is directly supported by cg2, reordering
            # generally doesn't help, so we disable it by default (treating
            # bundle.reorder=auto just like bundle.reorder=False).
            self._reorder = False

    def deltaparent(self, revlog, rev, p1, p2, prev):
        """Choose the revision this rev's delta will be computed against."""
        dp = revlog.deltaparent(rev)
        if dp == nullrev and revlog.storedeltachains:
            # Avoid sending full revisions when delta parent is null. Pick prev
            # in that case. It's tempting to pick p1 in this case, as p1 will
            # be smaller in the common case. However, computing a delta against
            # p1 may require resolving the raw text of p1, which could be
            # expensive. The revlog caches should have prev cached, meaning
            # less CPU for changegroup generation. There is likely room to add
            # a flag and/or config option to control this behavior.
            base = prev
        elif dp == nullrev:
            # revlog is configured to use full snapshot for a reason,
            # stick to full snapshot.
            base = nullrev
        elif dp not in (p1, p2, prev):
            # Pick prev when we can't be sure remote has the base revision.
            return prev
        else:
            base = dp
        if base != nullrev and not revlog.candelta(base, rev):
            # The chosen base can't be deltaed against; fall back to a
            # full snapshot.
            base = nullrev
        return base

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        # Do nothing with flags, it is implicitly 0 in cg1 and cg2
        return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
863 880
class cg3packer(cg2packer):
    """Changegroup version '03' packer (treemanifests + revlog flags)."""
    version = '03'
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER

    def _packmanifests(self, dir, mfnodes, lookuplinknode):
        if dir:
            # Directory manifest groups are introduced by their path,
            # like file groups are.
            yield self.fileheader(dir)

        log = self._repo.manifestlog._revlog.dirlog(dir)
        for piece in self.group(mfnodes, log, lookuplinknode,
                                units=_('manifests')):
            yield piece

    def _manifestsdone(self):
        # cg3 ends the manifest section with an explicit empty chunk.
        return self.close()

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        # Unlike cg1/cg2, cg3 transports the revlog flags on the wire.
        header = (node, p1n, p2n, basenode, linknode, flags)
        return struct.pack(self.deltaheader, *header)
883 900
# Map of changegroup version -> (packer class, unpacker class).
_packermap = {'01': (cg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (cg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (cg3packer, cg3unpacker),
}
890 907
def allsupportedversions(repo):
    """Return the set of changegroup versions this code can handle.

    '03' is only included when one of the experimental knobs or the
    treemanifest requirement asks for it.
    """
    versions = set(_packermap.keys())
    needv03 = (repo.ui.configbool('experimental', 'changegroup3')
               or repo.ui.configbool('experimental', 'treemanifest')
               or 'treemanifest' in repo.requirements)
    if not needv03:
        versions.discard('03')
    return versions
898 915
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return changegroup versions that can be applied to ``repo``.

    Currently identical to the full supported set: incoming changegroups
    have no extra restrictions beyond general support.
    """
    return allsupportedversions(repo)
902 919
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return changegroup versions that can be produced from ``repo``."""
    versions = allsupportedversions(repo)
    reqs = repo.requirements
    if 'treemanifest' in reqs:
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions.difference_update(['01', '02'])
    if repository.NARROW_REQUIREMENT in reqs:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions.difference_update(['01', '02'])
    if LFS_REQUIREMENT in reqs:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions.difference_update(['01', '02'])

    return versions
926 943
def localversion(repo):
    """Return the best changegroup version for local-only bundles."""
    # Finds the best version to use for bundles that are meant to be used
    # locally, such as those from strip and shelve, and temporary bundles.
    return max(supportedoutgoingversions(repo))
931 948
def safeversion(repo):
    """Return the smallest version clients of this repo should support.

    For example, all hg versions that support generaldelta also support
    changegroup 02, so a generaldelta repo can safely assume it.
    """
    candidates = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        candidates.discard('01')
    assert candidates
    return min(candidates)
941 958
def getbundler(version, repo, bundlecaps=None, filematcher=None):
    """Instantiate the packer class for changegroup ``version``.

    ``filematcher`` restricts which files are included; it defaults to
    matching everything and is always intersected with the repo's narrow
    matcher so files absent from the local store are filtered out.
    """
    assert version in supportedoutgoingversions(repo)

    matcher = filematcher
    if matcher is None:
        matcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not matcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    # Requested files could include files not in the local store. So
    # filter those out.
    matcher = matchmod.intersectmatchers(repo.narrowmatch(), matcher)

    packercls = _packermap[version][0]
    return packercls(repo, filematcher=matcher, bundlecaps=bundlecaps)
959 976
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the unpacker class for ``version`` over stream ``fh``."""
    return _packermap[version][1](fh, alg, extras=extras)
962 979
def _changegroupinfo(repo, nodes, source):
    """Report how many changesets are bundled (verbose/debug output)."""
    if not (repo.ui.verbose or source == 'bundle'):
        return
    repo.ui.status(_("%d changesets found\n") % len(nodes))
    if repo.ui.debugflag:
        repo.ui.debug("list of changesets:\n")
        for node in nodes:
            repo.ui.debug("%s\n" % hex(node))
970 987
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Create a changegroup and return an unbundler reading it back.

    Used by callers (e.g. strip/shelve) that need to consume the
    changegroup they just produced.
    """
    cgstream = makestream(repo, outgoing, version, source,
                          fastpath=fastpath, bundlecaps=bundlecaps)
    return getunbundler(version, util.chunkbuffer(cgstream), None,
                        {'clcount': len(outgoing.missing) })
977 994
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Yield the changegroup chunks for the ``outgoing`` changesets."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered
    # heads have been requested (since we then know that all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
997 1014
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the filelog section of a changegroup to the repository.

    ``source`` is the unbundler being consumed, ``revmap`` maps linkrev
    nodes to local revisions and ``trp`` is the transaction proxy.
    ``needfiles`` maps filename -> set of file nodes that the changelog
    says must exist once the group is applied; it is consumed to detect
    spurious or missing file revisions.

    Returns a ``(revisions, files)`` tuple of counts for status output.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # One iteration per file group, until the empty-header sentinel.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Check off every newly-added node against what the
            # changelog said we needed.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything still listed in needfiles must already exist locally,
    # otherwise the changegroup was incomplete.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now