##// END OF EJS Templates
narrow: pass node into revlog.revision()...
Gregory Szorc -
r37358:8cf97e0e default
parent child Browse files
Show More
@@ -1,372 +1,373 b''
# narrowchangegroup.py - narrow clone changegroup creation and consumption
#
# Copyright 2017 Google, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from mercurial.i18n import _
from mercurial import (
    changegroup,
    error,
    extensions,
    manifest,
    match as matchmod,
    mdiff,
    node,
    revlog,
    util,
)

def setup():
    """Install the narrow-clone wrappers on the changegroup packers.

    Wraps ``prune``, ``generatefiles``, ``close``, ``_sortgroup``,
    ``generate`` and ``revchunk`` on ``changegroup.cg1packer`` and
    ``deltaparent`` on ``changegroup.cg2packer`` using
    ``extensions.wrapfunction``. Each wrapper receives the wrapped
    function as its first argument (``orig``).
    """

    def _cgmatcher(cgpacker):
        """Return the matcher to apply while packing with ``cgpacker``.

        This is the repo's ``narrowmatch()``; when the packer carries a
        ``_narrow_matcher`` callable that returns a truthy matcher, the
        two are intersected.
        """
        localmatcher = cgpacker._repo.narrowmatch()
        remotematcher = getattr(cgpacker, '_narrow_matcher', lambda: None)()
        if remotematcher:
            return matchmod.intersectmatchers(localmatcher, remotematcher)
        else:
            return localmatcher

    def prune(orig, self, revlog, missing, commonrevs):
        """Skip manifest revlogs whose directory the matcher never visits.

        For a ``manifest.manifestrevlog`` whose directory is rejected by
        ``matcher.visitdir`` an empty list is returned; everything else is
        delegated to ``orig``.
        """
        if isinstance(revlog, manifest.manifestrevlog):
            matcher = _cgmatcher(self)
            # revlog._dir[:-1] drops the last character of the directory
            # name (presumably a trailing '/' -- confirm against
            # manifestrevlog); an empty result is looked up as '.'.
            if (matcher and
                not matcher.visitdir(revlog._dir[:-1] or '.')):
                return []
        return orig(self, revlog, missing, commonrevs)

    extensions.wrapfunction(changegroup.cg1packer, 'prune', prune)

    def generatefiles(orig, self, changedfiles, linknodes, commonrevs,
                      source):
        """Restrict ``changedfiles`` to the matcher before calling ``orig``.

        When the packer is flagged ``is_shallow``, the ``linknodes``
        callback is additionally replaced by one that augments the
        original callback's result with file nodes found in the manifests
        stashed on ``self._mfdicts`` by generate().
        """
        matcher = _cgmatcher(self)
        if matcher:
            changedfiles = list(filter(matcher, changedfiles))
        if getattr(self, 'is_shallow', False):
            # See comment in generate() for why this sadness is a thing.
            mfdicts = self._mfdicts
            del self._mfdicts
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            oldlinknodes = linknodes
            clrev = self._repo.changelog.rev
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        self.clrev_to_localrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        # fname is not present in this common changeset
                        pass
                links = oldlinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            # keep whichever link sorts lowest under
                            # changelog.rev
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links
        return orig(self, changedfiles, linknodes, commonrevs, source)
    extensions.wrapfunction(
        changegroup.cg1packer, 'generatefiles', generatefiles)

    def ellipsisdata(packer, rev, revlog_, p1, p2, data, linknode):
        """Build one changegroup chunk for ``rev`` flagged as an ellipsis.

        ``p1`` and ``p2`` are revision numbers in ``revlog_`` and are
        converted to nodes for the delta header. ``data`` is sent whole,
        preceded by ``mdiff.trivialdiffheader(len(data))``. Returns the
        chunk header, delta header, diff header and data joined into one
        string.
        """
        n = revlog_.node(rev)
        p1n, p2n = revlog_.node(p1), revlog_.node(p2)
        flags = revlog_.flags(rev)
        flags |= revlog.REVIDX_ELLIPSIS
        # node.nullid is passed in the fourth header slot (presumably the
        # delta base -- confirm against builddeltaheader).
        meta = packer.builddeltaheader(
            n, p1n, p2n, node.nullid, linknode, flags)
        # TODO: try and actually send deltas for ellipsis data blocks
        diffheader = mdiff.trivialdiffheader(len(data))
        l = len(meta) + len(diffheader) + len(data)
        return ''.join((changegroup.chunkheader(l),
                        meta,
                        diffheader,
                        data))

    def close(orig, self):
        """Reset per-revlog mapping state, then call ``orig``.

        Clears ``clrev_to_localrev`` (if present), promotes a non-empty
        ``next_clrev_to_localrev`` into its place, and marks the changelog
        phase as finished via ``changelog_done``.
        """
        getattr(self, 'clrev_to_localrev', {}).clear()
        if getattr(self, 'next_clrev_to_localrev', {}):
            self.clrev_to_localrev = self.next_clrev_to_localrev
            del self.next_clrev_to_localrev
        self.changelog_done = True
        return orig(self)
    extensions.wrapfunction(changegroup.cg1packer, 'close', close)

    # In a perfect world, we'd generate better ellipsis-ified graphs
    # for non-changelog revlogs. In practice, we haven't started doing
    # that yet, so the resulting DAGs for the manifestlog and filelogs
    # are actually full of bogus parentage on all the ellipsis
    # nodes. This has the side effect that, while the contents are
    # correct, the individual DAGs might be completely out of whack in
    # a case like 882681bc3166 and its ancestors (back about 10
    # revisions or so) in the main hg repo.
    #
    # The one invariant we *know* holds is that the new (potentially
    # bogus) DAG shape will be valid if we order the nodes in the
    # order that they're introduced in dramatis personae by the
    # changelog, so what we do is we sort the non-changelog histories
    # by the order in which they are used by the changelog.
    def _sortgroup(orig, self, revlog, nodelist, lookup):
        """Order ``nodelist`` by the changelog rev each node links to.

        Delegates to ``orig`` when the packer has no ``full_nodes``
        attribute or ``clnode_to_rev`` is empty. Otherwise returns the
        revision numbers of ``nodelist`` sorted by
        ``self.clnode_to_rev[lookup(n)]``.
        """
        if not util.safehasattr(self, 'full_nodes') or not self.clnode_to_rev:
            return orig(self, revlog, nodelist, lookup)
        key = lambda n: self.clnode_to_rev[lookup(n)]
        return [revlog.rev(n) for n in sorted(nodelist, key=key)]

    extensions.wrapfunction(changegroup.cg1packer, '_sortgroup', _sortgroup)

    def generate(orig, self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)

        When the packer has no ``full_nodes`` attribute the chunks of
        ``orig`` are yielded unchanged. Otherwise the changelog,
        manifests and files are emitted in that order, followed by
        ``self.close()``, and the 'outgoing' hook is fired when
        ``clnodes`` is non-empty.
        '''
        # Note: other than delegating to orig, the only deviation in
        # logic from normal hg's generate is marked with BEGIN/END
        # NARROW HACK.
        if not util.safehasattr(self, 'full_nodes'):
            # not sending a narrow bundle
            for x in orig(self, commonrevs, clnodes, fastpathlinkrev, source):
                yield x
            return

        repo = self._repo
        cl = repo.changelog
        mfl = repo.manifestlog
        mfrevlog = mfl._revlog

        clrevorder = {}
        mfs = {} # needed manifests
        fnodes = {} # needed file nodes
        changedfiles = set()

        # Callback for the changelog, used to collect changed files and manifest
        # nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)
            # BEGIN NARROW HACK
            #
            # Only update mfs if x is going to be sent. Otherwise we
            # end up with bogus linkrevs specified for manifests and
            # we skip some manifest nodes that we should otherwise
            # have sent.
            if x in self.full_nodes or cl.rev(x) in self.precomputed_ellipsis:
                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Set this narrow-specific dict so we have the lowest manifest
                # revnum to look up for this cl revnum. (Part of mapping
                # changelog ellipsis parents to manifest ellipsis parents)
                self.next_clrev_to_localrev.setdefault(cl.rev(x),
                                                       mfrevlog.rev(n))
            # We can't trust the changed files list in the changeset if the
            # client requested a shallow clone.
            if self.is_shallow:
                changedfiles.update(mfl[c[0]].read().keys())
            else:
                changedfiles.update(c[3])
            # END NARROW HACK
            # Record a complete list of potentially-changed files in
            # this manifest.
            return x

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0
        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (changelog)\n') % size)

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)
        # Shallow clones also don't work correctly with fastpathlinkrev
        # because file nodes may need to be sent for a manifest even if they
        # weren't introduced by that manifest.
        fastpathlinkrev = fastpathlinkrev and not self.is_shallow

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                fastpathlinkrev, mfs, fnodes, source):
            yield chunk
        # BEGIN NARROW HACK
        mfdicts = None
        if self.is_shallow:
            mfdicts = [(self._repo.manifestlog[n].read(), lr)
                       for (n, lr) in mfs.iteritems()]
        # END NARROW HACK
        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        if not fastpathlinkrev:
            def linknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = cl.node
            def linknodes(filerevlog, fname):
                llr = filerevlog.linkrev
                fln = filerevlog.node
                revs = ((r, llr(r)) for r in filerevlog)
                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

        # BEGIN NARROW HACK
        #
        # We need to pass the mfdicts variable down into
        # generatefiles(), but more than one command might have
        # wrapped generatefiles so we can't modify the function
        # signature. Instead, we pass the data to ourselves using an
        # instance attribute. I'm sorry.
        self._mfdicts = mfdicts
        # END NARROW HACK
        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self.close()

        if clnodes:
            repo.hook('outgoing', node=node.hex(clnodes[0]), source=source)
    extensions.wrapfunction(changegroup.cg1packer, 'generate', generate)

    def revchunk(orig, self, revlog, rev, prev, linknode):
        """Yield the changegroup chunk(s) for ``rev`` of ``revlog``.

        Delegates to ``orig`` when the packer has no ``full_nodes``
        attribute or when ``linknode`` is in ``full_nodes``. Yields
        nothing when the link revision is not in
        ``precomputed_ellipsis``. Otherwise yields a single ellipsis
        chunk (see ellipsisdata()) whose parents are the precomputed
        ellipsis parents translated to local revision numbers.
        """
        if not util.safehasattr(self, 'full_nodes'):
            # not sending a narrow changegroup
            for x in orig(self, revlog, rev, prev, linknode):
                yield x
            return
        # build up some mapping information that's useful later. See
        # the local() nested function below.
        if not self.changelog_done:
            self.clnode_to_rev[linknode] = rev
            linkrev = rev
            self.clrev_to_localrev[linkrev] = rev
        else:
            linkrev = self.clnode_to_rev[linknode]
            self.clrev_to_localrev[linkrev] = rev
        # This is a node to send in full, because the changeset it
        # corresponds to was a full changeset.
        if linknode in self.full_nodes:
            for x in orig(self, revlog, rev, prev, linknode):
                yield x
            return
        # At this point, a node can either be one we should skip or an
        # ellipsis. If it's not an ellipsis, bail immediately.
        if linkrev not in self.precomputed_ellipsis:
            return
        linkparents = self.precomputed_ellipsis[linkrev]
        def local(clrev):
            """Turn a changelog revnum into a local revnum.

            The ellipsis dag is stored as revnums on the changelog,
            but when we're producing ellipsis entries for
            non-changelog revlogs, we need to turn those numbers into
            something local. This does that for us, and during the
            changelog sending phase will also expand the stored
            mappings as needed.
            """
            if clrev == node.nullrev:
                return node.nullrev
            if not self.changelog_done:
                # If we're doing the changelog, it's possible that we
                # have a parent that is already on the client, and we
                # need to store some extra mapping information so that
                # our contained ellipsis nodes will be able to resolve
                # their parents.
                if clrev not in self.clrev_to_localrev:
                    clnode = revlog.node(clrev)
                    self.clnode_to_rev[clnode] = clrev
                return clrev
            # Walk the ellipsis-ized changelog breadth-first looking for a
            # change that has been linked from the current revlog.
            #
            # For a flat manifest revlog only a single step should be necessary
            # as all relevant changelog entries are relevant to the flat
            # manifest.
            #
            # For a filelog or tree manifest dirlog however not every changelog
            # entry will have been relevant, so we need to skip some changelog
            # nodes even after ellipsis-izing.
            walk = [clrev]
            while walk:
                p = walk[0]
                walk = walk[1:]
                if p in self.clrev_to_localrev:
                    return self.clrev_to_localrev[p]
                elif p in self.full_nodes:
                    walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
                                    if pp != node.nullrev])
                elif p in self.precomputed_ellipsis:
                    walk.extend([pp for pp in self.precomputed_ellipsis[p]
                                    if pp != node.nullrev])
                else:
                    # In this case, we've got an ellipsis with parents
                    # outside the current bundle (likely an
                    # incremental pull). We "know" that we can use the
                    # value of this same revlog at whatever revision
                    # is pointed to by linknode. "Know" is in scare
                    # quotes because I haven't done enough examination
                    # of edge cases to convince myself this is really
                    # a fact - it works for all the (admittedly
                    # thorough) cases in our testsuite, but I would be
                    # somewhat unsurprised to find a case in the wild
                    # where this breaks down a bit. That said, I don't
                    # know if it would hurt anything.
                    for i in xrange(rev, 0, -1):
                        if revlog.linkrev(i) == clrev:
                            return i
                    # We failed to resolve a parent for this node, so
                    # we crash the changegroup construction.
                    raise error.Abort(
                        'unable to resolve parent while packing %r %r'
                        ' for changeset %r' % (revlog.indexfile, rev, clrev))
            return node.nullrev

        if not linkparents or (
            revlog.parentrevs(rev) == (node.nullrev, node.nullrev)):
            p1, p2 = node.nullrev, node.nullrev
        elif len(linkparents) == 1:
            p1, = sorted(local(p) for p in linkparents)
            p2 = node.nullrev
        else:
            p1, p2 = sorted(local(p) for p in linkparents)
        # Look the revision text up by node rather than by revision number.
        n = revlog.node(rev)
        yield ellipsisdata(
            self, rev, revlog, p1, p2, revlog.revision(n), linknode)
    extensions.wrapfunction(changegroup.cg1packer, 'revchunk', revchunk)

    def deltaparent(orig, self, revlog, rev, p1, p2, prev):
        """Choose the delta parent for ``rev``.

        Returns ``p1`` whenever the packer has a ``full_nodes`` attribute
        (narrow mode); otherwise defers to ``orig``.
        """
        if util.safehasattr(self, 'full_nodes'):
            # TODO: send better deltas when in narrow mode.
            #
            # changegroup.group() loops over revisions to send,
            # including revisions we'll skip. What this means is that
            # `prev` will be a potentially useless delta base for all
            # ellipsis nodes, as the client likely won't have it. In
            # the future we should do bookkeeping about which nodes
            # have been sent to the client, and try to be
            # significantly smarter about delta bases. This is
            # slightly tricky because this same code has to work for
            # all revlogs, and we don't have the linkrev/linknode here.
            return p1
        return orig(self, revlog, rev, p1, p2, prev)
    extensions.wrapfunction(changegroup.cg2packer, 'deltaparent', deltaparent)
General Comments 0
You need to be logged in to leave comments. Login now