##// END OF EJS Templates
narrow: restrict manifest iteration by using manifest.walk(matcher)...
Martin von Zweigbergk -
r36227:2ad527f7 default
parent child Browse files
Show More
@@ -1,495 +1,495
1 1 # narrowbundle2.py - bundle2 extensions for narrow repository support
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import errno
12 12 import struct
13 13
14 14 from mercurial.i18n import _
15 15 from mercurial.node import (
16 16 bin,
17 17 nullid,
18 18 nullrev,
19 19 )
20 20 from mercurial import (
21 21 bundle2,
22 22 changegroup,
23 23 dagutil,
24 24 error,
25 25 exchange,
26 26 extensions,
27 27 narrowspec,
28 28 repair,
29 29 util,
30 30 wireproto,
31 31 )
32 32
33 33 from . import (
34 34 narrowrepo,
35 35 )
36 36
# Name of the bundle2 capability; also the prefix of the narrow part types.
NARROWCAP = 'narrow'
# Config section consulted for per-user narrow ACLs.
_NARROWACL_SECTION = 'narrowhgacl'

# bundle2 part types and the parameter names used by the spec part.
_CHANGESPECPART = '%s:changespec' % NARROWCAP
_SPECPART = '%s:spec' % NARROWCAP
_SPECPART_INCLUDE = 'include'
_SPECPART_EXCLUDE = 'exclude'

# Four-byte signals used inside a changespec part.
_KILLNODESIGNAL = 'KILL'
_DONESIGNAL = 'DONE'

# struct layouts of elided headers, and their packed sizes in bytes.
# cset id, p1, p2, len(text)
_ELIDEDCSHEADER = '>20s20s20sl'
# manifest id, p1, p2, link id, len(text)
_ELIDEDMFHEADER = '>20s20s20s20sl'
_CSHEADERSIZE = struct.Struct(_ELIDEDCSHEADER).size
_MFHEADERSIZE = struct.Struct(_ELIDEDMFHEADER).size
49 49
# When advertising capabilities, always include narrow clone support.
def getrepocaps_narrow(orig, repo, **kwargs):
    """Wrapper for a capability getter that adds the narrow capability.

    Calls ``orig(repo, **kwargs)``, stores ``['v0']`` under the
    ``NARROWCAP`` key of the result and returns that same mapping.
    """
    repocaps = orig(repo, **kwargs)
    repocaps[NARROWCAP] = ['v0']
    return repocaps
55 55
def _computeellipsis(repo, common, heads, known, match, depth=None):
    """Compute the shape of a narrowed DAG.

    Args:
      repo: The repository we're transferring.
      common: The roots of the DAG range we're transferring.
              May be just [nullid], which means all ancestors of heads.
      heads: The heads of the DAG range we're transferring.
      known: A set of changelog revision numbers which, together with the
             heads, are always treated as required: when such a revision
             does not touch the narrowspec it is kept as an ellipsis node
             instead of being folded into its descendants.
      match: The narrowmatcher that allows us to identify relevant changes.
      depth: If not None, only consider nodes to be full nodes if they are at
             most depth changesets away from one of heads.

    Returns:
      A tuple of (visitnodes, relevant_nodes, ellipsisroots) where:

        visitnodes: The list of nodes (either full or ellipsis) which
                    need to be sent to the client.
        relevant_nodes: The set of changelog nodes which change a file inside
                 the narrowspec. The client needs these as non-ellipsis nodes.
        ellipsisroots: A dict of {rev: parents} that is used in
                       narrowchangegroup to produce ellipsis nodes with the
                       correct parents.

    Raises:
      error.Abort: if an ellipsis head with three roots cannot be split.
    """
    cl = repo.changelog
    mfl = repo.manifestlog

    cldag = dagutil.revlogdag(cl)
    # dagutil does not like nullid/nullrev
    commonrevs = cldag.internalizeall(common - {nullid}) | {nullrev}
    headsrevs = cldag.internalizeall(heads)
    # Must use the same test as the loop below ("is not None"); a plain
    # truthiness test would leave revdepth undefined for depth == 0.
    if depth is not None:
        revdepth = {h: 0 for h in headsrevs}

    ellipsisheads = collections.defaultdict(set)
    ellipsisroots = collections.defaultdict(set)

    def addroot(head, curchange):
        """Add a root to an ellipsis head, splitting heads with 3 roots."""
        ellipsisroots[head].add(curchange)
        # Recursively split ellipsis heads with 3 roots by finding the
        # roots' youngest common descendant which is an elided merge commit.
        # That descendant takes 2 of the 3 roots as its own, and becomes a
        # root of the head.
        while len(ellipsisroots[head]) > 2:
            child, roots = splithead(head)
            splitroots(head, child, roots)
            head = child # Recurse in case we just added a 3rd root

    def splitroots(head, child, roots):
        """Move ``roots`` from ``head`` to ``child``; make child a root."""
        ellipsisroots[head].difference_update(roots)
        ellipsisroots[head].add(child)
        ellipsisroots[child].update(roots)
        ellipsisroots[child].discard(child)

    def splithead(head):
        """Pick (child, (root, root)) used to split a 3-root head."""
        r1, r2, r3 = sorted(ellipsisroots[head])
        for nr1, nr2 in ((r2, r3), (r1, r3), (r1, r2)):
            mid = repo.revs('sort(merge() & %d::%d & %d::%d, -rev)',
                            nr1, head, nr2, head)
            for j in mid:
                if j == nr2:
                    return nr2, (nr1, nr2)
                if j not in ellipsisroots or len(ellipsisroots[j]) < 2:
                    return j, (nr1, nr2)
        raise error.Abort('Failed to split up ellipsis node! head: %d, '
                          'roots: %d %d %d' % (head, r1, r2, r3))

    missing = list(cl.findmissingrevs(common=commonrevs, heads=headsrevs))
    relevant_nodes = set()
    # A real list (not a lazy map object) so callers can test it for
    # emptiness and iterate it more than once.
    visitnodes = [cl.node(r) for r in missing]
    required = set(headsrevs) | known
    # Walk from the newest missing revision down to the oldest.
    for rev in reversed(missing):
        clrev = cl.changelogrevision(rev)
        ps = cldag.parents(rev)
        if depth is not None:
            curdepth = revdepth[rev]
            for p in ps:
                revdepth[p] = min(curdepth + 1, revdepth.get(p, depth + 1))
        needed = False
        shallow_enough = depth is None or revdepth[rev] <= depth
        if shallow_enough:
            curmf = mfl[clrev.manifest].read()
            if ps:
                # We choose to not trust the changed files list in
                # changesets because it's not always correct. TODO: could
                # we trust it for the non-merge case?
                p1mf = mfl[cl.changelogrevision(ps[0]).manifest].read()
                needed = bool(curmf.diff(p1mf, match))
                if not needed and len(ps) > 1:
                    # For merge changes, the list of changed files is not
                    # helpful, since we need to emit the merge if a file
                    # in the narrow spec has changed on either side of the
                    # merge. As a result, we do a manifest diff to check.
                    p2mf = mfl[cl.changelogrevision(ps[1]).manifest].read()
                    needed = bool(curmf.diff(p2mf, match))
            else:
                # For a root node, we need to include the node if any
                # files in the node match the narrowspec.
                needed = any(curmf.walk(match))

        if needed:
            for head in ellipsisheads[rev]:
                addroot(head, rev)
            for p in ps:
                required.add(p)
            relevant_nodes.add(cl.node(rev))
        else:
            if not ps:
                ps = [nullrev]
            if rev in required:
                for head in ellipsisheads[rev]:
                    addroot(head, rev)
                for p in ps:
                    ellipsisheads[p].add(rev)
            else:
                for p in ps:
                    ellipsisheads[p] |= ellipsisheads[rev]

    # add common changesets as roots of their reachable ellipsis heads
    for c in commonrevs:
        for head in ellipsisheads[c]:
            addroot(head, c)
    return visitnodes, relevant_nodes, ellipsisroots
180 180
def _packellipsischangegroup(repo, common, match, relevant_nodes,
                             ellipsisroots, visitnodes, depth, source, version):
    """Configure a changegroup packer for ellipsis nodes and run it.

    Aborts for changegroup versions '01' and '02'. Otherwise obtains a
    packer from changegroup.getbundler(), attaches the narrow/ellipsis
    state to it as attributes, and returns the result of
    ``packer.generate(common, visitnodes, False, source)``.
    """
    if version in ('01', '02'):
        raise error.Abort(
            'ellipsis nodes require at least cg3 on client and server, '
            'but negotiated version %s' % version)

    def narrowmatcher():
        return match

    # We wrap cg1packer.revchunk, using a side channel to pass
    # relevant_nodes into that area. Then if linknode isn't in the
    # set, we know we have an ellipsis node and we should defer
    # sending that node's data. We override close() to detect
    # pending ellipsis nodes and flush them.
    cgpacker = changegroup.getbundler(version, repo)

    # The narrow matcher lets the packer omit filelogs and dirlogs
    # as needed.
    cgpacker._narrow_matcher = narrowmatcher
    # Nodes which should NOT be ellipsis nodes. Stored instead of the set
    # of ellipsis nodes because for very large histories this set is
    # expected to be significantly smaller.
    cgpacker.full_nodes = relevant_nodes
    # Ellipsis revs -> their roots at the changelog level.
    cgpacker.precomputed_ellipsis = ellipsisroots
    # CL revs -> per-revlog revisions. Cleared in close() at the end of
    # each group.
    cgpacker.clrev_to_localrev = {}
    cgpacker.next_clrev_to_localrev = {}
    # Changelog nodes -> changelog revs. Filled in once during the
    # changelog stage and then left unmodified.
    cgpacker.clnode_to_rev = {}
    cgpacker.changelog_done = False
    # When true, the packer is serving shallow content and might need to
    # pack file contents not introduced by the changes being packed.
    cgpacker.is_shallow = depth is not None

    return cgpacker.generate(common, visitnodes, False, source)
216 216
def _addchangegrouppart(bundler, repo, cg, version):
    """Add a 'changegroup' part carrying ``cg`` to ``bundler``; return it."""
    part = bundler.newpart('changegroup', data=cg)
    part.addparam('version', version)
    if 'treemanifest' in repo.requirements:
        part.addparam('treemanifest', '1')
    return part

# Serve a changegroup for a client with a narrow clone.
def getbundlechangegrouppart_narrow(bundler, repo, source,
                                    bundlecaps=None, b2caps=None, heads=None,
                                    common=None, **kwargs):
    """Add the changegroup part(s) for a narrow client to ``bundler``.

    Args:
      bundler: bundle2 bundler that receives the new parts.
      repo: the repository being served.
      source: passed through to the changegroup generation calls.
      bundlecaps: accepted for signature compatibility; not used here.
      b2caps: bundle2 capabilities; its 'changegroup' entry lists the
              changegroup versions to negotiate from. None is treated
              like a missing entry.
      heads, common: nodes bounding the requested DAG range.
      **kwargs: narrow options read here: 'includepats', 'excludepats',
                'oldincludepats', 'oldexcludepats', 'known' (hex nodes)
                and 'depth'.

    Raises:
      ValueError: if no changegroup version is advertised or none is
                  supported by this repo.
      error.Abort: if 'depth' is given and is less than 1.
    """
    cgversions = (b2caps or {}).get('changegroup')
    if not cgversions: # 3.1 and 3.2 ship with an empty value
        raise ValueError(_("server does not advertise changegroup version,"
                           " can't negotiate support for ellipsis nodes"))
    supported = changegroup.supportedoutgoingversions(repo)
    cgversions = [v for v in cgversions if v in supported]
    if not cgversions:
        raise ValueError(_('no common changegroup version'))
    version = max(cgversions)

    include = sorted(filter(bool, kwargs.get('includepats', [])))
    exclude = sorted(filter(bool, kwargs.get('excludepats', [])))
    newmatch = narrowspec.match(repo.root, include=include, exclude=exclude)
    if not repo.ui.configbool("experimental", "narrowservebrokenellipses"):
        # Non-ellipsis mode: a regular changegroup restricted by the
        # narrow matcher, followed by the narrowspec itself.
        outgoing = exchange._computeoutgoing(repo, heads, common)
        if not outgoing.missing:
            return
        def wrappedgetbundler(orig, *args, **kwargs):
            packer = orig(*args, **kwargs)
            packer._narrow_matcher = lambda : newmatch
            return packer
        with extensions.wrappedfunction(changegroup, 'getbundler',
                                        wrappedgetbundler):
            cg = changegroup.makestream(repo, outgoing, version, source)
        _addchangegrouppart(bundler, repo, cg, version)

        if include or exclude:
            narrowspecpart = bundler.newpart(_SPECPART)
            if include:
                narrowspecpart.addparam(
                    _SPECPART_INCLUDE, '\n'.join(include), mandatory=True)
            if exclude:
                narrowspecpart.addparam(
                    _SPECPART_EXCLUDE, '\n'.join(exclude), mandatory=True)

        return

    depth = kwargs.get('depth', None)
    if depth is not None:
        depth = int(depth)
        if depth < 1:
            raise error.Abort(_('depth must be positive, got %d') % depth)

    heads = set(heads or repo.heads())
    common = set(common or [nullid])
    oldinclude = sorted(filter(bool, kwargs.get('oldincludepats', [])))
    oldexclude = sorted(filter(bool, kwargs.get('oldexcludepats', [])))
    known = {bin(n) for n in kwargs.get('known', [])}
    if known and (oldinclude != include or oldexclude != exclude):
        # Steps:
        # 1. Send kill for "$known & ::common"
        #
        # 2. Send changegroup for ::common
        #
        # 3. Proceed.
        #
        # In the future, we can send kills for only the specific
        # nodes we know should go away or change shape, and then
        # send a data stream that tells the client something like this:
        #
        # a) apply this changegroup
        # b) apply nodes XXX, YYY, ZZZ that you already have
        # c) goto a
        #
        # until they've built up the full new state.
        # Convert to revnums and intersect with "common". The client should
        # have made it a subset of "common" already, but let's be safe.
        known = set(repo.revs("%ln & ::%ln", known, common))
        # TODO: we could send only roots() of this set, and the
        # list of nodes in common, and the client could work out
        # what to strip, instead of us explicitly sending every
        # single node.
        deadrevs = known
        def genkills():
            for r in deadrevs:
                yield _KILLNODESIGNAL
                yield repo.changelog.node(r)
            yield _DONESIGNAL
        bundler.newpart(_CHANGESPECPART, data=genkills())
        newvisit, newfull, newellipsis = _computeellipsis(
            repo, set(), common, known, newmatch)
        if newvisit:
            cg = _packellipsischangegroup(
                repo, common, newmatch, newfull, newellipsis,
                newvisit, depth, source, version)
            _addchangegrouppart(bundler, repo, cg, version)

    visitnodes, relevant_nodes, ellipsisroots = _computeellipsis(
        repo, common, heads, set(), newmatch, depth=depth)

    repo.ui.debug('Found %d relevant revs\n' % len(relevant_nodes))
    if visitnodes:
        cg = _packellipsischangegroup(
            repo, common, newmatch, relevant_nodes, ellipsisroots,
            visitnodes, depth, source, version)
        _addchangegrouppart(bundler, repo, cg, version)
328 328
def applyacl_narrow(repo, kwargs):
    """Restrict the requested narrow patterns to what the user's ACL allows.

    The user name is taken from REMOTE_USER in ``ui.environ`` (falling back
    to ``ui.username()``). Its ``<user>.includes`` / ``<user>.excludes``
    settings in the narrow ACL config section, defaulting to the
    ``default.*`` settings, are passed to narrowspec.restrictpatterns()
    together with the requested 'includepats' / 'excludepats'.

    Returns a copy of ``kwargs`` with 'includepats' replaced, and with
    'excludepats' replaced when the restricted excludes are non-empty.

    Raises error.Abort when the user has no includes configured or when
    restrictpatterns() reports invalid includes.
    """
    ui = repo.ui
    username = ui.shortuser(ui.environ.get('REMOTE_USER') or ui.username())

    default_includes = ui.configlist(_NARROWACL_SECTION, 'default.includes')
    user_includes = ui.configlist(
        _NARROWACL_SECTION, username + '.includes', default_includes)
    default_excludes = ui.configlist(_NARROWACL_SECTION, 'default.excludes')
    user_excludes = ui.configlist(
        _NARROWACL_SECTION, username + '.excludes', default_excludes)
    if not user_includes:
        raise error.Abort(_("{} configuration for user {} is empty")
                          .format(_NARROWACL_SECTION, username))

    def topathpats(entries):
        # '*' stands for the whole repository; everything else is a path.
        pats = []
        for entry in entries:
            if entry == '*':
                pats.append('path:.')
            else:
                pats.append('path:' + entry)
        return pats

    user_includes = topathpats(user_includes)
    user_excludes = topathpats(user_excludes)

    requested_includes = set(kwargs.get('includepats', []))
    requested_excludes = set(kwargs.get('excludepats', []))

    restricted = narrowspec.restrictpatterns(
        requested_includes, requested_excludes, user_includes, user_excludes)
    req_includes, req_excludes, invalid_includes = restricted

    if invalid_includes:
        raise error.Abort(
            _("The following includes are not accessible for {}: {}")
            .format(username, invalid_includes))

    new_args = dict(kwargs)
    new_args['includepats'] = req_includes
    if req_excludes:
        new_args['excludepats'] = req_excludes
    return new_args
364 364
@bundle2.parthandler(_SPECPART, (_SPECPART_INCLUDE, _SPECPART_EXCLUDE))
def _handlechangespec_2(op, inpart):
    """Handle a narrow spec part: save the received narrowspec.

    The include/exclude parameters hold one pattern per line. After saving
    them, the narrow requirement is added to the repo (and written) if it
    is not present yet, and the repo caches are invalidated.
    """
    repo = op.repo
    params = inpart.params
    includepats = set(params.get(_SPECPART_INCLUDE, '').splitlines())
    excludepats = set(params.get(_SPECPART_EXCLUDE, '').splitlines())
    narrowspec.save(repo, includepats, excludepats)
    if narrowrepo.REQUIREMENT not in repo.requirements:
        repo.requirements.add(narrowrepo.REQUIREMENT)
        repo._writerequirements()
    repo.invalidate(clearfilecache=True)
374 374
@bundle2.parthandler(_CHANGESPECPART)
def _handlechangespec(op, inpart):
    """Handle a changespec part: strip the changesets the server killed.

    The part is a sequence of 4-byte signals. Each kill signal is followed
    by a 20-byte changelog node; the done signal ends the stream. Nodes
    present in the local changelog are stripped (with a backup bundle), and
    the backup's path is stored on ``op._widen_bundle`` for the changegroup
    handler to restore later.

    Raises error.Abort on an unknown signal.
    """
    repo = op.repo
    cl = repo.changelog

    # changesets which need to be stripped entirely. either they're no longer
    # needed in the new narrow spec, or the server is sending a replacement
    # in the changegroup part.
    clkills = set()

    # A changespec part contains all the updates to ellipsis nodes
    # that will happen as a result of widening or narrowing a
    # repo. All the changes that this block encounters are ellipsis
    # nodes or flags to kill an existing ellipsis.
    chunksignal = changegroup.readexactly(inpart, 4)
    while chunksignal != _DONESIGNAL:
        if chunksignal == _KILLNODESIGNAL:
            # a node used to be an ellipsis but isn't anymore
            ck = changegroup.readexactly(inpart, 20)
            if cl.hasnode(ck):
                clkills.add(ck)
        else:
            raise error.Abort(
                _('unexpected changespec node chunk type: %s') % chunksignal)
        chunksignal = changegroup.readexactly(inpart, 4)

    if clkills:
        # preserve bookmarks that repair.strip() would otherwise strip
        bmstore = repo._bookmarks
        class dummybmstore(dict):
            def applychanges(self, repo, tr, changes):
                pass
            def recordchange(self, tr): # legacy version
                pass
        repo._bookmarks = dummybmstore()
        try:
            chgrpfile = repair.strip(op.ui, repo, list(clkills), backup=True,
                                     topic='widen')
        finally:
            # Always put the real bookmark store back, even if the strip
            # fails; otherwise the repo would be left with the dummy store.
            repo._bookmarks = bmstore
        if chgrpfile:
            # presence of _widen_bundle attribute activates widen handler later
            op._widen_bundle = chgrpfile
    # Set the new narrowspec if we're widening. The setnewnarrowpats() method
    # will currently always be there when using the core+narrowhg server, but
    # other servers may include a changespec part even when not widening (e.g.
    # because we're deepening a shallow repo).
    if util.safehasattr(repo, 'setnewnarrowpats'):
        repo.setnewnarrowpats()
422 422
def handlechangegroup_widen(op, inpart):
    """Changegroup exchange handler which restores temporarily-stripped nodes

    Reads the backup bundle recorded in ``op._widen_bundle`` (the attribute
    is removed), applies it to the repo, removes the repo's undo files and
    finally deletes the backup bundle. ``inpart`` is not used.
    """
    # We saved a bundle with stripped node data we must now restore.
    # This approach is based on mercurial/repair.py@6ee26a53c111.
    repo = op.repo
    ui = op.ui

    chgrpfile = op._widen_bundle
    del op._widen_bundle
    vfs = repo.vfs

    ui.note(_("adding branch\n"))
    # Decide once whether to buffer, so the push and pop always pair up.
    silence = not ui.verbose
    f = vfs.open(chgrpfile, "rb")
    try:
        gen = exchange.readbundle(ui, f, chgrpfile, vfs)
        if silence:
            # silence internal shuffling chatter
            ui.pushbuffer()
        try:
            if isinstance(gen, bundle2.unbundle20):
                with repo.transaction('strip') as tr:
                    bundle2.processbundle(repo, gen, lambda: tr)
            else:
                gen.apply(repo, 'strip', 'bundle:' + vfs.join(chgrpfile), True)
        finally:
            # Pop even on failure so later output (including the error
            # report) is not swallowed by a buffer left on the ui.
            if silence:
                ui.popbuffer()
    finally:
        f.close()

    # remove undo files
    for undovfs, undofile in repo.undofiles():
        try:
            undovfs.unlink(undofile)
        except OSError as e:
            if e.errno != errno.ENOENT:
                ui.warn(_('error removing %s: %s\n') %
                        (undovfs.join(undofile), str(e)))

    # Remove partial backup only if there were no exceptions
    vfs.unlink(chgrpfile)
462 462
def setup():
    """Enable narrow repo support in bundle2-related extension points."""
    extensions.wrapfunction(bundle2, 'getrepocaps', getrepocaps_narrow)

    # Register the getbundle options used by narrow, with their wire types.
    narrowopts = (
        ('narrow', 'boolean'),
        ('depth', 'plain'),
        ('oldincludepats', 'csv'),
        ('oldexcludepats', 'csv'),
        ('includepats', 'csv'),
        ('excludepats', 'csv'),
        ('known', 'csv'),
    )
    for optname, opttype in narrowopts:
        wireproto.gboptsmap[optname] = opttype

    # Extend changegroup serving to handle requests from narrow clients.
    partsmapping = exchange.getbundle2partsmapping
    origcgfn = partsmapping['changegroup']
    def wrappedcgfn(*args, **kwargs):
        repo = args[1]
        if repo.ui.has_section(_NARROWACL_SECTION):
            # An ACL section is configured: restrict the request first.
            aclkwargs = applyacl_narrow(repo, kwargs)
            getbundlechangegrouppart_narrow(*args, **aclkwargs)
            return
        if kwargs.get('narrow', False):
            getbundlechangegrouppart_narrow(*args, **kwargs)
            return
        origcgfn(*args, **kwargs)
    partsmapping['changegroup'] = wrappedcgfn

    # Extend changegroup receiver so client can fixup after widen requests.
    handlermapping = bundle2.parthandlermapping
    origcghandler = handlermapping['changegroup']
    def wrappedcghandler(op, inpart):
        origcghandler(op, inpart)
        if util.safehasattr(op, '_widen_bundle'):
            handlechangegroup_widen(op, inpart)
    wrappedcghandler.params = origcghandler.params
    handlermapping['changegroup'] = wrappedcghandler
General Comments 0
You need to be logged in to leave comments. Login now