manifest: use `read_delta_parents` when adjusting linkrev in remotefile...
marmoute
r52675:4704c4c7 default
@@ -1,527 +1,533
1 1 # remotefilectx.py - filectx/workingfilectx implementations for remotefilelog
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import time
10 10
11 11 from mercurial.node import bin, hex, nullrev
12 12 from mercurial import (
13 13 ancestor,
14 14 context,
15 15 error,
16 16 phases,
17 17 util,
18 18 )
19 19 from . import shallowutil
20 20
21 21 propertycache = util.propertycache
22 22 FASTLOG_TIMEOUT_IN_SECS = 0.5
23 23
24 24
25 25 class remotefilectx(context.filectx):
26 26 def __init__(
27 27 self,
28 28 repo,
29 29 path,
30 30 changeid=None,
31 31 fileid=None,
32 32 filelog=None,
33 33 changectx=None,
34 34 ancestormap=None,
35 35 ):
36 36 if fileid == nullrev:
37 37 fileid = repo.nullid
38 38 if fileid and len(fileid) == 40:
39 39 fileid = bin(fileid)
40 40 super(remotefilectx, self).__init__(
41 41 repo, path, changeid, fileid, filelog, changectx
42 42 )
43 43 self._ancestormap = ancestormap
44 44
45 45 def size(self):
46 46 return self._filelog.size(self._filenode)
47 47
48 48 @propertycache
49 49 def _changeid(self):
50 50 if '_changeid' in self.__dict__:
51 51 return self._changeid
52 52 elif '_changectx' in self.__dict__:
53 53 return self._changectx.rev()
54 54 elif '_descendantrev' in self.__dict__:
55 55 # this file context was created from a revision with a known
56 56 # descendant, we can (lazily) correct for linkrev aliases
57 57 linknode = self._adjustlinknode(
58 58 self._path, self._filelog, self._filenode, self._descendantrev
59 59 )
60 60 return self._repo.unfiltered().changelog.rev(linknode)
61 61 else:
62 62 return self.linkrev()
63 63
64 64 def filectx(self, fileid, changeid=None):
65 65 """opens an arbitrary revision of the file without
66 66 opening a new filelog"""
67 67 return remotefilectx(
68 68 self._repo,
69 69 self._path,
70 70 fileid=fileid,
71 71 filelog=self._filelog,
72 72 changeid=changeid,
73 73 )
74 74
75 75 def linkrev(self):
76 76 return self._linkrev
77 77
78 78 @propertycache
79 79 def _linkrev(self):
80 80 if self._filenode == self._repo.nullid:
81 81 return nullrev
82 82
83 83 ancestormap = self.ancestormap()
84 84 p1, p2, linknode, copyfrom = ancestormap[self._filenode]
85 85 rev = self._repo.changelog.index.get_rev(linknode)
86 86 if rev is not None:
87 87 return rev
88 88
89 89 # Search all commits for the appropriate linkrev (slow, but uncommon)
90 90 path = self._path
91 91 fileid = self._filenode
92 92 cl = self._repo.unfiltered().changelog
93 93 mfl = self._repo.manifestlog
94 94
95 95 for rev in range(len(cl) - 1, 0, -1):
96 96 node = cl.node(rev)
97 97 data = cl.read(
98 98 node
99 99 ) # get changeset data (we avoid object creation)
100 100 if path in data[3]: # checking the 'files' field.
101 101 # The file has been touched, check if the hash is what we're
102 102 # looking for.
103 if fileid == mfl[data[0]].readfast().get(path):
103 #
104 # The change has to be against a parent, otherwise we might be
105 # missing linkrev-worthy changes.
106 m = mfl[data[0]].read_delta_parents(exact=False)
107 if fileid == m.get(path):
104 108 return rev
105 109
106 110 # Couldn't find the linkrev. This should generally not happen, and will
107 111 # likely cause a crash.
108 112 return None
109 113
110 114 def introrev(self):
111 115 """return the rev of the changeset which introduced this file revision
112 116
113 117 This method is different from linkrev because it takes into account the
114 118 changeset the filectx was created from. It ensures the returned
115 119 revision is one of its ancestors. This prevents bugs from
116 120 'linkrev-shadowing' when a file revision is used by multiple
117 121 changesets.
118 122 """
119 123 lkr = self.linkrev()
120 124 attrs = vars(self)
121 125 noctx = not ('_changeid' in attrs or r'_changectx' in attrs)
122 126 if noctx or self.rev() == lkr:
123 127 return lkr
124 128 linknode = self._adjustlinknode(
125 129 self._path,
126 130 self._filelog,
127 131 self._filenode,
128 132 self.rev(),
129 133 inclusive=True,
130 134 )
131 135 return self._repo.changelog.rev(linknode)
132 136
133 137 def renamed(self):
134 138 """check if file was actually renamed in this changeset revision
135 139
136 140 If a rename is logged in the file revision, we report the copy for the
137 141 changeset only if the file revision's linkrev points back to the changeset
138 142 in question or both changeset parents contain different file revisions.
139 143 """
140 144 ancestormap = self.ancestormap()
141 145
142 146 p1, p2, linknode, copyfrom = ancestormap[self._filenode]
143 147 if not copyfrom:
144 148 return None
145 149
146 150 renamed = (copyfrom, p1)
147 151 if self.rev() == self.linkrev():
148 152 return renamed
149 153
150 154 name = self.path()
151 155 fnode = self._filenode
152 156 for p in self._changectx.parents():
153 157 try:
154 158 if fnode == p.filenode(name):
155 159 return None
156 160 except error.LookupError:
157 161 pass
158 162 return renamed
159 163
160 164 def copysource(self):
161 165 copy = self.renamed()
162 166 return copy and copy[0]
163 167
164 168 def ancestormap(self):
165 169 if not self._ancestormap:
166 170 self._ancestormap = self.filelog().ancestormap(self._filenode)
167 171
168 172 return self._ancestormap
169 173
170 174 def parents(self):
171 175 repo = self._repo
172 176 ancestormap = self.ancestormap()
173 177
174 178 p1, p2, linknode, copyfrom = ancestormap[self._filenode]
175 179 results = []
176 180 if p1 != repo.nullid:
177 181 path = copyfrom or self._path
178 182 flog = repo.file(path)
179 183 p1ctx = remotefilectx(
180 184 repo, path, fileid=p1, filelog=flog, ancestormap=ancestormap
181 185 )
182 186 p1ctx._descendantrev = self.rev()
183 187 results.append(p1ctx)
184 188
185 189 if p2 != repo.nullid:
186 190 path = self._path
187 191 flog = repo.file(path)
188 192 p2ctx = remotefilectx(
189 193 repo, path, fileid=p2, filelog=flog, ancestormap=ancestormap
190 194 )
191 195 p2ctx._descendantrev = self.rev()
192 196 results.append(p2ctx)
193 197
194 198 return results
195 199
196 200 def _nodefromancrev(self, ancrev, cl, mfl, path, fnode):
197 201 """returns the node for <path> in <ancrev> if content matches <fnode>"""
198 202 ancctx = cl.read(ancrev) # This avoids object creation.
199 203 manifestnode, files = ancctx[0], ancctx[3]
200 204 # If the file was touched in this ancestor, and the content is similar
201 205 # to the one we are searching for.
202 if path in files and fnode == mfl[manifestnode].readfast().get(path):
203 return cl.node(ancrev)
206 if path in files:
207 m = mfl[manifestnode].read_delta_parents(exact=False)
208 if fnode == m.get(path):
209 return cl.node(ancrev)
204 210 return None
205 211
206 212 def _adjustlinknode(self, path, filelog, fnode, srcrev, inclusive=False):
207 213 """return the first ancestor of <srcrev> introducing <fnode>
208 214
209 215 If the linkrev of the file revision does not point to an ancestor of
210 216 srcrev, we'll walk down the ancestors until we find one introducing
211 217 this file revision.
212 218
213 219 :repo: a localrepository object (used to access changelog and manifest)
214 220 :path: the file path
215 221 :fnode: the nodeid of the file revision
216 222 :filelog: the filelog of this path
217 223 :srcrev: the changeset revision we search ancestors from
218 224 :inclusive: if true, the src revision will also be checked
219 225
220 226 Note: This is based on adjustlinkrev in core, but it's quite different.
221 227
222 228 adjustlinkrev depends on the fact that the linkrev is the bottommost
223 229 node, and uses that as a stopping point for the ancestor traversal. We
224 230 can't do that here because the linknode is not guaranteed to be the
225 231 bottommost one.
226 232
227 233 In our code here, we actually know what a bunch of potential ancestor
228 234 linknodes are, so instead of stopping the cheap-ancestor-traversal when
229 235 we get to a linkrev, we stop when we see any of the known linknodes.
230 236 """
231 237 repo = self._repo
232 238 cl = repo.unfiltered().changelog
233 239 mfl = repo.manifestlog
234 240 ancestormap = self.ancestormap()
235 241 linknode = ancestormap[fnode][2]
236 242
237 243 if srcrev is None:
238 244 # wctx case, used by workingfilectx during mergecopy
239 245 revs = [p.rev() for p in self._repo[None].parents()]
240 246 inclusive = True # we skipped the real (revless) source
241 247 else:
242 248 revs = [srcrev]
243 249
244 250 if self._verifylinknode(revs, linknode):
245 251 return linknode
246 252
247 253 commonlogkwargs = {
248 254 'revs': b' '.join([hex(cl.node(rev)) for rev in revs]),
249 255 'fnode': hex(fnode),
250 256 'filepath': path,
251 257 'user': shallowutil.getusername(repo.ui),
252 258 'reponame': shallowutil.getreponame(repo.ui),
253 259 }
254 260
255 261 repo.ui.log(b'linkrevfixup', b'adjusting linknode\n', **commonlogkwargs)
256 262
257 263 pc = repo._phasecache
258 264 seenpublic = False
259 265 iteranc = cl.ancestors(revs, inclusive=inclusive)
260 266 for ancrev in iteranc:
261 267 # First, check locally-available history.
262 268 lnode = self._nodefromancrev(ancrev, cl, mfl, path, fnode)
263 269 if lnode is not None:
264 270 return lnode
265 271
266 272 # adjusting linknode can be super-slow. To mitigate the issue
267 273 # we use two heuristics: calling fastlog and forcing remotefilelog
268 274 # prefetch
269 275 if not seenpublic and pc.phase(repo, ancrev) == phases.public:
270 276 # TODO: there used to be a codepath to fetch linknodes
271 277 # from a server as a fast path, but it appeared to
272 278 # depend on an API FB added to their phabricator.
273 279 lnode = self._forceprefetch(
274 280 repo, path, fnode, revs, commonlogkwargs
275 281 )
276 282 if lnode:
277 283 return lnode
278 284 seenpublic = True
279 285
280 286 return linknode
281 287
282 288 def _forceprefetch(self, repo, path, fnode, revs, commonlogkwargs):
283 289 # This next part is super non-obvious, so big comment block time!
284 290 #
285 291 # It is possible to get extremely bad performance here when a fairly
286 292 # common set of circumstances occur when this extension is combined
287 293 # with a server-side commit rewriting extension like pushrebase.
288 294 #
289 295 # First, an engineer creates Commit A and pushes it to the server.
290 296 # While the server's data structure will have the correct linkrev
291 297 # for the files touched in Commit A, the client will have the
292 298 # linkrev of the local commit, which is "invalid" because it's not
293 299 # an ancestor of the main line of development.
294 300 #
295 301 # The client will never download the remotefilelog with the correct
296 302 # linkrev as long as nobody else touches that file, since the file
297 303 # data and history haven't changed since Commit A.
298 304 #
299 305 # After a long time (or a short time in a heavily used repo), if the
300 306 # same engineer returns to change the same file, some commands --
301 307 # such as amends of commits with file moves, logs, diffs, etc --
302 308 # can trigger this _adjustlinknode code. In those cases, finding
303 309 # the correct rev can become quite expensive, as the correct
304 310 # revision is far back in history and we need to walk back through
305 311 # history to find it.
306 312 #
307 313 # In order to improve this situation, we force a prefetch of the
308 314 # remotefilelog data blob for the file we were called on. We do this
309 315 # at most once, when we first see a public commit in the history we
310 316 # are traversing.
311 317 #
312 318 # Forcing the prefetch means we will download the remote blob even
313 319 # if we have the "correct" blob in the local store. Since the union
314 320 # store checks the remote store first, this means we are much more
315 321 # likely to get the correct linkrev at this point.
316 322 #
317 323 # In rare circumstances (such as the server having a suboptimal
318 324 # linkrev for our use case), we will fall back to the old slow path.
319 325 #
320 326 # We may want to add additional heuristics here in the future if
321 327 # the slow path is used too much. One promising possibility is using
322 328 # obsolescence markers to find a more-likely-correct linkrev.
323 329
324 330 logmsg = b''
325 331 start = time.time()
326 332 try:
327 333 repo.fileservice.prefetch([(path, hex(fnode))], force=True)
328 334
329 335 # Now that we've downloaded a new blob from the server,
330 336 # we need to rebuild the ancestor map to recompute the
331 337 # linknodes.
332 338 self._ancestormap = None
333 339 linknode = self.ancestormap()[fnode][2] # 2 is linknode
334 340 if self._verifylinknode(revs, linknode):
335 341 logmsg = b'remotefilelog prefetching succeeded'
336 342 return linknode
337 343 logmsg = b'remotefilelog prefetching not found'
338 344 return None
339 345 except Exception as e:
340 346 logmsg = b'remotefilelog prefetching failed (%s)' % e
341 347 return None
342 348 finally:
343 349 elapsed = time.time() - start
344 350 repo.ui.log(
345 351 b'linkrevfixup',
346 352 logmsg + b'\n',
347 353 elapsed=elapsed * 1000,
348 354 **commonlogkwargs
349 355 )
350 356
351 357 def _verifylinknode(self, revs, linknode):
352 358 """
353 359 Check if a linknode is the correct one for the current history.
354 360
355 361 That is, return True if the linkrev is an ancestor of any of the
356 362 passed-in revs, otherwise return False.
357 363
358 364 `revs` is a list that usually has one element: the wdir parent
359 365 or the user-passed rev we're looking back from. It may contain two revs
360 366 when there is a merge going on, or zero revs when a root node with no
361 367 parents is being created.
362 368 """
363 369 if not revs:
364 370 return False
365 371 try:
366 372 # Use the C fastpath to check if the given linknode is correct.
367 373 cl = self._repo.unfiltered().changelog
368 374 return any(cl.isancestor(linknode, cl.node(r)) for r in revs)
369 375 except error.LookupError:
370 376 # The linknode read from the blob may have been stripped or
371 377 # otherwise not present in the repository anymore. Do not fail hard
372 378 # in this case. Instead, return false and continue the search for
373 379 # the correct linknode.
374 380 return False
375 381
376 382 def ancestors(self, followfirst=False):
377 383 ancestors = []
378 384 queue = collections.deque((self,))
379 385 seen = set()
380 386 while queue:
381 387 current = queue.pop()
382 388 if current.filenode() in seen:
383 389 continue
384 390 seen.add(current.filenode())
385 391
386 392 ancestors.append(current)
387 393
388 394 parents = current.parents()
389 395 first = True
390 396 for p in parents:
391 397 if first or not followfirst:
392 398 queue.append(p)
393 399 first = False
394 400
395 401 # Remove self
396 402 ancestors.pop(0)
397 403
398 404 # Sort by linkrev
399 405 # The copy tracing algorithm depends on these coming out in order
400 406 ancestors = sorted(ancestors, reverse=True, key=lambda x: x.linkrev())
401 407
402 408 for ancestor in ancestors:
403 409 yield ancestor
404 410
405 411 def ancestor(self, fc2, actx):
406 412 # the easy case: no (relevant) renames
407 413 if fc2.path() == self.path() and self.path() in actx:
408 414 return actx[self.path()]
409 415
410 416 # the next easiest cases: unambiguous predecessor (name trumps
411 417 # history)
412 418 if self.path() in actx and fc2.path() not in actx:
413 419 return actx[self.path()]
414 420 if fc2.path() in actx and self.path() not in actx:
415 421 return actx[fc2.path()]
416 422
417 423 # do a full traversal
418 424 amap = self.ancestormap()
419 425 bmap = fc2.ancestormap()
420 426
421 427 def parents(x):
422 428 f, n = x
423 429 p = amap.get(n) or bmap.get(n)
424 430 if not p:
425 431 return []
426 432
427 433 return [(p[3] or f, p[0]), (f, p[1])]
428 434
429 435 a = (self.path(), self.filenode())
430 436 b = (fc2.path(), fc2.filenode())
431 437 result = ancestor.genericancestor(a, b, parents)
432 438 if result:
433 439 f, n = result
434 440 r = remotefilectx(self._repo, f, fileid=n, ancestormap=amap)
435 441 return r
436 442
437 443 return None
438 444
439 445 def annotate(self, *args, **kwargs):
440 446 introctx = self
441 447 prefetchskip = kwargs.pop('prefetchskip', None)
442 448 if prefetchskip:
443 449 # use introrev so prefetchskip can be accurately tested
444 450 introrev = self.introrev()
445 451 if self.rev() != introrev:
446 452 introctx = remotefilectx(
447 453 self._repo,
448 454 self._path,
449 455 changeid=introrev,
450 456 fileid=self._filenode,
451 457 filelog=self._filelog,
452 458 ancestormap=self._ancestormap,
453 459 )
454 460
455 461 # like self.ancestors, but append to "fetch" and skip visiting parents
456 462 # of nodes in "prefetchskip".
457 463 fetch = []
458 464 seen = set()
459 465 queue = collections.deque((introctx,))
460 466 seen.add(introctx.node())
461 467 while queue:
462 468 current = queue.pop()
463 469 if current.filenode() != self.filenode():
464 470 # this is a "joint point". fastannotate needs contents of
465 471 # "joint point"s to calculate diffs for side branches.
466 472 fetch.append((current.path(), hex(current.filenode())))
467 473 if prefetchskip and current in prefetchskip:
468 474 continue
469 475 for parent in current.parents():
470 476 if parent.node() not in seen:
471 477 seen.add(parent.node())
472 478 queue.append(parent)
473 479
474 480 self._repo.ui.debug(
475 481 b'remotefilelog: prefetching %d files '
476 482 b'for annotate\n' % len(fetch)
477 483 )
478 484 if fetch:
479 485 self._repo.fileservice.prefetch(fetch)
480 486 return super(remotefilectx, self).annotate(*args, **kwargs)
481 487
482 488 # Return an empty list so that hg serve and thg don't stack trace
483 489 def children(self):
484 490 return []
485 491
486 492
487 493 class remoteworkingfilectx(context.workingfilectx, remotefilectx):
488 494 def __init__(self, repo, path, filelog=None, workingctx=None):
489 495 self._ancestormap = None
490 496 super(remoteworkingfilectx, self).__init__(
491 497 repo, path, filelog, workingctx
492 498 )
493 499
494 500 def parents(self):
495 501 return remotefilectx.parents(self)
496 502
497 503 def ancestormap(self):
498 504 if not self._ancestormap:
499 505 path = self._path
500 506 pcl = self._changectx._parents
501 507 renamed = self.renamed()
502 508
503 509 if renamed:
504 510 p1 = renamed
505 511 else:
506 512 p1 = (path, pcl[0]._manifest.get(path, self._repo.nullid))
507 513
508 514 p2 = (path, self._repo.nullid)
509 515 if len(pcl) > 1:
510 516 p2 = (path, pcl[1]._manifest.get(path, self._repo.nullid))
511 517
512 518 m = {}
513 519 if p1[1] != self._repo.nullid:
514 520 p1ctx = self._repo.filectx(p1[0], fileid=p1[1])
515 521 m.update(p1ctx.filelog().ancestormap(p1[1]))
516 522
517 523 if p2[1] != self._repo.nullid:
518 524 p2ctx = self._repo.filectx(p2[0], fileid=p2[1])
519 525 m.update(p2ctx.filelog().ancestormap(p2[1]))
520 526
521 527 copyfrom = b''
522 528 if renamed:
523 529 copyfrom = renamed[0]
524 530 m[None] = (p1[1], p2[1], self._repo.nullid, copyfrom)
525 531 self._ancestormap = m
526 532
527 533 return self._ancestormap
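
The change above swaps `readfast()` for `read_delta_parents(exact=False)` in the two places that scan history for a matching file node (`_linkrev` and `_nodefromancrev`). As the added comment notes, `readfast()` may return a delta computed against an arbitrary storage base rather than the changeset's parents, so a revision that genuinely touched the file can come back without an entry for the path, and the scan would miss a linkrev-worthy change. Reading the delta relative to the changeset's parents guarantees the comparison sees every change made in that changeset. A minimal sketch of the corrected loop, assuming `cl` and `mfl` are a repository's unfiltered changelog and manifestlog as in the diff (the helper name `find_introducing_rev` is hypothetical, not part of the commit):

    def find_introducing_rev(cl, mfl, path, fileid):
        """Walk the changelog backwards and return the first revision
        that introduced ``fileid`` at ``path``, or None if no revision
        matches (mirrors the loop in _linkrev above)."""
        for rev in range(len(cl) - 1, 0, -1):
            # Raw changeset data tuple; avoids changectx object creation.
            data = cl.read(cl.node(rev))
            if path in data[3]:  # data[3] is the 'files' field
                # Compare against a delta taken relative to the
                # changeset's parents; a delta against an arbitrary base
                # could omit the path even though this changeset
                # touched it.
                m = mfl[data[0]].read_delta_parents(exact=False)
                if fileid == m.get(path):
                    return rev
        return None

With `exact=False` the returned delta is allowed to be a fast superset that may also list some unchanged entries; that looks acceptable here because the changeset's 'files' list has already confirmed the path was touched in the revision being checked.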