copies: return None instead of ChangingFiles when relevant...
Author: marmoute
Changeset: r46246:1720d843 (branch: default, phase: draft)
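For context, here is a minimal sketch of the contract this changeset introduces (simplified, with hypothetical helper names; the real implementation is in the copies.py hunk below): the revision-info getter returns changes=None when a revision does not carry the HASCOPIESINFO flag, and every consumer guards for None instead of always materializing a ChangingFiles object.

    # Hypothetical, simplified sketch -- not the actual Mercurial API.
    def make_revinfo(parents, flags, read_changes, HASCOPIESINFO):
        def revinfo(rev):
            p1, p2 = parents(rev)
            changes = None
            if flags(rev) & HASCOPIESINFO:
                # only pay the sidedata decoding cost when the flag
                # says copy information actually exists
                changes = read_changes(rev)
            return p1, p2, changes
        return revinfo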
@@ -1,618 +1,618
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import attr
17 17
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 metadata,
22 22 pycompat,
23 23 revlog,
24 24 )
25 25 from .utils import (
26 26 dateutil,
27 27 stringutil,
28 28 )
29 29 from .revlogutils import flagutil
30 30
31 31 _defaultextra = {b'branch': b'default'}
32 32
33 33
34 34 def _string_escape(text):
35 35 """
36 36 >>> from .pycompat import bytechr as chr
37 37 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
38 38 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
39 39 >>> s
40 40 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
41 41 >>> res = _string_escape(s)
42 42 >>> s == _string_unescape(res)
43 43 True
44 44 """
45 45 # subset of the string_escape codec
46 46 text = (
47 47 text.replace(b'\\', b'\\\\')
48 48 .replace(b'\n', b'\\n')
49 49 .replace(b'\r', b'\\r')
50 50 )
51 51 return text.replace(b'\0', b'\\0')
52 52
53 53
54 54 def _string_unescape(text):
55 55 if b'\\0' in text:
56 56 # fix up \0 without getting into trouble with \\0
57 57 text = text.replace(b'\\\\', b'\\\\\n')
58 58 text = text.replace(b'\\0', b'\0')
59 59 text = text.replace(b'\n', b'')
60 60 return stringutil.unescapestr(text)
61 61
62 62
63 63 def decodeextra(text):
64 64 """
65 65 >>> from .pycompat import bytechr as chr
66 66 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
67 67 ... ).items())
68 68 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
69 69 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
70 70 ... b'baz': chr(92) + chr(0) + b'2'})
71 71 ... ).items())
72 72 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
73 73 """
74 74 extra = _defaultextra.copy()
75 75 for l in text.split(b'\0'):
76 76 if l:
77 77 k, v = _string_unescape(l).split(b':', 1)
78 78 extra[k] = v
79 79 return extra
80 80
81 81
82 82 def encodeextra(d):
83 83 # keys must be sorted to produce a deterministic changelog entry
84 84 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
85 85 return b"\0".join(items)
86 86
87 87
88 88 def stripdesc(desc):
89 89 """strip trailing whitespace and leading and trailing empty lines"""
90 90 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
91 91
92 92
93 93 class appender(object):
94 94 '''the changelog index must be updated last on disk, so we use this class
95 95 to delay writes to it'''
96 96
97 97 def __init__(self, vfs, name, mode, buf):
98 98 self.data = buf
99 99 fp = vfs(name, mode)
100 100 self.fp = fp
101 101 self.offset = fp.tell()
102 102 self.size = vfs.fstat(fp).st_size
103 103 self._end = self.size
104 104
105 105 def end(self):
106 106 return self._end
107 107
108 108 def tell(self):
109 109 return self.offset
110 110
111 111 def flush(self):
112 112 pass
113 113
114 114 @property
115 115 def closed(self):
116 116 return self.fp.closed
117 117
118 118 def close(self):
119 119 self.fp.close()
120 120
121 121 def seek(self, offset, whence=0):
122 122 '''virtual file offset spans real file and data'''
123 123 if whence == 0:
124 124 self.offset = offset
125 125 elif whence == 1:
126 126 self.offset += offset
127 127 elif whence == 2:
128 128 self.offset = self.end() + offset
129 129 if self.offset < self.size:
130 130 self.fp.seek(self.offset)
131 131
132 132 def read(self, count=-1):
133 133 '''only trick here is reads that span real file and data'''
134 134 ret = b""
135 135 if self.offset < self.size:
136 136 s = self.fp.read(count)
137 137 ret = s
138 138 self.offset += len(s)
139 139 if count > 0:
140 140 count -= len(s)
141 141 if count != 0:
142 142 doff = self.offset - self.size
143 143 self.data.insert(0, b"".join(self.data))
144 144 del self.data[1:]
145 145 s = self.data[0][doff : doff + count]
146 146 self.offset += len(s)
147 147 ret += s
148 148 return ret
149 149
150 150 def write(self, s):
151 151 self.data.append(bytes(s))
152 152 self.offset += len(s)
153 153 self._end += len(s)
154 154
155 155 def __enter__(self):
156 156 self.fp.__enter__()
157 157 return self
158 158
159 159 def __exit__(self, *args):
160 160 return self.fp.__exit__(*args)
161 161
162 162
163 163 class _divertopener(object):
164 164 def __init__(self, opener, target):
165 165 self._opener = opener
166 166 self._target = target
167 167
168 168 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
169 169 if name != self._target:
170 170 return self._opener(name, mode, **kwargs)
171 171 return self._opener(name + b".a", mode, **kwargs)
172 172
173 173 def __getattr__(self, attr):
174 174 return getattr(self._opener, attr)
175 175
176 176
177 177 def _delayopener(opener, target, buf):
178 178 """build an opener that stores chunks in 'buf' instead of 'target'"""
179 179
180 180 def _delay(name, mode=b'r', checkambig=False, **kwargs):
181 181 if name != target:
182 182 return opener(name, mode, **kwargs)
183 183 assert not kwargs
184 184 return appender(opener, name, mode, buf)
185 185
186 186 return _delay
187 187
188 188
189 189 @attr.s
190 190 class _changelogrevision(object):
191 191 # Extensions might modify _defaultextra, so let the constructor below pass
192 192 # it in
193 193 extra = attr.ib()
194 194 manifest = attr.ib(default=nullid)
195 195 user = attr.ib(default=b'')
196 196 date = attr.ib(default=(0, 0))
197 197 files = attr.ib(default=attr.Factory(list))
198 198 filesadded = attr.ib(default=None)
199 199 filesremoved = attr.ib(default=None)
200 200 p1copies = attr.ib(default=None)
201 201 p2copies = attr.ib(default=None)
202 202 description = attr.ib(default=b'')
203 203
204 204
205 205 class changelogrevision(object):
206 206 """Holds results of a parsed changelog revision.
207 207
208 208 Changelog revisions consist of multiple pieces of data, including
209 209 the manifest node, user, and date. This object exposes a view into
210 210 the parsed object.
211 211 """
212 212
213 213 __slots__ = (
214 214 '_offsets',
215 215 '_text',
216 216 '_sidedata',
217 217 '_cpsd',
218 218 '_changes',
219 219 )
220 220
221 221 def __new__(cls, text, sidedata, cpsd):
222 222 if not text:
223 223 return _changelogrevision(extra=_defaultextra)
224 224
225 225 self = super(changelogrevision, cls).__new__(cls)
226 226 # We could return here and implement the following as an __init__.
227 227 # But doing it here is equivalent and saves an extra function call.
228 228
229 229 # format used:
230 230 # nodeid\n : manifest node in ascii
231 231 # user\n : user, no \n or \r allowed
232 232 # time tz extra\n : date (time is int or float, timezone is int)
233 233 # : extra is metadata, encoded and separated by '\0'
234 234 # : older versions ignore it
235 235 # files\n\n : files modified by the cset, no \n or \r allowed
236 236 # (.*) : comment (free text, ideally utf-8)
237 237 #
238 238 # changelog v0 doesn't use extra
239 239
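# For illustration (hypothetical values), a raw entry in this format:
#   0123456789abcdef0123456789abcdef01234567\n   <- manifest node, hex
#   Alice Example <alice@example.com>\n          <- user
#   1600000000 0 branch:stable\n                 <- time tz extra
#   file-a\n                                     <- files, one per line
#   file-b\n
#   \n                                           <- blank separator line
#   commit message text                          <- description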
240 240 nl1 = text.index(b'\n')
241 241 nl2 = text.index(b'\n', nl1 + 1)
242 242 nl3 = text.index(b'\n', nl2 + 1)
243 243
244 244 # The list of files may be empty, in which case nl3 is the first
245 245 # newline of the double newline that precedes the description.
246 246 if text[nl3 + 1 : nl3 + 2] == b'\n':
247 247 doublenl = nl3
248 248 else:
249 249 doublenl = text.index(b'\n\n', nl3 + 1)
250 250
251 251 self._offsets = (nl1, nl2, nl3, doublenl)
252 252 self._text = text
253 253 self._sidedata = sidedata
254 254 self._cpsd = cpsd
255 255 self._changes = None
256 256
257 257 return self
258 258
259 259 @property
260 260 def manifest(self):
261 261 return bin(self._text[0 : self._offsets[0]])
262 262
263 263 @property
264 264 def user(self):
265 265 off = self._offsets
266 266 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
267 267
268 268 @property
269 269 def _rawdate(self):
270 270 off = self._offsets
271 271 dateextra = self._text[off[1] + 1 : off[2]]
272 272 return dateextra.split(b' ', 2)[0:2]
273 273
274 274 @property
275 275 def _rawextra(self):
276 276 off = self._offsets
277 277 dateextra = self._text[off[1] + 1 : off[2]]
278 278 fields = dateextra.split(b' ', 2)
279 279 if len(fields) != 3:
280 280 return None
281 281
282 282 return fields[2]
283 283
284 284 @property
285 285 def date(self):
286 286 raw = self._rawdate
287 287 time = float(raw[0])
288 288 # Various tools did silly things with the timezone.
289 289 try:
290 290 timezone = int(raw[1])
291 291 except ValueError:
292 292 timezone = 0
293 293
294 294 return time, timezone
295 295
296 296 @property
297 297 def extra(self):
298 298 raw = self._rawextra
299 299 if raw is None:
300 300 return _defaultextra
301 301
302 302 return decodeextra(raw)
303 303
304 304 @property
305 305 def changes(self):
306 306 if self._changes is not None:
307 307 return self._changes
308 308 if self._cpsd:
309 309 changes = metadata.decode_files_sidedata(self._sidedata)
310 310 else:
311 311 changes = metadata.ChangingFiles(
312 312 touched=self.files or (),
313 313 added=self.filesadded or (),
314 314 removed=self.filesremoved or (),
315 315 p1_copies=self.p1copies or {},
316 316 p2_copies=self.p2copies or {},
317 317 )
318 318 self._changes = changes
319 319 return changes
320 320
321 321 @property
322 322 def files(self):
323 323 if self._cpsd:
324 324 return sorted(self.changes.touched)
325 325 off = self._offsets
326 326 if off[2] == off[3]:
327 327 return []
328 328
329 329 return self._text[off[2] + 1 : off[3]].split(b'\n')
330 330
331 331 @property
332 332 def filesadded(self):
333 333 if self._cpsd:
334 334 return self.changes.added
335 335 else:
336 336 rawindices = self.extra.get(b'filesadded')
337 337 if rawindices is None:
338 338 return None
339 339 return metadata.decodefileindices(self.files, rawindices)
340 340
341 341 @property
342 342 def filesremoved(self):
343 343 if self._cpsd:
344 344 return self.changes.removed
345 345 else:
346 346 rawindices = self.extra.get(b'filesremoved')
347 347 if rawindices is None:
348 348 return None
349 349 return metadata.decodefileindices(self.files, rawindices)
350 350
351 351 @property
352 352 def p1copies(self):
353 353 if self._cpsd:
354 354 return self.changes.copied_from_p1
355 355 else:
356 356 rawcopies = self.extra.get(b'p1copies')
357 357 if rawcopies is None:
358 358 return None
359 359 return metadata.decodecopies(self.files, rawcopies)
360 360
361 361 @property
362 362 def p2copies(self):
363 363 if self._cpsd:
364 364 return self.changes.copied_from_p2
365 365 else:
366 366 rawcopies = self.extra.get(b'p2copies')
367 367 if rawcopies is None:
368 368 return None
369 369 return metadata.decodecopies(self.files, rawcopies)
370 370
371 371 @property
372 372 def description(self):
373 373 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
374 374
375 375
376 376 class changelog(revlog.revlog):
377 377 def __init__(self, opener, trypending=False):
378 378 """Load a changelog revlog using an opener.
379 379
380 380 If ``trypending`` is true, we attempt to load the index from a
381 381 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
382 382 The ``00changelog.i.a`` file contains index (and possibly inline
383 383 revision) data for a transaction that hasn't been finalized yet.
384 384 It exists in a separate file to facilitate readers (such as
385 385 hook processes) accessing data before a transaction is finalized.
386 386 """
387 387 if trypending and opener.exists(b'00changelog.i.a'):
388 388 indexfile = b'00changelog.i.a'
389 389 else:
390 390 indexfile = b'00changelog.i'
391 391
392 392 datafile = b'00changelog.d'
393 393 revlog.revlog.__init__(
394 394 self,
395 395 opener,
396 396 indexfile,
397 397 datafile=datafile,
398 398 checkambig=True,
399 399 mmaplargeindex=True,
400 400 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
401 401 )
402 402
403 403 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
404 404 # changelogs don't benefit from generaldelta.
405 405
406 406 self.version &= ~revlog.FLAG_GENERALDELTA
407 407 self._generaldelta = False
408 408
409 409 # Delta chains for changelogs tend to be very small because entries
410 410 # tend to be small and don't delta well with each other. So disable delta
411 411 # chains.
412 412 self._storedeltachains = False
413 413
414 414 self._realopener = opener
415 415 self._delayed = False
416 416 self._delaybuf = None
417 417 self._divert = False
418 418 self._filteredrevs = frozenset()
419 419 self._filteredrevs_hashcache = {}
420 420 self._copiesstorage = opener.options.get(b'copies-storage')
421 421
422 422 @property
423 423 def filteredrevs(self):
424 424 return self._filteredrevs
425 425
426 426 @filteredrevs.setter
427 427 def filteredrevs(self, val):
428 428 # Ensure all updates go through this function
429 429 assert isinstance(val, frozenset)
430 430 self._filteredrevs = val
431 431 self._filteredrevs_hashcache = {}
432 432
433 433 def delayupdate(self, tr):
434 434 """delay visibility of index updates to other readers"""
435 435
436 436 if not self._delayed:
437 437 if len(self) == 0:
438 438 self._divert = True
439 439 if self._realopener.exists(self.indexfile + b'.a'):
440 440 self._realopener.unlink(self.indexfile + b'.a')
441 441 self.opener = _divertopener(self._realopener, self.indexfile)
442 442 else:
443 443 self._delaybuf = []
444 444 self.opener = _delayopener(
445 445 self._realopener, self.indexfile, self._delaybuf
446 446 )
447 447 self._delayed = True
448 448 tr.addpending(b'cl-%i' % id(self), self._writepending)
449 449 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
450 450
451 451 def _finalize(self, tr):
452 452 """finalize index updates"""
453 453 self._delayed = False
454 454 self.opener = self._realopener
455 455 # move redirected index data back into place
456 456 if self._divert:
457 457 assert not self._delaybuf
458 458 tmpname = self.indexfile + b".a"
459 459 nfile = self.opener.open(tmpname)
460 460 nfile.close()
461 461 self.opener.rename(tmpname, self.indexfile, checkambig=True)
462 462 elif self._delaybuf:
463 463 fp = self.opener(self.indexfile, b'a', checkambig=True)
464 464 fp.write(b"".join(self._delaybuf))
465 465 fp.close()
466 466 self._delaybuf = None
467 467 self._divert = False
468 468 # split when we're done
469 469 self._enforceinlinesize(tr)
470 470
471 471 def _writepending(self, tr):
472 472 """create a file containing the unfinalized state for
473 473 pretxnchangegroup"""
474 474 if self._delaybuf:
475 475 # make a temporary copy of the index
476 476 fp1 = self._realopener(self.indexfile)
477 477 pendingfilename = self.indexfile + b".a"
478 478 # register as a temp file to ensure cleanup on failure
479 479 tr.registertmp(pendingfilename)
480 480 # write existing data
481 481 fp2 = self._realopener(pendingfilename, b"w")
482 482 fp2.write(fp1.read())
483 483 # add pending data
484 484 fp2.write(b"".join(self._delaybuf))
485 485 fp2.close()
486 486 # switch modes so finalize can simply rename
487 487 self._delaybuf = None
488 488 self._divert = True
489 489 self.opener = _divertopener(self._realopener, self.indexfile)
490 490
491 491 if self._divert:
492 492 return True
493 493
494 494 return False
495 495
496 496 def _enforceinlinesize(self, tr, fp=None):
497 497 if not self._delayed:
498 498 revlog.revlog._enforceinlinesize(self, tr, fp)
499 499
500 500 def read(self, node):
501 501 """Obtain data from a parsed changelog revision.
502 502
503 503 Returns a 6-tuple of:
504 504
505 505 - manifest node in binary
506 506 - author/user as a localstr
507 507 - date as a 2-tuple of (time, timezone)
508 508 - list of files
509 509 - commit message as a localstr
510 510 - dict of extra metadata
511 511
512 512 Unless you need to access all fields, consider calling
513 513 ``changelogrevision`` instead, as it is faster for partial object
514 514 access.
515 515 """
516 516 d, s = self._revisiondata(node)
517 517 c = changelogrevision(
518 518 d, s, self._copiesstorage == b'changeset-sidedata'
519 519 )
520 520 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
521 521
522 522 def changelogrevision(self, nodeorrev):
523 523 """Obtain a ``changelogrevision`` for a node or revision."""
524 524 text, sidedata = self._revisiondata(nodeorrev)
525 525 return changelogrevision(
526 526 text, sidedata, self._copiesstorage == b'changeset-sidedata'
527 527 )
528 528
529 529 def readfiles(self, node):
530 530 """
531 531 short version of read that only returns the files modified by the cset
532 532 """
533 533 text = self.revision(node)
534 534 if not text:
535 535 return []
536 536 last = text.index(b"\n\n")
537 537 l = text[:last].split(b'\n')
538 538 return l[3:]
539 539
540 540 def add(
541 541 self,
542 542 manifest,
543 543 files,
544 544 desc,
545 545 transaction,
546 546 p1,
547 547 p2,
548 548 user,
549 549 date=None,
550 550 extra=None,
551 551 ):
552 552 # Convert to UTF-8 encoded bytestrings as the very first
553 553 # thing: calling any method on a localstr object will turn it
554 554 # into a str object and the cached UTF-8 string is thus lost.
555 555 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
556 556
557 557 user = user.strip()
558 558 # An empty username or a username with a "\n" will make the
559 559 # revision text contain two "\n\n" sequences -> corrupt
560 560 # repository since read cannot unpack the revision.
561 561 if not user:
562 562 raise error.StorageError(_(b"empty username"))
563 563 if b"\n" in user:
564 564 raise error.StorageError(
565 565 _(b"username %r contains a newline") % pycompat.bytestr(user)
566 566 )
567 567
568 568 desc = stripdesc(desc)
569 569
570 570 if date:
571 571 parseddate = b"%d %d" % dateutil.parsedate(date)
572 572 else:
573 573 parseddate = b"%d %d" % dateutil.makedate()
574 574 if extra:
575 575 branch = extra.get(b"branch")
576 576 if branch in (b"default", b""):
577 577 del extra[b"branch"]
578 578 elif branch in (b".", b"null", b"tip"):
579 579 raise error.StorageError(
580 580 _(b'the name \'%s\' is reserved') % branch
581 581 )
582 582 sortedfiles = sorted(files.touched)
583 583 flags = 0
584 584 sidedata = None
585 585 if self._copiesstorage == b'changeset-sidedata':
586 586 if (
587 587 files.removed
588 588 or files.merged
589 589 or files.salvaged
590 590 or files.copied_from_p1
591 591 or files.copied_from_p2
592 592 ):
593 593 flags |= flagutil.REVIDX_HASCOPIESINFO
594 594 sidedata = metadata.encode_files_sidedata(files)
595 595
596 596 if extra:
597 597 extra = encodeextra(extra)
598 598 parseddate = b"%s %s" % (parseddate, extra)
599 599 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
600 600 text = b"\n".join(l)
601 601 return self.addrevision(
602 text, transaction, len(self), p1, p2, sidedata=sidedata
602 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
603 603 )
604 604
605 605 def branchinfo(self, rev):
606 606 """return the branch name and open/close state of a revision
607 607
608 608 This function exists because creating a changectx object
609 609 just to access this is costly."""
610 610 extra = self.read(rev)[5]
611 611 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
612 612
613 613 def _nodeduplicatecallback(self, transaction, node):
614 614 # keep track of revisions that got "re-added", e.g.: unbundle of a known rev.
615 615 #
616 616 # We track them in a list to preserve their order from the source bundle
617 617 duplicates = transaction.changes.setdefault(b'revduplicates', [])
618 618 duplicates.append(self.rev(node))
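Before the copies.py hunk, note the writer-side half of the change above: changelog.add() now sets REVIDX_HASCOPIESINFO only when there is copy metadata to record, and forwards the flag to addrevision(). A self-contained sketch of that predicate (the flag's bit value here is hypothetical, for illustration only):

    REVIDX_HASCOPIESINFO = 1 << 3  # hypothetical bit value, for illustration

    def compute_flags(files):
        # Set the flag only when any copy-related information exists,
        # so readers can skip decoding sidedata for flagless revisions.
        if (
            files.removed
            or files.merged
            or files.salvaged
            or files.copied_from_p1
            or files.copied_from_p2
        ):
            return REVIDX_HASCOPIESINFO
        return 0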
@@ -1,1089 +1,1109
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 13 from .i18n import _
14 14
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 pycompat,
21 21 util,
22 22 )
23 23
24 24
25 25 from .utils import stringutil
26 26
27 from .revlogutils import flagutil
28
27 29
28 30 def _filter(src, dst, t):
29 31 """filters out invalid copies after chaining"""
30 32
31 33 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
32 34 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
33 35 # in the following table (not including trivial cases). For example, case 2
34 36 # is where a file existed in 'src' and remained under that name in 'mid' and
35 37 # then was renamed between 'mid' and 'dst'.
36 38 #
37 39 # case src mid dst result
38 40 # 1 x y - -
39 41 # 2 x y y x->y
40 42 # 3 x y x -
41 43 # 4 x y z x->z
42 44 # 5 - x y -
43 45 # 6 x x y x->y
44 46 #
45 47 # _chain() takes care of chaining the copies in 'a' and 'b', but it
46 48 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
47 49 # between 5 and 6, so it includes all cases in its result.
48 50 # Cases 1, 3, and 5 are then removed by _filter().
49 51
50 52 for k, v in list(t.items()):
51 53 # remove copies from files that didn't exist
52 54 if v not in src:
53 55 del t[k]
54 56 # remove criss-crossed copies
55 57 elif k in src and v in dst:
56 58 del t[k]
57 59 # remove copies to files that were then removed
58 60 elif k not in dst:
59 61 del t[k]
60 62
61 63
62 64 def _chain(prefix, suffix):
63 65 """chain two sets of copies 'prefix' and 'suffix'"""
64 66 result = prefix.copy()
65 67 for key, value in pycompat.iteritems(suffix):
66 68 result[key] = prefix.get(value, value)
67 69 return result
68 70
69 71
70 72 def _tracefile(fctx, am, basemf):
71 73 """return file context that is the ancestor of fctx present in ancestor
72 74 manifest am
73 75
74 76 Note: we used to try and stop after a given limit, however checking if that
75 77 limit is reached turned out to be very expensive. We are better off
76 78 disabling that feature."""
77 79
78 80 for f in fctx.ancestors():
79 81 path = f.path()
80 82 if am.get(path, None) == f.filenode():
81 83 return path
82 84 if basemf and basemf.get(path, None) == f.filenode():
83 85 return path
84 86
85 87
86 88 def _dirstatecopies(repo, match=None):
87 89 ds = repo.dirstate
88 90 c = ds.copies().copy()
89 91 for k in list(c):
90 92 if ds[k] not in b'anm' or (match and not match(k)):
91 93 del c[k]
92 94 return c
93 95
94 96
95 97 def _computeforwardmissing(a, b, match=None):
96 98 """Computes which files are in b but not a.
97 99 This is its own function so extensions can easily wrap this call to see what
98 100 files _forwardcopies is about to process.
99 101 """
100 102 ma = a.manifest()
101 103 mb = b.manifest()
102 104 return mb.filesnotin(ma, match=match)
103 105
104 106
105 107 def usechangesetcentricalgo(repo):
106 108 """Checks if we should use changeset-centric copy algorithms"""
107 109 if repo.filecopiesmode == b'changeset-sidedata':
108 110 return True
109 111 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
110 112 changesetsource = (b'changeset-only', b'compatibility')
111 113 return readfrom in changesetsource
112 114
113 115
114 116 def _committedforwardcopies(a, b, base, match):
115 117 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
116 118 # files might have to be traced back to the fctx parent of the last
117 119 # one-side-only changeset, but not further back than that
118 120 repo = a._repo
119 121
120 122 if usechangesetcentricalgo(repo):
121 123 return _changesetforwardcopies(a, b, match)
122 124
123 125 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
124 126 dbg = repo.ui.debug
125 127 if debug:
126 128 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
127 129 am = a.manifest()
128 130 basemf = None if base is None else base.manifest()
129 131
130 132 # find where new files came from
131 133 # we currently don't try to find where old files went, too expensive
132 134 # this means we can miss a case like 'hg rm b; hg cp a b'
133 135 cm = {}
134 136
135 137 # Computing the forward missing is quite expensive on large manifests, since
136 138 # it compares the entire manifests. We can optimize it in the common use
137 139 # case of computing what copies are in a commit versus its parent (like
138 140 # during a rebase or histedit). Note, we exclude merge commits from this
139 141 # optimization, since the ctx.files() for a merge commit is not correct for
140 142 # this comparison.
141 143 forwardmissingmatch = match
142 144 if b.p1() == a and b.p2().node() == node.nullid:
143 145 filesmatcher = matchmod.exact(b.files())
144 146 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
145 147 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
146 148
147 149 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
148 150
149 151 if debug:
150 152 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
151 153
152 154 for f in sorted(missing):
153 155 if debug:
154 156 dbg(b'debug.copies: tracing file: %s\n' % f)
155 157 fctx = b[f]
156 158 fctx._ancestrycontext = ancestrycontext
157 159
158 160 if debug:
159 161 start = util.timer()
160 162 opath = _tracefile(fctx, am, basemf)
161 163 if opath:
162 164 if debug:
163 165 dbg(b'debug.copies: rename of: %s\n' % opath)
164 166 cm[f] = opath
165 167 if debug:
166 168 dbg(
167 169 b'debug.copies: time: %f seconds\n'
168 170 % (util.timer() - start)
169 171 )
170 172 return cm
171 173
172 174
173 175 def _revinfo_getter(repo):
174 176 """returns a function that returns the following data given a <rev>"
175 177
176 178 * p1: revision number of first parent
177 179 * p2: revision number of first parent
178 180 * changes: a ChangingFiles object
179 181 """
180 182 cl = repo.changelog
181 183 parents = cl.parentrevs
184 flags = cl.flags
185
186 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
182 187
183 188 changelogrevision = cl.changelogrevision
184 189
185 190 # A small cache to avoid doing the work twice for merges
186 191 #
187 192 # In the vast majority of cases, if we ask for information about a revision
188 193 # for one parent, we'll later ask for it for the other. So it makes sense to
189 194 # keep the information around when reaching the first parent of a merge
190 195 # and to drop it after it was provided for the second parent.
191 196 #
192 197 # There exist cases where only one parent of a merge will be walked. This
193 198 # happens when the "destination" of the copy tracing is a descendant of a
194 199 # new root, not common with the "source". In that case, we will only walk
195 200 # through merge parents that are descendants of changesets common
196 201 # between "source" and "destination".
197 202 #
198 203 # With the current implementation, if such changesets have copy
199 204 # information, we'll keep them in memory until the end of
200 205 # _changesetforwardcopies. We don't expect the case to be frequent
201 206 # enough to matter.
202 207 #
203 208 # In addition, it would be possible to reach a pathological case, where
204 209 # many first parents are met before any second parent is reached. In
205 210 # that case the cache could grow. If this ever becomes an issue one can
206 211 # safely introduce a maximum cache size. This would trade extra CPU/IO
207 212 # time to save memory.
208 213 merge_caches = {}
209 214
210 215 def revinfo(rev):
211 216 p1, p2 = parents(rev)
212 217 value = None
213 218 e = merge_caches.pop(rev, None)
214 219 if e is not None:
215 220 return e
216 value = (p1, p2, changelogrevision(rev).changes)
221 changes = None
222 if flags(rev) & HASCOPIESINFO:
223 changes = changelogrevision(rev).changes
224 value = (p1, p2, changes)
217 225 if p1 != node.nullrev and p2 != node.nullrev:
218 226 # XXX in some cases we over-cache, IGNORE
219 227 merge_caches[rev] = value
220 228 return value
221 229
222 230 return revinfo
223 231
224 232
225 233 def _changesetforwardcopies(a, b, match):
226 234 if a.rev() in (node.nullrev, b.rev()):
227 235 return {}
228 236
229 237 repo = a.repo().unfiltered()
230 238 children = {}
231 239
232 240 cl = repo.changelog
233 241 isancestor = cl.isancestorrev # XXX we should add caching to this.
234 242 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
235 243 mrset = set(missingrevs)
236 244 roots = set()
237 245 for r in missingrevs:
238 246 for p in cl.parentrevs(r):
239 247 if p == node.nullrev:
240 248 continue
241 249 if p not in children:
242 250 children[p] = [r]
243 251 else:
244 252 children[p].append(r)
245 253 if p not in mrset:
246 254 roots.add(p)
247 255 if not roots:
248 256 # no common revision to track copies from
249 257 return {}
250 258 min_root = min(roots)
251 259
252 260 from_head = set(
253 261 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
254 262 )
255 263
256 264 iterrevs = set(from_head)
257 265 iterrevs &= mrset
258 266 iterrevs.update(roots)
259 267 iterrevs.remove(b.rev())
260 268 revs = sorted(iterrevs)
261 269
262 270 if repo.filecopiesmode == b'changeset-sidedata':
263 271 revinfo = _revinfo_getter(repo)
264 272 return _combine_changeset_copies(
265 273 revs, children, b.rev(), revinfo, match, isancestor
266 274 )
267 275 else:
268 276 revinfo = _revinfo_getter_extra(repo)
269 277 return _combine_changeset_copies_extra(
270 278 revs, children, b.rev(), revinfo, match, isancestor
271 279 )
272 280
273 281
274 282 def _combine_changeset_copies(
275 283 revs, children, targetrev, revinfo, match, isancestor
276 284 ):
277 285 """combine the copies information for each item of iterrevs
278 286
279 287 revs: sorted iterable of revision to visit
280 288 children: a {parent: [children]} mapping.
281 289 targetrev: the final copies destination revision (not in iterrevs)
282 290 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
283 291 match: a matcher
284 292
285 293 It returns the aggregated copies information for `targetrev`.
286 294 """
287 295 all_copies = {}
288 296 alwaysmatch = match.always()
289 297 for r in revs:
290 298 copies = all_copies.pop(r, None)
291 299 if copies is None:
292 300 # this is a root
293 301 copies = {}
294 302 for i, c in enumerate(children[r]):
295 303 p1, p2, changes = revinfo(c)
304 childcopies = {}
296 305 if r == p1:
297 306 parent = 1
307 if changes is not None:
298 308 childcopies = changes.copied_from_p1
299 309 else:
300 310 assert r == p2
301 311 parent = 2
312 if changes is not None:
302 313 childcopies = changes.copied_from_p2
303 314 if not alwaysmatch:
304 315 childcopies = {
305 316 dst: src for dst, src in childcopies.items() if match(dst)
306 317 }
307 318 newcopies = copies
308 319 if childcopies:
309 320 newcopies = copies.copy()
310 321 for dest, source in pycompat.iteritems(childcopies):
311 322 prev = copies.get(source)
312 323 if prev is not None and prev[1] is not None:
313 324 source = prev[1]
314 325 newcopies[dest] = (c, source)
315 326 assert newcopies is not copies
327 if changes is not None:
316 328 for f in changes.removed:
317 329 if f in newcopies:
318 330 if newcopies is copies:
319 331 # copy on write to avoid affecting potential other
320 332 # branches. when there are no other branches, this
321 333 # could be avoided.
322 334 newcopies = copies.copy()
323 335 newcopies[f] = (c, None)
324 336 othercopies = all_copies.get(c)
325 337 if othercopies is None:
326 338 all_copies[c] = newcopies
327 339 else:
328 340 # we are the second parent to work on c, we need to merge our
329 341 # work with the other.
330 342 #
331 343 # In case of conflict, parent 1 takes precedence over parent 2.
332 344 # This is an arbitrary choice made anew when implementing
333 345 # changeset based copies. It was made without regard to
334 346 # potential filelog-related behavior.
335 347 if parent == 1:
336 348 _merge_copies_dict(
337 349 othercopies, newcopies, isancestor, changes
338 350 )
339 351 else:
340 352 _merge_copies_dict(
341 353 newcopies, othercopies, isancestor, changes
342 354 )
343 355 all_copies[c] = newcopies
344 356
345 357 final_copies = {}
346 358 for dest, (tt, source) in all_copies[targetrev].items():
347 359 if source is not None:
348 360 final_copies[dest] = source
349 361 return final_copies
350 362
351 363
352 364 def _merge_copies_dict(minor, major, isancestor, changes):
353 365 """merge two copies-mapping together, minor and major
354 366
355 367 In case of conflict, value from "major" will be picked.
356 368
357 369 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
358 370 ancestors of `high_rev`,
359 371
360 372 - `ismerged(path)`: callable return True if `path` have been merged in the
361 373 current revision,
362 374 """
363 375 for dest, value in major.items():
364 376 other = minor.get(dest)
365 377 if other is None:
366 378 minor[dest] = value
367 379 else:
368 380 new_tt = value[0]
369 381 other_tt = other[0]
370 382 if value[1] == other[1]:
371 383 continue
372 384 # content from "major" wins, unless it is older
373 385 # than the branch point or there is a merge
374 386 if new_tt == other_tt:
375 387 minor[dest] = value
376 elif value[1] is None and dest in changes.salvaged:
388 elif (
389 changes is not None
390 and value[1] is None
391 and dest in changes.salvaged
392 ):
377 393 pass
378 elif other[1] is None and dest in changes.salvaged:
394 elif (
395 changes is not None
396 and other[1] is None
397 and dest in changes.salvaged
398 ):
379 399 minor[dest] = value
380 400 elif not isancestor(new_tt, other_tt):
381 401 minor[dest] = value
382 elif dest in changes.merged:
402 elif changes is not None and dest in changes.merged:
383 403 minor[dest] = value
384 404
385 405
386 406 def _revinfo_getter_extra(repo):
387 407 """return a function that return multiple data given a <rev>"i
388 408
389 409 * p1: revision number of first parent
390 410 * p2: revision number of first parent
391 411 * p1copies: mapping of copies from p1
392 412 * p2copies: mapping of copies from p2
393 413 * removed: a list of removed files
394 414 * ismerged: a callback to know if file was merged in that revision
395 415 """
396 416 cl = repo.changelog
397 417 parents = cl.parentrevs
398 418
399 419 def get_ismerged(rev):
400 420 ctx = repo[rev]
401 421
402 422 def ismerged(path):
403 423 if path not in ctx.files():
404 424 return False
405 425 fctx = ctx[path]
406 426 parents = fctx._filelog.parents(fctx._filenode)
407 427 nb_parents = 0
408 428 for n in parents:
409 429 if n != node.nullid:
410 430 nb_parents += 1
411 431 return nb_parents >= 2
412 432
413 433 return ismerged
414 434
415 435 def revinfo(rev):
416 436 p1, p2 = parents(rev)
417 437 ctx = repo[rev]
418 438 p1copies, p2copies = ctx._copies
419 439 removed = ctx.filesremoved()
420 440 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
421 441
422 442 return revinfo
423 443
424 444
425 445 def _combine_changeset_copies_extra(
426 446 revs, children, targetrev, revinfo, match, isancestor
427 447 ):
428 448 """version of `_combine_changeset_copies` that works with the Google
429 449 specific "extra" based storage for copy information"""
430 450 all_copies = {}
431 451 alwaysmatch = match.always()
432 452 for r in revs:
433 453 copies = all_copies.pop(r, None)
434 454 if copies is None:
435 455 # this is a root
436 456 copies = {}
437 457 for i, c in enumerate(children[r]):
438 458 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
439 459 if r == p1:
440 460 parent = 1
441 461 childcopies = p1copies
442 462 else:
443 463 assert r == p2
444 464 parent = 2
445 465 childcopies = p2copies
446 466 if not alwaysmatch:
447 467 childcopies = {
448 468 dst: src for dst, src in childcopies.items() if match(dst)
449 469 }
450 470 newcopies = copies
451 471 if childcopies:
452 472 newcopies = copies.copy()
453 473 for dest, source in pycompat.iteritems(childcopies):
454 474 prev = copies.get(source)
455 475 if prev is not None and prev[1] is not None:
456 476 source = prev[1]
457 477 newcopies[dest] = (c, source)
458 478 assert newcopies is not copies
459 479 for f in removed:
460 480 if f in newcopies:
461 481 if newcopies is copies:
462 482 # copy on write to avoid affecting potential other
463 483 # branches. when there are no other branches, this
464 484 # could be avoided.
465 485 newcopies = copies.copy()
466 486 newcopies[f] = (c, None)
467 487 othercopies = all_copies.get(c)
468 488 if othercopies is None:
469 489 all_copies[c] = newcopies
470 490 else:
471 491 # we are the second parent to work on c, we need to merge our
472 492 # work with the other.
473 493 #
474 494 # In case of conflict, parent 1 takes precedence over parent 2.
475 495 # This is an arbitrary choice made anew when implementing
476 496 # changeset based copies. It was made without regard to
477 497 # potential filelog-related behavior.
478 498 if parent == 1:
479 499 _merge_copies_dict_extra(
480 500 othercopies, newcopies, isancestor, ismerged
481 501 )
482 502 else:
483 503 _merge_copies_dict_extra(
484 504 newcopies, othercopies, isancestor, ismerged
485 505 )
486 506 all_copies[c] = newcopies
487 507
488 508 final_copies = {}
489 509 for dest, (tt, source) in all_copies[targetrev].items():
490 510 if source is not None:
491 511 final_copies[dest] = source
492 512 return final_copies
493 513
494 514
495 515 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
496 516 """version of `_merge_copies_dict` that works with the Google
497 517 specific "extra" based storage for copy information"""
498 518 for dest, value in major.items():
499 519 other = minor.get(dest)
500 520 if other is None:
501 521 minor[dest] = value
502 522 else:
503 523 new_tt = value[0]
504 524 other_tt = other[0]
505 525 if value[1] == other[1]:
506 526 continue
507 527 # content from "major" wins, unless it is older
508 528 # than the branch point or there is a merge
509 529 if (
510 530 new_tt == other_tt
511 531 or not isancestor(new_tt, other_tt)
512 532 or ismerged(dest)
513 533 ):
514 534 minor[dest] = value
515 535
516 536
517 537 def _forwardcopies(a, b, base=None, match=None):
518 538 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
519 539
520 540 if base is None:
521 541 base = a
522 542 match = a.repo().narrowmatch(match)
523 543 # check for working copy
524 544 if b.rev() is None:
525 545 cm = _committedforwardcopies(a, b.p1(), base, match)
526 546 # combine copies from dirstate if necessary
527 547 copies = _chain(cm, _dirstatecopies(b._repo, match))
528 548 else:
529 549 copies = _committedforwardcopies(a, b, base, match)
530 550 return copies
531 551
532 552
533 553 def _backwardrenames(a, b, match):
534 554 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
535 555 return {}
536 556
537 557 # Even though we're not taking copies into account, 1:n rename situations
538 558 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
539 559 # arbitrarily pick one of the renames.
540 560 # We don't want to pass in "match" here, since that would filter
541 561 # the destination by it. Since we're reversing the copies, we want
542 562 # to filter the source instead.
543 563 f = _forwardcopies(b, a)
544 564 r = {}
545 565 for k, v in sorted(pycompat.iteritems(f)):
546 566 if match and not match(v):
547 567 continue
548 568 # remove copies
549 569 if v in a:
550 570 continue
551 571 r[v] = k
552 572 return r
553 573
554 574
555 575 def pathcopies(x, y, match=None):
556 576 """find {dst@y: src@x} copy mapping for directed compare"""
557 577 repo = x._repo
558 578 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
559 579 if debug:
560 580 repo.ui.debug(
561 581 b'debug.copies: searching copies from %s to %s\n' % (x, y)
562 582 )
563 583 if x == y or not x or not y:
564 584 return {}
565 585 if y.rev() is None and x == y.p1():
566 586 if debug:
567 587 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
568 588 # short-circuit to avoid issues with merge states
569 589 return _dirstatecopies(repo, match)
570 590 a = y.ancestor(x)
571 591 if a == x:
572 592 if debug:
573 593 repo.ui.debug(b'debug.copies: search mode: forward\n')
574 594 copies = _forwardcopies(x, y, match=match)
575 595 elif a == y:
576 596 if debug:
577 597 repo.ui.debug(b'debug.copies: search mode: backward\n')
578 598 copies = _backwardrenames(x, y, match=match)
579 599 else:
580 600 if debug:
581 601 repo.ui.debug(b'debug.copies: search mode: combined\n')
582 602 base = None
583 603 if a.rev() != node.nullrev:
584 604 base = x
585 605 copies = _chain(
586 606 _backwardrenames(x, a, match=match),
587 607 _forwardcopies(a, y, base, match=match),
588 608 )
589 609 _filter(x, y, copies)
590 610 return copies
591 611
592 612
593 613 def mergecopies(repo, c1, c2, base):
594 614 """
595 615 Finds moves and copies between context c1 and c2 that are relevant for
596 616 merging. 'base' will be used as the merge base.
597 617
598 618 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
599 619 files that were moved/copied in one merge parent and modified in another.
600 620 For example:
601 621
602 622 o ---> 4 another commit
603 623 |
604 624 | o ---> 3 commit that modifies a.txt
605 625 | /
606 626 o / ---> 2 commit that moves a.txt to b.txt
607 627 |/
608 628 o ---> 1 merge base
609 629
610 630 If we try to rebase revision 3 on revision 4, since there is no a.txt in
611 631 revision 4, and the user has copytrace disabled, we print the following
612 632 message:
613 633
614 634 ```other changed <file> which local deleted```
615 635
616 636 Returns a tuple where:
617 637
618 638 "branch_copies" an instance of branch_copies.
619 639
620 640 "diverge" is a mapping of source name -> list of destination names
621 641 for divergent renames.
622 642
623 643 This function calls different copytracing algorithms based on config.
624 644 """
625 645 # avoid silly behavior for update from empty dir
626 646 if not c1 or not c2 or c1 == c2:
627 647 return branch_copies(), branch_copies(), {}
628 648
629 649 narrowmatch = c1.repo().narrowmatch()
630 650
631 651 # avoid silly behavior for parent -> working dir
632 652 if c2.node() is None and c1.node() == repo.dirstate.p1():
633 653 return (
634 654 branch_copies(_dirstatecopies(repo, narrowmatch)),
635 655 branch_copies(),
636 656 {},
637 657 )
638 658
639 659 copytracing = repo.ui.config(b'experimental', b'copytrace')
640 660 if stringutil.parsebool(copytracing) is False:
641 661 # stringutil.parsebool() returns None when it is unable to parse the
642 662 # value, so we keep copytracing enabled in such cases
643 663 return branch_copies(), branch_copies(), {}
644 664
645 665 if usechangesetcentricalgo(repo):
646 666 # The heuristics don't make sense when we need changeset-centric algos
647 667 return _fullcopytracing(repo, c1, c2, base)
648 668
649 669 # Copy trace disabling is explicitly below the node == p1 logic above
650 670 # because the logic above is required for a simple copy to be kept across a
651 671 # rebase.
652 672 if copytracing == b'heuristics':
653 673 # Do full copytracing if only non-public revisions are involved as
654 674 # that will be fast enough and will also cover the copies which could
655 675 # be missed by heuristics
656 676 if _isfullcopytraceable(repo, c1, base):
657 677 return _fullcopytracing(repo, c1, c2, base)
658 678 return _heuristicscopytracing(repo, c1, c2, base)
659 679 else:
660 680 return _fullcopytracing(repo, c1, c2, base)
661 681
662 682
663 683 def _isfullcopytraceable(repo, c1, base):
664 684 """ Checks that if base, source and destination are all no-public branches,
665 685 if yes let's use the full copytrace algorithm for increased capabilities
666 686 since it will be fast enough.
667 687
668 688 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
669 689 number of changesets from c1 to base such that if number of changesets are
670 690 more than the limit, full copytracing algorithm won't be used.
671 691 """
672 692 if c1.rev() is None:
673 693 c1 = c1.p1()
674 694 if c1.mutable() and base.mutable():
675 695 sourcecommitlimit = repo.ui.configint(
676 696 b'experimental', b'copytrace.sourcecommitlimit'
677 697 )
678 698 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
679 699 return commits < sourcecommitlimit
680 700 return False
681 701
682 702
683 703 def _checksinglesidecopies(
684 704 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
685 705 ):
686 706 if src not in m2:
687 707 # deleted on side 2
688 708 if src not in m1:
689 709 # renamed on side 1, deleted on side 2
690 710 renamedelete[src] = dsts1
691 711 elif src not in mb:
692 712 # Work around the "short-circuit to avoid issues with merge states"
693 713 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
694 714 # destination doesn't exist in y.
695 715 pass
696 716 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
697 717 return
698 718 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
699 719 # modified on side 2
700 720 for dst in dsts1:
701 721 copy[dst] = src
702 722
703 723
704 724 class branch_copies(object):
705 725 """Information about copies made on one side of a merge/graft.
706 726
707 727 "copy" is a mapping from destination name -> source name,
708 728 where source is in c1 and destination is in c2 or vice-versa.
709 729
710 730 "movewithdir" is a mapping from source name -> destination name,
711 731 where the file at source, present in one context but not the other,
712 732 needs to be moved to destination by the merge process, because the
713 733 other context moved the directory it is in.
714 734
715 735 "renamedelete" is a mapping of source name -> list of destination
716 736 names for files deleted in c1 that were renamed in c2 or vice-versa.
717 737
718 738 "dirmove" is a mapping of detected source dir -> destination dir renames.
719 739 This is needed for handling changes to new files previously grafted into
720 740 renamed directories.
721 741 """
722 742
723 743 def __init__(
724 744 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
725 745 ):
726 746 self.copy = {} if copy is None else copy
727 747 self.renamedelete = {} if renamedelete is None else renamedelete
728 748 self.dirmove = {} if dirmove is None else dirmove
729 749 self.movewithdir = {} if movewithdir is None else movewithdir
730 750
731 751 def __repr__(self):
732 752 return (
733 753 '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
734 754 % (self.copy, self.renamedelete, self.dirmove, self.movewithdir,)
735 755 )
736 756
737 757
738 758 def _fullcopytracing(repo, c1, c2, base):
739 759 """ The full copytracing algorithm which finds all the new files that were
740 760 added from merge base up to the top commit and for each file it checks if
741 761 this file was copied from another file.
742 762
743 763 This is pretty slow when a lot of changesets are involved but will track all
744 764 the copies.
745 765 """
746 766 m1 = c1.manifest()
747 767 m2 = c2.manifest()
748 768 mb = base.manifest()
749 769
750 770 copies1 = pathcopies(base, c1)
751 771 copies2 = pathcopies(base, c2)
752 772
753 773 if not (copies1 or copies2):
754 774 return branch_copies(), branch_copies(), {}
755 775
756 776 inversecopies1 = {}
757 777 inversecopies2 = {}
758 778 for dst, src in copies1.items():
759 779 inversecopies1.setdefault(src, []).append(dst)
760 780 for dst, src in copies2.items():
761 781 inversecopies2.setdefault(src, []).append(dst)
762 782
763 783 copy1 = {}
764 784 copy2 = {}
765 785 diverge = {}
766 786 renamedelete1 = {}
767 787 renamedelete2 = {}
768 788 allsources = set(inversecopies1) | set(inversecopies2)
769 789 for src in allsources:
770 790 dsts1 = inversecopies1.get(src)
771 791 dsts2 = inversecopies2.get(src)
772 792 if dsts1 and dsts2:
773 793 # copied/renamed on both sides
774 794 if src not in m1 and src not in m2:
775 795 # renamed on both sides
776 796 dsts1 = set(dsts1)
777 797 dsts2 = set(dsts2)
778 798 # If there's some overlap in the rename destinations, we
779 799 # consider it not divergent. For example, if side 1 copies 'a'
780 800 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
781 801 # and 'd' and deletes 'a'.
782 802 if dsts1 & dsts2:
783 803 for dst in dsts1 & dsts2:
784 804 copy1[dst] = src
785 805 copy2[dst] = src
786 806 else:
787 807 diverge[src] = sorted(dsts1 | dsts2)
788 808 elif src in m1 and src in m2:
789 809 # copied on both sides
790 810 dsts1 = set(dsts1)
791 811 dsts2 = set(dsts2)
792 812 for dst in dsts1 & dsts2:
793 813 copy1[dst] = src
794 814 copy2[dst] = src
795 815 # TODO: Handle cases where it was renamed on one side and copied
796 816 # on the other side
797 817 elif dsts1:
798 818 # copied/renamed only on side 1
799 819 _checksinglesidecopies(
800 820 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
801 821 )
802 822 elif dsts2:
803 823 # copied/renamed only on side 2
804 824 _checksinglesidecopies(
805 825 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
806 826 )
807 827
808 828 # find interesting file sets from manifests
809 829 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
810 830 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
811 831 u1 = sorted(addedinm1 - addedinm2)
812 832 u2 = sorted(addedinm2 - addedinm1)
813 833
814 834 header = b" unmatched files in %s"
815 835 if u1:
816 836 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
817 837 if u2:
818 838 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
819 839
820 840 if repo.ui.debugflag:
821 841 renamedeleteset = set()
822 842 divergeset = set()
823 843 for dsts in diverge.values():
824 844 divergeset.update(dsts)
825 845 for dsts in renamedelete1.values():
826 846 renamedeleteset.update(dsts)
827 847 for dsts in renamedelete2.values():
828 848 renamedeleteset.update(dsts)
829 849
830 850 repo.ui.debug(
831 851 b" all copies found (* = to merge, ! = divergent, "
832 852 b"% = renamed and deleted):\n"
833 853 )
834 854 for side, copies in ((b"local", copies1), (b"remote", copies2)):
835 855 if not copies:
836 856 continue
837 857 repo.ui.debug(b" on %s side:\n" % side)
838 858 for f in sorted(copies):
839 859 note = b""
840 860 if f in copy1 or f in copy2:
841 861 note += b"*"
842 862 if f in divergeset:
843 863 note += b"!"
844 864 if f in renamedeleteset:
845 865 note += b"%"
846 866 repo.ui.debug(
847 867 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
848 868 )
849 869 del renamedeleteset
850 870 del divergeset
851 871
852 872 repo.ui.debug(b" checking for directory renames\n")
853 873
854 874 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
855 875 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
856 876
857 877 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
858 878 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
859 879
860 880 return branch_copies1, branch_copies2, diverge
861 881
862 882
863 883 def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
864 884 """Finds moved directories and files that should move with them.
865 885
866 886 ctx: the context for one of the sides
867 887 copy: files copied on the same side (as ctx)
868 888 fullcopy: files copied on the same side (as ctx), including those that
869 889 merge.manifestmerge() won't care about
870 890 addedfiles: added files on the other side (compared to ctx)
871 891 """
872 892 # generate a directory move map
873 893 d = ctx.dirs()
874 894 invalid = set()
875 895 dirmove = {}
876 896
877 897 # examine each file copy for a potential directory move, which is
878 898 # when all the files in a directory are moved to a new directory
879 899 for dst, src in pycompat.iteritems(fullcopy):
880 900 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
881 901 if dsrc in invalid:
882 902 # already seen to be uninteresting
883 903 continue
884 904 elif dsrc in d and ddst in d:
885 905 # directory wasn't entirely moved locally
886 906 invalid.add(dsrc)
887 907 elif dsrc in dirmove and dirmove[dsrc] != ddst:
888 908 # files from the same directory moved to two different places
889 909 invalid.add(dsrc)
890 910 else:
891 911 # looks good so far
892 912 dirmove[dsrc] = ddst
893 913
894 914 for i in invalid:
895 915 if i in dirmove:
896 916 del dirmove[i]
897 917 del d, invalid
898 918
899 919 if not dirmove:
900 920 return {}, {}
901 921
902 922 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
903 923
904 924 for d in dirmove:
905 925 repo.ui.debug(
906 926 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
907 927 )
908 928
909 929 movewithdir = {}
910 930 # check unaccounted nonoverlapping files against directory moves
911 931 for f in addedfiles:
912 932 if f not in fullcopy:
913 933 for d in dirmove:
914 934 if f.startswith(d):
915 935 # new file added in a directory that was moved, move it
916 936 df = dirmove[d] + f[len(d) :]
917 937 if df not in copy:
918 938 movewithdir[f] = df
919 939 repo.ui.debug(
920 940 b" pending file src: '%s' -> dst: '%s'\n"
921 941 % (f, df)
922 942 )
923 943 break
924 944
925 945 return dirmove, movewithdir
926 946
927 947
928 948 def _heuristicscopytracing(repo, c1, c2, base):
929 949 """ Fast copytracing using filename heuristics
930 950
931 951 Assumes that moves or renames are of the following two types:
932 952
933 953 1) Inside a directory only (same directory name but different filenames)
934 954 2) Move from one directory to another
935 955 (same filenames but different directory names)
936 956
937 957 Works only when there are no merge commits in the "source branch".
938 958 The source branch is the commits from base up to c2, not including base.
939 959 
940 960 If a merge is involved it falls back to _fullcopytracing().
941 961
942 962 Can be used by setting the following config:
943 963
944 964 [experimental]
945 965 copytrace = heuristics
946 966
947 967 In some cases the copy/move candidates found by heuristics can be very large
948 968 in number and that will make the algorithm slow. The number of possible
949 969 candidates to check can be limited by using the config
950 970 `experimental.copytrace.movecandidateslimit` which defaults to 100.
951 971 """
952 972
953 973 if c1.rev() is None:
954 974 c1 = c1.p1()
955 975 if c2.rev() is None:
956 976 c2 = c2.p1()
957 977
958 978 changedfiles = set()
959 979 m1 = c1.manifest()
960 980 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
961 981 # If base is not in c2 branch, we switch to fullcopytracing
962 982 repo.ui.debug(
963 983 b"switching to full copytracing as base is not "
964 984 b"an ancestor of c2\n"
965 985 )
966 986 return _fullcopytracing(repo, c1, c2, base)
967 987
968 988 ctx = c2
969 989 while ctx != base:
970 990 if len(ctx.parents()) == 2:
971 991 # To keep things simple let's not handle merges
972 992 repo.ui.debug(b"switching to full copytracing because of merges\n")
973 993 return _fullcopytracing(repo, c1, c2, base)
974 994 changedfiles.update(ctx.files())
975 995 ctx = ctx.p1()
976 996
977 997 copies2 = {}
978 998 cp = _forwardcopies(base, c2)
979 999 for dst, src in pycompat.iteritems(cp):
980 1000 if src in m1:
981 1001 copies2[dst] = src
982 1002
983 1003 # file is missing if it isn't present in the destination, but is present in
984 1004 # the base and present in the source.
985 1005 # Presence in the base is important to exclude added files, presence in the
986 1006 # source is important to exclude removed files.
987 1007 filt = lambda f: f not in m1 and f in base and f in c2
988 1008 missingfiles = [f for f in changedfiles if filt(f)]
989 1009
990 1010 copies1 = {}
991 1011 if missingfiles:
992 1012 basenametofilename = collections.defaultdict(list)
993 1013 dirnametofilename = collections.defaultdict(list)
994 1014
995 1015 for f in m1.filesnotin(base.manifest()):
996 1016 basename = os.path.basename(f)
997 1017 dirname = os.path.dirname(f)
998 1018 basenametofilename[basename].append(f)
999 1019 dirnametofilename[dirname].append(f)
1000 1020
1001 1021 for f in missingfiles:
1002 1022 basename = os.path.basename(f)
1003 1023 dirname = os.path.dirname(f)
1004 1024 samebasename = basenametofilename[basename]
1005 1025 samedirname = dirnametofilename[dirname]
1006 1026 movecandidates = samebasename + samedirname
1007 1027 # f is guaranteed to be present in c2, that's why
1008 1028 # c2.filectx(f) won't fail
1009 1029 f2 = c2.filectx(f)
1010 1030 # we can have a lot of candidates which can slow down the heuristics
1011 1031 # config value to limit the number of candidate moves to check
1012 1032 maxcandidates = repo.ui.configint(
1013 1033 b'experimental', b'copytrace.movecandidateslimit'
1014 1034 )
1015 1035
1016 1036 if len(movecandidates) > maxcandidates:
1017 1037 repo.ui.status(
1018 1038 _(
1019 1039 b"skipping copytracing for '%s', more "
1020 1040 b"candidates than the limit: %d\n"
1021 1041 )
1022 1042 % (f, len(movecandidates))
1023 1043 )
1024 1044 continue
1025 1045
1026 1046 for candidate in movecandidates:
1027 1047 f1 = c1.filectx(candidate)
1028 1048 if _related(f1, f2):
1029 1049 # if there are a few related copies then we'll merge
1030 1050 # changes into all of them. This matches the behaviour
1031 1051 # of upstream copytracing
1032 1052 copies1[candidate] = f
1033 1053
1034 1054 return branch_copies(copies1), branch_copies(copies2), {}
1035 1055
1036 1056
1037 1057 def _related(f1, f2):
1038 1058 """return True if f1 and f2 filectx have a common ancestor
1039 1059
1040 1060 Walk back to common ancestor to see if the two files originate
1041 1061 from the same file. Since workingfilectx's rev() is None it messes
1042 1062 up the integer comparison logic, hence the pre-step check for
1043 1063 None (f1 and f2 can only be workingfilectx's initially).
1044 1064 """
1045 1065
1046 1066 if f1 == f2:
1047 1067 return True # a match
1048 1068
1049 1069 g1, g2 = f1.ancestors(), f2.ancestors()
1050 1070 try:
1051 1071 f1r, f2r = f1.linkrev(), f2.linkrev()
1052 1072
1053 1073 if f1r is None:
1054 1074 f1 = next(g1)
1055 1075 if f2r is None:
1056 1076 f2 = next(g2)
1057 1077
1058 1078 while True:
1059 1079 f1r, f2r = f1.linkrev(), f2.linkrev()
1060 1080 if f1r > f2r:
1061 1081 f1 = next(g1)
1062 1082 elif f2r > f1r:
1063 1083 f2 = next(g2)
1064 1084 else: # f1 and f2 point to files in the same linkrev
1065 1085 return f1 == f2 # true if they point to the same file
1066 1086 except StopIteration:
1067 1087 return False
1068 1088
1069 1089
1070 1090 def graftcopies(wctx, ctx, base):
1071 1091 """reproduce copies between base and ctx in the wctx
1072 1092
1073 1093 Unlike mergecopies(), this function will only consider copies between base
1074 1094 and ctx; it will ignore copies between base and wctx. Also unlike
1075 1095 mergecopies(), this function will apply copies to the working copy (instead
1076 1096 of just returning information about the copies). That makes it cheaper
1077 1097 (especially in the common case of base==ctx.p1()) and useful also when
1078 1098 experimental.copytrace=off.
1079 1099
1080 1100 merge.update() will have already marked most copies, but it will only
1081 1101 mark copies if it thinks the source files are related (see
1082 1102 merge._related()). It will also not mark copies if the file wasn't modified
1083 1103 on the local side. This function adds the copies that were "missed"
1084 1104 by merge.update().
1085 1105 """
1086 1106 new_copies = pathcopies(base, ctx)
1087 1107 _filter(wctx.p1(), wctx, new_copies)
1088 1108 for dst, src in pycompat.iteritems(new_copies):
1089 1109 wctx[dst].markcopied(src)
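To summarize the reader-side effect of this changeset: every consumer of revinfo() must now treat changes as optional. A small usage sketch (hypothetical helper, not part of Mercurial's API):

    def collect_copy_sources(revs, revinfo):
        # Gather copy sources across revisions, skipping revisions whose
        # changes are None (no HASCOPIESINFO flag, so nothing to decode).
        sources = set()
        for rev in revs:
            p1, p2, changes = revinfo(rev)
            if changes is None:
                continue
            sources.update(changes.copied_from_p1.values())
            sources.update(changes.copied_from_p2.values())
        return sources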