##// END OF EJS Templates
bdiff: gradually enable the popularity hack...
bdiff: gradually enable the popularity hack Patch from Jason Orendorff The lower the threshold, the stronger the popularity hack's influence. So at 3999 lines, the hack is disabled; and at 4000 lines, the hack is enabled at maximum strength (t=4). No source file in mercurial/crew is over 4000 lines. But there are, oh, a few such files in Mozilla. I can testify that this hack causes hg to generate some correct but eyebrow-raising patches. I think the hack should phase in gradually. The threshold should be high for small files where we don't need it so much. Like this: t = (bn < 31000) ? 1000000 / bn : bn / 1000; That would leave the popularity hack disabled for small files, then gradually phase it in: bn < 1000 -- t > bn (popularity hack is completely disabled) bn == 1000 -- t = 1000 (still effectively disabled) bn == 2000 -- t = 500 (only hits unusual files) bn == 10000 -- t = 100 (only hits especially common lines) bn == 31000 -- t = 31 (hack is at maximum power) bn == 32000 -- t = 32 (hack could backfire, ease off)

File last commit:

r9467:4c041f1e default
r9534:8e202431 default
Show More
copies.py
233 lines | 7.3 KiB | text/x-python | PythonLexer
Matt Mackall
copies: move findcopies code to its own module...
r6274 # copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.
Matt Mackall
copies: move findcopies code to its own module...
r6274
from i18n import _
Simon Heimberg
separate import lines from mercurial and general python modules
r8312 import util
import heapq
Matt Mackall
copies: move findcopies code to its own module...
r6274
def _nonoverlap(d1, d2, d3):
"Return list of elements in d1 not in d2 or d3"
Matt Mackall
replace util.sort with sorted built-in...
r8209 return sorted([d for d in d1 if d not in d3 and d not in d2])
Matt Mackall
copies: move findcopies code to its own module...
r6274
def _dirname(f):
s = f.rfind("/")
if s == -1:
return ""
return f[:s]
def _dirs(files):
Benoit Boissinot
copies: use set instead of dict
r8468 d = set()
Matt Mackall
copies: move findcopies code to its own module...
r6274 for f in files:
f = _dirname(f)
while f not in d:
Benoit Boissinot
copies: use set instead of dict
r8468 d.add(f)
Matt Mackall
copies: move findcopies code to its own module...
r6274 f = _dirname(f)
return d
def _findoldnames(fctx, limit):
"find files that path was copied from, back to linkrev limit"
old = {}
Benoit Boissinot
copies: use set instead of dict
r8468 seen = set()
Matt Mackall
copies: move findcopies code to its own module...
r6274 orig = fctx.path()
Matt Mackall
copies: sort old names by depth
r6424 visit = [(fctx, 0)]
Matt Mackall
copies: move findcopies code to its own module...
r6274 while visit:
Matt Mackall
copies: sort old names by depth
r6424 fc, depth = visit.pop()
Matt Mackall
copies: move findcopies code to its own module...
r6274 s = str(fc)
if s in seen:
continue
Benoit Boissinot
copies: use set instead of dict
r8468 seen.add(s)
Matt Mackall
copies: move findcopies code to its own module...
r6274 if fc.path() != orig and fc.path() not in old:
Matt Mackall
copies: sort old names by depth
r6424 old[fc.path()] = (depth, fc.path()) # remember depth
Alejandro Santos
compat: can't compare two values of unequal datatypes
r9030 if fc.rev() is not None and fc.rev() < limit:
Matt Mackall
copies: move findcopies code to its own module...
r6274 continue
Matt Mackall
copies: sort old names by depth
r6424 visit += [(p, depth - 1) for p in fc.parents()]
Matt Mackall
copies: move findcopies code to its own module...
r6274
Matt Mackall
copies: sort old names by depth
r6424 # return old names sorted by depth
Matt Mackall
replace util.sort with sorted built-in...
r8209 return [o[1] for o in sorted(old.values())]
Matt Mackall
copies: move findcopies code to its own module...
r6274
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 def _findlimit(repo, a, b):
"find the earliest revision that's an ancestor of a or b but not both"
Matt Mackall
symmetricdifference: move back to copies...
r6429 # basic idea:
# - mark a and b with different sides
# - if a parent's children are all on the same side, the parent is
# on that side, otherwise it is on no side
# - walk the graph in topological order with the help of a heap;
# - add unseen parents to side map
# - clear side of any parent that has children on different sides
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 # - track number of interesting revs that might still be on a side
# - track the lowest interesting rev seen
# - quit when interesting revs is zero
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430
cl = repo.changelog
Matt Mackall
add __len__ and __iter__ methods to repo and revlog
r6750 working = len(cl) # pseudo rev for the working directory
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if a is None:
a = working
if b is None:
b = working
Matt Mackall
symmetricdifference: move back to copies...
r6429
side = {a: -1, b: 1}
visit = [-a, -b]
heapq.heapify(visit)
interesting = len(visit)
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 limit = working
Matt Mackall
symmetricdifference: move back to copies...
r6429
while interesting:
r = -heapq.heappop(visit)
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if r == working:
parents = [cl.rev(p) for p in repo.dirstate.parents()]
else:
parents = cl.parentrevs(r)
for p in parents:
Matt Mackall
symmetricdifference: move back to copies...
r6429 if p not in side:
# first time we see p; add it to visit
side[p] = side[r]
if side[p]:
interesting += 1
heapq.heappush(visit, -p)
elif side[p] and side[p] != side[r]:
# p was interesting but now we know better
side[p] = 0
interesting -= 1
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if side[r]:
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 limit = r # lowest rev visited
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 interesting -= 1
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 return limit
Matt Mackall
symmetricdifference: move back to copies...
r6429
Matt Mackall
copies: skip directory rename checks when not merging...
r6425 def copies(repo, c1, c2, ca, checkdirs=False):
Matt Mackall
copies: move findcopies code to its own module...
r6274 """
Find moves and copies between context c1 and c2
"""
# avoid silly behavior for update from empty dir
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if not c1 or not c2 or c1 == c2:
Matt Mackall
copies: move findcopies code to its own module...
r6274 return {}, {}
Matt Mackall
copies: teach copies about dirstate.copies...
r6646 # avoid silly behavior for parent -> working dir
Martin Geisler
use 'x is None' instead of 'x == None'...
r8527 if c2.node() is None and c1.node() == repo.dirstate.parents()[0]:
Matt Mackall
copies: teach copies about dirstate.copies...
r6646 return repo.dirstate.copies(), {}
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 limit = _findlimit(repo, c1.rev(), c2.rev())
Matt Mackall
copies: move findcopies code to its own module...
r6274 m1 = c1.manifest()
m2 = c2.manifest()
ma = ca.manifest()
def makectx(f, n):
if len(n) != 20: # in a working context?
if c1.rev() is None:
return c1.filectx(f)
return c2.filectx(f)
return repo.filectx(f, fileid=n)
Matt Mackall
fix memory usage of revlog caches by limiting cache size [issue1639]
r9097 ctx = util.lrucachefunc(makectx)
Matt Mackall
copies: move findcopies code to its own module...
r6274 copy = {}
fullcopy = {}
diverge = {}
def checkcopies(f, m1, m2):
'''check possible copies of f from m1 to m2'''
c1 = ctx(f, m1[f])
for of in _findoldnames(c1, limit):
fullcopy[f] = of # remember for dir rename detection
if of in m2: # original file not in other manifest?
# if the original file is unchanged on the other branch,
# no merge needed
if m2[of] != ma.get(of):
c2 = ctx(of, m2[of])
ca = c1.ancestor(c2)
# related and named changed on only one side?
Matt Mackall
copies: fix silly precedence bug
r6422 if ca and (ca.path() == f or ca.path() == c2.path()):
Matt Mackall
copies: move findcopies code to its own module...
r6274 if c1 != ca or c2 != ca: # merge needed?
copy[f] = of
elif of in ma:
diverge.setdefault(of, []).append(f)
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
Matt Mackall
copies: move findcopies code to its own module...
r6274
u1 = _nonoverlap(m1, m2, ma)
u2 = _nonoverlap(m2, m1, ma)
if u1:
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" unmatched files in local:\n %s\n"
Matt Mackall
copies: move findcopies code to its own module...
r6274 % "\n ".join(u1))
if u2:
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" unmatched files in other:\n %s\n"
Matt Mackall
copies: move findcopies code to its own module...
r6274 % "\n ".join(u2))
for f in u1:
checkcopies(f, m1, m2)
for f in u2:
checkcopies(f, m2, m1)
Martin Geisler
replace set-like dictionaries with real sets...
r8152 diverge2 = set()
Matt Mackall
copies: move findcopies code to its own module...
r6274 for of, fl in diverge.items():
if len(fl) == 1:
del diverge[of] # not actually divergent
else:
Martin Geisler
replace set-like dictionaries with real sets...
r8152 diverge2.update(fl) # reverse map for below
Matt Mackall
copies: move findcopies code to its own module...
r6274
if fullcopy:
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" all copies found (* = to merge, ! = divergent):\n")
Matt Mackall
copies: move findcopies code to its own module...
r6274 for f in fullcopy:
note = ""
if f in copy: note += "*"
if f in diverge2: note += "!"
Martin Geisler
copies: don't translate untranslatable string
r8337 repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))
Matt Mackall
copies: move findcopies code to its own module...
r6274 del diverge2
Matt Mackall
copies: skip directory rename checks when not merging...
r6425 if not fullcopy or not checkdirs:
Matt Mackall
copies: move findcopies code to its own module...
r6274 return copy, diverge
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" checking for directory renames\n")
Matt Mackall
copies: move findcopies code to its own module...
r6274
# generate a directory move map
d1, d2 = _dirs(m1), _dirs(m2)
Benoit Boissinot
copies: use set instead of dict
r8468 invalid = set()
Matt Mackall
copies: move findcopies code to its own module...
r6274 dirmove = {}
# examine each file copy for a potential directory move, which is
# when all the files in a directory are moved to a new directory
Dirkjan Ochtman
use dict.iteritems() rather than dict.items()...
r7622 for dst, src in fullcopy.iteritems():
Matt Mackall
copies: move findcopies code to its own module...
r6274 dsrc, ddst = _dirname(src), _dirname(dst)
if dsrc in invalid:
# already seen to be uninteresting
continue
elif dsrc in d1 and ddst in d1:
# directory wasn't entirely moved locally
Benoit Boissinot
copies: use set instead of dict
r8468 invalid.add(dsrc)
Matt Mackall
copies: move findcopies code to its own module...
r6274 elif dsrc in d2 and ddst in d2:
# directory wasn't entirely moved remotely
Benoit Boissinot
copies: use set instead of dict
r8468 invalid.add(dsrc)
Matt Mackall
copies: move findcopies code to its own module...
r6274 elif dsrc in dirmove and dirmove[dsrc] != ddst:
# files from the same directory moved to two different places
Benoit Boissinot
copies: use set instead of dict
r8468 invalid.add(dsrc)
Matt Mackall
copies: move findcopies code to its own module...
r6274 else:
# looks good so far
dirmove[dsrc + "/"] = ddst + "/"
for i in invalid:
if i in dirmove:
del dirmove[i]
del d1, d2, invalid
if not dirmove:
return copy, diverge
for d in dirmove:
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" dir %s -> %s\n" % (d, dirmove[d]))
Matt Mackall
copies: move findcopies code to its own module...
r6274
# check unaccounted nonoverlapping files against directory moves
for f in u1 + u2:
if f not in fullcopy:
for d in dirmove:
if f.startswith(d):
# new file added in a directory that was moved, move it
Matt Mackall
copies: skip directory rename checks when not merging...
r6425 df = dirmove[d] + f[len(d):]
Matt Mackall
copies: don't double-detect items in the directory copy check
r6426 if df not in copy:
copy[f] = df
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" file %s -> %s\n" % (f, copy[f]))
Matt Mackall
copies: move findcopies code to its own module...
r6274 break
return copy, diverge