##// END OF EJS Templates
localrepo: use changelog.hasnode instead of self.__contains__...
localrepo: use changelog.hasnode instead of self.__contains__ Before this patch, releasing the store lock implies the actions below, when the transaction is aborted: 1. "commithook()" scheduled in "localrepository.commit()" is invoked 2. "changectx.__init__()" is invoked via "self.__contains__()" 3. specified ID is examined against "repo.dirstate.p1()" 4. validation function is invoked in "dirstate.p1()" In subsequent patches, "dirstate.invalidate()" invocations for discarding changes are replaced with "dirstateguard", but discarding changes by "dirstateguard" is executed after releasing the store lock: resources are acquired in "wlock => dirstateguard => store lock" order, and are released in reverse order. This may cause that "dirstate.p1()" still refers to the changeset to be rolled-back at (4) above: pushing multiple patches by "hg qpush" is a typical case. When releasing the store lock, such changesets are: - not contained in "repo.changelog", if it is reloaded from ".hg/00changelog.i", as that file was already truncated by "transaction.abort()" - still contained in it, otherwise (this "dirty read" problem is discussed in "Transaction Plan" http://mercurial.selenic.com/wiki/TransactionPlan) Validation function shows "unknown working parent" warning in the former case, but reloading "repo.changelog" depends on the timestamp of ".hg/00changelog.i". This causes occasional test failures. In the case of scheduled "commithook()", it just wants to examine whether "node ID" of committed changeset is still valid or not. Other examinations implied in "changectx.__init__()" are meaningless. To avoid showing the "unknown working parent" warning irregularly, this patch uses "changelog.hasnode()" instead of "node in self" to examine existence of committed changeset.

File last commit:

r16683:525fdb73 default
r24992:7df090c9 default
Show More
similar.py
104 lines | 3.6 KiB | text/x-python | PythonLexer
# similar.py - mechanisms for finding similar files
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from i18n import _
import util
import mdiff
import bdiff
def _findexactmatches(repo, added, removed):
'''find renamed files that have no changes
Takes a list of new filectxs and a list of removed filectxs, and yields
(before, after) tuples of exact matches.
'''
numfiles = len(added) + len(removed)
# Get hashes of removed files.
hashes = {}
for i, fctx in enumerate(removed):
repo.ui.progress(_('searching for exact renames'), i, total=numfiles)
h = util.sha1(fctx.data()).digest()
hashes[h] = fctx
# For each added file, see if it corresponds to a removed file.
for i, fctx in enumerate(added):
repo.ui.progress(_('searching for exact renames'), i + len(removed),
total=numfiles)
h = util.sha1(fctx.data()).digest()
if h in hashes:
yield (hashes[h], fctx)
# Done
repo.ui.progress(_('searching for exact renames'), None)
def _findsimilarmatches(repo, added, removed, threshold):
'''find potentially renamed files based on similar file content
Takes a list of new filectxs and a list of removed filectxs, and yields
(before, after, score) tuples of partial matches.
'''
copies = {}
for i, r in enumerate(removed):
repo.ui.progress(_('searching for similar files'), i,
total=len(removed))
# lazily load text
@util.cachefunc
def data():
orig = r.data()
return orig, mdiff.splitnewlines(orig)
def score(text):
orig, lines = data()
# bdiff.blocks() returns blocks of matching lines
# count the number of bytes in each
equal = 0
matches = bdiff.blocks(text, orig)
for x1, x2, y1, y2 in matches:
for line in lines[y1:y2]:
equal += len(line)
lengths = len(text) + len(orig)
return equal * 2.0 / lengths
for a in added:
bestscore = copies.get(a, (None, threshold))[1]
myscore = score(a.data())
if myscore >= bestscore:
copies[a] = (r, myscore)
repo.ui.progress(_('searching'), None)
for dest, v in copies.iteritems():
source, score = v
yield source, dest, score
def findrenames(repo, added, removed, threshold):
'''find renamed files -- yields (before, after, score) tuples'''
parentctx = repo['.']
workingctx = repo[None]
# Zero length files will be frequently unrelated to each other, and
# tracking the deletion/addition of such a file will probably cause more
# harm than good. We strip them out here to avoid matching them later on.
addedfiles = set([workingctx[fp] for fp in added
if workingctx[fp].size() > 0])
removedfiles = set([parentctx[fp] for fp in removed
if fp in parentctx and parentctx[fp].size() > 0])
# Find exact matches.
for (a, b) in _findexactmatches(repo,
sorted(addedfiles), sorted(removedfiles)):
addedfiles.remove(b)
yield (a.path(), b.path(), 1.0)
# If the user requested similar files to be matched, search for them also.
if threshold < 1.0:
for (a, b, score) in _findsimilarmatches(repo,
sorted(addedfiles), sorted(removedfiles), threshold):
yield (a.path(), b.path(), score)