upstream/mercurial-mirror Commit - r11059:ef4aa90b

Move 'findrenames' code into its own file....

David Greenaway -

r11059:ef4aa90b default

parent child

mercurial/similar.py

0 created 644 +59 0

@@ -0,0 +1,59 b''
	1	# similar.py - mechanisms for finding similar files
	2	#
	3	# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
	4	#
	5	# This software may be used and distributed according to the terms of the
	6	# GNU General Public License version 2 or any later version.
	7
	8	from i18n import _
	9	import util
	10	import mdiff
	11	import bdiff
	12
	13	def findrenames(repo, added, removed, threshold):
	14	'''find renamed files -- yields (before, after, score) tuples'''
	15	copies = {}
	16	ctx = repo['.']
	17	for i, r in enumerate(removed):
	18	repo.ui.progress(_('searching'), i, total=len(removed))
	19	if r not in ctx:
	20	continue
	21	fctx = ctx.filectx(r)
	22
	23	# lazily load text
	24	@util.cachefunc
	25	def data():
	26	orig = fctx.data()
	27	return orig, mdiff.splitnewlines(orig)
	28
	29	def score(text):
	30	if not len(text):
	31	return 0.0
	32	if not fctx.cmp(text):
	33	return 1.0
	34	if threshold == 1.0:
	35	return 0.0
	36	orig, lines = data()
	37	# bdiff.blocks() returns blocks of matching lines
	38	# count the number of bytes in each
	39	equal = 0
	40	matches = bdiff.blocks(text, orig)
	41	for x1, x2, y1, y2 in matches:
	42	for line in lines[y1:y2]:
	43	equal += len(line)
	44
	45	lengths = len(text) + len(orig)
	46	return equal * 2.0 / lengths
	47
	48	for a in added:
	49	bestscore = copies.get(a, (None, threshold))[1]
	50	myscore = score(repo.wread(a))
	51	if myscore >= bestscore:
	52	copies[a] = (r, myscore)
	53	repo.ui.progress(_('searching'), None)
	54
	55	for dest, v in copies.iteritems():
	56	source, score = v
	57	yield source, dest, score
	58
	59

mercurial/cmdutil.py

0 +3 -48

@@ -10,6 +10,7 b' from i18n import _'
10	import os, sys, errno, re, glob, tempfile	10	import os, sys, errno, re, glob, tempfile
11	import mdiff, bdiff, util, templater, patch, error, encoding, templatekw	11	import mdiff, bdiff, util, templater, patch, error, encoding, templatekw
12	import match as _match	12	import match as _match
		13	import similar
13		14
14	revrangesep = ':'	15	revrangesep = ':'
15		16
@@ -286,52 +287,6 b' def matchall(repo):'
286	def matchfiles(repo, files):	287	def matchfiles(repo, files):
287	return _match.exact(repo.root, repo.getcwd(), files)	288	return _match.exact(repo.root, repo.getcwd(), files)
288		289
289	def findrenames(repo, added, removed, threshold):
290	'''find renamed files -- yields (before, after, score) tuples'''
291	copies = {}
292	ctx = repo['.']
293	for i, r in enumerate(removed):
294	repo.ui.progress(_('searching'), i, total=len(removed))
295	if r not in ctx:
296	continue
297	fctx = ctx.filectx(r)
298
299	# lazily load text
300	@util.cachefunc
301	def data():
302	orig = fctx.data()
303	return orig, mdiff.splitnewlines(orig)
304
305	def score(text):
306	if not len(text):
307	return 0.0
308	if not fctx.cmp(text):
309	return 1.0
310	if threshold == 1.0:
311	return 0.0
312	orig, lines = data()
313	# bdiff.blocks() returns blocks of matching lines
314	# count the number of bytes in each
315	equal = 0
316	matches = bdiff.blocks(text, orig)
317	for x1, x2, y1, y2 in matches:
318	for line in lines[y1:y2]:
319	equal += len(line)
320
321	lengths = len(text) + len(orig)
322	return equal * 2.0 / lengths
323
324	for a in added:
325	bestscore = copies.get(a, (None, threshold))[1]
326	myscore = score(repo.wread(a))
327	if myscore >= bestscore:
328	copies[a] = (r, myscore)
329	repo.ui.progress(_('searching'), None)
330
331	for dest, v in copies.iteritems():
332	source, score = v
333	yield source, dest, score
334
335	def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None):	290	def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None):
336	if dry_run is None:	291	if dry_run is None:
337	dry_run = opts.get('dry_run')	292	dry_run = opts.get('dry_run')
@@ -366,8 +321,8 b' def addremove(repo, pats=[], opts={}, dr'
366	added.append(abs)	321	added.append(abs)
367	copies = {}	322	copies = {}
368	if similarity > 0:	323	if similarity > 0:
369	for old, new, score in findrenames(repo, added + unknown,	324	for old, new, score in similar.findrenames(repo,
370	removed + deleted, similarity):	325	added + unknown, removed + deleted, similarity):
371	if repo.ui.verbose or not m.exact(old) or not m.exact(new):	326	if repo.ui.verbose or not m.exact(old) or not m.exact(new):
372	repo.ui.status(_('recording removal of %s as rename to %s '	327	repo.ui.status(_('recording removal of %s as rename to %s '
373	'(%d%% similar)\n') %	328	'(%d%% similar)\n') %

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages