##// END OF EJS Templates
Move 'findrenames' code into its own file....
David Greenaway -
r11059:ef4aa90b default
parent child Browse files
Show More
@@ -0,0 +1,59 b''
1 # similar.py - mechanisms for finding similar files
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 from i18n import _
9 import util
10 import mdiff
11 import bdiff
12
def findrenames(repo, added, removed, threshold):
    '''find renamed files -- yields (before, after, score) tuples

    Every name in ``removed`` that is present in the ``repo['.']``
    context is compared against the text ``repo.wread()`` returns for
    every name in ``added``.  For each added name the best-scoring
    removed name is remembered, provided its score is at least
    ``threshold``; when two removed names score the same, the one
    examined last replaces the earlier one.

    Progress is reported through ``repo.ui.progress`` under the
    'searching' topic and closed once all comparisons are done.  The
    results are only yielded after every pair has been scored.
    '''
    best = {}
    parent = repo['.']
    for pos, old in enumerate(removed):
        repo.ui.progress(_('searching'), pos, total=len(removed))
        if old not in parent:
            continue
        oldfctx = parent.filectx(old)

        # defer reading the old revision until a real diff is needed
        # (presumably util.cachefunc keeps the result for later calls)
        @util.cachefunc
        def olddata():
            content = oldfctx.data()
            return content, mdiff.splitnewlines(content)

        def similarity(text):
            # empty text always scores zero
            if len(text) == 0:
                return 0.0
            # a false result from cmp() is scored as a perfect match
            if not oldfctx.cmp(text):
                return 1.0
            # with a threshold of 1.0 anything short of a perfect
            # match is scored zero without running the diff
            if threshold == 1.0:
                return 0.0
            content, contentlines = olddata()
            # bdiff.blocks() returns blocks of matching lines;
            # add up the number of bytes they cover in the old text
            common = 0
            for a1, a2, b1, b2 in bdiff.blocks(text, content):
                common += sum(len(line) for line in contentlines[b1:b2])
            return common * 2.0 / (len(text) + len(content))

        for new in added:
            # the score to beat: the current best for this added
            # name, or the caller's threshold if none is recorded yet
            if new in best:
                floor = best[new][1]
            else:
                floor = threshold
            current = similarity(repo.wread(new))
            if current >= floor:
                best[new] = (old, current)
    repo.ui.progress(_('searching'), None)

    for new, (old, rating) in best.iteritems():
        yield old, new, rating
58
59
@@ -10,6 +10,7 b' from i18n import _'
10 import os, sys, errno, re, glob, tempfile
10 import os, sys, errno, re, glob, tempfile
11 import mdiff, bdiff, util, templater, patch, error, encoding, templatekw
11 import mdiff, bdiff, util, templater, patch, error, encoding, templatekw
12 import match as _match
12 import match as _match
13 import similar
13
14
14 revrangesep = ':'
15 revrangesep = ':'
15
16
@@ -286,52 +287,6 b' def matchall(repo):'
def matchfiles(repo, files):
    '''return the result of _match.exact() for the given files

    The matcher is built from the repository root, the repository's
    current working directory and ``files``.
    '''
    root = repo.root
    cwd = repo.getcwd()
    return _match.exact(root, cwd, files)
288
289
def findrenames(repo, added, removed, threshold):
    '''find renamed files -- yields (before, after, score) tuples

    Every name in ``removed`` that is present in the ``repo['.']``
    context is compared against the text ``repo.wread()`` returns for
    every name in ``added``.  For each added name the best-scoring
    removed name is remembered, provided its score is at least
    ``threshold``; when two removed names score the same, the one
    examined last replaces the earlier one.

    Progress is reported through ``repo.ui.progress`` under the
    'searching' topic and closed once all comparisons are done.  The
    results are only yielded after every pair has been scored.
    '''
    best = {}
    parent = repo['.']
    for pos, old in enumerate(removed):
        repo.ui.progress(_('searching'), pos, total=len(removed))
        if old not in parent:
            continue
        oldfctx = parent.filectx(old)

        # defer reading the old revision until a real diff is needed
        # (presumably util.cachefunc keeps the result for later calls)
        @util.cachefunc
        def olddata():
            content = oldfctx.data()
            return content, mdiff.splitnewlines(content)

        def similarity(text):
            # empty text always scores zero
            if len(text) == 0:
                return 0.0
            # a false result from cmp() is scored as a perfect match
            if not oldfctx.cmp(text):
                return 1.0
            # with a threshold of 1.0 anything short of a perfect
            # match is scored zero without running the diff
            if threshold == 1.0:
                return 0.0
            content, contentlines = olddata()
            # bdiff.blocks() returns blocks of matching lines;
            # add up the number of bytes they cover in the old text
            common = 0
            for a1, a2, b1, b2 in bdiff.blocks(text, content):
                common += sum(len(line) for line in contentlines[b1:b2])
            return common * 2.0 / (len(text) + len(content))

        for new in added:
            # the score to beat: the current best for this added
            # name, or the caller's threshold if none is recorded yet
            if new in best:
                floor = best[new][1]
            else:
                floor = threshold
            current = similarity(repo.wread(new))
            if current >= floor:
                best[new] = (old, current)
    repo.ui.progress(_('searching'), None)

    for new, (old, rating) in best.iteritems():
        yield old, new, rating
334
335 def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None):
290 def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None):
336 if dry_run is None:
291 if dry_run is None:
337 dry_run = opts.get('dry_run')
292 dry_run = opts.get('dry_run')
@@ -366,8 +321,8 b' def addremove(repo, pats=[], opts={}, dr'
366 added.append(abs)
321 added.append(abs)
367 copies = {}
322 copies = {}
368 if similarity > 0:
323 if similarity > 0:
369 for old, new, score in findrenames(repo, added + unknown,
324 for old, new, score in similar.findrenames(repo,
370 removed + deleted, similarity):
325 added + unknown, removed + deleted, similarity):
371 if repo.ui.verbose or not m.exact(old) or not m.exact(new):
326 if repo.ui.verbose or not m.exact(old) or not m.exact(new):
372 repo.ui.status(_('recording removal of %s as rename to %s '
327 repo.ui.status(_('recording removal of %s as rename to %s '
373 '(%d%% similar)\n') %
328 '(%d%% similar)\n') %
General Comments 0
You need to be logged in to leave comments. Login now