##// END OF EJS Templates
similar: use absolute_import
Gregory Szorc -
r27359:a56c47ed default
parent child Browse files
Show More
@@ -1,104 +1,108
1 1 # similar.py - mechanisms for finding similar files
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 from i18n import _
9 import util
10 import mdiff
11 import bdiff
8 from __future__ import absolute_import
9
10 from .i18n import _
11 from . import (
12 bdiff,
13 mdiff,
14 util,
15 )
12 16
def _findexactmatches(repo, added, removed):
    '''yield (before, after) pairs for files renamed without modification

    ``added`` is a list of new filectxs and ``removed`` a list of removed
    filectxs. Two files are considered an exact match when the SHA-1
    digests of their data are equal. When several removed files share a
    digest, the one appearing last in ``removed`` is reported.
    '''
    removedcount = len(removed)
    total = len(added) + removedcount

    # Index every removed file by the digest of its content.
    bydigest = {}
    for pos, oldctx in enumerate(removed):
        repo.ui.progress(_('searching for exact renames'), pos, total=total)
        bydigest[util.sha1(oldctx.data()).digest()] = oldctx

    # An added file whose digest is already indexed is an exact rename.
    # Progress positions continue where the first loop stopped.
    for pos, newctx in enumerate(added, removedcount):
        repo.ui.progress(_('searching for exact renames'), pos, total=total)
        digest = util.sha1(newctx.data()).digest()
        if digest not in bydigest:
            continue
        yield (bydigest[digest], newctx)

    # A position of None closes the progress topic.
    repo.ui.progress(_('searching for exact renames'), None)

38 42
def _findsimilarmatches(repo, added, removed, threshold):
    '''find potentially renamed files based on similar file content

    Takes a list of new filectxs and a list of removed filectxs, and yields
    (before, after, score) tuples of partial matches.

    The score of a pair is twice the number of bytes in the lines that
    bdiff.blocks() reports as matching, divided by the combined length of
    both files. For every added file only one removed file is kept: the
    one with the highest score that is at least ``threshold``. On a tie
    the removed file appearing later in ``removed`` wins.
    '''
    # The same topic must be used to open and to close the progress bar.
    topic = _('searching for similar files')
    copies = {}
    for i, r in enumerate(removed):
        repo.ui.progress(topic, i, total=len(removed))

        # lazily load text: r.data() is only read (once) if at least one
        # added file is actually scored against r
        @util.cachefunc
        def data():
            orig = r.data()
            return orig, mdiff.splitnewlines(orig)

        def score(text):
            orig, lines = data()
            # bdiff.blocks() returns blocks of matching lines
            # count the number of bytes in each
            equal = 0
            matches = bdiff.blocks(text, orig)
            for x1, x2, y1, y2 in matches:
                equal += sum(len(line) for line in lines[y1:y2])

            # NOTE(review): this divides by zero if both files are empty;
            # the visible caller filters out zero-length files beforehand.
            lengths = len(text) + len(orig)
            return equal * 2.0 / lengths

        for a in added:
            # Until a match is recorded for ``a``, the bar to beat is the
            # threshold itself.
            bestscore = copies.get(a, (None, threshold))[1]
            myscore = score(a.data())
            if myscore >= bestscore:
                copies[a] = (r, myscore)
    repo.ui.progress(topic, None)

    for dest, (source, bestscore) in copies.items():
        yield source, dest, bestscore

79 83
def findrenames(repo, added, removed, threshold):
    '''find renamed files -- yields (before, after, score) tuples

    ``added`` names are looked up in ``repo[None]`` and ``removed`` names
    in ``repo['.']``. Exact matches are always reported, with a score of
    1.0. Similarity matches are only searched for when ``threshold`` is
    below 1.0, and only among added files that had no exact match.
    '''
    parentctx = repo['.']
    workingctx = repo[None]

    # Empty files are usually unrelated to one another, so recording a
    # rename between them would likely do more harm than good. They are
    # dropped up front so that neither matching pass ever sees them.
    newctxs = set()
    for fp in added:
        if workingctx[fp].size() > 0:
            newctxs.add(workingctx[fp])

    oldctxs = set()
    for fp in removed:
        if fp in parentctx and parentctx[fp].size() > 0:
            oldctxs.add(parentctx[fp])

    # First pass: identical content.
    exact = _findexactmatches(repo, sorted(newctxs), sorted(oldctxs))
    for old, new in exact:
        # An exactly matched file must not be matched again below.
        newctxs.remove(new)
        yield (old.path(), new.path(), 1.0)

    # Second pass: similar content, only if the user asked for it.
    if threshold < 1.0:
        similar = _findsimilarmatches(repo, sorted(newctxs), sorted(oldctxs),
                                      threshold)
        for old, new, score in similar:
            yield (old.path(), new.path(), score)

104 108
@@ -1,210 +1,209
1 1 #require test-repo
2 2
3 3 $ cd "$TESTDIR"/..
4 4
5 5 $ hg files 'set:(**.py)' | xargs python contrib/check-py3-compat.py
6 6 contrib/casesmash.py not using absolute_import
7 7 contrib/check-code.py not using absolute_import
8 8 contrib/check-code.py requires print_function
9 9 contrib/check-config.py not using absolute_import
10 10 contrib/check-config.py requires print_function
11 11 contrib/debugcmdserver.py not using absolute_import
12 12 contrib/debugcmdserver.py requires print_function
13 13 contrib/debugshell.py not using absolute_import
14 14 contrib/fixpax.py not using absolute_import
15 15 contrib/fixpax.py requires print_function
16 16 contrib/hgclient.py not using absolute_import
17 17 contrib/hgclient.py requires print_function
18 18 contrib/hgfixes/fix_bytes.py not using absolute_import
19 19 contrib/hgfixes/fix_bytesmod.py not using absolute_import
20 20 contrib/hgfixes/fix_leftover_imports.py not using absolute_import
21 21 contrib/import-checker.py not using absolute_import
22 22 contrib/import-checker.py requires print_function
23 23 contrib/memory.py not using absolute_import
24 24 contrib/perf.py not using absolute_import
25 25 contrib/python-hook-examples.py not using absolute_import
26 26 contrib/revsetbenchmarks.py not using absolute_import
27 27 contrib/revsetbenchmarks.py requires print_function
28 28 contrib/showstack.py not using absolute_import
29 29 contrib/synthrepo.py not using absolute_import
30 30 contrib/win32/hgwebdir_wsgi.py not using absolute_import
31 31 doc/check-seclevel.py not using absolute_import
32 32 doc/gendoc.py not using absolute_import
33 33 doc/hgmanpage.py not using absolute_import
34 34 hgext/__init__.py not using absolute_import
35 35 hgext/acl.py not using absolute_import
36 36 hgext/blackbox.py not using absolute_import
37 37 hgext/bugzilla.py not using absolute_import
38 38 hgext/censor.py not using absolute_import
39 39 hgext/children.py not using absolute_import
40 40 hgext/churn.py not using absolute_import
41 41 hgext/clonebundles.py not using absolute_import
42 42 hgext/color.py not using absolute_import
43 43 hgext/convert/__init__.py not using absolute_import
44 44 hgext/convert/bzr.py not using absolute_import
45 45 hgext/convert/common.py not using absolute_import
46 46 hgext/convert/convcmd.py not using absolute_import
47 47 hgext/convert/cvs.py not using absolute_import
48 48 hgext/convert/cvsps.py not using absolute_import
49 49 hgext/convert/darcs.py not using absolute_import
50 50 hgext/convert/filemap.py not using absolute_import
51 51 hgext/convert/git.py not using absolute_import
52 52 hgext/convert/gnuarch.py not using absolute_import
53 53 hgext/convert/hg.py not using absolute_import
54 54 hgext/convert/monotone.py not using absolute_import
55 55 hgext/convert/p4.py not using absolute_import
56 56 hgext/convert/subversion.py not using absolute_import
57 57 hgext/convert/transport.py not using absolute_import
58 58 hgext/eol.py not using absolute_import
59 59 hgext/extdiff.py not using absolute_import
60 60 hgext/factotum.py not using absolute_import
61 61 hgext/fetch.py not using absolute_import
62 62 hgext/gpg.py not using absolute_import
63 63 hgext/graphlog.py not using absolute_import
64 64 hgext/hgcia.py not using absolute_import
65 65 hgext/hgk.py not using absolute_import
66 66 hgext/highlight/__init__.py not using absolute_import
67 67 hgext/highlight/highlight.py not using absolute_import
68 68 hgext/histedit.py not using absolute_import
69 69 hgext/keyword.py not using absolute_import
70 70 hgext/largefiles/__init__.py not using absolute_import
71 71 hgext/largefiles/basestore.py not using absolute_import
72 72 hgext/largefiles/lfcommands.py not using absolute_import
73 73 hgext/largefiles/lfutil.py not using absolute_import
74 74 hgext/largefiles/localstore.py not using absolute_import
75 75 hgext/largefiles/overrides.py not using absolute_import
76 76 hgext/largefiles/proto.py not using absolute_import
77 77 hgext/largefiles/remotestore.py not using absolute_import
78 78 hgext/largefiles/reposetup.py not using absolute_import
79 79 hgext/largefiles/uisetup.py not using absolute_import
80 80 hgext/largefiles/wirestore.py not using absolute_import
81 81 hgext/mq.py not using absolute_import
82 82 hgext/notify.py not using absolute_import
83 83 hgext/pager.py not using absolute_import
84 84 hgext/patchbomb.py not using absolute_import
85 85 hgext/purge.py not using absolute_import
86 86 hgext/rebase.py not using absolute_import
87 87 hgext/record.py not using absolute_import
88 88 hgext/relink.py not using absolute_import
89 89 hgext/schemes.py not using absolute_import
90 90 hgext/share.py not using absolute_import
91 91 hgext/shelve.py not using absolute_import
92 92 hgext/strip.py not using absolute_import
93 93 hgext/transplant.py not using absolute_import
94 94 hgext/win32mbcs.py not using absolute_import
95 95 hgext/win32text.py not using absolute_import
96 96 hgext/zeroconf/Zeroconf.py not using absolute_import
97 97 hgext/zeroconf/Zeroconf.py requires print_function
98 98 hgext/zeroconf/__init__.py not using absolute_import
99 99 i18n/check-translation.py not using absolute_import
100 100 i18n/polib.py not using absolute_import
101 101 mercurial/byterange.py not using absolute_import
102 102 mercurial/cmdutil.py not using absolute_import
103 103 mercurial/commands.py not using absolute_import
104 104 mercurial/context.py not using absolute_import
105 105 mercurial/dirstate.py not using absolute_import
106 106 mercurial/dispatch.py requires print_function
107 107 mercurial/exchange.py not using absolute_import
108 108 mercurial/help.py not using absolute_import
109 109 mercurial/httpclient/__init__.py not using absolute_import
110 110 mercurial/httpclient/_readers.py not using absolute_import
111 111 mercurial/httpclient/socketutil.py not using absolute_import
112 112 mercurial/httpconnection.py not using absolute_import
113 113 mercurial/keepalive.py not using absolute_import
114 114 mercurial/keepalive.py requires print_function
115 115 mercurial/localrepo.py not using absolute_import
116 116 mercurial/lsprof.py requires print_function
117 117 mercurial/lsprofcalltree.py not using absolute_import
118 118 mercurial/lsprofcalltree.py requires print_function
119 119 mercurial/mail.py requires print_function
120 120 mercurial/manifest.py not using absolute_import
121 121 mercurial/mdiff.py not using absolute_import
122 122 mercurial/patch.py not using absolute_import
123 123 mercurial/pvec.py not using absolute_import
124 124 mercurial/py3kcompat.py not using absolute_import
125 125 mercurial/revlog.py not using absolute_import
126 126 mercurial/scmposix.py not using absolute_import
127 127 mercurial/scmutil.py not using absolute_import
128 128 mercurial/scmwindows.py not using absolute_import
129 mercurial/similar.py not using absolute_import
130 129 mercurial/store.py not using absolute_import
131 130 mercurial/windows.py not using absolute_import
132 131 setup.py not using absolute_import
133 132 tests/filterpyflakes.py requires print_function
134 133 tests/generate-working-copy-states.py requires print_function
135 134 tests/get-with-headers.py requires print_function
136 135 tests/heredoctest.py requires print_function
137 136 tests/hypothesishelpers.py not using absolute_import
138 137 tests/hypothesishelpers.py requires print_function
139 138 tests/killdaemons.py not using absolute_import
140 139 tests/md5sum.py not using absolute_import
141 140 tests/mockblackbox.py not using absolute_import
142 141 tests/printenv.py not using absolute_import
143 142 tests/readlink.py not using absolute_import
144 143 tests/readlink.py requires print_function
145 144 tests/revlog-formatv0.py not using absolute_import
146 145 tests/run-tests.py not using absolute_import
147 146 tests/seq.py not using absolute_import
148 147 tests/seq.py requires print_function
149 148 tests/silenttestrunner.py not using absolute_import
150 149 tests/silenttestrunner.py requires print_function
151 150 tests/sitecustomize.py not using absolute_import
152 151 tests/svn-safe-append.py not using absolute_import
153 152 tests/svnxml.py not using absolute_import
154 153 tests/test-ancestor.py requires print_function
155 154 tests/test-atomictempfile.py not using absolute_import
156 155 tests/test-batching.py not using absolute_import
157 156 tests/test-batching.py requires print_function
158 157 tests/test-bdiff.py not using absolute_import
159 158 tests/test-bdiff.py requires print_function
160 159 tests/test-context.py not using absolute_import
161 160 tests/test-context.py requires print_function
162 161 tests/test-demandimport.py not using absolute_import
163 162 tests/test-demandimport.py requires print_function
164 163 tests/test-dispatch.py not using absolute_import
165 164 tests/test-dispatch.py requires print_function
166 165 tests/test-doctest.py not using absolute_import
167 166 tests/test-duplicateoptions.py not using absolute_import
168 167 tests/test-duplicateoptions.py requires print_function
169 168 tests/test-filecache.py not using absolute_import
170 169 tests/test-filecache.py requires print_function
171 170 tests/test-filelog.py not using absolute_import
172 171 tests/test-filelog.py requires print_function
173 172 tests/test-hg-parseurl.py not using absolute_import
174 173 tests/test-hg-parseurl.py requires print_function
175 174 tests/test-hgweb-auth.py not using absolute_import
176 175 tests/test-hgweb-auth.py requires print_function
177 176 tests/test-hgwebdir-paths.py not using absolute_import
178 177 tests/test-hybridencode.py not using absolute_import
179 178 tests/test-hybridencode.py requires print_function
180 179 tests/test-lrucachedict.py not using absolute_import
181 180 tests/test-lrucachedict.py requires print_function
182 181 tests/test-manifest.py not using absolute_import
183 182 tests/test-minirst.py not using absolute_import
184 183 tests/test-minirst.py requires print_function
185 184 tests/test-parseindex2.py not using absolute_import
186 185 tests/test-parseindex2.py requires print_function
187 186 tests/test-pathencode.py not using absolute_import
188 187 tests/test-pathencode.py requires print_function
189 188 tests/test-propertycache.py not using absolute_import
190 189 tests/test-propertycache.py requires print_function
191 190 tests/test-revlog-ancestry.py not using absolute_import
192 191 tests/test-revlog-ancestry.py requires print_function
193 192 tests/test-run-tests.py not using absolute_import
194 193 tests/test-simplemerge.py not using absolute_import
195 194 tests/test-status-inprocess.py not using absolute_import
196 195 tests/test-status-inprocess.py requires print_function
197 196 tests/test-symlink-os-yes-fs-no.py not using absolute_import
198 197 tests/test-trusted.py not using absolute_import
199 198 tests/test-trusted.py requires print_function
200 199 tests/test-ui-color.py not using absolute_import
201 200 tests/test-ui-color.py requires print_function
202 201 tests/test-ui-config.py not using absolute_import
203 202 tests/test-ui-config.py requires print_function
204 203 tests/test-ui-verbosity.py not using absolute_import
205 204 tests/test-ui-verbosity.py requires print_function
206 205 tests/test-url.py not using absolute_import
207 206 tests/test-url.py requires print_function
208 207 tests/test-walkrepo.py requires print_function
209 208 tests/test-wireproto.py requires print_function
210 209 tests/tinyproxy.py requires print_function
General Comments 0
You need to be logged in to leave comments. Login now