Show More
@@ -1,124 +1,126 | |||
|
1 | 1 | # automv.py |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2013-2016 Facebook, Inc. |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | """check for unrecorded moves at commit time (EXPERIMENTAL) |
|
8 | 8 | |
|
9 | 9 | This extension checks at commit/amend time if any of the committed files |
|
10 | 10 | comes from an unrecorded mv. |
|
11 | 11 | |
|
12 | 12 | The threshold at which a file is considered a move can be set with the |
|
13 | 13 | ``automv.similarity`` config option. This option takes a percentage between 0 |
|
14 | 14 | (disabled) and 100 (files must be identical), the default is 95. |
|
15 | 15 | |
|
16 | 16 | """ |
|
17 | 17 | |
|
18 | 18 | # Using 95 as the default similarity is based on an analysis of the Mercurial |

19 | 19 | # repositories of the cpython, mozilla-central and mercurial projects, as |

20 | 20 | # well as 2 very large Facebook repositories. At 95, 50% of all potential |

21 | 21 | # missed moves would be caught, and the threshold corresponds with 87% of all |

22 | 22 | # explicitly marked moves. Together, 80% of moved files are 95% similar or |

23 | 23 | # more. |
|
24 | 24 | # |
|
25 | 25 | # See http://markmail.org/thread/5pxnljesvufvom57 for context. |
|
26 | 26 | |
|
27 | 27 | |
|
28 | 28 | from mercurial.i18n import _ |
|
29 | 29 | from mercurial import ( |
|
30 | 30 | commands, |
|
31 | 31 | copies, |
|
32 | 32 | error, |
|
33 | 33 | extensions, |
|
34 | 34 | pycompat, |
|
35 | 35 | registrar, |
|
36 | 36 | scmutil, |
|
37 | 37 | similar, |
|
38 | 38 | ) |
|
39 | 39 | |
|
# Table that collects the config items declared by this extension.
configtable = {}
configitem = registrar.configitem(configtable)

# ``automv.similarity``: similarity percentage used when deciding whether an
# added file is really a move of a removed one (default 95).
configitem(b'automv', b'similarity', default=95)
|
48 | 48 | |
|
49 | 49 | |
|
def extsetup(ui):
    """Wrap the ``commit`` command with ``mvcheck`` and add ``--no-automv``.

    The option tuple is appended to element 1 of the entry returned by
    ``extensions.wrapcommand``.
    """
    wrapped = extensions.wrapcommand(commands.table, b'commit', mvcheck)
    no_automv_flag = (
        b'',
        b'no-automv',
        None,
        _(b'disable automatic file move detection'),
    )
    wrapped[1].append(no_automv_flag)
|
55 | 55 | |
|
56 | 56 | |
|
def mvcheck(orig, ui, repo, *pats, **opts):
    """Hook to check for moves at commit time.

    Wraps the ``commit`` command: unless ``--no-automv`` was given, look for
    added files that are similar enough to removed files, record them as
    renames, and then run the wrapped command.

    ``orig`` is the wrapped command; ``pats`` and ``opts`` are forwarded to
    it (with the ``no_automv`` option removed). The return value is whatever
    ``orig`` returns.

    Raises ``error.Abort`` if ``automv.similarity`` is not between 0 and 100.
    A value of 0 disables the detection.
    """
    opts = pycompat.byteskwargs(opts)
    renames = None
    # Remove our own flag so it is not passed on to the wrapped command.
    disabled = opts.pop(b'no_automv', False)
    # Hold the working-copy lock across detection, marking and the commit
    # itself, so the status used to find renames cannot go stale before the
    # renames are recorded.
    with repo.wlock():
        if not disabled:
            threshold = ui.configint(b'automv', b'similarity')
            if not 0 <= threshold <= 100:
                raise error.Abort(
                    _(b'automv.similarity must be between 0 and 100')
                )
            if threshold > 0:
                match = scmutil.match(repo[None], pats, opts)
                added, removed = _interestingfiles(repo, match)
                uipathfn = scmutil.getuipathfn(repo, legacyrelativevalue=True)
                # _findrenames takes the similarity as a fraction.
                renames = _findrenames(
                    repo, uipathfn, added, removed, threshold / 100.0
                )

        if renames is not None:
            with repo.dirstate.changing_files(repo):
                # XXX this should be wider and integrated with the commit
                # transaction. At the same time as we do the `addremove` logic
                # for commit. However we can't really do better with the
                # current extension structure, and this is not worse than what
                # happened before.
                scmutil._markchanges(repo, (), (), renames)
        return orig(ui, repo, *pats, **pycompat.strkwargs(opts))
|
84 | 86 | |
|
85 | 87 | |
|
def _interestingfiles(repo, matcher):
    """Collect the added and removed files that ``matcher`` selects.

    Returns an ``(added, removed)`` pair. Added files that already have copy
    information between ``.`` and the working copy are left out of ``added``.

    """
    status = repo.status(match=matcher)

    known_copies = copies.pathcopies(repo[b'.'], repo[None], matcher)
    # Keep only the added files whose origin is not recorded yet.
    candidates = [path for path in status.added if path not in known_copies]

    return candidates, status.removed
|
102 | 104 | |
|
103 | 105 | |
|
def _findrenames(repo, uipathfn, added, removed, similarity):
    """Work out which files in ``added`` are really moves of ``removed`` files.

    ``similarity`` is forwarded to ``similar.findrenames`` as the minimum
    score; ``mvcheck`` passes it as a fraction (percentage / 100). Returns a
    dict mapping each detected destination to its source. Nothing is searched
    when ``similarity`` is not positive.

    """
    renames = {}
    if not similarity > 0:
        return renames

    ui = repo.ui
    matches = similar.findrenames(repo, added, removed, similarity)
    for source, dest, score in matches:
        if ui.verbose:
            # Per-file detail is only shown in verbose mode.
            ui.status(
                _(b'detected move of %s as %s (%d%% similar)\n')
                % (uipathfn(source), uipathfn(dest), score * 100)
            )
        renames[dest] = source

    if renames:
        ui.status(_(b'detected move of %d files\n') % len(renames))
    return renames
General Comments 0
You need to be logged in to leave comments.
Login now