store: introduce a EntryFile object to actually access file info...
marmoute
r51365:9fdc28e2 default
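The narrowcommands.py hunk below is the consumer side of this change: when narrowing drops store files, _narrow() no longer unlinks the raw fncache path itself but asks each store entry for its physical files through the new files() accessor, collecting every file backing the entry. A minimal sketch of that call pattern follows, assuming a repo object and the newmatch narrow matcher from _narrow() are in scope; only datafiles(), files(), unencoded_path, unlinkpath and markremoved are taken from the diff, everything else is illustrative.

from mercurial import util

todelete = []
for entry in repo.store.datafiles():
    f = entry.unencoded_path
    if f.startswith(b'data/') and not newmatch(f[5:-2]):
        # collect every on-disk file backing this entry, not just the
        # fncache path itself
        for store_file in entry.files():
            todelete.append(store_file.unencoded_path)

for f in todelete:
    util.unlinkpath(repo.svfs.join(f))
    repo.store.markremoved(f)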
@@ -1,696 +1,698
1 # narrowcommands.py - command modifications for narrowhg extension
1 # narrowcommands.py - command modifications for narrowhg extension
2 #
2 #
3 # Copyright 2017 Google, Inc.
3 # Copyright 2017 Google, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import itertools
8 import itertools
9 import os
9 import os
10
10
11 from mercurial.i18n import _
11 from mercurial.i18n import _
12 from mercurial.node import (
12 from mercurial.node import (
13 hex,
13 hex,
14 short,
14 short,
15 )
15 )
16 from mercurial import (
16 from mercurial import (
17 bundle2,
17 bundle2,
18 cmdutil,
18 cmdutil,
19 commands,
19 commands,
20 discovery,
20 discovery,
21 encoding,
21 encoding,
22 error,
22 error,
23 exchange,
23 exchange,
24 extensions,
24 extensions,
25 hg,
25 hg,
26 narrowspec,
26 narrowspec,
27 pathutil,
27 pathutil,
28 pycompat,
28 pycompat,
29 registrar,
29 registrar,
30 repair,
30 repair,
31 repoview,
31 repoview,
32 requirements,
32 requirements,
33 sparse,
33 sparse,
34 util,
34 util,
35 wireprototypes,
35 wireprototypes,
36 )
36 )
37 from mercurial.utils import (
37 from mercurial.utils import (
38 urlutil,
38 urlutil,
39 )
39 )
40
40
41 table = {}
41 table = {}
42 command = registrar.command(table)
42 command = registrar.command(table)
43
43
44
44
45 def setup():
45 def setup():
46 """Wraps user-facing mercurial commands with narrow-aware versions."""
46 """Wraps user-facing mercurial commands with narrow-aware versions."""
47
47
48 entry = extensions.wrapcommand(commands.table, b'clone', clonenarrowcmd)
48 entry = extensions.wrapcommand(commands.table, b'clone', clonenarrowcmd)
49 entry[1].append(
49 entry[1].append(
50 (b'', b'narrow', None, _(b"create a narrow clone of select files"))
50 (b'', b'narrow', None, _(b"create a narrow clone of select files"))
51 )
51 )
52 entry[1].append(
52 entry[1].append(
53 (
53 (
54 b'',
54 b'',
55 b'depth',
55 b'depth',
56 b'',
56 b'',
57 _(b"limit the history fetched by distance from heads"),
57 _(b"limit the history fetched by distance from heads"),
58 )
58 )
59 )
59 )
60 entry[1].append((b'', b'narrowspec', b'', _(b"read narrowspecs from file")))
60 entry[1].append((b'', b'narrowspec', b'', _(b"read narrowspecs from file")))
61 # TODO(durin42): unify sparse/narrow --include/--exclude logic a bit
61 # TODO(durin42): unify sparse/narrow --include/--exclude logic a bit
62 if b'sparse' not in extensions.enabled():
62 if b'sparse' not in extensions.enabled():
63 entry[1].append(
63 entry[1].append(
64 (b'', b'include', [], _(b"specifically fetch this file/directory"))
64 (b'', b'include', [], _(b"specifically fetch this file/directory"))
65 )
65 )
66 entry[1].append(
66 entry[1].append(
67 (
67 (
68 b'',
68 b'',
69 b'exclude',
69 b'exclude',
70 [],
70 [],
71 _(b"do not fetch this file/directory, even if included"),
71 _(b"do not fetch this file/directory, even if included"),
72 )
72 )
73 )
73 )
74
74
75 entry = extensions.wrapcommand(commands.table, b'pull', pullnarrowcmd)
75 entry = extensions.wrapcommand(commands.table, b'pull', pullnarrowcmd)
76 entry[1].append(
76 entry[1].append(
77 (
77 (
78 b'',
78 b'',
79 b'depth',
79 b'depth',
80 b'',
80 b'',
81 _(b"limit the history fetched by distance from heads"),
81 _(b"limit the history fetched by distance from heads"),
82 )
82 )
83 )
83 )
84
84
85 extensions.wrapcommand(commands.table, b'archive', archivenarrowcmd)
85 extensions.wrapcommand(commands.table, b'archive', archivenarrowcmd)
86
86
87
87
88 def clonenarrowcmd(orig, ui, repo, *args, **opts):
88 def clonenarrowcmd(orig, ui, repo, *args, **opts):
89 """Wraps clone command, so 'hg clone' first wraps localrepo.clone()."""
89 """Wraps clone command, so 'hg clone' first wraps localrepo.clone()."""
90 opts = pycompat.byteskwargs(opts)
90 opts = pycompat.byteskwargs(opts)
91 wrappedextraprepare = util.nullcontextmanager()
91 wrappedextraprepare = util.nullcontextmanager()
92 narrowspecfile = opts[b'narrowspec']
92 narrowspecfile = opts[b'narrowspec']
93
93
94 if narrowspecfile:
94 if narrowspecfile:
95 filepath = os.path.join(encoding.getcwd(), narrowspecfile)
95 filepath = os.path.join(encoding.getcwd(), narrowspecfile)
96 ui.status(_(b"reading narrowspec from '%s'\n") % filepath)
96 ui.status(_(b"reading narrowspec from '%s'\n") % filepath)
97 try:
97 try:
98 fdata = util.readfile(filepath)
98 fdata = util.readfile(filepath)
99 except IOError as inst:
99 except IOError as inst:
100 raise error.Abort(
100 raise error.Abort(
101 _(b"cannot read narrowspecs from '%s': %s")
101 _(b"cannot read narrowspecs from '%s': %s")
102 % (filepath, encoding.strtolocal(inst.strerror))
102 % (filepath, encoding.strtolocal(inst.strerror))
103 )
103 )
104
104
105 includes, excludes, profiles = sparse.parseconfig(ui, fdata, b'narrow')
105 includes, excludes, profiles = sparse.parseconfig(ui, fdata, b'narrow')
106 if profiles:
106 if profiles:
107 raise error.ConfigError(
107 raise error.ConfigError(
108 _(
108 _(
109 b"cannot specify other files using '%include' in"
109 b"cannot specify other files using '%include' in"
110 b" narrowspec"
110 b" narrowspec"
111 )
111 )
112 )
112 )
113
113
114 narrowspec.validatepatterns(includes)
114 narrowspec.validatepatterns(includes)
115 narrowspec.validatepatterns(excludes)
115 narrowspec.validatepatterns(excludes)
116
116
117 # narrowspec is passed so we should assume that user wants narrow clone
117 # narrowspec is passed so we should assume that user wants narrow clone
118 opts[b'narrow'] = True
118 opts[b'narrow'] = True
119 opts[b'include'].extend(includes)
119 opts[b'include'].extend(includes)
120 opts[b'exclude'].extend(excludes)
120 opts[b'exclude'].extend(excludes)
121
121
122 if opts[b'narrow']:
122 if opts[b'narrow']:
123
123
124 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
124 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
125 orig(pullop, kwargs)
125 orig(pullop, kwargs)
126
126
127 if opts.get(b'depth'):
127 if opts.get(b'depth'):
128 kwargs[b'depth'] = opts[b'depth']
128 kwargs[b'depth'] = opts[b'depth']
129
129
130 wrappedextraprepare = extensions.wrappedfunction(
130 wrappedextraprepare = extensions.wrappedfunction(
131 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
131 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
132 )
132 )
133
133
134 with wrappedextraprepare:
134 with wrappedextraprepare:
135 return orig(ui, repo, *args, **pycompat.strkwargs(opts))
135 return orig(ui, repo, *args, **pycompat.strkwargs(opts))
136
136
137
137
138 def pullnarrowcmd(orig, ui, repo, *args, **opts):
138 def pullnarrowcmd(orig, ui, repo, *args, **opts):
139 """Wraps pull command to allow modifying narrow spec."""
139 """Wraps pull command to allow modifying narrow spec."""
140 wrappedextraprepare = util.nullcontextmanager()
140 wrappedextraprepare = util.nullcontextmanager()
141 if requirements.NARROW_REQUIREMENT in repo.requirements:
141 if requirements.NARROW_REQUIREMENT in repo.requirements:
142
142
143 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
143 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
144 orig(pullop, kwargs)
144 orig(pullop, kwargs)
145 if opts.get('depth'):
145 if opts.get('depth'):
146 kwargs[b'depth'] = opts['depth']
146 kwargs[b'depth'] = opts['depth']
147
147
148 wrappedextraprepare = extensions.wrappedfunction(
148 wrappedextraprepare = extensions.wrappedfunction(
149 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
149 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
150 )
150 )
151
151
152 with wrappedextraprepare:
152 with wrappedextraprepare:
153 return orig(ui, repo, *args, **opts)
153 return orig(ui, repo, *args, **opts)
154
154
155
155
156 def archivenarrowcmd(orig, ui, repo, *args, **opts):
156 def archivenarrowcmd(orig, ui, repo, *args, **opts):
157 """Wraps archive command to narrow the default includes."""
157 """Wraps archive command to narrow the default includes."""
158 if requirements.NARROW_REQUIREMENT in repo.requirements:
158 if requirements.NARROW_REQUIREMENT in repo.requirements:
159 repo_includes, repo_excludes = repo.narrowpats
159 repo_includes, repo_excludes = repo.narrowpats
160 includes = set(opts.get('include', []))
160 includes = set(opts.get('include', []))
161 excludes = set(opts.get('exclude', []))
161 excludes = set(opts.get('exclude', []))
162 includes, excludes, unused_invalid = narrowspec.restrictpatterns(
162 includes, excludes, unused_invalid = narrowspec.restrictpatterns(
163 includes, excludes, repo_includes, repo_excludes
163 includes, excludes, repo_includes, repo_excludes
164 )
164 )
165 if includes:
165 if includes:
166 opts['include'] = includes
166 opts['include'] = includes
167 if excludes:
167 if excludes:
168 opts['exclude'] = excludes
168 opts['exclude'] = excludes
169 return orig(ui, repo, *args, **opts)
169 return orig(ui, repo, *args, **opts)
170
170
171
171
172 def pullbundle2extraprepare(orig, pullop, kwargs):
172 def pullbundle2extraprepare(orig, pullop, kwargs):
173 repo = pullop.repo
173 repo = pullop.repo
174 if requirements.NARROW_REQUIREMENT not in repo.requirements:
174 if requirements.NARROW_REQUIREMENT not in repo.requirements:
175 return orig(pullop, kwargs)
175 return orig(pullop, kwargs)
176
176
177 if wireprototypes.NARROWCAP not in pullop.remote.capabilities():
177 if wireprototypes.NARROWCAP not in pullop.remote.capabilities():
178 raise error.Abort(_(b"server does not support narrow clones"))
178 raise error.Abort(_(b"server does not support narrow clones"))
179 orig(pullop, kwargs)
179 orig(pullop, kwargs)
180 kwargs[b'narrow'] = True
180 kwargs[b'narrow'] = True
181 include, exclude = repo.narrowpats
181 include, exclude = repo.narrowpats
182 kwargs[b'oldincludepats'] = include
182 kwargs[b'oldincludepats'] = include
183 kwargs[b'oldexcludepats'] = exclude
183 kwargs[b'oldexcludepats'] = exclude
184 if include:
184 if include:
185 kwargs[b'includepats'] = include
185 kwargs[b'includepats'] = include
186 if exclude:
186 if exclude:
187 kwargs[b'excludepats'] = exclude
187 kwargs[b'excludepats'] = exclude
188 # calculate known nodes only in ellipses cases because in non-ellipses cases
188 # calculate known nodes only in ellipses cases because in non-ellipses cases
189 # we have all the nodes
189 # we have all the nodes
190 if wireprototypes.ELLIPSESCAP1 in pullop.remote.capabilities():
190 if wireprototypes.ELLIPSESCAP1 in pullop.remote.capabilities():
191 kwargs[b'known'] = [
191 kwargs[b'known'] = [
192 hex(ctx.node())
192 hex(ctx.node())
193 for ctx in repo.set(b'::%ln', pullop.common)
193 for ctx in repo.set(b'::%ln', pullop.common)
194 if ctx.node() != repo.nullid
194 if ctx.node() != repo.nullid
195 ]
195 ]
196 if not kwargs[b'known']:
196 if not kwargs[b'known']:
197 # Mercurial serializes an empty list as '' and deserializes it as
197 # Mercurial serializes an empty list as '' and deserializes it as
198 # [''], so delete it instead to avoid handling the empty string on
198 # [''], so delete it instead to avoid handling the empty string on
199 # the server.
199 # the server.
200 del kwargs[b'known']
200 del kwargs[b'known']
201
201
202
202
203 extensions.wrapfunction(
203 extensions.wrapfunction(
204 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare
204 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare
205 )
205 )
206
206
207
207
208 def _narrow(
208 def _narrow(
209 ui,
209 ui,
210 repo,
210 repo,
211 remote,
211 remote,
212 commoninc,
212 commoninc,
213 oldincludes,
213 oldincludes,
214 oldexcludes,
214 oldexcludes,
215 newincludes,
215 newincludes,
216 newexcludes,
216 newexcludes,
217 force,
217 force,
218 backup,
218 backup,
219 ):
219 ):
220 oldmatch = narrowspec.match(repo.root, oldincludes, oldexcludes)
220 oldmatch = narrowspec.match(repo.root, oldincludes, oldexcludes)
221 newmatch = narrowspec.match(repo.root, newincludes, newexcludes)
221 newmatch = narrowspec.match(repo.root, newincludes, newexcludes)
222
222
223 # This is essentially doing "hg outgoing" to find all local-only
223 # This is essentially doing "hg outgoing" to find all local-only
224 # commits. We will then check that the local-only commits don't
224 # commits. We will then check that the local-only commits don't
225 # have any changes to files that will be untracked.
225 # have any changes to files that will be untracked.
226 unfi = repo.unfiltered()
226 unfi = repo.unfiltered()
227 outgoing = discovery.findcommonoutgoing(unfi, remote, commoninc=commoninc)
227 outgoing = discovery.findcommonoutgoing(unfi, remote, commoninc=commoninc)
228 ui.status(_(b'looking for local changes to affected paths\n'))
228 ui.status(_(b'looking for local changes to affected paths\n'))
229 progress = ui.makeprogress(
229 progress = ui.makeprogress(
230 topic=_(b'changesets'),
230 topic=_(b'changesets'),
231 unit=_(b'changesets'),
231 unit=_(b'changesets'),
232 total=len(outgoing.missing) + len(outgoing.excluded),
232 total=len(outgoing.missing) + len(outgoing.excluded),
233 )
233 )
234 localnodes = []
234 localnodes = []
235 with progress:
235 with progress:
236 for n in itertools.chain(outgoing.missing, outgoing.excluded):
236 for n in itertools.chain(outgoing.missing, outgoing.excluded):
237 progress.increment()
237 progress.increment()
238 if any(oldmatch(f) and not newmatch(f) for f in unfi[n].files()):
238 if any(oldmatch(f) and not newmatch(f) for f in unfi[n].files()):
239 localnodes.append(n)
239 localnodes.append(n)
240 revstostrip = unfi.revs(b'descendants(%ln)', localnodes)
240 revstostrip = unfi.revs(b'descendants(%ln)', localnodes)
241 hiddenrevs = repoview.filterrevs(repo, b'visible')
241 hiddenrevs = repoview.filterrevs(repo, b'visible')
242 visibletostrip = list(
242 visibletostrip = list(
243 repo.changelog.node(r) for r in (revstostrip - hiddenrevs)
243 repo.changelog.node(r) for r in (revstostrip - hiddenrevs)
244 )
244 )
245 if visibletostrip:
245 if visibletostrip:
246 ui.status(
246 ui.status(
247 _(
247 _(
248 b'The following changeset(s) or their ancestors have '
248 b'The following changeset(s) or their ancestors have '
249 b'local changes not on the remote:\n'
249 b'local changes not on the remote:\n'
250 )
250 )
251 )
251 )
252 maxnodes = 10
252 maxnodes = 10
253 if ui.verbose or len(visibletostrip) <= maxnodes:
253 if ui.verbose or len(visibletostrip) <= maxnodes:
254 for n in visibletostrip:
254 for n in visibletostrip:
255 ui.status(b'%s\n' % short(n))
255 ui.status(b'%s\n' % short(n))
256 else:
256 else:
257 for n in visibletostrip[:maxnodes]:
257 for n in visibletostrip[:maxnodes]:
258 ui.status(b'%s\n' % short(n))
258 ui.status(b'%s\n' % short(n))
259 ui.status(
259 ui.status(
260 _(b'...and %d more, use --verbose to list all\n')
260 _(b'...and %d more, use --verbose to list all\n')
261 % (len(visibletostrip) - maxnodes)
261 % (len(visibletostrip) - maxnodes)
262 )
262 )
263 if not force:
263 if not force:
264 raise error.StateError(
264 raise error.StateError(
265 _(b'local changes found'),
265 _(b'local changes found'),
266 hint=_(b'use --force-delete-local-changes to ignore'),
266 hint=_(b'use --force-delete-local-changes to ignore'),
267 )
267 )
268
268
269 with ui.uninterruptible():
269 with ui.uninterruptible():
270 if revstostrip:
270 if revstostrip:
271 tostrip = [unfi.changelog.node(r) for r in revstostrip]
271 tostrip = [unfi.changelog.node(r) for r in revstostrip]
272 if repo[b'.'].node() in tostrip:
272 if repo[b'.'].node() in tostrip:
273 # stripping working copy, so move to a different commit first
273 # stripping working copy, so move to a different commit first
274 urev = max(
274 urev = max(
275 repo.revs(
275 repo.revs(
276 b'(::%n) - %ln + null',
276 b'(::%n) - %ln + null',
277 repo[b'.'].node(),
277 repo[b'.'].node(),
278 visibletostrip,
278 visibletostrip,
279 )
279 )
280 )
280 )
281 hg.clean(repo, urev)
281 hg.clean(repo, urev)
282 overrides = {(b'devel', b'strip-obsmarkers'): False}
282 overrides = {(b'devel', b'strip-obsmarkers'): False}
283 if backup:
283 if backup:
284 ui.status(_(b'moving unwanted changesets to backup\n'))
284 ui.status(_(b'moving unwanted changesets to backup\n'))
285 else:
285 else:
286 ui.status(_(b'deleting unwanted changesets\n'))
286 ui.status(_(b'deleting unwanted changesets\n'))
287 with ui.configoverride(overrides, b'narrow'):
287 with ui.configoverride(overrides, b'narrow'):
288 repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
288 repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
289
289
290 todelete = []
290 todelete = []
291 for entry in repo.store.datafiles():
291 for entry in repo.store.datafiles():
292 f = entry.unencoded_path
292 f = entry.unencoded_path
293 if f.startswith(b'data/'):
293 if f.startswith(b'data/'):
294 file = f[5:-2]
294 file = f[5:-2]
295 if not newmatch(file):
295 if not newmatch(file):
296 todelete.append(f)
296 for file_ in entry.files():
297 todelete.append(file_.unencoded_path)
297 elif f.startswith(b'meta/'):
298 elif f.startswith(b'meta/'):
298 dir = f[5:-13]
299 dir = f[5:-13]
299 dirs = sorted(pathutil.dirs({dir})) + [dir]
300 dirs = sorted(pathutil.dirs({dir})) + [dir]
300 include = True
301 include = True
301 for d in dirs:
302 for d in dirs:
302 visit = newmatch.visitdir(d)
303 visit = newmatch.visitdir(d)
303 if not visit:
304 if not visit:
304 include = False
305 include = False
305 break
306 break
306 if visit == b'all':
307 if visit == b'all':
307 break
308 break
308 if not include:
309 if not include:
309 todelete.append(f)
310 for file_ in entry.files():
311 todelete.append(file_.unencoded_path)
310
312
311 repo.destroying()
313 repo.destroying()
312
314
313 with repo.transaction(b'narrowing'):
315 with repo.transaction(b'narrowing'):
314 # Update narrowspec before removing revlogs, so repo won't be
316 # Update narrowspec before removing revlogs, so repo won't be
315 # corrupt in case of crash
317 # corrupt in case of crash
316 repo.setnarrowpats(newincludes, newexcludes)
318 repo.setnarrowpats(newincludes, newexcludes)
317
319
318 for f in todelete:
320 for f in todelete:
319 ui.status(_(b'deleting %s\n') % f)
321 ui.status(_(b'deleting %s\n') % f)
320 util.unlinkpath(repo.svfs.join(f))
322 util.unlinkpath(repo.svfs.join(f))
321 repo.store.markremoved(f)
323 repo.store.markremoved(f)
322
324
323 ui.status(_(b'deleting unwanted files from working copy\n'))
325 ui.status(_(b'deleting unwanted files from working copy\n'))
324 with repo.dirstate.changing_parents(repo):
326 with repo.dirstate.changing_parents(repo):
325 narrowspec.updateworkingcopy(repo, assumeclean=True)
327 narrowspec.updateworkingcopy(repo, assumeclean=True)
326 narrowspec.copytoworkingcopy(repo)
328 narrowspec.copytoworkingcopy(repo)
327
329
328 repo.destroyed()
330 repo.destroyed()
329
331
330
332
331 def _widen(
333 def _widen(
332 ui,
334 ui,
333 repo,
335 repo,
334 remote,
336 remote,
335 commoninc,
337 commoninc,
336 oldincludes,
338 oldincludes,
337 oldexcludes,
339 oldexcludes,
338 newincludes,
340 newincludes,
339 newexcludes,
341 newexcludes,
340 ):
342 ):
341 # for now we assume that if a server has ellipses enabled, we will be
343 # for now we assume that if a server has ellipses enabled, we will be
342 # exchanging ellipses nodes. In future we should add ellipses as a client
344 # exchanging ellipses nodes. In future we should add ellipses as a client
343 # side requirement (maybe) to distinguish a client is shallow or not and
345 # side requirement (maybe) to distinguish a client is shallow or not and
344 # then send that information to server whether we want ellipses or not.
346 # then send that information to server whether we want ellipses or not.
345 # Theoretically a non-ellipses repo should be able to use narrow
347 # Theoretically a non-ellipses repo should be able to use narrow
346 # functionality from an ellipses enabled server
348 # functionality from an ellipses enabled server
347 remotecap = remote.capabilities()
349 remotecap = remote.capabilities()
348 ellipsesremote = any(
350 ellipsesremote = any(
349 cap in remotecap for cap in wireprototypes.SUPPORTED_ELLIPSESCAP
351 cap in remotecap for cap in wireprototypes.SUPPORTED_ELLIPSESCAP
350 )
352 )
351
353
352 # check whether we are talking to a server which supports old version of
354 # check whether we are talking to a server which supports old version of
353 # ellipses capabilities
355 # ellipses capabilities
354 isoldellipses = (
356 isoldellipses = (
355 ellipsesremote
357 ellipsesremote
356 and wireprototypes.ELLIPSESCAP1 in remotecap
358 and wireprototypes.ELLIPSESCAP1 in remotecap
357 and wireprototypes.ELLIPSESCAP not in remotecap
359 and wireprototypes.ELLIPSESCAP not in remotecap
358 )
360 )
359
361
360 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
362 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
361 orig(pullop, kwargs)
363 orig(pullop, kwargs)
362 # The old{in,ex}cludepats have already been set by orig()
364 # The old{in,ex}cludepats have already been set by orig()
363 kwargs[b'includepats'] = newincludes
365 kwargs[b'includepats'] = newincludes
364 kwargs[b'excludepats'] = newexcludes
366 kwargs[b'excludepats'] = newexcludes
365
367
366 wrappedextraprepare = extensions.wrappedfunction(
368 wrappedextraprepare = extensions.wrappedfunction(
367 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
369 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
368 )
370 )
369
371
370 # define a function that narrowbundle2 can call after creating the
372 # define a function that narrowbundle2 can call after creating the
371 # backup bundle, but before applying the bundle from the server
373 # backup bundle, but before applying the bundle from the server
372 def setnewnarrowpats():
374 def setnewnarrowpats():
373 repo.setnarrowpats(newincludes, newexcludes)
375 repo.setnarrowpats(newincludes, newexcludes)
374
376
375 repo.setnewnarrowpats = setnewnarrowpats
377 repo.setnewnarrowpats = setnewnarrowpats
376 # silence the devel-warning of applying an empty changegroup
378 # silence the devel-warning of applying an empty changegroup
377 overrides = {(b'devel', b'all-warnings'): False}
379 overrides = {(b'devel', b'all-warnings'): False}
378
380
379 common = commoninc[0]
381 common = commoninc[0]
380 with ui.uninterruptible():
382 with ui.uninterruptible():
381 if ellipsesremote:
383 if ellipsesremote:
382 ds = repo.dirstate
384 ds = repo.dirstate
383 p1, p2 = ds.p1(), ds.p2()
385 p1, p2 = ds.p1(), ds.p2()
384 with ds.changing_parents(repo):
386 with ds.changing_parents(repo):
385 ds.setparents(repo.nullid, repo.nullid)
387 ds.setparents(repo.nullid, repo.nullid)
386 if isoldellipses:
388 if isoldellipses:
387 with wrappedextraprepare:
389 with wrappedextraprepare:
388 exchange.pull(repo, remote, heads=common)
390 exchange.pull(repo, remote, heads=common)
389 else:
391 else:
390 known = []
392 known = []
391 if ellipsesremote:
393 if ellipsesremote:
392 known = [
394 known = [
393 ctx.node()
395 ctx.node()
394 for ctx in repo.set(b'::%ln', common)
396 for ctx in repo.set(b'::%ln', common)
395 if ctx.node() != repo.nullid
397 if ctx.node() != repo.nullid
396 ]
398 ]
397 with remote.commandexecutor() as e:
399 with remote.commandexecutor() as e:
398 bundle = e.callcommand(
400 bundle = e.callcommand(
399 b'narrow_widen',
401 b'narrow_widen',
400 {
402 {
401 b'oldincludes': oldincludes,
403 b'oldincludes': oldincludes,
402 b'oldexcludes': oldexcludes,
404 b'oldexcludes': oldexcludes,
403 b'newincludes': newincludes,
405 b'newincludes': newincludes,
404 b'newexcludes': newexcludes,
406 b'newexcludes': newexcludes,
405 b'cgversion': b'03',
407 b'cgversion': b'03',
406 b'commonheads': common,
408 b'commonheads': common,
407 b'known': known,
409 b'known': known,
408 b'ellipses': ellipsesremote,
410 b'ellipses': ellipsesremote,
409 },
411 },
410 ).result()
412 ).result()
411
413
412 trmanager = exchange.transactionmanager(
414 trmanager = exchange.transactionmanager(
413 repo, b'widen', remote.url()
415 repo, b'widen', remote.url()
414 )
416 )
415 with trmanager, repo.ui.configoverride(overrides, b'widen'):
417 with trmanager, repo.ui.configoverride(overrides, b'widen'):
416 op = bundle2.bundleoperation(
418 op = bundle2.bundleoperation(
417 repo, trmanager.transaction, source=b'widen'
419 repo, trmanager.transaction, source=b'widen'
418 )
420 )
419 # TODO: we should catch error.Abort here
421 # TODO: we should catch error.Abort here
420 bundle2.processbundle(repo, bundle, op=op, remote=remote)
422 bundle2.processbundle(repo, bundle, op=op, remote=remote)
421
423
422 if ellipsesremote:
424 if ellipsesremote:
423 with ds.changing_parents(repo):
425 with ds.changing_parents(repo):
424 ds.setparents(p1, p2)
426 ds.setparents(p1, p2)
425
427
426 with repo.transaction(b'widening'), repo.dirstate.changing_parents(
428 with repo.transaction(b'widening'), repo.dirstate.changing_parents(
427 repo
429 repo
428 ):
430 ):
429 repo.setnewnarrowpats()
431 repo.setnewnarrowpats()
430 narrowspec.updateworkingcopy(repo)
432 narrowspec.updateworkingcopy(repo)
431 narrowspec.copytoworkingcopy(repo)
433 narrowspec.copytoworkingcopy(repo)
432
434
433
435
434 # TODO(rdamazio): Make new matcher format and update description
436 # TODO(rdamazio): Make new matcher format and update description
435 @command(
437 @command(
436 b'tracked',
438 b'tracked',
437 [
439 [
438 (b'', b'addinclude', [], _(b'new paths to include')),
440 (b'', b'addinclude', [], _(b'new paths to include')),
439 (b'', b'removeinclude', [], _(b'old paths to no longer include')),
441 (b'', b'removeinclude', [], _(b'old paths to no longer include')),
440 (
442 (
441 b'',
443 b'',
442 b'auto-remove-includes',
444 b'auto-remove-includes',
443 False,
445 False,
444 _(b'automatically choose unused includes to remove'),
446 _(b'automatically choose unused includes to remove'),
445 ),
447 ),
446 (b'', b'addexclude', [], _(b'new paths to exclude')),
448 (b'', b'addexclude', [], _(b'new paths to exclude')),
447 (b'', b'import-rules', b'', _(b'import narrowspecs from a file')),
449 (b'', b'import-rules', b'', _(b'import narrowspecs from a file')),
448 (b'', b'removeexclude', [], _(b'old paths to no longer exclude')),
450 (b'', b'removeexclude', [], _(b'old paths to no longer exclude')),
449 (
451 (
450 b'',
452 b'',
451 b'clear',
453 b'clear',
452 False,
454 False,
453 _(b'whether to replace the existing narrowspec'),
455 _(b'whether to replace the existing narrowspec'),
454 ),
456 ),
455 (
457 (
456 b'',
458 b'',
457 b'force-delete-local-changes',
459 b'force-delete-local-changes',
458 False,
460 False,
459 _(b'forces deletion of local changes when narrowing'),
461 _(b'forces deletion of local changes when narrowing'),
460 ),
462 ),
461 (
463 (
462 b'',
464 b'',
463 b'backup',
465 b'backup',
464 True,
466 True,
465 _(b'back up local changes when narrowing'),
467 _(b'back up local changes when narrowing'),
466 ),
468 ),
467 (
469 (
468 b'',
470 b'',
469 b'update-working-copy',
471 b'update-working-copy',
470 False,
472 False,
471 _(b'update working copy when the store has changed'),
473 _(b'update working copy when the store has changed'),
472 ),
474 ),
473 ]
475 ]
474 + commands.remoteopts,
476 + commands.remoteopts,
475 _(b'[OPTIONS]... [REMOTE]'),
477 _(b'[OPTIONS]... [REMOTE]'),
476 inferrepo=True,
478 inferrepo=True,
477 helpcategory=command.CATEGORY_MAINTENANCE,
479 helpcategory=command.CATEGORY_MAINTENANCE,
478 )
480 )
479 def trackedcmd(ui, repo, remotepath=None, *pats, **opts):
481 def trackedcmd(ui, repo, remotepath=None, *pats, **opts):
480 """show or change the current narrowspec
482 """show or change the current narrowspec
481
483
482 With no argument, shows the current narrowspec entries, one per line. Each
484 With no argument, shows the current narrowspec entries, one per line. Each
483 line will be prefixed with 'I' or 'X' for included or excluded patterns,
485 line will be prefixed with 'I' or 'X' for included or excluded patterns,
484 respectively.
486 respectively.
485
487
486 The narrowspec is comprised of expressions to match remote files and/or
488 The narrowspec is comprised of expressions to match remote files and/or
487 directories that should be pulled into your client.
489 directories that should be pulled into your client.
488 The narrowspec has *include* and *exclude* expressions, with excludes always
490 The narrowspec has *include* and *exclude* expressions, with excludes always
489 trumping includes: that is, if a file matches an exclude expression, it will
491 trumping includes: that is, if a file matches an exclude expression, it will
490 be excluded even if it also matches an include expression.
492 be excluded even if it also matches an include expression.
491 Excluding files that were never included has no effect.
493 Excluding files that were never included has no effect.
492
494
493 Each included or excluded entry is in the format described by
495 Each included or excluded entry is in the format described by
494 'hg help patterns'.
496 'hg help patterns'.
495
497
496 The options allow you to add or remove included and excluded expressions.
498 The options allow you to add or remove included and excluded expressions.
497
499
498 If --clear is specified, then all previous includes and excludes are DROPPED
500 If --clear is specified, then all previous includes and excludes are DROPPED
499 and replaced by the new ones specified to --addinclude and --addexclude.
501 and replaced by the new ones specified to --addinclude and --addexclude.
500 If --clear is specified without any further options, the narrowspec will be
502 If --clear is specified without any further options, the narrowspec will be
501 empty and will not match any files.
503 empty and will not match any files.
502
504
503 If --auto-remove-includes is specified, then those includes that don't match
505 If --auto-remove-includes is specified, then those includes that don't match
504 any files modified by currently visible local commits (those not shared by
506 any files modified by currently visible local commits (those not shared by
505 the remote) will be added to the set of explicitly specified includes to
507 the remote) will be added to the set of explicitly specified includes to
506 remove.
508 remove.
507
509
508 --import-rules accepts a path to a file containing rules, allowing you to
510 --import-rules accepts a path to a file containing rules, allowing you to
509 add --addinclude, --addexclude rules in bulk. Like the other include and
511 add --addinclude, --addexclude rules in bulk. Like the other include and
510 exclude switches, the changes are applied immediately.
512 exclude switches, the changes are applied immediately.
511 """
513 """
512 opts = pycompat.byteskwargs(opts)
514 opts = pycompat.byteskwargs(opts)
513 if requirements.NARROW_REQUIREMENT not in repo.requirements:
515 if requirements.NARROW_REQUIREMENT not in repo.requirements:
514 raise error.InputError(
516 raise error.InputError(
515 _(
517 _(
516 b'the tracked command is only supported on '
518 b'the tracked command is only supported on '
517 b'repositories cloned with --narrow'
519 b'repositories cloned with --narrow'
518 )
520 )
519 )
521 )
520
522
521 # Before supporting, decide whether it "hg tracked --clear" should mean
523 # Before supporting, decide whether it "hg tracked --clear" should mean
522 # tracking no paths or all paths.
524 # tracking no paths or all paths.
523 if opts[b'clear']:
525 if opts[b'clear']:
524 raise error.InputError(_(b'the --clear option is not yet supported'))
526 raise error.InputError(_(b'the --clear option is not yet supported'))
525
527
526 # import rules from a file
528 # import rules from a file
527 newrules = opts.get(b'import_rules')
529 newrules = opts.get(b'import_rules')
528 if newrules:
530 if newrules:
529 try:
531 try:
530 filepath = os.path.join(encoding.getcwd(), newrules)
532 filepath = os.path.join(encoding.getcwd(), newrules)
531 fdata = util.readfile(filepath)
533 fdata = util.readfile(filepath)
532 except IOError as inst:
534 except IOError as inst:
533 raise error.StorageError(
535 raise error.StorageError(
534 _(b"cannot read narrowspecs from '%s': %s")
536 _(b"cannot read narrowspecs from '%s': %s")
535 % (filepath, encoding.strtolocal(inst.strerror))
537 % (filepath, encoding.strtolocal(inst.strerror))
536 )
538 )
537 includepats, excludepats, profiles = sparse.parseconfig(
539 includepats, excludepats, profiles = sparse.parseconfig(
538 ui, fdata, b'narrow'
540 ui, fdata, b'narrow'
539 )
541 )
540 if profiles:
542 if profiles:
541 raise error.InputError(
543 raise error.InputError(
542 _(
544 _(
543 b"including other spec files using '%include' "
545 b"including other spec files using '%include' "
544 b"is not supported in narrowspec"
546 b"is not supported in narrowspec"
545 )
547 )
546 )
548 )
547 opts[b'addinclude'].extend(includepats)
549 opts[b'addinclude'].extend(includepats)
548 opts[b'addexclude'].extend(excludepats)
550 opts[b'addexclude'].extend(excludepats)
549
551
550 addedincludes = narrowspec.parsepatterns(opts[b'addinclude'])
552 addedincludes = narrowspec.parsepatterns(opts[b'addinclude'])
551 removedincludes = narrowspec.parsepatterns(opts[b'removeinclude'])
553 removedincludes = narrowspec.parsepatterns(opts[b'removeinclude'])
552 addedexcludes = narrowspec.parsepatterns(opts[b'addexclude'])
554 addedexcludes = narrowspec.parsepatterns(opts[b'addexclude'])
553 removedexcludes = narrowspec.parsepatterns(opts[b'removeexclude'])
555 removedexcludes = narrowspec.parsepatterns(opts[b'removeexclude'])
554 autoremoveincludes = opts[b'auto_remove_includes']
556 autoremoveincludes = opts[b'auto_remove_includes']
555
557
556 update_working_copy = opts[b'update_working_copy']
558 update_working_copy = opts[b'update_working_copy']
557 only_show = not (
559 only_show = not (
558 addedincludes
560 addedincludes
559 or removedincludes
561 or removedincludes
560 or addedexcludes
562 or addedexcludes
561 or removedexcludes
563 or removedexcludes
562 or newrules
564 or newrules
563 or autoremoveincludes
565 or autoremoveincludes
564 or update_working_copy
566 or update_working_copy
565 )
567 )
566
568
567 # Only print the current narrowspec.
569 # Only print the current narrowspec.
568 if only_show:
570 if only_show:
569 oldincludes, oldexcludes = repo.narrowpats
571 oldincludes, oldexcludes = repo.narrowpats
570 ui.pager(b'tracked')
572 ui.pager(b'tracked')
571 fm = ui.formatter(b'narrow', opts)
573 fm = ui.formatter(b'narrow', opts)
572 for i in sorted(oldincludes):
574 for i in sorted(oldincludes):
573 fm.startitem()
575 fm.startitem()
574 fm.write(b'status', b'%s ', b'I', label=b'narrow.included')
576 fm.write(b'status', b'%s ', b'I', label=b'narrow.included')
575 fm.write(b'pat', b'%s\n', i, label=b'narrow.included')
577 fm.write(b'pat', b'%s\n', i, label=b'narrow.included')
576 for i in sorted(oldexcludes):
578 for i in sorted(oldexcludes):
577 fm.startitem()
579 fm.startitem()
578 fm.write(b'status', b'%s ', b'X', label=b'narrow.excluded')
580 fm.write(b'status', b'%s ', b'X', label=b'narrow.excluded')
579 fm.write(b'pat', b'%s\n', i, label=b'narrow.excluded')
581 fm.write(b'pat', b'%s\n', i, label=b'narrow.excluded')
580 fm.end()
582 fm.end()
581 return 0
583 return 0
582
584
583 with repo.wlock(), repo.lock():
585 with repo.wlock(), repo.lock():
584 oldincludes, oldexcludes = repo.narrowpats
586 oldincludes, oldexcludes = repo.narrowpats
585
587
586 # filter the user passed additions and deletions into actual additions and
588 # filter the user passed additions and deletions into actual additions and
587 # deletions of excludes and includes
589 # deletions of excludes and includes
588 addedincludes -= oldincludes
590 addedincludes -= oldincludes
589 removedincludes &= oldincludes
591 removedincludes &= oldincludes
590 addedexcludes -= oldexcludes
592 addedexcludes -= oldexcludes
591 removedexcludes &= oldexcludes
593 removedexcludes &= oldexcludes
592
594
593 widening = addedincludes or removedexcludes
595 widening = addedincludes or removedexcludes
594 narrowing = removedincludes or addedexcludes
596 narrowing = removedincludes or addedexcludes
595
597
596 if update_working_copy:
598 if update_working_copy:
597 with repo.transaction(b'narrow-wc'), repo.dirstate.changing_parents(
599 with repo.transaction(b'narrow-wc'), repo.dirstate.changing_parents(
598 repo
600 repo
599 ):
601 ):
600 narrowspec.updateworkingcopy(repo)
602 narrowspec.updateworkingcopy(repo)
601 narrowspec.copytoworkingcopy(repo)
603 narrowspec.copytoworkingcopy(repo)
602 return 0
604 return 0
603
605
604 if not (widening or narrowing or autoremoveincludes):
606 if not (widening or narrowing or autoremoveincludes):
605 ui.status(_(b"nothing to widen or narrow\n"))
607 ui.status(_(b"nothing to widen or narrow\n"))
606 return 0
608 return 0
607
609
608 cmdutil.bailifchanged(repo)
610 cmdutil.bailifchanged(repo)
609
611
610 # Find the revisions we have in common with the remote. These will
612 # Find the revisions we have in common with the remote. These will
611 # be used for finding local-only changes for narrowing. They will
613 # be used for finding local-only changes for narrowing. They will
612 # also define the set of revisions to update for widening.
614 # also define the set of revisions to update for widening.
613 path = urlutil.get_unique_pull_path_obj(b'tracked', ui, remotepath)
615 path = urlutil.get_unique_pull_path_obj(b'tracked', ui, remotepath)
614 ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(path.loc))
616 ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(path.loc))
615 remote = hg.peer(repo, opts, path)
617 remote = hg.peer(repo, opts, path)
616
618
617 try:
619 try:
618 # check narrow support before doing anything if widening needs to be
620 # check narrow support before doing anything if widening needs to be
619 # performed. In future we should also abort if client is ellipses and
621 # performed. In future we should also abort if client is ellipses and
620 # server does not support ellipses
622 # server does not support ellipses
621 if (
623 if (
622 widening
624 widening
623 and wireprototypes.NARROWCAP not in remote.capabilities()
625 and wireprototypes.NARROWCAP not in remote.capabilities()
624 ):
626 ):
625 raise error.Abort(_(b"server does not support narrow clones"))
627 raise error.Abort(_(b"server does not support narrow clones"))
626
628
627 commoninc = discovery.findcommonincoming(repo, remote)
629 commoninc = discovery.findcommonincoming(repo, remote)
628
630
629 if autoremoveincludes:
631 if autoremoveincludes:
630 outgoing = discovery.findcommonoutgoing(
632 outgoing = discovery.findcommonoutgoing(
631 repo, remote, commoninc=commoninc
633 repo, remote, commoninc=commoninc
632 )
634 )
633 ui.status(_(b'looking for unused includes to remove\n'))
635 ui.status(_(b'looking for unused includes to remove\n'))
634 localfiles = set()
636 localfiles = set()
635 for n in itertools.chain(outgoing.missing, outgoing.excluded):
637 for n in itertools.chain(outgoing.missing, outgoing.excluded):
636 localfiles.update(repo[n].files())
638 localfiles.update(repo[n].files())
637 suggestedremovals = []
639 suggestedremovals = []
638 for include in sorted(oldincludes):
640 for include in sorted(oldincludes):
639 match = narrowspec.match(repo.root, [include], oldexcludes)
641 match = narrowspec.match(repo.root, [include], oldexcludes)
640 if not any(match(f) for f in localfiles):
642 if not any(match(f) for f in localfiles):
641 suggestedremovals.append(include)
643 suggestedremovals.append(include)
642 if suggestedremovals:
644 if suggestedremovals:
643 for s in suggestedremovals:
645 for s in suggestedremovals:
644 ui.status(b'%s\n' % s)
646 ui.status(b'%s\n' % s)
645 if (
647 if (
646 ui.promptchoice(
648 ui.promptchoice(
647 _(
649 _(
648 b'remove these unused includes (yn)?'
650 b'remove these unused includes (yn)?'
649 b'$$ &Yes $$ &No'
651 b'$$ &Yes $$ &No'
650 )
652 )
651 )
653 )
652 == 0
654 == 0
653 ):
655 ):
654 removedincludes.update(suggestedremovals)
656 removedincludes.update(suggestedremovals)
655 narrowing = True
657 narrowing = True
656 else:
658 else:
657 ui.status(_(b'found no unused includes\n'))
659 ui.status(_(b'found no unused includes\n'))
658
660
659 if narrowing:
661 if narrowing:
660 newincludes = oldincludes - removedincludes
662 newincludes = oldincludes - removedincludes
661 newexcludes = oldexcludes | addedexcludes
663 newexcludes = oldexcludes | addedexcludes
662 _narrow(
664 _narrow(
663 ui,
665 ui,
664 repo,
666 repo,
665 remote,
667 remote,
666 commoninc,
668 commoninc,
667 oldincludes,
669 oldincludes,
668 oldexcludes,
670 oldexcludes,
669 newincludes,
671 newincludes,
670 newexcludes,
672 newexcludes,
671 opts[b'force_delete_local_changes'],
673 opts[b'force_delete_local_changes'],
672 opts[b'backup'],
674 opts[b'backup'],
673 )
675 )
674 # _narrow() updated the narrowspec and _widen() below needs to
676 # _narrow() updated the narrowspec and _widen() below needs to
675 # use the updated values as its base (otherwise removed includes
677 # use the updated values as its base (otherwise removed includes
676 # and addedexcludes will be lost in the resulting narrowspec)
678 # and addedexcludes will be lost in the resulting narrowspec)
677 oldincludes = newincludes
679 oldincludes = newincludes
678 oldexcludes = newexcludes
680 oldexcludes = newexcludes
679
681
680 if widening:
682 if widening:
681 newincludes = oldincludes | addedincludes
683 newincludes = oldincludes | addedincludes
682 newexcludes = oldexcludes - removedexcludes
684 newexcludes = oldexcludes - removedexcludes
683 _widen(
685 _widen(
684 ui,
686 ui,
685 repo,
687 repo,
686 remote,
688 remote,
687 commoninc,
689 commoninc,
688 oldincludes,
690 oldincludes,
689 oldexcludes,
691 oldexcludes,
690 newincludes,
692 newincludes,
691 newexcludes,
693 newexcludes,
692 )
694 )
693 finally:
695 finally:
694 remote.close()
696 remote.close()
695
697
696 return 0
698 return 0
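The second file in the changeset is mercurial/store.py, whose fncache helpers below distinguish filelog entries (data/<path>.i) from tree-manifest entries (meta/<dir>/00manifest.i). A hedged usage sketch of _matchtrackedpath(), the helper shown a few lines down, with a hypothetical narrow matcher and hypothetical fncache paths; repo is assumed to be an open local repository and the expected results assume the usual behaviour of a path:foo include pattern.

from mercurial import narrowspec, store

# build a narrow matcher that tracks only the foo/ directory
matcher = narrowspec.match(repo.root, [b'path:foo'], [])

store._matchtrackedpath(b'data/foo/bar.txt.i', matcher)     # expected: True
store._matchtrackedpath(b'data/baz/qux.txt.i', matcher)     # expected: False
store._matchtrackedpath(b'meta/foo/00manifest.i', matcher)  # consults matcher.visitdir(b'foo')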
@@ -1,901 +1,919
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator
13 from typing import Generator
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
17 from .thirdparty import attr
18 from .node import hex
18 from .node import hex
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 error,
21 error,
22 manifest,
22 manifest,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 vfs as vfsmod,
26 vfs as vfsmod,
27 )
27 )
28 from .utils import hashutil
28 from .utils import hashutil
29
29
30 parsers = policy.importmod('parsers')
30 parsers = policy.importmod('parsers')
31 # how much bytes should be read from fncache in one read
31 # how much bytes should be read from fncache in one read
32 # It is done to prevent loading large fncache files into memory
32 # It is done to prevent loading large fncache files into memory
33 fncache_chunksize = 10 ** 6
33 fncache_chunksize = 10 ** 6
34
34
35
35
36 def _matchtrackedpath(path, matcher):
36 def _matchtrackedpath(path, matcher):
37 """parses a fncache entry and returns whether the entry is tracking a path
37 """parses a fncache entry and returns whether the entry is tracking a path
38 matched by matcher or not.
38 matched by matcher or not.
39
39
40 If matcher is None, returns True"""
40 If matcher is None, returns True"""
41
41
42 if matcher is None:
42 if matcher is None:
43 return True
43 return True
44 path = decodedir(path)
44 path = decodedir(path)
45 if path.startswith(b'data/'):
45 if path.startswith(b'data/'):
46 return matcher(path[len(b'data/') : -len(b'.i')])
46 return matcher(path[len(b'data/') : -len(b'.i')])
47 elif path.startswith(b'meta/'):
47 elif path.startswith(b'meta/'):
48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
49
49
50 raise error.ProgrammingError(b"cannot decode path %s" % path)
50 raise error.ProgrammingError(b"cannot decode path %s" % path)
51
51
52
52
53 # This avoids a collision between a file named foo and a dir named
53 # This avoids a collision between a file named foo and a dir named
54 # foo.i or foo.d
54 # foo.i or foo.d
55 def _encodedir(path):
55 def _encodedir(path):
56 """
56 """
57 >>> _encodedir(b'data/foo.i')
57 >>> _encodedir(b'data/foo.i')
58 'data/foo.i'
58 'data/foo.i'
59 >>> _encodedir(b'data/foo.i/bla.i')
59 >>> _encodedir(b'data/foo.i/bla.i')
60 'data/foo.i.hg/bla.i'
60 'data/foo.i.hg/bla.i'
61 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 'data/foo.i.hg.hg/bla.i'
62 'data/foo.i.hg.hg/bla.i'
63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 """
65 """
66 return (
66 return (
67 path.replace(b".hg/", b".hg.hg/")
67 path.replace(b".hg/", b".hg.hg/")
68 .replace(b".i/", b".i.hg/")
68 .replace(b".i/", b".i.hg/")
69 .replace(b".d/", b".d.hg/")
69 .replace(b".d/", b".d.hg/")
70 )
70 )
71
71
72
72
73 encodedir = getattr(parsers, 'encodedir', _encodedir)
73 encodedir = getattr(parsers, 'encodedir', _encodedir)
74
74
75
75
76 def decodedir(path):
76 def decodedir(path):
77 """
77 """
78 >>> decodedir(b'data/foo.i')
78 >>> decodedir(b'data/foo.i')
79 'data/foo.i'
79 'data/foo.i'
80 >>> decodedir(b'data/foo.i.hg/bla.i')
80 >>> decodedir(b'data/foo.i.hg/bla.i')
81 'data/foo.i/bla.i'
81 'data/foo.i/bla.i'
82 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
82 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
83 'data/foo.i.hg/bla.i'
83 'data/foo.i.hg/bla.i'
84 """
84 """
85 if b".hg/" not in path:
85 if b".hg/" not in path:
86 return path
86 return path
87 return (
87 return (
88 path.replace(b".d.hg/", b".d/")
88 path.replace(b".d.hg/", b".d/")
89 .replace(b".i.hg/", b".i/")
89 .replace(b".i.hg/", b".i/")
90 .replace(b".hg.hg/", b".hg/")
90 .replace(b".hg.hg/", b".hg/")
91 )
91 )
92
92
93
93
94 def _reserved():
94 def _reserved():
95 """characters that are problematic for filesystems
95 """characters that are problematic for filesystems
96
96
97 * ascii escapes (0..31)
97 * ascii escapes (0..31)
98 * ascii hi (126..255)
98 * ascii hi (126..255)
99 * windows specials
99 * windows specials
100
100
101 these characters will be escaped by encodefunctions
101 these characters will be escaped by encodefunctions
102 """
102 """
103 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 for x in range(32):
104 for x in range(32):
105 yield x
105 yield x
106 for x in range(126, 256):
106 for x in range(126, 256):
107 yield x
107 yield x
108 for x in winreserved:
108 for x in winreserved:
109 yield x
109 yield x
110
110
111
111
112 def _buildencodefun():
112 def _buildencodefun():
113 """
113 """
114 >>> enc, dec = _buildencodefun()
114 >>> enc, dec = _buildencodefun()
115
115
116 >>> enc(b'nothing/special.txt')
116 >>> enc(b'nothing/special.txt')
117 'nothing/special.txt'
117 'nothing/special.txt'
118 >>> dec(b'nothing/special.txt')
118 >>> dec(b'nothing/special.txt')
119 'nothing/special.txt'
119 'nothing/special.txt'
120
120
121 >>> enc(b'HELLO')
121 >>> enc(b'HELLO')
122 '_h_e_l_l_o'
122 '_h_e_l_l_o'
123 >>> dec(b'_h_e_l_l_o')
123 >>> dec(b'_h_e_l_l_o')
124 'HELLO'
124 'HELLO'
125
125
126 >>> enc(b'hello:world?')
126 >>> enc(b'hello:world?')
127 'hello~3aworld~3f'
127 'hello~3aworld~3f'
128 >>> dec(b'hello~3aworld~3f')
128 >>> dec(b'hello~3aworld~3f')
129 'hello:world?'
129 'hello:world?'
130
130
131 >>> enc(b'the\\x07quick\\xADshot')
131 >>> enc(b'the\\x07quick\\xADshot')
132 'the~07quick~adshot'
132 'the~07quick~adshot'
133 >>> dec(b'the~07quick~adshot')
133 >>> dec(b'the~07quick~adshot')
134 'the\\x07quick\\xadshot'
134 'the\\x07quick\\xadshot'
135 """
135 """
136 e = b'_'
136 e = b'_'
137 xchr = pycompat.bytechr
137 xchr = pycompat.bytechr
138 asciistr = list(map(xchr, range(127)))
138 asciistr = list(map(xchr, range(127)))
139 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
139 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
140
140
141 cmap = {x: x for x in asciistr}
141 cmap = {x: x for x in asciistr}
142 for x in _reserved():
142 for x in _reserved():
143 cmap[xchr(x)] = b"~%02x" % x
143 cmap[xchr(x)] = b"~%02x" % x
144 for x in capitals + [ord(e)]:
144 for x in capitals + [ord(e)]:
145 cmap[xchr(x)] = e + xchr(x).lower()
145 cmap[xchr(x)] = e + xchr(x).lower()
146
146
147 dmap = {}
147 dmap = {}
148 for k, v in cmap.items():
148 for k, v in cmap.items():
149 dmap[v] = k
149 dmap[v] = k
150
150
151 def decode(s):
151 def decode(s):
152 i = 0
152 i = 0
153 while i < len(s):
153 while i < len(s):
154 for l in range(1, 4):
154 for l in range(1, 4):
155 try:
155 try:
156 yield dmap[s[i : i + l]]
156 yield dmap[s[i : i + l]]
157 i += l
157 i += l
158 break
158 break
159 except KeyError:
159 except KeyError:
160 pass
160 pass
161 else:
161 else:
162 raise KeyError
162 raise KeyError
163
163
164 return (
164 return (
165 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
165 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
166 lambda s: b''.join(list(decode(s))),
166 lambda s: b''.join(list(decode(s))),
167 )
167 )
168
168
169
169
170 _encodefname, _decodefname = _buildencodefun()
170 _encodefname, _decodefname = _buildencodefun()
171
171
172
172
173 def encodefilename(s):
173 def encodefilename(s):
174 """
174 """
175 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
175 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
176 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
176 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
177 """
177 """
178 return _encodefname(encodedir(s))
178 return _encodefname(encodedir(s))
179
179
180
180
181 def decodefilename(s):
181 def decodefilename(s):
182 """
182 """
183 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
183 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
184 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
184 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
185 """
185 """
186 return decodedir(_decodefname(s))
186 return decodedir(_decodefname(s))
187
187
188
188
189 def _buildlowerencodefun():
189 def _buildlowerencodefun():
190 """
190 """
191 >>> f = _buildlowerencodefun()
191 >>> f = _buildlowerencodefun()
192 >>> f(b'nothing/special.txt')
192 >>> f(b'nothing/special.txt')
193 'nothing/special.txt'
193 'nothing/special.txt'
194 >>> f(b'HELLO')
194 >>> f(b'HELLO')
195 'hello'
195 'hello'
196 >>> f(b'hello:world?')
196 >>> f(b'hello:world?')
197 'hello~3aworld~3f'
197 'hello~3aworld~3f'
198 >>> f(b'the\\x07quick\\xADshot')
198 >>> f(b'the\\x07quick\\xADshot')
199 'the~07quick~adshot'
199 'the~07quick~adshot'
200 """
200 """
201 xchr = pycompat.bytechr
201 xchr = pycompat.bytechr
202 cmap = {xchr(x): xchr(x) for x in range(127)}
202 cmap = {xchr(x): xchr(x) for x in range(127)}
203 for x in _reserved():
203 for x in _reserved():
204 cmap[xchr(x)] = b"~%02x" % x
204 cmap[xchr(x)] = b"~%02x" % x
205 for x in range(ord(b"A"), ord(b"Z") + 1):
205 for x in range(ord(b"A"), ord(b"Z") + 1):
206 cmap[xchr(x)] = xchr(x).lower()
206 cmap[xchr(x)] = xchr(x).lower()
207
207
208 def lowerencode(s):
208 def lowerencode(s):
209 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
209 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
210
210
211 return lowerencode
211 return lowerencode
212
212
213
213
214 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
214 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
215
215
216 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
216 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
217 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
217 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
218 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
218 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
219
219
220
220
221 def _auxencode(path, dotencode):
221 def _auxencode(path, dotencode):
222 """
222 """
223 Encodes filenames containing names reserved by Windows or which end in
223 Encodes filenames containing names reserved by Windows or which end in
224 period or space. Does not touch other single reserved characters c.
224 period or space. Does not touch other single reserved characters c.
225 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
225 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
226 Additionally encodes space or period at the beginning, if dotencode is
226 Additionally encodes space or period at the beginning, if dotencode is
227 True. Parameter path is assumed to be all lowercase.
227 True. Parameter path is assumed to be all lowercase.
228 A segment only needs encoding if a reserved name appears as a
228 A segment only needs encoding if a reserved name appears as a
229 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
229 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
230 doesn't need encoding.
230 doesn't need encoding.
231
231
232 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
232 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
233 >>> _auxencode(s.split(b'/'), True)
233 >>> _auxencode(s.split(b'/'), True)
234 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
234 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
235 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
235 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
236 >>> _auxencode(s.split(b'/'), False)
236 >>> _auxencode(s.split(b'/'), False)
237 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
237 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
238 >>> _auxencode([b'foo. '], True)
238 >>> _auxencode([b'foo. '], True)
239 ['foo.~20']
239 ['foo.~20']
240 >>> _auxencode([b' .foo'], True)
240 >>> _auxencode([b' .foo'], True)
241 ['~20.foo']
241 ['~20.foo']
242 """
242 """
243 for i, n in enumerate(path):
243 for i, n in enumerate(path):
244 if not n:
244 if not n:
245 continue
245 continue
246 if dotencode and n[0] in b'. ':
246 if dotencode and n[0] in b'. ':
247 n = b"~%02x" % ord(n[0:1]) + n[1:]
247 n = b"~%02x" % ord(n[0:1]) + n[1:]
248 path[i] = n
248 path[i] = n
249 else:
249 else:
250 l = n.find(b'.')
250 l = n.find(b'.')
251 if l == -1:
251 if l == -1:
252 l = len(n)
252 l = len(n)
253 if (l == 3 and n[:3] in _winres3) or (
253 if (l == 3 and n[:3] in _winres3) or (
254 l == 4
254 l == 4
255 and n[3:4] <= b'9'
255 and n[3:4] <= b'9'
256 and n[3:4] >= b'1'
256 and n[3:4] >= b'1'
257 and n[:3] in _winres4
257 and n[:3] in _winres4
258 ):
258 ):
259 # encode third letter ('aux' -> 'au~78')
259 # encode third letter ('aux' -> 'au~78')
260 ec = b"~%02x" % ord(n[2:3])
260 ec = b"~%02x" % ord(n[2:3])
261 n = n[0:2] + ec + n[3:]
261 n = n[0:2] + ec + n[3:]
262 path[i] = n
262 path[i] = n
263 if n[-1] in b'. ':
263 if n[-1] in b'. ':
264 # encode last period or space ('foo...' -> 'foo..~2e')
264 # encode last period or space ('foo...' -> 'foo..~2e')
265 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
265 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
266 return path
266 return path
267
267
268
268
269 _maxstorepathlen = 120
269 _maxstorepathlen = 120
270 _dirprefixlen = 8
270 _dirprefixlen = 8
271 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
271 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272
272
273
273
274 def _hashencode(path, dotencode):
274 def _hashencode(path, dotencode):
275 digest = hex(hashutil.sha1(path).digest())
275 digest = hex(hashutil.sha1(path).digest())
276 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
276 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
277 parts = _auxencode(le, dotencode)
277 parts = _auxencode(le, dotencode)
278 basename = parts[-1]
278 basename = parts[-1]
279 _root, ext = os.path.splitext(basename)
279 _root, ext = os.path.splitext(basename)
280 sdirs = []
280 sdirs = []
281 sdirslen = 0
281 sdirslen = 0
282 for p in parts[:-1]:
282 for p in parts[:-1]:
283 d = p[:_dirprefixlen]
283 d = p[:_dirprefixlen]
284 if d[-1] in b'. ':
284 if d[-1] in b'. ':
285 # Windows can't access dirs ending in period or space
285 # Windows can't access dirs ending in period or space
286 d = d[:-1] + b'_'
286 d = d[:-1] + b'_'
287 if sdirslen == 0:
287 if sdirslen == 0:
288 t = len(d)
288 t = len(d)
289 else:
289 else:
290 t = sdirslen + 1 + len(d)
290 t = sdirslen + 1 + len(d)
291 if t > _maxshortdirslen:
291 if t > _maxshortdirslen:
292 break
292 break
293 sdirs.append(d)
293 sdirs.append(d)
294 sdirslen = t
294 sdirslen = t
295 dirs = b'/'.join(sdirs)
295 dirs = b'/'.join(sdirs)
296 if len(dirs) > 0:
296 if len(dirs) > 0:
297 dirs += b'/'
297 dirs += b'/'
298 res = b'dh/' + dirs + digest + ext
298 res = b'dh/' + dirs + digest + ext
299 spaceleft = _maxstorepathlen - len(res)
299 spaceleft = _maxstorepathlen - len(res)
300 if spaceleft > 0:
300 if spaceleft > 0:
301 filler = basename[:spaceleft]
301 filler = basename[:spaceleft]
302 res = b'dh/' + dirs + filler + digest + ext
302 res = b'dh/' + dirs + filler + digest + ext
303 return res
303 return res
304
304
305
305
306 def _hybridencode(path, dotencode):
306 def _hybridencode(path, dotencode):
307 """encodes path with a length limit
307 """encodes path with a length limit
308
308
309 Encodes all paths that begin with 'data/', according to the following.
309 Encodes all paths that begin with 'data/', according to the following.
310
310
311 Default encoding (reversible):
311 Default encoding (reversible):
312
312
313 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
313 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
314 characters are encoded as '~xx', where xx is the two digit hex code
314 characters are encoded as '~xx', where xx is the two digit hex code
315 of the character (see encodefilename).
315 of the character (see encodefilename).
316 Relevant path components consisting of Windows reserved filenames are
316 Relevant path components consisting of Windows reserved filenames are
317 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
317 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
318
318
319 Hashed encoding (not reversible):
319 Hashed encoding (not reversible):
320
320
321 If the default-encoded path is longer than _maxstorepathlen, a
321 If the default-encoded path is longer than _maxstorepathlen, a
322 non-reversible hybrid hashing of the path is done instead.
322 non-reversible hybrid hashing of the path is done instead.
323 This encoding uses up to _dirprefixlen characters of all directory
323 This encoding uses up to _dirprefixlen characters of all directory
324 levels of the lowerencoded path, but not more levels than can fit into
324 levels of the lowerencoded path, but not more levels than can fit into
325 _maxshortdirslen.
325 _maxshortdirslen.
326 Then follows the filler followed by the sha digest of the full path.
326 Then follows the filler followed by the sha digest of the full path.
327 The filler is the beginning of the basename of the lowerencoded path
327 The filler is the beginning of the basename of the lowerencoded path
328 (the basename is everything after the last path separator). The filler
328 (the basename is everything after the last path separator). The filler
329 is as long as possible, filling in characters from the basename until
329 is as long as possible, filling in characters from the basename until
330 the encoded path has _maxstorepathlen characters (or all chars of the
330 the encoded path has _maxstorepathlen characters (or all chars of the
331 basename have been taken).
331 basename have been taken).
332 The extension (e.g. '.i' or '.d') is preserved.
332 The extension (e.g. '.i' or '.d') is preserved.
333
333
334 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
334 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
335 encoding was used.
335 encoding was used.
336 """
336 """
337 path = encodedir(path)
337 path = encodedir(path)
338 ef = _encodefname(path).split(b'/')
338 ef = _encodefname(path).split(b'/')
339 res = b'/'.join(_auxencode(ef, dotencode))
339 res = b'/'.join(_auxencode(ef, dotencode))
340 if len(res) > _maxstorepathlen:
340 if len(res) > _maxstorepathlen:
341 res = _hashencode(path, dotencode)
341 res = _hashencode(path, dotencode)
342 return res
342 return res
343
343
344
344
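The docstring below describes the two stages precisely; the following standalone sketch is a deliberately simplified illustration of the length-based fallback only. The stand-in helper performs none of the real character, reserved-name, or directory-prefix encoding done by _encodefname, _auxencode and _hashencode above.

import hashlib

_MAXSTOREPATHLEN = 120   # mirrors _maxstorepathlen above

def hybrid_encode_sketch(path: bytes) -> bytes:
    # Stage 1: reversible encoding (stand-in: identity).
    reversible = path
    if len(reversible) <= _MAXSTOREPATHLEN:
        return reversible
    # Stage 2: the default-encoded form is too long, so fall back to a
    # non-reversible name under 'dh/' built from the sha1 of the full path.
    digest = hashlib.sha1(path).hexdigest().encode('ascii')
    return b'dh/' + digest + b'.i'

short = b'data/some/file.txt.i'
long = b'data/' + b'x' * 300 + b'/file.txt.i'
assert hybrid_encode_sketch(short) == short           # stays reversible
assert hybrid_encode_sketch(long).startswith(b'dh/')  # hashed fallback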
345 def _pathencode(path):
345 def _pathencode(path):
346 de = encodedir(path)
346 de = encodedir(path)
347 if len(path) > _maxstorepathlen:
347 if len(path) > _maxstorepathlen:
348 return _hashencode(de, True)
348 return _hashencode(de, True)
349 ef = _encodefname(de).split(b'/')
349 ef = _encodefname(de).split(b'/')
350 res = b'/'.join(_auxencode(ef, True))
350 res = b'/'.join(_auxencode(ef, True))
351 if len(res) > _maxstorepathlen:
351 if len(res) > _maxstorepathlen:
352 return _hashencode(de, True)
352 return _hashencode(de, True)
353 return res
353 return res
354
354
355
355
356 _pathencode = getattr(parsers, 'pathencode', _pathencode)
356 _pathencode = getattr(parsers, 'pathencode', _pathencode)
357
357
358
358
359 def _plainhybridencode(f):
359 def _plainhybridencode(f):
360 return _hybridencode(f, False)
360 return _hybridencode(f, False)
361
361
362
362
363 def _calcmode(vfs):
363 def _calcmode(vfs):
364 try:
364 try:
365 # files in .hg/ will be created using this mode
365 # files in .hg/ will be created using this mode
366 mode = vfs.stat().st_mode
366 mode = vfs.stat().st_mode
367 # avoid some useless chmods
367 # avoid some useless chmods
368 if (0o777 & ~util.umask) == (0o777 & mode):
368 if (0o777 & ~util.umask) == (0o777 & mode):
369 mode = None
369 mode = None
370 except OSError:
370 except OSError:
371 mode = None
371 mode = None
372 return mode
372 return mode
373
373
374
374
375 _data = [
375 _data = [
376 b'bookmarks',
376 b'bookmarks',
377 b'narrowspec',
377 b'narrowspec',
378 b'data',
378 b'data',
379 b'meta',
379 b'meta',
380 b'00manifest.d',
380 b'00manifest.d',
381 b'00manifest.i',
381 b'00manifest.i',
382 b'00changelog.d',
382 b'00changelog.d',
383 b'00changelog.i',
383 b'00changelog.i',
384 b'phaseroots',
384 b'phaseroots',
385 b'obsstore',
385 b'obsstore',
386 b'requires',
386 b'requires',
387 ]
387 ]
388
388
389 REVLOG_FILES_MAIN_EXT = (b'.i',)
389 REVLOG_FILES_MAIN_EXT = (b'.i',)
390 REVLOG_FILES_OTHER_EXT = (
390 REVLOG_FILES_OTHER_EXT = (
391 b'.idx',
391 b'.idx',
392 b'.d',
392 b'.d',
393 b'.dat',
393 b'.dat',
394 b'.n',
394 b'.n',
395 b'.nd',
395 b'.nd',
396 b'.sda',
396 b'.sda',
397 )
397 )
398 # files that are "volatile" and might change between listing and streaming
398 # files that are "volatile" and might change between listing and streaming
399 #
399 #
400 # note: the ".nd" files are nodemap data and won't "change", but they might be
400 # note: the ".nd" files are nodemap data and won't "change", but they might be
401 # deleted.
401 # deleted.
402 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
402 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
403
403
404 # some exceptions to the above matching
404 # some exceptions to the above matching
405 #
405 #
406 # XXX This is currently not in use because of issue6542
406 # XXX This is currently not in use because of issue6542
407 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
407 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
408
408
409
409
410 def is_revlog(f, kind, st):
410 def is_revlog(f, kind, st):
411 if kind != stat.S_IFREG:
411 if kind != stat.S_IFREG:
412 return None
412 return None
413 return revlog_type(f)
413 return revlog_type(f)
414
414
415
415
416 def revlog_type(f):
416 def revlog_type(f):
417 # XXX we need to filter the `undo.` files created by the transaction here; however,
417 # XXX we need to filter the `undo.` files created by the transaction here; however,
418 # being naive about it also filters out revlogs for `undo.*` files, leading to
418 # being naive about it also filters out revlogs for `undo.*` files, leading to
419 # issue6542. So we no longer use EXCLUDED.
419 # issue6542. So we no longer use EXCLUDED.
420 if f.endswith(REVLOG_FILES_MAIN_EXT):
420 if f.endswith(REVLOG_FILES_MAIN_EXT):
421 return FILEFLAGS_REVLOG_MAIN
421 return FILEFLAGS_REVLOG_MAIN
422 elif f.endswith(REVLOG_FILES_OTHER_EXT):
422 elif f.endswith(REVLOG_FILES_OTHER_EXT):
423 t = FILETYPE_FILELOG_OTHER
423 t = FILETYPE_FILELOG_OTHER
424 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
424 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
425 t |= FILEFLAGS_VOLATILE
425 t |= FILEFLAGS_VOLATILE
426 return t
426 return t
427 return None
427 return None
428
428
429
429
430 # the file is part of changelog data
430 # the file is part of changelog data
431 FILEFLAGS_CHANGELOG = 1 << 13
431 FILEFLAGS_CHANGELOG = 1 << 13
432 # the file is part of manifest data
432 # the file is part of manifest data
433 FILEFLAGS_MANIFESTLOG = 1 << 12
433 FILEFLAGS_MANIFESTLOG = 1 << 12
434 # the file is part of filelog data
434 # the file is part of filelog data
435 FILEFLAGS_FILELOG = 1 << 11
435 FILEFLAGS_FILELOG = 1 << 11
436 # files that are not directly part of a revlog
436 # files that are not directly part of a revlog
437 FILEFLAGS_OTHER = 1 << 10
437 FILEFLAGS_OTHER = 1 << 10
438
438
439 # the main entry point for a revlog
439 # the main entry point for a revlog
440 FILEFLAGS_REVLOG_MAIN = 1 << 1
440 FILEFLAGS_REVLOG_MAIN = 1 << 1
441 # a secondary file for a revlog
441 # a secondary file for a revlog
442 FILEFLAGS_REVLOG_OTHER = 1 << 0
442 FILEFLAGS_REVLOG_OTHER = 1 << 0
443
443
444 # files that are "volatile" and might change between listing and streaming
444 # files that are "volatile" and might change between listing and streaming
445 FILEFLAGS_VOLATILE = 1 << 20
445 FILEFLAGS_VOLATILE = 1 << 20
446
446
447 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
447 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
448 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
448 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
449 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
449 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
450 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
450 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
451 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
451 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
452 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
452 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
453 FILETYPE_OTHER = FILEFLAGS_OTHER
453 FILETYPE_OTHER = FILEFLAGS_OTHER
454
454
455
455
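Putting the flag constants together: revlog_type returns a bitmask combining a "which log" bit with a "main vs. secondary" bit, plus the volatile marker, and callers test it with bitwise AND. A small illustration, assuming it is evaluated alongside the definitions above:

# '.i' is the main entry point of a revlog
t = revlog_type(b'data/foo/bar.i')
assert t == FILEFLAGS_REVLOG_MAIN

# '.d' is secondary data: the filelog flag plus the "other" revlog bit
t = revlog_type(b'data/foo/bar.d')
assert t == (FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER)
assert not t & FILEFLAGS_VOLATILE

# '.n'/'.nd' nodemap files additionally carry the volatile marker
t = revlog_type(b'00changelog.n')
assert t & FILEFLAGS_VOLATILE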
456 @attr.s(slots=True)
456 @attr.s(slots=True)
457 class StoreEntry:
457 class StoreEntry:
458 """An entry in the store
458 """An entry in the store
459
459
460 This is returned by `store.walk` and represents some data in the store."""
460 This is returned by `store.walk` and represents some data in the store."""
461
461
462 unencoded_path = attr.ib()
462 unencoded_path = attr.ib()
463 is_revlog = attr.ib(default=False)
463 is_revlog = attr.ib(default=False)
464 revlog_type = attr.ib(default=None)
464 revlog_type = attr.ib(default=None)
465 is_revlog_main = attr.ib(default=None)
465 is_revlog_main = attr.ib(default=None)
466 is_volatile = attr.ib(default=False)
466 is_volatile = attr.ib(default=False)
467 file_size = attr.ib(default=None)
467 file_size = attr.ib(default=None)
468
468
469 def files(self):
470 return [
471 StoreFile(
472 unencoded_path=self.unencoded_path,
473 file_size=self.file_size,
474 is_volatile=self.is_volatile,
475 )
476 ]
477
478
479 @attr.s(slots=True)
480 class StoreFile:
481 """a file matching an entry"""
482
483 unencoded_path = attr.ib()
484 file_size = attr.ib()
485 is_volatile = attr.ib(default=False)
486
469
487
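The new StoreFile class and StoreEntry.files() method let callers ask an entry for the concrete files backing it instead of reading unencoded_path and file_size off the entry directly; the streamclone hunk later in this changeset switches to exactly this pattern. A hedged consumer sketch (the store argument is assumed to be any of the store classes defined below):

def total_store_size(store):
    """Sum the on-disk size of every file reported by store.walk()."""
    total = 0
    for entry in store.walk():
        for f in entry.files():      # each f is a StoreFile
            if f.file_size:
                total += f.file_size
    return total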
470 class basicstore:
488 class basicstore:
471 '''base class for local repository stores'''
489 '''base class for local repository stores'''
472
490
473 def __init__(self, path, vfstype):
491 def __init__(self, path, vfstype):
474 vfs = vfstype(path)
492 vfs = vfstype(path)
475 self.path = vfs.base
493 self.path = vfs.base
476 self.createmode = _calcmode(vfs)
494 self.createmode = _calcmode(vfs)
477 vfs.createmode = self.createmode
495 vfs.createmode = self.createmode
478 self.rawvfs = vfs
496 self.rawvfs = vfs
479 self.vfs = vfsmod.filtervfs(vfs, encodedir)
497 self.vfs = vfsmod.filtervfs(vfs, encodedir)
480 self.opener = self.vfs
498 self.opener = self.vfs
481
499
482 def join(self, f):
500 def join(self, f):
483 return self.path + b'/' + encodedir(f)
501 return self.path + b'/' + encodedir(f)
484
502
485 def _walk(self, relpath, recurse):
503 def _walk(self, relpath, recurse):
486 '''yields (revlog_type, unencoded, size)'''
504 '''yields (revlog_type, unencoded, size)'''
487 path = self.path
505 path = self.path
488 if relpath:
506 if relpath:
489 path += b'/' + relpath
507 path += b'/' + relpath
490 striplen = len(self.path) + 1
508 striplen = len(self.path) + 1
491 l = []
509 l = []
492 if self.rawvfs.isdir(path):
510 if self.rawvfs.isdir(path):
493 visit = [path]
511 visit = [path]
494 readdir = self.rawvfs.readdir
512 readdir = self.rawvfs.readdir
495 while visit:
513 while visit:
496 p = visit.pop()
514 p = visit.pop()
497 for f, kind, st in readdir(p, stat=True):
515 for f, kind, st in readdir(p, stat=True):
498 fp = p + b'/' + f
516 fp = p + b'/' + f
499 rl_type = is_revlog(f, kind, st)
517 rl_type = is_revlog(f, kind, st)
500 if rl_type is not None:
518 if rl_type is not None:
501 n = util.pconvert(fp[striplen:])
519 n = util.pconvert(fp[striplen:])
502 l.append((rl_type, decodedir(n), st.st_size))
520 l.append((rl_type, decodedir(n), st.st_size))
503 elif kind == stat.S_IFDIR and recurse:
521 elif kind == stat.S_IFDIR and recurse:
504 visit.append(fp)
522 visit.append(fp)
505 l.sort()
523 l.sort()
506 return l
524 return l
507
525
508 def changelog(self, trypending, concurrencychecker=None):
526 def changelog(self, trypending, concurrencychecker=None):
509 return changelog.changelog(
527 return changelog.changelog(
510 self.vfs,
528 self.vfs,
511 trypending=trypending,
529 trypending=trypending,
512 concurrencychecker=concurrencychecker,
530 concurrencychecker=concurrencychecker,
513 )
531 )
514
532
515 def manifestlog(self, repo, storenarrowmatch):
533 def manifestlog(self, repo, storenarrowmatch):
516 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
534 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
517 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
535 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
518
536
519 def datafiles(
537 def datafiles(
520 self, matcher=None, undecodable=None
538 self, matcher=None, undecodable=None
521 ) -> Generator[StoreEntry, None, None]:
539 ) -> Generator[StoreEntry, None, None]:
522 """Like walk, but excluding the changelog and root manifest.
540 """Like walk, but excluding the changelog and root manifest.
523
541
524 When [undecodable] is None, revlog names that can't be
542 When [undecodable] is None, revlog names that can't be
525 decoded cause an exception. When it is provided, it should
543 decoded cause an exception. When it is provided, it should
526 be a list and the filenames that can't be decoded are added
544 be a list and the filenames that can't be decoded are added
527 to it instead. This is very rarely needed."""
545 to it instead. This is very rarely needed."""
528 files = self._walk(b'data', True) + self._walk(b'meta', True)
546 files = self._walk(b'data', True) + self._walk(b'meta', True)
529 for (t, u, s) in files:
547 for (t, u, s) in files:
530 if t is not None:
548 if t is not None:
531 yield StoreEntry(
549 yield StoreEntry(
532 unencoded_path=u,
550 unencoded_path=u,
533 is_revlog=True,
551 is_revlog=True,
534 revlog_type=FILEFLAGS_FILELOG,
552 revlog_type=FILEFLAGS_FILELOG,
535 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
553 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
536 is_volatile=bool(t & FILEFLAGS_VOLATILE),
554 is_volatile=bool(t & FILEFLAGS_VOLATILE),
537 file_size=s,
555 file_size=s,
538 )
556 )
539
557
540 def topfiles(self) -> Generator[StoreEntry, None, None]:
558 def topfiles(self) -> Generator[StoreEntry, None, None]:
541 # yield manifest before changelog
559 # yield manifest before changelog
542 files = reversed(self._walk(b'', False))
560 files = reversed(self._walk(b'', False))
543 for (t, u, s) in files:
561 for (t, u, s) in files:
544 if u.startswith(b'00changelog'):
562 if u.startswith(b'00changelog'):
545 revlog_type = FILEFLAGS_CHANGELOG
563 revlog_type = FILEFLAGS_CHANGELOG
546 elif u.startswith(b'00manifest'):
564 elif u.startswith(b'00manifest'):
547 revlog_type = FILEFLAGS_MANIFESTLOG
565 revlog_type = FILEFLAGS_MANIFESTLOG
548 else:
566 else:
549 revlog_type = None
567 revlog_type = None
550 yield StoreEntry(
568 yield StoreEntry(
551 unencoded_path=u,
569 unencoded_path=u,
552 is_revlog=revlog_type is not None,
570 is_revlog=revlog_type is not None,
553 revlog_type=revlog_type,
571 revlog_type=revlog_type,
554 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
572 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
555 is_volatile=bool(t & FILEFLAGS_VOLATILE),
573 is_volatile=bool(t & FILEFLAGS_VOLATILE),
556 file_size=s,
574 file_size=s,
557 )
575 )
558
576
559 def walk(self, matcher=None) -> Generator[StoreEntry, None, None]:
577 def walk(self, matcher=None) -> Generator[StoreEntry, None, None]:
560 """return files related to data storage (ie: revlogs)
578 """return files related to data storage (ie: revlogs)
561
579
562 yields StoreEntry objects
580 yields StoreEntry objects
563
581
564 if a matcher is passed, only storage files for tracked paths that
582 if a matcher is passed, only storage files for tracked paths that
565 match the matcher are yielded
583 match the matcher are yielded
566 """
584 """
567 # yield data files first
585 # yield data files first
568 for x in self.datafiles(matcher):
586 for x in self.datafiles(matcher):
569 yield x
587 yield x
570 for x in self.topfiles():
588 for x in self.topfiles():
571 yield x
589 yield x
572
590
573 def copylist(self):
591 def copylist(self):
574 return _data
592 return _data
575
593
576 def write(self, tr):
594 def write(self, tr):
577 pass
595 pass
578
596
579 def invalidatecaches(self):
597 def invalidatecaches(self):
580 pass
598 pass
581
599
582 def markremoved(self, fn):
600 def markremoved(self, fn):
583 pass
601 pass
584
602
585 def __contains__(self, path):
603 def __contains__(self, path):
586 '''Checks if the store contains path'''
604 '''Checks if the store contains path'''
587 path = b"/".join((b"data", path))
605 path = b"/".join((b"data", path))
588 # file?
606 # file?
589 if self.vfs.exists(path + b".i"):
607 if self.vfs.exists(path + b".i"):
590 return True
608 return True
591 # dir?
609 # dir?
592 if not path.endswith(b"/"):
610 if not path.endswith(b"/"):
593 path = path + b"/"
611 path = path + b"/"
594 return self.vfs.exists(path)
612 return self.vfs.exists(path)
595
613
596
614
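__contains__ answers "is this path tracked in the store?" by first probing for an exact filelog (data/<path>.i) and then for a directory prefix (data/<path>/). A brief, hedged usage sketch (repo is an assumed local repository object):

store = repo.store
if b'src/module.py' in store:
    # either data/src/module.py.i exists (a tracked file) or
    # data/src/module.py/ exists (a tracked directory)
    print('path has history in the store')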
597 class encodedstore(basicstore):
615 class encodedstore(basicstore):
598 def __init__(self, path, vfstype):
616 def __init__(self, path, vfstype):
599 vfs = vfstype(path + b'/store')
617 vfs = vfstype(path + b'/store')
600 self.path = vfs.base
618 self.path = vfs.base
601 self.createmode = _calcmode(vfs)
619 self.createmode = _calcmode(vfs)
602 vfs.createmode = self.createmode
620 vfs.createmode = self.createmode
603 self.rawvfs = vfs
621 self.rawvfs = vfs
604 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
622 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
605 self.opener = self.vfs
623 self.opener = self.vfs
606
624
607 # note: topfiles would also need a decode phase. It is just that in
625 # note: topfiles would also need a decode phase. It is just that in
608 # practice we do not have any files outside of `data/` that need encoding.
626 # practice we do not have any files outside of `data/` that need encoding.
609 # However, that might change, so we should probably add a test and encode/
627 # However, that might change, so we should probably add a test and encode/
610 # decode support for it too. See issue6548.
628 # decode support for it too. See issue6548.
611
629
612 def datafiles(
630 def datafiles(
613 self, matcher=None, undecodable=None
631 self, matcher=None, undecodable=None
614 ) -> Generator[StoreEntry, None, None]:
632 ) -> Generator[StoreEntry, None, None]:
615 for entry in super(encodedstore, self).datafiles():
633 for entry in super(encodedstore, self).datafiles():
616 try:
634 try:
617 f1 = entry.unencoded_path
635 f1 = entry.unencoded_path
618 f2 = decodefilename(f1)
636 f2 = decodefilename(f1)
619 except KeyError:
637 except KeyError:
620 if undecodable is None:
638 if undecodable is None:
621 msg = _(b'undecodable revlog name %s') % f1
639 msg = _(b'undecodable revlog name %s') % f1
622 raise error.StorageError(msg)
640 raise error.StorageError(msg)
623 else:
641 else:
624 undecodable.append(f1)
642 undecodable.append(f1)
625 continue
643 continue
626 if not _matchtrackedpath(f2, matcher):
644 if not _matchtrackedpath(f2, matcher):
627 continue
645 continue
628 entry.unencoded_path = f2
646 entry.unencoded_path = f2
629 yield entry
647 yield entry
630
648
631 def join(self, f):
649 def join(self, f):
632 return self.path + b'/' + encodefilename(f)
650 return self.path + b'/' + encodefilename(f)
633
651
634 def copylist(self):
652 def copylist(self):
635 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
653 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
636
654
637
655
638 class fncache:
656 class fncache:
639 # the filename used to be partially encoded
657 # the filename used to be partially encoded
640 # hence the encodedir/decodedir dance
658 # hence the encodedir/decodedir dance
641 def __init__(self, vfs):
659 def __init__(self, vfs):
642 self.vfs = vfs
660 self.vfs = vfs
643 self._ignores = set()
661 self._ignores = set()
644 self.entries = None
662 self.entries = None
645 self._dirty = False
663 self._dirty = False
646 # set of new additions to fncache
664 # set of new additions to fncache
647 self.addls = set()
665 self.addls = set()
648
666
649 def ensureloaded(self, warn=None):
667 def ensureloaded(self, warn=None):
650 """read the fncache file if not already read.
668 """read the fncache file if not already read.
651
669
652 If the file on disk is corrupted, raise. If warn is provided,
670 If the file on disk is corrupted, raise. If warn is provided,
653 warn and keep going instead."""
671 warn and keep going instead."""
654 if self.entries is None:
672 if self.entries is None:
655 self._load(warn)
673 self._load(warn)
656
674
657 def _load(self, warn=None):
675 def _load(self, warn=None):
658 '''fill the entries from the fncache file'''
676 '''fill the entries from the fncache file'''
659 self._dirty = False
677 self._dirty = False
660 try:
678 try:
661 fp = self.vfs(b'fncache', mode=b'rb')
679 fp = self.vfs(b'fncache', mode=b'rb')
662 except IOError:
680 except IOError:
663 # skip nonexistent file
681 # skip nonexistent file
664 self.entries = set()
682 self.entries = set()
665 return
683 return
666
684
667 self.entries = set()
685 self.entries = set()
668 chunk = b''
686 chunk = b''
669 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
687 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
670 chunk += c
688 chunk += c
671 try:
689 try:
672 p = chunk.rindex(b'\n')
690 p = chunk.rindex(b'\n')
673 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
691 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
674 chunk = chunk[p + 1 :]
692 chunk = chunk[p + 1 :]
675 except ValueError:
693 except ValueError:
676 # substring '\n' not found, maybe the entry is bigger than the
694 # substring '\n' not found, maybe the entry is bigger than the
677 # chunksize, so let's keep iterating
695 # chunksize, so let's keep iterating
678 pass
696 pass
679
697
680 if chunk:
698 if chunk:
681 msg = _(b"fncache does not end with a newline")
699 msg = _(b"fncache does not end with a newline")
682 if warn:
700 if warn:
683 warn(msg + b'\n')
701 warn(msg + b'\n')
684 else:
702 else:
685 raise error.Abort(
703 raise error.Abort(
686 msg,
704 msg,
687 hint=_(
705 hint=_(
688 b"use 'hg debugrebuildfncache' to "
706 b"use 'hg debugrebuildfncache' to "
689 b"rebuild the fncache"
707 b"rebuild the fncache"
690 ),
708 ),
691 )
709 )
692 self._checkentries(fp, warn)
710 self._checkentries(fp, warn)
693 fp.close()
711 fp.close()
694
712
695 def _checkentries(self, fp, warn):
713 def _checkentries(self, fp, warn):
696 """make sure there is no empty string in entries"""
714 """make sure there is no empty string in entries"""
697 if b'' in self.entries:
715 if b'' in self.entries:
698 fp.seek(0)
716 fp.seek(0)
699 for n, line in enumerate(fp):
717 for n, line in enumerate(fp):
700 if not line.rstrip(b'\n'):
718 if not line.rstrip(b'\n'):
701 t = _(b'invalid entry in fncache, line %d') % (n + 1)
719 t = _(b'invalid entry in fncache, line %d') % (n + 1)
702 if warn:
720 if warn:
703 warn(t + b'\n')
721 warn(t + b'\n')
704 else:
722 else:
705 raise error.Abort(t)
723 raise error.Abort(t)
706
724
707 def write(self, tr):
725 def write(self, tr):
708 if self._dirty:
726 if self._dirty:
709 assert self.entries is not None
727 assert self.entries is not None
710 self.entries = self.entries | self.addls
728 self.entries = self.entries | self.addls
711 self.addls = set()
729 self.addls = set()
712 tr.addbackup(b'fncache')
730 tr.addbackup(b'fncache')
713 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
731 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
714 if self.entries:
732 if self.entries:
715 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
733 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
716 fp.close()
734 fp.close()
717 self._dirty = False
735 self._dirty = False
718 if self.addls:
736 if self.addls:
719 # if we have just new entries, let's append them to the fncache
737 # if we have just new entries, let's append them to the fncache
720 tr.addbackup(b'fncache')
738 tr.addbackup(b'fncache')
721 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
739 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
722 if self.addls:
740 if self.addls:
723 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
741 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
724 fp.close()
742 fp.close()
725 self.entries = None
743 self.entries = None
726 self.addls = set()
744 self.addls = set()
727
745
728 def addignore(self, fn):
746 def addignore(self, fn):
729 self._ignores.add(fn)
747 self._ignores.add(fn)
730
748
731 def add(self, fn):
749 def add(self, fn):
732 if fn in self._ignores:
750 if fn in self._ignores:
733 return
751 return
734 if self.entries is None:
752 if self.entries is None:
735 self._load()
753 self._load()
736 if fn not in self.entries:
754 if fn not in self.entries:
737 self.addls.add(fn)
755 self.addls.add(fn)
738
756
739 def remove(self, fn):
757 def remove(self, fn):
740 if self.entries is None:
758 if self.entries is None:
741 self._load()
759 self._load()
742 if fn in self.addls:
760 if fn in self.addls:
743 self.addls.remove(fn)
761 self.addls.remove(fn)
744 return
762 return
745 try:
763 try:
746 self.entries.remove(fn)
764 self.entries.remove(fn)
747 self._dirty = True
765 self._dirty = True
748 except KeyError:
766 except KeyError:
749 pass
767 pass
750
768
751 def __contains__(self, fn):
769 def __contains__(self, fn):
752 if fn in self.addls:
770 if fn in self.addls:
753 return True
771 return True
754 if self.entries is None:
772 if self.entries is None:
755 self._load()
773 self._load()
756 return fn in self.entries
774 return fn in self.entries
757
775
758 def __iter__(self):
776 def __iter__(self):
759 if self.entries is None:
777 if self.entries is None:
760 self._load()
778 self._load()
761 return iter(self.entries | self.addls)
779 return iter(self.entries | self.addls)
762
780
763
781
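On disk the fncache is a flat, newline-terminated list of (dir-encoded) store paths, and _load above reads it in fixed-size chunks, splitting only at the last newline of each chunk so an entry may safely straddle a chunk boundary. A standalone, hedged sketch of that chunked split (fp is any binary file object; dir-decoding is omitted):

import functools

CHUNK = 128 * 1024   # stand-in for fncache_chunksize

def read_fncache_entries(fp):
    entries = set()
    buf = b''
    for block in iter(functools.partial(fp.read, CHUNK), b''):
        buf += block
        try:
            p = buf.rindex(b'\n')
        except ValueError:
            continue                 # no newline yet: keep accumulating
        entries.update(buf[: p + 1].splitlines())
        buf = buf[p + 1:]
    if buf:
        raise ValueError('fncache does not end with a newline')
    return entries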
764 class _fncachevfs(vfsmod.proxyvfs):
782 class _fncachevfs(vfsmod.proxyvfs):
765 def __init__(self, vfs, fnc, encode):
783 def __init__(self, vfs, fnc, encode):
766 vfsmod.proxyvfs.__init__(self, vfs)
784 vfsmod.proxyvfs.__init__(self, vfs)
767 self.fncache = fnc
785 self.fncache = fnc
768 self.encode = encode
786 self.encode = encode
769
787
770 def __call__(self, path, mode=b'r', *args, **kw):
788 def __call__(self, path, mode=b'r', *args, **kw):
771 encoded = self.encode(path)
789 encoded = self.encode(path)
772 if (
790 if (
773 mode not in (b'r', b'rb')
791 mode not in (b'r', b'rb')
774 and (path.startswith(b'data/') or path.startswith(b'meta/'))
792 and (path.startswith(b'data/') or path.startswith(b'meta/'))
775 and revlog_type(path) is not None
793 and revlog_type(path) is not None
776 ):
794 ):
777 # do not trigger a fncache load when adding a file that already is
795 # do not trigger a fncache load when adding a file that already is
778 # known to exist.
796 # known to exist.
779 notload = self.fncache.entries is None and self.vfs.exists(encoded)
797 notload = self.fncache.entries is None and self.vfs.exists(encoded)
780 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
798 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
781 # when appending to an existing file, if the file has size zero,
799 # when appending to an existing file, if the file has size zero,
782 # it should be considered as missing. Such zero-size files are
800 # it should be considered as missing. Such zero-size files are
783 # the result of truncation when a transaction is aborted.
801 # the result of truncation when a transaction is aborted.
784 notload = False
802 notload = False
785 if not notload:
803 if not notload:
786 self.fncache.add(path)
804 self.fncache.add(path)
787 return self.vfs(encoded, mode, *args, **kw)
805 return self.vfs(encoded, mode, *args, **kw)
788
806
789 def join(self, path):
807 def join(self, path):
790 if path:
808 if path:
791 return self.vfs.join(self.encode(path))
809 return self.vfs.join(self.encode(path))
792 else:
810 else:
793 return self.vfs.join(path)
811 return self.vfs.join(path)
794
812
795 def register_file(self, path):
813 def register_file(self, path):
796 """generic hook point to lets fncache steer its stew"""
814 """generic hook point to lets fncache steer its stew"""
797 if path.startswith(b'data/') or path.startswith(b'meta/'):
815 if path.startswith(b'data/') or path.startswith(b'meta/'):
798 self.fncache.add(path)
816 self.fncache.add(path)
799
817
800
818
801 class fncachestore(basicstore):
819 class fncachestore(basicstore):
802 def __init__(self, path, vfstype, dotencode):
820 def __init__(self, path, vfstype, dotencode):
803 if dotencode:
821 if dotencode:
804 encode = _pathencode
822 encode = _pathencode
805 else:
823 else:
806 encode = _plainhybridencode
824 encode = _plainhybridencode
807 self.encode = encode
825 self.encode = encode
808 vfs = vfstype(path + b'/store')
826 vfs = vfstype(path + b'/store')
809 self.path = vfs.base
827 self.path = vfs.base
810 self.pathsep = self.path + b'/'
828 self.pathsep = self.path + b'/'
811 self.createmode = _calcmode(vfs)
829 self.createmode = _calcmode(vfs)
812 vfs.createmode = self.createmode
830 vfs.createmode = self.createmode
813 self.rawvfs = vfs
831 self.rawvfs = vfs
814 fnc = fncache(vfs)
832 fnc = fncache(vfs)
815 self.fncache = fnc
833 self.fncache = fnc
816 self.vfs = _fncachevfs(vfs, fnc, encode)
834 self.vfs = _fncachevfs(vfs, fnc, encode)
817 self.opener = self.vfs
835 self.opener = self.vfs
818
836
819 def join(self, f):
837 def join(self, f):
820 return self.pathsep + self.encode(f)
838 return self.pathsep + self.encode(f)
821
839
822 def getsize(self, path):
840 def getsize(self, path):
823 return self.rawvfs.stat(path).st_size
841 return self.rawvfs.stat(path).st_size
824
842
825 def datafiles(
843 def datafiles(
826 self, matcher=None, undecodable=None
844 self, matcher=None, undecodable=None
827 ) -> Generator[StoreEntry, None, None]:
845 ) -> Generator[StoreEntry, None, None]:
828 for f in sorted(self.fncache):
846 for f in sorted(self.fncache):
829 if not _matchtrackedpath(f, matcher):
847 if not _matchtrackedpath(f, matcher):
830 continue
848 continue
831 ef = self.encode(f)
849 ef = self.encode(f)
832 t = revlog_type(f)
850 t = revlog_type(f)
833 if t is None:
851 if t is None:
834 # Note: this should not be in the fncache then…
852 # Note: this should not be in the fncache then…
835 #
853 #
836 # However, the fncache might contain such files added by a
854 # However, the fncache might contain such files added by a
837 # previous version of Mercurial.
855 # previous version of Mercurial.
838 continue
856 continue
839 t |= FILEFLAGS_FILELOG
857 t |= FILEFLAGS_FILELOG
840 try:
858 try:
841 yield StoreEntry(
859 yield StoreEntry(
842 unencoded_path=f,
860 unencoded_path=f,
843 is_revlog=True,
861 is_revlog=True,
844 revlog_type=FILEFLAGS_FILELOG,
862 revlog_type=FILEFLAGS_FILELOG,
845 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
863 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
846 is_volatile=bool(t & FILEFLAGS_VOLATILE),
864 is_volatile=bool(t & FILEFLAGS_VOLATILE),
847 file_size=self.getsize(ef),
865 file_size=self.getsize(ef),
848 )
866 )
849 except FileNotFoundError:
867 except FileNotFoundError:
850 pass
868 pass
851
869
852 def copylist(self):
870 def copylist(self):
853 d = (
871 d = (
854 b'bookmarks',
872 b'bookmarks',
855 b'narrowspec',
873 b'narrowspec',
856 b'data',
874 b'data',
857 b'meta',
875 b'meta',
858 b'dh',
876 b'dh',
859 b'fncache',
877 b'fncache',
860 b'phaseroots',
878 b'phaseroots',
861 b'obsstore',
879 b'obsstore',
862 b'00manifest.d',
880 b'00manifest.d',
863 b'00manifest.i',
881 b'00manifest.i',
864 b'00changelog.d',
882 b'00changelog.d',
865 b'00changelog.i',
883 b'00changelog.i',
866 b'requires',
884 b'requires',
867 )
885 )
868 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
886 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
869
887
870 def write(self, tr):
888 def write(self, tr):
871 self.fncache.write(tr)
889 self.fncache.write(tr)
872
890
873 def invalidatecaches(self):
891 def invalidatecaches(self):
874 self.fncache.entries = None
892 self.fncache.entries = None
875 self.fncache.addls = set()
893 self.fncache.addls = set()
876
894
877 def markremoved(self, fn):
895 def markremoved(self, fn):
878 self.fncache.remove(fn)
896 self.fncache.remove(fn)
879
897
880 def _exists(self, f):
898 def _exists(self, f):
881 ef = self.encode(f)
899 ef = self.encode(f)
882 try:
900 try:
883 self.getsize(ef)
901 self.getsize(ef)
884 return True
902 return True
885 except FileNotFoundError:
903 except FileNotFoundError:
886 return False
904 return False
887
905
888 def __contains__(self, path):
906 def __contains__(self, path):
889 '''Checks if the store contains path'''
907 '''Checks if the store contains path'''
890 path = b"/".join((b"data", path))
908 path = b"/".join((b"data", path))
891 # check for files (exact match)
909 # check for files (exact match)
892 e = path + b'.i'
910 e = path + b'.i'
893 if e in self.fncache and self._exists(e):
911 if e in self.fncache and self._exists(e):
894 return True
912 return True
895 # now check for directories (prefix match)
913 # now check for directories (prefix match)
896 if not path.endswith(b'/'):
914 if not path.endswith(b'/'):
897 path += b'/'
915 path += b'/'
898 for e in self.fncache:
916 for e in self.fncache:
899 if e.startswith(path) and self._exists(e):
917 if e.startswith(path) and self._exists(e):
900 return True
918 return True
901 return False
919 return False
@@ -1,937 +1,937
1 # streamclone.py - producing and consuming streaming repository data
1 # streamclone.py - producing and consuming streaming repository data
2 #
2 #
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import contextlib
9 import contextlib
10 import os
10 import os
11 import struct
11 import struct
12
12
13 from .i18n import _
13 from .i18n import _
14 from .pycompat import open
14 from .pycompat import open
15 from .interfaces import repository
15 from .interfaces import repository
16 from . import (
16 from . import (
17 bookmarks,
17 bookmarks,
18 cacheutil,
18 cacheutil,
19 error,
19 error,
20 narrowspec,
20 narrowspec,
21 phases,
21 phases,
22 pycompat,
22 pycompat,
23 requirements as requirementsmod,
23 requirements as requirementsmod,
24 scmutil,
24 scmutil,
25 store,
25 store,
26 transaction,
26 transaction,
27 util,
27 util,
28 )
28 )
29 from .revlogutils import (
29 from .revlogutils import (
30 nodemap,
30 nodemap,
31 )
31 )
32
32
33
33
34 def new_stream_clone_requirements(default_requirements, streamed_requirements):
34 def new_stream_clone_requirements(default_requirements, streamed_requirements):
35 """determine the final set of requirement for a new stream clone
35 """determine the final set of requirement for a new stream clone
36
36
37 this method combines the "default" requirements that a new repository would
37 this method combines the "default" requirements that a new repository would
38 use with the constraints we get from the stream clone content. We keep local
38 use with the constraints we get from the stream clone content. We keep local
39 configuration choices when possible.
39 configuration choices when possible.
40 """
40 """
41 requirements = set(default_requirements)
41 requirements = set(default_requirements)
42 requirements -= requirementsmod.STREAM_FIXED_REQUIREMENTS
42 requirements -= requirementsmod.STREAM_FIXED_REQUIREMENTS
43 requirements.update(streamed_requirements)
43 requirements.update(streamed_requirements)
44 return requirements
44 return requirements
45
45
46
46
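The combination above is plain set arithmetic: strip everything the stream dictates from the local defaults, then add what the stream actually carries, so purely local choices survive. A worked example with purely illustrative requirement names:

STREAM_FIXED = {b'layout-a', b'layout-b'}   # stand-in for STREAM_FIXED_REQUIREMENTS
default = {b'layout-a', b'local-choice'}    # what a fresh local repo would use
streamed = {b'layout-b'}                    # what the streamed store requires

final = (default - STREAM_FIXED) | streamed
assert final == {b'local-choice', b'layout-b'}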
47 def streamed_requirements(repo):
47 def streamed_requirements(repo):
48 """the set of requirement the new clone will have to support
48 """the set of requirement the new clone will have to support
49
49
50 This is used for advertising the stream options and to generate the actual
50 This is used for advertising the stream options and to generate the actual
51 stream content."""
51 stream content."""
52 requiredformats = (
52 requiredformats = (
53 repo.requirements & requirementsmod.STREAM_FIXED_REQUIREMENTS
53 repo.requirements & requirementsmod.STREAM_FIXED_REQUIREMENTS
54 )
54 )
55 return requiredformats
55 return requiredformats
56
56
57
57
58 def canperformstreamclone(pullop, bundle2=False):
58 def canperformstreamclone(pullop, bundle2=False):
59 """Whether it is possible to perform a streaming clone as part of pull.
59 """Whether it is possible to perform a streaming clone as part of pull.
60
60
61 ``bundle2`` will cause the function to consider stream clone through
61 ``bundle2`` will cause the function to consider stream clone through
62 bundle2 and only through bundle2.
62 bundle2 and only through bundle2.
63
63
64 Returns a tuple of (supported, requirements). ``supported`` is True if
64 Returns a tuple of (supported, requirements). ``supported`` is True if
65 streaming clone is supported and False otherwise. ``requirements`` is
65 streaming clone is supported and False otherwise. ``requirements`` is
66 a set of repo requirements from the remote, or ``None`` if stream clone
66 a set of repo requirements from the remote, or ``None`` if stream clone
67 isn't supported.
67 isn't supported.
68 """
68 """
69 repo = pullop.repo
69 repo = pullop.repo
70 remote = pullop.remote
70 remote = pullop.remote
71
71
72 bundle2supported = False
72 bundle2supported = False
73 if pullop.canusebundle2:
73 if pullop.canusebundle2:
74 if b'v2' in pullop.remotebundle2caps.get(b'stream', []):
74 if b'v2' in pullop.remotebundle2caps.get(b'stream', []):
75 bundle2supported = True
75 bundle2supported = True
76 # else
76 # else
77 # Server doesn't support bundle2 stream clone or doesn't support
77 # Server doesn't support bundle2 stream clone or doesn't support
78 # the versions we support. Fall back and possibly allow legacy.
78 # the versions we support. Fall back and possibly allow legacy.
79
79
80 # Ensures legacy code path uses available bundle2.
80 # Ensures legacy code path uses available bundle2.
81 if bundle2supported and not bundle2:
81 if bundle2supported and not bundle2:
82 return False, None
82 return False, None
83 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
83 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
84 elif bundle2 and not bundle2supported:
84 elif bundle2 and not bundle2supported:
85 return False, None
85 return False, None
86
86
87 # Streaming clone only works on empty repositories.
87 # Streaming clone only works on empty repositories.
88 if len(repo):
88 if len(repo):
89 return False, None
89 return False, None
90
90
91 # Streaming clone only works if all data is being requested.
91 # Streaming clone only works if all data is being requested.
92 if pullop.heads:
92 if pullop.heads:
93 return False, None
93 return False, None
94
94
95 streamrequested = pullop.streamclonerequested
95 streamrequested = pullop.streamclonerequested
96
96
97 # If we don't have a preference, let the server decide for us. This
97 # If we don't have a preference, let the server decide for us. This
98 # likely only comes into play in LANs.
98 # likely only comes into play in LANs.
99 if streamrequested is None:
99 if streamrequested is None:
100 # The server can advertise whether to prefer streaming clone.
100 # The server can advertise whether to prefer streaming clone.
101 streamrequested = remote.capable(b'stream-preferred')
101 streamrequested = remote.capable(b'stream-preferred')
102
102
103 if not streamrequested:
103 if not streamrequested:
104 return False, None
104 return False, None
105
105
106 # In order for stream clone to work, the client has to support all the
106 # In order for stream clone to work, the client has to support all the
107 # requirements advertised by the server.
107 # requirements advertised by the server.
108 #
108 #
109 # The server advertises its requirements via the "stream" and "streamreqs"
109 # The server advertises its requirements via the "stream" and "streamreqs"
110 # capability. "stream" (a value-less capability) is advertised if and only
110 # capability. "stream" (a value-less capability) is advertised if and only
111 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
111 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
112 # is advertised and contains a comma-delimited list of requirements.
112 # is advertised and contains a comma-delimited list of requirements.
113 requirements = set()
113 requirements = set()
114 if remote.capable(b'stream'):
114 if remote.capable(b'stream'):
115 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
115 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
116 else:
116 else:
117 streamreqs = remote.capable(b'streamreqs')
117 streamreqs = remote.capable(b'streamreqs')
118 # This is weird and shouldn't happen with modern servers.
118 # This is weird and shouldn't happen with modern servers.
119 if not streamreqs:
119 if not streamreqs:
120 pullop.repo.ui.warn(
120 pullop.repo.ui.warn(
121 _(
121 _(
122 b'warning: stream clone requested but server has them '
122 b'warning: stream clone requested but server has them '
123 b'disabled\n'
123 b'disabled\n'
124 )
124 )
125 )
125 )
126 return False, None
126 return False, None
127
127
128 streamreqs = set(streamreqs.split(b','))
128 streamreqs = set(streamreqs.split(b','))
129 # Server requires something we don't support. Bail.
129 # Server requires something we don't support. Bail.
130 missingreqs = streamreqs - repo.supported
130 missingreqs = streamreqs - repo.supported
131 if missingreqs:
131 if missingreqs:
132 pullop.repo.ui.warn(
132 pullop.repo.ui.warn(
133 _(
133 _(
134 b'warning: stream clone requested but client is missing '
134 b'warning: stream clone requested but client is missing '
135 b'requirements: %s\n'
135 b'requirements: %s\n'
136 )
136 )
137 % b', '.join(sorted(missingreqs))
137 % b', '.join(sorted(missingreqs))
138 )
138 )
139 pullop.repo.ui.warn(
139 pullop.repo.ui.warn(
140 _(
140 _(
141 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
141 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
142 b'for more information)\n'
142 b'for more information)\n'
143 )
143 )
144 )
144 )
145 return False, None
145 return False, None
146 requirements = streamreqs
146 requirements = streamreqs
147
147
148 return True, requirements
148 return True, requirements
149
149
150
150
151 def maybeperformlegacystreamclone(pullop):
151 def maybeperformlegacystreamclone(pullop):
152 """Possibly perform a legacy stream clone operation.
152 """Possibly perform a legacy stream clone operation.
153
153
154 Legacy stream clones are performed as part of pull but before all other
154 Legacy stream clones are performed as part of pull but before all other
155 operations.
155 operations.
156
156
157 A legacy stream clone will not be performed if a bundle2 stream clone is
157 A legacy stream clone will not be performed if a bundle2 stream clone is
158 supported.
158 supported.
159 """
159 """
160 from . import localrepo
160 from . import localrepo
161
161
162 supported, requirements = canperformstreamclone(pullop)
162 supported, requirements = canperformstreamclone(pullop)
163
163
164 if not supported:
164 if not supported:
165 return
165 return
166
166
167 repo = pullop.repo
167 repo = pullop.repo
168 remote = pullop.remote
168 remote = pullop.remote
169
169
170 # Save remote branchmap. We will use it later to speed up branchcache
170 # Save remote branchmap. We will use it later to speed up branchcache
171 # creation.
171 # creation.
172 rbranchmap = None
172 rbranchmap = None
173 if remote.capable(b'branchmap'):
173 if remote.capable(b'branchmap'):
174 with remote.commandexecutor() as e:
174 with remote.commandexecutor() as e:
175 rbranchmap = e.callcommand(b'branchmap', {}).result()
175 rbranchmap = e.callcommand(b'branchmap', {}).result()
176
176
177 repo.ui.status(_(b'streaming all changes\n'))
177 repo.ui.status(_(b'streaming all changes\n'))
178
178
179 with remote.commandexecutor() as e:
179 with remote.commandexecutor() as e:
180 fp = e.callcommand(b'stream_out', {}).result()
180 fp = e.callcommand(b'stream_out', {}).result()
181
181
182 # TODO strictly speaking, this code should all be inside the context
182 # TODO strictly speaking, this code should all be inside the context
183 # manager because the context manager is supposed to ensure all wire state
183 # manager because the context manager is supposed to ensure all wire state
184 # is flushed when exiting. But the legacy peers don't do this, so it
184 # is flushed when exiting. But the legacy peers don't do this, so it
185 # doesn't matter.
185 # doesn't matter.
186 l = fp.readline()
186 l = fp.readline()
187 try:
187 try:
188 resp = int(l)
188 resp = int(l)
189 except ValueError:
189 except ValueError:
190 raise error.ResponseError(
190 raise error.ResponseError(
191 _(b'unexpected response from remote server:'), l
191 _(b'unexpected response from remote server:'), l
192 )
192 )
193 if resp == 1:
193 if resp == 1:
194 raise error.Abort(_(b'operation forbidden by server'))
194 raise error.Abort(_(b'operation forbidden by server'))
195 elif resp == 2:
195 elif resp == 2:
196 raise error.Abort(_(b'locking the remote repository failed'))
196 raise error.Abort(_(b'locking the remote repository failed'))
197 elif resp != 0:
197 elif resp != 0:
198 raise error.Abort(_(b'the server sent an unknown error code'))
198 raise error.Abort(_(b'the server sent an unknown error code'))
199
199
200 l = fp.readline()
200 l = fp.readline()
201 try:
201 try:
202 filecount, bytecount = map(int, l.split(b' ', 1))
202 filecount, bytecount = map(int, l.split(b' ', 1))
203 except (ValueError, TypeError):
203 except (ValueError, TypeError):
204 raise error.ResponseError(
204 raise error.ResponseError(
205 _(b'unexpected response from remote server:'), l
205 _(b'unexpected response from remote server:'), l
206 )
206 )
207
207
208 with repo.lock():
208 with repo.lock():
209 consumev1(repo, fp, filecount, bytecount)
209 consumev1(repo, fp, filecount, bytecount)
210 repo.requirements = new_stream_clone_requirements(
210 repo.requirements = new_stream_clone_requirements(
211 repo.requirements,
211 repo.requirements,
212 requirements,
212 requirements,
213 )
213 )
214 repo.svfs.options = localrepo.resolvestorevfsoptions(
214 repo.svfs.options = localrepo.resolvestorevfsoptions(
215 repo.ui, repo.requirements, repo.features
215 repo.ui, repo.requirements, repo.features
216 )
216 )
217 scmutil.writereporequirements(repo)
217 scmutil.writereporequirements(repo)
218 nodemap.post_stream_cleanup(repo)
218 nodemap.post_stream_cleanup(repo)
219
219
220 if rbranchmap:
220 if rbranchmap:
221 repo._branchcaches.replace(repo, rbranchmap)
221 repo._branchcaches.replace(repo, rbranchmap)
222
222
223 repo.invalidate()
223 repo.invalidate()
224
224
225
225
226 def allowservergeneration(repo):
226 def allowservergeneration(repo):
227 """Whether streaming clones are allowed from the server."""
227 """Whether streaming clones are allowed from the server."""
228 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
228 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
229 return False
229 return False
230
230
231 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
231 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
232 return False
232 return False
233
233
234 # The way stream clone works makes it impossible to hide secret changesets.
234 # The way stream clone works makes it impossible to hide secret changesets.
235 # So don't allow this by default.
235 # So don't allow this by default.
236 secret = phases.hassecret(repo)
236 secret = phases.hassecret(repo)
237 if secret:
237 if secret:
238 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
238 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
239
239
240 return True
240 return True
241
241
242
242
243 # This is its own function so extensions can override it.
243 # This is its own function so extensions can override it.
244 def _walkstreamfiles(repo, matcher=None):
244 def _walkstreamfiles(repo, matcher=None):
245 return repo.store.walk(matcher)
245 return repo.store.walk(matcher)
246
246
247
247
248 def generatev1(repo):
248 def generatev1(repo):
249 """Emit content for version 1 of a streaming clone.
249 """Emit content for version 1 of a streaming clone.
250
250
251 This returns a 3-tuple of (file count, byte size, data iterator).
251 This returns a 3-tuple of (file count, byte size, data iterator).
252
252
253 The data iterator consists of N entries for each file being transferred.
253 The data iterator consists of N entries for each file being transferred.
254 Each file entry starts as a line with the file name and integer size
254 Each file entry starts as a line with the file name and integer size
255 delimited by a null byte.
255 delimited by a null byte.
256
256
257 The raw file data follows. Following the raw file data is the next file
257 The raw file data follows. Following the raw file data is the next file
258 entry, or EOF.
258 entry, or EOF.
259
259
260 When used on the wire protocol, an additional line indicating protocol
260 When used on the wire protocol, an additional line indicating protocol
261 success will be prepended to the stream. This function is not responsible
261 success will be prepended to the stream. This function is not responsible
262 for adding it.
262 for adding it.
263
263
264 This function will obtain a repository lock to ensure a consistent view of
264 This function will obtain a repository lock to ensure a consistent view of
265 the store is captured. It therefore may raise LockError.
265 the store is captured. It therefore may raise LockError.
266 """
266 """
267 entries = []
267 entries = []
268 total_bytes = 0
268 total_bytes = 0
269 # Get consistent snapshot of repo, lock during scan.
269 # Get consistent snapshot of repo, lock during scan.
270 with repo.lock():
270 with repo.lock():
271 repo.ui.debug(b'scanning\n')
271 repo.ui.debug(b'scanning\n')
272 for entry in _walkstreamfiles(repo):
272 for entry in _walkstreamfiles(repo):
273 if entry.file_size:
273 for f in entry.files():
274 entries.append((entry.unencoded_path, entry.file_size))
274 if f.file_size:
275 total_bytes += entry.file_size
275 entries.append((f.unencoded_path, f.file_size))
276 total_bytes += f.file_size
276 _test_sync_point_walk_1(repo)
277 _test_sync_point_walk_1(repo)
277 _test_sync_point_walk_2(repo)
278 _test_sync_point_walk_2(repo)
278
279
279 repo.ui.debug(
280 repo.ui.debug(
280 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
281 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
281 )
282 )
282
283
283 svfs = repo.svfs
284 svfs = repo.svfs
284 debugflag = repo.ui.debugflag
285 debugflag = repo.ui.debugflag
285
286
286 def emitrevlogdata():
287 def emitrevlogdata():
287 for name, size in entries:
288 for name, size in entries:
288 if debugflag:
289 if debugflag:
289 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
290 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
290 # partially encode name over the wire for backwards compat
291 # partially encode name over the wire for backwards compat
291 yield b'%s\0%d\n' % (store.encodedir(name), size)
292 yield b'%s\0%d\n' % (store.encodedir(name), size)
292 # auditing at this stage is both pointless (paths are already
293 # auditing at this stage is both pointless (paths are already
293 # trusted by the local repo) and expensive
294 # trusted by the local repo) and expensive
294 with svfs(name, b'rb', auditpath=False) as fp:
295 with svfs(name, b'rb', auditpath=False) as fp:
295 if size <= 65536:
296 if size <= 65536:
296 yield fp.read(size)
297 yield fp.read(size)
297 else:
298 else:
298 for chunk in util.filechunkiter(fp, limit=size):
299 for chunk in util.filechunkiter(fp, limit=size):
299 yield chunk
300 yield chunk
300
301
301 return len(entries), total_bytes, emitrevlogdata()
302 return len(entries), total_bytes, emitrevlogdata()
302
303
303
304
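Since the docstring above fully specifies the framing (one header line of the form name\0size\n per file, immediately followed by exactly size raw bytes), a consumer needs no further negotiation. A hedged sketch of such a consumer (note that the names on the wire are dir-encoded via store.encodedir; decoding is omitted here):

def iter_v1_entries(fp):
    """Yield (name, data) pairs from a generatev1()-style stream."""
    while True:
        header = fp.readline()
        if not header:
            break                            # clean EOF, no more entries
        name, size = header.rstrip(b'\n').split(b'\0', 1)
        size = int(size)
        data = fp.read(size)                 # exactly `size` bytes follow
        if len(data) != size:
            raise ValueError(b'truncated entry for %s' % name)
        yield name, data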
304 def generatev1wireproto(repo):
305 def generatev1wireproto(repo):
305 """Emit content for version 1 of streaming clone suitable for the wire.
306 """Emit content for version 1 of streaming clone suitable for the wire.
306
307
307 This is the data output from ``generatev1()`` with 2 header lines. The
308 This is the data output from ``generatev1()`` with 2 header lines. The
308 first line indicates overall success. The 2nd contains the file count and
309 first line indicates overall success. The 2nd contains the file count and
309 byte size of payload.
310 byte size of payload.
310
311
311 The success line contains "0" for success, "1" for stream generation not
312 The success line contains "0" for success, "1" for stream generation not
312 allowed, and "2" for error locking the repository (possibly indicating
313 allowed, and "2" for error locking the repository (possibly indicating
313 a permissions error for the server process).
314 a permissions error for the server process).
314 """
315 """
315 if not allowservergeneration(repo):
316 if not allowservergeneration(repo):
316 yield b'1\n'
317 yield b'1\n'
317 return
318 return
318
319
319 try:
320 try:
320 filecount, bytecount, it = generatev1(repo)
321 filecount, bytecount, it = generatev1(repo)
321 except error.LockError:
322 except error.LockError:
322 yield b'2\n'
323 yield b'2\n'
323 return
324 return
324
325
325 # Indicates successful response.
326 # Indicates successful response.
326 yield b'0\n'
327 yield b'0\n'
327 yield b'%d %d\n' % (filecount, bytecount)
328 yield b'%d %d\n' % (filecount, bytecount)
328 for chunk in it:
329 for chunk in it:
329 yield chunk
330 yield chunk
330
331
331
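The two header lines described in the docstring above are simple to consume on the receiving side. A minimal sketch, assuming an ``fp`` object exposing ``readline()``; the function name and the error handling are illustrative only, not part of Mercurial's API:

    def read_stream_out_header(fp):
        # first line: b'0' success, b'1' generation not allowed, b'2' lock error
        status = fp.readline().strip()
        if status != b'0':
            raise RuntimeError('stream clone refused, status %r' % status)
        # second line: b'<filecount> <bytecount>\n'
        filecount, bytecount = fp.readline().split(b' ', 1)
        return int(filecount), int(bytecount)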
332
332 def generatebundlev1(repo, compression=b'UN'):
333 def generatebundlev1(repo, compression=b'UN'):
333 """Emit content for version 1 of a stream clone bundle.
334 """Emit content for version 1 of a stream clone bundle.
334
335
335 The first 4 bytes of the output ("HGS1") denote this as stream clone
336 The first 4 bytes of the output ("HGS1") denote this as stream clone
336 bundle version 1.
337 bundle version 1.
337
338
338 The next 2 bytes indicate the compression type. Only "UN" is currently
339 The next 2 bytes indicate the compression type. Only "UN" is currently
339 supported.
340 supported.
340
341
341 The next 16 bytes are two 64-bit big endian unsigned integers indicating
342 The next 16 bytes are two 64-bit big endian unsigned integers indicating
342 file count and byte count, respectively.
343 file count and byte count, respectively.
343
344
344 The next 2 bytes are a 16-bit big endian unsigned short declaring the length
345 The next 2 bytes are a 16-bit big endian unsigned short declaring the length

345 of the requirements string, including a trailing \0. The following N bytes
346 of the requirements string, including a trailing \0. The following N bytes
346 are the requirements string, which is ASCII containing a comma-delimited
347 are the requirements string, which is ASCII containing a comma-delimited
347 list of repo requirements that are needed to support the data.
348 list of repo requirements that are needed to support the data.
348
349
349 The remaining content is the output of ``generatev1()`` (which may be
350 The remaining content is the output of ``generatev1()`` (which may be
350 compressed in the future).
351 compressed in the future).
351
352
352 Returns a tuple of (requirements, data generator).
353 Returns a tuple of (requirements, data generator).
353 """
354 """
354 if compression != b'UN':
355 if compression != b'UN':
355 raise ValueError(b'we do not support the compression argument yet')
356 raise ValueError(b'we do not support the compression argument yet')
356
357
357 requirements = streamed_requirements(repo)
358 requirements = streamed_requirements(repo)
358 requires = b','.join(sorted(requirements))
359 requires = b','.join(sorted(requirements))
359
360
360 def gen():
361 def gen():
361 yield b'HGS1'
362 yield b'HGS1'
362 yield compression
363 yield compression
363
364
364 filecount, bytecount, it = generatev1(repo)
365 filecount, bytecount, it = generatev1(repo)
365 repo.ui.status(
366 repo.ui.status(
366 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
367 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
367 )
368 )
368
369
369 yield struct.pack(b'>QQ', filecount, bytecount)
370 yield struct.pack(b'>QQ', filecount, bytecount)
370 yield struct.pack(b'>H', len(requires) + 1)
371 yield struct.pack(b'>H', len(requires) + 1)
371 yield requires + b'\0'
372 yield requires + b'\0'
372
373
373 # This is where we'll add compression in the future.
374 # This is where we'll add compression in the future.
374 assert compression == b'UN'
375 assert compression == b'UN'
375
376
376 progress = repo.ui.makeprogress(
377 progress = repo.ui.makeprogress(
377 _(b'bundle'), total=bytecount, unit=_(b'bytes')
378 _(b'bundle'), total=bytecount, unit=_(b'bytes')
378 )
379 )
379 progress.update(0)
380 progress.update(0)
380
381
381 for chunk in it:
382 for chunk in it:
382 progress.increment(step=len(chunk))
383 progress.increment(step=len(chunk))
383 yield chunk
384 yield chunk
384
385
385 progress.complete()
386 progress.complete()
386
387
387 return requirements, gen()
388 return requirements, gen()
388
389
389
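To make the byte layout described above concrete, the fixed-size part of a stream clone bundle v1 header can be assembled with ``struct``; the counts and the requirements set below are invented for the example:

    import struct

    filecount, bytecount = 5, 123456
    requires = b','.join(sorted([b'fncache', b'revlogv1', b'store']))
    header = (
        b'HGS1'                                      # 4-byte magic
        + b'UN'                                      # 2-byte compression type
        + struct.pack(b'>QQ', filecount, bytecount)  # 64-bit BE file/byte counts
        + struct.pack(b'>H', len(requires) + 1)      # requirements length + NUL
        + requires
        + b'\0'
    )
    # the output of generatev1() follows immediately after this header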
390
390 def consumev1(repo, fp, filecount, bytecount):
391 def consumev1(repo, fp, filecount, bytecount):
391 """Apply the contents from version 1 of a streaming clone file handle.
392 """Apply the contents from version 1 of a streaming clone file handle.
392
393
393 This takes the output from "stream_out" and applies it to the specified
394 This takes the output from "stream_out" and applies it to the specified
394 repository.
395 repository.
395
396
396 Like "stream_out," the status line added by the wire protocol is not
397 Like "stream_out," the status line added by the wire protocol is not
397 handled by this function.
398 handled by this function.
398 """
399 """
399 with repo.lock():
400 with repo.lock():
400 repo.ui.status(
401 repo.ui.status(
401 _(b'%d files to transfer, %s of data\n')
402 _(b'%d files to transfer, %s of data\n')
402 % (filecount, util.bytecount(bytecount))
403 % (filecount, util.bytecount(bytecount))
403 )
404 )
404 progress = repo.ui.makeprogress(
405 progress = repo.ui.makeprogress(
405 _(b'clone'), total=bytecount, unit=_(b'bytes')
406 _(b'clone'), total=bytecount, unit=_(b'bytes')
406 )
407 )
407 progress.update(0)
408 progress.update(0)
408 start = util.timer()
409 start = util.timer()
409
410
410 # TODO: get rid of (potential) inconsistency
411 # TODO: get rid of (potential) inconsistency
411 #
412 #
412 # If transaction is started and any @filecache property is
413 # If transaction is started and any @filecache property is
413 # changed at this point, it causes inconsistency between
414 # changed at this point, it causes inconsistency between
414 # in-memory cached property and streamclone-ed file on the
415 # in-memory cached property and streamclone-ed file on the
415 # disk. Nested transaction prevents transaction scope "clone"
416 # disk. Nested transaction prevents transaction scope "clone"
416 # below from writing in-memory changes out at the end of it,
417 # below from writing in-memory changes out at the end of it,
417 # even though in-memory changes are discarded at the end of it
418 # even though in-memory changes are discarded at the end of it
418 # regardless of transaction nesting.
419 # regardless of transaction nesting.
419 #
420 #
420 # But transaction nesting can't be simply prohibited, because
421 # But transaction nesting can't be simply prohibited, because
421 # nesting occurs also in ordinary case (e.g. enabling
422 # nesting occurs also in ordinary case (e.g. enabling
422 # clonebundles).
423 # clonebundles).
423
424
424 with repo.transaction(b'clone'):
425 with repo.transaction(b'clone'):
425 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
426 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
426 for i in range(filecount):
427 for i in range(filecount):
427 # XXX doesn't support '\n' or '\r' in filenames
428 # XXX doesn't support '\n' or '\r' in filenames
428 l = fp.readline()
429 l = fp.readline()
429 try:
430 try:
430 name, size = l.split(b'\0', 1)
431 name, size = l.split(b'\0', 1)
431 size = int(size)
432 size = int(size)
432 except (ValueError, TypeError):
433 except (ValueError, TypeError):
433 raise error.ResponseError(
434 raise error.ResponseError(
434 _(b'unexpected response from remote server:'), l
435 _(b'unexpected response from remote server:'), l
435 )
436 )
436 if repo.ui.debugflag:
437 if repo.ui.debugflag:
437 repo.ui.debug(
438 repo.ui.debug(
438 b'adding %s (%s)\n' % (name, util.bytecount(size))
439 b'adding %s (%s)\n' % (name, util.bytecount(size))
439 )
440 )
440 # for backwards compat, name was partially encoded
441 # for backwards compat, name was partially encoded
441 path = store.decodedir(name)
442 path = store.decodedir(name)
442 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
443 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
443 for chunk in util.filechunkiter(fp, limit=size):
444 for chunk in util.filechunkiter(fp, limit=size):
444 progress.increment(step=len(chunk))
445 progress.increment(step=len(chunk))
445 ofp.write(chunk)
446 ofp.write(chunk)
446
447
447 # force @filecache properties to be reloaded from
448 # force @filecache properties to be reloaded from
448 # streamclone-ed file at next access
449 # streamclone-ed file at next access
449 repo.invalidate(clearfilecache=True)
450 repo.invalidate(clearfilecache=True)
450
451
451 elapsed = util.timer() - start
452 elapsed = util.timer() - start
452 if elapsed <= 0:
453 if elapsed <= 0:
453 elapsed = 0.001
454 elapsed = 0.001
454 progress.complete()
455 progress.complete()
455 repo.ui.status(
456 repo.ui.status(
456 _(b'transferred %s in %.1f seconds (%s/sec)\n')
457 _(b'transferred %s in %.1f seconds (%s/sec)\n')
457 % (
458 % (
458 util.bytecount(bytecount),
459 util.bytecount(bytecount),
459 elapsed,
460 elapsed,
460 util.bytecount(bytecount / elapsed),
461 util.bytecount(bytecount / elapsed),
461 )
462 )
462 )
463 )
463
464
464
465
465 def readbundle1header(fp):
466 def readbundle1header(fp):
466 compression = fp.read(2)
467 compression = fp.read(2)
467 if compression != b'UN':
468 if compression != b'UN':
468 raise error.Abort(
469 raise error.Abort(
469 _(
470 _(
470 b'only uncompressed stream clone bundles are '
471 b'only uncompressed stream clone bundles are '
471 b'supported; got %s'
472 b'supported; got %s'
472 )
473 )
473 % compression
474 % compression
474 )
475 )
475
476
476 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
477 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
477 requireslen = struct.unpack(b'>H', fp.read(2))[0]
478 requireslen = struct.unpack(b'>H', fp.read(2))[0]
478 requires = fp.read(requireslen)
479 requires = fp.read(requireslen)
479
480
480 if not requires.endswith(b'\0'):
481 if not requires.endswith(b'\0'):
481 raise error.Abort(
482 raise error.Abort(
482 _(
483 _(
483 b'malformed stream clone bundle: '
484 b'malformed stream clone bundle: '
484 b'requirements not properly encoded'
485 b'requirements not properly encoded'
485 )
486 )
486 )
487 )
487
488
488 requirements = set(requires.rstrip(b'\0').split(b','))
489 requirements = set(requires.rstrip(b'\0').split(b','))
489
490
490 return filecount, bytecount, requirements
491 return filecount, bytecount, requirements
491
492
492
493
493 def applybundlev1(repo, fp):
494 def applybundlev1(repo, fp):
494 """Apply the content from a stream clone bundle version 1.
495 """Apply the content from a stream clone bundle version 1.
495
496
496 We assume the 4 byte header has been read and validated and the file handle
497 We assume the 4 byte header has been read and validated and the file handle
497 is at the 2 byte compression identifier.
498 is at the 2 byte compression identifier.
498 """
499 """
499 if len(repo):
500 if len(repo):
500 raise error.Abort(
501 raise error.Abort(
501 _(b'cannot apply stream clone bundle on non-empty repo')
502 _(b'cannot apply stream clone bundle on non-empty repo')
502 )
503 )
503
504
504 filecount, bytecount, requirements = readbundle1header(fp)
505 filecount, bytecount, requirements = readbundle1header(fp)
505 missingreqs = requirements - repo.supported
506 missingreqs = requirements - repo.supported
506 if missingreqs:
507 if missingreqs:
507 raise error.Abort(
508 raise error.Abort(
508 _(b'unable to apply stream clone: unsupported format: %s')
509 _(b'unable to apply stream clone: unsupported format: %s')
509 % b', '.join(sorted(missingreqs))
510 % b', '.join(sorted(missingreqs))
510 )
511 )
511
512
512 consumev1(repo, fp, filecount, bytecount)
513 consumev1(repo, fp, filecount, bytecount)
513 nodemap.post_stream_cleanup(repo)
514 nodemap.post_stream_cleanup(repo)
514
515
515
516
516 class streamcloneapplier:
517 class streamcloneapplier:
517 """Class to manage applying streaming clone bundles.
518 """Class to manage applying streaming clone bundles.
518
519
519 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
520 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
520 readers to perform bundle type-specific functionality.
521 readers to perform bundle type-specific functionality.
521 """
522 """
522
523
523 def __init__(self, fh):
524 def __init__(self, fh):
524 self._fh = fh
525 self._fh = fh
525
526
526 def apply(self, repo):
527 def apply(self, repo):
527 return applybundlev1(repo, self._fh)
528 return applybundlev1(repo, self._fh)
528
529
529
530
530 # type of file to stream
531 # type of file to stream
531 _fileappend = 0 # append only file
532 _fileappend = 0 # append only file
532 _filefull = 1 # full snapshot file
533 _filefull = 1 # full snapshot file
533
534
534 # Source of the file
535 # Source of the file
535 _srcstore = b's' # store (svfs)
536 _srcstore = b's' # store (svfs)
536 _srccache = b'c' # cache (cache)
537 _srccache = b'c' # cache (cache)
537
538
538 # This is its own function so extensions can override it.
539 # This is its own function so extensions can override it.
539 def _walkstreamfullstorefiles(repo):
540 def _walkstreamfullstorefiles(repo):
540 """list snapshot file from the store"""
541 """list snapshot file from the store"""
541 fnames = []
542 fnames = []
542 if not repo.publishing():
543 if not repo.publishing():
543 fnames.append(b'phaseroots')
544 fnames.append(b'phaseroots')
544 return fnames
545 return fnames
545
546
546
547
547 def _filterfull(entry, copy, vfsmap):
548 def _filterfull(entry, copy, vfsmap):
548 """actually copy the snapshot files"""
549 """actually copy the snapshot files"""
549 src, name, ftype, data = entry
550 src, name, ftype, data = entry
550 if ftype != _filefull:
551 if ftype != _filefull:
551 return entry
552 return entry
552 return (src, name, ftype, copy(vfsmap[src].join(name)))
553 return (src, name, ftype, copy(vfsmap[src].join(name)))
553
554
554
555
555 @contextlib.contextmanager
556 @contextlib.contextmanager
556 def maketempcopies():
557 def maketempcopies():
557 """return a function to temporary copy file"""
558 """return a function to temporary copy file"""
558
559
559 files = []
560 files = []
560 dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
561 dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
561 try:
562 try:
562
563
563 def copy(src):
564 def copy(src):
564 fd, dst = pycompat.mkstemp(
565 fd, dst = pycompat.mkstemp(
565 prefix=os.path.basename(src), dir=dst_dir
566 prefix=os.path.basename(src), dir=dst_dir
566 )
567 )
567 os.close(fd)
568 os.close(fd)
568 files.append(dst)
569 files.append(dst)
569 util.copyfiles(src, dst, hardlink=True)
570 util.copyfiles(src, dst, hardlink=True)
570 return dst
571 return dst
571
572
572 yield copy
573 yield copy
573 finally:
574 finally:
574 for tmp in files:
575 for tmp in files:
575 util.tryunlink(tmp)
576 util.tryunlink(tmp)
576 util.tryrmdir(dst_dir)
577 util.tryrmdir(dst_dir)
577
578
578
579
579 def _makemap(repo):
580 def _makemap(repo):
580 """make a (src -> vfs) map for the repo"""
581 """make a (src -> vfs) map for the repo"""
581 vfsmap = {
582 vfsmap = {
582 _srcstore: repo.svfs,
583 _srcstore: repo.svfs,
583 _srccache: repo.cachevfs,
584 _srccache: repo.cachevfs,
584 }
585 }
585 # we keep repo.vfs out of the map on purpose, there are too many dangers there
586 # we keep repo.vfs out of the map on purpose, there are too many dangers there
586 # (eg: .hg/hgrc)
587 # (eg: .hg/hgrc)
587 assert repo.vfs not in vfsmap.values()
588 assert repo.vfs not in vfsmap.values()
588
589
589 return vfsmap
590 return vfsmap
590
591
591
592
592 def _emit2(repo, entries, totalfilesize):
593 def _emit2(repo, entries, totalfilesize):
593 """actually emit the stream bundle"""
594 """actually emit the stream bundle"""
594 vfsmap = _makemap(repo)
595 vfsmap = _makemap(repo)
595 # we keep repo.vfs out of the map on purpose, there are too many dangers there
596 # we keep repo.vfs out of the map on purpose, there are too many dangers there
596 # (eg: .hg/hgrc),
597 # (eg: .hg/hgrc),
597 #
598 #
598 # this assert is duplicated (from _makemap) as the author might think this is
599 # this assert is duplicated (from _makemap) as the author might think this is
599 # fine, while it is really not.
600 # fine, while it is really not.
600 if repo.vfs in vfsmap.values():
601 if repo.vfs in vfsmap.values():
601 raise error.ProgrammingError(
602 raise error.ProgrammingError(
602 b'repo.vfs must not be added to vfsmap for security reasons'
603 b'repo.vfs must not be added to vfsmap for security reasons'
603 )
604 )
604
605
605 progress = repo.ui.makeprogress(
606 progress = repo.ui.makeprogress(
606 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
607 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
607 )
608 )
608 progress.update(0)
609 progress.update(0)
609 with maketempcopies() as copy, progress:
610 with maketempcopies() as copy, progress:
610 # copy is delayed until we are in the try
611 # copy is delayed until we are in the try
611 entries = [_filterfull(e, copy, vfsmap) for e in entries]
612 entries = [_filterfull(e, copy, vfsmap) for e in entries]
612 yield None # this releases the lock on the repository
613 yield None # this releases the lock on the repository
613 totalbytecount = 0
614 totalbytecount = 0
614
615
615 for src, name, ftype, data in entries:
616 for src, name, ftype, data in entries:
616 vfs = vfsmap[src]
617 vfs = vfsmap[src]
617 yield src
618 yield src
618 yield util.uvarintencode(len(name))
619 yield util.uvarintencode(len(name))
619 if ftype == _fileappend:
620 if ftype == _fileappend:
620 fp = vfs(name)
621 fp = vfs(name)
621 size = data
622 size = data
622 elif ftype == _filefull:
623 elif ftype == _filefull:
623 fp = open(data, b'rb')
624 fp = open(data, b'rb')
624 size = util.fstat(fp).st_size
625 size = util.fstat(fp).st_size
625 bytecount = 0
626 bytecount = 0
626 try:
627 try:
627 yield util.uvarintencode(size)
628 yield util.uvarintencode(size)
628 yield name
629 yield name
629 if size <= 65536:
630 if size <= 65536:
630 chunks = (fp.read(size),)
631 chunks = (fp.read(size),)
631 else:
632 else:
632 chunks = util.filechunkiter(fp, limit=size)
633 chunks = util.filechunkiter(fp, limit=size)
633 for chunk in chunks:
634 for chunk in chunks:
634 bytecount += len(chunk)
635 bytecount += len(chunk)
635 totalbytecount += len(chunk)
636 totalbytecount += len(chunk)
636 progress.update(totalbytecount)
637 progress.update(totalbytecount)
637 yield chunk
638 yield chunk
638 if bytecount != size:
639 if bytecount != size:
639 # Would most likely be caused by a race due to `hg strip` or
640 # Would most likely be caused by a race due to `hg strip` or
640 # a revlog split
641 # a revlog split
641 raise error.Abort(
642 raise error.Abort(
642 _(
643 _(
643 b'clone could only read %d bytes from %s, but '
644 b'clone could only read %d bytes from %s, but '
644 b'expected %d bytes'
645 b'expected %d bytes'
645 )
646 )
646 % (bytecount, name, size)
647 % (bytecount, name, size)
647 )
648 )
648 finally:
649 finally:
649 fp.close()
650 fp.close()
650
651
651
652
652 def _test_sync_point_walk_1(repo):
653 def _test_sync_point_walk_1(repo):
653 """a function for synchronisation during tests"""
654 """a function for synchronisation during tests"""
654
655
655
656
656 def _test_sync_point_walk_2(repo):
657 def _test_sync_point_walk_2(repo):
657 """a function for synchronisation during tests"""
658 """a function for synchronisation during tests"""
658
659
659
660
660 def _v2_walk(repo, includes, excludes, includeobsmarkers):
661 def _v2_walk(repo, includes, excludes, includeobsmarkers):
661 """emit a seris of files information useful to clone a repo
662 """emit a seris of files information useful to clone a repo
662
663
663 return (entries, totalfilesize)
664 return (entries, totalfilesize)
664
665
665 entries is a list of tuples (vfs-key, file-path, file-type, size)
666 entries is a list of tuples (vfs-key, file-path, file-type, size)
666
667
667 - `vfs-key`: is a key to the right vfs to write the file (see _makemap)
668 - `vfs-key`: is a key to the right vfs to write the file (see _makemap)
668 - `name`: file path of the file to copy (to be fed to the vfs)
669 - `name`: file path of the file to copy (to be fed to the vfs)
669 - `file-type`: does this file need to be copied with the source lock ?
670 - `file-type`: does this file need to be copied with the source lock ?
670 - `size`: the size of the file (or None)
671 - `size`: the size of the file (or None)
671 """
672 """
672 assert repo._currentlock(repo._lockref) is not None
673 assert repo._currentlock(repo._lockref) is not None
673 entries = []
674 entries = []
674 totalfilesize = 0
675 totalfilesize = 0
675
676
676 matcher = None
677 matcher = None
677 if includes or excludes:
678 if includes or excludes:
678 matcher = narrowspec.match(repo.root, includes, excludes)
679 matcher = narrowspec.match(repo.root, includes, excludes)
679
680
680 for entry in _walkstreamfiles(repo, matcher):
681 for entry in _walkstreamfiles(repo, matcher):
681 if entry.file_size:
682 for f in entry.files():
683 if f.file_size:
682 ft = _fileappend
684 ft = _fileappend
683 if entry.is_volatile:
685 if f.is_volatile:
684 ft = _filefull
686 ft = _filefull
685 entries.append(
687 entries.append((_srcstore, f.unencoded_path, ft, f.file_size))
686 (_srcstore, entry.unencoded_path, ft, entry.file_size)
688 totalfilesize += f.file_size
687 )
688 totalfilesize += entry.file_size
689 for name in _walkstreamfullstorefiles(repo):
689 for name in _walkstreamfullstorefiles(repo):
690 if repo.svfs.exists(name):
690 if repo.svfs.exists(name):
691 totalfilesize += repo.svfs.lstat(name).st_size
691 totalfilesize += repo.svfs.lstat(name).st_size
692 entries.append((_srcstore, name, _filefull, None))
692 entries.append((_srcstore, name, _filefull, None))
693 if includeobsmarkers and repo.svfs.exists(b'obsstore'):
693 if includeobsmarkers and repo.svfs.exists(b'obsstore'):
694 totalfilesize += repo.svfs.lstat(b'obsstore').st_size
694 totalfilesize += repo.svfs.lstat(b'obsstore').st_size
695 entries.append((_srcstore, b'obsstore', _filefull, None))
695 entries.append((_srcstore, b'obsstore', _filefull, None))
696 for name in cacheutil.cachetocopy(repo):
696 for name in cacheutil.cachetocopy(repo):
697 if repo.cachevfs.exists(name):
697 if repo.cachevfs.exists(name):
698 totalfilesize += repo.cachevfs.lstat(name).st_size
698 totalfilesize += repo.cachevfs.lstat(name).st_size
699 entries.append((_srccache, name, _filefull, None))
699 entries.append((_srccache, name, _filefull, None))
700 return entries, totalfilesize
700 return entries, totalfilesize
701
701
702
702
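For reference, with the constants defined earlier in this file, the ``entries`` list returned by ``_v2_walk`` has roughly the following shape; the paths and the size are invented for the example:

    entries = [
        (_srcstore, b'data/foo.i', _fileappend, 1234),  # revlog, read under lock
        (_srcstore, b'phaseroots', _filefull, None),    # volatile file, snapshotted
        (_srccache, b'rbc-names-v1', _filefull, None),  # cache file
    ]
    # totalfilesize also counts the _filefull entries: their sizes are taken
    # from lstat() at walk time even though the tuples store None.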
703 def generatev2(repo, includes, excludes, includeobsmarkers):
703 def generatev2(repo, includes, excludes, includeobsmarkers):
704 """Emit content for version 2 of a streaming clone.
704 """Emit content for version 2 of a streaming clone.
705
705
706 the data stream consists of the following entries:
706 the data stream consists of the following entries:
707 1) A char representing the file destination (eg: store or cache)
707 1) A char representing the file destination (eg: store or cache)
708 2) A varint containing the length of the filename
708 2) A varint containing the length of the filename
709 3) A varint containing the length of file data
709 3) A varint containing the length of file data
710 4) N bytes containing the filename (the internal, store-agnostic form)
710 4) N bytes containing the filename (the internal, store-agnostic form)
711 5) N bytes containing the file data
711 5) N bytes containing the file data
712
712
713 Returns a 3-tuple of (file count, file size, data iterator).
713 Returns a 3-tuple of (file count, file size, data iterator).
714 """
714 """
715
715
716 with repo.lock():
716 with repo.lock():
717
717
718 repo.ui.debug(b'scanning\n')
718 repo.ui.debug(b'scanning\n')
719
719
720 entries, totalfilesize = _v2_walk(
720 entries, totalfilesize = _v2_walk(
721 repo,
721 repo,
722 includes=includes,
722 includes=includes,
723 excludes=excludes,
723 excludes=excludes,
724 includeobsmarkers=includeobsmarkers,
724 includeobsmarkers=includeobsmarkers,
725 )
725 )
726
726
727 chunks = _emit2(repo, entries, totalfilesize)
727 chunks = _emit2(repo, entries, totalfilesize)
728 first = next(chunks)
728 first = next(chunks)
729 assert first is None
729 assert first is None
730 _test_sync_point_walk_1(repo)
730 _test_sync_point_walk_1(repo)
731 _test_sync_point_walk_2(repo)
731 _test_sync_point_walk_2(repo)
732
732
733 return len(entries), totalfilesize, chunks
733 return len(entries), totalfilesize, chunks
734
734
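Tying the docstring above to actual bytes: a single store-file entry as framed by _emit2 (and read back by consumev2 below) looks like this for a small hypothetical file; values below 128 fit in a single varint byte equal to the value, so the two lengths are one byte each here:

    src = b's'                  # destination: the store vfs
    name = b'data/foo.i'        # 10 bytes, internal store-agnostic form
    data = b'x' * 42            # 42 bytes of made-up file content
    frame = (
        src
        + b'\x0a'               # util.uvarintencode(10), the name length
        + b'\x2a'               # util.uvarintencode(42), the data length
        + name
        + data
    )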
735
735
736 @contextlib.contextmanager
736 @contextlib.contextmanager
737 def nested(*ctxs):
737 def nested(*ctxs):
738 this = ctxs[0]
738 this = ctxs[0]
739 rest = ctxs[1:]
739 rest = ctxs[1:]
740 with this:
740 with this:
741 if rest:
741 if rest:
742 with nested(*rest):
742 with nested(*rest):
743 yield
743 yield
744 else:
744 else:
745 yield
745 yield
746
746
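The ``nested`` helper above is a small recursive stand-in for stacking ``with`` statements (similar in spirit to contextlib.ExitStack). With placeholder context managers cm1, cm2 and cm3:

    with nested(cm1, cm2, cm3):
        pass
    # behaves like:
    with cm1:
        with cm2:
            with cm3:
                pass
    # cm3 is exited first and cm1 last; an exception raised while entering a
    # later manager still exits the ones already entered.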
747
747
748 def consumev2(repo, fp, filecount, filesize):
748 def consumev2(repo, fp, filecount, filesize):
749 """Apply the contents from a version 2 streaming clone.
749 """Apply the contents from a version 2 streaming clone.
750
750
751 Data is read from an object that only needs to provide a ``read(size)``
751 Data is read from an object that only needs to provide a ``read(size)``
752 method.
752 method.
753 """
753 """
754 with repo.lock():
754 with repo.lock():
755 repo.ui.status(
755 repo.ui.status(
756 _(b'%d files to transfer, %s of data\n')
756 _(b'%d files to transfer, %s of data\n')
757 % (filecount, util.bytecount(filesize))
757 % (filecount, util.bytecount(filesize))
758 )
758 )
759
759
760 start = util.timer()
760 start = util.timer()
761 progress = repo.ui.makeprogress(
761 progress = repo.ui.makeprogress(
762 _(b'clone'), total=filesize, unit=_(b'bytes')
762 _(b'clone'), total=filesize, unit=_(b'bytes')
763 )
763 )
764 progress.update(0)
764 progress.update(0)
765
765
766 vfsmap = _makemap(repo)
766 vfsmap = _makemap(repo)
767 # we keep repo.vfs out of the map on purpose, there are too many dangers
767 # we keep repo.vfs out of the map on purpose, there are too many dangers
768 # there (eg: .hg/hgrc),
768 # there (eg: .hg/hgrc),
769 #
769 #
770 # this assert is duplicated (from _makemap) as the author might think this
770 # this assert is duplicated (from _makemap) as the author might think this
771 # is fine, while it is really not.
771 # is fine, while it is really not.
772 if repo.vfs in vfsmap.values():
772 if repo.vfs in vfsmap.values():
773 raise error.ProgrammingError(
773 raise error.ProgrammingError(
774 b'repo.vfs must not be added to vfsmap for security reasons'
774 b'repo.vfs must not be added to vfsmap for security reasons'
775 )
775 )
776
776
777 with repo.transaction(b'clone'):
777 with repo.transaction(b'clone'):
778 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
778 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
779 with nested(*ctxs):
779 with nested(*ctxs):
780 for i in range(filecount):
780 for i in range(filecount):
781 src = util.readexactly(fp, 1)
781 src = util.readexactly(fp, 1)
782 vfs = vfsmap[src]
782 vfs = vfsmap[src]
783 namelen = util.uvarintdecodestream(fp)
783 namelen = util.uvarintdecodestream(fp)
784 datalen = util.uvarintdecodestream(fp)
784 datalen = util.uvarintdecodestream(fp)
785
785
786 name = util.readexactly(fp, namelen)
786 name = util.readexactly(fp, namelen)
787
787
788 if repo.ui.debugflag:
788 if repo.ui.debugflag:
789 repo.ui.debug(
789 repo.ui.debug(
790 b'adding [%s] %s (%s)\n'
790 b'adding [%s] %s (%s)\n'
791 % (src, name, util.bytecount(datalen))
791 % (src, name, util.bytecount(datalen))
792 )
792 )
793
793
794 with vfs(name, b'w') as ofp:
794 with vfs(name, b'w') as ofp:
795 for chunk in util.filechunkiter(fp, limit=datalen):
795 for chunk in util.filechunkiter(fp, limit=datalen):
796 progress.increment(step=len(chunk))
796 progress.increment(step=len(chunk))
797 ofp.write(chunk)
797 ofp.write(chunk)
798
798
799 # force @filecache properties to be reloaded from
799 # force @filecache properties to be reloaded from
800 # streamclone-ed file at next access
800 # streamclone-ed file at next access
801 repo.invalidate(clearfilecache=True)
801 repo.invalidate(clearfilecache=True)
802
802
803 elapsed = util.timer() - start
803 elapsed = util.timer() - start
804 if elapsed <= 0:
804 if elapsed <= 0:
805 elapsed = 0.001
805 elapsed = 0.001
806 repo.ui.status(
806 repo.ui.status(
807 _(b'transferred %s in %.1f seconds (%s/sec)\n')
807 _(b'transferred %s in %.1f seconds (%s/sec)\n')
808 % (
808 % (
809 util.bytecount(progress.pos),
809 util.bytecount(progress.pos),
810 elapsed,
810 elapsed,
811 util.bytecount(progress.pos / elapsed),
811 util.bytecount(progress.pos / elapsed),
812 )
812 )
813 )
813 )
814 progress.complete()
814 progress.complete()
815
815
816
816
817 def applybundlev2(repo, fp, filecount, filesize, requirements):
817 def applybundlev2(repo, fp, filecount, filesize, requirements):
818 from . import localrepo
818 from . import localrepo
819
819
820 missingreqs = [r for r in requirements if r not in repo.supported]
820 missingreqs = [r for r in requirements if r not in repo.supported]
821 if missingreqs:
821 if missingreqs:
822 raise error.Abort(
822 raise error.Abort(
823 _(b'unable to apply stream clone: unsupported format: %s')
823 _(b'unable to apply stream clone: unsupported format: %s')
824 % b', '.join(sorted(missingreqs))
824 % b', '.join(sorted(missingreqs))
825 )
825 )
826
826
827 consumev2(repo, fp, filecount, filesize)
827 consumev2(repo, fp, filecount, filesize)
828
828
829 repo.requirements = new_stream_clone_requirements(
829 repo.requirements = new_stream_clone_requirements(
830 repo.requirements,
830 repo.requirements,
831 requirements,
831 requirements,
832 )
832 )
833 repo.svfs.options = localrepo.resolvestorevfsoptions(
833 repo.svfs.options = localrepo.resolvestorevfsoptions(
834 repo.ui, repo.requirements, repo.features
834 repo.ui, repo.requirements, repo.features
835 )
835 )
836 scmutil.writereporequirements(repo)
836 scmutil.writereporequirements(repo)
837 nodemap.post_stream_cleanup(repo)
837 nodemap.post_stream_cleanup(repo)
838
838
839
839
840 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
840 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
841 hardlink = [True]
841 hardlink = [True]
842
842
843 def copy_used():
843 def copy_used():
844 hardlink[0] = False
844 hardlink[0] = False
845 progress.topic = _(b'copying')
845 progress.topic = _(b'copying')
846
846
847 for k, path, size in entries:
847 for k, path, size in entries:
848 src_vfs = src_vfs_map[k]
848 src_vfs = src_vfs_map[k]
849 dst_vfs = dst_vfs_map[k]
849 dst_vfs = dst_vfs_map[k]
850 src_path = src_vfs.join(path)
850 src_path = src_vfs.join(path)
851 dst_path = dst_vfs.join(path)
851 dst_path = dst_vfs.join(path)
852 # We cannot use dirname and makedirs of dst_vfs here because the store
852 # We cannot use dirname and makedirs of dst_vfs here because the store
853 # encoding confuses them. See issue 6581 for details.
853 # encoding confuses them. See issue 6581 for details.
854 dirname = os.path.dirname(dst_path)
854 dirname = os.path.dirname(dst_path)
855 if not os.path.exists(dirname):
855 if not os.path.exists(dirname):
856 util.makedirs(dirname)
856 util.makedirs(dirname)
857 dst_vfs.register_file(path)
857 dst_vfs.register_file(path)
858 # XXX we could use the #nb_bytes argument.
858 # XXX we could use the #nb_bytes argument.
859 util.copyfile(
859 util.copyfile(
860 src_path,
860 src_path,
861 dst_path,
861 dst_path,
862 hardlink=hardlink[0],
862 hardlink=hardlink[0],
863 no_hardlink_cb=copy_used,
863 no_hardlink_cb=copy_used,
864 check_fs_hardlink=False,
864 check_fs_hardlink=False,
865 )
865 )
866 progress.increment()
866 progress.increment()
867 return hardlink[0]
867 return hardlink[0]
868
868
869
869
870 def local_copy(src_repo, dest_repo):
870 def local_copy(src_repo, dest_repo):
871 """copy all content from one local repository to another
871 """copy all content from one local repository to another
872
872
873 This is useful for local clones"""
873 This is useful for local clones"""
874 src_store_requirements = {
874 src_store_requirements = {
875 r
875 r
876 for r in src_repo.requirements
876 for r in src_repo.requirements
877 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
877 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
878 }
878 }
879 dest_store_requirements = {
879 dest_store_requirements = {
880 r
880 r
881 for r in dest_repo.requirements
881 for r in dest_repo.requirements
882 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
882 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
883 }
883 }
884 assert src_store_requirements == dest_store_requirements
884 assert src_store_requirements == dest_store_requirements
885
885
886 with dest_repo.lock():
886 with dest_repo.lock():
887 with src_repo.lock():
887 with src_repo.lock():
888
888
889 # the bookmark file is not integrated into the streaming as it might use
889 # the bookmark file is not integrated into the streaming as it might use
890 # `repo.vfs`, and there is too much sensitive data accessible through
890 # `repo.vfs`, and there is too much sensitive data accessible through
891 # `repo.vfs` to expose it to streaming clone.
891 # `repo.vfs` to expose it to streaming clone.
892 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
892 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
893 srcbookmarks = src_book_vfs.join(b'bookmarks')
893 srcbookmarks = src_book_vfs.join(b'bookmarks')
894 bm_count = 0
894 bm_count = 0
895 if os.path.exists(srcbookmarks):
895 if os.path.exists(srcbookmarks):
896 bm_count = 1
896 bm_count = 1
897
897
898 entries, totalfilesize = _v2_walk(
898 entries, totalfilesize = _v2_walk(
899 src_repo,
899 src_repo,
900 includes=None,
900 includes=None,
901 excludes=None,
901 excludes=None,
902 includeobsmarkers=True,
902 includeobsmarkers=True,
903 )
903 )
904 src_vfs_map = _makemap(src_repo)
904 src_vfs_map = _makemap(src_repo)
905 dest_vfs_map = _makemap(dest_repo)
905 dest_vfs_map = _makemap(dest_repo)
906 progress = src_repo.ui.makeprogress(
906 progress = src_repo.ui.makeprogress(
907 topic=_(b'linking'),
907 topic=_(b'linking'),
908 total=len(entries) + bm_count,
908 total=len(entries) + bm_count,
909 unit=_(b'files'),
909 unit=_(b'files'),
910 )
910 )
911 # copy files
911 # copy files
912 #
912 #
913 # We could copy the full file while the source repository is locked
913 # We could copy the full file while the source repository is locked
914 # and the other one without the lock. However, in the linking case,
914 # and the other one without the lock. However, in the linking case,
915 # this would also require checks that nobody is appending any data
915 # this would also require checks that nobody is appending any data
916 # to the files while we do the clone, so this is not done yet. We
916 # to the files while we do the clone, so this is not done yet. We
917 # could do this blindly when copying files.
917 # could do this blindly when copying files.
918 files = ((k, path, size) for k, path, ftype, size in entries)
918 files = ((k, path, size) for k, path, ftype, size in entries)
919 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
919 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
920
920
921 # copy bookmarks over
921 # copy bookmarks over
922 if bm_count:
922 if bm_count:
923 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
923 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
924 dstbookmarks = dst_book_vfs.join(b'bookmarks')
924 dstbookmarks = dst_book_vfs.join(b'bookmarks')
925 util.copyfile(srcbookmarks, dstbookmarks)
925 util.copyfile(srcbookmarks, dstbookmarks)
926 progress.complete()
926 progress.complete()
927 if hardlink:
927 if hardlink:
928 msg = b'linked %d files\n'
928 msg = b'linked %d files\n'
929 else:
929 else:
930 msg = b'copied %d files\n'
930 msg = b'copied %d files\n'
931 src_repo.ui.debug(msg % (len(entries) + bm_count))
931 src_repo.ui.debug(msg % (len(entries) + bm_count))
932
932
933 with dest_repo.transaction(b"localclone") as tr:
933 with dest_repo.transaction(b"localclone") as tr:
934 dest_repo.store.write(tr)
934 dest_repo.store.write(tr)
935
935
936 # clean up transaction files as they do not make sense
936 # clean up transaction files as they do not make sense
937 transaction.cleanup_undo_files(dest_repo.ui.warn, dest_repo.vfs_map)
937 transaction.cleanup_undo_files(dest_repo.ui.warn, dest_repo.vfs_map)
@@ -1,625 +1,627
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import os
9 import os
10
10
11 from .i18n import _
11 from .i18n import _
12 from .node import short
12 from .node import short
13 from .utils import stringutil
13 from .utils import stringutil
14
14
15 from . import (
15 from . import (
16 error,
16 error,
17 pycompat,
17 pycompat,
18 requirements,
18 requirements,
19 revlog,
19 revlog,
20 util,
20 util,
21 )
21 )
22
22
23 VERIFY_DEFAULT = 0
23 VERIFY_DEFAULT = 0
24 VERIFY_FULL = 1
24 VERIFY_FULL = 1
25
25
26
26
27 def verify(repo, level=None):
27 def verify(repo, level=None):
28 with repo.lock():
28 with repo.lock():
29 v = verifier(repo, level)
29 v = verifier(repo, level)
30 return v.verify()
30 return v.verify()
31
31
32
32
33 def _normpath(f):
33 def _normpath(f):
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 # converted repo may contain repeated slashes
35 # converted repo may contain repeated slashes
36 while b'//' in f:
36 while b'//' in f:
37 f = f.replace(b'//', b'/')
37 f = f.replace(b'//', b'/')
38 return f
38 return f
39
39
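A quick illustration of the normalisation performed by ``_normpath`` above; the path is a made-up example:

    assert _normpath(b'foo//bar///baz') == b'foo/bar/baz'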
40
40
41 HINT_FNCACHE = _(
41 HINT_FNCACHE = _(
42 b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
42 b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
43 )
43 )
44
44
45 WARN_PARENT_DIR_UNKNOWN_REV = _(
45 WARN_PARENT_DIR_UNKNOWN_REV = _(
46 b"parent-directory manifest refers to unknown revision %s"
46 b"parent-directory manifest refers to unknown revision %s"
47 )
47 )
48
48
49 WARN_UNKNOWN_COPY_SOURCE = _(
49 WARN_UNKNOWN_COPY_SOURCE = _(
50 b"warning: copy source of '%s' not in parents of %s"
50 b"warning: copy source of '%s' not in parents of %s"
51 )
51 )
52
52
53 WARN_NULLID_COPY_SOURCE = _(
53 WARN_NULLID_COPY_SOURCE = _(
54 b"warning: %s@%s: copy source revision is nullid %s:%s\n"
54 b"warning: %s@%s: copy source revision is nullid %s:%s\n"
55 )
55 )
56
56
57
57
58 class verifier:
58 class verifier:
59 def __init__(self, repo, level=None):
59 def __init__(self, repo, level=None):
60 self.repo = repo.unfiltered()
60 self.repo = repo.unfiltered()
61 self.ui = repo.ui
61 self.ui = repo.ui
62 self.match = repo.narrowmatch()
62 self.match = repo.narrowmatch()
63 if level is None:
63 if level is None:
64 level = VERIFY_DEFAULT
64 level = VERIFY_DEFAULT
65 self._level = level
65 self._level = level
66 self.badrevs = set()
66 self.badrevs = set()
67 self.errors = 0
67 self.errors = 0
68 self.warnings = 0
68 self.warnings = 0
69 self.havecl = len(repo.changelog) > 0
69 self.havecl = len(repo.changelog) > 0
70 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
70 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
71 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
71 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
72 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
72 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
73 self.refersmf = False
73 self.refersmf = False
74 self.fncachewarned = False
74 self.fncachewarned = False
75 # developer config: verify.skipflags
75 # developer config: verify.skipflags
76 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
76 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
77 self.warnorphanstorefiles = True
77 self.warnorphanstorefiles = True
78
78
79 def _warn(self, msg):
79 def _warn(self, msg):
80 """record a "warning" level issue"""
80 """record a "warning" level issue"""
81 self.ui.warn(msg + b"\n")
81 self.ui.warn(msg + b"\n")
82 self.warnings += 1
82 self.warnings += 1
83
83
84 def _err(self, linkrev, msg, filename=None):
84 def _err(self, linkrev, msg, filename=None):
85 """record a "error" level issue"""
85 """record a "error" level issue"""
86 if linkrev is not None:
86 if linkrev is not None:
87 self.badrevs.add(linkrev)
87 self.badrevs.add(linkrev)
88 linkrev = b"%d" % linkrev
88 linkrev = b"%d" % linkrev
89 else:
89 else:
90 linkrev = b'?'
90 linkrev = b'?'
91 msg = b"%s: %s" % (linkrev, msg)
91 msg = b"%s: %s" % (linkrev, msg)
92 if filename:
92 if filename:
93 msg = b"%s@%s" % (filename, msg)
93 msg = b"%s@%s" % (filename, msg)
94 self.ui.warn(b" " + msg + b"\n")
94 self.ui.warn(b" " + msg + b"\n")
95 self.errors += 1
95 self.errors += 1
96
96
97 def _exc(self, linkrev, msg, inst, filename=None):
97 def _exc(self, linkrev, msg, inst, filename=None):
98 """record exception raised during the verify process"""
98 """record exception raised during the verify process"""
99 fmsg = stringutil.forcebytestr(inst)
99 fmsg = stringutil.forcebytestr(inst)
100 if not fmsg:
100 if not fmsg:
101 fmsg = pycompat.byterepr(inst)
101 fmsg = pycompat.byterepr(inst)
102 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
102 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
103
103
104 def _checkrevlog(self, obj, name, linkrev):
104 def _checkrevlog(self, obj, name, linkrev):
105 """verify high level property of a revlog
105 """verify high level property of a revlog
106
106
107 - revlog is present,
107 - revlog is present,
108 - revlog is non-empty,
108 - revlog is non-empty,
109 - sizes (index and data) are correct,
109 - sizes (index and data) are correct,
110 - revlog's format version is correct.
110 - revlog's format version is correct.
111 """
111 """
112 if not len(obj) and (self.havecl or self.havemf):
112 if not len(obj) and (self.havecl or self.havemf):
113 self._err(linkrev, _(b"empty or missing %s") % name)
113 self._err(linkrev, _(b"empty or missing %s") % name)
114 return
114 return
115
115
116 d = obj.checksize()
116 d = obj.checksize()
117 if d[0]:
117 if d[0]:
118 self._err(None, _(b"data length off by %d bytes") % d[0], name)
118 self._err(None, _(b"data length off by %d bytes") % d[0], name)
119 if d[1]:
119 if d[1]:
120 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
120 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
121
121
122 if obj._format_version != revlog.REVLOGV0:
122 if obj._format_version != revlog.REVLOGV0:
123 if not self.revlogv1:
123 if not self.revlogv1:
124 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
124 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
125 elif self.revlogv1:
125 elif self.revlogv1:
126 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
126 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
127
127
128 def _checkentry(self, obj, i, node, seen, linkrevs, f):
128 def _checkentry(self, obj, i, node, seen, linkrevs, f):
129 """verify a single revlog entry
129 """verify a single revlog entry
130
130
131 arguments are:
131 arguments are:
132 - obj: the source revlog
132 - obj: the source revlog
133 - i: the revision number
133 - i: the revision number
134 - node: the revision node id
134 - node: the revision node id
135 - seen: nodes previously seen for this revlog
135 - seen: nodes previously seen for this revlog
136 - linkrevs: [changelog-revisions] introducing "node"
136 - linkrevs: [changelog-revisions] introducing "node"
137 - f: string label ("changelog", "manifest", or filename)
137 - f: string label ("changelog", "manifest", or filename)
138
138
139 Performs the following checks:
139 Performs the following checks:
140 - linkrev points to an existing changelog revision,
140 - linkrev points to an existing changelog revision,
141 - linkrev points to a changelog revision that introduces this revision,
141 - linkrev points to a changelog revision that introduces this revision,
142 - linkrev points to the lowest of these changesets,
142 - linkrev points to the lowest of these changesets,
143 - both parents exist in the revlog,
143 - both parents exist in the revlog,
144 - the revision is not duplicated.
144 - the revision is not duplicated.
145
145
146 Return the linkrev of the revision (or None for changelog's revisions).
146 Return the linkrev of the revision (or None for changelog's revisions).
147 """
147 """
148 lr = obj.linkrev(obj.rev(node))
148 lr = obj.linkrev(obj.rev(node))
149 if lr < 0 or (self.havecl and lr not in linkrevs):
149 if lr < 0 or (self.havecl and lr not in linkrevs):
150 if lr < 0 or lr >= len(self.repo.changelog):
150 if lr < 0 or lr >= len(self.repo.changelog):
151 msg = _(b"rev %d points to nonexistent changeset %d")
151 msg = _(b"rev %d points to nonexistent changeset %d")
152 else:
152 else:
153 msg = _(b"rev %d points to unexpected changeset %d")
153 msg = _(b"rev %d points to unexpected changeset %d")
154 self._err(None, msg % (i, lr), f)
154 self._err(None, msg % (i, lr), f)
155 if linkrevs:
155 if linkrevs:
156 if f and len(linkrevs) > 1:
156 if f and len(linkrevs) > 1:
157 try:
157 try:
158 # attempt to filter down to real linkrevs
158 # attempt to filter down to real linkrevs
159 linkrevs = []
159 linkrevs = []
160 for lr in linkrevs:
160 for lr in linkrevs:
161 if self.lrugetctx(lr)[f].filenode() == node:
161 if self.lrugetctx(lr)[f].filenode() == node:
162 linkrevs.append(lr)
162 linkrevs.append(lr)
163 except Exception:
163 except Exception:
164 pass
164 pass
165 msg = _(b" (expected %s)")
165 msg = _(b" (expected %s)")
166 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
166 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
167 self._warn(msg)
167 self._warn(msg)
168 lr = None # can't be trusted
168 lr = None # can't be trusted
169
169
170 try:
170 try:
171 p1, p2 = obj.parents(node)
171 p1, p2 = obj.parents(node)
172 if p1 not in seen and p1 != self.repo.nullid:
172 if p1 not in seen and p1 != self.repo.nullid:
173 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
173 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
174 self._err(lr, msg, f)
174 self._err(lr, msg, f)
175 if p2 not in seen and p2 != self.repo.nullid:
175 if p2 not in seen and p2 != self.repo.nullid:
176 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
176 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
177 self._err(lr, msg, f)
177 self._err(lr, msg, f)
178 except Exception as inst:
178 except Exception as inst:
179 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
179 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
180
180
181 if node in seen:
181 if node in seen:
182 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
182 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
183 seen[node] = i
183 seen[node] = i
184 return lr
184 return lr
185
185
186 def verify(self):
186 def verify(self):
187 """verify the content of the Mercurial repository
187 """verify the content of the Mercurial repository
188
188
189 This method runs all verifications, displaying issues as they are found.
189 This method runs all verifications, displaying issues as they are found.
190
190
191 return 1 if any errors have been encountered, 0 otherwise."""
191 return 1 if any errors have been encountered, 0 otherwise."""
192 # initial validation and generic report
192 # initial validation and generic report
193 repo = self.repo
193 repo = self.repo
194 ui = repo.ui
194 ui = repo.ui
195 if not repo.url().startswith(b'file:'):
195 if not repo.url().startswith(b'file:'):
196 raise error.Abort(_(b"cannot verify bundle or remote repos"))
196 raise error.Abort(_(b"cannot verify bundle or remote repos"))
197
197
198 if os.path.exists(repo.sjoin(b"journal")):
198 if os.path.exists(repo.sjoin(b"journal")):
199 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
199 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
200
200
201 if ui.verbose or not self.revlogv1:
201 if ui.verbose or not self.revlogv1:
202 ui.status(
202 ui.status(
203 _(b"repository uses revlog format %d\n")
203 _(b"repository uses revlog format %d\n")
204 % (self.revlogv1 and 1 or 0)
204 % (self.revlogv1 and 1 or 0)
205 )
205 )
206
206
207 # data verification
207 # data verification
208 mflinkrevs, filelinkrevs = self._verifychangelog()
208 mflinkrevs, filelinkrevs = self._verifychangelog()
209 filenodes = self._verifymanifest(mflinkrevs)
209 filenodes = self._verifymanifest(mflinkrevs)
210 del mflinkrevs
210 del mflinkrevs
211 self._crosscheckfiles(filelinkrevs, filenodes)
211 self._crosscheckfiles(filelinkrevs, filenodes)
212 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
212 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
213
213
214 if self.errors:
214 if self.errors:
215 ui.warn(_(b"not checking dirstate because of previous errors\n"))
215 ui.warn(_(b"not checking dirstate because of previous errors\n"))
216 dirstate_errors = 0
216 dirstate_errors = 0
217 else:
217 else:
218 dirstate_errors = self._verify_dirstate()
218 dirstate_errors = self._verify_dirstate()
219
219
220 # final report
220 # final report
221 ui.status(
221 ui.status(
222 _(b"checked %d changesets with %d changes to %d files\n")
222 _(b"checked %d changesets with %d changes to %d files\n")
223 % (len(repo.changelog), filerevisions, totalfiles)
223 % (len(repo.changelog), filerevisions, totalfiles)
224 )
224 )
225 if self.warnings:
225 if self.warnings:
226 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
226 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
227 if self.fncachewarned:
227 if self.fncachewarned:
228 ui.warn(HINT_FNCACHE)
228 ui.warn(HINT_FNCACHE)
229 if self.errors:
229 if self.errors:
230 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
230 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
231 if self.badrevs:
231 if self.badrevs:
232 msg = _(b"(first damaged changeset appears to be %d)\n")
232 msg = _(b"(first damaged changeset appears to be %d)\n")
233 msg %= min(self.badrevs)
233 msg %= min(self.badrevs)
234 ui.warn(msg)
234 ui.warn(msg)
235 if dirstate_errors:
235 if dirstate_errors:
236 ui.warn(
236 ui.warn(
237 _(b"dirstate inconsistent with current parent's manifest\n")
237 _(b"dirstate inconsistent with current parent's manifest\n")
238 )
238 )
239 ui.warn(_(b"%d dirstate errors\n") % dirstate_errors)
239 ui.warn(_(b"%d dirstate errors\n") % dirstate_errors)
240 return 1
240 return 1
241 return 0
241 return 0
242
242
243 def _verifychangelog(self):
243 def _verifychangelog(self):
244 """verify the changelog of a repository
244 """verify the changelog of a repository
245
245
246 The following checks are performed:
246 The following checks are performed:
247 - all of `_checkrevlog` checks,
247 - all of `_checkrevlog` checks,
248 - all of `_checkentry` checks (for each revision),
248 - all of `_checkentry` checks (for each revision),
249 - each revision can be read.
249 - each revision can be read.
250
250
251 The function returns some of the data observed in the changesets as a
251 The function returns some of the data observed in the changesets as a
252 (mflinkrevs, filelinkrevs) tuple:
252 (mflinkrevs, filelinkrevs) tuple:
253 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
253 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
254 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
254 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
255
255
256 If a matcher was specified, filelinkrevs will only contain matched
256 If a matcher was specified, filelinkrevs will only contain matched
257 files.
257 files.
258 """
258 """
259 ui = self.ui
259 ui = self.ui
260 repo = self.repo
260 repo = self.repo
261 match = self.match
261 match = self.match
262 cl = repo.changelog
262 cl = repo.changelog
263
263
264 ui.status(_(b"checking changesets\n"))
264 ui.status(_(b"checking changesets\n"))
265 mflinkrevs = {}
265 mflinkrevs = {}
266 filelinkrevs = {}
266 filelinkrevs = {}
267 seen = {}
267 seen = {}
268 self._checkrevlog(cl, b"changelog", 0)
268 self._checkrevlog(cl, b"changelog", 0)
269 progress = ui.makeprogress(
269 progress = ui.makeprogress(
270 _(b'checking'), unit=_(b'changesets'), total=len(repo)
270 _(b'checking'), unit=_(b'changesets'), total=len(repo)
271 )
271 )
272 for i in repo:
272 for i in repo:
273 progress.update(i)
273 progress.update(i)
274 n = cl.node(i)
274 n = cl.node(i)
275 self._checkentry(cl, i, n, seen, [i], b"changelog")
275 self._checkentry(cl, i, n, seen, [i], b"changelog")
276
276
277 try:
277 try:
278 changes = cl.read(n)
278 changes = cl.read(n)
279 if changes[0] != self.repo.nullid:
279 if changes[0] != self.repo.nullid:
280 mflinkrevs.setdefault(changes[0], []).append(i)
280 mflinkrevs.setdefault(changes[0], []).append(i)
281 self.refersmf = True
281 self.refersmf = True
282 for f in changes[3]:
282 for f in changes[3]:
283 if match(f):
283 if match(f):
284 filelinkrevs.setdefault(_normpath(f), []).append(i)
284 filelinkrevs.setdefault(_normpath(f), []).append(i)
285 except Exception as inst:
285 except Exception as inst:
286 self.refersmf = True
286 self.refersmf = True
287 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
287 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
288 progress.complete()
288 progress.complete()
289 return mflinkrevs, filelinkrevs
289 return mflinkrevs, filelinkrevs
290
290
291 def _verifymanifest(
291 def _verifymanifest(
292 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
292 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
293 ):
293 ):
294 """verify the manifestlog content
294 """verify the manifestlog content
295
295
296 Inputs:
296 Inputs:
297 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
297 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
298 - dir: a subdirectory to check (for tree manifest repo)
298 - dir: a subdirectory to check (for tree manifest repo)
299 - storefiles: set of currently "orphan" files.
299 - storefiles: set of currently "orphan" files.
300 - subdirprogress: a progress object
300 - subdirprogress: a progress object
301
301
302 This function checks:
302 This function checks:
303 * all of `_checkrevlog` checks (for all manifest related revlogs)
303 * all of `_checkrevlog` checks (for all manifest related revlogs)
304 * all of `_checkentry` checks (for all manifest related revisions)
304 * all of `_checkentry` checks (for all manifest related revisions)
305 * nodes for subdirectories exist in the sub-directory manifest
305 * nodes for subdirectories exist in the sub-directory manifest
306 * each manifest entry has a file path
306 * each manifest entry has a file path
307 * each manifest node referred to in mflinkrevs exists in the manifest log
307 * each manifest node referred to in mflinkrevs exists in the manifest log
308
308
309 If tree manifest is in use and a matcher is specified, only the
309 If tree manifest is in use and a matcher is specified, only the
310 sub-directories matching it will be verified.
310 sub-directories matching it will be verified.
311
311
312 return a two level mapping:
312 return a two level mapping:
313 {"path" -> { filenode -> changelog-revision}}
313 {"path" -> { filenode -> changelog-revision}}
314
314
315 This mapping primarily contains entries for every file in the
315 This mapping primarily contains entries for every file in the
316 repository. In addition, when tree-manifest is used, it also contains
316 repository. In addition, when tree-manifest is used, it also contains
317 sub-directory entries.
317 sub-directory entries.
318
318
319 If a matcher is provided, only matching paths will be included.
319 If a matcher is provided, only matching paths will be included.
320 """
320 """
321 repo = self.repo
321 repo = self.repo
322 ui = self.ui
322 ui = self.ui
323 match = self.match
323 match = self.match
324 mfl = self.repo.manifestlog
324 mfl = self.repo.manifestlog
325 mf = mfl.getstorage(dir)
325 mf = mfl.getstorage(dir)
326
326
327 if not dir:
327 if not dir:
328 self.ui.status(_(b"checking manifests\n"))
328 self.ui.status(_(b"checking manifests\n"))
329
329
330 filenodes = {}
330 filenodes = {}
331 subdirnodes = {}
331 subdirnodes = {}
332 seen = {}
332 seen = {}
333 label = b"manifest"
333 label = b"manifest"
334 if dir:
334 if dir:
335 label = dir
335 label = dir
336 revlogfiles = mf.files()
336 revlogfiles = mf.files()
337 storefiles.difference_update(revlogfiles)
337 storefiles.difference_update(revlogfiles)
338 if subdirprogress: # should be true since we're in a subdirectory
338 if subdirprogress: # should be true since we're in a subdirectory
339 subdirprogress.increment()
339 subdirprogress.increment()
340 if self.refersmf:
340 if self.refersmf:
341 # Do not check manifest if there are only changelog entries with
341 # Do not check manifest if there are only changelog entries with
342 # null manifests.
342 # null manifests.
343 self._checkrevlog(mf._revlog, label, 0)
343 self._checkrevlog(mf._revlog, label, 0)
344 progress = ui.makeprogress(
344 progress = ui.makeprogress(
345 _(b'checking'), unit=_(b'manifests'), total=len(mf)
345 _(b'checking'), unit=_(b'manifests'), total=len(mf)
346 )
346 )
347 for i in mf:
347 for i in mf:
348 if not dir:
348 if not dir:
349 progress.update(i)
349 progress.update(i)
350 n = mf.node(i)
350 n = mf.node(i)
351 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
351 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
352 if n in mflinkrevs:
352 if n in mflinkrevs:
353 del mflinkrevs[n]
353 del mflinkrevs[n]
354 elif dir:
354 elif dir:
355 msg = _(b"%s not in parent-directory manifest") % short(n)
355 msg = _(b"%s not in parent-directory manifest") % short(n)
356 self._err(lr, msg, label)
356 self._err(lr, msg, label)
357 else:
357 else:
358 self._err(lr, _(b"%s not in changesets") % short(n), label)
358 self._err(lr, _(b"%s not in changesets") % short(n), label)
359
359
360 try:
360 try:
361 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
361 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
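# Reading the delta with shallow=True keeps sub-directories unexpanded:
# in a tree-manifest repository they appear as single entries flagged
# b't', which the loop below collects into `subdirnodes` for a later
# recursive pass (description inferred from the surrounding code).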
362 for f, fn, fl in mfdelta.iterentries():
362 for f, fn, fl in mfdelta.iterentries():
363 if not f:
363 if not f:
364 self._err(lr, _(b"entry without name in manifest"))
364 self._err(lr, _(b"entry without name in manifest"))
365 elif f == b"/dev/null": # ignore this in very old repos
365 elif f == b"/dev/null": # ignore this in very old repos
366 continue
366 continue
367 fullpath = dir + _normpath(f)
367 fullpath = dir + _normpath(f)
368 if fl == b't':
368 if fl == b't':
369 if not match.visitdir(fullpath):
369 if not match.visitdir(fullpath):
370 continue
370 continue
371 sdn = subdirnodes.setdefault(fullpath + b'/', {})
371 sdn = subdirnodes.setdefault(fullpath + b'/', {})
372 sdn.setdefault(fn, []).append(lr)
372 sdn.setdefault(fn, []).append(lr)
373 else:
373 else:
374 if not match(fullpath):
374 if not match(fullpath):
375 continue
375 continue
376 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
376 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
377 except Exception as inst:
377 except Exception as inst:
378 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
378 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
379 if self._level >= VERIFY_FULL:
379 if self._level >= VERIFY_FULL:
380 try:
380 try:
381 # Various issues can affect manifests, so we read each full
381 # Various issues can affect manifests, so we read each full
382 # text from storage. This triggers the checks from the core
382 # text from storage. This triggers the checks from the core
383 # code (e.g. hash verification, ordered filenames, etc.)
383 # code (e.g. hash verification, ordered filenames, etc.)
384 mfdelta = mfl.get(dir, n).read()
384 mfdelta = mfl.get(dir, n).read()
385 except Exception as inst:
385 except Exception as inst:
386 msg = _(b"reading full manifest %s") % short(n)
386 msg = _(b"reading full manifest %s") % short(n)
387 self._exc(lr, msg, inst, label)
387 self._exc(lr, msg, inst, label)
388
388
389 if not dir:
389 if not dir:
390 progress.complete()
390 progress.complete()
391
391
392 if self.havemf:
392 if self.havemf:
393 # since we delete entries in `mflinkrevs` during iteration, any
393 # since we delete entries in `mflinkrevs` during iteration, any
394 # remaining entries are "missing". We need to issue errors for them.
394 # remaining entries are "missing". We need to issue errors for them.
395 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
395 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
396 for c, m in sorted(changesetpairs):
396 for c, m in sorted(changesetpairs):
397 if dir:
397 if dir:
398 self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
398 self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
399 else:
399 else:
400 msg = _(b"changeset refers to unknown revision %s")
400 msg = _(b"changeset refers to unknown revision %s")
401 msg %= short(m)
401 msg %= short(m)
402 self._err(c, msg, label)
402 self._err(c, msg, label)
403
403
404 if not dir and subdirnodes:
404 if not dir and subdirnodes:
405 self.ui.status(_(b"checking directory manifests\n"))
405 self.ui.status(_(b"checking directory manifests\n"))
406 storefiles = set()
406 storefiles = set()
407 subdirs = set()
407 subdirs = set()
408 revlogv1 = self.revlogv1
408 revlogv1 = self.revlogv1
409 undecodable = []
409 undecodable = []
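# Each store entry may describe one or more on-disk files; the per-file
# path and size come from the EntryFile objects yielded by entry.files()
# (the API this change introduces). Paths under b'meta/' hold the
# tree-manifest sub-directory revlogs collected here.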
410 for entry in repo.store.datafiles(undecodable=undecodable):
410 for entry in repo.store.datafiles(undecodable=undecodable):
411 f = entry.unencoded_path
411 for file_ in entry.files():
412 size = entry.file_size
412 f = file_.unencoded_path
413 size = file_.file_size
413 if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
414 if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
414 storefiles.add(_normpath(f))
415 storefiles.add(_normpath(f))
415 subdirs.add(os.path.dirname(f))
416 subdirs.add(os.path.dirname(f))
416 for f in undecodable:
417 for f in undecodable:
417 self._err(None, _(b"cannot decode filename '%s'") % f)
418 self._err(None, _(b"cannot decode filename '%s'") % f)
418 subdirprogress = ui.makeprogress(
419 subdirprogress = ui.makeprogress(
419 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
420 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
420 )
421 )
421
422
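# Recurse into every collected sub-directory manifest, sharing the same
# `storefiles` set and progress object so orphan-file detection and
# progress reporting stay global across the whole tree.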
422 for subdir, linkrevs in subdirnodes.items():
423 for subdir, linkrevs in subdirnodes.items():
423 subdirfilenodes = self._verifymanifest(
424 subdirfilenodes = self._verifymanifest(
424 linkrevs, subdir, storefiles, subdirprogress
425 linkrevs, subdir, storefiles, subdirprogress
425 )
426 )
426 for f, onefilenodes in subdirfilenodes.items():
427 for f, onefilenodes in subdirfilenodes.items():
427 filenodes.setdefault(f, {}).update(onefilenodes)
428 filenodes.setdefault(f, {}).update(onefilenodes)
428
429
429 if not dir and subdirnodes:
430 if not dir and subdirnodes:
430 assert subdirprogress is not None # help pytype
431 assert subdirprogress is not None # help pytype
431 subdirprogress.complete()
432 subdirprogress.complete()
432 if self.warnorphanstorefiles:
433 if self.warnorphanstorefiles:
433 for f in sorted(storefiles):
434 for f in sorted(storefiles):
434 self._warn(_(b"warning: orphan data file '%s'") % f)
435 self._warn(_(b"warning: orphan data file '%s'") % f)
435
436
436 return filenodes
437 return filenodes
437
438
438 def _crosscheckfiles(self, filelinkrevs, filenodes):
439 def _crosscheckfiles(self, filelinkrevs, filenodes):
439 repo = self.repo
440 repo = self.repo
440 ui = self.ui
441 ui = self.ui
441 ui.status(_(b"crosschecking files in changesets and manifests\n"))
442 ui.status(_(b"crosschecking files in changesets and manifests\n"))
442
443
443 total = len(filelinkrevs) + len(filenodes)
444 total = len(filelinkrevs) + len(filenodes)
444 progress = ui.makeprogress(
445 progress = ui.makeprogress(
445 _(b'crosschecking'), unit=_(b'files'), total=total
446 _(b'crosschecking'), unit=_(b'files'), total=total
446 )
447 )
447 if self.havemf:
448 if self.havemf:
448 for f in sorted(filelinkrevs):
449 for f in sorted(filelinkrevs):
449 progress.increment()
450 progress.increment()
450 if f not in filenodes:
451 if f not in filenodes:
451 lr = filelinkrevs[f][0]
452 lr = filelinkrevs[f][0]
452 self._err(lr, _(b"in changeset but not in manifest"), f)
453 self._err(lr, _(b"in changeset but not in manifest"), f)
453
454
454 if self.havecl:
455 if self.havecl:
455 for f in sorted(filenodes):
456 for f in sorted(filenodes):
456 progress.increment()
457 progress.increment()
457 if f not in filelinkrevs:
458 if f not in filelinkrevs:
458 try:
459 try:
459 fl = repo.file(f)
460 fl = repo.file(f)
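# Best effort: report the error against the earliest changeset that
# references one of the offending filenodes (the smallest linkrev).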
460 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
461 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
461 except Exception:
462 except Exception:
462 lr = None
463 lr = None
463 self._err(lr, _(b"in manifest but not in changeset"), f)
464 self._err(lr, _(b"in manifest but not in changeset"), f)
464
465
465 progress.complete()
466 progress.complete()
466
467
467 def _verifyfiles(self, filenodes, filelinkrevs):
468 def _verifyfiles(self, filenodes, filelinkrevs):
468 repo = self.repo
469 repo = self.repo
469 ui = self.ui
470 ui = self.ui
470 lrugetctx = self.lrugetctx
471 lrugetctx = self.lrugetctx
471 revlogv1 = self.revlogv1
472 revlogv1 = self.revlogv1
472 havemf = self.havemf
473 havemf = self.havemf
473 ui.status(_(b"checking files\n"))
474 ui.status(_(b"checking files\n"))
474
475
475 storefiles = set()
476 storefiles = set()
476 undecodable = []
477 undecodable = []
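# Same EntryFile-based iteration as in _verifymanifest(), restricted to
# b'data/' paths, i.e. the per-file (filelog) revlogs; for revlogv1
# stores, empty revlogs are skipped.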
477 for entry in repo.store.datafiles(undecodable=undecodable):
478 for entry in repo.store.datafiles(undecodable=undecodable):
478 size = entry.file_size
479 for file_ in entry.files():
479 f = entry.unencoded_path
480 size = file_.file_size
481 f = file_.unencoded_path
480 if (size > 0 or not revlogv1) and f.startswith(b'data/'):
482 if (size > 0 or not revlogv1) and f.startswith(b'data/'):
481 storefiles.add(_normpath(f))
483 storefiles.add(_normpath(f))
482 for f in undecodable:
484 for f in undecodable:
483 self._err(None, _(b"cannot decode filename '%s'") % f)
485 self._err(None, _(b"cannot decode filename '%s'") % f)
484
486
485 state = {
487 state = {
486 # TODO this assumes revlog storage for changelog.
488 # TODO this assumes revlog storage for changelog.
487 b'expectedversion': self.repo.changelog._format_version,
489 b'expectedversion': self.repo.changelog._format_version,
488 b'skipflags': self.skipflags,
490 b'skipflags': self.skipflags,
489 # experimental config: censor.policy
491 # experimental config: censor.policy
490 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
492 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
491 }
493 }
492
494
493 files = sorted(set(filenodes) | set(filelinkrevs))
495 files = sorted(set(filenodes) | set(filelinkrevs))
494 revisions = 0
496 revisions = 0
495 progress = ui.makeprogress(
497 progress = ui.makeprogress(
496 _(b'checking'), unit=_(b'files'), total=len(files)
498 _(b'checking'), unit=_(b'files'), total=len(files)
497 )
499 )
498 for i, f in enumerate(files):
500 for i, f in enumerate(files):
499 progress.update(i, item=f)
501 progress.update(i, item=f)
500 try:
502 try:
501 linkrevs = filelinkrevs[f]
503 linkrevs = filelinkrevs[f]
502 except KeyError:
504 except KeyError:
503 # in manifest but not in changelog
505 # in manifest but not in changelog
504 linkrevs = []
506 linkrevs = []
505
507
506 if linkrevs:
508 if linkrevs:
507 lr = linkrevs[0]
509 lr = linkrevs[0]
508 else:
510 else:
509 lr = None
511 lr = None
510
512
511 try:
513 try:
512 fl = repo.file(f)
514 fl = repo.file(f)
513 except error.StorageError as e:
515 except error.StorageError as e:
514 self._err(lr, _(b"broken revlog! (%s)") % e, f)
516 self._err(lr, _(b"broken revlog! (%s)") % e, f)
515 continue
517 continue
516
518
517 for ff in fl.files():
519 for ff in fl.files():
518 try:
520 try:
519 storefiles.remove(ff)
521 storefiles.remove(ff)
520 except KeyError:
522 except KeyError:
521 if self.warnorphanstorefiles:
523 if self.warnorphanstorefiles:
522 msg = _(b" warning: revlog '%s' not in fncache!")
524 msg = _(b" warning: revlog '%s' not in fncache!")
523 self._warn(msg % ff)
525 self._warn(msg % ff)
524 self.fncachewarned = True
526 self.fncachewarned = True
525
527
526 if not len(fl) and (self.havecl or self.havemf):
528 if not len(fl) and (self.havecl or self.havemf):
527 self._err(lr, _(b"empty or missing %s") % f)
529 self._err(lr, _(b"empty or missing %s") % f)
528 else:
530 else:
529 # Guard against implementations not setting this.
531 # Guard against implementations not setting this.
530 state[b'skipread'] = set()
532 state[b'skipread'] = set()
531 state[b'safe_renamed'] = set()
533 state[b'safe_renamed'] = set()
532
534
533 for problem in fl.verifyintegrity(state):
535 for problem in fl.verifyintegrity(state):
534 if problem.node is not None:
536 if problem.node is not None:
535 linkrev = fl.linkrev(fl.rev(problem.node))
537 linkrev = fl.linkrev(fl.rev(problem.node))
536 else:
538 else:
537 linkrev = None
539 linkrev = None
538
540
539 if problem.warning:
541 if problem.warning:
540 self._warn(problem.warning)
542 self._warn(problem.warning)
541 elif problem.error:
543 elif problem.error:
542 linkrev_msg = linkrev if linkrev is not None else lr
544 linkrev_msg = linkrev if linkrev is not None else lr
543 self._err(linkrev_msg, problem.error, f)
545 self._err(linkrev_msg, problem.error, f)
544 else:
546 else:
545 raise error.ProgrammingError(
547 raise error.ProgrammingError(
546 b'problem instance does not set warning or error '
548 b'problem instance does not set warning or error '
547 b'attribute: %s' % problem.msg
549 b'attribute: %s' % problem.msg
548 )
550 )
549
551
550 seen = {}
552 seen = {}
551 for i in fl:
553 for i in fl:
552 revisions += 1
554 revisions += 1
553 n = fl.node(i)
555 n = fl.node(i)
554 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
556 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
555 if f in filenodes:
557 if f in filenodes:
556 if havemf and n not in filenodes[f]:
558 if havemf and n not in filenodes[f]:
557 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
559 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
558 else:
560 else:
559 del filenodes[f][n]
561 del filenodes[f][n]
560
562
561 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
563 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
562 continue
564 continue
563
565
564 # check renames
566 # check renames
565 try:
567 try:
566 # This requires resolving fulltext (at least on revlogs,
568 # This requires resolving fulltext (at least on revlogs,
567 # though not with LFS revisions). We may want
569 # though not with LFS revisions). We may want
568 # ``verifyintegrity()`` to pass a set of nodes with
570 # ``verifyintegrity()`` to pass a set of nodes with
569 # rename metadata as an optimization.
571 # rename metadata as an optimization.
570 rp = fl.renamed(n)
572 rp = fl.renamed(n)
571 if rp:
573 if rp:
572 if lr is not None and ui.verbose:
574 if lr is not None and ui.verbose:
573 ctx = lrugetctx(lr)
575 ctx = lrugetctx(lr)
574 if not any(rp[0] in pctx for pctx in ctx.parents()):
576 if not any(rp[0] in pctx for pctx in ctx.parents()):
575 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
577 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
576 fl2 = repo.file(rp[0])
578 fl2 = repo.file(rp[0])
577 if not len(fl2):
579 if not len(fl2):
578 m = _(b"empty or missing copy source revlog %s:%s")
580 m = _(b"empty or missing copy source revlog %s:%s")
579 self._err(lr, m % (rp[0], short(rp[1])), f)
581 self._err(lr, m % (rp[0], short(rp[1])), f)
580 elif rp[1] == self.repo.nullid:
582 elif rp[1] == self.repo.nullid:
581 msg = WARN_NULLID_COPY_SOURCE
583 msg = WARN_NULLID_COPY_SOURCE
582 msg %= (f, lr, rp[0], short(rp[1]))
584 msg %= (f, lr, rp[0], short(rp[1]))
583 ui.note(msg)
585 ui.note(msg)
584 else:
586 else:
585 fl2.rev(rp[1])
587 fl2.rev(rp[1])
586 except Exception as inst:
588 except Exception as inst:
587 self._exc(
589 self._exc(
588 lr, _(b"checking rename of %s") % short(n), inst, f
590 lr, _(b"checking rename of %s") % short(n), inst, f
589 )
591 )
590
592
591 # cross-check
593 # cross-check
592 if f in filenodes:
594 if f in filenodes:
593 fns = [(v, k) for k, v in filenodes[f].items()]
595 fns = [(v, k) for k, v in filenodes[f].items()]
594 for lr, node in sorted(fns):
596 for lr, node in sorted(fns):
595 msg = _(b"manifest refers to unknown revision %s")
597 msg = _(b"manifest refers to unknown revision %s")
596 self._err(lr, msg % short(node), f)
598 self._err(lr, msg % short(node), f)
597 progress.complete()
599 progress.complete()
598
600
599 if self.warnorphanstorefiles:
601 if self.warnorphanstorefiles:
600 for f in sorted(storefiles):
602 for f in sorted(storefiles):
601 self._warn(_(b"warning: orphan data file '%s'") % f)
603 self._warn(_(b"warning: orphan data file '%s'") % f)
602
604
603 return len(files), revisions
605 return len(files), revisions
604
606
605 def _verify_dirstate(self):
607 def _verify_dirstate(self):
606 """Check that the dirstate is consistent with the parent's manifest"""
608 """Check that the dirstate is consistent with the parent's manifest"""
607 repo = self.repo
609 repo = self.repo
608 ui = self.ui
610 ui = self.ui
609 ui.status(_(b"checking dirstate\n"))
611 ui.status(_(b"checking dirstate\n"))
610
612
611 parent1, parent2 = repo.dirstate.parents()
613 parent1, parent2 = repo.dirstate.parents()
612 m1 = repo[parent1].manifest()
614 m1 = repo[parent1].manifest()
613 m2 = repo[parent2].manifest()
615 m2 = repo[parent2].manifest()
614 dirstate_errors = 0
616 dirstate_errors = 0
615
617
616 is_narrow = requirements.NARROW_REQUIREMENT in repo.requirements
618 is_narrow = requirements.NARROW_REQUIREMENT in repo.requirements
617 narrow_matcher = repo.narrowmatch() if is_narrow else None
619 narrow_matcher = repo.narrowmatch() if is_narrow else None
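# For narrow clones, the matcher is assumed to restrict the comparison
# to paths inside the narrowspec, since files outside it are not
# expected in the dirstate.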
618
620
619 for err in repo.dirstate.verify(m1, m2, parent1, narrow_matcher):
621 for err in repo.dirstate.verify(m1, m2, parent1, narrow_matcher):
620 ui.error(err)
622 ui.error(err)
621 dirstate_errors += 1
623 dirstate_errors += 1
622
624
623 if dirstate_errors:
625 if dirstate_errors:
624 self.errors += dirstate_errors
626 self.errors += dirstate_errors
625 return dirstate_errors
627 return dirstate_errors