store: return just one filename in walk functions...
Valentin Gatien-Baron
r48691:2174f54a default
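The hunk below contains this file's only visible change (line 292): repo.store.datafiles() now yields a single filename per entry instead of two. A minimal sketch of a caller adapting to that, assuming the old tuples were (type, name, second_name, size) and the new ones are (type, name, size) as the hunk suggests; the helper name files_to_delete is illustrative only, not part of the change:

    # Hypothetical caller mirroring the loop changed in _narrow() below.
    # Before: repo.store.datafiles() yielded (type, name, second_name, size).
    # After:  it yields (type, name, size) -- one filename per entry.
    def files_to_delete(repo, newmatch):
        todelete = []
        for t, f, size in repo.store.datafiles():
            # 'data/<path>.i' -> strip the 'data/' prefix and the 2-char suffix
            if f.startswith(b'data/') and not newmatch(f[5:-2]):
                todelete.append(f)
        return todelete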
@@ -1,694 +1,694 @@
1 # narrowcommands.py - command modifications for narrowhg extension
1 # narrowcommands.py - command modifications for narrowhg extension
2 #
2 #
3 # Copyright 2017 Google, Inc.
3 # Copyright 2017 Google, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7 from __future__ import absolute_import
7 from __future__ import absolute_import
8
8
9 import itertools
9 import itertools
10 import os
10 import os
11
11
12 from mercurial.i18n import _
12 from mercurial.i18n import _
13 from mercurial.node import (
13 from mercurial.node import (
14 hex,
14 hex,
15 short,
15 short,
16 )
16 )
17 from mercurial import (
17 from mercurial import (
18 bundle2,
18 bundle2,
19 cmdutil,
19 cmdutil,
20 commands,
20 commands,
21 discovery,
21 discovery,
22 encoding,
22 encoding,
23 error,
23 error,
24 exchange,
24 exchange,
25 extensions,
25 extensions,
26 hg,
26 hg,
27 narrowspec,
27 narrowspec,
28 pathutil,
28 pathutil,
29 pycompat,
29 pycompat,
30 registrar,
30 registrar,
31 repair,
31 repair,
32 repoview,
32 repoview,
33 requirements,
33 requirements,
34 sparse,
34 sparse,
35 util,
35 util,
36 wireprototypes,
36 wireprototypes,
37 )
37 )
38 from mercurial.utils import (
38 from mercurial.utils import (
39 urlutil,
39 urlutil,
40 )
40 )
41
41
42 table = {}
42 table = {}
43 command = registrar.command(table)
43 command = registrar.command(table)
44
44
45
45
46 def setup():
46 def setup():
47 """Wraps user-facing mercurial commands with narrow-aware versions."""
47 """Wraps user-facing mercurial commands with narrow-aware versions."""
48
48
49 entry = extensions.wrapcommand(commands.table, b'clone', clonenarrowcmd)
49 entry = extensions.wrapcommand(commands.table, b'clone', clonenarrowcmd)
50 entry[1].append(
50 entry[1].append(
51 (b'', b'narrow', None, _(b"create a narrow clone of select files"))
51 (b'', b'narrow', None, _(b"create a narrow clone of select files"))
52 )
52 )
53 entry[1].append(
53 entry[1].append(
54 (
54 (
55 b'',
55 b'',
56 b'depth',
56 b'depth',
57 b'',
57 b'',
58 _(b"limit the history fetched by distance from heads"),
58 _(b"limit the history fetched by distance from heads"),
59 )
59 )
60 )
60 )
61 entry[1].append((b'', b'narrowspec', b'', _(b"read narrowspecs from file")))
61 entry[1].append((b'', b'narrowspec', b'', _(b"read narrowspecs from file")))
62 # TODO(durin42): unify sparse/narrow --include/--exclude logic a bit
62 # TODO(durin42): unify sparse/narrow --include/--exclude logic a bit
63 if b'sparse' not in extensions.enabled():
63 if b'sparse' not in extensions.enabled():
64 entry[1].append(
64 entry[1].append(
65 (b'', b'include', [], _(b"specifically fetch this file/directory"))
65 (b'', b'include', [], _(b"specifically fetch this file/directory"))
66 )
66 )
67 entry[1].append(
67 entry[1].append(
68 (
68 (
69 b'',
69 b'',
70 b'exclude',
70 b'exclude',
71 [],
71 [],
72 _(b"do not fetch this file/directory, even if included"),
72 _(b"do not fetch this file/directory, even if included"),
73 )
73 )
74 )
74 )
75
75
76 entry = extensions.wrapcommand(commands.table, b'pull', pullnarrowcmd)
76 entry = extensions.wrapcommand(commands.table, b'pull', pullnarrowcmd)
77 entry[1].append(
77 entry[1].append(
78 (
78 (
79 b'',
79 b'',
80 b'depth',
80 b'depth',
81 b'',
81 b'',
82 _(b"limit the history fetched by distance from heads"),
82 _(b"limit the history fetched by distance from heads"),
83 )
83 )
84 )
84 )
85
85
86 extensions.wrapcommand(commands.table, b'archive', archivenarrowcmd)
86 extensions.wrapcommand(commands.table, b'archive', archivenarrowcmd)
87
87
88
88
89 def clonenarrowcmd(orig, ui, repo, *args, **opts):
89 def clonenarrowcmd(orig, ui, repo, *args, **opts):
90 """Wraps clone command, so 'hg clone' first wraps localrepo.clone()."""
90 """Wraps clone command, so 'hg clone' first wraps localrepo.clone()."""
91 opts = pycompat.byteskwargs(opts)
91 opts = pycompat.byteskwargs(opts)
92 wrappedextraprepare = util.nullcontextmanager()
92 wrappedextraprepare = util.nullcontextmanager()
93 narrowspecfile = opts[b'narrowspec']
93 narrowspecfile = opts[b'narrowspec']
94
94
95 if narrowspecfile:
95 if narrowspecfile:
96 filepath = os.path.join(encoding.getcwd(), narrowspecfile)
96 filepath = os.path.join(encoding.getcwd(), narrowspecfile)
97 ui.status(_(b"reading narrowspec from '%s'\n") % filepath)
97 ui.status(_(b"reading narrowspec from '%s'\n") % filepath)
98 try:
98 try:
99 fdata = util.readfile(filepath)
99 fdata = util.readfile(filepath)
100 except IOError as inst:
100 except IOError as inst:
101 raise error.Abort(
101 raise error.Abort(
102 _(b"cannot read narrowspecs from '%s': %s")
102 _(b"cannot read narrowspecs from '%s': %s")
103 % (filepath, encoding.strtolocal(inst.strerror))
103 % (filepath, encoding.strtolocal(inst.strerror))
104 )
104 )
105
105
106 includes, excludes, profiles = sparse.parseconfig(ui, fdata, b'narrow')
106 includes, excludes, profiles = sparse.parseconfig(ui, fdata, b'narrow')
107 if profiles:
107 if profiles:
108 raise error.ConfigError(
108 raise error.ConfigError(
109 _(
109 _(
110 b"cannot specify other files using '%include' in"
110 b"cannot specify other files using '%include' in"
111 b" narrowspec"
111 b" narrowspec"
112 )
112 )
113 )
113 )
114
114
115 narrowspec.validatepatterns(includes)
115 narrowspec.validatepatterns(includes)
116 narrowspec.validatepatterns(excludes)
116 narrowspec.validatepatterns(excludes)
117
117
118 # narrowspec is passed so we should assume that user wants narrow clone
118 # narrowspec is passed so we should assume that user wants narrow clone
119 opts[b'narrow'] = True
119 opts[b'narrow'] = True
120 opts[b'include'].extend(includes)
120 opts[b'include'].extend(includes)
121 opts[b'exclude'].extend(excludes)
121 opts[b'exclude'].extend(excludes)
122
122
123 if opts[b'narrow']:
123 if opts[b'narrow']:
124
124
125 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
125 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
126 orig(pullop, kwargs)
126 orig(pullop, kwargs)
127
127
128 if opts.get(b'depth'):
128 if opts.get(b'depth'):
129 kwargs[b'depth'] = opts[b'depth']
129 kwargs[b'depth'] = opts[b'depth']
130
130
131 wrappedextraprepare = extensions.wrappedfunction(
131 wrappedextraprepare = extensions.wrappedfunction(
132 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
132 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
133 )
133 )
134
134
135 with wrappedextraprepare:
135 with wrappedextraprepare:
136 return orig(ui, repo, *args, **pycompat.strkwargs(opts))
136 return orig(ui, repo, *args, **pycompat.strkwargs(opts))
137
137
138
138
139 def pullnarrowcmd(orig, ui, repo, *args, **opts):
139 def pullnarrowcmd(orig, ui, repo, *args, **opts):
140 """Wraps pull command to allow modifying narrow spec."""
140 """Wraps pull command to allow modifying narrow spec."""
141 wrappedextraprepare = util.nullcontextmanager()
141 wrappedextraprepare = util.nullcontextmanager()
142 if requirements.NARROW_REQUIREMENT in repo.requirements:
142 if requirements.NARROW_REQUIREMENT in repo.requirements:
143
143
144 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
144 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
145 orig(pullop, kwargs)
145 orig(pullop, kwargs)
146 if opts.get('depth'):
146 if opts.get('depth'):
147 kwargs[b'depth'] = opts['depth']
147 kwargs[b'depth'] = opts['depth']
148
148
149 wrappedextraprepare = extensions.wrappedfunction(
149 wrappedextraprepare = extensions.wrappedfunction(
150 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
150 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
151 )
151 )
152
152
153 with wrappedextraprepare:
153 with wrappedextraprepare:
154 return orig(ui, repo, *args, **opts)
154 return orig(ui, repo, *args, **opts)
155
155
156
156
157 def archivenarrowcmd(orig, ui, repo, *args, **opts):
157 def archivenarrowcmd(orig, ui, repo, *args, **opts):
158 """Wraps archive command to narrow the default includes."""
158 """Wraps archive command to narrow the default includes."""
159 if requirements.NARROW_REQUIREMENT in repo.requirements:
159 if requirements.NARROW_REQUIREMENT in repo.requirements:
160 repo_includes, repo_excludes = repo.narrowpats
160 repo_includes, repo_excludes = repo.narrowpats
161 includes = set(opts.get('include', []))
161 includes = set(opts.get('include', []))
162 excludes = set(opts.get('exclude', []))
162 excludes = set(opts.get('exclude', []))
163 includes, excludes, unused_invalid = narrowspec.restrictpatterns(
163 includes, excludes, unused_invalid = narrowspec.restrictpatterns(
164 includes, excludes, repo_includes, repo_excludes
164 includes, excludes, repo_includes, repo_excludes
165 )
165 )
166 if includes:
166 if includes:
167 opts['include'] = includes
167 opts['include'] = includes
168 if excludes:
168 if excludes:
169 opts['exclude'] = excludes
169 opts['exclude'] = excludes
170 return orig(ui, repo, *args, **opts)
170 return orig(ui, repo, *args, **opts)
171
171
172
172
173 def pullbundle2extraprepare(orig, pullop, kwargs):
173 def pullbundle2extraprepare(orig, pullop, kwargs):
174 repo = pullop.repo
174 repo = pullop.repo
175 if requirements.NARROW_REQUIREMENT not in repo.requirements:
175 if requirements.NARROW_REQUIREMENT not in repo.requirements:
176 return orig(pullop, kwargs)
176 return orig(pullop, kwargs)
177
177
178 if wireprototypes.NARROWCAP not in pullop.remote.capabilities():
178 if wireprototypes.NARROWCAP not in pullop.remote.capabilities():
179 raise error.Abort(_(b"server does not support narrow clones"))
179 raise error.Abort(_(b"server does not support narrow clones"))
180 orig(pullop, kwargs)
180 orig(pullop, kwargs)
181 kwargs[b'narrow'] = True
181 kwargs[b'narrow'] = True
182 include, exclude = repo.narrowpats
182 include, exclude = repo.narrowpats
183 kwargs[b'oldincludepats'] = include
183 kwargs[b'oldincludepats'] = include
184 kwargs[b'oldexcludepats'] = exclude
184 kwargs[b'oldexcludepats'] = exclude
185 if include:
185 if include:
186 kwargs[b'includepats'] = include
186 kwargs[b'includepats'] = include
187 if exclude:
187 if exclude:
188 kwargs[b'excludepats'] = exclude
188 kwargs[b'excludepats'] = exclude
189 # calculate known nodes only in ellipses cases because in non-ellipses cases
189 # calculate known nodes only in ellipses cases because in non-ellipses cases
190 # we have all the nodes
190 # we have all the nodes
191 if wireprototypes.ELLIPSESCAP1 in pullop.remote.capabilities():
191 if wireprototypes.ELLIPSESCAP1 in pullop.remote.capabilities():
192 kwargs[b'known'] = [
192 kwargs[b'known'] = [
193 hex(ctx.node())
193 hex(ctx.node())
194 for ctx in repo.set(b'::%ln', pullop.common)
194 for ctx in repo.set(b'::%ln', pullop.common)
195 if ctx.node() != repo.nullid
195 if ctx.node() != repo.nullid
196 ]
196 ]
197 if not kwargs[b'known']:
197 if not kwargs[b'known']:
198 # Mercurial serializes an empty list as '' and deserializes it as
198 # Mercurial serializes an empty list as '' and deserializes it as
199 # [''], so delete it instead to avoid handling the empty string on
199 # [''], so delete it instead to avoid handling the empty string on
200 # the server.
200 # the server.
201 del kwargs[b'known']
201 del kwargs[b'known']
202
202
203
203
204 extensions.wrapfunction(
204 extensions.wrapfunction(
205 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare
205 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare
206 )
206 )
207
207
208
208
209 def _narrow(
209 def _narrow(
210 ui,
210 ui,
211 repo,
211 repo,
212 remote,
212 remote,
213 commoninc,
213 commoninc,
214 oldincludes,
214 oldincludes,
215 oldexcludes,
215 oldexcludes,
216 newincludes,
216 newincludes,
217 newexcludes,
217 newexcludes,
218 force,
218 force,
219 backup,
219 backup,
220 ):
220 ):
221 oldmatch = narrowspec.match(repo.root, oldincludes, oldexcludes)
221 oldmatch = narrowspec.match(repo.root, oldincludes, oldexcludes)
222 newmatch = narrowspec.match(repo.root, newincludes, newexcludes)
222 newmatch = narrowspec.match(repo.root, newincludes, newexcludes)
223
223
224 # This is essentially doing "hg outgoing" to find all local-only
224 # This is essentially doing "hg outgoing" to find all local-only
225 # commits. We will then check that the local-only commits don't
225 # commits. We will then check that the local-only commits don't
226 # have any changes to files that will be untracked.
226 # have any changes to files that will be untracked.
227 unfi = repo.unfiltered()
227 unfi = repo.unfiltered()
228 outgoing = discovery.findcommonoutgoing(unfi, remote, commoninc=commoninc)
228 outgoing = discovery.findcommonoutgoing(unfi, remote, commoninc=commoninc)
229 ui.status(_(b'looking for local changes to affected paths\n'))
229 ui.status(_(b'looking for local changes to affected paths\n'))
230 progress = ui.makeprogress(
230 progress = ui.makeprogress(
231 topic=_(b'changesets'),
231 topic=_(b'changesets'),
232 unit=_(b'changesets'),
232 unit=_(b'changesets'),
233 total=len(outgoing.missing) + len(outgoing.excluded),
233 total=len(outgoing.missing) + len(outgoing.excluded),
234 )
234 )
235 localnodes = []
235 localnodes = []
236 with progress:
236 with progress:
237 for n in itertools.chain(outgoing.missing, outgoing.excluded):
237 for n in itertools.chain(outgoing.missing, outgoing.excluded):
238 progress.increment()
238 progress.increment()
239 if any(oldmatch(f) and not newmatch(f) for f in unfi[n].files()):
239 if any(oldmatch(f) and not newmatch(f) for f in unfi[n].files()):
240 localnodes.append(n)
240 localnodes.append(n)
241 revstostrip = unfi.revs(b'descendants(%ln)', localnodes)
241 revstostrip = unfi.revs(b'descendants(%ln)', localnodes)
242 hiddenrevs = repoview.filterrevs(repo, b'visible')
242 hiddenrevs = repoview.filterrevs(repo, b'visible')
243 visibletostrip = list(
243 visibletostrip = list(
244 repo.changelog.node(r) for r in (revstostrip - hiddenrevs)
244 repo.changelog.node(r) for r in (revstostrip - hiddenrevs)
245 )
245 )
246 if visibletostrip:
246 if visibletostrip:
247 ui.status(
247 ui.status(
248 _(
248 _(
249 b'The following changeset(s) or their ancestors have '
249 b'The following changeset(s) or their ancestors have '
250 b'local changes not on the remote:\n'
250 b'local changes not on the remote:\n'
251 )
251 )
252 )
252 )
253 maxnodes = 10
253 maxnodes = 10
254 if ui.verbose or len(visibletostrip) <= maxnodes:
254 if ui.verbose or len(visibletostrip) <= maxnodes:
255 for n in visibletostrip:
255 for n in visibletostrip:
256 ui.status(b'%s\n' % short(n))
256 ui.status(b'%s\n' % short(n))
257 else:
257 else:
258 for n in visibletostrip[:maxnodes]:
258 for n in visibletostrip[:maxnodes]:
259 ui.status(b'%s\n' % short(n))
259 ui.status(b'%s\n' % short(n))
260 ui.status(
260 ui.status(
261 _(b'...and %d more, use --verbose to list all\n')
261 _(b'...and %d more, use --verbose to list all\n')
262 % (len(visibletostrip) - maxnodes)
262 % (len(visibletostrip) - maxnodes)
263 )
263 )
264 if not force:
264 if not force:
265 raise error.StateError(
265 raise error.StateError(
266 _(b'local changes found'),
266 _(b'local changes found'),
267 hint=_(b'use --force-delete-local-changes to ignore'),
267 hint=_(b'use --force-delete-local-changes to ignore'),
268 )
268 )
269
269
270 with ui.uninterruptible():
270 with ui.uninterruptible():
271 if revstostrip:
271 if revstostrip:
272 tostrip = [unfi.changelog.node(r) for r in revstostrip]
272 tostrip = [unfi.changelog.node(r) for r in revstostrip]
273 if repo[b'.'].node() in tostrip:
273 if repo[b'.'].node() in tostrip:
274 # stripping working copy, so move to a different commit first
274 # stripping working copy, so move to a different commit first
275 urev = max(
275 urev = max(
276 repo.revs(
276 repo.revs(
277 b'(::%n) - %ln + null',
277 b'(::%n) - %ln + null',
278 repo[b'.'].node(),
278 repo[b'.'].node(),
279 visibletostrip,
279 visibletostrip,
280 )
280 )
281 )
281 )
282 hg.clean(repo, urev)
282 hg.clean(repo, urev)
283 overrides = {(b'devel', b'strip-obsmarkers'): False}
283 overrides = {(b'devel', b'strip-obsmarkers'): False}
284 if backup:
284 if backup:
285 ui.status(_(b'moving unwanted changesets to backup\n'))
285 ui.status(_(b'moving unwanted changesets to backup\n'))
286 else:
286 else:
287 ui.status(_(b'deleting unwanted changesets\n'))
287 ui.status(_(b'deleting unwanted changesets\n'))
288 with ui.configoverride(overrides, b'narrow'):
288 with ui.configoverride(overrides, b'narrow'):
289 repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
289 repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
290
290
291 todelete = []
291 todelete = []
292 - for t, f, f2, size in repo.store.datafiles():
292 + for t, f, size in repo.store.datafiles():
293 if f.startswith(b'data/'):
293 if f.startswith(b'data/'):
294 file = f[5:-2]
294 file = f[5:-2]
295 if not newmatch(file):
295 if not newmatch(file):
296 todelete.append(f)
296 todelete.append(f)
297 elif f.startswith(b'meta/'):
297 elif f.startswith(b'meta/'):
298 dir = f[5:-13]
298 dir = f[5:-13]
299 dirs = sorted(pathutil.dirs({dir})) + [dir]
299 dirs = sorted(pathutil.dirs({dir})) + [dir]
300 include = True
300 include = True
301 for d in dirs:
301 for d in dirs:
302 visit = newmatch.visitdir(d)
302 visit = newmatch.visitdir(d)
303 if not visit:
303 if not visit:
304 include = False
304 include = False
305 break
305 break
306 if visit == b'all':
306 if visit == b'all':
307 break
307 break
308 if not include:
308 if not include:
309 todelete.append(f)
309 todelete.append(f)
310
310
311 repo.destroying()
311 repo.destroying()
312
312
313 with repo.transaction(b'narrowing'):
313 with repo.transaction(b'narrowing'):
314 # Update narrowspec before removing revlogs, so repo won't be
314 # Update narrowspec before removing revlogs, so repo won't be
315 # corrupt in case of crash
315 # corrupt in case of crash
316 repo.setnarrowpats(newincludes, newexcludes)
316 repo.setnarrowpats(newincludes, newexcludes)
317
317
318 for f in todelete:
318 for f in todelete:
319 ui.status(_(b'deleting %s\n') % f)
319 ui.status(_(b'deleting %s\n') % f)
320 util.unlinkpath(repo.svfs.join(f))
320 util.unlinkpath(repo.svfs.join(f))
321 repo.store.markremoved(f)
321 repo.store.markremoved(f)
322
322
323 ui.status(_(b'deleting unwanted files from working copy\n'))
323 ui.status(_(b'deleting unwanted files from working copy\n'))
324 with repo.dirstate.parentchange():
324 with repo.dirstate.parentchange():
325 narrowspec.updateworkingcopy(repo, assumeclean=True)
325 narrowspec.updateworkingcopy(repo, assumeclean=True)
326 narrowspec.copytoworkingcopy(repo)
326 narrowspec.copytoworkingcopy(repo)
327
327
328 repo.destroyed()
328 repo.destroyed()
329
329
330
330
331 def _widen(
331 def _widen(
332 ui,
332 ui,
333 repo,
333 repo,
334 remote,
334 remote,
335 commoninc,
335 commoninc,
336 oldincludes,
336 oldincludes,
337 oldexcludes,
337 oldexcludes,
338 newincludes,
338 newincludes,
339 newexcludes,
339 newexcludes,
340 ):
340 ):
341 # for now we assume that if a server has ellipses enabled, we will be
341 # for now we assume that if a server has ellipses enabled, we will be
342 # exchanging ellipses nodes. In future we should add ellipses as a client
342 # exchanging ellipses nodes. In future we should add ellipses as a client
343 # side requirement (maybe) to distinguish a client is shallow or not and
343 # side requirement (maybe) to distinguish a client is shallow or not and
344 # then send that information to server whether we want ellipses or not.
344 # then send that information to server whether we want ellipses or not.
345 # Theoretically a non-ellipses repo should be able to use narrow
345 # Theoretically a non-ellipses repo should be able to use narrow
346 # functionality from an ellipses enabled server
346 # functionality from an ellipses enabled server
347 remotecap = remote.capabilities()
347 remotecap = remote.capabilities()
348 ellipsesremote = any(
348 ellipsesremote = any(
349 cap in remotecap for cap in wireprototypes.SUPPORTED_ELLIPSESCAP
349 cap in remotecap for cap in wireprototypes.SUPPORTED_ELLIPSESCAP
350 )
350 )
351
351
352 # check whether we are talking to a server which supports old version of
352 # check whether we are talking to a server which supports old version of
353 # ellipses capabilities
353 # ellipses capabilities
354 isoldellipses = (
354 isoldellipses = (
355 ellipsesremote
355 ellipsesremote
356 and wireprototypes.ELLIPSESCAP1 in remotecap
356 and wireprototypes.ELLIPSESCAP1 in remotecap
357 and wireprototypes.ELLIPSESCAP not in remotecap
357 and wireprototypes.ELLIPSESCAP not in remotecap
358 )
358 )
359
359
360 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
360 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
361 orig(pullop, kwargs)
361 orig(pullop, kwargs)
362 # The old{in,ex}cludepats have already been set by orig()
362 # The old{in,ex}cludepats have already been set by orig()
363 kwargs[b'includepats'] = newincludes
363 kwargs[b'includepats'] = newincludes
364 kwargs[b'excludepats'] = newexcludes
364 kwargs[b'excludepats'] = newexcludes
365
365
366 wrappedextraprepare = extensions.wrappedfunction(
366 wrappedextraprepare = extensions.wrappedfunction(
367 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
367 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
368 )
368 )
369
369
370 # define a function that narrowbundle2 can call after creating the
370 # define a function that narrowbundle2 can call after creating the
371 # backup bundle, but before applying the bundle from the server
371 # backup bundle, but before applying the bundle from the server
372 def setnewnarrowpats():
372 def setnewnarrowpats():
373 repo.setnarrowpats(newincludes, newexcludes)
373 repo.setnarrowpats(newincludes, newexcludes)
374
374
375 repo.setnewnarrowpats = setnewnarrowpats
375 repo.setnewnarrowpats = setnewnarrowpats
376 # silence the devel-warning of applying an empty changegroup
376 # silence the devel-warning of applying an empty changegroup
377 overrides = {(b'devel', b'all-warnings'): False}
377 overrides = {(b'devel', b'all-warnings'): False}
378
378
379 common = commoninc[0]
379 common = commoninc[0]
380 with ui.uninterruptible():
380 with ui.uninterruptible():
381 if ellipsesremote:
381 if ellipsesremote:
382 ds = repo.dirstate
382 ds = repo.dirstate
383 p1, p2 = ds.p1(), ds.p2()
383 p1, p2 = ds.p1(), ds.p2()
384 with ds.parentchange():
384 with ds.parentchange():
385 ds.setparents(repo.nullid, repo.nullid)
385 ds.setparents(repo.nullid, repo.nullid)
386 if isoldellipses:
386 if isoldellipses:
387 with wrappedextraprepare:
387 with wrappedextraprepare:
388 exchange.pull(repo, remote, heads=common)
388 exchange.pull(repo, remote, heads=common)
389 else:
389 else:
390 known = []
390 known = []
391 if ellipsesremote:
391 if ellipsesremote:
392 known = [
392 known = [
393 ctx.node()
393 ctx.node()
394 for ctx in repo.set(b'::%ln', common)
394 for ctx in repo.set(b'::%ln', common)
395 if ctx.node() != repo.nullid
395 if ctx.node() != repo.nullid
396 ]
396 ]
397 with remote.commandexecutor() as e:
397 with remote.commandexecutor() as e:
398 bundle = e.callcommand(
398 bundle = e.callcommand(
399 b'narrow_widen',
399 b'narrow_widen',
400 {
400 {
401 b'oldincludes': oldincludes,
401 b'oldincludes': oldincludes,
402 b'oldexcludes': oldexcludes,
402 b'oldexcludes': oldexcludes,
403 b'newincludes': newincludes,
403 b'newincludes': newincludes,
404 b'newexcludes': newexcludes,
404 b'newexcludes': newexcludes,
405 b'cgversion': b'03',
405 b'cgversion': b'03',
406 b'commonheads': common,
406 b'commonheads': common,
407 b'known': known,
407 b'known': known,
408 b'ellipses': ellipsesremote,
408 b'ellipses': ellipsesremote,
409 },
409 },
410 ).result()
410 ).result()
411
411
412 trmanager = exchange.transactionmanager(
412 trmanager = exchange.transactionmanager(
413 repo, b'widen', remote.url()
413 repo, b'widen', remote.url()
414 )
414 )
415 with trmanager, repo.ui.configoverride(overrides, b'widen'):
415 with trmanager, repo.ui.configoverride(overrides, b'widen'):
416 op = bundle2.bundleoperation(
416 op = bundle2.bundleoperation(
417 repo, trmanager.transaction, source=b'widen'
417 repo, trmanager.transaction, source=b'widen'
418 )
418 )
419 # TODO: we should catch error.Abort here
419 # TODO: we should catch error.Abort here
420 bundle2.processbundle(repo, bundle, op=op)
420 bundle2.processbundle(repo, bundle, op=op)
421
421
422 if ellipsesremote:
422 if ellipsesremote:
423 with ds.parentchange():
423 with ds.parentchange():
424 ds.setparents(p1, p2)
424 ds.setparents(p1, p2)
425
425
426 with repo.transaction(b'widening'), repo.dirstate.parentchange():
426 with repo.transaction(b'widening'), repo.dirstate.parentchange():
427 repo.setnewnarrowpats()
427 repo.setnewnarrowpats()
428 narrowspec.updateworkingcopy(repo)
428 narrowspec.updateworkingcopy(repo)
429 narrowspec.copytoworkingcopy(repo)
429 narrowspec.copytoworkingcopy(repo)
430
430
431
431
432 # TODO(rdamazio): Make new matcher format and update description
432 # TODO(rdamazio): Make new matcher format and update description
433 @command(
433 @command(
434 b'tracked',
434 b'tracked',
435 [
435 [
436 (b'', b'addinclude', [], _(b'new paths to include')),
436 (b'', b'addinclude', [], _(b'new paths to include')),
437 (b'', b'removeinclude', [], _(b'old paths to no longer include')),
437 (b'', b'removeinclude', [], _(b'old paths to no longer include')),
438 (
438 (
439 b'',
439 b'',
440 b'auto-remove-includes',
440 b'auto-remove-includes',
441 False,
441 False,
442 _(b'automatically choose unused includes to remove'),
442 _(b'automatically choose unused includes to remove'),
443 ),
443 ),
444 (b'', b'addexclude', [], _(b'new paths to exclude')),
444 (b'', b'addexclude', [], _(b'new paths to exclude')),
445 (b'', b'import-rules', b'', _(b'import narrowspecs from a file')),
445 (b'', b'import-rules', b'', _(b'import narrowspecs from a file')),
446 (b'', b'removeexclude', [], _(b'old paths to no longer exclude')),
446 (b'', b'removeexclude', [], _(b'old paths to no longer exclude')),
447 (
447 (
448 b'',
448 b'',
449 b'clear',
449 b'clear',
450 False,
450 False,
451 _(b'whether to replace the existing narrowspec'),
451 _(b'whether to replace the existing narrowspec'),
452 ),
452 ),
453 (
453 (
454 b'',
454 b'',
455 b'force-delete-local-changes',
455 b'force-delete-local-changes',
456 False,
456 False,
457 _(b'forces deletion of local changes when narrowing'),
457 _(b'forces deletion of local changes when narrowing'),
458 ),
458 ),
459 (
459 (
460 b'',
460 b'',
461 b'backup',
461 b'backup',
462 True,
462 True,
463 _(b'back up local changes when narrowing'),
463 _(b'back up local changes when narrowing'),
464 ),
464 ),
465 (
465 (
466 b'',
466 b'',
467 b'update-working-copy',
467 b'update-working-copy',
468 False,
468 False,
469 _(b'update working copy when the store has changed'),
469 _(b'update working copy when the store has changed'),
470 ),
470 ),
471 ]
471 ]
472 + commands.remoteopts,
472 + commands.remoteopts,
473 _(b'[OPTIONS]... [REMOTE]'),
473 _(b'[OPTIONS]... [REMOTE]'),
474 inferrepo=True,
474 inferrepo=True,
475 helpcategory=command.CATEGORY_MAINTENANCE,
475 helpcategory=command.CATEGORY_MAINTENANCE,
476 )
476 )
477 def trackedcmd(ui, repo, remotepath=None, *pats, **opts):
477 def trackedcmd(ui, repo, remotepath=None, *pats, **opts):
478 """show or change the current narrowspec
478 """show or change the current narrowspec
479
479
480 With no argument, shows the current narrowspec entries, one per line. Each
480 With no argument, shows the current narrowspec entries, one per line. Each
481 line will be prefixed with 'I' or 'X' for included or excluded patterns,
481 line will be prefixed with 'I' or 'X' for included or excluded patterns,
482 respectively.
482 respectively.
483
483
484 The narrowspec is comprised of expressions to match remote files and/or
484 The narrowspec is comprised of expressions to match remote files and/or
485 directories that should be pulled into your client.
485 directories that should be pulled into your client.
486 The narrowspec has *include* and *exclude* expressions, with excludes always
486 The narrowspec has *include* and *exclude* expressions, with excludes always
487 trumping includes: that is, if a file matches an exclude expression, it will
487 trumping includes: that is, if a file matches an exclude expression, it will
488 be excluded even if it also matches an include expression.
488 be excluded even if it also matches an include expression.
489 Excluding files that were never included has no effect.
489 Excluding files that were never included has no effect.
490
490
491 Each included or excluded entry is in the format described by
491 Each included or excluded entry is in the format described by
492 'hg help patterns'.
492 'hg help patterns'.
493
493
494 The options allow you to add or remove included and excluded expressions.
494 The options allow you to add or remove included and excluded expressions.
495
495
496 If --clear is specified, then all previous includes and excludes are DROPPED
496 If --clear is specified, then all previous includes and excludes are DROPPED
497 and replaced by the new ones specified to --addinclude and --addexclude.
497 and replaced by the new ones specified to --addinclude and --addexclude.
498 If --clear is specified without any further options, the narrowspec will be
498 If --clear is specified without any further options, the narrowspec will be
499 empty and will not match any files.
499 empty and will not match any files.
500
500
501 If --auto-remove-includes is specified, then those includes that don't match
501 If --auto-remove-includes is specified, then those includes that don't match
502 any files modified by currently visible local commits (those not shared by
502 any files modified by currently visible local commits (those not shared by
503 the remote) will be added to the set of explicitly specified includes to
503 the remote) will be added to the set of explicitly specified includes to
504 remove.
504 remove.
505
505
506 --import-rules accepts a path to a file containing rules, allowing you to
506 --import-rules accepts a path to a file containing rules, allowing you to
507 add --addinclude, --addexclude rules in bulk. Like the other include and
507 add --addinclude, --addexclude rules in bulk. Like the other include and
508 exclude switches, the changes are applied immediately.
508 exclude switches, the changes are applied immediately.
509 """
509 """
510 opts = pycompat.byteskwargs(opts)
510 opts = pycompat.byteskwargs(opts)
511 if requirements.NARROW_REQUIREMENT not in repo.requirements:
511 if requirements.NARROW_REQUIREMENT not in repo.requirements:
512 raise error.InputError(
512 raise error.InputError(
513 _(
513 _(
514 b'the tracked command is only supported on '
514 b'the tracked command is only supported on '
515 b'repositories cloned with --narrow'
515 b'repositories cloned with --narrow'
516 )
516 )
517 )
517 )
518
518
519 # Before supporting, decide whether it "hg tracked --clear" should mean
519 # Before supporting, decide whether it "hg tracked --clear" should mean
520 # tracking no paths or all paths.
520 # tracking no paths or all paths.
521 if opts[b'clear']:
521 if opts[b'clear']:
522 raise error.InputError(_(b'the --clear option is not yet supported'))
522 raise error.InputError(_(b'the --clear option is not yet supported'))
523
523
524 # import rules from a file
524 # import rules from a file
525 newrules = opts.get(b'import_rules')
525 newrules = opts.get(b'import_rules')
526 if newrules:
526 if newrules:
527 try:
527 try:
528 filepath = os.path.join(encoding.getcwd(), newrules)
528 filepath = os.path.join(encoding.getcwd(), newrules)
529 fdata = util.readfile(filepath)
529 fdata = util.readfile(filepath)
530 except IOError as inst:
530 except IOError as inst:
531 raise error.StorageError(
531 raise error.StorageError(
532 _(b"cannot read narrowspecs from '%s': %s")
532 _(b"cannot read narrowspecs from '%s': %s")
533 % (filepath, encoding.strtolocal(inst.strerror))
533 % (filepath, encoding.strtolocal(inst.strerror))
534 )
534 )
535 includepats, excludepats, profiles = sparse.parseconfig(
535 includepats, excludepats, profiles = sparse.parseconfig(
536 ui, fdata, b'narrow'
536 ui, fdata, b'narrow'
537 )
537 )
538 if profiles:
538 if profiles:
539 raise error.InputError(
539 raise error.InputError(
540 _(
540 _(
541 b"including other spec files using '%include' "
541 b"including other spec files using '%include' "
542 b"is not supported in narrowspec"
542 b"is not supported in narrowspec"
543 )
543 )
544 )
544 )
545 opts[b'addinclude'].extend(includepats)
545 opts[b'addinclude'].extend(includepats)
546 opts[b'addexclude'].extend(excludepats)
546 opts[b'addexclude'].extend(excludepats)
547
547
548 addedincludes = narrowspec.parsepatterns(opts[b'addinclude'])
548 addedincludes = narrowspec.parsepatterns(opts[b'addinclude'])
549 removedincludes = narrowspec.parsepatterns(opts[b'removeinclude'])
549 removedincludes = narrowspec.parsepatterns(opts[b'removeinclude'])
550 addedexcludes = narrowspec.parsepatterns(opts[b'addexclude'])
550 addedexcludes = narrowspec.parsepatterns(opts[b'addexclude'])
551 removedexcludes = narrowspec.parsepatterns(opts[b'removeexclude'])
551 removedexcludes = narrowspec.parsepatterns(opts[b'removeexclude'])
552 autoremoveincludes = opts[b'auto_remove_includes']
552 autoremoveincludes = opts[b'auto_remove_includes']
553
553
554 update_working_copy = opts[b'update_working_copy']
554 update_working_copy = opts[b'update_working_copy']
555 only_show = not (
555 only_show = not (
556 addedincludes
556 addedincludes
557 or removedincludes
557 or removedincludes
558 or addedexcludes
558 or addedexcludes
559 or removedexcludes
559 or removedexcludes
560 or newrules
560 or newrules
561 or autoremoveincludes
561 or autoremoveincludes
562 or update_working_copy
562 or update_working_copy
563 )
563 )
564
564
565 oldincludes, oldexcludes = repo.narrowpats
565 oldincludes, oldexcludes = repo.narrowpats
566
566
567 # filter the user passed additions and deletions into actual additions and
567 # filter the user passed additions and deletions into actual additions and
568 # deletions of excludes and includes
568 # deletions of excludes and includes
569 addedincludes -= oldincludes
569 addedincludes -= oldincludes
570 removedincludes &= oldincludes
570 removedincludes &= oldincludes
571 addedexcludes -= oldexcludes
571 addedexcludes -= oldexcludes
572 removedexcludes &= oldexcludes
572 removedexcludes &= oldexcludes
573
573
574 widening = addedincludes or removedexcludes
574 widening = addedincludes or removedexcludes
575 narrowing = removedincludes or addedexcludes
575 narrowing = removedincludes or addedexcludes
576
576
577 # Only print the current narrowspec.
577 # Only print the current narrowspec.
578 if only_show:
578 if only_show:
579 ui.pager(b'tracked')
579 ui.pager(b'tracked')
580 fm = ui.formatter(b'narrow', opts)
580 fm = ui.formatter(b'narrow', opts)
581 for i in sorted(oldincludes):
581 for i in sorted(oldincludes):
582 fm.startitem()
582 fm.startitem()
583 fm.write(b'status', b'%s ', b'I', label=b'narrow.included')
583 fm.write(b'status', b'%s ', b'I', label=b'narrow.included')
584 fm.write(b'pat', b'%s\n', i, label=b'narrow.included')
584 fm.write(b'pat', b'%s\n', i, label=b'narrow.included')
585 for i in sorted(oldexcludes):
585 for i in sorted(oldexcludes):
586 fm.startitem()
586 fm.startitem()
587 fm.write(b'status', b'%s ', b'X', label=b'narrow.excluded')
587 fm.write(b'status', b'%s ', b'X', label=b'narrow.excluded')
588 fm.write(b'pat', b'%s\n', i, label=b'narrow.excluded')
588 fm.write(b'pat', b'%s\n', i, label=b'narrow.excluded')
589 fm.end()
589 fm.end()
590 return 0
590 return 0
591
591
592 if update_working_copy:
592 if update_working_copy:
593 with repo.wlock(), repo.lock(), repo.transaction(
593 with repo.wlock(), repo.lock(), repo.transaction(
594 b'narrow-wc'
594 b'narrow-wc'
595 ), repo.dirstate.parentchange():
595 ), repo.dirstate.parentchange():
596 narrowspec.updateworkingcopy(repo)
596 narrowspec.updateworkingcopy(repo)
597 narrowspec.copytoworkingcopy(repo)
597 narrowspec.copytoworkingcopy(repo)
598 return 0
598 return 0
599
599
600 if not (widening or narrowing or autoremoveincludes):
600 if not (widening or narrowing or autoremoveincludes):
601 ui.status(_(b"nothing to widen or narrow\n"))
601 ui.status(_(b"nothing to widen or narrow\n"))
602 return 0
602 return 0
603
603
604 with repo.wlock(), repo.lock():
604 with repo.wlock(), repo.lock():
605 cmdutil.bailifchanged(repo)
605 cmdutil.bailifchanged(repo)
606
606
607 # Find the revisions we have in common with the remote. These will
607 # Find the revisions we have in common with the remote. These will
608 # be used for finding local-only changes for narrowing. They will
608 # be used for finding local-only changes for narrowing. They will
609 # also define the set of revisions to update for widening.
609 # also define the set of revisions to update for widening.
610 r = urlutil.get_unique_pull_path(b'tracked', repo, ui, remotepath)
610 r = urlutil.get_unique_pull_path(b'tracked', repo, ui, remotepath)
611 url, branches = r
611 url, branches = r
612 ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(url))
612 ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(url))
613 remote = hg.peer(repo, opts, url)
613 remote = hg.peer(repo, opts, url)
614
614
615 try:
615 try:
616 # check narrow support before doing anything if widening needs to be
616 # check narrow support before doing anything if widening needs to be
617 # performed. In future we should also abort if client is ellipses and
617 # performed. In future we should also abort if client is ellipses and
618 # server does not support ellipses
618 # server does not support ellipses
619 if (
619 if (
620 widening
620 widening
621 and wireprototypes.NARROWCAP not in remote.capabilities()
621 and wireprototypes.NARROWCAP not in remote.capabilities()
622 ):
622 ):
623 raise error.Abort(_(b"server does not support narrow clones"))
623 raise error.Abort(_(b"server does not support narrow clones"))
624
624
625 commoninc = discovery.findcommonincoming(repo, remote)
625 commoninc = discovery.findcommonincoming(repo, remote)
626
626
627 if autoremoveincludes:
627 if autoremoveincludes:
628 outgoing = discovery.findcommonoutgoing(
628 outgoing = discovery.findcommonoutgoing(
629 repo, remote, commoninc=commoninc
629 repo, remote, commoninc=commoninc
630 )
630 )
631 ui.status(_(b'looking for unused includes to remove\n'))
631 ui.status(_(b'looking for unused includes to remove\n'))
632 localfiles = set()
632 localfiles = set()
633 for n in itertools.chain(outgoing.missing, outgoing.excluded):
633 for n in itertools.chain(outgoing.missing, outgoing.excluded):
634 localfiles.update(repo[n].files())
634 localfiles.update(repo[n].files())
635 suggestedremovals = []
635 suggestedremovals = []
636 for include in sorted(oldincludes):
636 for include in sorted(oldincludes):
637 match = narrowspec.match(repo.root, [include], oldexcludes)
637 match = narrowspec.match(repo.root, [include], oldexcludes)
638 if not any(match(f) for f in localfiles):
638 if not any(match(f) for f in localfiles):
639 suggestedremovals.append(include)
639 suggestedremovals.append(include)
640 if suggestedremovals:
640 if suggestedremovals:
641 for s in suggestedremovals:
641 for s in suggestedremovals:
642 ui.status(b'%s\n' % s)
642 ui.status(b'%s\n' % s)
643 if (
643 if (
644 ui.promptchoice(
644 ui.promptchoice(
645 _(
645 _(
646 b'remove these unused includes (yn)?'
646 b'remove these unused includes (yn)?'
647 b'$$ &Yes $$ &No'
647 b'$$ &Yes $$ &No'
648 )
648 )
649 )
649 )
650 == 0
650 == 0
651 ):
651 ):
652 removedincludes.update(suggestedremovals)
652 removedincludes.update(suggestedremovals)
653 narrowing = True
653 narrowing = True
654 else:
654 else:
655 ui.status(_(b'found no unused includes\n'))
655 ui.status(_(b'found no unused includes\n'))
656
656
657 if narrowing:
657 if narrowing:
658 newincludes = oldincludes - removedincludes
658 newincludes = oldincludes - removedincludes
659 newexcludes = oldexcludes | addedexcludes
659 newexcludes = oldexcludes | addedexcludes
660 _narrow(
660 _narrow(
661 ui,
661 ui,
662 repo,
662 repo,
663 remote,
663 remote,
664 commoninc,
664 commoninc,
665 oldincludes,
665 oldincludes,
666 oldexcludes,
666 oldexcludes,
667 newincludes,
667 newincludes,
668 newexcludes,
668 newexcludes,
669 opts[b'force_delete_local_changes'],
669 opts[b'force_delete_local_changes'],
670 opts[b'backup'],
670 opts[b'backup'],
671 )
671 )
672 # _narrow() updated the narrowspec and _widen() below needs to
672 # _narrow() updated the narrowspec and _widen() below needs to
673 # use the updated values as its base (otherwise removed includes
673 # use the updated values as its base (otherwise removed includes
674 # and addedexcludes will be lost in the resulting narrowspec)
674 # and addedexcludes will be lost in the resulting narrowspec)
675 oldincludes = newincludes
675 oldincludes = newincludes
676 oldexcludes = newexcludes
676 oldexcludes = newexcludes
677
677
678 if widening:
678 if widening:
679 newincludes = oldincludes | addedincludes
679 newincludes = oldincludes | addedincludes
680 newexcludes = oldexcludes - removedexcludes
680 newexcludes = oldexcludes - removedexcludes
681 _widen(
681 _widen(
682 ui,
682 ui,
683 repo,
683 repo,
684 remote,
684 remote,
685 commoninc,
685 commoninc,
686 oldincludes,
686 oldincludes,
687 oldexcludes,
687 oldexcludes,
688 newincludes,
688 newincludes,
689 newexcludes,
689 newexcludes,
690 )
690 )
691 finally:
691 finally:
692 remote.close()
692 remote.close()
693
693
694 return 0
694 return 0
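For reference, trackedcmd() above implements the `hg tracked` command; a few illustrative invocations (paths and file names are made up) exercising the options defined in this file:

    hg tracked                            # print the narrowspec: 'I' includes, 'X' excludes
    hg tracked --addinclude dir1/feature  # widen: start tracking dir1/feature
    hg tracked --removeinclude dir1       # narrow: stop tracking dir1 (local-only commits touching it may be stripped, backed up by default)
    hg tracked --import-rules rules.txt   # add includes/excludes in bulk from a file
    hg tracked --auto-remove-includes     # offer to drop includes unused by local-only commits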
@@ -1,399 +1,399 @@
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2
2
3 import threading
3 import threading
4
4
5 from mercurial.node import (
5 from mercurial.node import (
6 hex,
6 hex,
7 sha1nodeconstants,
7 sha1nodeconstants,
8 )
8 )
9 from mercurial.pycompat import getattr
9 from mercurial.pycompat import getattr
10 from mercurial import (
10 from mercurial import (
11 mdiff,
11 mdiff,
12 pycompat,
12 pycompat,
13 revlog,
13 revlog,
14 )
14 )
15 from . import (
15 from . import (
16 basestore,
16 basestore,
17 constants,
17 constants,
18 shallowutil,
18 shallowutil,
19 )
19 )
20
20
21
21
22 class ChainIndicies(object):
22 class ChainIndicies(object):
23 """A static class for easy reference to the delta chain indicies."""
23 """A static class for easy reference to the delta chain indicies."""
24
24
25 # The filename of this revision delta
25 # The filename of this revision delta
26 NAME = 0
26 NAME = 0
27 # The mercurial file node for this revision delta
27 # The mercurial file node for this revision delta
28 NODE = 1
28 NODE = 1
29 # The filename of the delta base's revision. This is useful when delta
29 # The filename of the delta base's revision. This is useful when delta
30 # between different files (like in the case of a move or copy, we can delta
30 # between different files (like in the case of a move or copy, we can delta
31 # against the original file content).
31 # against the original file content).
32 BASENAME = 2
32 BASENAME = 2
33 # The mercurial file node for the delta base revision. This is the nullid if
33 # The mercurial file node for the delta base revision. This is the nullid if
34 # this delta is a full text.
34 # this delta is a full text.
35 BASENODE = 3
35 BASENODE = 3
36 # The actual delta or full text data.
36 # The actual delta or full text data.
37 DATA = 4
37 DATA = 4
38
38
39
39
40 class unioncontentstore(basestore.baseunionstore):
40 class unioncontentstore(basestore.baseunionstore):
41 def __init__(self, *args, **kwargs):
41 def __init__(self, *args, **kwargs):
42 super(unioncontentstore, self).__init__(*args, **kwargs)
42 super(unioncontentstore, self).__init__(*args, **kwargs)
43
43
44 self.stores = args
44 self.stores = args
45 self.writestore = kwargs.get('writestore')
45 self.writestore = kwargs.get('writestore')
46
46
47 # If allowincomplete==True then the union store can return partial
47 # If allowincomplete==True then the union store can return partial
48 # delta chains, otherwise it will throw a KeyError if a full
48 # delta chains, otherwise it will throw a KeyError if a full
49 # deltachain can't be found.
49 # deltachain can't be found.
50 self.allowincomplete = kwargs.get('allowincomplete', False)
50 self.allowincomplete = kwargs.get('allowincomplete', False)
51
51
52 def get(self, name, node):
52 def get(self, name, node):
53 """Fetches the full text revision contents of the given name+node pair.
53 """Fetches the full text revision contents of the given name+node pair.
54 If the full text doesn't exist, throws a KeyError.
54 If the full text doesn't exist, throws a KeyError.
55
55
56 Under the hood, this uses getdeltachain() across all the stores to build
56 Under the hood, this uses getdeltachain() across all the stores to build
57 up a full chain to produce the full text.
57 up a full chain to produce the full text.
58 """
58 """
59 chain = self.getdeltachain(name, node)
59 chain = self.getdeltachain(name, node)
60
60
61 if chain[-1][ChainIndicies.BASENODE] != sha1nodeconstants.nullid:
61 if chain[-1][ChainIndicies.BASENODE] != sha1nodeconstants.nullid:
62 # If we didn't receive a full chain, throw
62 # If we didn't receive a full chain, throw
63 raise KeyError((name, hex(node)))
63 raise KeyError((name, hex(node)))
64
64
65 # The last entry in the chain is a full text, so we start our delta
65 # The last entry in the chain is a full text, so we start our delta
66 # applies with that.
66 # applies with that.
67 fulltext = chain.pop()[ChainIndicies.DATA]
67 fulltext = chain.pop()[ChainIndicies.DATA]
68
68
69 text = fulltext
69 text = fulltext
70 while chain:
70 while chain:
71 delta = chain.pop()[ChainIndicies.DATA]
71 delta = chain.pop()[ChainIndicies.DATA]
72 text = mdiff.patches(text, [delta])
72 text = mdiff.patches(text, [delta])
73
73
74 return text
74 return text
75
75
76 @basestore.baseunionstore.retriable
76 @basestore.baseunionstore.retriable
77 def getdelta(self, name, node):
77 def getdelta(self, name, node):
78 """Return the single delta entry for the given name/node pair."""
78 """Return the single delta entry for the given name/node pair."""
79 for store in self.stores:
79 for store in self.stores:
80 try:
80 try:
81 return store.getdelta(name, node)
81 return store.getdelta(name, node)
82 except KeyError:
82 except KeyError:
83 pass
83 pass
84
84
85 raise KeyError((name, hex(node)))
85 raise KeyError((name, hex(node)))
86
86
87 def getdeltachain(self, name, node):
87 def getdeltachain(self, name, node):
88 """Returns the deltachain for the given name/node pair.
88 """Returns the deltachain for the given name/node pair.
89
89
90 Returns an ordered list of:
90 Returns an ordered list of:
91
91
92 [(name, node, deltabasename, deltabasenode, deltacontent),...]
92 [(name, node, deltabasename, deltabasenode, deltacontent),...]
93
93
94 where the chain is terminated by a full text entry with a nullid
94 where the chain is terminated by a full text entry with a nullid
95 deltabasenode.
95 deltabasenode.
96 """
96 """
97 chain = self._getpartialchain(name, node)
97 chain = self._getpartialchain(name, node)
98 while chain[-1][ChainIndicies.BASENODE] != sha1nodeconstants.nullid:
98 while chain[-1][ChainIndicies.BASENODE] != sha1nodeconstants.nullid:
99 x, x, deltabasename, deltabasenode, x = chain[-1]
99 x, x, deltabasename, deltabasenode, x = chain[-1]
100 try:
100 try:
101 morechain = self._getpartialchain(deltabasename, deltabasenode)
101 morechain = self._getpartialchain(deltabasename, deltabasenode)
102 chain.extend(morechain)
102 chain.extend(morechain)
103 except KeyError:
103 except KeyError:
104 # If we allow incomplete chains, don't throw.
104 # If we allow incomplete chains, don't throw.
105 if not self.allowincomplete:
105 if not self.allowincomplete:
106 raise
106 raise
107 break
107 break
108
108
109 return chain
109 return chain
110
110
111 @basestore.baseunionstore.retriable
111 @basestore.baseunionstore.retriable
112 def getmeta(self, name, node):
112 def getmeta(self, name, node):
113 """Returns the metadata dict for given node."""
113 """Returns the metadata dict for given node."""
114 for store in self.stores:
114 for store in self.stores:
115 try:
115 try:
116 return store.getmeta(name, node)
116 return store.getmeta(name, node)
117 except KeyError:
117 except KeyError:
118 pass
118 pass
119 raise KeyError((name, hex(node)))
119 raise KeyError((name, hex(node)))
120
120
121 def getmetrics(self):
121 def getmetrics(self):
122 metrics = [s.getmetrics() for s in self.stores]
122 metrics = [s.getmetrics() for s in self.stores]
123 return shallowutil.sumdicts(*metrics)
123 return shallowutil.sumdicts(*metrics)
124
124
125 @basestore.baseunionstore.retriable
125 @basestore.baseunionstore.retriable
126 def _getpartialchain(self, name, node):
126 def _getpartialchain(self, name, node):
127 """Returns a partial delta chain for the given name/node pair.
127 """Returns a partial delta chain for the given name/node pair.
128
128
129 A partial chain is a chain that may not be terminated in a full-text.
129 A partial chain is a chain that may not be terminated in a full-text.
130 """
130 """
131 for store in self.stores:
131 for store in self.stores:
132 try:
132 try:
133 return store.getdeltachain(name, node)
133 return store.getdeltachain(name, node)
134 except KeyError:
134 except KeyError:
135 pass
135 pass
136
136
137 raise KeyError((name, hex(node)))
137 raise KeyError((name, hex(node)))
138
138
139 def add(self, name, node, data):
139 def add(self, name, node, data):
140 raise RuntimeError(
140 raise RuntimeError(
141 b"cannot add content only to remotefilelog contentstore"
141 b"cannot add content only to remotefilelog contentstore"
142 )
142 )
143
143
144 def getmissing(self, keys):
144 def getmissing(self, keys):
145 missing = keys
145 missing = keys
146 for store in self.stores:
146 for store in self.stores:
147 if missing:
147 if missing:
148 missing = store.getmissing(missing)
148 missing = store.getmissing(missing)
149 return missing
149 return missing
150
150
151 def addremotefilelognode(self, name, node, data):
151 def addremotefilelognode(self, name, node, data):
152 if self.writestore:
152 if self.writestore:
153 self.writestore.addremotefilelognode(name, node, data)
153 self.writestore.addremotefilelognode(name, node, data)
154 else:
154 else:
155 raise RuntimeError(b"no writable store configured")
155 raise RuntimeError(b"no writable store configured")
156
156
157 def markledger(self, ledger, options=None):
157 def markledger(self, ledger, options=None):
158 for store in self.stores:
158 for store in self.stores:
159 store.markledger(ledger, options)
159 store.markledger(ledger, options)
160
160
161
161
162 class remotefilelogcontentstore(basestore.basestore):
162 class remotefilelogcontentstore(basestore.basestore):
163 def __init__(self, *args, **kwargs):
163 def __init__(self, *args, **kwargs):
164 super(remotefilelogcontentstore, self).__init__(*args, **kwargs)
164 super(remotefilelogcontentstore, self).__init__(*args, **kwargs)
165 self._threaddata = threading.local()
165 self._threaddata = threading.local()
166
166
167 def get(self, name, node):
167 def get(self, name, node):
168 # return raw revision text
168 # return raw revision text
169 data = self._getdata(name, node)
169 data = self._getdata(name, node)
170
170
171 offset, size, flags = shallowutil.parsesizeflags(data)
171 offset, size, flags = shallowutil.parsesizeflags(data)
172 content = data[offset : offset + size]
172 content = data[offset : offset + size]
173
173
174 ancestormap = shallowutil.ancestormap(data)
174 ancestormap = shallowutil.ancestormap(data)
175 p1, p2, linknode, copyfrom = ancestormap[node]
175 p1, p2, linknode, copyfrom = ancestormap[node]
176 copyrev = None
176 copyrev = None
177 if copyfrom:
177 if copyfrom:
178 copyrev = hex(p1)
178 copyrev = hex(p1)
179
179
180 self._updatemetacache(node, size, flags)
180 self._updatemetacache(node, size, flags)
181
181
182 # lfs tracks renames in its own metadata, remove hg copy metadata,
182 # lfs tracks renames in its own metadata, remove hg copy metadata,
183 # because copy metadata will be re-added by lfs flag processor.
183 # because copy metadata will be re-added by lfs flag processor.
184 if flags & revlog.REVIDX_EXTSTORED:
184 if flags & revlog.REVIDX_EXTSTORED:
185 copyrev = copyfrom = None
185 copyrev = copyfrom = None
186 revision = shallowutil.createrevlogtext(content, copyfrom, copyrev)
186 revision = shallowutil.createrevlogtext(content, copyfrom, copyrev)
187 return revision
187 return revision
188
188
189 def getdelta(self, name, node):
189 def getdelta(self, name, node):
190 # Since remotefilelog content stores only contain full texts, just
190 # Since remotefilelog content stores only contain full texts, just
191 # return that.
191 # return that.
192 revision = self.get(name, node)
192 revision = self.get(name, node)
193 return (
193 return (
194 revision,
194 revision,
195 name,
195 name,
196 sha1nodeconstants.nullid,
196 sha1nodeconstants.nullid,
197 self.getmeta(name, node),
197 self.getmeta(name, node),
198 )
198 )
199
199
200 def getdeltachain(self, name, node):
200 def getdeltachain(self, name, node):
201 # Since remotefilelog content stores just contain full texts, we return
201 # Since remotefilelog content stores just contain full texts, we return
202 # a fake delta chain that just consists of a single full text revision.
202 # a fake delta chain that just consists of a single full text revision.
203 # The nullid in the deltabasenode slot indicates that the revision is a
203 # The nullid in the deltabasenode slot indicates that the revision is a
204 # fulltext.
204 # fulltext.
205 revision = self.get(name, node)
205 revision = self.get(name, node)
206 return [(name, node, None, sha1nodeconstants.nullid, revision)]
206 return [(name, node, None, sha1nodeconstants.nullid, revision)]
207
207
208 def getmeta(self, name, node):
208 def getmeta(self, name, node):
209 self._sanitizemetacache()
209 self._sanitizemetacache()
210 if node != self._threaddata.metacache[0]:
210 if node != self._threaddata.metacache[0]:
211 data = self._getdata(name, node)
211 data = self._getdata(name, node)
212 offset, size, flags = shallowutil.parsesizeflags(data)
212 offset, size, flags = shallowutil.parsesizeflags(data)
213 self._updatemetacache(node, size, flags)
213 self._updatemetacache(node, size, flags)
214 return self._threaddata.metacache[1]
214 return self._threaddata.metacache[1]
215
215
216 def add(self, name, node, data):
216 def add(self, name, node, data):
217 raise RuntimeError(
217 raise RuntimeError(
218 b"cannot add content only to remotefilelog contentstore"
218 b"cannot add content only to remotefilelog contentstore"
219 )
219 )
220
220
221 def _sanitizemetacache(self):
221 def _sanitizemetacache(self):
222 metacache = getattr(self._threaddata, 'metacache', None)
222 metacache = getattr(self._threaddata, 'metacache', None)
223 if metacache is None:
223 if metacache is None:
224 self._threaddata.metacache = (None, None) # (node, meta)
224 self._threaddata.metacache = (None, None) # (node, meta)
225
225
226 def _updatemetacache(self, node, size, flags):
226 def _updatemetacache(self, node, size, flags):
227 self._sanitizemetacache()
227 self._sanitizemetacache()
228 if node == self._threaddata.metacache[0]:
228 if node == self._threaddata.metacache[0]:
229 return
229 return
230 meta = {constants.METAKEYFLAG: flags, constants.METAKEYSIZE: size}
230 meta = {constants.METAKEYFLAG: flags, constants.METAKEYSIZE: size}
231 self._threaddata.metacache = (node, meta)
231 self._threaddata.metacache = (node, meta)
232
232
233
233
234 class remotecontentstore(object):
234 class remotecontentstore(object):
235 def __init__(self, ui, fileservice, shared):
235 def __init__(self, ui, fileservice, shared):
236 self._fileservice = fileservice
236 self._fileservice = fileservice
237 # type(shared) is usually remotefilelogcontentstore
237 # type(shared) is usually remotefilelogcontentstore
238 self._shared = shared
238 self._shared = shared
239
239
240 def get(self, name, node):
240 def get(self, name, node):
241 self._fileservice.prefetch(
241 self._fileservice.prefetch(
242 [(name, hex(node))], force=True, fetchdata=True
242 [(name, hex(node))], force=True, fetchdata=True
243 )
243 )
244 return self._shared.get(name, node)
244 return self._shared.get(name, node)
245
245
246 def getdelta(self, name, node):
246 def getdelta(self, name, node):
247 revision = self.get(name, node)
247 revision = self.get(name, node)
248 return (
248 return (
249 revision,
249 revision,
250 name,
250 name,
251 sha1nodeconstants.nullid,
251 sha1nodeconstants.nullid,
252 self._shared.getmeta(name, node),
252 self._shared.getmeta(name, node),
253 )
253 )
254
254
255 def getdeltachain(self, name, node):
255 def getdeltachain(self, name, node):
256 # Since our remote content stores just contain full texts, we return a
256 # Since our remote content stores just contain full texts, we return a
257 # fake delta chain that just consists of a single full text revision.
257 # fake delta chain that just consists of a single full text revision.
258 # The nullid in the deltabasenode slot indicates that the revision is a
258 # The nullid in the deltabasenode slot indicates that the revision is a
259 # fulltext.
259 # fulltext.
260 revision = self.get(name, node)
260 revision = self.get(name, node)
261 return [(name, node, None, sha1nodeconstants.nullid, revision)]
261 return [(name, node, None, sha1nodeconstants.nullid, revision)]
262
262
263 def getmeta(self, name, node):
263 def getmeta(self, name, node):
264 self._fileservice.prefetch(
264 self._fileservice.prefetch(
265 [(name, hex(node))], force=True, fetchdata=True
265 [(name, hex(node))], force=True, fetchdata=True
266 )
266 )
267 return self._shared.getmeta(name, node)
267 return self._shared.getmeta(name, node)
268
268
269 def add(self, name, node, data):
269 def add(self, name, node, data):
270 raise RuntimeError(b"cannot add to a remote store")
270 raise RuntimeError(b"cannot add to a remote store")
271
271
272 def getmissing(self, keys):
272 def getmissing(self, keys):
273 return keys
273 return keys
274
274
275 def markledger(self, ledger, options=None):
275 def markledger(self, ledger, options=None):
276 pass
276 pass
277
277
278
278
279 class manifestrevlogstore(object):
279 class manifestrevlogstore(object):
280 def __init__(self, repo):
280 def __init__(self, repo):
281 self._store = repo.store
281 self._store = repo.store
282 self._svfs = repo.svfs
282 self._svfs = repo.svfs
283 self._revlogs = dict()
283 self._revlogs = dict()
284 self._cl = revlog.revlog(self._svfs, radix=b'00changelog.i')
284 self._cl = revlog.revlog(self._svfs, radix=b'00changelog.i')
285 self._repackstartlinkrev = 0
285 self._repackstartlinkrev = 0
286
286
287 def get(self, name, node):
287 def get(self, name, node):
288 return self._revlog(name).rawdata(node)
288 return self._revlog(name).rawdata(node)
289
289
290 def getdelta(self, name, node):
290 def getdelta(self, name, node):
291 revision = self.get(name, node)
291 revision = self.get(name, node)
292 return revision, name, self._cl.nullid, self.getmeta(name, node)
292 return revision, name, self._cl.nullid, self.getmeta(name, node)
293
293
294 def getdeltachain(self, name, node):
294 def getdeltachain(self, name, node):
295 revision = self.get(name, node)
295 revision = self.get(name, node)
296 return [(name, node, None, self._cl.nullid, revision)]
296 return [(name, node, None, self._cl.nullid, revision)]
297
297
298 def getmeta(self, name, node):
298 def getmeta(self, name, node):
299 rl = self._revlog(name)
299 rl = self._revlog(name)
300 rev = rl.rev(node)
300 rev = rl.rev(node)
301 return {
301 return {
302 constants.METAKEYFLAG: rl.flags(rev),
302 constants.METAKEYFLAG: rl.flags(rev),
303 constants.METAKEYSIZE: rl.rawsize(rev),
303 constants.METAKEYSIZE: rl.rawsize(rev),
304 }
304 }
305
305
306 def getancestors(self, name, node, known=None):
306 def getancestors(self, name, node, known=None):
307 if known is None:
307 if known is None:
308 known = set()
308 known = set()
309 if node in known:
309 if node in known:
310 return []
310 return []
311
311
312 rl = self._revlog(name)
312 rl = self._revlog(name)
313 ancestors = {}
313 ancestors = {}
314 missing = {node}
314 missing = {node}
315 for ancrev in rl.ancestors([rl.rev(node)], inclusive=True):
315 for ancrev in rl.ancestors([rl.rev(node)], inclusive=True):
316 ancnode = rl.node(ancrev)
316 ancnode = rl.node(ancrev)
317 missing.discard(ancnode)
317 missing.discard(ancnode)
318
318
319 p1, p2 = rl.parents(ancnode)
319 p1, p2 = rl.parents(ancnode)
320 if p1 != self._cl.nullid and p1 not in known:
320 if p1 != self._cl.nullid and p1 not in known:
321 missing.add(p1)
321 missing.add(p1)
322 if p2 != self._cl.nullid and p2 not in known:
322 if p2 != self._cl.nullid and p2 not in known:
323 missing.add(p2)
323 missing.add(p2)
324
324
325 linknode = self._cl.node(rl.linkrev(ancrev))
325 linknode = self._cl.node(rl.linkrev(ancrev))
326 ancestors[rl.node(ancrev)] = (p1, p2, linknode, b'')
326 ancestors[rl.node(ancrev)] = (p1, p2, linknode, b'')
327 if not missing:
327 if not missing:
328 break
328 break
329 return ancestors
329 return ancestors
330
330
331 def getnodeinfo(self, name, node):
331 def getnodeinfo(self, name, node):
332 cl = self._cl
332 cl = self._cl
333 rl = self._revlog(name)
333 rl = self._revlog(name)
334 parents = rl.parents(node)
334 parents = rl.parents(node)
335 linkrev = rl.linkrev(rl.rev(node))
335 linkrev = rl.linkrev(rl.rev(node))
336 return (parents[0], parents[1], cl.node(linkrev), None)
336 return (parents[0], parents[1], cl.node(linkrev), None)
337
337
338 def add(self, *args):
338 def add(self, *args):
339 raise RuntimeError(b"cannot add to a revlog store")
339 raise RuntimeError(b"cannot add to a revlog store")
340
340
341 def _revlog(self, name):
341 def _revlog(self, name):
342 rl = self._revlogs.get(name)
342 rl = self._revlogs.get(name)
343 if rl is None:
343 if rl is None:
344 revlogname = b'00manifesttree'
344 revlogname = b'00manifesttree'
345 if name != b'':
345 if name != b'':
346 revlogname = b'meta/%s/00manifest' % name
346 revlogname = b'meta/%s/00manifest' % name
347 rl = revlog.revlog(self._svfs, radix=revlogname)
347 rl = revlog.revlog(self._svfs, radix=revlogname)
348 self._revlogs[name] = rl
348 self._revlogs[name] = rl
349 return rl
349 return rl
350
350
351 def getmissing(self, keys):
351 def getmissing(self, keys):
352 missing = []
352 missing = []
353 for name, node in keys:
353 for name, node in keys:
354 mfrevlog = self._revlog(name)
354 mfrevlog = self._revlog(name)
355 if node not in mfrevlog.nodemap:
355 if node not in mfrevlog.nodemap:
356 missing.append((name, node))
356 missing.append((name, node))
357
357
358 return missing
358 return missing
359
359
360 def setrepacklinkrevrange(self, startrev, endrev):
360 def setrepacklinkrevrange(self, startrev, endrev):
361 self._repackstartlinkrev = startrev
361 self._repackstartlinkrev = startrev
362 self._repackendlinkrev = endrev
362 self._repackendlinkrev = endrev
363
363
364 def markledger(self, ledger, options=None):
364 def markledger(self, ledger, options=None):
365 if options and options.get(constants.OPTION_PACKSONLY):
365 if options and options.get(constants.OPTION_PACKSONLY):
366 return
366 return
367 treename = b''
367 treename = b''
368 rl = revlog.revlog(self._svfs, radix=b'00manifesttree')
368 rl = revlog.revlog(self._svfs, radix=b'00manifesttree')
369 startlinkrev = self._repackstartlinkrev
369 startlinkrev = self._repackstartlinkrev
370 endlinkrev = self._repackendlinkrev
370 endlinkrev = self._repackendlinkrev
371 for rev in pycompat.xrange(len(rl) - 1, -1, -1):
371 for rev in pycompat.xrange(len(rl) - 1, -1, -1):
372 linkrev = rl.linkrev(rev)
372 linkrev = rl.linkrev(rev)
373 if linkrev < startlinkrev:
373 if linkrev < startlinkrev:
374 break
374 break
375 if linkrev > endlinkrev:
375 if linkrev > endlinkrev:
376 continue
376 continue
377 node = rl.node(rev)
377 node = rl.node(rev)
378 ledger.markdataentry(self, treename, node)
378 ledger.markdataentry(self, treename, node)
379 ledger.markhistoryentry(self, treename, node)
379 ledger.markhistoryentry(self, treename, node)
380
380
381 for t, path, encoded, size in self._store.datafiles():
381 for t, path, size in self._store.datafiles():
382 if path[:5] != b'meta/' or path[-2:] != b'.i':
382 if path[:5] != b'meta/' or path[-2:] != b'.i':
383 continue
383 continue
384
384
385 treename = path[5 : -len(b'/00manifest')]
385 treename = path[5 : -len(b'/00manifest')]
386
386
387 rl = revlog.revlog(self._svfs, indexfile=path[:-2])
387 rl = revlog.revlog(self._svfs, indexfile=path[:-2])
388 for rev in pycompat.xrange(len(rl) - 1, -1, -1):
388 for rev in pycompat.xrange(len(rl) - 1, -1, -1):
389 linkrev = rl.linkrev(rev)
389 linkrev = rl.linkrev(rev)
390 if linkrev < startlinkrev:
390 if linkrev < startlinkrev:
391 break
391 break
392 if linkrev > endlinkrev:
392 if linkrev > endlinkrev:
393 continue
393 continue
394 node = rl.node(rev)
394 node = rl.node(rev)
395 ledger.markdataentry(self, treename, node)
395 ledger.markdataentry(self, treename, node)
396 ledger.markhistoryentry(self, treename, node)
396 ledger.markhistoryentry(self, treename, node)
397
397
398 def cleanup(self, ledger):
398 def cleanup(self, ledger):
399 pass
399 pass
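The markledger walk above now unpacks three-element entries from self._store.datafiles(), matching this changeset's switch from (type, decoded name, encoded name, size) to a single filename per entry. As a hedged illustration of the new shape (the helper name and the .i filter are mine, not part of the store API), a consumer might look like:

def revlog_index_sizes(store):
    # Sum the reported sizes of revlog index (.i) files from a store walk.
    # Assumes the post-change three-tuple: (entry type, name, size).
    total = 0
    for filetype, name, size in store.datafiles():
        if name.endswith(b'.i'):
            total += size or 0
    return total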
@@ -1,441 +1,441 b''
1 # remotefilelogserver.py - server logic for a remotefilelog server
1 # remotefilelogserver.py - server logic for a remotefilelog server
2 #
2 #
3 # Copyright 2013 Facebook, Inc.
3 # Copyright 2013 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7 from __future__ import absolute_import
7 from __future__ import absolute_import
8
8
9 import errno
9 import errno
10 import os
10 import os
11 import stat
11 import stat
12 import time
12 import time
13 import zlib
13 import zlib
14
14
15 from mercurial.i18n import _
15 from mercurial.i18n import _
16 from mercurial.node import bin, hex
16 from mercurial.node import bin, hex
17 from mercurial.pycompat import open
17 from mercurial.pycompat import open
18 from mercurial import (
18 from mercurial import (
19 changegroup,
19 changegroup,
20 changelog,
20 changelog,
21 context,
21 context,
22 error,
22 error,
23 extensions,
23 extensions,
24 match,
24 match,
25 pycompat,
25 pycompat,
26 scmutil,
26 scmutil,
27 store,
27 store,
28 streamclone,
28 streamclone,
29 util,
29 util,
30 wireprotoserver,
30 wireprotoserver,
31 wireprototypes,
31 wireprototypes,
32 wireprotov1server,
32 wireprotov1server,
33 )
33 )
34 from . import (
34 from . import (
35 constants,
35 constants,
36 shallowutil,
36 shallowutil,
37 )
37 )
38
38
39 _sshv1server = wireprotoserver.sshv1protocolhandler
39 _sshv1server = wireprotoserver.sshv1protocolhandler
40
40
41
41
42 def setupserver(ui, repo):
42 def setupserver(ui, repo):
43 """Sets up a normal Mercurial repo so it can serve files to shallow repos."""
43 """Sets up a normal Mercurial repo so it can serve files to shallow repos."""
44 onetimesetup(ui)
44 onetimesetup(ui)
45
45
46 # don't send files to shallow clients during pulls
46 # don't send files to shallow clients during pulls
47 def generatefiles(
47 def generatefiles(
48 orig, self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
48 orig, self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
49 ):
49 ):
50 caps = self._bundlecaps or []
50 caps = self._bundlecaps or []
51 if constants.BUNDLE2_CAPABLITY in caps:
51 if constants.BUNDLE2_CAPABLITY in caps:
52 # only send files that don't match the specified patterns
52 # only send files that don't match the specified patterns
53 includepattern = None
53 includepattern = None
54 excludepattern = None
54 excludepattern = None
55 for cap in self._bundlecaps or []:
55 for cap in self._bundlecaps or []:
56 if cap.startswith(b"includepattern="):
56 if cap.startswith(b"includepattern="):
57 includepattern = cap[len(b"includepattern=") :].split(b'\0')
57 includepattern = cap[len(b"includepattern=") :].split(b'\0')
58 elif cap.startswith(b"excludepattern="):
58 elif cap.startswith(b"excludepattern="):
59 excludepattern = cap[len(b"excludepattern=") :].split(b'\0')
59 excludepattern = cap[len(b"excludepattern=") :].split(b'\0')
60
60
61 m = match.always()
61 m = match.always()
62 if includepattern or excludepattern:
62 if includepattern or excludepattern:
63 m = match.match(
63 m = match.match(
64 repo.root, b'', None, includepattern, excludepattern
64 repo.root, b'', None, includepattern, excludepattern
65 )
65 )
66
66
67 changedfiles = list([f for f in changedfiles if not m(f)])
67 changedfiles = list([f for f in changedfiles if not m(f)])
68 return orig(
68 return orig(
69 self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
69 self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
70 )
70 )
71
71
72 extensions.wrapfunction(
72 extensions.wrapfunction(
73 changegroup.cgpacker, b'generatefiles', generatefiles
73 changegroup.cgpacker, b'generatefiles', generatefiles
74 )
74 )
75
75
76
76
77 onetime = False
77 onetime = False
78
78
79
79
80 def onetimesetup(ui):
80 def onetimesetup(ui):
81 """Configures the wireprotocol for both clients and servers."""
81 """Configures the wireprotocol for both clients and servers."""
82 global onetime
82 global onetime
83 if onetime:
83 if onetime:
84 return
84 return
85 onetime = True
85 onetime = True
86
86
87 # support file content requests
87 # support file content requests
88 wireprotov1server.wireprotocommand(
88 wireprotov1server.wireprotocommand(
89 b'x_rfl_getflogheads', b'path', permission=b'pull'
89 b'x_rfl_getflogheads', b'path', permission=b'pull'
90 )(getflogheads)
90 )(getflogheads)
91 wireprotov1server.wireprotocommand(
91 wireprotov1server.wireprotocommand(
92 b'x_rfl_getfiles', b'', permission=b'pull'
92 b'x_rfl_getfiles', b'', permission=b'pull'
93 )(getfiles)
93 )(getfiles)
94 wireprotov1server.wireprotocommand(
94 wireprotov1server.wireprotocommand(
95 b'x_rfl_getfile', b'file node', permission=b'pull'
95 b'x_rfl_getfile', b'file node', permission=b'pull'
96 )(getfile)
96 )(getfile)
97
97
98 class streamstate(object):
98 class streamstate(object):
99 match = None
99 match = None
100 shallowremote = False
100 shallowremote = False
101 noflatmf = False
101 noflatmf = False
102
102
103 state = streamstate()
103 state = streamstate()
104
104
105 def stream_out_shallow(repo, proto, other):
105 def stream_out_shallow(repo, proto, other):
106 includepattern = None
106 includepattern = None
107 excludepattern = None
107 excludepattern = None
108 raw = other.get(b'includepattern')
108 raw = other.get(b'includepattern')
109 if raw:
109 if raw:
110 includepattern = raw.split(b'\0')
110 includepattern = raw.split(b'\0')
111 raw = other.get(b'excludepattern')
111 raw = other.get(b'excludepattern')
112 if raw:
112 if raw:
113 excludepattern = raw.split(b'\0')
113 excludepattern = raw.split(b'\0')
114
114
115 oldshallow = state.shallowremote
115 oldshallow = state.shallowremote
116 oldmatch = state.match
116 oldmatch = state.match
117 oldnoflatmf = state.noflatmf
117 oldnoflatmf = state.noflatmf
118 try:
118 try:
119 state.shallowremote = True
119 state.shallowremote = True
120 state.match = match.always()
120 state.match = match.always()
121 state.noflatmf = other.get(b'noflatmanifest') == b'True'
121 state.noflatmf = other.get(b'noflatmanifest') == b'True'
122 if includepattern or excludepattern:
122 if includepattern or excludepattern:
123 state.match = match.match(
123 state.match = match.match(
124 repo.root, b'', None, includepattern, excludepattern
124 repo.root, b'', None, includepattern, excludepattern
125 )
125 )
126 streamres = wireprotov1server.stream(repo, proto)
126 streamres = wireprotov1server.stream(repo, proto)
127
127
128 # Force the first value to execute, so the file list is computed
128 # Force the first value to execute, so the file list is computed
129 # within the try/finally scope
129 # within the try/finally scope
130 first = next(streamres.gen)
130 first = next(streamres.gen)
131 second = next(streamres.gen)
131 second = next(streamres.gen)
132
132
133 def gen():
133 def gen():
134 yield first
134 yield first
135 yield second
135 yield second
136 for value in streamres.gen:
136 for value in streamres.gen:
137 yield value
137 yield value
138
138
139 return wireprototypes.streamres(gen())
139 return wireprototypes.streamres(gen())
140 finally:
140 finally:
141 state.shallowremote = oldshallow
141 state.shallowremote = oldshallow
142 state.match = oldmatch
142 state.match = oldmatch
143 state.noflatmf = oldnoflatmf
143 state.noflatmf = oldnoflatmf
144
144
145 wireprotov1server.commands[b'stream_out_shallow'] = (
145 wireprotov1server.commands[b'stream_out_shallow'] = (
146 stream_out_shallow,
146 stream_out_shallow,
147 b'*',
147 b'*',
148 )
148 )
149
149
150 # don't clone filelogs to shallow clients
150 # don't clone filelogs to shallow clients
151 def _walkstreamfiles(orig, repo, matcher=None):
151 def _walkstreamfiles(orig, repo, matcher=None):
152 if state.shallowremote:
152 if state.shallowremote:
153 # if we are shallow ourselves, stream our local commits
153 # if we are shallow ourselves, stream our local commits
154 if shallowutil.isenabled(repo):
154 if shallowutil.isenabled(repo):
155 striplen = len(repo.store.path) + 1
155 striplen = len(repo.store.path) + 1
156 readdir = repo.store.rawvfs.readdir
156 readdir = repo.store.rawvfs.readdir
157 visit = [os.path.join(repo.store.path, b'data')]
157 visit = [os.path.join(repo.store.path, b'data')]
158 while visit:
158 while visit:
159 p = visit.pop()
159 p = visit.pop()
160 for f, kind, st in readdir(p, stat=True):
160 for f, kind, st in readdir(p, stat=True):
161 fp = p + b'/' + f
161 fp = p + b'/' + f
162 if kind == stat.S_IFREG:
162 if kind == stat.S_IFREG:
163 if not fp.endswith(b'.i') and not fp.endswith(
163 if not fp.endswith(b'.i') and not fp.endswith(
164 b'.d'
164 b'.d'
165 ):
165 ):
166 n = util.pconvert(fp[striplen:])
166 n = util.pconvert(fp[striplen:])
167 d = store.decodedir(n)
167 d = store.decodedir(n)
168 t = store.FILETYPE_OTHER
168 t = store.FILETYPE_OTHER
169 yield (t, d, n, st.st_size)
169 yield (t, d, st.st_size)
170 if kind == stat.S_IFDIR:
170 if kind == stat.S_IFDIR:
171 visit.append(fp)
171 visit.append(fp)
172
172
173 if scmutil.istreemanifest(repo):
173 if scmutil.istreemanifest(repo):
174 for (t, u, e, s) in repo.store.datafiles():
174 for (t, u, s) in repo.store.datafiles():
175 if u.startswith(b'meta/') and (
175 if u.startswith(b'meta/') and (
176 u.endswith(b'.i') or u.endswith(b'.d')
176 u.endswith(b'.i') or u.endswith(b'.d')
177 ):
177 ):
178 yield (t, u, e, s)
178 yield (t, u, s)
179
179
180 # Return .d and .i files that do not match the shallow pattern
180 # Return .d and .i files that do not match the shallow pattern
181 match = state.match
181 match = state.match
182 if match and not match.always():
182 if match and not match.always():
183 for (t, u, e, s) in repo.store.datafiles():
183 for (t, u, s) in repo.store.datafiles():
184 f = u[5:-2] # trim data/... and .i/.d
184 f = u[5:-2] # trim data/... and .i/.d
185 if not state.match(f):
185 if not state.match(f):
186 yield (t, u, e, s)
186 yield (t, u, s)
187
187
188 for x in repo.store.topfiles():
188 for x in repo.store.topfiles():
189 if state.noflatmf and x[1][:11] == b'00manifest.':
189 if state.noflatmf and x[1][:11] == b'00manifest.':
190 continue
190 continue
191 yield x
191 yield x
192
192
193 elif shallowutil.isenabled(repo):
193 elif shallowutil.isenabled(repo):
194 # don't allow cloning from a shallow repo to a full repo
194 # don't allow cloning from a shallow repo to a full repo
195 # since it would require fetching every version of every
195 # since it would require fetching every version of every
196 # file in order to create the revlogs.
196 # file in order to create the revlogs.
197 raise error.Abort(
197 raise error.Abort(
198 _(b"Cannot clone from a shallow repo to a full repo.")
198 _(b"Cannot clone from a shallow repo to a full repo.")
199 )
199 )
200 else:
200 else:
201 for x in orig(repo, matcher):
201 for x in orig(repo, matcher):
202 yield x
202 yield x
203
203
204 extensions.wrapfunction(streamclone, b'_walkstreamfiles', _walkstreamfiles)
204 extensions.wrapfunction(streamclone, b'_walkstreamfiles', _walkstreamfiles)
205
205
206 # expose remotefilelog capabilities
206 # expose remotefilelog capabilities
207 def _capabilities(orig, repo, proto):
207 def _capabilities(orig, repo, proto):
208 caps = orig(repo, proto)
208 caps = orig(repo, proto)
209 if shallowutil.isenabled(repo) or ui.configbool(
209 if shallowutil.isenabled(repo) or ui.configbool(
210 b'remotefilelog', b'server'
210 b'remotefilelog', b'server'
211 ):
211 ):
212 if isinstance(proto, _sshv1server):
212 if isinstance(proto, _sshv1server):
213 # legacy getfiles method which only works over ssh
213 # legacy getfiles method which only works over ssh
214 caps.append(constants.NETWORK_CAP_LEGACY_SSH_GETFILES)
214 caps.append(constants.NETWORK_CAP_LEGACY_SSH_GETFILES)
215 caps.append(b'x_rfl_getflogheads')
215 caps.append(b'x_rfl_getflogheads')
216 caps.append(b'x_rfl_getfile')
216 caps.append(b'x_rfl_getfile')
217 return caps
217 return caps
218
218
219 extensions.wrapfunction(wireprotov1server, b'_capabilities', _capabilities)
219 extensions.wrapfunction(wireprotov1server, b'_capabilities', _capabilities)
220
220
221 def _adjustlinkrev(orig, self, *args, **kwargs):
221 def _adjustlinkrev(orig, self, *args, **kwargs):
222 # When generating file blobs, taking the real path is too slow on large
222 # When generating file blobs, taking the real path is too slow on large
223 # repos, so force it to just return the linkrev directly.
223 # repos, so force it to just return the linkrev directly.
224 repo = self._repo
224 repo = self._repo
225 if util.safehasattr(repo, b'forcelinkrev') and repo.forcelinkrev:
225 if util.safehasattr(repo, b'forcelinkrev') and repo.forcelinkrev:
226 return self._filelog.linkrev(self._filelog.rev(self._filenode))
226 return self._filelog.linkrev(self._filelog.rev(self._filenode))
227 return orig(self, *args, **kwargs)
227 return orig(self, *args, **kwargs)
228
228
229 extensions.wrapfunction(
229 extensions.wrapfunction(
230 context.basefilectx, b'_adjustlinkrev', _adjustlinkrev
230 context.basefilectx, b'_adjustlinkrev', _adjustlinkrev
231 )
231 )
232
232
233 def _iscmd(orig, cmd):
233 def _iscmd(orig, cmd):
234 if cmd == b'x_rfl_getfiles':
234 if cmd == b'x_rfl_getfiles':
235 return False
235 return False
236 return orig(cmd)
236 return orig(cmd)
237
237
238 extensions.wrapfunction(wireprotoserver, b'iscmd', _iscmd)
238 extensions.wrapfunction(wireprotoserver, b'iscmd', _iscmd)
239
239
240
240
241 def _loadfileblob(repo, cachepath, path, node):
241 def _loadfileblob(repo, cachepath, path, node):
242 filecachepath = os.path.join(cachepath, path, hex(node))
242 filecachepath = os.path.join(cachepath, path, hex(node))
243 if not os.path.exists(filecachepath) or os.path.getsize(filecachepath) == 0:
243 if not os.path.exists(filecachepath) or os.path.getsize(filecachepath) == 0:
244 filectx = repo.filectx(path, fileid=node)
244 filectx = repo.filectx(path, fileid=node)
245 if filectx.node() == repo.nullid:
245 if filectx.node() == repo.nullid:
246 repo.changelog = changelog.changelog(repo.svfs)
246 repo.changelog = changelog.changelog(repo.svfs)
247 filectx = repo.filectx(path, fileid=node)
247 filectx = repo.filectx(path, fileid=node)
248
248
249 text = createfileblob(filectx)
249 text = createfileblob(filectx)
250 # TODO configurable compression engines
250 # TODO configurable compression engines
251 text = zlib.compress(text)
251 text = zlib.compress(text)
252
252
253 # everything should be user & group read/writable
253 # everything should be user & group read/writable
254 oldumask = os.umask(0o002)
254 oldumask = os.umask(0o002)
255 try:
255 try:
256 dirname = os.path.dirname(filecachepath)
256 dirname = os.path.dirname(filecachepath)
257 if not os.path.exists(dirname):
257 if not os.path.exists(dirname):
258 try:
258 try:
259 os.makedirs(dirname)
259 os.makedirs(dirname)
260 except OSError as ex:
260 except OSError as ex:
261 if ex.errno != errno.EEXIST:
261 if ex.errno != errno.EEXIST:
262 raise
262 raise
263
263
264 f = None
264 f = None
265 try:
265 try:
266 f = util.atomictempfile(filecachepath, b"wb")
266 f = util.atomictempfile(filecachepath, b"wb")
267 f.write(text)
267 f.write(text)
268 except (IOError, OSError):
268 except (IOError, OSError):
269 # Don't abort if the user only has permission to read,
269 # Don't abort if the user only has permission to read,
270 # and not write.
270 # and not write.
271 pass
271 pass
272 finally:
272 finally:
273 if f:
273 if f:
274 f.close()
274 f.close()
275 finally:
275 finally:
276 os.umask(oldumask)
276 os.umask(oldumask)
277 else:
277 else:
278 with open(filecachepath, b"rb") as f:
278 with open(filecachepath, b"rb") as f:
279 text = f.read()
279 text = f.read()
280 return text
280 return text
281
281
282
282
283 def getflogheads(repo, proto, path):
283 def getflogheads(repo, proto, path):
284 """A server api for requesting a filelog's heads"""
284 """A server api for requesting a filelog's heads"""
285 flog = repo.file(path)
285 flog = repo.file(path)
286 heads = flog.heads()
286 heads = flog.heads()
287 return b'\n'.join((hex(head) for head in heads if head != repo.nullid))
287 return b'\n'.join((hex(head) for head in heads if head != repo.nullid))
288
288
289
289
290 def getfile(repo, proto, file, node):
290 def getfile(repo, proto, file, node):
291 """A server api for requesting a particular version of a file. Can be used
291 """A server api for requesting a particular version of a file. Can be used
292 in batches to request many files at once. The return protocol is:
292 in batches to request many files at once. The return protocol is:
293 <errorcode>\0<data/errormsg> where <errorcode> is 0 for success or
293 <errorcode>\0<data/errormsg> where <errorcode> is 0 for success or
294 non-zero for an error.
294 non-zero for an error.
295
295
296 data is a compressed blob with revlog flag and ancestors information. See
296 data is a compressed blob with revlog flag and ancestors information. See
297 createfileblob for its content.
297 createfileblob for its content.
298 """
298 """
299 if shallowutil.isenabled(repo):
299 if shallowutil.isenabled(repo):
300 return b'1\0' + _(b'cannot fetch remote files from shallow repo')
300 return b'1\0' + _(b'cannot fetch remote files from shallow repo')
301 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
301 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
302 if not cachepath:
302 if not cachepath:
303 cachepath = os.path.join(repo.path, b"remotefilelogcache")
303 cachepath = os.path.join(repo.path, b"remotefilelogcache")
304 node = bin(node.strip())
304 node = bin(node.strip())
305 if node == repo.nullid:
305 if node == repo.nullid:
306 return b'0\0'
306 return b'0\0'
307 return b'0\0' + _loadfileblob(repo, cachepath, file, node)
307 return b'0\0' + _loadfileblob(repo, cachepath, file, node)
308
308
309
309
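The getfile docstring above pins the wire format to b'<errorcode>\0<data/errormsg>'. A minimal client-side sketch of splitting such a response could look like the following; parse_getfile_response is a hypothetical helper, not something remotefilelog exports:

def parse_getfile_response(raw):
    # Split on the first NUL: b'0' means success, anything else is an error.
    code, _, payload = raw.partition(b'\0')
    if code != b'0':
        raise RuntimeError(payload.decode('utf-8', 'replace'))
    return payload  # compressed file blob, see createfileblob below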
310 def getfiles(repo, proto):
310 def getfiles(repo, proto):
311 """A server api for requesting particular versions of particular files."""
311 """A server api for requesting particular versions of particular files."""
312 if shallowutil.isenabled(repo):
312 if shallowutil.isenabled(repo):
313 raise error.Abort(_(b'cannot fetch remote files from shallow repo'))
313 raise error.Abort(_(b'cannot fetch remote files from shallow repo'))
314 if not isinstance(proto, _sshv1server):
314 if not isinstance(proto, _sshv1server):
315 raise error.Abort(_(b'cannot fetch remote files over non-ssh protocol'))
315 raise error.Abort(_(b'cannot fetch remote files over non-ssh protocol'))
316
316
317 def streamer():
317 def streamer():
318 fin = proto._fin
318 fin = proto._fin
319
319
320 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
320 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
321 if not cachepath:
321 if not cachepath:
322 cachepath = os.path.join(repo.path, b"remotefilelogcache")
322 cachepath = os.path.join(repo.path, b"remotefilelogcache")
323
323
324 while True:
324 while True:
325 request = fin.readline()[:-1]
325 request = fin.readline()[:-1]
326 if not request:
326 if not request:
327 break
327 break
328
328
329 node = bin(request[:40])
329 node = bin(request[:40])
330 if node == repo.nullid:
330 if node == repo.nullid:
331 yield b'0\n'
331 yield b'0\n'
332 continue
332 continue
333
333
334 path = request[40:]
334 path = request[40:]
335
335
336 text = _loadfileblob(repo, cachepath, path, node)
336 text = _loadfileblob(repo, cachepath, path, node)
337
337
338 yield b'%d\n%s' % (len(text), text)
338 yield b'%d\n%s' % (len(text), text)
339
339
340 # it would be better to only flush after processing a whole batch
340 # it would be better to only flush after processing a whole batch
341 # but currently we don't know if there are more requests coming
341 # but currently we don't know if there are more requests coming
342 proto._fout.flush()
342 proto._fout.flush()
343
343
344 return wireprototypes.streamres(streamer())
344 return wireprototypes.streamres(streamer())
345
345
346
346
347 def createfileblob(filectx):
347 def createfileblob(filectx):
348 """
348 """
349 format:
349 format:
350 v0:
350 v0:
351 str(len(rawtext)) + '\0' + rawtext + ancestortext
351 str(len(rawtext)) + '\0' + rawtext + ancestortext
352 v1:
352 v1:
353 'v1' + '\n' + metalist + '\0' + rawtext + ancestortext
353 'v1' + '\n' + metalist + '\0' + rawtext + ancestortext
354 metalist := metalist + '\n' + meta | meta
354 metalist := metalist + '\n' + meta | meta
355 meta := sizemeta | flagmeta
355 meta := sizemeta | flagmeta
356 sizemeta := METAKEYSIZE + str(len(rawtext))
356 sizemeta := METAKEYSIZE + str(len(rawtext))
357 flagmeta := METAKEYFLAG + str(flag)
357 flagmeta := METAKEYFLAG + str(flag)
358
358
359 note: sizemeta must exist. METAKEYFLAG and METAKEYSIZE must have a
359 note: sizemeta must exist. METAKEYFLAG and METAKEYSIZE must have a
360 length of 1.
360 length of 1.
361 """
361 """
362 flog = filectx.filelog()
362 flog = filectx.filelog()
363 frev = filectx.filerev()
363 frev = filectx.filerev()
364 revlogflags = flog._revlog.flags(frev)
364 revlogflags = flog._revlog.flags(frev)
365 if revlogflags == 0:
365 if revlogflags == 0:
366 # normal files
366 # normal files
367 text = filectx.data()
367 text = filectx.data()
368 else:
368 else:
369 # lfs, read raw revision data
369 # lfs, read raw revision data
370 text = flog.rawdata(frev)
370 text = flog.rawdata(frev)
371
371
372 repo = filectx._repo
372 repo = filectx._repo
373
373
374 ancestors = [filectx]
374 ancestors = [filectx]
375
375
376 try:
376 try:
377 repo.forcelinkrev = True
377 repo.forcelinkrev = True
378 ancestors.extend([f for f in filectx.ancestors()])
378 ancestors.extend([f for f in filectx.ancestors()])
379
379
380 ancestortext = b""
380 ancestortext = b""
381 for ancestorctx in ancestors:
381 for ancestorctx in ancestors:
382 parents = ancestorctx.parents()
382 parents = ancestorctx.parents()
383 p1 = repo.nullid
383 p1 = repo.nullid
384 p2 = repo.nullid
384 p2 = repo.nullid
385 if len(parents) > 0:
385 if len(parents) > 0:
386 p1 = parents[0].filenode()
386 p1 = parents[0].filenode()
387 if len(parents) > 1:
387 if len(parents) > 1:
388 p2 = parents[1].filenode()
388 p2 = parents[1].filenode()
389
389
390 copyname = b""
390 copyname = b""
391 rename = ancestorctx.renamed()
391 rename = ancestorctx.renamed()
392 if rename:
392 if rename:
393 copyname = rename[0]
393 copyname = rename[0]
394 linknode = ancestorctx.node()
394 linknode = ancestorctx.node()
395 ancestortext += b"%s%s%s%s%s\0" % (
395 ancestortext += b"%s%s%s%s%s\0" % (
396 ancestorctx.filenode(),
396 ancestorctx.filenode(),
397 p1,
397 p1,
398 p2,
398 p2,
399 linknode,
399 linknode,
400 copyname,
400 copyname,
401 )
401 )
402 finally:
402 finally:
403 repo.forcelinkrev = False
403 repo.forcelinkrev = False
404
404
405 header = shallowutil.buildfileblobheader(len(text), revlogflags)
405 header = shallowutil.buildfileblobheader(len(text), revlogflags)
406
406
407 return b"%s\0%s%s" % (header, text, ancestortext)
407 return b"%s\0%s%s" % (header, text, ancestortext)
408
408
409
409
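createfileblob's docstring spells out the v0/v1 blob layouts. The sketch below splits a v1 blob accordingly; the SIZE_KEY/FLAG_KEY byte values are assumptions for illustration, and the real parsing lives in shallowutil.parsesizeflags():

SIZE_KEY = b's'  # assumed value of constants.METAKEYSIZE
FLAG_KEY = b'f'  # assumed value of constants.METAKEYFLAG

def split_v1_blob(blob):
    # v1 layout: b'v1\n' + metalist + b'\0' + rawtext + ancestortext
    header, _, rest = blob.partition(b'\0')
    if not header.startswith(b'v1\n'):
        raise ValueError('only the v1 layout is handled in this sketch')
    meta = {}
    for line in header[len(b'v1\n'):].split(b'\n'):
        meta[line[:1]] = int(line[1:])
    size = meta[SIZE_KEY]  # sizemeta must exist per the docstring
    return meta, rest[:size], rest[size:]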
410 def gcserver(ui, repo):
410 def gcserver(ui, repo):
411 if not repo.ui.configbool(b"remotefilelog", b"server"):
411 if not repo.ui.configbool(b"remotefilelog", b"server"):
412 return
412 return
413
413
414 neededfiles = set()
414 neededfiles = set()
415 heads = repo.revs(b"heads(tip~25000:) - null")
415 heads = repo.revs(b"heads(tip~25000:) - null")
416
416
417 cachepath = repo.vfs.join(b"remotefilelogcache")
417 cachepath = repo.vfs.join(b"remotefilelogcache")
418 for head in heads:
418 for head in heads:
419 mf = repo[head].manifest()
419 mf = repo[head].manifest()
420 for filename, filenode in pycompat.iteritems(mf):
420 for filename, filenode in pycompat.iteritems(mf):
421 filecachepath = os.path.join(cachepath, filename, hex(filenode))
421 filecachepath = os.path.join(cachepath, filename, hex(filenode))
422 neededfiles.add(filecachepath)
422 neededfiles.add(filecachepath)
423
423
424 # delete unneeded older files
424 # delete unneeded older files
425 days = repo.ui.configint(b"remotefilelog", b"serverexpiration")
425 days = repo.ui.configint(b"remotefilelog", b"serverexpiration")
426 expiration = time.time() - (days * 24 * 60 * 60)
426 expiration = time.time() - (days * 24 * 60 * 60)
427
427
428 progress = ui.makeprogress(_(b"removing old server cache"), unit=b"files")
428 progress = ui.makeprogress(_(b"removing old server cache"), unit=b"files")
429 progress.update(0)
429 progress.update(0)
430 for root, dirs, files in os.walk(cachepath):
430 for root, dirs, files in os.walk(cachepath):
431 for file in files:
431 for file in files:
432 filepath = os.path.join(root, file)
432 filepath = os.path.join(root, file)
433 progress.increment()
433 progress.increment()
434 if filepath in neededfiles:
434 if filepath in neededfiles:
435 continue
435 continue
436
436
437 stat = os.stat(filepath)
437 stat = os.stat(filepath)
438 if stat.st_mtime < expiration:
438 if stat.st_mtime < expiration:
439 os.remove(filepath)
439 os.remove(filepath)
440
440
441 progress.complete()
441 progress.complete()
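gcserver above keeps any cache file reachable from recent heads and expires the rest by mtime, driven by remotefilelog.serverexpiration (in days). A hedged stand-alone sketch of that sweep, with illustrative names and no progress reporting:

import os
import time

def sweep_cache(cachepath, neededfiles, days):
    # Remove cache files that are unreferenced and older than the cutoff.
    cutoff = time.time() - days * 24 * 60 * 60
    removed = 0
    for root, dirs, files in os.walk(cachepath):
        for name in files:
            path = os.path.join(root, name)
            if path in neededfiles:
                continue
            if os.stat(path).st_mtime < cutoff:
                os.remove(path)
                removed += 1
    return removed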
@@ -1,563 +1,563 b''
1 # repair.py - functions for repository repair for mercurial
1 # repair.py - functions for repository repair for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Chris Mason <mason@suse.com>
3 # Copyright 2005, 2006 Chris Mason <mason@suse.com>
4 # Copyright 2007 Olivia Mackall
4 # Copyright 2007 Olivia Mackall
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import errno
11 import errno
12
12
13 from .i18n import _
13 from .i18n import _
14 from .node import (
14 from .node import (
15 hex,
15 hex,
16 short,
16 short,
17 )
17 )
18 from . import (
18 from . import (
19 bundle2,
19 bundle2,
20 changegroup,
20 changegroup,
21 discovery,
21 discovery,
22 error,
22 error,
23 exchange,
23 exchange,
24 obsolete,
24 obsolete,
25 obsutil,
25 obsutil,
26 pathutil,
26 pathutil,
27 phases,
27 phases,
28 pycompat,
28 pycompat,
29 requirements,
29 requirements,
30 scmutil,
30 scmutil,
31 util,
31 util,
32 )
32 )
33 from .utils import (
33 from .utils import (
34 hashutil,
34 hashutil,
35 stringutil,
35 stringutil,
36 urlutil,
36 urlutil,
37 )
37 )
38
38
39
39
40 def backupbundle(
40 def backupbundle(
41 repo, bases, heads, node, suffix, compress=True, obsolescence=True
41 repo, bases, heads, node, suffix, compress=True, obsolescence=True
42 ):
42 ):
43 """create a bundle with the specified revisions as a backup"""
43 """create a bundle with the specified revisions as a backup"""
44
44
45 backupdir = b"strip-backup"
45 backupdir = b"strip-backup"
46 vfs = repo.vfs
46 vfs = repo.vfs
47 if not vfs.isdir(backupdir):
47 if not vfs.isdir(backupdir):
48 vfs.mkdir(backupdir)
48 vfs.mkdir(backupdir)
49
49
50 # Include a hash of all the nodes in the filename for uniqueness
50 # Include a hash of all the nodes in the filename for uniqueness
51 allcommits = repo.set(b'%ln::%ln', bases, heads)
51 allcommits = repo.set(b'%ln::%ln', bases, heads)
52 allhashes = sorted(c.hex() for c in allcommits)
52 allhashes = sorted(c.hex() for c in allcommits)
53 totalhash = hashutil.sha1(b''.join(allhashes)).digest()
53 totalhash = hashutil.sha1(b''.join(allhashes)).digest()
54 name = b"%s/%s-%s-%s.hg" % (
54 name = b"%s/%s-%s-%s.hg" % (
55 backupdir,
55 backupdir,
56 short(node),
56 short(node),
57 hex(totalhash[:4]),
57 hex(totalhash[:4]),
58 suffix,
58 suffix,
59 )
59 )
60
60
61 cgversion = changegroup.localversion(repo)
61 cgversion = changegroup.localversion(repo)
62 comp = None
62 comp = None
63 if cgversion != b'01':
63 if cgversion != b'01':
64 bundletype = b"HG20"
64 bundletype = b"HG20"
65 if compress:
65 if compress:
66 comp = b'BZ'
66 comp = b'BZ'
67 elif compress:
67 elif compress:
68 bundletype = b"HG10BZ"
68 bundletype = b"HG10BZ"
69 else:
69 else:
70 bundletype = b"HG10UN"
70 bundletype = b"HG10UN"
71
71
72 outgoing = discovery.outgoing(repo, missingroots=bases, ancestorsof=heads)
72 outgoing = discovery.outgoing(repo, missingroots=bases, ancestorsof=heads)
73 contentopts = {
73 contentopts = {
74 b'cg.version': cgversion,
74 b'cg.version': cgversion,
75 b'obsolescence': obsolescence,
75 b'obsolescence': obsolescence,
76 b'phases': True,
76 b'phases': True,
77 }
77 }
78 return bundle2.writenewbundle(
78 return bundle2.writenewbundle(
79 repo.ui,
79 repo.ui,
80 repo,
80 repo,
81 b'strip',
81 b'strip',
82 name,
82 name,
83 bundletype,
83 bundletype,
84 outgoing,
84 outgoing,
85 contentopts,
85 contentopts,
86 vfs,
86 vfs,
87 compression=comp,
87 compression=comp,
88 )
88 )
89
89
90
90
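backupbundle derives the backup name from the stripped node plus a hash of every bundled changeset, as the comment above notes. A rough reconstruction with hashlib (Mercurial itself goes through hashutil and short(); names and slicing here are illustrative only):

import hashlib

def backup_bundle_name(node_hex, all_commit_hexes, suffix):
    # sha1 over the sorted hex hashes keeps the name unique per revision set
    total = hashlib.sha1(b''.join(sorted(all_commit_hexes))).digest()
    return b'strip-backup/%s-%s-%s.hg' % (
        node_hex[:12],                 # stands in for short(node)
        total[:4].hex().encode(),      # stands in for hex(totalhash[:4])
        suffix,
    )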
91 def _collectfiles(repo, striprev):
91 def _collectfiles(repo, striprev):
92 """find out the filelogs affected by the strip"""
92 """find out the filelogs affected by the strip"""
93 files = set()
93 files = set()
94
94
95 for x in pycompat.xrange(striprev, len(repo)):
95 for x in pycompat.xrange(striprev, len(repo)):
96 files.update(repo[x].files())
96 files.update(repo[x].files())
97
97
98 return sorted(files)
98 return sorted(files)
99
99
100
100
101 def _collectrevlog(revlog, striprev):
101 def _collectrevlog(revlog, striprev):
102 _, brokenset = revlog.getstrippoint(striprev)
102 _, brokenset = revlog.getstrippoint(striprev)
103 return [revlog.linkrev(r) for r in brokenset]
103 return [revlog.linkrev(r) for r in brokenset]
104
104
105
105
106 def _collectbrokencsets(repo, files, striprev):
106 def _collectbrokencsets(repo, files, striprev):
107 """return the changesets which will be broken by the truncation"""
107 """return the changesets which will be broken by the truncation"""
108 s = set()
108 s = set()
109
109
110 for revlog in manifestrevlogs(repo):
110 for revlog in manifestrevlogs(repo):
111 s.update(_collectrevlog(revlog, striprev))
111 s.update(_collectrevlog(revlog, striprev))
112 for fname in files:
112 for fname in files:
113 s.update(_collectrevlog(repo.file(fname), striprev))
113 s.update(_collectrevlog(repo.file(fname), striprev))
114
114
115 return s
115 return s
116
116
117
117
118 def strip(ui, repo, nodelist, backup=True, topic=b'backup'):
118 def strip(ui, repo, nodelist, backup=True, topic=b'backup'):
119 # This function requires the caller to lock the repo, but it operates
119 # This function requires the caller to lock the repo, but it operates
120 # within a transaction of its own, and thus requires there to be no current
120 # within a transaction of its own, and thus requires there to be no current
121 # transaction when it is called.
121 # transaction when it is called.
122 if repo.currenttransaction() is not None:
122 if repo.currenttransaction() is not None:
123 raise error.ProgrammingError(b'cannot strip from inside a transaction')
123 raise error.ProgrammingError(b'cannot strip from inside a transaction')
124
124
125 # Simple way to maintain backwards compatibility for this
125 # Simple way to maintain backwards compatibility for this
126 # argument.
126 # argument.
127 if backup in [b'none', b'strip']:
127 if backup in [b'none', b'strip']:
128 backup = False
128 backup = False
129
129
130 repo = repo.unfiltered()
130 repo = repo.unfiltered()
131 repo.destroying()
131 repo.destroying()
132 vfs = repo.vfs
132 vfs = repo.vfs
133 # load bookmarks before the changelog to avoid side effects from an outdated
133 # load bookmarks before the changelog to avoid side effects from an outdated
134 # changelog (see repo._refreshchangelog)
134 # changelog (see repo._refreshchangelog)
135 repo._bookmarks
135 repo._bookmarks
136 cl = repo.changelog
136 cl = repo.changelog
137
137
138 # TODO handle undo of merge sets
138 # TODO handle undo of merge sets
139 if isinstance(nodelist, bytes):
139 if isinstance(nodelist, bytes):
140 nodelist = [nodelist]
140 nodelist = [nodelist]
141 striplist = [cl.rev(node) for node in nodelist]
141 striplist = [cl.rev(node) for node in nodelist]
142 striprev = min(striplist)
142 striprev = min(striplist)
143
143
144 files = _collectfiles(repo, striprev)
144 files = _collectfiles(repo, striprev)
145 saverevs = _collectbrokencsets(repo, files, striprev)
145 saverevs = _collectbrokencsets(repo, files, striprev)
146
146
147 # Some revisions with rev > striprev may not be descendants of striprev.
147 # Some revisions with rev > striprev may not be descendants of striprev.
148 # We have to find these revisions and put them in a bundle, so that
148 # We have to find these revisions and put them in a bundle, so that
149 # we can restore them after the truncations.
149 # we can restore them after the truncations.
150 # To create the bundle we use repo.changegroupsubset which requires
150 # To create the bundle we use repo.changegroupsubset which requires
151 # the list of heads and bases of the set of interesting revisions.
151 # the list of heads and bases of the set of interesting revisions.
152 # (head = revision in the set that has no descendant in the set;
152 # (head = revision in the set that has no descendant in the set;
153 # base = revision in the set that has no ancestor in the set)
153 # base = revision in the set that has no ancestor in the set)
154 tostrip = set(striplist)
154 tostrip = set(striplist)
155 saveheads = set(saverevs)
155 saveheads = set(saverevs)
156 for r in cl.revs(start=striprev + 1):
156 for r in cl.revs(start=striprev + 1):
157 if any(p in tostrip for p in cl.parentrevs(r)):
157 if any(p in tostrip for p in cl.parentrevs(r)):
158 tostrip.add(r)
158 tostrip.add(r)
159
159
160 if r not in tostrip:
160 if r not in tostrip:
161 saverevs.add(r)
161 saverevs.add(r)
162 saveheads.difference_update(cl.parentrevs(r))
162 saveheads.difference_update(cl.parentrevs(r))
163 saveheads.add(r)
163 saveheads.add(r)
164 saveheads = [cl.node(r) for r in saveheads]
164 saveheads = [cl.node(r) for r in saveheads]
165
165
166 # compute base nodes
166 # compute base nodes
167 if saverevs:
167 if saverevs:
168 descendants = set(cl.descendants(saverevs))
168 descendants = set(cl.descendants(saverevs))
169 saverevs.difference_update(descendants)
169 saverevs.difference_update(descendants)
170 savebases = [cl.node(r) for r in saverevs]
170 savebases = [cl.node(r) for r in saverevs]
171 stripbases = [cl.node(r) for r in tostrip]
171 stripbases = [cl.node(r) for r in tostrip]
172
172
173 stripobsidx = obsmarkers = ()
173 stripobsidx = obsmarkers = ()
174 if repo.ui.configbool(b'devel', b'strip-obsmarkers'):
174 if repo.ui.configbool(b'devel', b'strip-obsmarkers'):
175 obsmarkers = obsutil.exclusivemarkers(repo, stripbases)
175 obsmarkers = obsutil.exclusivemarkers(repo, stripbases)
176 if obsmarkers:
176 if obsmarkers:
177 stripobsidx = [
177 stripobsidx = [
178 i for i, m in enumerate(repo.obsstore) if m in obsmarkers
178 i for i, m in enumerate(repo.obsstore) if m in obsmarkers
179 ]
179 ]
180
180
181 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
181 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
182
182
183 backupfile = None
183 backupfile = None
184 node = nodelist[-1]
184 node = nodelist[-1]
185 if backup:
185 if backup:
186 backupfile = _createstripbackup(repo, stripbases, node, topic)
186 backupfile = _createstripbackup(repo, stripbases, node, topic)
187 # create a changegroup for all the branches we need to keep
187 # create a changegroup for all the branches we need to keep
188 tmpbundlefile = None
188 tmpbundlefile = None
189 if saveheads:
189 if saveheads:
190 # do not compress temporary bundle if we remove it from disk later
190 # do not compress temporary bundle if we remove it from disk later
191 #
191 #
192 # We do not include obsolescence, it might re-introduce prune markers
192 # We do not include obsolescence, it might re-introduce prune markers
193 # we are trying to strip. This is harmless since the stripped markers
193 # we are trying to strip. This is harmless since the stripped markers
194 # are already backed up and we did not touch the markers for the
194 # are already backed up and we did not touch the markers for the
195 # saved changesets.
195 # saved changesets.
196 tmpbundlefile = backupbundle(
196 tmpbundlefile = backupbundle(
197 repo,
197 repo,
198 savebases,
198 savebases,
199 saveheads,
199 saveheads,
200 node,
200 node,
201 b'temp',
201 b'temp',
202 compress=False,
202 compress=False,
203 obsolescence=False,
203 obsolescence=False,
204 )
204 )
205
205
206 with ui.uninterruptible():
206 with ui.uninterruptible():
207 try:
207 try:
208 with repo.transaction(b"strip") as tr:
208 with repo.transaction(b"strip") as tr:
209 # TODO this code violates the interface abstraction of the
209 # TODO this code violates the interface abstraction of the
210 # transaction and makes assumptions that file storage is
210 # transaction and makes assumptions that file storage is
211 # using append-only files. We'll need some kind of storage
211 # using append-only files. We'll need some kind of storage
212 # API to handle stripping for us.
212 # API to handle stripping for us.
213 oldfiles = set(tr._offsetmap.keys())
213 oldfiles = set(tr._offsetmap.keys())
214 oldfiles.update(tr._newfiles)
214 oldfiles.update(tr._newfiles)
215
215
216 tr.startgroup()
216 tr.startgroup()
217 cl.strip(striprev, tr)
217 cl.strip(striprev, tr)
218 stripmanifest(repo, striprev, tr, files)
218 stripmanifest(repo, striprev, tr, files)
219
219
220 for fn in files:
220 for fn in files:
221 repo.file(fn).strip(striprev, tr)
221 repo.file(fn).strip(striprev, tr)
222 tr.endgroup()
222 tr.endgroup()
223
223
224 entries = tr.readjournal()
224 entries = tr.readjournal()
225
225
226 for file, troffset in entries:
226 for file, troffset in entries:
227 if file in oldfiles:
227 if file in oldfiles:
228 continue
228 continue
229 with repo.svfs(file, b'a', checkambig=True) as fp:
229 with repo.svfs(file, b'a', checkambig=True) as fp:
230 fp.truncate(troffset)
230 fp.truncate(troffset)
231 if troffset == 0:
231 if troffset == 0:
232 repo.store.markremoved(file)
232 repo.store.markremoved(file)
233
233
234 deleteobsmarkers(repo.obsstore, stripobsidx)
234 deleteobsmarkers(repo.obsstore, stripobsidx)
235 del repo.obsstore
235 del repo.obsstore
236 repo.invalidatevolatilesets()
236 repo.invalidatevolatilesets()
237 repo._phasecache.filterunknown(repo)
237 repo._phasecache.filterunknown(repo)
238
238
239 if tmpbundlefile:
239 if tmpbundlefile:
240 ui.note(_(b"adding branch\n"))
240 ui.note(_(b"adding branch\n"))
241 f = vfs.open(tmpbundlefile, b"rb")
241 f = vfs.open(tmpbundlefile, b"rb")
242 gen = exchange.readbundle(ui, f, tmpbundlefile, vfs)
242 gen = exchange.readbundle(ui, f, tmpbundlefile, vfs)
243 # silence internal shuffling chatter
243 # silence internal shuffling chatter
244 maybe_silent = (
244 maybe_silent = (
245 repo.ui.silent()
245 repo.ui.silent()
246 if not repo.ui.verbose
246 if not repo.ui.verbose
247 else util.nullcontextmanager()
247 else util.nullcontextmanager()
248 )
248 )
249 with maybe_silent:
249 with maybe_silent:
250 tmpbundleurl = b'bundle:' + vfs.join(tmpbundlefile)
250 tmpbundleurl = b'bundle:' + vfs.join(tmpbundlefile)
251 txnname = b'strip'
251 txnname = b'strip'
252 if not isinstance(gen, bundle2.unbundle20):
252 if not isinstance(gen, bundle2.unbundle20):
253 txnname = b"strip\n%s" % urlutil.hidepassword(
253 txnname = b"strip\n%s" % urlutil.hidepassword(
254 tmpbundleurl
254 tmpbundleurl
255 )
255 )
256 with repo.transaction(txnname) as tr:
256 with repo.transaction(txnname) as tr:
257 bundle2.applybundle(
257 bundle2.applybundle(
258 repo, gen, tr, source=b'strip', url=tmpbundleurl
258 repo, gen, tr, source=b'strip', url=tmpbundleurl
259 )
259 )
260 f.close()
260 f.close()
261
261
262 with repo.transaction(b'repair') as tr:
262 with repo.transaction(b'repair') as tr:
263 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
263 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
264 repo._bookmarks.applychanges(repo, tr, bmchanges)
264 repo._bookmarks.applychanges(repo, tr, bmchanges)
265
265
266 # remove undo files
266 # remove undo files
267 for undovfs, undofile in repo.undofiles():
267 for undovfs, undofile in repo.undofiles():
268 try:
268 try:
269 undovfs.unlink(undofile)
269 undovfs.unlink(undofile)
270 except OSError as e:
270 except OSError as e:
271 if e.errno != errno.ENOENT:
271 if e.errno != errno.ENOENT:
272 ui.warn(
272 ui.warn(
273 _(b'error removing %s: %s\n')
273 _(b'error removing %s: %s\n')
274 % (
274 % (
275 undovfs.join(undofile),
275 undovfs.join(undofile),
276 stringutil.forcebytestr(e),
276 stringutil.forcebytestr(e),
277 )
277 )
278 )
278 )
279
279
280 except: # re-raises
280 except: # re-raises
281 if backupfile:
281 if backupfile:
282 ui.warn(
282 ui.warn(
283 _(b"strip failed, backup bundle stored in '%s'\n")
283 _(b"strip failed, backup bundle stored in '%s'\n")
284 % vfs.join(backupfile)
284 % vfs.join(backupfile)
285 )
285 )
286 if tmpbundlefile:
286 if tmpbundlefile:
287 ui.warn(
287 ui.warn(
288 _(b"strip failed, unrecovered changes stored in '%s'\n")
288 _(b"strip failed, unrecovered changes stored in '%s'\n")
289 % vfs.join(tmpbundlefile)
289 % vfs.join(tmpbundlefile)
290 )
290 )
291 ui.warn(
291 ui.warn(
292 _(
292 _(
293 b"(fix the problem, then recover the changesets with "
293 b"(fix the problem, then recover the changesets with "
294 b"\"hg unbundle '%s'\")\n"
294 b"\"hg unbundle '%s'\")\n"
295 )
295 )
296 % vfs.join(tmpbundlefile)
296 % vfs.join(tmpbundlefile)
297 )
297 )
298 raise
298 raise
299 else:
299 else:
300 if tmpbundlefile:
300 if tmpbundlefile:
301 # Remove temporary bundle only if there were no exceptions
301 # Remove temporary bundle only if there were no exceptions
302 vfs.unlink(tmpbundlefile)
302 vfs.unlink(tmpbundlefile)
303
303
304 repo.destroyed()
304 repo.destroyed()
305 # return the backup file path (or None if 'backup' was False) so
305 # return the backup file path (or None if 'backup' was False) so
306 # extensions can use it
306 # extensions can use it
307 return backupfile
307 return backupfile
308
308
309
309
310 def softstrip(ui, repo, nodelist, backup=True, topic=b'backup'):
310 def softstrip(ui, repo, nodelist, backup=True, topic=b'backup'):
311 """perform a "soft" strip using the archived phase"""
311 """perform a "soft" strip using the archived phase"""
312 tostrip = [c.node() for c in repo.set(b'sort(%ln::)', nodelist)]
312 tostrip = [c.node() for c in repo.set(b'sort(%ln::)', nodelist)]
313 if not tostrip:
313 if not tostrip:
314 return None
314 return None
315
315
316 backupfile = None
316 backupfile = None
317 if backup:
317 if backup:
318 node = tostrip[0]
318 node = tostrip[0]
319 backupfile = _createstripbackup(repo, tostrip, node, topic)
319 backupfile = _createstripbackup(repo, tostrip, node, topic)
320
320
321 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
321 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
322 with repo.transaction(b'strip') as tr:
322 with repo.transaction(b'strip') as tr:
323 phases.retractboundary(repo, tr, phases.archived, tostrip)
323 phases.retractboundary(repo, tr, phases.archived, tostrip)
324 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
324 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
325 repo._bookmarks.applychanges(repo, tr, bmchanges)
325 repo._bookmarks.applychanges(repo, tr, bmchanges)
326 return backupfile
326 return backupfile
327
327
328
328
329 def _bookmarkmovements(repo, tostrip):
329 def _bookmarkmovements(repo, tostrip):
330 # compute necessary bookmark movement
330 # compute necessary bookmark movement
331 bm = repo._bookmarks
331 bm = repo._bookmarks
332 updatebm = []
332 updatebm = []
333 for m in bm:
333 for m in bm:
334 rev = repo[bm[m]].rev()
334 rev = repo[bm[m]].rev()
335 if rev in tostrip:
335 if rev in tostrip:
336 updatebm.append(m)
336 updatebm.append(m)
337 newbmtarget = None
337 newbmtarget = None
338 # If we need to move bookmarks, compute bookmark
338 # If we need to move bookmarks, compute bookmark
339 # targets. Otherwise we can skip doing this logic.
339 # targets. Otherwise we can skip doing this logic.
340 if updatebm:
340 if updatebm:
341 # For a set s, max(parents(s) - s) is the same as max(heads(::s - s)),
341 # For a set s, max(parents(s) - s) is the same as max(heads(::s - s)),
342 # but is much faster
342 # but is much faster
343 newbmtarget = repo.revs(b'max(parents(%ld) - (%ld))', tostrip, tostrip)
343 newbmtarget = repo.revs(b'max(parents(%ld) - (%ld))', tostrip, tostrip)
344 if newbmtarget:
344 if newbmtarget:
345 newbmtarget = repo[newbmtarget.first()].node()
345 newbmtarget = repo[newbmtarget.first()].node()
346 else:
346 else:
347 newbmtarget = b'.'
347 newbmtarget = b'.'
348 return newbmtarget, updatebm
348 return newbmtarget, updatebm
349
349
350
350
351 def _createstripbackup(repo, stripbases, node, topic):
351 def _createstripbackup(repo, stripbases, node, topic):
352 # backup the changeset we are about to strip
352 # backup the changeset we are about to strip
353 vfs = repo.vfs
353 vfs = repo.vfs
354 cl = repo.changelog
354 cl = repo.changelog
355 backupfile = backupbundle(repo, stripbases, cl.heads(), node, topic)
355 backupfile = backupbundle(repo, stripbases, cl.heads(), node, topic)
356 repo.ui.status(_(b"saved backup bundle to %s\n") % vfs.join(backupfile))
356 repo.ui.status(_(b"saved backup bundle to %s\n") % vfs.join(backupfile))
357 repo.ui.log(
357 repo.ui.log(
358 b"backupbundle", b"saved backup bundle to %s\n", vfs.join(backupfile)
358 b"backupbundle", b"saved backup bundle to %s\n", vfs.join(backupfile)
359 )
359 )
360 return backupfile
360 return backupfile
361
361
362
362
363 def safestriproots(ui, repo, nodes):
363 def safestriproots(ui, repo, nodes):
364 """return list of roots of nodes where descendants are covered by nodes"""
364 """return list of roots of nodes where descendants are covered by nodes"""
365 torev = repo.unfiltered().changelog.rev
365 torev = repo.unfiltered().changelog.rev
366 revs = {torev(n) for n in nodes}
366 revs = {torev(n) for n in nodes}
367 # tostrip = wanted - unsafe = wanted - ancestors(orphaned)
367 # tostrip = wanted - unsafe = wanted - ancestors(orphaned)
368 # orphaned = affected - wanted
368 # orphaned = affected - wanted
369 # affected = descendants(roots(wanted))
369 # affected = descendants(roots(wanted))
370 # wanted = revs
370 # wanted = revs
371 revset = b'%ld - ( ::( (roots(%ld):: and not _phase(%s)) -%ld) )'
371 revset = b'%ld - ( ::( (roots(%ld):: and not _phase(%s)) -%ld) )'
372 tostrip = set(repo.revs(revset, revs, revs, phases.internal, revs))
372 tostrip = set(repo.revs(revset, revs, revs, phases.internal, revs))
373 notstrip = revs - tostrip
373 notstrip = revs - tostrip
374 if notstrip:
374 if notstrip:
375 nodestr = b', '.join(sorted(short(repo[n].node()) for n in notstrip))
375 nodestr = b', '.join(sorted(short(repo[n].node()) for n in notstrip))
376 ui.warn(
376 ui.warn(
377 _(b'warning: orphaned descendants detected, not stripping %s\n')
377 _(b'warning: orphaned descendants detected, not stripping %s\n')
378 % nodestr
378 % nodestr
379 )
379 )
380 return [c.node() for c in repo.set(b'roots(%ld)', tostrip)]
380 return [c.node() for c in repo.set(b'roots(%ld)', tostrip)]
381
381
382
382
383 class stripcallback(object):
383 class stripcallback(object):
384 """used as a transaction postclose callback"""
384 """used as a transaction postclose callback"""
385
385
386 def __init__(self, ui, repo, backup, topic):
386 def __init__(self, ui, repo, backup, topic):
387 self.ui = ui
387 self.ui = ui
388 self.repo = repo
388 self.repo = repo
389 self.backup = backup
389 self.backup = backup
390 self.topic = topic or b'backup'
390 self.topic = topic or b'backup'
391 self.nodelist = []
391 self.nodelist = []
392
392
393 def addnodes(self, nodes):
393 def addnodes(self, nodes):
394 self.nodelist.extend(nodes)
394 self.nodelist.extend(nodes)
395
395
396 def __call__(self, tr):
396 def __call__(self, tr):
397 roots = safestriproots(self.ui, self.repo, self.nodelist)
397 roots = safestriproots(self.ui, self.repo, self.nodelist)
398 if roots:
398 if roots:
399 strip(self.ui, self.repo, roots, self.backup, self.topic)
399 strip(self.ui, self.repo, roots, self.backup, self.topic)
400
400
401
401
402 def delayedstrip(ui, repo, nodelist, topic=None, backup=True):
402 def delayedstrip(ui, repo, nodelist, topic=None, backup=True):
403 """like strip, but works inside transaction and won't strip irreverent revs
403 """like strip, but works inside transaction and won't strip irreverent revs
404
404
405 nodelist must explicitly contain all descendants. Otherwise a warning will
405 nodelist must explicitly contain all descendants. Otherwise a warning will
406 be printed that some nodes are not stripped.
406 be printed that some nodes are not stripped.
407
407
408 Will do a backup if `backup` is True. The last non-None "topic" will be
408 Will do a backup if `backup` is True. The last non-None "topic" will be
409 used as the backup topic name. The default backup topic name is "backup".
409 used as the backup topic name. The default backup topic name is "backup".
410 """
410 """
411 tr = repo.currenttransaction()
411 tr = repo.currenttransaction()
412 if not tr:
412 if not tr:
413 nodes = safestriproots(ui, repo, nodelist)
413 nodes = safestriproots(ui, repo, nodelist)
414 return strip(ui, repo, nodes, backup=backup, topic=topic)
414 return strip(ui, repo, nodes, backup=backup, topic=topic)
415 # transaction postclose callbacks are called in alphabetical order.
415 # transaction postclose callbacks are called in alphabetical order.
416 # use '\xff' as prefix so we are likely to be called last.
416 # use '\xff' as prefix so we are likely to be called last.
417 callback = tr.getpostclose(b'\xffstrip')
417 callback = tr.getpostclose(b'\xffstrip')
418 if callback is None:
418 if callback is None:
419 callback = stripcallback(ui, repo, backup=backup, topic=topic)
419 callback = stripcallback(ui, repo, backup=backup, topic=topic)
420 tr.addpostclose(b'\xffstrip', callback)
420 tr.addpostclose(b'\xffstrip', callback)
421 if topic:
421 if topic:
422 callback.topic = topic
422 callback.topic = topic
423 callback.addnodes(nodelist)
423 callback.addnodes(nodelist)
424
424
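# A hedged usage sketch (not from the source): how extension code might call
# delayedstrip from within an open transaction. The function and variable
# names below are assumptions; only delayedstrip itself is defined above.
# Inside a transaction nothing is stripped immediately; the '\xffstrip'
# postclose callback performs the strip once the transaction closes.
def _example_strip_after_rewrite(ui, repo, obsolete_nodes):
    with repo.lock(), repo.transaction(b'example-rewrite'):
        # ... rewrite history here ...
        delayedstrip(ui, repo, obsolete_nodes, topic=b'example-rewrite')
        # still nothing stripped at this point; it happens after close()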
425
425
426 def stripmanifest(repo, striprev, tr, files):
426 def stripmanifest(repo, striprev, tr, files):
427 for revlog in manifestrevlogs(repo):
427 for revlog in manifestrevlogs(repo):
428 revlog.strip(striprev, tr)
428 revlog.strip(striprev, tr)
429
429
430
430
431 def manifestrevlogs(repo):
431 def manifestrevlogs(repo):
432 yield repo.manifestlog.getstorage(b'')
432 yield repo.manifestlog.getstorage(b'')
433 if scmutil.istreemanifest(repo):
433 if scmutil.istreemanifest(repo):
434 # This logic is safe if treemanifest isn't enabled, but also
434 # This logic is safe if treemanifest isn't enabled, but also
435 # pointless, so we skip it if treemanifest isn't enabled.
435 # pointless, so we skip it if treemanifest isn't enabled.
436 for t, unencoded, encoded, size in repo.store.datafiles():
436 for t, unencoded, size in repo.store.datafiles():
437 if unencoded.startswith(b'meta/') and unencoded.endswith(
437 if unencoded.startswith(b'meta/') and unencoded.endswith(
438 b'00manifest.i'
438 b'00manifest.i'
439 ):
439 ):
440 dir = unencoded[5:-12]
440 dir = unencoded[5:-12]
441 yield repo.manifestlog.getstorage(dir)
441 yield repo.manifestlog.getstorage(dir)
442
442
443
443
444 def rebuildfncache(ui, repo, only_data=False):
444 def rebuildfncache(ui, repo, only_data=False):
445 """Rebuilds the fncache file from repo history.
445 """Rebuilds the fncache file from repo history.
446
446
447 Missing entries will be added. Extra entries will be removed.
447 Missing entries will be added. Extra entries will be removed.
448 """
448 """
449 repo = repo.unfiltered()
449 repo = repo.unfiltered()
450
450
451 if requirements.FNCACHE_REQUIREMENT not in repo.requirements:
451 if requirements.FNCACHE_REQUIREMENT not in repo.requirements:
452 ui.warn(
452 ui.warn(
453 _(
453 _(
454 b'(not rebuilding fncache because repository does not '
454 b'(not rebuilding fncache because repository does not '
455 b'support fncache)\n'
455 b'support fncache)\n'
456 )
456 )
457 )
457 )
458 return
458 return
459
459
460 with repo.lock():
460 with repo.lock():
461 fnc = repo.store.fncache
461 fnc = repo.store.fncache
462 fnc.ensureloaded(warn=ui.warn)
462 fnc.ensureloaded(warn=ui.warn)
463
463
464 oldentries = set(fnc.entries)
464 oldentries = set(fnc.entries)
465 newentries = set()
465 newentries = set()
466 seenfiles = set()
466 seenfiles = set()
467
467
468 if only_data:
468 if only_data:
469 # Trust the listing of .i from the fncache, but not the .d. This is
469 # Trust the listing of .i from the fncache, but not the .d. This is
470 # much faster, because we only need to stat every possible .d file,
470 # much faster, because we only need to stat every possible .d file,
471 # instead of reading the full changelog
471 # instead of reading the full changelog
472 for f in fnc:
472 for f in fnc:
473 if f[:5] == b'data/' and f[-2:] == b'.i':
473 if f[:5] == b'data/' and f[-2:] == b'.i':
474 seenfiles.add(f[5:-2])
474 seenfiles.add(f[5:-2])
475 newentries.add(f)
475 newentries.add(f)
476 dataf = f[:-2] + b'.d'
476 dataf = f[:-2] + b'.d'
477 if repo.store._exists(dataf):
477 if repo.store._exists(dataf):
478 newentries.add(dataf)
478 newentries.add(dataf)
479 else:
479 else:
480 progress = ui.makeprogress(
480 progress = ui.makeprogress(
481 _(b'rebuilding'), unit=_(b'changesets'), total=len(repo)
481 _(b'rebuilding'), unit=_(b'changesets'), total=len(repo)
482 )
482 )
483 for rev in repo:
483 for rev in repo:
484 progress.update(rev)
484 progress.update(rev)
485
485
486 ctx = repo[rev]
486 ctx = repo[rev]
487 for f in ctx.files():
487 for f in ctx.files():
488 # This is to minimize I/O.
488 # This is to minimize I/O.
489 if f in seenfiles:
489 if f in seenfiles:
490 continue
490 continue
491 seenfiles.add(f)
491 seenfiles.add(f)
492
492
493 i = b'data/%s.i' % f
493 i = b'data/%s.i' % f
494 d = b'data/%s.d' % f
494 d = b'data/%s.d' % f
495
495
496 if repo.store._exists(i):
496 if repo.store._exists(i):
497 newentries.add(i)
497 newentries.add(i)
498 if repo.store._exists(d):
498 if repo.store._exists(d):
499 newentries.add(d)
499 newentries.add(d)
500
500
501 progress.complete()
501 progress.complete()
502
502
503 if requirements.TREEMANIFEST_REQUIREMENT in repo.requirements:
503 if requirements.TREEMANIFEST_REQUIREMENT in repo.requirements:
504 # This logic is safe if treemanifest isn't enabled, but also
504 # This logic is safe if treemanifest isn't enabled, but also
505 # pointless, so we skip it if treemanifest isn't enabled.
505 # pointless, so we skip it if treemanifest isn't enabled.
506 for dir in pathutil.dirs(seenfiles):
506 for dir in pathutil.dirs(seenfiles):
507 i = b'meta/%s/00manifest.i' % dir
507 i = b'meta/%s/00manifest.i' % dir
508 d = b'meta/%s/00manifest.d' % dir
508 d = b'meta/%s/00manifest.d' % dir
509
509
510 if repo.store._exists(i):
510 if repo.store._exists(i):
511 newentries.add(i)
511 newentries.add(i)
512 if repo.store._exists(d):
512 if repo.store._exists(d):
513 newentries.add(d)
513 newentries.add(d)
514
514
515 addcount = len(newentries - oldentries)
515 addcount = len(newentries - oldentries)
516 removecount = len(oldentries - newentries)
516 removecount = len(oldentries - newentries)
517 for p in sorted(oldentries - newentries):
517 for p in sorted(oldentries - newentries):
518 ui.write(_(b'removing %s\n') % p)
518 ui.write(_(b'removing %s\n') % p)
519 for p in sorted(newentries - oldentries):
519 for p in sorted(newentries - oldentries):
520 ui.write(_(b'adding %s\n') % p)
520 ui.write(_(b'adding %s\n') % p)
521
521
522 if addcount or removecount:
522 if addcount or removecount:
523 ui.write(
523 ui.write(
524 _(b'%d items added, %d removed from fncache\n')
524 _(b'%d items added, %d removed from fncache\n')
525 % (addcount, removecount)
525 % (addcount, removecount)
526 )
526 )
527 fnc.entries = newentries
527 fnc.entries = newentries
528 fnc._dirty = True
528 fnc._dirty = True
529
529
530 with repo.transaction(b'fncache') as tr:
530 with repo.transaction(b'fncache') as tr:
531 fnc.write(tr)
531 fnc.write(tr)
532 else:
532 else:
533 ui.write(_(b'fncache already up to date\n'))
533 ui.write(_(b'fncache already up to date\n'))
534
534
535
535
536 def deleteobsmarkers(obsstore, indices):
536 def deleteobsmarkers(obsstore, indices):
537 """Delete some obsmarkers from obsstore and return how many were deleted
537 """Delete some obsmarkers from obsstore and return how many were deleted
538
538
539 'indices' is a list of ints which are the indices
539 'indices' is a list of ints which are the indices
540 of the markers to be deleted.
540 of the markers to be deleted.
541
541
542 Every invocation of this function completely rewrites the obsstore file,
542 Every invocation of this function completely rewrites the obsstore file,
543 skipping the markers we want removed. A new temporary file is
543 skipping the markers we want removed. A new temporary file is
544 created, the remaining markers are written there, and on .close() this file
544 created, the remaining markers are written there, and on .close() this file
545 gets atomically renamed to obsstore, thus guaranteeing consistency."""
545 gets atomically renamed to obsstore, thus guaranteeing consistency."""
546 if not indices:
546 if not indices:
547 # we don't want to rewrite the obsstore with the same content
547 # we don't want to rewrite the obsstore with the same content
548 return
548 return
549
549
550 left = []
550 left = []
551 current = obsstore._all
551 current = obsstore._all
552 n = 0
552 n = 0
553 for i, m in enumerate(current):
553 for i, m in enumerate(current):
554 if i in indices:
554 if i in indices:
555 n += 1
555 n += 1
556 continue
556 continue
557 left.append(m)
557 left.append(m)
558
558
559 newobsstorefile = obsstore.svfs(b'obsstore', b'w', atomictemp=True)
559 newobsstorefile = obsstore.svfs(b'obsstore', b'w', atomictemp=True)
560 for bytes in obsolete.encodemarkers(left, True, obsstore._version):
560 for bytes in obsolete.encodemarkers(left, True, obsstore._version):
561 newobsstorefile.write(bytes)
561 newobsstorefile.write(bytes)
562 newobsstorefile.close()
562 newobsstorefile.close()
563 return n
563 return n
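# A hedged usage sketch (assumption: `repo` is a locked localrepository and
# `indices` was computed by scanning repo.obsstore): deleteobsmarkers rewrites
# the obsstore atomically, so no transaction is strictly required, but holding
# the store lock avoids racing with concurrent writers.
def _example_drop_markers(repo, indices):
    with repo.lock():
        n = deleteobsmarkers(repo.obsstore, indices)
        repo.ui.status(b'deleted %d obsolescence markers\n' % (n or 0))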
@@ -1,886 +1,886 b''
1 # censor code related to censoring revision
1 # censor code related to censoring revision
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 # Copyright 2015 Google, Inc <martinvonz@google.com>
5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 import binascii
10 import binascii
11 import contextlib
11 import contextlib
12 import os
12 import os
13 import struct
13 import struct
14
14
15 from ..node import (
15 from ..node import (
16 nullrev,
16 nullrev,
17 )
17 )
18 from .constants import (
18 from .constants import (
19 COMP_MODE_PLAIN,
19 COMP_MODE_PLAIN,
20 ENTRY_DATA_COMPRESSED_LENGTH,
20 ENTRY_DATA_COMPRESSED_LENGTH,
21 ENTRY_DATA_COMPRESSION_MODE,
21 ENTRY_DATA_COMPRESSION_MODE,
22 ENTRY_DATA_OFFSET,
22 ENTRY_DATA_OFFSET,
23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
24 ENTRY_DELTA_BASE,
24 ENTRY_DELTA_BASE,
25 ENTRY_LINK_REV,
25 ENTRY_LINK_REV,
26 ENTRY_NODE_ID,
26 ENTRY_NODE_ID,
27 ENTRY_PARENT_1,
27 ENTRY_PARENT_1,
28 ENTRY_PARENT_2,
28 ENTRY_PARENT_2,
29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
30 ENTRY_SIDEDATA_COMPRESSION_MODE,
30 ENTRY_SIDEDATA_COMPRESSION_MODE,
31 ENTRY_SIDEDATA_OFFSET,
31 ENTRY_SIDEDATA_OFFSET,
32 REVIDX_ISCENSORED,
32 REVIDX_ISCENSORED,
33 REVLOGV0,
33 REVLOGV0,
34 REVLOGV1,
34 REVLOGV1,
35 )
35 )
36 from ..i18n import _
36 from ..i18n import _
37
37
38 from .. import (
38 from .. import (
39 error,
39 error,
40 mdiff,
40 mdiff,
41 pycompat,
41 pycompat,
42 revlogutils,
42 revlogutils,
43 util,
43 util,
44 )
44 )
45 from ..utils import (
45 from ..utils import (
46 storageutil,
46 storageutil,
47 )
47 )
48 from . import (
48 from . import (
49 constants,
49 constants,
50 deltas,
50 deltas,
51 )
51 )
52
52
53
53
54 def v1_censor(rl, tr, censornode, tombstone=b''):
54 def v1_censor(rl, tr, censornode, tombstone=b''):
55 """censors a revision in a "version 1" revlog"""
55 """censors a revision in a "version 1" revlog"""
56 assert rl._format_version == constants.REVLOGV1, rl._format_version
56 assert rl._format_version == constants.REVLOGV1, rl._format_version
57
57
58 # avoid cycle
58 # avoid cycle
59 from .. import revlog
59 from .. import revlog
60
60
61 censorrev = rl.rev(censornode)
61 censorrev = rl.rev(censornode)
62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
63
63
64 # Rewriting the revlog in place is hard. Our strategy for censoring is
64 # Rewriting the revlog in place is hard. Our strategy for censoring is
65 # to create a new revlog, copy all revisions to it, then replace the
65 # to create a new revlog, copy all revisions to it, then replace the
66 # revlogs on transaction close.
66 # revlogs on transaction close.
67 #
67 #
68 # This is a bit dangerous. We could easily have a mismatch of state.
68 # This is a bit dangerous. We could easily have a mismatch of state.
69 newrl = revlog.revlog(
69 newrl = revlog.revlog(
70 rl.opener,
70 rl.opener,
71 target=rl.target,
71 target=rl.target,
72 radix=rl.radix,
72 radix=rl.radix,
73 postfix=b'tmpcensored',
73 postfix=b'tmpcensored',
74 censorable=True,
74 censorable=True,
75 )
75 )
76 newrl._format_version = rl._format_version
76 newrl._format_version = rl._format_version
77 newrl._format_flags = rl._format_flags
77 newrl._format_flags = rl._format_flags
78 newrl._generaldelta = rl._generaldelta
78 newrl._generaldelta = rl._generaldelta
79 newrl._parse_index = rl._parse_index
79 newrl._parse_index = rl._parse_index
80
80
81 for rev in rl.revs():
81 for rev in rl.revs():
82 node = rl.node(rev)
82 node = rl.node(rev)
83 p1, p2 = rl.parents(node)
83 p1, p2 = rl.parents(node)
84
84
85 if rev == censorrev:
85 if rev == censorrev:
86 newrl.addrawrevision(
86 newrl.addrawrevision(
87 tombstone,
87 tombstone,
88 tr,
88 tr,
89 rl.linkrev(censorrev),
89 rl.linkrev(censorrev),
90 p1,
90 p1,
91 p2,
91 p2,
92 censornode,
92 censornode,
93 constants.REVIDX_ISCENSORED,
93 constants.REVIDX_ISCENSORED,
94 )
94 )
95
95
96 if newrl.deltaparent(rev) != nullrev:
96 if newrl.deltaparent(rev) != nullrev:
97 m = _(b'censored revision stored as delta; cannot censor')
97 m = _(b'censored revision stored as delta; cannot censor')
98 h = _(
98 h = _(
99 b'censoring of revlogs is not fully implemented;'
99 b'censoring of revlogs is not fully implemented;'
100 b' please report this bug'
100 b' please report this bug'
101 )
101 )
102 raise error.Abort(m, hint=h)
102 raise error.Abort(m, hint=h)
103 continue
103 continue
104
104
105 if rl.iscensored(rev):
105 if rl.iscensored(rev):
106 if rl.deltaparent(rev) != nullrev:
106 if rl.deltaparent(rev) != nullrev:
107 m = _(
107 m = _(
108 b'cannot censor due to censored '
108 b'cannot censor due to censored '
109 b'revision having delta stored'
109 b'revision having delta stored'
110 )
110 )
111 raise error.Abort(m)
111 raise error.Abort(m)
112 rawtext = rl._chunk(rev)
112 rawtext = rl._chunk(rev)
113 else:
113 else:
114 rawtext = rl.rawdata(rev)
114 rawtext = rl.rawdata(rev)
115
115
116 newrl.addrawrevision(
116 newrl.addrawrevision(
117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
118 )
118 )
119
119
120 tr.addbackup(rl._indexfile, location=b'store')
120 tr.addbackup(rl._indexfile, location=b'store')
121 if not rl._inline:
121 if not rl._inline:
122 tr.addbackup(rl._datafile, location=b'store')
122 tr.addbackup(rl._datafile, location=b'store')
123
123
124 rl.opener.rename(newrl._indexfile, rl._indexfile)
124 rl.opener.rename(newrl._indexfile, rl._indexfile)
125 if not rl._inline:
125 if not rl._inline:
126 rl.opener.rename(newrl._datafile, rl._datafile)
126 rl.opener.rename(newrl._datafile, rl._datafile)
127
127
128 rl.clearcaches()
128 rl.clearcaches()
129 rl._loadindex()
129 rl._loadindex()
130
130
131
131
132 def v2_censor(revlog, tr, censornode, tombstone=b''):
132 def v2_censor(revlog, tr, censornode, tombstone=b''):
133 """censors a revision in a "version 2" revlog"""
133 """censors a revision in a "version 2" revlog"""
134 assert revlog._format_version != REVLOGV0, revlog._format_version
134 assert revlog._format_version != REVLOGV0, revlog._format_version
135 assert revlog._format_version != REVLOGV1, revlog._format_version
135 assert revlog._format_version != REVLOGV1, revlog._format_version
136
136
137 censor_revs = {revlog.rev(censornode)}
137 censor_revs = {revlog.rev(censornode)}
138 _rewrite_v2(revlog, tr, censor_revs, tombstone)
138 _rewrite_v2(revlog, tr, censor_revs, tombstone)
139
139
140
140
141 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
141 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
142 """rewrite a revlog to censor some of its content
142 """rewrite a revlog to censor some of its content
143
143
144 General principle
144 General principle
145
145
146 We create new revlog files (index/data/sidedata) to copy the content of
146 We create new revlog files (index/data/sidedata) to copy the content of
147 the existing data without the censored data.
147 the existing data without the censored data.
148
148
149 We need to recompute new delta for any revision that used the censored
149 We need to recompute new delta for any revision that used the censored
150 revision as delta base. As the cumulative size of the new delta may be
150 revision as delta base. As the cumulative size of the new delta may be
151 large, we store them in a temporary file until they are stored in their
151 large, we store them in a temporary file until they are stored in their
152 final destination.
152 final destination.
153
153
154 All data before the censored data can be blindly copied. The rest needs
154 All data before the censored data can be blindly copied. The rest needs
155 to be copied as we go and the associated index entry needs adjustment.
155 to be copied as we go and the associated index entry needs adjustment.
156 """
156 """
157 assert revlog._format_version != REVLOGV0, revlog._format_version
157 assert revlog._format_version != REVLOGV0, revlog._format_version
158 assert revlog._format_version != REVLOGV1, revlog._format_version
158 assert revlog._format_version != REVLOGV1, revlog._format_version
159
159
160 old_index = revlog.index
160 old_index = revlog.index
161 docket = revlog._docket
161 docket = revlog._docket
162
162
163 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
163 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
164
164
165 first_excl_rev = min(censor_revs)
165 first_excl_rev = min(censor_revs)
166
166
167 first_excl_entry = revlog.index[first_excl_rev]
167 first_excl_entry = revlog.index[first_excl_rev]
168 index_cutoff = revlog.index.entry_size * first_excl_rev
168 index_cutoff = revlog.index.entry_size * first_excl_rev
169 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
169 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
170 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
170 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
171
171
172 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
172 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
173 # rev β†’ (new_base, data_start, data_end, compression_mode)
173 # rev β†’ (new_base, data_start, data_end, compression_mode)
174 rewritten_entries = _precompute_rewritten_delta(
174 rewritten_entries = _precompute_rewritten_delta(
175 revlog,
175 revlog,
176 old_index,
176 old_index,
177 censor_revs,
177 censor_revs,
178 tmp_storage,
178 tmp_storage,
179 )
179 )
180
180
181 all_files = _setup_new_files(
181 all_files = _setup_new_files(
182 revlog,
182 revlog,
183 index_cutoff,
183 index_cutoff,
184 data_cutoff,
184 data_cutoff,
185 sidedata_cutoff,
185 sidedata_cutoff,
186 )
186 )
187
187
188 # we don't need to open the old index file since its content already
188 # we don't need to open the old index file since its content already
189 # exists in a usable form in `old_index`.
189 # exists in a usable form in `old_index`.
190 with all_files() as open_files:
190 with all_files() as open_files:
191 (
191 (
192 old_data_file,
192 old_data_file,
193 old_sidedata_file,
193 old_sidedata_file,
194 new_index_file,
194 new_index_file,
195 new_data_file,
195 new_data_file,
196 new_sidedata_file,
196 new_sidedata_file,
197 ) = open_files
197 ) = open_files
198
198
199 # writing the censored revision
199 # writing the censored revision
200
200
201 # Writing all subsequent revisions
201 # Writing all subsequent revisions
202 for rev in range(first_excl_rev, len(old_index)):
202 for rev in range(first_excl_rev, len(old_index)):
203 if rev in censor_revs:
203 if rev in censor_revs:
204 _rewrite_censor(
204 _rewrite_censor(
205 revlog,
205 revlog,
206 old_index,
206 old_index,
207 open_files,
207 open_files,
208 rev,
208 rev,
209 tombstone,
209 tombstone,
210 )
210 )
211 else:
211 else:
212 _rewrite_simple(
212 _rewrite_simple(
213 revlog,
213 revlog,
214 old_index,
214 old_index,
215 open_files,
215 open_files,
216 rev,
216 rev,
217 rewritten_entries,
217 rewritten_entries,
218 tmp_storage,
218 tmp_storage,
219 )
219 )
220 docket.write(transaction=None, stripping=True)
220 docket.write(transaction=None, stripping=True)
221
221
222
222
223 def _precompute_rewritten_delta(
223 def _precompute_rewritten_delta(
224 revlog,
224 revlog,
225 old_index,
225 old_index,
226 excluded_revs,
226 excluded_revs,
227 tmp_storage,
227 tmp_storage,
228 ):
228 ):
229 """Compute new delta for revisions whose delta is based on revision that
229 """Compute new delta for revisions whose delta is based on revision that
230 will not survive as is.
230 will not survive as is.
231
231
232 Return a mapping: {rev β†’ (new_base, data_start, data_end, compression_mode)}
232 Return a mapping: {rev β†’ (new_base, data_start, data_end, compression_mode)}
233 """
233 """
234 dc = deltas.deltacomputer(revlog)
234 dc = deltas.deltacomputer(revlog)
235 rewritten_entries = {}
235 rewritten_entries = {}
236 first_excl_rev = min(excluded_revs)
236 first_excl_rev = min(excluded_revs)
237 with revlog._segmentfile._open_read() as dfh:
237 with revlog._segmentfile._open_read() as dfh:
238 for rev in range(first_excl_rev, len(old_index)):
238 for rev in range(first_excl_rev, len(old_index)):
239 if rev in excluded_revs:
239 if rev in excluded_revs:
240 # this revision is being censored and will be rewritten as a tombstone,
240 # this revision is being censored and will be rewritten as a tombstone,
241 # so there is no need to recompute a delta for it.
241 # so there is no need to recompute a delta for it.
242 continue
242 continue
243 entry = old_index[rev]
243 entry = old_index[rev]
244 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
244 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
245 continue
245 continue
246 # This is a revision that uses the censored revision as the base
246 # This is a revision that uses the censored revision as the base
247 # for its delta. We need to compute a new delta for it.
247 # for its delta. We need to compute a new delta for it.
248 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
248 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
249 # this revision is empty, we can delta against nullrev
249 # this revision is empty, we can delta against nullrev
250 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
250 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
251 else:
251 else:
252
252
253 text = revlog.rawdata(rev, _df=dfh)
253 text = revlog.rawdata(rev, _df=dfh)
254 info = revlogutils.revisioninfo(
254 info = revlogutils.revisioninfo(
255 node=entry[ENTRY_NODE_ID],
255 node=entry[ENTRY_NODE_ID],
256 p1=revlog.node(entry[ENTRY_PARENT_1]),
256 p1=revlog.node(entry[ENTRY_PARENT_1]),
257 p2=revlog.node(entry[ENTRY_PARENT_2]),
257 p2=revlog.node(entry[ENTRY_PARENT_2]),
258 btext=[text],
258 btext=[text],
259 textlen=len(text),
259 textlen=len(text),
260 cachedelta=None,
260 cachedelta=None,
261 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
261 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
262 )
262 )
263 d = dc.finddeltainfo(
263 d = dc.finddeltainfo(
264 info, dfh, excluded_bases=excluded_revs, target_rev=rev
264 info, dfh, excluded_bases=excluded_revs, target_rev=rev
265 )
265 )
266 default_comp = revlog._docket.default_compression_header
266 default_comp = revlog._docket.default_compression_header
267 comp_mode, d = deltas.delta_compression(default_comp, d)
267 comp_mode, d = deltas.delta_compression(default_comp, d)
268 # using `tell` is a bit lazy, but we are not here for speed
268 # using `tell` is a bit lazy, but we are not here for speed
269 start = tmp_storage.tell()
269 start = tmp_storage.tell()
270 tmp_storage.write(d.data[1])
270 tmp_storage.write(d.data[1])
271 end = tmp_storage.tell()
271 end = tmp_storage.tell()
272 rewritten_entries[rev] = (d.base, start, end, comp_mode)
272 rewritten_entries[rev] = (d.base, start, end, comp_mode)
273 return rewritten_entries
273 return rewritten_entries
274
274
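# An illustration (not from the source) of the shape of the mapping returned
# above, using io.BytesIO in place of the real temporary file: each value
# points at the slice of tmp_storage holding the recomputed delta bytes.
import io

_COMP_MODE = object()                      # stand-in for a compression-mode constant
tmp = io.BytesIO()
start = tmp.tell()
tmp.write(b'<recomputed delta bytes>')
end = tmp.tell()
rewritten = {7: (5, start, end, _COMP_MODE)}   # rev -> (new_base, start, end, mode)

new_base, s, e, mode = rewritten[7]
tmp.seek(s)
assert tmp.read(e - s) == b'<recomputed delta bytes>'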
275
275
276 def _setup_new_files(
276 def _setup_new_files(
277 revlog,
277 revlog,
278 index_cutoff,
278 index_cutoff,
279 data_cutoff,
279 data_cutoff,
280 sidedata_cutoff,
280 sidedata_cutoff,
281 ):
281 ):
282 """
282 """
283
283
284 return a context manager to open all the relevant files:
284 return a context manager to open all the relevant files:
285 - old_data_file,
285 - old_data_file,
286 - old_sidedata_file,
286 - old_sidedata_file,
287 - new_index_file,
287 - new_index_file,
288 - new_data_file,
288 - new_data_file,
289 - new_sidedata_file,
289 - new_sidedata_file,
290
290
291 The old_index_file is not here because it is accessed through the
291 The old_index_file is not here because it is accessed through the
292 `old_index` object of the calling function.
292 `old_index` object of the calling function.
293 """
293 """
294 docket = revlog._docket
294 docket = revlog._docket
295 old_index_filepath = revlog.opener.join(docket.index_filepath())
295 old_index_filepath = revlog.opener.join(docket.index_filepath())
296 old_data_filepath = revlog.opener.join(docket.data_filepath())
296 old_data_filepath = revlog.opener.join(docket.data_filepath())
297 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
297 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
298
298
299 new_index_filepath = revlog.opener.join(docket.new_index_file())
299 new_index_filepath = revlog.opener.join(docket.new_index_file())
300 new_data_filepath = revlog.opener.join(docket.new_data_file())
300 new_data_filepath = revlog.opener.join(docket.new_data_file())
301 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
301 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
302
302
303 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
303 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
304 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
304 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
305 util.copyfile(
305 util.copyfile(
306 old_sidedata_filepath,
306 old_sidedata_filepath,
307 new_sidedata_filepath,
307 new_sidedata_filepath,
308 nb_bytes=sidedata_cutoff,
308 nb_bytes=sidedata_cutoff,
309 )
309 )
310 revlog.opener.register_file(docket.index_filepath())
310 revlog.opener.register_file(docket.index_filepath())
311 revlog.opener.register_file(docket.data_filepath())
311 revlog.opener.register_file(docket.data_filepath())
312 revlog.opener.register_file(docket.sidedata_filepath())
312 revlog.opener.register_file(docket.sidedata_filepath())
313
313
314 docket.index_end = index_cutoff
314 docket.index_end = index_cutoff
315 docket.data_end = data_cutoff
315 docket.data_end = data_cutoff
316 docket.sidedata_end = sidedata_cutoff
316 docket.sidedata_end = sidedata_cutoff
317
317
318 # reload the revlog internal information
318 # reload the revlog internal information
319 revlog.clearcaches()
319 revlog.clearcaches()
320 revlog._loadindex(docket=docket)
320 revlog._loadindex(docket=docket)
321
321
322 @contextlib.contextmanager
322 @contextlib.contextmanager
323 def all_files_opener():
323 def all_files_opener():
324 # hide opening in a helper function to please check-code, black
324 # hide opening in a helper function to please check-code, black
325 # and various Python versions at the same time
325 # and various Python versions at the same time
326 with open(old_data_filepath, 'rb') as old_data_file:
326 with open(old_data_filepath, 'rb') as old_data_file:
327 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
327 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
328 with open(new_index_filepath, 'r+b') as new_index_file:
328 with open(new_index_filepath, 'r+b') as new_index_file:
329 with open(new_data_filepath, 'r+b') as new_data_file:
329 with open(new_data_filepath, 'r+b') as new_data_file:
330 with open(
330 with open(
331 new_sidedata_filepath, 'r+b'
331 new_sidedata_filepath, 'r+b'
332 ) as new_sidedata_file:
332 ) as new_sidedata_file:
333 new_index_file.seek(0, os.SEEK_END)
333 new_index_file.seek(0, os.SEEK_END)
334 assert new_index_file.tell() == index_cutoff
334 assert new_index_file.tell() == index_cutoff
335 new_data_file.seek(0, os.SEEK_END)
335 new_data_file.seek(0, os.SEEK_END)
336 assert new_data_file.tell() == data_cutoff
336 assert new_data_file.tell() == data_cutoff
337 new_sidedata_file.seek(0, os.SEEK_END)
337 new_sidedata_file.seek(0, os.SEEK_END)
338 assert new_sidedata_file.tell() == sidedata_cutoff
338 assert new_sidedata_file.tell() == sidedata_cutoff
339 yield (
339 yield (
340 old_data_file,
340 old_data_file,
341 old_sidedata_file,
341 old_sidedata_file,
342 new_index_file,
342 new_index_file,
343 new_data_file,
343 new_data_file,
344 new_sidedata_file,
344 new_sidedata_file,
345 )
345 )
346
346
347 return all_files_opener
347 return all_files_opener
348
348
349
349
350 def _rewrite_simple(
350 def _rewrite_simple(
351 revlog,
351 revlog,
352 old_index,
352 old_index,
353 all_files,
353 all_files,
354 rev,
354 rev,
355 rewritten_entries,
355 rewritten_entries,
356 tmp_storage,
356 tmp_storage,
357 ):
357 ):
358 """append a normal revision to the index after the rewritten one(s)"""
358 """append a normal revision to the index after the rewritten one(s)"""
359 (
359 (
360 old_data_file,
360 old_data_file,
361 old_sidedata_file,
361 old_sidedata_file,
362 new_index_file,
362 new_index_file,
363 new_data_file,
363 new_data_file,
364 new_sidedata_file,
364 new_sidedata_file,
365 ) = all_files
365 ) = all_files
366 entry = old_index[rev]
366 entry = old_index[rev]
367 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
367 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
368 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
368 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
369
369
370 if rev not in rewritten_entries:
370 if rev not in rewritten_entries:
371 old_data_file.seek(old_data_offset)
371 old_data_file.seek(old_data_offset)
372 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
372 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
373 new_data = old_data_file.read(new_data_size)
373 new_data = old_data_file.read(new_data_size)
374 data_delta_base = entry[ENTRY_DELTA_BASE]
374 data_delta_base = entry[ENTRY_DELTA_BASE]
375 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
375 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
376 else:
376 else:
377 (
377 (
378 data_delta_base,
378 data_delta_base,
379 start,
379 start,
380 end,
380 end,
381 d_comp_mode,
381 d_comp_mode,
382 ) = rewritten_entries[rev]
382 ) = rewritten_entries[rev]
383 new_data_size = end - start
383 new_data_size = end - start
384 tmp_storage.seek(start)
384 tmp_storage.seek(start)
385 new_data = tmp_storage.read(new_data_size)
385 new_data = tmp_storage.read(new_data_size)
386
386
387 # It might be faster to group consecutive read/write operations,
387 # It might be faster to group consecutive read/write operations,
388 # however, this is censor, an operation that is not focused
388 # however, this is censor, an operation that is not focused
389 # on stellar performance. So I have not written this
389 # on stellar performance. So I have not written this
390 # optimisation yet.
390 # optimisation yet.
391 new_data_offset = new_data_file.tell()
391 new_data_offset = new_data_file.tell()
392 new_data_file.write(new_data)
392 new_data_file.write(new_data)
393
393
394 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
394 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
395 new_sidedata_offset = new_sidedata_file.tell()
395 new_sidedata_offset = new_sidedata_file.tell()
396 if 0 < sidedata_size:
396 if 0 < sidedata_size:
397 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
397 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
398 old_sidedata_file.seek(old_sidedata_offset)
398 old_sidedata_file.seek(old_sidedata_offset)
399 new_sidedata = old_sidedata_file.read(sidedata_size)
399 new_sidedata = old_sidedata_file.read(sidedata_size)
400 new_sidedata_file.write(new_sidedata)
400 new_sidedata_file.write(new_sidedata)
401
401
402 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
402 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
403 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
403 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
404 assert data_delta_base <= rev, (data_delta_base, rev)
404 assert data_delta_base <= rev, (data_delta_base, rev)
405
405
406 new_entry = revlogutils.entry(
406 new_entry = revlogutils.entry(
407 flags=flags,
407 flags=flags,
408 data_offset=new_data_offset,
408 data_offset=new_data_offset,
409 data_compressed_length=new_data_size,
409 data_compressed_length=new_data_size,
410 data_uncompressed_length=data_uncompressed_length,
410 data_uncompressed_length=data_uncompressed_length,
411 data_delta_base=data_delta_base,
411 data_delta_base=data_delta_base,
412 link_rev=entry[ENTRY_LINK_REV],
412 link_rev=entry[ENTRY_LINK_REV],
413 parent_rev_1=entry[ENTRY_PARENT_1],
413 parent_rev_1=entry[ENTRY_PARENT_1],
414 parent_rev_2=entry[ENTRY_PARENT_2],
414 parent_rev_2=entry[ENTRY_PARENT_2],
415 node_id=entry[ENTRY_NODE_ID],
415 node_id=entry[ENTRY_NODE_ID],
416 sidedata_offset=new_sidedata_offset,
416 sidedata_offset=new_sidedata_offset,
417 sidedata_compressed_length=sidedata_size,
417 sidedata_compressed_length=sidedata_size,
418 data_compression_mode=d_comp_mode,
418 data_compression_mode=d_comp_mode,
419 sidedata_compression_mode=sd_com_mode,
419 sidedata_compression_mode=sd_com_mode,
420 )
420 )
421 revlog.index.append(new_entry)
421 revlog.index.append(new_entry)
422 entry_bin = revlog.index.entry_binary(rev)
422 entry_bin = revlog.index.entry_binary(rev)
423 new_index_file.write(entry_bin)
423 new_index_file.write(entry_bin)
424
424
425 revlog._docket.index_end = new_index_file.tell()
425 revlog._docket.index_end = new_index_file.tell()
426 revlog._docket.data_end = new_data_file.tell()
426 revlog._docket.data_end = new_data_file.tell()
427 revlog._docket.sidedata_end = new_sidedata_file.tell()
427 revlog._docket.sidedata_end = new_sidedata_file.tell()
428
428
429
429
430 def _rewrite_censor(
430 def _rewrite_censor(
431 revlog,
431 revlog,
432 old_index,
432 old_index,
433 all_files,
433 all_files,
434 rev,
434 rev,
435 tombstone,
435 tombstone,
436 ):
436 ):
437 """rewrite and append a censored revision"""
437 """rewrite and append a censored revision"""
438 (
438 (
439 old_data_file,
439 old_data_file,
440 old_sidedata_file,
440 old_sidedata_file,
441 new_index_file,
441 new_index_file,
442 new_data_file,
442 new_data_file,
443 new_sidedata_file,
443 new_sidedata_file,
444 ) = all_files
444 ) = all_files
445 entry = old_index[rev]
445 entry = old_index[rev]
446
446
447 # XXX consider trying the default compression too
447 # XXX consider trying the default compression too
448 new_data_size = len(tombstone)
448 new_data_size = len(tombstone)
449 new_data_offset = new_data_file.tell()
449 new_data_offset = new_data_file.tell()
450 new_data_file.write(tombstone)
450 new_data_file.write(tombstone)
451
451
452 # we are not adding any sidedata as they might leak info about the censored version
452 # we are not adding any sidedata as they might leak info about the censored version
453
453
454 link_rev = entry[ENTRY_LINK_REV]
454 link_rev = entry[ENTRY_LINK_REV]
455
455
456 p1 = entry[ENTRY_PARENT_1]
456 p1 = entry[ENTRY_PARENT_1]
457 p2 = entry[ENTRY_PARENT_2]
457 p2 = entry[ENTRY_PARENT_2]
458
458
459 new_entry = revlogutils.entry(
459 new_entry = revlogutils.entry(
460 flags=constants.REVIDX_ISCENSORED,
460 flags=constants.REVIDX_ISCENSORED,
461 data_offset=new_data_offset,
461 data_offset=new_data_offset,
462 data_compressed_length=new_data_size,
462 data_compressed_length=new_data_size,
463 data_uncompressed_length=new_data_size,
463 data_uncompressed_length=new_data_size,
464 data_delta_base=rev,
464 data_delta_base=rev,
465 link_rev=link_rev,
465 link_rev=link_rev,
466 parent_rev_1=p1,
466 parent_rev_1=p1,
467 parent_rev_2=p2,
467 parent_rev_2=p2,
468 node_id=entry[ENTRY_NODE_ID],
468 node_id=entry[ENTRY_NODE_ID],
469 sidedata_offset=0,
469 sidedata_offset=0,
470 sidedata_compressed_length=0,
470 sidedata_compressed_length=0,
471 data_compression_mode=COMP_MODE_PLAIN,
471 data_compression_mode=COMP_MODE_PLAIN,
472 sidedata_compression_mode=COMP_MODE_PLAIN,
472 sidedata_compression_mode=COMP_MODE_PLAIN,
473 )
473 )
474 revlog.index.append(new_entry)
474 revlog.index.append(new_entry)
475 entry_bin = revlog.index.entry_binary(rev)
475 entry_bin = revlog.index.entry_binary(rev)
476 new_index_file.write(entry_bin)
476 new_index_file.write(entry_bin)
477 revlog._docket.index_end = new_index_file.tell()
477 revlog._docket.index_end = new_index_file.tell()
478 revlog._docket.data_end = new_data_file.tell()
478 revlog._docket.data_end = new_data_file.tell()
479
479
480
480
481 def _get_filename_from_filelog_index(path):
481 def _get_filename_from_filelog_index(path):
482 # Drop the extension and the `data/` prefix
482 # Drop the extension and the `data/` prefix
483 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
483 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
484 if len(path_part) < 2:
484 if len(path_part) < 2:
485 msg = _(b"cannot recognize filelog from filename: '%s'")
485 msg = _(b"cannot recognize filelog from filename: '%s'")
486 msg %= path
486 msg %= path
487 raise error.Abort(msg)
487 raise error.Abort(msg)
488
488
489 return path_part[1]
489 return path_part[1]
490
490
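# Quick runnable check of the helper above, on a hypothetical path: the
# `data/` prefix and the `.i` extension are dropped, leaving the tracked name.
assert _get_filename_from_filelog_index(b'data/foo/bar.txt.i') == b'foo/bar.txt'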
491
491
492 def _filelog_from_filename(repo, path):
492 def _filelog_from_filename(repo, path):
493 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
493 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
494
494
495 from .. import filelog # avoid cycle
495 from .. import filelog # avoid cycle
496
496
497 fl = filelog.filelog(repo.svfs, path)
497 fl = filelog.filelog(repo.svfs, path)
498 return fl
498 return fl
499
499
500
500
501 def _write_swapped_parents(repo, rl, rev, offset, fp):
501 def _write_swapped_parents(repo, rl, rev, offset, fp):
502 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
502 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
503 from ..pure import parsers # avoid cycle
503 from ..pure import parsers # avoid cycle
504
504
505 if repo._currentlock(repo._lockref) is None:
505 if repo._currentlock(repo._lockref) is None:
506 # Let's be paranoid about it
506 # Let's be paranoid about it
507 msg = "repo needs to be locked to rewrite parents"
507 msg = "repo needs to be locked to rewrite parents"
508 raise error.ProgrammingError(msg)
508 raise error.ProgrammingError(msg)
509
509
510 index_format = parsers.IndexObject.index_format
510 index_format = parsers.IndexObject.index_format
511 entry = rl.index[rev]
511 entry = rl.index[rev]
512 new_entry = list(entry)
512 new_entry = list(entry)
513 new_entry[5], new_entry[6] = entry[6], entry[5]
513 new_entry[5], new_entry[6] = entry[6], entry[5]
514 packed = index_format.pack(*new_entry[:8])
514 packed = index_format.pack(*new_entry[:8])
515 fp.seek(offset)
515 fp.seek(offset)
516 fp.write(packed)
516 fp.write(packed)
517
517
518
518
519 def _reorder_filelog_parents(repo, fl, to_fix):
519 def _reorder_filelog_parents(repo, fl, to_fix):
520 """
520 """
521 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
521 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
522 new version to disk, overwriting the old one with a rename.
522 new version to disk, overwriting the old one with a rename.
523 """
523 """
524 from ..pure import parsers # avoid cycle
524 from ..pure import parsers # avoid cycle
525
525
526 ui = repo.ui
526 ui = repo.ui
527 assert len(to_fix) > 0
527 assert len(to_fix) > 0
528 rl = fl._revlog
528 rl = fl._revlog
529 if rl._format_version != constants.REVLOGV1:
529 if rl._format_version != constants.REVLOGV1:
530 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
530 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
531 raise error.ProgrammingError(msg)
531 raise error.ProgrammingError(msg)
532
532
533 index_file = rl._indexfile
533 index_file = rl._indexfile
534 new_file_path = index_file + b'.tmp-parents-fix'
534 new_file_path = index_file + b'.tmp-parents-fix'
535 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
535 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
536
536
537 with ui.uninterruptible():
537 with ui.uninterruptible():
538 try:
538 try:
539 util.copyfile(
539 util.copyfile(
540 rl.opener.join(index_file),
540 rl.opener.join(index_file),
541 rl.opener.join(new_file_path),
541 rl.opener.join(new_file_path),
542 checkambig=rl._checkambig,
542 checkambig=rl._checkambig,
543 )
543 )
544
544
545 with rl.opener(new_file_path, mode=b"r+") as fp:
545 with rl.opener(new_file_path, mode=b"r+") as fp:
546 if rl._inline:
546 if rl._inline:
547 index = parsers.InlinedIndexObject(fp.read())
547 index = parsers.InlinedIndexObject(fp.read())
548 for rev in fl.revs():
548 for rev in fl.revs():
549 if rev in to_fix:
549 if rev in to_fix:
550 offset = index._calculate_index(rev)
550 offset = index._calculate_index(rev)
551 _write_swapped_parents(repo, rl, rev, offset, fp)
551 _write_swapped_parents(repo, rl, rev, offset, fp)
552 ui.write(repaired_msg % (rev, index_file))
552 ui.write(repaired_msg % (rev, index_file))
553 else:
553 else:
554 index_format = parsers.IndexObject.index_format
554 index_format = parsers.IndexObject.index_format
555 for rev in to_fix:
555 for rev in to_fix:
556 offset = rev * index_format.size
556 offset = rev * index_format.size
557 _write_swapped_parents(repo, rl, rev, offset, fp)
557 _write_swapped_parents(repo, rl, rev, offset, fp)
558 ui.write(repaired_msg % (rev, index_file))
558 ui.write(repaired_msg % (rev, index_file))
559
559
560 rl.opener.rename(new_file_path, index_file)
560 rl.opener.rename(new_file_path, index_file)
561 rl.clearcaches()
561 rl.clearcaches()
562 rl._loadindex()
562 rl._loadindex()
563 finally:
563 finally:
564 util.tryunlink(new_file_path)
564 util.tryunlink(new_file_path)
565
565
566
566
567 def _is_revision_affected(fl, filerev, metadata_cache=None):
567 def _is_revision_affected(fl, filerev, metadata_cache=None):
568 full_text = lambda: fl._revlog.rawdata(filerev)
568 full_text = lambda: fl._revlog.rawdata(filerev)
569 parent_revs = lambda: fl._revlog.parentrevs(filerev)
569 parent_revs = lambda: fl._revlog.parentrevs(filerev)
570 return _is_revision_affected_inner(
570 return _is_revision_affected_inner(
571 full_text, parent_revs, filerev, metadata_cache
571 full_text, parent_revs, filerev, metadata_cache
572 )
572 )
573
573
574
574
575 def _is_revision_affected_inner(
575 def _is_revision_affected_inner(
576 full_text,
576 full_text,
577 parents_revs,
577 parents_revs,
578 filerev,
578 filerev,
579 metadata_cache=None,
579 metadata_cache=None,
580 ):
580 ):
581 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
581 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
582 special meaning compared to the reverse in the context of filelog-based
582 special meaning compared to the reverse in the context of filelog-based
583 copytracing. issue6528 exists because new code assumed that parent ordering
583 copytracing. issue6528 exists because new code assumed that parent ordering
584 didn't matter, so this detects if the revision contains metadata (since
584 didn't matter, so this detects if the revision contains metadata (since
585 it's only used for filelog-based copytracing) and its parents are in the
585 it's only used for filelog-based copytracing) and its parents are in the
586 "wrong" order."""
586 "wrong" order."""
587 try:
587 try:
588 raw_text = full_text()
588 raw_text = full_text()
589 except error.CensoredNodeError:
589 except error.CensoredNodeError:
590 # We don't care about censored nodes as they never carry metadata
590 # We don't care about censored nodes as they never carry metadata
591 return False
591 return False
592 has_meta = raw_text.startswith(b'\x01\n')
592 has_meta = raw_text.startswith(b'\x01\n')
593 if metadata_cache is not None:
593 if metadata_cache is not None:
594 metadata_cache[filerev] = has_meta
594 metadata_cache[filerev] = has_meta
595 if has_meta:
595 if has_meta:
596 (p1, p2) = parents_revs()
596 (p1, p2) = parents_revs()
597 if p1 != nullrev and p2 == nullrev:
597 if p1 != nullrev and p2 == nullrev:
598 return True
598 return True
599 return False
599 return False
600
600
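# A minimal runnable sketch (not from the source) of the check performed
# above, detached from revlogs: filelog copy metadata is wrapped in a
# b'\x01\n' ... b'\x01\n' envelope, so only texts starting with that marker
# can carry metadata, and the revision counts as affected only when its
# parents sit in the p1=real / p2=null order.
def _toy_is_affected(raw_text, p1, p2, nullrev=-1):
    has_meta = raw_text.startswith(b'\x01\n')
    return has_meta and p1 != nullrev and p2 == nullrev

assert _toy_is_affected(b'\x01\ncopy: a\ncopyrev: 0f\n\x01\nbody', p1=3, p2=-1)
assert not _toy_is_affected(b'plain body', p1=3, p2=-1)
assert not _toy_is_affected(b'\x01\ncopy: a\n\x01\nbody', p1=-1, p2=3)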
601
601
602 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
602 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
603 rl = fl._revlog
603 rl = fl._revlog
604 is_censored = lambda: rl.iscensored(filerev)
604 is_censored = lambda: rl.iscensored(filerev)
605 delta_base = lambda: rl.deltaparent(filerev)
605 delta_base = lambda: rl.deltaparent(filerev)
606 delta = lambda: rl._chunk(filerev)
606 delta = lambda: rl._chunk(filerev)
607 full_text = lambda: rl.rawdata(filerev)
607 full_text = lambda: rl.rawdata(filerev)
608 parent_revs = lambda: rl.parentrevs(filerev)
608 parent_revs = lambda: rl.parentrevs(filerev)
609 return _is_revision_affected_fast_inner(
609 return _is_revision_affected_fast_inner(
610 is_censored,
610 is_censored,
611 delta_base,
611 delta_base,
612 delta,
612 delta,
613 full_text,
613 full_text,
614 parent_revs,
614 parent_revs,
615 filerev,
615 filerev,
616 metadata_cache,
616 metadata_cache,
617 )
617 )
618
618
619
619
620 def _is_revision_affected_fast_inner(
620 def _is_revision_affected_fast_inner(
621 is_censored,
621 is_censored,
622 delta_base,
622 delta_base,
623 delta,
623 delta,
624 full_text,
624 full_text,
625 parent_revs,
625 parent_revs,
626 filerev,
626 filerev,
627 metadata_cache,
627 metadata_cache,
628 ):
628 ):
629 """Optimization fast-path for `_is_revision_affected`.
629 """Optimization fast-path for `_is_revision_affected`.
630
630
631 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
631 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
632 revision to check whether its delta base has metadata, avoiding computation
632 revision to check whether its delta base has metadata, avoiding computation
633 of the full text by looking at the current delta instead.
633 of the full text by looking at the current delta instead.
634
634
635 This optimization only works if the revisions are looked at in order."""
635 This optimization only works if the revisions are looked at in order."""
636
636
637 if is_censored():
637 if is_censored():
638 # Censored revisions don't contain metadata, so they cannot be affected
638 # Censored revisions don't contain metadata, so they cannot be affected
639 metadata_cache[filerev] = False
639 metadata_cache[filerev] = False
640 return False
640 return False
641
641
642 p1, p2 = parent_revs()
642 p1, p2 = parent_revs()
643 if p1 == nullrev or p2 != nullrev:
643 if p1 == nullrev or p2 != nullrev:
644 return False
644 return False
645
645
646 delta_parent = delta_base()
646 delta_parent = delta_base()
647 parent_has_metadata = metadata_cache.get(delta_parent)
647 parent_has_metadata = metadata_cache.get(delta_parent)
648 if parent_has_metadata is None:
648 if parent_has_metadata is None:
649 return _is_revision_affected_inner(
649 return _is_revision_affected_inner(
650 full_text,
650 full_text,
651 parent_revs,
651 parent_revs,
652 filerev,
652 filerev,
653 metadata_cache,
653 metadata_cache,
654 )
654 )
655
655
656 chunk = delta()
656 chunk = delta()
657 if not len(chunk):
657 if not len(chunk):
658 # No diff for this revision
658 # No diff for this revision
659 return parent_has_metadata
659 return parent_has_metadata
660
660
661 header_length = 12
661 header_length = 12
662 if len(chunk) < header_length:
662 if len(chunk) < header_length:
663 raise error.Abort(_(b"patch cannot be decoded"))
663 raise error.Abort(_(b"patch cannot be decoded"))
664
664
665 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
665 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
666
666
667 if start < 2: # len(b'\x01\n') == 2
667 if start < 2: # len(b'\x01\n') == 2
668 # This delta does *something* to the metadata marker (if any).
668 # This delta does *something* to the metadata marker (if any).
669 # Check it the slow way
669 # Check it the slow way
670 is_affected = _is_revision_affected_inner(
670 is_affected = _is_revision_affected_inner(
671 full_text,
671 full_text,
672 parent_revs,
672 parent_revs,
673 filerev,
673 filerev,
674 metadata_cache,
674 metadata_cache,
675 )
675 )
676 return is_affected
676 return is_affected
677
677
678 # The diff did not remove or add the metadata header, so this revision is
678 # The diff did not remove or add the metadata header, so this revision is
679 # in the same situation as its parent
679 # in the same situation as its parent
680 metadata_cache[filerev] = parent_has_metadata
680 metadata_cache[filerev] = parent_has_metadata
681 return parent_has_metadata
681 return parent_has_metadata
682
682
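# A small runnable sketch (not from the source) of the hunk-header decoding
# used in the fast path above: a binary delta is a sequence of hunks, each
# starting with three big-endian int32 fields (start, end, new length). If the
# first hunk starts before offset 2 it may touch a b'\x01\n' metadata marker,
# so the code falls back to the slow, full-text check.
import struct

toy_delta = struct.pack(b'>lll', 0, 2, 4) + b'abcd'   # replace bytes [0, 2) with b'abcd'
start, end, newlen = struct.unpack(b'>lll', toy_delta[:12])
assert (start, end, newlen) == (0, 2, 4)
assert start < 2   # would trigger the slow path in the code above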
683
683
684 def _from_report(ui, repo, context, from_report, dry_run):
684 def _from_report(ui, repo, context, from_report, dry_run):
685 """
685 """
686 Fix the revisions given in the `from_report` file, but still check that the
686 Fix the revisions given in the `from_report` file, but still check that the
687 revisions are indeed affected to prevent an unfortunate cyclic situation
687 revisions are indeed affected to prevent an unfortunate cyclic situation
688 where we'd swap well-ordered parents again.
688 where we'd swap well-ordered parents again.
689
689
690 See the doc for `debug_fix_issue6528` for the format documentation.
690 See the doc for `debug_fix_issue6528` for the format documentation.
691 """
691 """
692 ui.write(_(b"loading report file '%s'\n") % from_report)
692 ui.write(_(b"loading report file '%s'\n") % from_report)
693
693
694 with context(), open(from_report, mode='rb') as f:
694 with context(), open(from_report, mode='rb') as f:
695 for line in f.read().split(b'\n'):
695 for line in f.read().split(b'\n'):
696 if not line:
696 if not line:
697 continue
697 continue
698 filenodes, filename = line.split(b' ', 1)
698 filenodes, filename = line.split(b' ', 1)
699 fl = _filelog_from_filename(repo, filename)
699 fl = _filelog_from_filename(repo, filename)
700 to_fix = set(
700 to_fix = set(
701 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
701 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
702 )
702 )
703 excluded = set()
703 excluded = set()
704
704
705 for filerev in to_fix:
705 for filerev in to_fix:
706 if _is_revision_affected(fl, filerev):
706 if _is_revision_affected(fl, filerev):
707 msg = b"found affected revision %d for filelog '%s'\n"
707 msg = b"found affected revision %d for filelog '%s'\n"
708 ui.warn(msg % (filerev, filename))
708 ui.warn(msg % (filerev, filename))
709 else:
709 else:
710 msg = _(b"revision %s of file '%s' is not affected\n")
710 msg = _(b"revision %s of file '%s' is not affected\n")
711 msg %= (binascii.hexlify(fl.node(filerev)), filename)
711 msg %= (binascii.hexlify(fl.node(filerev)), filename)
712 ui.warn(msg)
712 ui.warn(msg)
713 excluded.add(filerev)
713 excluded.add(filerev)
714
714
715 to_fix = to_fix - excluded
715 to_fix = to_fix - excluded
716 if not to_fix:
716 if not to_fix:
717 msg = _(b"no affected revisions were found for '%s'\n")
717 msg = _(b"no affected revisions were found for '%s'\n")
718 ui.write(msg % filename)
718 ui.write(msg % filename)
719 continue
719 continue
720 if not dry_run:
720 if not dry_run:
721 _reorder_filelog_parents(repo, fl, sorted(to_fix))
721 _reorder_filelog_parents(repo, fl, sorted(to_fix))
722
722
723
723
724 def filter_delta_issue6528(revlog, deltas_iter):
724 def filter_delta_issue6528(revlog, deltas_iter):
725 """filter incomind deltas to repaire issue 6528 on the fly"""
725 """filter incomind deltas to repaire issue 6528 on the fly"""
726 metadata_cache = {}
726 metadata_cache = {}
727
727
728 deltacomputer = deltas.deltacomputer(revlog)
728 deltacomputer = deltas.deltacomputer(revlog)
729
729
730 for rev, d in enumerate(deltas_iter, len(revlog)):
730 for rev, d in enumerate(deltas_iter, len(revlog)):
731 (
731 (
732 node,
732 node,
733 p1_node,
733 p1_node,
734 p2_node,
734 p2_node,
735 linknode,
735 linknode,
736 deltabase,
736 deltabase,
737 delta,
737 delta,
738 flags,
738 flags,
739 sidedata,
739 sidedata,
740 ) = d
740 ) = d
741
741
742 if not revlog.index.has_node(deltabase):
742 if not revlog.index.has_node(deltabase):
743 raise error.LookupError(
743 raise error.LookupError(
744 deltabase, revlog.radix, _(b'unknown parent')
744 deltabase, revlog.radix, _(b'unknown parent')
745 )
745 )
746 base_rev = revlog.rev(deltabase)
746 base_rev = revlog.rev(deltabase)
747 if not revlog.index.has_node(p1_node):
747 if not revlog.index.has_node(p1_node):
748 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
748 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
749 p1_rev = revlog.rev(p1_node)
749 p1_rev = revlog.rev(p1_node)
750 if not revlog.index.has_node(p2_node):
750 if not revlog.index.has_node(p2_node):
751 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
751 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
752 p2_rev = revlog.rev(p2_node)
752 p2_rev = revlog.rev(p2_node)
753
753
754 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
754 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
756 delta_base = lambda: base_rev
756 delta_base = lambda: base_rev
757 parent_revs = lambda: (p1_rev, p2_rev)
757 parent_revs = lambda: (p1_rev, p2_rev)
758
758
759 def full_text():
759 def full_text():
760 # note: being able to reuse the full text computation in the
760 # note: being able to reuse the full text computation in the
761 # underlying addrevision would be useful, however this is a bit too
761 # underlying addrevision would be useful, however this is a bit too
762 # intrusive for the "quick" issue6528 fix we are writing before the
762 # intrusive for the "quick" issue6528 fix we are writing before the
763 # 5.8 release
763 # 5.8 release
764 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
764 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
765
765
766 revinfo = revlogutils.revisioninfo(
766 revinfo = revlogutils.revisioninfo(
767 node,
767 node,
768 p1_node,
768 p1_node,
769 p2_node,
769 p2_node,
770 [None],
770 [None],
771 textlen,
771 textlen,
772 (base_rev, delta),
772 (base_rev, delta),
773 flags,
773 flags,
774 )
774 )
775 # cached by the global "writing" context
775 # cached by the global "writing" context
776 assert revlog._writinghandles is not None
776 assert revlog._writinghandles is not None
777 if revlog._inline:
777 if revlog._inline:
778 fh = revlog._writinghandles[0]
778 fh = revlog._writinghandles[0]
779 else:
779 else:
780 fh = revlog._writinghandles[1]
780 fh = revlog._writinghandles[1]
781 return deltacomputer.buildtext(revinfo, fh)
781 return deltacomputer.buildtext(revinfo, fh)
782
782
783 is_affected = _is_revision_affected_fast_inner(
783 is_affected = _is_revision_affected_fast_inner(
784 is_censored,
784 is_censored,
785 delta_base,
785 delta_base,
786 lambda: delta,
786 lambda: delta,
787 full_text,
787 full_text,
788 parent_revs,
788 parent_revs,
789 rev,
789 rev,
790 metadata_cache,
790 metadata_cache,
791 )
791 )
792 if is_affected:
792 if is_affected:
793 d = (
793 d = (
794 node,
794 node,
795 p2_node,
795 p2_node,
796 p1_node,
796 p1_node,
797 linknode,
797 linknode,
798 deltabase,
798 deltabase,
799 delta,
799 delta,
800 flags,
800 flags,
801 sidedata,
801 sidedata,
802 )
802 )
803 yield d
803 yield d
804
804
805
805
806 def repair_issue6528(
806 def repair_issue6528(
807 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
807 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
808 ):
808 ):
809 from .. import store # avoid cycle
809 from .. import store # avoid cycle
810
810
811 @contextlib.contextmanager
811 @contextlib.contextmanager
812 def context():
812 def context():
813 if dry_run or to_report: # No need for locking
813 if dry_run or to_report: # No need for locking
814 yield
814 yield
815 else:
815 else:
816 with repo.wlock(), repo.lock():
816 with repo.wlock(), repo.lock():
817 yield
817 yield
818
818
819 if from_report:
819 if from_report:
820 return _from_report(ui, repo, context, from_report, dry_run)
820 return _from_report(ui, repo, context, from_report, dry_run)
821
821
822 report_entries = []
822 report_entries = []
823
823
824 with context():
824 with context():
825 files = list(
825 files = list(
826 (file_type, path)
826 (file_type, path)
827 for (file_type, path, _e, _s) in repo.store.datafiles()
827 for (file_type, path, _s) in repo.store.datafiles()
828 if path.endswith(b'.i') and file_type & store.FILEFLAGS_FILELOG
828 if path.endswith(b'.i') and file_type & store.FILEFLAGS_FILELOG
829 )
829 )
830
830
831 progress = ui.makeprogress(
831 progress = ui.makeprogress(
832 _(b"looking for affected revisions"),
832 _(b"looking for affected revisions"),
833 unit=_(b"filelogs"),
833 unit=_(b"filelogs"),
834 total=len(files),
834 total=len(files),
835 )
835 )
836 found_nothing = True
836 found_nothing = True
837
837
838 for file_type, path in files:
838 for file_type, path in files:
839 if (
839 if (
840 not path.endswith(b'.i')
840 not path.endswith(b'.i')
841 or not file_type & store.FILEFLAGS_FILELOG
841 or not file_type & store.FILEFLAGS_FILELOG
842 ):
842 ):
843 continue
843 continue
844 progress.increment()
844 progress.increment()
845 filename = _get_filename_from_filelog_index(path)
845 filename = _get_filename_from_filelog_index(path)
846 fl = _filelog_from_filename(repo, filename)
846 fl = _filelog_from_filename(repo, filename)
847
847
848 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
848 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
849 to_fix = set()
849 to_fix = set()
850 metadata_cache = {}
850 metadata_cache = {}
851 for filerev in fl.revs():
851 for filerev in fl.revs():
852 affected = _is_revision_affected_fast(
852 affected = _is_revision_affected_fast(
853 repo, fl, filerev, metadata_cache
853 repo, fl, filerev, metadata_cache
854 )
854 )
855 if paranoid:
855 if paranoid:
856 slow = _is_revision_affected(fl, filerev)
856 slow = _is_revision_affected(fl, filerev)
857 if slow != affected:
857 if slow != affected:
858 msg = _(b"paranoid check failed for '%s' at node %s")
858 msg = _(b"paranoid check failed for '%s' at node %s")
859 node = binascii.hexlify(fl.node(filerev))
859 node = binascii.hexlify(fl.node(filerev))
860 raise error.Abort(msg % (filename, node))
860 raise error.Abort(msg % (filename, node))
861 if affected:
861 if affected:
862 msg = b"found affected revision %d for filelog '%s'\n"
862 msg = b"found affected revision %d for filelog '%s'\n"
863 ui.warn(msg % (filerev, path))
863 ui.warn(msg % (filerev, path))
864 found_nothing = False
864 found_nothing = False
865 if not dry_run:
865 if not dry_run:
866 if to_report:
866 if to_report:
867 to_fix.add(binascii.hexlify(fl.node(filerev)))
867 to_fix.add(binascii.hexlify(fl.node(filerev)))
868 else:
868 else:
869 to_fix.add(filerev)
869 to_fix.add(filerev)
870
870
871 if to_fix:
871 if to_fix:
872 to_fix = sorted(to_fix)
872 to_fix = sorted(to_fix)
873 if to_report:
873 if to_report:
874 report_entries.append((filename, to_fix))
874 report_entries.append((filename, to_fix))
875 else:
875 else:
876 _reorder_filelog_parents(repo, fl, to_fix)
876 _reorder_filelog_parents(repo, fl, to_fix)
877
877
878 if found_nothing:
878 if found_nothing:
879 ui.write(_(b"no affected revisions were found\n"))
879 ui.write(_(b"no affected revisions were found\n"))
880
880
881 if to_report and report_entries:
881 if to_report and report_entries:
882 with open(to_report, mode="wb") as f:
882 with open(to_report, mode="wb") as f:
883 for path, to_fix in report_entries:
883 for path, to_fix in report_entries:
884 f.write(b"%s %s\n" % (b",".join(to_fix), path))
884 f.write(b"%s %s\n" % (b",".join(to_fix), path))
885
885
886 progress.complete()
886 progress.complete()
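For reference, the report written above is plain text with one line per affected filelog: the comma-joined hex nodes that need fixing, a single space, then the filelog name; the from_report path reads that format back. A minimal sketch of a parser for it (the helper name is hypothetical, not part of this change):

    def parse_issue6528_report(data):
        # each line is b"<hexnode>,<hexnode>,... <filelog name>"
        entries = []
        for line in data.splitlines():
            if not line:
                continue
            nodes, _sep, filename = line.partition(b' ')
            entries.append((filename, nodes.split(b',')))
        return entries
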
@@ -1,839 +1,850 b''
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import errno
10 import errno
11 import functools
11 import functools
12 import os
12 import os
13 import re
13 import re
14 import stat
14 import stat
15
15
16 from .i18n import _
16 from .i18n import _
17 from .pycompat import getattr
17 from .pycompat import getattr
18 from .node import hex
18 from .node import hex
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 error,
21 error,
22 manifest,
22 manifest,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 vfs as vfsmod,
26 vfs as vfsmod,
27 )
27 )
28 from .utils import hashutil
28 from .utils import hashutil
29
29
30 parsers = policy.importmod('parsers')
30 parsers = policy.importmod('parsers')
31 # how many bytes should be read from fncache in one read
31 # how many bytes should be read from fncache in one read
32 # It is done to prevent loading large fncache files into memory
32 # It is done to prevent loading large fncache files into memory
33 fncache_chunksize = 10 ** 6
33 fncache_chunksize = 10 ** 6
34
34
35
35
36 def _matchtrackedpath(path, matcher):
36 def _matchtrackedpath(path, matcher):
37 """parses a fncache entry and returns whether the entry is tracking a path
37 """parses a fncache entry and returns whether the entry is tracking a path
38 matched by matcher or not.
38 matched by matcher or not.
39
39
40 If matcher is None, returns True"""
40 If matcher is None, returns True"""
41
41
42 if matcher is None:
42 if matcher is None:
43 return True
43 return True
44 path = decodedir(path)
44 path = decodedir(path)
45 if path.startswith(b'data/'):
45 if path.startswith(b'data/'):
46 return matcher(path[len(b'data/') : -len(b'.i')])
46 return matcher(path[len(b'data/') : -len(b'.i')])
47 elif path.startswith(b'meta/'):
47 elif path.startswith(b'meta/'):
48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
49
49
50 raise error.ProgrammingError(b"cannot decode path %s" % path)
50 raise error.ProgrammingError(b"cannot decode path %s" % path)
51
51
52
52
53 # This avoids a collision between a file named foo and a dir named
53 # This avoids a collision between a file named foo and a dir named
54 # foo.i or foo.d
54 # foo.i or foo.d
55 def _encodedir(path):
55 def _encodedir(path):
56 """
56 """
57 >>> _encodedir(b'data/foo.i')
57 >>> _encodedir(b'data/foo.i')
58 'data/foo.i'
58 'data/foo.i'
59 >>> _encodedir(b'data/foo.i/bla.i')
59 >>> _encodedir(b'data/foo.i/bla.i')
60 'data/foo.i.hg/bla.i'
60 'data/foo.i.hg/bla.i'
61 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 'data/foo.i.hg.hg/bla.i'
62 'data/foo.i.hg.hg/bla.i'
63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 """
65 """
66 return (
66 return (
67 path.replace(b".hg/", b".hg.hg/")
67 path.replace(b".hg/", b".hg.hg/")
68 .replace(b".i/", b".i.hg/")
68 .replace(b".i/", b".i.hg/")
69 .replace(b".d/", b".d.hg/")
69 .replace(b".d/", b".d.hg/")
70 )
70 )
71
71
72
72
73 encodedir = getattr(parsers, 'encodedir', _encodedir)
73 encodedir = getattr(parsers, 'encodedir', _encodedir)
74
74
75
75
76 def decodedir(path):
76 def decodedir(path):
77 """
77 """
78 >>> decodedir(b'data/foo.i')
78 >>> decodedir(b'data/foo.i')
79 'data/foo.i'
79 'data/foo.i'
80 >>> decodedir(b'data/foo.i.hg/bla.i')
80 >>> decodedir(b'data/foo.i.hg/bla.i')
81 'data/foo.i/bla.i'
81 'data/foo.i/bla.i'
82 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
82 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
83 'data/foo.i.hg/bla.i'
83 'data/foo.i.hg/bla.i'
84 """
84 """
85 if b".hg/" not in path:
85 if b".hg/" not in path:
86 return path
86 return path
87 return (
87 return (
88 path.replace(b".d.hg/", b".d/")
88 path.replace(b".d.hg/", b".d/")
89 .replace(b".i.hg/", b".i/")
89 .replace(b".i.hg/", b".i/")
90 .replace(b".hg.hg/", b".hg/")
90 .replace(b".hg.hg/", b".hg/")
91 )
91 )
92
92
93
93
94 def _reserved():
94 def _reserved():
95 """characters that are problematic for filesystems
95 """characters that are problematic for filesystems
96
96
97 * ascii escapes (0..31)
97 * ascii escapes (0..31)
98 * ascii hi (126..255)
98 * ascii hi (126..255)
99 * windows specials
99 * windows specials
100
100
101 these characters will be escaped by encodefunctions
101 these characters will be escaped by encodefunctions
102 """
102 """
103 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 for x in range(32):
104 for x in range(32):
105 yield x
105 yield x
106 for x in range(126, 256):
106 for x in range(126, 256):
107 yield x
107 yield x
108 for x in winreserved:
108 for x in winreserved:
109 yield x
109 yield x
110
110
111
111
112 def _buildencodefun():
112 def _buildencodefun():
113 """
113 """
114 >>> enc, dec = _buildencodefun()
114 >>> enc, dec = _buildencodefun()
115
115
116 >>> enc(b'nothing/special.txt')
116 >>> enc(b'nothing/special.txt')
117 'nothing/special.txt'
117 'nothing/special.txt'
118 >>> dec(b'nothing/special.txt')
118 >>> dec(b'nothing/special.txt')
119 'nothing/special.txt'
119 'nothing/special.txt'
120
120
121 >>> enc(b'HELLO')
121 >>> enc(b'HELLO')
122 '_h_e_l_l_o'
122 '_h_e_l_l_o'
123 >>> dec(b'_h_e_l_l_o')
123 >>> dec(b'_h_e_l_l_o')
124 'HELLO'
124 'HELLO'
125
125
126 >>> enc(b'hello:world?')
126 >>> enc(b'hello:world?')
127 'hello~3aworld~3f'
127 'hello~3aworld~3f'
128 >>> dec(b'hello~3aworld~3f')
128 >>> dec(b'hello~3aworld~3f')
129 'hello:world?'
129 'hello:world?'
130
130
131 >>> enc(b'the\\x07quick\\xADshot')
131 >>> enc(b'the\\x07quick\\xADshot')
132 'the~07quick~adshot'
132 'the~07quick~adshot'
133 >>> dec(b'the~07quick~adshot')
133 >>> dec(b'the~07quick~adshot')
134 'the\\x07quick\\xadshot'
134 'the\\x07quick\\xadshot'
135 """
135 """
136 e = b'_'
136 e = b'_'
137 xchr = pycompat.bytechr
137 xchr = pycompat.bytechr
138 asciistr = list(map(xchr, range(127)))
138 asciistr = list(map(xchr, range(127)))
139 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
139 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
140
140
141 cmap = {x: x for x in asciistr}
141 cmap = {x: x for x in asciistr}
142 for x in _reserved():
142 for x in _reserved():
143 cmap[xchr(x)] = b"~%02x" % x
143 cmap[xchr(x)] = b"~%02x" % x
144 for x in capitals + [ord(e)]:
144 for x in capitals + [ord(e)]:
145 cmap[xchr(x)] = e + xchr(x).lower()
145 cmap[xchr(x)] = e + xchr(x).lower()
146
146
147 dmap = {}
147 dmap = {}
148 for k, v in pycompat.iteritems(cmap):
148 for k, v in pycompat.iteritems(cmap):
149 dmap[v] = k
149 dmap[v] = k
150
150
151 def decode(s):
151 def decode(s):
152 i = 0
152 i = 0
153 while i < len(s):
153 while i < len(s):
154 for l in pycompat.xrange(1, 4):
154 for l in pycompat.xrange(1, 4):
155 try:
155 try:
156 yield dmap[s[i : i + l]]
156 yield dmap[s[i : i + l]]
157 i += l
157 i += l
158 break
158 break
159 except KeyError:
159 except KeyError:
160 pass
160 pass
161 else:
161 else:
162 raise KeyError
162 raise KeyError
163
163
164 return (
164 return (
165 lambda s: b''.join(
165 lambda s: b''.join(
166 [cmap[s[c : c + 1]] for c in pycompat.xrange(len(s))]
166 [cmap[s[c : c + 1]] for c in pycompat.xrange(len(s))]
167 ),
167 ),
168 lambda s: b''.join(list(decode(s))),
168 lambda s: b''.join(list(decode(s))),
169 )
169 )
170
170
171
171
172 _encodefname, _decodefname = _buildencodefun()
172 _encodefname, _decodefname = _buildencodefun()
173
173
174
174
175 def encodefilename(s):
175 def encodefilename(s):
176 """
176 """
177 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
177 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
178 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
178 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
179 """
179 """
180 return _encodefname(encodedir(s))
180 return _encodefname(encodedir(s))
181
181
182
182
183 def decodefilename(s):
183 def decodefilename(s):
184 """
184 """
185 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
185 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
186 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
186 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
187 """
187 """
188 return decodedir(_decodefname(s))
188 return decodedir(_decodefname(s))
189
189
190
190
191 def _buildlowerencodefun():
191 def _buildlowerencodefun():
192 """
192 """
193 >>> f = _buildlowerencodefun()
193 >>> f = _buildlowerencodefun()
194 >>> f(b'nothing/special.txt')
194 >>> f(b'nothing/special.txt')
195 'nothing/special.txt'
195 'nothing/special.txt'
196 >>> f(b'HELLO')
196 >>> f(b'HELLO')
197 'hello'
197 'hello'
198 >>> f(b'hello:world?')
198 >>> f(b'hello:world?')
199 'hello~3aworld~3f'
199 'hello~3aworld~3f'
200 >>> f(b'the\\x07quick\\xADshot')
200 >>> f(b'the\\x07quick\\xADshot')
201 'the~07quick~adshot'
201 'the~07quick~adshot'
202 """
202 """
203 xchr = pycompat.bytechr
203 xchr = pycompat.bytechr
204 cmap = {xchr(x): xchr(x) for x in pycompat.xrange(127)}
204 cmap = {xchr(x): xchr(x) for x in pycompat.xrange(127)}
205 for x in _reserved():
205 for x in _reserved():
206 cmap[xchr(x)] = b"~%02x" % x
206 cmap[xchr(x)] = b"~%02x" % x
207 for x in range(ord(b"A"), ord(b"Z") + 1):
207 for x in range(ord(b"A"), ord(b"Z") + 1):
208 cmap[xchr(x)] = xchr(x).lower()
208 cmap[xchr(x)] = xchr(x).lower()
209
209
210 def lowerencode(s):
210 def lowerencode(s):
211 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
211 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
212
212
213 return lowerencode
213 return lowerencode
214
214
215
215
216 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
216 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
217
217
218 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
218 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
219 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
219 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
220 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
220 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
221
221
222
222
223 def _auxencode(path, dotencode):
223 def _auxencode(path, dotencode):
224 """
224 """
225 Encodes filenames containing names reserved by Windows or which end in
225 Encodes filenames containing names reserved by Windows or which end in
226 period or space. Does not touch other single reserved characters c.
226 period or space. Does not touch other single reserved characters c.
227 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
227 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
228 Additionally encodes space or period at the beginning, if dotencode is
228 Additionally encodes space or period at the beginning, if dotencode is
229 True. Parameter path is assumed to be all lowercase.
229 True. Parameter path is assumed to be all lowercase.
230 A segment only needs encoding if a reserved name appears as a
230 A segment only needs encoding if a reserved name appears as a
231 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
231 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
232 doesn't need encoding.
232 doesn't need encoding.
233
233
234 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
234 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
235 >>> _auxencode(s.split(b'/'), True)
235 >>> _auxencode(s.split(b'/'), True)
236 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
236 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
237 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
237 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
238 >>> _auxencode(s.split(b'/'), False)
238 >>> _auxencode(s.split(b'/'), False)
239 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
239 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
240 >>> _auxencode([b'foo. '], True)
240 >>> _auxencode([b'foo. '], True)
241 ['foo.~20']
241 ['foo.~20']
242 >>> _auxencode([b' .foo'], True)
242 >>> _auxencode([b' .foo'], True)
243 ['~20.foo']
243 ['~20.foo']
244 """
244 """
245 for i, n in enumerate(path):
245 for i, n in enumerate(path):
246 if not n:
246 if not n:
247 continue
247 continue
248 if dotencode and n[0] in b'. ':
248 if dotencode and n[0] in b'. ':
249 n = b"~%02x" % ord(n[0:1]) + n[1:]
249 n = b"~%02x" % ord(n[0:1]) + n[1:]
250 path[i] = n
250 path[i] = n
251 else:
251 else:
252 l = n.find(b'.')
252 l = n.find(b'.')
253 if l == -1:
253 if l == -1:
254 l = len(n)
254 l = len(n)
255 if (l == 3 and n[:3] in _winres3) or (
255 if (l == 3 and n[:3] in _winres3) or (
256 l == 4
256 l == 4
257 and n[3:4] <= b'9'
257 and n[3:4] <= b'9'
258 and n[3:4] >= b'1'
258 and n[3:4] >= b'1'
259 and n[:3] in _winres4
259 and n[:3] in _winres4
260 ):
260 ):
261 # encode third letter ('aux' -> 'au~78')
261 # encode third letter ('aux' -> 'au~78')
262 ec = b"~%02x" % ord(n[2:3])
262 ec = b"~%02x" % ord(n[2:3])
263 n = n[0:2] + ec + n[3:]
263 n = n[0:2] + ec + n[3:]
264 path[i] = n
264 path[i] = n
265 if n[-1] in b'. ':
265 if n[-1] in b'. ':
266 # encode last period or space ('foo...' -> 'foo..~2e')
266 # encode last period or space ('foo...' -> 'foo..~2e')
267 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
267 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
268 return path
268 return path
269
269
270
270
271 _maxstorepathlen = 120
271 _maxstorepathlen = 120
272 _dirprefixlen = 8
272 _dirprefixlen = 8
273 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
273 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
274
274
275
275
276 def _hashencode(path, dotencode):
276 def _hashencode(path, dotencode):
277 digest = hex(hashutil.sha1(path).digest())
277 digest = hex(hashutil.sha1(path).digest())
278 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
278 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
279 parts = _auxencode(le, dotencode)
279 parts = _auxencode(le, dotencode)
280 basename = parts[-1]
280 basename = parts[-1]
281 _root, ext = os.path.splitext(basename)
281 _root, ext = os.path.splitext(basename)
282 sdirs = []
282 sdirs = []
283 sdirslen = 0
283 sdirslen = 0
284 for p in parts[:-1]:
284 for p in parts[:-1]:
285 d = p[:_dirprefixlen]
285 d = p[:_dirprefixlen]
286 if d[-1] in b'. ':
286 if d[-1] in b'. ':
287 # Windows can't access dirs ending in period or space
287 # Windows can't access dirs ending in period or space
288 d = d[:-1] + b'_'
288 d = d[:-1] + b'_'
289 if sdirslen == 0:
289 if sdirslen == 0:
290 t = len(d)
290 t = len(d)
291 else:
291 else:
292 t = sdirslen + 1 + len(d)
292 t = sdirslen + 1 + len(d)
293 if t > _maxshortdirslen:
293 if t > _maxshortdirslen:
294 break
294 break
295 sdirs.append(d)
295 sdirs.append(d)
296 sdirslen = t
296 sdirslen = t
297 dirs = b'/'.join(sdirs)
297 dirs = b'/'.join(sdirs)
298 if len(dirs) > 0:
298 if len(dirs) > 0:
299 dirs += b'/'
299 dirs += b'/'
300 res = b'dh/' + dirs + digest + ext
300 res = b'dh/' + dirs + digest + ext
301 spaceleft = _maxstorepathlen - len(res)
301 spaceleft = _maxstorepathlen - len(res)
302 if spaceleft > 0:
302 if spaceleft > 0:
303 filler = basename[:spaceleft]
303 filler = basename[:spaceleft]
304 res = b'dh/' + dirs + filler + digest + ext
304 res = b'dh/' + dirs + filler + digest + ext
305 return res
305 return res
306
306
307
307
308 def _hybridencode(path, dotencode):
308 def _hybridencode(path, dotencode):
309 """encodes path with a length limit
309 """encodes path with a length limit
310
310
311 Encodes all paths that begin with 'data/', according to the following.
311 Encodes all paths that begin with 'data/', according to the following.
312
312
313 Default encoding (reversible):
313 Default encoding (reversible):
314
314
315 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
315 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
316 characters are encoded as '~xx', where xx is the two digit hex code
316 characters are encoded as '~xx', where xx is the two digit hex code
317 of the character (see encodefilename).
317 of the character (see encodefilename).
318 Relevant path components consisting of Windows reserved filenames are
318 Relevant path components consisting of Windows reserved filenames are
319 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
319 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
320
320
321 Hashed encoding (not reversible):
321 Hashed encoding (not reversible):
322
322
323 If the default-encoded path is longer than _maxstorepathlen, a
323 If the default-encoded path is longer than _maxstorepathlen, a
324 non-reversible hybrid hashing of the path is done instead.
324 non-reversible hybrid hashing of the path is done instead.
325 This encoding uses up to _dirprefixlen characters of all directory
325 This encoding uses up to _dirprefixlen characters of all directory
326 levels of the lowerencoded path, but not more levels than can fit into
326 levels of the lowerencoded path, but not more levels than can fit into
327 _maxshortdirslen.
327 _maxshortdirslen.
328 Then follows the filler followed by the sha digest of the full path.
328 Then follows the filler followed by the sha digest of the full path.
329 The filler is the beginning of the basename of the lowerencoded path
329 The filler is the beginning of the basename of the lowerencoded path
330 (the basename is everything after the last path separator). The filler
330 (the basename is everything after the last path separator). The filler
331 is as long as possible, filling in characters from the basename until
331 is as long as possible, filling in characters from the basename until
332 the encoded path has _maxstorepathlen characters (or all chars of the
332 the encoded path has _maxstorepathlen characters (or all chars of the
333 basename have been taken).
333 basename have been taken).
334 The extension (e.g. '.i' or '.d') is preserved.
334 The extension (e.g. '.i' or '.d') is preserved.
335
335
336 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
336 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
337 encoding was used.
337 encoding was used.
338 """
338 """
339 path = encodedir(path)
339 path = encodedir(path)
340 ef = _encodefname(path).split(b'/')
340 ef = _encodefname(path).split(b'/')
341 res = b'/'.join(_auxencode(ef, dotencode))
341 res = b'/'.join(_auxencode(ef, dotencode))
342 if len(res) > _maxstorepathlen:
342 if len(res) > _maxstorepathlen:
343 res = _hashencode(path, dotencode)
343 res = _hashencode(path, dotencode)
344 return res
344 return res
345
345
346
346
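A rough illustration of the two branches described in the docstring above: a short path stays in the reversible default encoding, while a path whose default encoding exceeds _maxstorepathlen (120) falls back to the non-reversible hashed form. The hashed output depends on the sha1 of the path, so only its shape is sketched here:

    _hybridencode(b'data/FOO:bar.i', False)
    # -> b'data/_f_o_o~3abar.i'   (uppercase -> '_' + lowercase, ':' -> '~3a')

    _hybridencode(b'data/' + b'x' * 200 + b'.i', False)
    # -> roughly b'dh/' + a filler taken from the basename + the 40-char
    #    sha1 hex digest + b'.i', capped at 120 bytes and not reversible
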
347 def _pathencode(path):
347 def _pathencode(path):
348 de = encodedir(path)
348 de = encodedir(path)
349 if len(path) > _maxstorepathlen:
349 if len(path) > _maxstorepathlen:
350 return _hashencode(de, True)
350 return _hashencode(de, True)
351 ef = _encodefname(de).split(b'/')
351 ef = _encodefname(de).split(b'/')
352 res = b'/'.join(_auxencode(ef, True))
352 res = b'/'.join(_auxencode(ef, True))
353 if len(res) > _maxstorepathlen:
353 if len(res) > _maxstorepathlen:
354 return _hashencode(de, True)
354 return _hashencode(de, True)
355 return res
355 return res
356
356
357
357
358 _pathencode = getattr(parsers, 'pathencode', _pathencode)
358 _pathencode = getattr(parsers, 'pathencode', _pathencode)
359
359
360
360
361 def _plainhybridencode(f):
361 def _plainhybridencode(f):
362 return _hybridencode(f, False)
362 return _hybridencode(f, False)
363
363
364
364
365 def _calcmode(vfs):
365 def _calcmode(vfs):
366 try:
366 try:
367 # files in .hg/ will be created using this mode
367 # files in .hg/ will be created using this mode
368 mode = vfs.stat().st_mode
368 mode = vfs.stat().st_mode
369 # avoid some useless chmods
369 # avoid some useless chmods
370 if (0o777 & ~util.umask) == (0o777 & mode):
370 if (0o777 & ~util.umask) == (0o777 & mode):
371 mode = None
371 mode = None
372 except OSError:
372 except OSError:
373 mode = None
373 mode = None
374 return mode
374 return mode
375
375
376
376
377 _data = [
377 _data = [
378 b'bookmarks',
378 b'bookmarks',
379 b'narrowspec',
379 b'narrowspec',
380 b'data',
380 b'data',
381 b'meta',
381 b'meta',
382 b'00manifest.d',
382 b'00manifest.d',
383 b'00manifest.i',
383 b'00manifest.i',
384 b'00changelog.d',
384 b'00changelog.d',
385 b'00changelog.i',
385 b'00changelog.i',
386 b'phaseroots',
386 b'phaseroots',
387 b'obsstore',
387 b'obsstore',
388 b'requires',
388 b'requires',
389 ]
389 ]
390
390
391 REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
391 REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
392 REVLOG_FILES_OTHER_EXT = (
392 REVLOG_FILES_OTHER_EXT = (
393 b'.idx',
393 b'.idx',
394 b'.d',
394 b'.d',
395 b'.dat',
395 b'.dat',
396 b'.n',
396 b'.n',
397 b'.nd',
397 b'.nd',
398 b'.sda',
398 b'.sda',
399 b'd.tmpcensored',
399 b'd.tmpcensored',
400 )
400 )
401 # files that are "volatile" and might change between listing and streaming
401 # files that are "volatile" and might change between listing and streaming
402 #
402 #
403 # note: the ".nd" files are nodemap data and won't "change" but they might be
403 # note: the ".nd" files are nodemap data and won't "change" but they might be
404 # deleted.
404 # deleted.
405 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
405 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
406
406
407 # some exception to the above matching
407 # some exception to the above matching
408 #
408 #
409 # XXX This is currently not in use because of issue6542
409 # XXX This is currently not in use because of issue6542
410 EXCLUDED = re.compile(b'.*undo\.[^/]+\.(nd?|i)$')
410 EXCLUDED = re.compile(b'.*undo\.[^/]+\.(nd?|i)$')
411
411
412
412
413 def is_revlog(f, kind, st):
413 def is_revlog(f, kind, st):
414 if kind != stat.S_IFREG:
414 if kind != stat.S_IFREG:
415 return None
415 return None
416 return revlog_type(f)
416 return revlog_type(f)
417
417
418
418
419 def revlog_type(f):
419 def revlog_type(f):
420 # XXX we need to filter `undo.` created by the transaction here, however
420 # XXX we need to filter `undo.` created by the transaction here, however
421 # being naive about it also filter revlog for `undo.*` files, leading to
421 # being naive about it also filter revlog for `undo.*` files, leading to
422 # issue6542. So we no longer use EXCLUDED.
422 # issue6542. So we no longer use EXCLUDED.
423 if f.endswith(REVLOG_FILES_MAIN_EXT):
423 if f.endswith(REVLOG_FILES_MAIN_EXT):
424 return FILEFLAGS_REVLOG_MAIN
424 return FILEFLAGS_REVLOG_MAIN
425 elif f.endswith(REVLOG_FILES_OTHER_EXT):
425 elif f.endswith(REVLOG_FILES_OTHER_EXT):
426 t = FILETYPE_FILELOG_OTHER
426 t = FILETYPE_FILELOG_OTHER
427 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
427 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
428 t |= FILEFLAGS_VOLATILE
428 t |= FILEFLAGS_VOLATILE
429 return t
429 return t
430 return None
430 return None
431
431
432
432
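To make the classification above concrete, a few illustrative calls (the flag constants are defined just below; these are sketches, not doctests):

    revlog_type(b'data/foo.i')    # FILEFLAGS_REVLOG_MAIN: main revlog entry point
    revlog_type(b'data/foo.d')    # FILETYPE_FILELOG_OTHER: secondary revlog file
    revlog_type(b'data/foo.nd')   # FILETYPE_FILELOG_OTHER | FILEFLAGS_VOLATILE
    revlog_type(b'data/foo.txt')  # None: not a revlog file
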
433 # the file is part of changelog data
433 # the file is part of changelog data
434 FILEFLAGS_CHANGELOG = 1 << 13
434 FILEFLAGS_CHANGELOG = 1 << 13
435 # the file is part of manifest data
435 # the file is part of manifest data
436 FILEFLAGS_MANIFESTLOG = 1 << 12
436 FILEFLAGS_MANIFESTLOG = 1 << 12
437 # the file is part of filelog data
437 # the file is part of filelog data
438 FILEFLAGS_FILELOG = 1 << 11
438 FILEFLAGS_FILELOG = 1 << 11
439 # files that are not directly part of a revlog
439 # files that are not directly part of a revlog
440 FILEFLAGS_OTHER = 1 << 10
440 FILEFLAGS_OTHER = 1 << 10
441
441
442 # the main entry point for a revlog
442 # the main entry point for a revlog
443 FILEFLAGS_REVLOG_MAIN = 1 << 1
443 FILEFLAGS_REVLOG_MAIN = 1 << 1
444 # a secondary file for a revlog
444 # a secondary file for a revlog
445 FILEFLAGS_REVLOG_OTHER = 1 << 0
445 FILEFLAGS_REVLOG_OTHER = 1 << 0
446
446
447 # files that are "volatile" and might change between listing and streaming
447 # files that are "volatile" and might change between listing and streaming
448 FILEFLAGS_VOLATILE = 1 << 20
448 FILEFLAGS_VOLATILE = 1 << 20
449
449
450 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
450 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
451 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
451 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
452 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
452 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
453 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
453 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
454 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
454 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
455 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
455 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
456 FILETYPE_OTHER = FILEFLAGS_OTHER
456 FILETYPE_OTHER = FILEFLAGS_OTHER
457
457
458
458
459 class basicstore(object):
459 class basicstore(object):
460 '''base class for local repository stores'''
460 '''base class for local repository stores'''
461
461
462 def __init__(self, path, vfstype):
462 def __init__(self, path, vfstype):
463 vfs = vfstype(path)
463 vfs = vfstype(path)
464 self.path = vfs.base
464 self.path = vfs.base
465 self.createmode = _calcmode(vfs)
465 self.createmode = _calcmode(vfs)
466 vfs.createmode = self.createmode
466 vfs.createmode = self.createmode
467 self.rawvfs = vfs
467 self.rawvfs = vfs
468 self.vfs = vfsmod.filtervfs(vfs, encodedir)
468 self.vfs = vfsmod.filtervfs(vfs, encodedir)
469 self.opener = self.vfs
469 self.opener = self.vfs
470
470
471 def join(self, f):
471 def join(self, f):
472 return self.path + b'/' + encodedir(f)
472 return self.path + b'/' + encodedir(f)
473
473
474 def _walk(self, relpath, recurse):
474 def _walk(self, relpath, recurse):
475 '''yields (unencoded, encoded, size)'''
475 '''yields (revlog_type, unencoded, size)'''
476 path = self.path
476 path = self.path
477 if relpath:
477 if relpath:
478 path += b'/' + relpath
478 path += b'/' + relpath
479 striplen = len(self.path) + 1
479 striplen = len(self.path) + 1
480 l = []
480 l = []
481 if self.rawvfs.isdir(path):
481 if self.rawvfs.isdir(path):
482 visit = [path]
482 visit = [path]
483 readdir = self.rawvfs.readdir
483 readdir = self.rawvfs.readdir
484 while visit:
484 while visit:
485 p = visit.pop()
485 p = visit.pop()
486 for f, kind, st in readdir(p, stat=True):
486 for f, kind, st in readdir(p, stat=True):
487 fp = p + b'/' + f
487 fp = p + b'/' + f
488 rl_type = is_revlog(f, kind, st)
488 rl_type = is_revlog(f, kind, st)
489 if rl_type is not None:
489 if rl_type is not None:
490 n = util.pconvert(fp[striplen:])
490 n = util.pconvert(fp[striplen:])
491 l.append((rl_type, decodedir(n), n, st.st_size))
491 l.append((rl_type, decodedir(n), st.st_size))
492 elif kind == stat.S_IFDIR and recurse:
492 elif kind == stat.S_IFDIR and recurse:
493 visit.append(fp)
493 visit.append(fp)
494 l.sort()
494 l.sort()
495 return l
495 return l
496
496
497 def changelog(self, trypending, concurrencychecker=None):
497 def changelog(self, trypending, concurrencychecker=None):
498 return changelog.changelog(
498 return changelog.changelog(
499 self.vfs,
499 self.vfs,
500 trypending=trypending,
500 trypending=trypending,
501 concurrencychecker=concurrencychecker,
501 concurrencychecker=concurrencychecker,
502 )
502 )
503
503
504 def manifestlog(self, repo, storenarrowmatch):
504 def manifestlog(self, repo, storenarrowmatch):
505 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
505 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
506 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
506 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
507
507
508 def datafiles(self, matcher=None):
508 def datafiles(self, matcher=None, undecodable=None):
509 """Like walk, but excluding the changelog and root manifest.
510
511 When [undecodable] is None, revlog names that can't be
512 decoded cause an exception. When it is provided, it should
513 be a list and the filenames that can't be decoded are added
514 to it instead. This is very rarely needed."""
509 files = self._walk(b'data', True) + self._walk(b'meta', True)
515 files = self._walk(b'data', True) + self._walk(b'meta', True)
510 for (t, u, e, s) in files:
516 for (t, u, s) in files:
511 yield (FILEFLAGS_FILELOG | t, u, e, s)
517 yield (FILEFLAGS_FILELOG | t, u, s)
512
518
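A hedged sketch of how a caller could use the new undecodable argument documented above; the repo object and the warning loop are assumptions made for illustration only:

    undecodable = []
    for file_type, name, size in repo.store.datafiles(undecodable=undecodable):
        pass  # `name` is the decoded filelog path, `size` its on-disk size
    for raw_name in undecodable:
        repo.ui.warn(b'could not decode revlog name: %s\n' % raw_name)
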
513 def topfiles(self):
519 def topfiles(self):
514 # yield manifest before changelog
520 # yield manifest before changelog
515 files = reversed(self._walk(b'', False))
521 files = reversed(self._walk(b'', False))
516 for (t, u, e, s) in files:
522 for (t, u, s) in files:
517 if u.startswith(b'00changelog'):
523 if u.startswith(b'00changelog'):
518 yield (FILEFLAGS_CHANGELOG | t, u, e, s)
524 yield (FILEFLAGS_CHANGELOG | t, u, s)
519 elif u.startswith(b'00manifest'):
525 elif u.startswith(b'00manifest'):
520 yield (FILEFLAGS_MANIFESTLOG | t, u, e, s)
526 yield (FILEFLAGS_MANIFESTLOG | t, u, s)
521 else:
527 else:
522 yield (FILETYPE_OTHER | t, u, e, s)
528 yield (FILETYPE_OTHER | t, u, s)
523
529
524 def walk(self, matcher=None):
530 def walk(self, matcher=None):
525 """return file related to data storage (ie: revlogs)
531 """return file related to data storage (ie: revlogs)
526
532
527 yields (file_type, unencoded, encoded, size)
533 yields (file_type, unencoded, size)
528
534
529 if a matcher is passed, storage files are yielded only for the
535 if a matcher is passed, storage files are yielded only for the
530 tracked paths that match the matcher
536 tracked paths that match the matcher
531 """
537 """
532 # yield data files first
538 # yield data files first
533 for x in self.datafiles(matcher):
539 for x in self.datafiles(matcher):
534 yield x
540 yield x
535 for x in self.topfiles():
541 for x in self.topfiles():
536 yield x
542 yield x
537
543
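With this change walk() yields three-item tuples, so consumers unpack (file_type, unencoded, size) rather than the old four-item form. A minimal sketch, assuming a repo object:

    from mercurial import store

    filelog_bytes = 0
    for file_type, name, size in repo.store.walk():
        if file_type & store.FILEFLAGS_FILELOG:
            filelog_bytes += size
    # filelog_bytes: combined on-disk size of the filelog revlogs
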
538 def copylist(self):
544 def copylist(self):
539 return _data
545 return _data
540
546
541 def write(self, tr):
547 def write(self, tr):
542 pass
548 pass
543
549
544 def invalidatecaches(self):
550 def invalidatecaches(self):
545 pass
551 pass
546
552
547 def markremoved(self, fn):
553 def markremoved(self, fn):
548 pass
554 pass
549
555
550 def __contains__(self, path):
556 def __contains__(self, path):
551 '''Checks if the store contains path'''
557 '''Checks if the store contains path'''
552 path = b"/".join((b"data", path))
558 path = b"/".join((b"data", path))
553 # file?
559 # file?
554 if self.vfs.exists(path + b".i"):
560 if self.vfs.exists(path + b".i"):
555 return True
561 return True
556 # dir?
562 # dir?
557 if not path.endswith(b"/"):
563 if not path.endswith(b"/"):
558 path = path + b"/"
564 path = path + b"/"
559 return self.vfs.exists(path)
565 return self.vfs.exists(path)
560
566
561
567
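The __contains__ above lets callers ask the store directly whether a path is tracked; a small sketch, assuming a repo object:

    b'foo/bar.txt' in repo.store   # checks for the filelog data/foo/bar.txt.i
    b'foo' in repo.store           # otherwise a directory check under data/foo/
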
562 class encodedstore(basicstore):
568 class encodedstore(basicstore):
563 def __init__(self, path, vfstype):
569 def __init__(self, path, vfstype):
564 vfs = vfstype(path + b'/store')
570 vfs = vfstype(path + b'/store')
565 self.path = vfs.base
571 self.path = vfs.base
566 self.createmode = _calcmode(vfs)
572 self.createmode = _calcmode(vfs)
567 vfs.createmode = self.createmode
573 vfs.createmode = self.createmode
568 self.rawvfs = vfs
574 self.rawvfs = vfs
569 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
575 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
570 self.opener = self.vfs
576 self.opener = self.vfs
571
577
572 # note: topfiles would also need a decode phase. It is just that in
578 # note: topfiles would also need a decode phase. It is just that in
573 # practice we do not have any file outside of `data/` that needs encoding.
579 # practice we do not have any file outside of `data/` that needs encoding.
574 # However that might change so we should probably add a test and encoding
580 # However that might change so we should probably add a test and encoding
575 # decoding for it too. see issue6548
581 # decoding for it too. see issue6548
576
582
577 def datafiles(self, matcher=None):
583 def datafiles(self, matcher=None, undecodable=None):
578 for t, a, b, size in super(encodedstore, self).datafiles():
584 for t, f1, size in super(encodedstore, self).datafiles():
579 try:
585 try:
580 a = decodefilename(a)
586 f2 = decodefilename(f1)
581 except KeyError:
587 except KeyError:
582 a = None
588 if undecodable is None:
583 if a is not None and not _matchtrackedpath(a, matcher):
589 msg = _(b'undecodable revlog name %s') % f1
590 raise error.StorageError(msg)
591 else:
592 undecodable.append(f1)
593 continue
594 if not _matchtrackedpath(f2, matcher):
584 continue
595 continue
585 yield t, a, b, size
596 yield t, f2, size
586
597
587 def join(self, f):
598 def join(self, f):
588 return self.path + b'/' + encodefilename(f)
599 return self.path + b'/' + encodefilename(f)
589
600
590 def copylist(self):
601 def copylist(self):
591 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
602 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
592
603
593
604
594 class fncache(object):
605 class fncache(object):
595 # the filename used to be partially encoded
606 # the filename used to be partially encoded
596 # hence the encodedir/decodedir dance
607 # hence the encodedir/decodedir dance
597 def __init__(self, vfs):
608 def __init__(self, vfs):
598 self.vfs = vfs
609 self.vfs = vfs
599 self.entries = None
610 self.entries = None
600 self._dirty = False
611 self._dirty = False
601 # set of new additions to fncache
612 # set of new additions to fncache
602 self.addls = set()
613 self.addls = set()
603
614
604 def ensureloaded(self, warn=None):
615 def ensureloaded(self, warn=None):
605 """read the fncache file if not already read.
616 """read the fncache file if not already read.
606
617
607 If the file on disk is corrupted, raise. If warn is provided,
618 If the file on disk is corrupted, raise. If warn is provided,
608 warn and keep going instead."""
619 warn and keep going instead."""
609 if self.entries is None:
620 if self.entries is None:
610 self._load(warn)
621 self._load(warn)
611
622
612 def _load(self, warn=None):
623 def _load(self, warn=None):
613 '''fill the entries from the fncache file'''
624 '''fill the entries from the fncache file'''
614 self._dirty = False
625 self._dirty = False
615 try:
626 try:
616 fp = self.vfs(b'fncache', mode=b'rb')
627 fp = self.vfs(b'fncache', mode=b'rb')
617 except IOError:
628 except IOError:
618 # skip nonexistent file
629 # skip nonexistent file
619 self.entries = set()
630 self.entries = set()
620 return
631 return
621
632
622 self.entries = set()
633 self.entries = set()
623 chunk = b''
634 chunk = b''
624 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
635 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
625 chunk += c
636 chunk += c
626 try:
637 try:
627 p = chunk.rindex(b'\n')
638 p = chunk.rindex(b'\n')
628 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
639 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
629 chunk = chunk[p + 1 :]
640 chunk = chunk[p + 1 :]
630 except ValueError:
641 except ValueError:
631 # substring '\n' not found, maybe the entry is bigger than the
642 # substring '\n' not found, maybe the entry is bigger than the
632 # chunksize, so let's keep iterating
643 # chunksize, so let's keep iterating
633 pass
644 pass
634
645
635 if chunk:
646 if chunk:
636 msg = _(b"fncache does not ends with a newline")
647 msg = _(b"fncache does not ends with a newline")
637 if warn:
648 if warn:
638 warn(msg + b'\n')
649 warn(msg + b'\n')
639 else:
650 else:
640 raise error.Abort(
651 raise error.Abort(
641 msg,
652 msg,
642 hint=_(
653 hint=_(
643 b"use 'hg debugrebuildfncache' to "
654 b"use 'hg debugrebuildfncache' to "
644 b"rebuild the fncache"
655 b"rebuild the fncache"
645 ),
656 ),
646 )
657 )
647 self._checkentries(fp, warn)
658 self._checkentries(fp, warn)
648 fp.close()
659 fp.close()
649
660
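The _load above reads the fncache in fixed-size chunks, splits on the last newline seen so far and carries the remainder into the next iteration, so a very large fncache never has to sit in memory at once. The same pattern in isolation, as a generic sketch independent of the vfs layer:

    import functools

    def iter_lines_chunked(fp, chunksize=10 ** 6):
        buf = b''
        for chunk in iter(functools.partial(fp.read, chunksize), b''):
            buf += chunk
            try:
                p = buf.rindex(b'\n')
            except ValueError:
                continue  # no newline in the buffer yet, keep accumulating
            for line in buf[: p + 1].splitlines():
                yield line
            buf = buf[p + 1 :]
        if buf:
            yield buf  # trailing bytes without a final newline
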
650 def _checkentries(self, fp, warn):
661 def _checkentries(self, fp, warn):
651 """make sure there is no empty string in entries"""
662 """make sure there is no empty string in entries"""
652 if b'' in self.entries:
663 if b'' in self.entries:
653 fp.seek(0)
664 fp.seek(0)
654 for n, line in enumerate(util.iterfile(fp)):
665 for n, line in enumerate(util.iterfile(fp)):
655 if not line.rstrip(b'\n'):
666 if not line.rstrip(b'\n'):
656 t = _(b'invalid entry in fncache, line %d') % (n + 1)
667 t = _(b'invalid entry in fncache, line %d') % (n + 1)
657 if warn:
668 if warn:
658 warn(t + b'\n')
669 warn(t + b'\n')
659 else:
670 else:
660 raise error.Abort(t)
671 raise error.Abort(t)
661
672
662 def write(self, tr):
673 def write(self, tr):
663 if self._dirty:
674 if self._dirty:
664 assert self.entries is not None
675 assert self.entries is not None
665 self.entries = self.entries | self.addls
676 self.entries = self.entries | self.addls
666 self.addls = set()
677 self.addls = set()
667 tr.addbackup(b'fncache')
678 tr.addbackup(b'fncache')
668 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
679 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
669 if self.entries:
680 if self.entries:
670 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
681 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
671 fp.close()
682 fp.close()
672 self._dirty = False
683 self._dirty = False
673 if self.addls:
684 if self.addls:
674 # if we have just new entries, let's append them to the fncache
685 # if we have just new entries, let's append them to the fncache
675 tr.addbackup(b'fncache')
686 tr.addbackup(b'fncache')
676 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
687 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
677 if self.addls:
688 if self.addls:
678 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
689 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
679 fp.close()
690 fp.close()
680 self.entries = None
691 self.entries = None
681 self.addls = set()
692 self.addls = set()
682
693
683 def add(self, fn):
694 def add(self, fn):
684 if self.entries is None:
695 if self.entries is None:
685 self._load()
696 self._load()
686 if fn not in self.entries:
697 if fn not in self.entries:
687 self.addls.add(fn)
698 self.addls.add(fn)
688
699
689 def remove(self, fn):
700 def remove(self, fn):
690 if self.entries is None:
701 if self.entries is None:
691 self._load()
702 self._load()
692 if fn in self.addls:
703 if fn in self.addls:
693 self.addls.remove(fn)
704 self.addls.remove(fn)
694 return
705 return
695 try:
706 try:
696 self.entries.remove(fn)
707 self.entries.remove(fn)
697 self._dirty = True
708 self._dirty = True
698 except KeyError:
709 except KeyError:
699 pass
710 pass
700
711
701 def __contains__(self, fn):
712 def __contains__(self, fn):
702 if fn in self.addls:
713 if fn in self.addls:
703 return True
714 return True
704 if self.entries is None:
715 if self.entries is None:
705 self._load()
716 self._load()
706 return fn in self.entries
717 return fn in self.entries
707
718
708 def __iter__(self):
719 def __iter__(self):
709 if self.entries is None:
720 if self.entries is None:
710 self._load()
721 self._load()
711 return iter(self.entries | self.addls)
722 return iter(self.entries | self.addls)
712
723
713
724
714 class _fncachevfs(vfsmod.proxyvfs):
725 class _fncachevfs(vfsmod.proxyvfs):
715 def __init__(self, vfs, fnc, encode):
726 def __init__(self, vfs, fnc, encode):
716 vfsmod.proxyvfs.__init__(self, vfs)
727 vfsmod.proxyvfs.__init__(self, vfs)
717 self.fncache = fnc
728 self.fncache = fnc
718 self.encode = encode
729 self.encode = encode
719
730
720 def __call__(self, path, mode=b'r', *args, **kw):
731 def __call__(self, path, mode=b'r', *args, **kw):
721 encoded = self.encode(path)
732 encoded = self.encode(path)
722 if mode not in (b'r', b'rb') and (
733 if mode not in (b'r', b'rb') and (
723 path.startswith(b'data/') or path.startswith(b'meta/')
734 path.startswith(b'data/') or path.startswith(b'meta/')
724 ):
735 ):
725 # do not trigger a fncache load when adding a file that already is
736 # do not trigger a fncache load when adding a file that already is
726 # known to exist.
737 # known to exist.
727 notload = self.fncache.entries is None and self.vfs.exists(encoded)
738 notload = self.fncache.entries is None and self.vfs.exists(encoded)
728 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
739 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
729 # when appending to an existing file, if the file has size zero,
740 # when appending to an existing file, if the file has size zero,
730 # it should be considered as missing. Such zero-size files are
741 # it should be considered as missing. Such zero-size files are
731 # the result of truncation when a transaction is aborted.
742 # the result of truncation when a transaction is aborted.
732 notload = False
743 notload = False
733 if not notload:
744 if not notload:
734 self.fncache.add(path)
745 self.fncache.add(path)
735 return self.vfs(encoded, mode, *args, **kw)
746 return self.vfs(encoded, mode, *args, **kw)
736
747
737 def join(self, path):
748 def join(self, path):
738 if path:
749 if path:
739 return self.vfs.join(self.encode(path))
750 return self.vfs.join(self.encode(path))
740 else:
751 else:
741 return self.vfs.join(path)
752 return self.vfs.join(path)
742
753
743 def register_file(self, path):
754 def register_file(self, path):
744 """generic hook point to lets fncache steer its stew"""
755 """generic hook point to lets fncache steer its stew"""
745 if path.startswith(b'data/') or path.startswith(b'meta/'):
756 if path.startswith(b'data/') or path.startswith(b'meta/'):
746 self.fncache.add(path)
757 self.fncache.add(path)
747
758
748
759
749 class fncachestore(basicstore):
760 class fncachestore(basicstore):
750 def __init__(self, path, vfstype, dotencode):
761 def __init__(self, path, vfstype, dotencode):
751 if dotencode:
762 if dotencode:
752 encode = _pathencode
763 encode = _pathencode
753 else:
764 else:
754 encode = _plainhybridencode
765 encode = _plainhybridencode
755 self.encode = encode
766 self.encode = encode
756 vfs = vfstype(path + b'/store')
767 vfs = vfstype(path + b'/store')
757 self.path = vfs.base
768 self.path = vfs.base
758 self.pathsep = self.path + b'/'
769 self.pathsep = self.path + b'/'
759 self.createmode = _calcmode(vfs)
770 self.createmode = _calcmode(vfs)
760 vfs.createmode = self.createmode
771 vfs.createmode = self.createmode
761 self.rawvfs = vfs
772 self.rawvfs = vfs
762 fnc = fncache(vfs)
773 fnc = fncache(vfs)
763 self.fncache = fnc
774 self.fncache = fnc
764 self.vfs = _fncachevfs(vfs, fnc, encode)
775 self.vfs = _fncachevfs(vfs, fnc, encode)
765 self.opener = self.vfs
776 self.opener = self.vfs
766
777
767 def join(self, f):
778 def join(self, f):
768 return self.pathsep + self.encode(f)
779 return self.pathsep + self.encode(f)
769
780
770 def getsize(self, path):
781 def getsize(self, path):
771 return self.rawvfs.stat(path).st_size
782 return self.rawvfs.stat(path).st_size
772
783
773 def datafiles(self, matcher=None):
784 def datafiles(self, matcher=None, undecodable=None):
774 for f in sorted(self.fncache):
785 for f in sorted(self.fncache):
775 if not _matchtrackedpath(f, matcher):
786 if not _matchtrackedpath(f, matcher):
776 continue
787 continue
777 ef = self.encode(f)
788 ef = self.encode(f)
778 try:
789 try:
779 t = revlog_type(f)
790 t = revlog_type(f)
780 assert t is not None, f
791 assert t is not None, f
781 t |= FILEFLAGS_FILELOG
792 t |= FILEFLAGS_FILELOG
782 yield t, f, ef, self.getsize(ef)
793 yield t, f, self.getsize(ef)
783 except OSError as err:
794 except OSError as err:
784 if err.errno != errno.ENOENT:
795 if err.errno != errno.ENOENT:
785 raise
796 raise
786
797
787 def copylist(self):
798 def copylist(self):
788 d = (
799 d = (
789 b'bookmarks',
800 b'bookmarks',
790 b'narrowspec',
801 b'narrowspec',
791 b'data',
802 b'data',
792 b'meta',
803 b'meta',
793 b'dh',
804 b'dh',
794 b'fncache',
805 b'fncache',
795 b'phaseroots',
806 b'phaseroots',
796 b'obsstore',
807 b'obsstore',
797 b'00manifest.d',
808 b'00manifest.d',
798 b'00manifest.i',
809 b'00manifest.i',
799 b'00changelog.d',
810 b'00changelog.d',
800 b'00changelog.i',
811 b'00changelog.i',
801 b'requires',
812 b'requires',
802 )
813 )
803 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
814 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
804
815
805 def write(self, tr):
816 def write(self, tr):
806 self.fncache.write(tr)
817 self.fncache.write(tr)
807
818
808 def invalidatecaches(self):
819 def invalidatecaches(self):
809 self.fncache.entries = None
820 self.fncache.entries = None
810 self.fncache.addls = set()
821 self.fncache.addls = set()
811
822
812 def markremoved(self, fn):
823 def markremoved(self, fn):
813 self.fncache.remove(fn)
824 self.fncache.remove(fn)
814
825
815 def _exists(self, f):
826 def _exists(self, f):
816 ef = self.encode(f)
827 ef = self.encode(f)
817 try:
828 try:
818 self.getsize(ef)
829 self.getsize(ef)
819 return True
830 return True
820 except OSError as err:
831 except OSError as err:
821 if err.errno != errno.ENOENT:
832 if err.errno != errno.ENOENT:
822 raise
833 raise
823 # nonexistent entry
834 # nonexistent entry
824 return False
835 return False
825
836
826 def __contains__(self, path):
837 def __contains__(self, path):
827 '''Checks if the store contains path'''
838 '''Checks if the store contains path'''
828 path = b"/".join((b"data", path))
839 path = b"/".join((b"data", path))
829 # check for files (exact match)
840 # check for files (exact match)
830 e = path + b'.i'
841 e = path + b'.i'
831 if e in self.fncache and self._exists(e):
842 if e in self.fncache and self._exists(e):
832 return True
843 return True
833 # now check for directories (prefix match)
844 # now check for directories (prefix match)
834 if not path.endswith(b'/'):
845 if not path.endswith(b'/'):
835 path += b'/'
846 path += b'/'
836 for e in self.fncache:
847 for e in self.fncache:
837 if e.startswith(path) and self._exists(e):
848 if e.startswith(path) and self._exists(e):
838 return True
849 return True
839 return False
850 return False
@@ -1,918 +1,918 b''
1 # streamclone.py - producing and consuming streaming repository data
1 # streamclone.py - producing and consuming streaming repository data
2 #
2 #
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import contextlib
10 import contextlib
11 import errno
11 import errno
12 import os
12 import os
13 import struct
13 import struct
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import open
16 from .pycompat import open
17 from .interfaces import repository
17 from .interfaces import repository
18 from . import (
18 from . import (
19 bookmarks,
19 bookmarks,
20 cacheutil,
20 cacheutil,
21 error,
21 error,
22 narrowspec,
22 narrowspec,
23 phases,
23 phases,
24 pycompat,
24 pycompat,
25 requirements as requirementsmod,
25 requirements as requirementsmod,
26 scmutil,
26 scmutil,
27 store,
27 store,
28 util,
28 util,
29 )
29 )
30 from .utils import (
30 from .utils import (
31 stringutil,
31 stringutil,
32 )
32 )
33
33
34
34
35 def canperformstreamclone(pullop, bundle2=False):
35 def canperformstreamclone(pullop, bundle2=False):
36 """Whether it is possible to perform a streaming clone as part of pull.
36 """Whether it is possible to perform a streaming clone as part of pull.
37
37
38 ``bundle2`` will cause the function to consider stream clone through
38 ``bundle2`` will cause the function to consider stream clone through
39 bundle2 and only through bundle2.
39 bundle2 and only through bundle2.
40
40
41 Returns a tuple of (supported, requirements). ``supported`` is True if
41 Returns a tuple of (supported, requirements). ``supported`` is True if
42 streaming clone is supported and False otherwise. ``requirements`` is
42 streaming clone is supported and False otherwise. ``requirements`` is
43 a set of repo requirements from the remote, or ``None`` if stream clone
43 a set of repo requirements from the remote, or ``None`` if stream clone
44 isn't supported.
44 isn't supported.
45 """
45 """
46 repo = pullop.repo
46 repo = pullop.repo
47 remote = pullop.remote
47 remote = pullop.remote
48
48
49 bundle2supported = False
49 bundle2supported = False
50 if pullop.canusebundle2:
50 if pullop.canusebundle2:
51 if b'v2' in pullop.remotebundle2caps.get(b'stream', []):
51 if b'v2' in pullop.remotebundle2caps.get(b'stream', []):
52 bundle2supported = True
52 bundle2supported = True
53 # else
53 # else
54 # Server doesn't support bundle2 stream clone or doesn't support
54 # Server doesn't support bundle2 stream clone or doesn't support
55 # the versions we support. Fall back and possibly allow legacy.
55 # the versions we support. Fall back and possibly allow legacy.
56
56
57 # Ensures legacy code path uses available bundle2.
57 # Ensures legacy code path uses available bundle2.
58 if bundle2supported and not bundle2:
58 if bundle2supported and not bundle2:
59 return False, None
59 return False, None
60 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
60 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
61 elif bundle2 and not bundle2supported:
61 elif bundle2 and not bundle2supported:
62 return False, None
62 return False, None
63
63
64 # Streaming clone only works on empty repositories.
64 # Streaming clone only works on empty repositories.
65 if len(repo):
65 if len(repo):
66 return False, None
66 return False, None
67
67
68 # Streaming clone only works if all data is being requested.
68 # Streaming clone only works if all data is being requested.
69 if pullop.heads:
69 if pullop.heads:
70 return False, None
70 return False, None
71
71
72 streamrequested = pullop.streamclonerequested
72 streamrequested = pullop.streamclonerequested
73
73
74 # If we don't have a preference, let the server decide for us. This
74 # If we don't have a preference, let the server decide for us. This
75 # likely only comes into play in LANs.
75 # likely only comes into play in LANs.
76 if streamrequested is None:
76 if streamrequested is None:
77 # The server can advertise whether to prefer streaming clone.
77 # The server can advertise whether to prefer streaming clone.
78 streamrequested = remote.capable(b'stream-preferred')
78 streamrequested = remote.capable(b'stream-preferred')
79
79
80 if not streamrequested:
80 if not streamrequested:
81 return False, None
81 return False, None
82
82
83 # In order for stream clone to work, the client has to support all the
83 # In order for stream clone to work, the client has to support all the
84 # requirements advertised by the server.
84 # requirements advertised by the server.
85 #
85 #
86 # The server advertises its requirements via the "stream" and "streamreqs"
86 # The server advertises its requirements via the "stream" and "streamreqs"
87 # capability. "stream" (a value-less capability) is advertised if and only
87 # capability. "stream" (a value-less capability) is advertised if and only
88 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
88 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
89 # is advertised and contains a comma-delimited list of requirements.
89 # is advertised and contains a comma-delimited list of requirements.
90 requirements = set()
90 requirements = set()
91 if remote.capable(b'stream'):
91 if remote.capable(b'stream'):
92 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
92 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
93 else:
93 else:
94 streamreqs = remote.capable(b'streamreqs')
94 streamreqs = remote.capable(b'streamreqs')
95 # This is weird and shouldn't happen with modern servers.
95 # This is weird and shouldn't happen with modern servers.
96 if not streamreqs:
96 if not streamreqs:
97 pullop.repo.ui.warn(
97 pullop.repo.ui.warn(
98 _(
98 _(
99 b'warning: stream clone requested but server has them '
99 b'warning: stream clone requested but server has them '
100 b'disabled\n'
100 b'disabled\n'
101 )
101 )
102 )
102 )
103 return False, None
103 return False, None
104
104
105 streamreqs = set(streamreqs.split(b','))
105 streamreqs = set(streamreqs.split(b','))
106 # Server requires something we don't support. Bail.
106 # Server requires something we don't support. Bail.
107 missingreqs = streamreqs - repo.supportedformats
107 missingreqs = streamreqs - repo.supportedformats
108 if missingreqs:
108 if missingreqs:
109 pullop.repo.ui.warn(
109 pullop.repo.ui.warn(
110 _(
110 _(
111 b'warning: stream clone requested but client is missing '
111 b'warning: stream clone requested but client is missing '
112 b'requirements: %s\n'
112 b'requirements: %s\n'
113 )
113 )
114 % b', '.join(sorted(missingreqs))
114 % b', '.join(sorted(missingreqs))
115 )
115 )
116 pullop.repo.ui.warn(
116 pullop.repo.ui.warn(
117 _(
117 _(
118 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
118 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
119 b'for more information)\n'
119 b'for more information)\n'
120 )
120 )
121 )
121 )
122 return False, None
122 return False, None
123 requirements = streamreqs
123 requirements = streamreqs
124
124
125 return True, requirements
125 return True, requirements
126
126
127
127
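
# Editor's note: a minimal, self-contained sketch (not Mercurial API) of the
# capability negotiation described in the comments above. `caps` stands in for
# the remote's advertised capabilities and `supported` for
# repo.supportedformats; both names are hypothetical.
def _sketch_stream_requirements(caps, supported):
    """Return (ok, requirements) from advertised stream capabilities."""
    if b'stream' in caps:
        # value-less capability: the only requirement is revlogv1
        return True, {b'revlogv1'}
    streamreqs = caps.get(b'streamreqs')
    if not streamreqs:
        return False, None
    reqs = set(streamreqs.split(b','))
    if reqs - supported:
        return False, None  # the server needs something this client lacks
    return True, reqs

# example:
#   _sketch_stream_requirements({b'streamreqs': b'generaldelta,revlogv1'},
#                               {b'revlogv1', b'generaldelta'})
#   -> (True, {b'generaldelta', b'revlogv1'})
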
128 def maybeperformlegacystreamclone(pullop):
128 def maybeperformlegacystreamclone(pullop):
129 """Possibly perform a legacy stream clone operation.
129 """Possibly perform a legacy stream clone operation.
130
130
131 Legacy stream clones are performed as part of pull but before all other
131 Legacy stream clones are performed as part of pull but before all other
132 operations.
132 operations.
133
133
134 A legacy stream clone will not be performed if a bundle2 stream clone is
134 A legacy stream clone will not be performed if a bundle2 stream clone is
135 supported.
135 supported.
136 """
136 """
137 from . import localrepo
137 from . import localrepo
138
138
139 supported, requirements = canperformstreamclone(pullop)
139 supported, requirements = canperformstreamclone(pullop)
140
140
141 if not supported:
141 if not supported:
142 return
142 return
143
143
144 repo = pullop.repo
144 repo = pullop.repo
145 remote = pullop.remote
145 remote = pullop.remote
146
146
147 # Save remote branchmap. We will use it later to speed up branchcache
147 # Save remote branchmap. We will use it later to speed up branchcache
148 # creation.
148 # creation.
149 rbranchmap = None
149 rbranchmap = None
150 if remote.capable(b'branchmap'):
150 if remote.capable(b'branchmap'):
151 with remote.commandexecutor() as e:
151 with remote.commandexecutor() as e:
152 rbranchmap = e.callcommand(b'branchmap', {}).result()
152 rbranchmap = e.callcommand(b'branchmap', {}).result()
153
153
154 repo.ui.status(_(b'streaming all changes\n'))
154 repo.ui.status(_(b'streaming all changes\n'))
155
155
156 with remote.commandexecutor() as e:
156 with remote.commandexecutor() as e:
157 fp = e.callcommand(b'stream_out', {}).result()
157 fp = e.callcommand(b'stream_out', {}).result()
158
158
159 # TODO strictly speaking, this code should all be inside the context
159 # TODO strictly speaking, this code should all be inside the context
160 # manager because the context manager is supposed to ensure all wire state
160 # manager because the context manager is supposed to ensure all wire state
161 # is flushed when exiting. But the legacy peers don't do this, so it
161 # is flushed when exiting. But the legacy peers don't do this, so it
162 # doesn't matter.
162 # doesn't matter.
163 l = fp.readline()
163 l = fp.readline()
164 try:
164 try:
165 resp = int(l)
165 resp = int(l)
166 except ValueError:
166 except ValueError:
167 raise error.ResponseError(
167 raise error.ResponseError(
168 _(b'unexpected response from remote server:'), l
168 _(b'unexpected response from remote server:'), l
169 )
169 )
170 if resp == 1:
170 if resp == 1:
171 raise error.Abort(_(b'operation forbidden by server'))
171 raise error.Abort(_(b'operation forbidden by server'))
172 elif resp == 2:
172 elif resp == 2:
173 raise error.Abort(_(b'locking the remote repository failed'))
173 raise error.Abort(_(b'locking the remote repository failed'))
174 elif resp != 0:
174 elif resp != 0:
175 raise error.Abort(_(b'the server sent an unknown error code'))
175 raise error.Abort(_(b'the server sent an unknown error code'))
176
176
177 l = fp.readline()
177 l = fp.readline()
178 try:
178 try:
179 filecount, bytecount = map(int, l.split(b' ', 1))
179 filecount, bytecount = map(int, l.split(b' ', 1))
180 except (ValueError, TypeError):
180 except (ValueError, TypeError):
181 raise error.ResponseError(
181 raise error.ResponseError(
182 _(b'unexpected response from remote server:'), l
182 _(b'unexpected response from remote server:'), l
183 )
183 )
184
184
185 with repo.lock():
185 with repo.lock():
186 consumev1(repo, fp, filecount, bytecount)
186 consumev1(repo, fp, filecount, bytecount)
187
187
188 # new requirements = old non-format requirements +
188 # new requirements = old non-format requirements +
189 # new format-related remote requirements
189 # new format-related remote requirements
190 # requirements from the streamed-in repository
190 # requirements from the streamed-in repository
191 repo.requirements = requirements | (
191 repo.requirements = requirements | (
192 repo.requirements - repo.supportedformats
192 repo.requirements - repo.supportedformats
193 )
193 )
194 repo.svfs.options = localrepo.resolvestorevfsoptions(
194 repo.svfs.options = localrepo.resolvestorevfsoptions(
195 repo.ui, repo.requirements, repo.features
195 repo.ui, repo.requirements, repo.features
196 )
196 )
197 scmutil.writereporequirements(repo)
197 scmutil.writereporequirements(repo)
198
198
199 if rbranchmap:
199 if rbranchmap:
200 repo._branchcaches.replace(repo, rbranchmap)
200 repo._branchcaches.replace(repo, rbranchmap)
201
201
202 repo.invalidate()
202 repo.invalidate()
203
203
204
204
205 def allowservergeneration(repo):
205 def allowservergeneration(repo):
206 """Whether streaming clones are allowed from the server."""
206 """Whether streaming clones are allowed from the server."""
207 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
207 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
208 return False
208 return False
209
209
210 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
210 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
211 return False
211 return False
212
212
213 # The way stream clone works makes it impossible to hide secret changesets.
213 # The way stream clone works makes it impossible to hide secret changesets.
214 # So don't allow this by default.
214 # So don't allow this by default.
215 secret = phases.hassecret(repo)
215 secret = phases.hassecret(repo)
216 if secret:
216 if secret:
217 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
217 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
218
218
219 return True
219 return True
220
220
221
221
222 # This is its own function so extensions can override it.
222 # This is its own function so extensions can override it.
223 def _walkstreamfiles(repo, matcher=None):
223 def _walkstreamfiles(repo, matcher=None):
224 return repo.store.walk(matcher)
224 return repo.store.walk(matcher)
225
225
226
226
227 def generatev1(repo):
227 def generatev1(repo):
228 """Emit content for version 1 of a streaming clone.
228 """Emit content for version 1 of a streaming clone.
229
229
230 This returns a 3-tuple of (file count, byte size, data iterator).
230 This returns a 3-tuple of (file count, byte size, data iterator).
231
231
232 The data iterator consists of N entries for each file being transferred.
232 The data iterator consists of N entries for each file being transferred.
233 Each file entry starts as a line with the file name and integer size
233 Each file entry starts as a line with the file name and integer size
234 delimited by a null byte.
234 delimited by a null byte.
235
235
236 The raw file data follows. Following the raw file data is the next file
236 The raw file data follows. Following the raw file data is the next file
237 entry, or EOF.
237 entry, or EOF.
238
238
239 When used on the wire protocol, an additional line indicating protocol
239 When used on the wire protocol, an additional line indicating protocol
240 success will be prepended to the stream. This function is not responsible
240 success will be prepended to the stream. This function is not responsible
241 for adding it.
241 for adding it.
242
242
243 This function will obtain a repository lock to ensure a consistent view of
243 This function will obtain a repository lock to ensure a consistent view of
244 the store is captured. It therefore may raise LockError.
244 the store is captured. It therefore may raise LockError.
245 """
245 """
246 entries = []
246 entries = []
247 total_bytes = 0
247 total_bytes = 0
248 # Get consistent snapshot of repo, lock during scan.
248 # Get consistent snapshot of repo, lock during scan.
249 with repo.lock():
249 with repo.lock():
250 repo.ui.debug(b'scanning\n')
250 repo.ui.debug(b'scanning\n')
251 for file_type, name, ename, size in _walkstreamfiles(repo):
251 for file_type, name, size in _walkstreamfiles(repo):
252 if size:
252 if size:
253 entries.append((name, size))
253 entries.append((name, size))
254 total_bytes += size
254 total_bytes += size
255 _test_sync_point_walk_1(repo)
255 _test_sync_point_walk_1(repo)
256 _test_sync_point_walk_2(repo)
256 _test_sync_point_walk_2(repo)
257
257
258 repo.ui.debug(
258 repo.ui.debug(
259 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
259 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
260 )
260 )
261
261
262 svfs = repo.svfs
262 svfs = repo.svfs
263 debugflag = repo.ui.debugflag
263 debugflag = repo.ui.debugflag
264
264
265 def emitrevlogdata():
265 def emitrevlogdata():
266 for name, size in entries:
266 for name, size in entries:
267 if debugflag:
267 if debugflag:
268 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
268 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
269 # partially encode name over the wire for backwards compat
269 # partially encode name over the wire for backwards compat
270 yield b'%s\0%d\n' % (store.encodedir(name), size)
270 yield b'%s\0%d\n' % (store.encodedir(name), size)
271 # auditing at this stage is both pointless (paths are already
271 # auditing at this stage is both pointless (paths are already
272 # trusted by the local repo) and expensive
272 # trusted by the local repo) and expensive
273 with svfs(name, b'rb', auditpath=False) as fp:
273 with svfs(name, b'rb', auditpath=False) as fp:
274 if size <= 65536:
274 if size <= 65536:
275 yield fp.read(size)
275 yield fp.read(size)
276 else:
276 else:
277 for chunk in util.filechunkiter(fp, limit=size):
277 for chunk in util.filechunkiter(fp, limit=size):
278 yield chunk
278 yield chunk
279
279
280 return len(entries), total_bytes, emitrevlogdata()
280 return len(entries), total_bytes, emitrevlogdata()
281
281
282
282
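
# Editor's note: illustrative sketch (not part of Mercurial) of the per-file
# framing documented in generatev1() above: a b'<name>\0<size>\n' line
# followed by <size> raw bytes, repeated for each file.
def _sketch_read_v1_entries(fp, filecount):
    """Yield (name, data) pairs from a v1 stream payload."""
    for _ in range(filecount):
        header = fp.readline()  # b'<name>\0<size>\n'
        name, size = header.rstrip(b'\n').split(b'\0', 1)
        yield name, fp.read(int(size))

# example round trip with an in-memory stream:
#   import io
#   payload = b'00changelog.i\x005\nhello'
#   list(_sketch_read_v1_entries(io.BytesIO(payload), 1))
#   -> [(b'00changelog.i', b'hello')]
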
283 def generatev1wireproto(repo):
283 def generatev1wireproto(repo):
284 """Emit content for version 1 of streaming clone suitable for the wire.
284 """Emit content for version 1 of streaming clone suitable for the wire.
285
285
286 This is the data output from ``generatev1()`` with 2 header lines. The
286 This is the data output from ``generatev1()`` with 2 header lines. The
287 first line indicates overall success. The 2nd contains the file count and
287 first line indicates overall success. The 2nd contains the file count and
288 byte size of payload.
288 byte size of payload.
289
289
290 The success line contains "0" for success, "1" for stream generation not
290 The success line contains "0" for success, "1" for stream generation not
291 allowed, and "2" for error locking the repository (possibly indicating
291 allowed, and "2" for error locking the repository (possibly indicating
292 a permissions error for the server process).
292 a permissions error for the server process).
293 """
293 """
294 if not allowservergeneration(repo):
294 if not allowservergeneration(repo):
295 yield b'1\n'
295 yield b'1\n'
296 return
296 return
297
297
298 try:
298 try:
299 filecount, bytecount, it = generatev1(repo)
299 filecount, bytecount, it = generatev1(repo)
300 except error.LockError:
300 except error.LockError:
301 yield b'2\n'
301 yield b'2\n'
302 return
302 return
303
303
304 # Indicates successful response.
304 # Indicates successful response.
305 yield b'0\n'
305 yield b'0\n'
306 yield b'%d %d\n' % (filecount, bytecount)
306 yield b'%d %d\n' % (filecount, bytecount)
307 for chunk in it:
307 for chunk in it:
308 yield chunk
308 yield chunk
309
309
310
310
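
# Editor's note: sketch of how a client could consume the two header lines
# described above (a "0"/"1"/"2" status line, then "<filecount> <bytecount>");
# the helper name is hypothetical and the error handling is simplified.
def _sketch_read_v1_wire_header(fp):
    status = int(fp.readline())
    if status != 0:
        raise RuntimeError('stream clone refused by server (code %d)' % status)
    filecount, bytecount = map(int, fp.readline().split(b' ', 1))
    return filecount, bytecount
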
311 def generatebundlev1(repo, compression=b'UN'):
311 def generatebundlev1(repo, compression=b'UN'):
312 """Emit content for version 1 of a stream clone bundle.
312 """Emit content for version 1 of a stream clone bundle.
313
313
314 The first 4 bytes of the output ("HGS1") denote this as stream clone
314 The first 4 bytes of the output ("HGS1") denote this as stream clone
315 bundle version 1.
315 bundle version 1.
316
316
317 The next 2 bytes indicate the compression type. Only "UN" is currently
317 The next 2 bytes indicate the compression type. Only "UN" is currently
318 supported.
318 supported.
319
319
320 The next 16 bytes are two 64-bit big endian unsigned integers indicating
320 The next 16 bytes are two 64-bit big endian unsigned integers indicating
321 file count and byte count, respectively.
321 file count and byte count, respectively.
322
322
323 The next 2 bytes is a 16-bit big endian unsigned short declaring the length
323 The next 2 bytes is a 16-bit big endian unsigned short declaring the length
324 of the requirements string, including a trailing \0. The following N bytes
324 of the requirements string, including a trailing \0. The following N bytes
325 are the requirements string, which is ASCII containing a comma-delimited
325 are the requirements string, which is ASCII containing a comma-delimited
326 list of repo requirements that are needed to support the data.
326 list of repo requirements that are needed to support the data.
327
327
328 The remaining content is the output of ``generatev1()`` (which may be
328 The remaining content is the output of ``generatev1()`` (which may be
329 compressed in the future).
329 compressed in the future).
330
330
331 Returns a tuple of (requirements, data generator).
331 Returns a tuple of (requirements, data generator).
332 """
332 """
333 if compression != b'UN':
333 if compression != b'UN':
334 raise ValueError(b'we do not support the compression argument yet')
334 raise ValueError(b'we do not support the compression argument yet')
335
335
336 requirements = repo.requirements & repo.supportedformats
336 requirements = repo.requirements & repo.supportedformats
337 requires = b','.join(sorted(requirements))
337 requires = b','.join(sorted(requirements))
338
338
339 def gen():
339 def gen():
340 yield b'HGS1'
340 yield b'HGS1'
341 yield compression
341 yield compression
342
342
343 filecount, bytecount, it = generatev1(repo)
343 filecount, bytecount, it = generatev1(repo)
344 repo.ui.status(
344 repo.ui.status(
345 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
345 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
346 )
346 )
347
347
348 yield struct.pack(b'>QQ', filecount, bytecount)
348 yield struct.pack(b'>QQ', filecount, bytecount)
349 yield struct.pack(b'>H', len(requires) + 1)
349 yield struct.pack(b'>H', len(requires) + 1)
350 yield requires + b'\0'
350 yield requires + b'\0'
351
351
352 # This is where we'll add compression in the future.
352 # This is where we'll add compression in the future.
353 assert compression == b'UN'
353 assert compression == b'UN'
354
354
355 progress = repo.ui.makeprogress(
355 progress = repo.ui.makeprogress(
356 _(b'bundle'), total=bytecount, unit=_(b'bytes')
356 _(b'bundle'), total=bytecount, unit=_(b'bytes')
357 )
357 )
358 progress.update(0)
358 progress.update(0)
359
359
360 for chunk in it:
360 for chunk in it:
361 progress.increment(step=len(chunk))
361 progress.increment(step=len(chunk))
362 yield chunk
362 yield chunk
363
363
364 progress.complete()
364 progress.complete()
365
365
366 return requirements, gen()
366 return requirements, gen()
367
367
368
368
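
# Editor's note: a small, self-contained round-trip sketch of the stream clone
# bundle header laid out in the docstring above (HGS1 magic, 2-byte
# compression, >QQ file/byte counts, >H requirements length, NUL-terminated
# requirements string). The helper names are illustrative, not Mercurial API.
import struct

def _sketch_pack_v1_bundle_header(filecount, bytecount, requirements):
    requires = b','.join(sorted(requirements)) + b'\0'
    return (
        b'HGS1'
        + b'UN'
        + struct.pack(b'>QQ', filecount, bytecount)
        + struct.pack(b'>H', len(requires))
        + requires
    )

def _sketch_unpack_v1_bundle_header(data):
    assert data[:4] == b'HGS1' and data[4:6] == b'UN'
    filecount, bytecount = struct.unpack(b'>QQ', data[6:22])
    (reqlen,) = struct.unpack(b'>H', data[22:24])
    requires = data[24:24 + reqlen]
    return filecount, bytecount, set(requires.rstrip(b'\0').split(b','))

# example:
#   hdr = _sketch_pack_v1_bundle_header(2, 1024, {b'revlogv1', b'generaldelta'})
#   _sketch_unpack_v1_bundle_header(hdr) -> (2, 1024, {b'generaldelta', b'revlogv1'})
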
369 def consumev1(repo, fp, filecount, bytecount):
369 def consumev1(repo, fp, filecount, bytecount):
370 """Apply the contents from version 1 of a streaming clone file handle.
370 """Apply the contents from version 1 of a streaming clone file handle.
371
371
372 This takes the output from "stream_out" and applies it to the specified
372 This takes the output from "stream_out" and applies it to the specified
373 repository.
373 repository.
374
374
375 Like "stream_out," the status line added by the wire protocol is not
375 Like "stream_out," the status line added by the wire protocol is not
376 handled by this function.
376 handled by this function.
377 """
377 """
378 with repo.lock():
378 with repo.lock():
379 repo.ui.status(
379 repo.ui.status(
380 _(b'%d files to transfer, %s of data\n')
380 _(b'%d files to transfer, %s of data\n')
381 % (filecount, util.bytecount(bytecount))
381 % (filecount, util.bytecount(bytecount))
382 )
382 )
383 progress = repo.ui.makeprogress(
383 progress = repo.ui.makeprogress(
384 _(b'clone'), total=bytecount, unit=_(b'bytes')
384 _(b'clone'), total=bytecount, unit=_(b'bytes')
385 )
385 )
386 progress.update(0)
386 progress.update(0)
387 start = util.timer()
387 start = util.timer()
388
388
389 # TODO: get rid of (potential) inconsistency
389 # TODO: get rid of (potential) inconsistency
390 #
390 #
391 # If transaction is started and any @filecache property is
391 # If transaction is started and any @filecache property is
392 # changed at this point, it causes inconsistency between
392 # changed at this point, it causes inconsistency between
393 # in-memory cached property and streamclone-ed file on the
393 # in-memory cached property and streamclone-ed file on the
394 # disk. Nested transaction prevents transaction scope "clone"
394 # disk. Nested transaction prevents transaction scope "clone"
395 # below from writing in-memory changes out at the end of it,
395 # below from writing in-memory changes out at the end of it,
396 # even though in-memory changes are discarded at the end of it
396 # even though in-memory changes are discarded at the end of it
397 # regardless of transaction nesting.
397 # regardless of transaction nesting.
398 #
398 #
399 # But transaction nesting can't be simply prohibited, because
399 # But transaction nesting can't be simply prohibited, because
400 # nesting occurs also in ordinary case (e.g. enabling
400 # nesting occurs also in ordinary case (e.g. enabling
401 # clonebundles).
401 # clonebundles).
402
402
403 with repo.transaction(b'clone'):
403 with repo.transaction(b'clone'):
404 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
404 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
405 for i in pycompat.xrange(filecount):
405 for i in pycompat.xrange(filecount):
406 # XXX doesn't support '\n' or '\r' in filenames
406 # XXX doesn't support '\n' or '\r' in filenames
407 l = fp.readline()
407 l = fp.readline()
408 try:
408 try:
409 name, size = l.split(b'\0', 1)
409 name, size = l.split(b'\0', 1)
410 size = int(size)
410 size = int(size)
411 except (ValueError, TypeError):
411 except (ValueError, TypeError):
412 raise error.ResponseError(
412 raise error.ResponseError(
413 _(b'unexpected response from remote server:'), l
413 _(b'unexpected response from remote server:'), l
414 )
414 )
415 if repo.ui.debugflag:
415 if repo.ui.debugflag:
416 repo.ui.debug(
416 repo.ui.debug(
417 b'adding %s (%s)\n' % (name, util.bytecount(size))
417 b'adding %s (%s)\n' % (name, util.bytecount(size))
418 )
418 )
419 # for backwards compat, name was partially encoded
419 # for backwards compat, name was partially encoded
420 path = store.decodedir(name)
420 path = store.decodedir(name)
421 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
421 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
422 for chunk in util.filechunkiter(fp, limit=size):
422 for chunk in util.filechunkiter(fp, limit=size):
423 progress.increment(step=len(chunk))
423 progress.increment(step=len(chunk))
424 ofp.write(chunk)
424 ofp.write(chunk)
425
425
426 # force @filecache properties to be reloaded from
426 # force @filecache properties to be reloaded from
427 # streamclone-ed file at next access
427 # streamclone-ed file at next access
428 repo.invalidate(clearfilecache=True)
428 repo.invalidate(clearfilecache=True)
429
429
430 elapsed = util.timer() - start
430 elapsed = util.timer() - start
431 if elapsed <= 0:
431 if elapsed <= 0:
432 elapsed = 0.001
432 elapsed = 0.001
433 progress.complete()
433 progress.complete()
434 repo.ui.status(
434 repo.ui.status(
435 _(b'transferred %s in %.1f seconds (%s/sec)\n')
435 _(b'transferred %s in %.1f seconds (%s/sec)\n')
436 % (
436 % (
437 util.bytecount(bytecount),
437 util.bytecount(bytecount),
438 elapsed,
438 elapsed,
439 util.bytecount(bytecount / elapsed),
439 util.bytecount(bytecount / elapsed),
440 )
440 )
441 )
441 )
442
442
443
443
444 def readbundle1header(fp):
444 def readbundle1header(fp):
445 compression = fp.read(2)
445 compression = fp.read(2)
446 if compression != b'UN':
446 if compression != b'UN':
447 raise error.Abort(
447 raise error.Abort(
448 _(
448 _(
449 b'only uncompressed stream clone bundles are '
449 b'only uncompressed stream clone bundles are '
450 b'supported; got %s'
450 b'supported; got %s'
451 )
451 )
452 % compression
452 % compression
453 )
453 )
454
454
455 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
455 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
456 requireslen = struct.unpack(b'>H', fp.read(2))[0]
456 requireslen = struct.unpack(b'>H', fp.read(2))[0]
457 requires = fp.read(requireslen)
457 requires = fp.read(requireslen)
458
458
459 if not requires.endswith(b'\0'):
459 if not requires.endswith(b'\0'):
460 raise error.Abort(
460 raise error.Abort(
461 _(
461 _(
462 b'malformed stream clone bundle: '
462 b'malformed stream clone bundle: '
463 b'requirements not properly encoded'
463 b'requirements not properly encoded'
464 )
464 )
465 )
465 )
466
466
467 requirements = set(requires.rstrip(b'\0').split(b','))
467 requirements = set(requires.rstrip(b'\0').split(b','))
468
468
469 return filecount, bytecount, requirements
469 return filecount, bytecount, requirements
470
470
471
471
472 def applybundlev1(repo, fp):
472 def applybundlev1(repo, fp):
473 """Apply the content from a stream clone bundle version 1.
473 """Apply the content from a stream clone bundle version 1.
474
474
475 We assume the 4 byte header has been read and validated and the file handle
475 We assume the 4 byte header has been read and validated and the file handle
476 is at the 2 byte compression identifier.
476 is at the 2 byte compression identifier.
477 """
477 """
478 if len(repo):
478 if len(repo):
479 raise error.Abort(
479 raise error.Abort(
480 _(b'cannot apply stream clone bundle on non-empty repo')
480 _(b'cannot apply stream clone bundle on non-empty repo')
481 )
481 )
482
482
483 filecount, bytecount, requirements = readbundle1header(fp)
483 filecount, bytecount, requirements = readbundle1header(fp)
484 missingreqs = requirements - repo.supportedformats
484 missingreqs = requirements - repo.supportedformats
485 if missingreqs:
485 if missingreqs:
486 raise error.Abort(
486 raise error.Abort(
487 _(b'unable to apply stream clone: unsupported format: %s')
487 _(b'unable to apply stream clone: unsupported format: %s')
488 % b', '.join(sorted(missingreqs))
488 % b', '.join(sorted(missingreqs))
489 )
489 )
490
490
491 consumev1(repo, fp, filecount, bytecount)
491 consumev1(repo, fp, filecount, bytecount)
492
492
493
493
494 class streamcloneapplier(object):
494 class streamcloneapplier(object):
495 """Class to manage applying streaming clone bundles.
495 """Class to manage applying streaming clone bundles.
496
496
497 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
497 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
498 readers to perform bundle type-specific functionality.
498 readers to perform bundle type-specific functionality.
499 """
499 """
500
500
501 def __init__(self, fh):
501 def __init__(self, fh):
502 self._fh = fh
502 self._fh = fh
503
503
504 def apply(self, repo):
504 def apply(self, repo):
505 return applybundlev1(repo, self._fh)
505 return applybundlev1(repo, self._fh)
506
506
507
507
508 # type of file to stream
508 # type of file to stream
509 _fileappend = 0 # append only file
509 _fileappend = 0 # append only file
510 _filefull = 1 # full snapshot file
510 _filefull = 1 # full snapshot file
511
511
512 # Source of the file
512 # Source of the file
513 _srcstore = b's' # store (svfs)
513 _srcstore = b's' # store (svfs)
514 _srccache = b'c' # cache (cache)
514 _srccache = b'c' # cache (cache)
515
515
516 # This is its own function so extensions can override it.
516 # This is its own function so extensions can override it.
517 def _walkstreamfullstorefiles(repo):
517 def _walkstreamfullstorefiles(repo):
518 """list snapshot file from the store"""
518 """list snapshot file from the store"""
519 fnames = []
519 fnames = []
520 if not repo.publishing():
520 if not repo.publishing():
521 fnames.append(b'phaseroots')
521 fnames.append(b'phaseroots')
522 return fnames
522 return fnames
523
523
524
524
525 def _filterfull(entry, copy, vfsmap):
525 def _filterfull(entry, copy, vfsmap):
526 """actually copy the snapshot files"""
526 """actually copy the snapshot files"""
527 src, name, ftype, data = entry
527 src, name, ftype, data = entry
528 if ftype != _filefull:
528 if ftype != _filefull:
529 return entry
529 return entry
530 return (src, name, ftype, copy(vfsmap[src].join(name)))
530 return (src, name, ftype, copy(vfsmap[src].join(name)))
531
531
532
532
533 @contextlib.contextmanager
533 @contextlib.contextmanager
534 def maketempcopies():
534 def maketempcopies():
535 """return a function to temporary copy file"""
535 """return a function to temporary copy file"""
536 files = []
536 files = []
537 try:
537 try:
538
538
539 def copy(src):
539 def copy(src):
540 fd, dst = pycompat.mkstemp()
540 fd, dst = pycompat.mkstemp()
541 os.close(fd)
541 os.close(fd)
542 files.append(dst)
542 files.append(dst)
543 util.copyfiles(src, dst, hardlink=True)
543 util.copyfiles(src, dst, hardlink=True)
544 return dst
544 return dst
545
545
546 yield copy
546 yield copy
547 finally:
547 finally:
548 for tmp in files:
548 for tmp in files:
549 util.tryunlink(tmp)
549 util.tryunlink(tmp)
550
550
551
551
552 def _makemap(repo):
552 def _makemap(repo):
553 """make a (src -> vfs) map for the repo"""
553 """make a (src -> vfs) map for the repo"""
554 vfsmap = {
554 vfsmap = {
555 _srcstore: repo.svfs,
555 _srcstore: repo.svfs,
556 _srccache: repo.cachevfs,
556 _srccache: repo.cachevfs,
557 }
557 }
558 # we keep repo.vfs out of the map on purpose, there are too many dangers there
558 # we keep repo.vfs out of the map on purpose, there are too many dangers there
559 # (eg: .hg/hgrc)
559 # (eg: .hg/hgrc)
560 assert repo.vfs not in vfsmap.values()
560 assert repo.vfs not in vfsmap.values()
561
561
562 return vfsmap
562 return vfsmap
563
563
564
564
565 def _emit2(repo, entries, totalfilesize):
565 def _emit2(repo, entries, totalfilesize):
566 """actually emit the stream bundle"""
566 """actually emit the stream bundle"""
567 vfsmap = _makemap(repo)
567 vfsmap = _makemap(repo)
568 # we keep repo.vfs out of the map on purpose, there are too many dangers there
568 # we keep repo.vfs out of the map on purpose, there are too many dangers there
569 # (eg: .hg/hgrc),
569 # (eg: .hg/hgrc),
570 #
570 #
571 # this assert is duplicated (from _makemap) as author might think this is
571 # this assert is duplicated (from _makemap) as author might think this is
572 # fine, while this is really not fine.
572 # fine, while this is really not fine.
573 if repo.vfs in vfsmap.values():
573 if repo.vfs in vfsmap.values():
574 raise error.ProgrammingError(
574 raise error.ProgrammingError(
575 b'repo.vfs must not be added to vfsmap for security reasons'
575 b'repo.vfs must not be added to vfsmap for security reasons'
576 )
576 )
577
577
578 progress = repo.ui.makeprogress(
578 progress = repo.ui.makeprogress(
579 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
579 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
580 )
580 )
581 progress.update(0)
581 progress.update(0)
582 with maketempcopies() as copy, progress:
582 with maketempcopies() as copy, progress:
583 # copy is delayed until we are in the try
583 # copy is delayed until we are in the try
584 entries = [_filterfull(e, copy, vfsmap) for e in entries]
584 entries = [_filterfull(e, copy, vfsmap) for e in entries]
585 yield None # this releases the lock on the repository
585 yield None # this releases the lock on the repository
586 totalbytecount = 0
586 totalbytecount = 0
587
587
588 for src, name, ftype, data in entries:
588 for src, name, ftype, data in entries:
589 vfs = vfsmap[src]
589 vfs = vfsmap[src]
590 yield src
590 yield src
591 yield util.uvarintencode(len(name))
591 yield util.uvarintencode(len(name))
592 if ftype == _fileappend:
592 if ftype == _fileappend:
593 fp = vfs(name)
593 fp = vfs(name)
594 size = data
594 size = data
595 elif ftype == _filefull:
595 elif ftype == _filefull:
596 fp = open(data, b'rb')
596 fp = open(data, b'rb')
597 size = util.fstat(fp).st_size
597 size = util.fstat(fp).st_size
598 bytecount = 0
598 bytecount = 0
599 try:
599 try:
600 yield util.uvarintencode(size)
600 yield util.uvarintencode(size)
601 yield name
601 yield name
602 if size <= 65536:
602 if size <= 65536:
603 chunks = (fp.read(size),)
603 chunks = (fp.read(size),)
604 else:
604 else:
605 chunks = util.filechunkiter(fp, limit=size)
605 chunks = util.filechunkiter(fp, limit=size)
606 for chunk in chunks:
606 for chunk in chunks:
607 bytecount += len(chunk)
607 bytecount += len(chunk)
608 totalbytecount += len(chunk)
608 totalbytecount += len(chunk)
609 progress.update(totalbytecount)
609 progress.update(totalbytecount)
610 yield chunk
610 yield chunk
611 if bytecount != size:
611 if bytecount != size:
612 # Would most likely be caused by a race due to `hg strip` or
612 # Would most likely be caused by a race due to `hg strip` or
613 # a revlog split
613 # a revlog split
614 raise error.Abort(
614 raise error.Abort(
615 _(
615 _(
616 b'clone could only read %d bytes from %s, but '
616 b'clone could only read %d bytes from %s, but '
617 b'expected %d bytes'
617 b'expected %d bytes'
618 )
618 )
619 % (bytecount, name, size)
619 % (bytecount, name, size)
620 )
620 )
621 finally:
621 finally:
622 fp.close()
622 fp.close()
623
623
624
624
625 def _test_sync_point_walk_1(repo):
625 def _test_sync_point_walk_1(repo):
626 """a function for synchronisation during tests"""
626 """a function for synchronisation during tests"""
627
627
628
628
629 def _test_sync_point_walk_2(repo):
629 def _test_sync_point_walk_2(repo):
630 """a function for synchronisation during tests"""
630 """a function for synchronisation during tests"""
631
631
632
632
633 def _v2_walk(repo, includes, excludes, includeobsmarkers):
633 def _v2_walk(repo, includes, excludes, includeobsmarkers):
634 """emit a seris of files information useful to clone a repo
634 """emit a seris of files information useful to clone a repo
635
635
636 return (entries, totalfilesize)
636 return (entries, totalfilesize)
637
637
638 entries is a list of tuple (vfs-key, file-path, file-type, size)
638 entries is a list of tuple (vfs-key, file-path, file-type, size)
639
639
640 - `vfs-key`: is a key to the right vfs to write the file (see _makemap)
640 - `vfs-key`: is a key to the right vfs to write the file (see _makemap)
641 - `name`: file path of the file to copy (to be fed to the vfs)
641 - `name`: file path of the file to copy (to be fed to the vfs)
642 - `file-type`: does this file need to be copied with the source lock?
642 - `file-type`: does this file need to be copied with the source lock?
643 - `size`: the size of the file (or None)
643 - `size`: the size of the file (or None)
644 """
644 """
645 assert repo._currentlock(repo._lockref) is not None
645 assert repo._currentlock(repo._lockref) is not None
646 entries = []
646 entries = []
647 totalfilesize = 0
647 totalfilesize = 0
648
648
649 matcher = None
649 matcher = None
650 if includes or excludes:
650 if includes or excludes:
651 matcher = narrowspec.match(repo.root, includes, excludes)
651 matcher = narrowspec.match(repo.root, includes, excludes)
652
652
653 for rl_type, name, ename, size in _walkstreamfiles(repo, matcher):
653 for rl_type, name, size in _walkstreamfiles(repo, matcher):
654 if size:
654 if size:
655 ft = _fileappend
655 ft = _fileappend
656 if rl_type & store.FILEFLAGS_VOLATILE:
656 if rl_type & store.FILEFLAGS_VOLATILE:
657 ft = _filefull
657 ft = _filefull
658 entries.append((_srcstore, name, ft, size))
658 entries.append((_srcstore, name, ft, size))
659 totalfilesize += size
659 totalfilesize += size
660 for name in _walkstreamfullstorefiles(repo):
660 for name in _walkstreamfullstorefiles(repo):
661 if repo.svfs.exists(name):
661 if repo.svfs.exists(name):
662 totalfilesize += repo.svfs.lstat(name).st_size
662 totalfilesize += repo.svfs.lstat(name).st_size
663 entries.append((_srcstore, name, _filefull, None))
663 entries.append((_srcstore, name, _filefull, None))
664 if includeobsmarkers and repo.svfs.exists(b'obsstore'):
664 if includeobsmarkers and repo.svfs.exists(b'obsstore'):
665 totalfilesize += repo.svfs.lstat(b'obsstore').st_size
665 totalfilesize += repo.svfs.lstat(b'obsstore').st_size
666 entries.append((_srcstore, b'obsstore', _filefull, None))
666 entries.append((_srcstore, b'obsstore', _filefull, None))
667 for name in cacheutil.cachetocopy(repo):
667 for name in cacheutil.cachetocopy(repo):
668 if repo.cachevfs.exists(name):
668 if repo.cachevfs.exists(name):
669 totalfilesize += repo.cachevfs.lstat(name).st_size
669 totalfilesize += repo.cachevfs.lstat(name).st_size
670 entries.append((_srccache, name, _filefull, None))
670 entries.append((_srccache, name, _filefull, None))
671 return entries, totalfilesize
671 return entries, totalfilesize
672
672
673
673
674 def generatev2(repo, includes, excludes, includeobsmarkers):
674 def generatev2(repo, includes, excludes, includeobsmarkers):
675 """Emit content for version 2 of a streaming clone.
675 """Emit content for version 2 of a streaming clone.
676
676
677 the data stream consists of the following entries:
677 the data stream consists of the following entries:
678 1) A char representing the file destination (eg: store or cache)
678 1) A char representing the file destination (eg: store or cache)
679 2) A varint containing the length of the filename
679 2) A varint containing the length of the filename
680 3) A varint containing the length of file data
680 3) A varint containing the length of file data
681 4) N bytes containing the filename (the internal, store-agnostic form)
681 4) N bytes containing the filename (the internal, store-agnostic form)
682 5) N bytes containing the file data
682 5) N bytes containing the file data
683
683
684 Returns a 3-tuple of (file count, file size, data iterator).
684 Returns a 3-tuple of (file count, file size, data iterator).
685 """
685 """
686
686
687 with repo.lock():
687 with repo.lock():
688
688
689 repo.ui.debug(b'scanning\n')
689 repo.ui.debug(b'scanning\n')
690
690
691 entries, totalfilesize = _v2_walk(
691 entries, totalfilesize = _v2_walk(
692 repo,
692 repo,
693 includes=includes,
693 includes=includes,
694 excludes=excludes,
694 excludes=excludes,
695 includeobsmarkers=includeobsmarkers,
695 includeobsmarkers=includeobsmarkers,
696 )
696 )
697
697
698 chunks = _emit2(repo, entries, totalfilesize)
698 chunks = _emit2(repo, entries, totalfilesize)
699 first = next(chunks)
699 first = next(chunks)
700 assert first is None
700 assert first is None
701 _test_sync_point_walk_1(repo)
701 _test_sync_point_walk_1(repo)
702 _test_sync_point_walk_2(repo)
702 _test_sync_point_walk_2(repo)
703
703
704 return len(entries), totalfilesize, chunks
704 return len(entries), totalfilesize, chunks
705
705
706
706
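
# Editor's note: illustrative sketch of a single data-stream entry as
# documented above (destination char, two varints, filename, file data). The
# varint helpers assume the usual 7-bit continuation-byte encoding behind
# util.uvarintencode/uvarintdecodestream; none of these names are Mercurial
# API.
def _sketch_uvarint_encode(value):
    out = bytearray()
    while True:
        byte = value & 0x7F
        value >>= 7
        out.append(byte | 0x80 if value else byte)
        if not value:
            return bytes(out)

def _sketch_uvarint_decode(fp):
    result = shift = 0
    while True:
        byte = fp.read(1)[0]
        result |= (byte & 0x7F) << shift
        if not byte & 0x80:
            return result
        shift += 7

def _sketch_pack_v2_entry(src, name, data):
    return (src + _sketch_uvarint_encode(len(name))
            + _sketch_uvarint_encode(len(data)) + name + data)

def _sketch_read_v2_entry(fp):
    src = fp.read(1)
    namelen = _sketch_uvarint_decode(fp)
    datalen = _sketch_uvarint_decode(fp)
    return src, fp.read(namelen), fp.read(datalen)

# example:
#   import io
#   entry = _sketch_pack_v2_entry(b's', b'00changelog.i', b'hello')
#   _sketch_read_v2_entry(io.BytesIO(entry))
#   -> (b's', b'00changelog.i', b'hello')
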
707 @contextlib.contextmanager
707 @contextlib.contextmanager
708 def nested(*ctxs):
708 def nested(*ctxs):
709 this = ctxs[0]
709 this = ctxs[0]
710 rest = ctxs[1:]
710 rest = ctxs[1:]
711 with this:
711 with this:
712 if rest:
712 if rest:
713 with nested(*rest):
713 with nested(*rest):
714 yield
714 yield
715 else:
715 else:
716 yield
716 yield
717
717
718
718
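
# Editor's note: small usage sketch for the nested() helper above; the dummy
# context manager and function names are illustrative only. For new code,
# contextlib.ExitStack gives the same stacking behaviour without recursion.
def _sketch_nested_usage():
    import contextlib

    @contextlib.contextmanager
    def tag(log, name):
        log.append(b'enter ' + name)
        try:
            yield
        finally:
            log.append(b'exit ' + name)

    log = []
    with nested(tag(log, b'a'), tag(log, b'b')):
        pass
    assert log == [b'enter a', b'enter b', b'exit b', b'exit a']
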
719 def consumev2(repo, fp, filecount, filesize):
719 def consumev2(repo, fp, filecount, filesize):
720 """Apply the contents from a version 2 streaming clone.
720 """Apply the contents from a version 2 streaming clone.
721
721
722 Data is read from an object that only needs to provide a ``read(size)``
722 Data is read from an object that only needs to provide a ``read(size)``
723 method.
723 method.
724 """
724 """
725 with repo.lock():
725 with repo.lock():
726 repo.ui.status(
726 repo.ui.status(
727 _(b'%d files to transfer, %s of data\n')
727 _(b'%d files to transfer, %s of data\n')
728 % (filecount, util.bytecount(filesize))
728 % (filecount, util.bytecount(filesize))
729 )
729 )
730
730
731 start = util.timer()
731 start = util.timer()
732 progress = repo.ui.makeprogress(
732 progress = repo.ui.makeprogress(
733 _(b'clone'), total=filesize, unit=_(b'bytes')
733 _(b'clone'), total=filesize, unit=_(b'bytes')
734 )
734 )
735 progress.update(0)
735 progress.update(0)
736
736
737 vfsmap = _makemap(repo)
737 vfsmap = _makemap(repo)
738 # we keep repo.vfs out of the map on purpose, there are too many dangers
738 # we keep repo.vfs out of the map on purpose, there are too many dangers
739 # there (eg: .hg/hgrc),
739 # there (eg: .hg/hgrc),
740 #
740 #
741 # this assert is duplicated (from _makemap) as author might think this
741 # this assert is duplicated (from _makemap) as author might think this
742 # is fine, while this is really not fine.
742 # is fine, while this is really not fine.
743 if repo.vfs in vfsmap.values():
743 if repo.vfs in vfsmap.values():
744 raise error.ProgrammingError(
744 raise error.ProgrammingError(
745 b'repo.vfs must not be added to vfsmap for security reasons'
745 b'repo.vfs must not be added to vfsmap for security reasons'
746 )
746 )
747
747
748 with repo.transaction(b'clone'):
748 with repo.transaction(b'clone'):
749 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
749 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
750 with nested(*ctxs):
750 with nested(*ctxs):
751 for i in range(filecount):
751 for i in range(filecount):
752 src = util.readexactly(fp, 1)
752 src = util.readexactly(fp, 1)
753 vfs = vfsmap[src]
753 vfs = vfsmap[src]
754 namelen = util.uvarintdecodestream(fp)
754 namelen = util.uvarintdecodestream(fp)
755 datalen = util.uvarintdecodestream(fp)
755 datalen = util.uvarintdecodestream(fp)
756
756
757 name = util.readexactly(fp, namelen)
757 name = util.readexactly(fp, namelen)
758
758
759 if repo.ui.debugflag:
759 if repo.ui.debugflag:
760 repo.ui.debug(
760 repo.ui.debug(
761 b'adding [%s] %s (%s)\n'
761 b'adding [%s] %s (%s)\n'
762 % (src, name, util.bytecount(datalen))
762 % (src, name, util.bytecount(datalen))
763 )
763 )
764
764
765 with vfs(name, b'w') as ofp:
765 with vfs(name, b'w') as ofp:
766 for chunk in util.filechunkiter(fp, limit=datalen):
766 for chunk in util.filechunkiter(fp, limit=datalen):
767 progress.increment(step=len(chunk))
767 progress.increment(step=len(chunk))
768 ofp.write(chunk)
768 ofp.write(chunk)
769
769
770 # force @filecache properties to be reloaded from
770 # force @filecache properties to be reloaded from
771 # streamclone-ed file at next access
771 # streamclone-ed file at next access
772 repo.invalidate(clearfilecache=True)
772 repo.invalidate(clearfilecache=True)
773
773
774 elapsed = util.timer() - start
774 elapsed = util.timer() - start
775 if elapsed <= 0:
775 if elapsed <= 0:
776 elapsed = 0.001
776 elapsed = 0.001
777 repo.ui.status(
777 repo.ui.status(
778 _(b'transferred %s in %.1f seconds (%s/sec)\n')
778 _(b'transferred %s in %.1f seconds (%s/sec)\n')
779 % (
779 % (
780 util.bytecount(progress.pos),
780 util.bytecount(progress.pos),
781 elapsed,
781 elapsed,
782 util.bytecount(progress.pos / elapsed),
782 util.bytecount(progress.pos / elapsed),
783 )
783 )
784 )
784 )
785 progress.complete()
785 progress.complete()
786
786
787
787
788 def applybundlev2(repo, fp, filecount, filesize, requirements):
788 def applybundlev2(repo, fp, filecount, filesize, requirements):
789 from . import localrepo
789 from . import localrepo
790
790
791 missingreqs = [r for r in requirements if r not in repo.supported]
791 missingreqs = [r for r in requirements if r not in repo.supported]
792 if missingreqs:
792 if missingreqs:
793 raise error.Abort(
793 raise error.Abort(
794 _(b'unable to apply stream clone: unsupported format: %s')
794 _(b'unable to apply stream clone: unsupported format: %s')
795 % b', '.join(sorted(missingreqs))
795 % b', '.join(sorted(missingreqs))
796 )
796 )
797
797
798 consumev2(repo, fp, filecount, filesize)
798 consumev2(repo, fp, filecount, filesize)
799
799
800 # new requirements = old non-format requirements +
800 # new requirements = old non-format requirements +
801 # new format-related remote requirements
801 # new format-related remote requirements
802 # requirements from the streamed-in repository
802 # requirements from the streamed-in repository
803 repo.requirements = set(requirements) | (
803 repo.requirements = set(requirements) | (
804 repo.requirements - repo.supportedformats
804 repo.requirements - repo.supportedformats
805 )
805 )
806 repo.svfs.options = localrepo.resolvestorevfsoptions(
806 repo.svfs.options = localrepo.resolvestorevfsoptions(
807 repo.ui, repo.requirements, repo.features
807 repo.ui, repo.requirements, repo.features
808 )
808 )
809 scmutil.writereporequirements(repo)
809 scmutil.writereporequirements(repo)
810
810
811
811
812 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
812 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
813 hardlink = [True]
813 hardlink = [True]
814
814
815 def copy_used():
815 def copy_used():
816 hardlink[0] = False
816 hardlink[0] = False
817 progress.topic = _(b'copying')
817 progress.topic = _(b'copying')
818
818
819 for k, path, size in entries:
819 for k, path, size in entries:
820 src_vfs = src_vfs_map[k]
820 src_vfs = src_vfs_map[k]
821 dst_vfs = dst_vfs_map[k]
821 dst_vfs = dst_vfs_map[k]
822 src_path = src_vfs.join(path)
822 src_path = src_vfs.join(path)
823 dst_path = dst_vfs.join(path)
823 dst_path = dst_vfs.join(path)
824 dirname = dst_vfs.dirname(path)
824 dirname = dst_vfs.dirname(path)
825 if not dst_vfs.exists(dirname):
825 if not dst_vfs.exists(dirname):
826 dst_vfs.makedirs(dirname)
826 dst_vfs.makedirs(dirname)
827 dst_vfs.register_file(path)
827 dst_vfs.register_file(path)
828 # XXX we could use the #nb_bytes argument.
828 # XXX we could use the #nb_bytes argument.
829 util.copyfile(
829 util.copyfile(
830 src_path,
830 src_path,
831 dst_path,
831 dst_path,
832 hardlink=hardlink[0],
832 hardlink=hardlink[0],
833 no_hardlink_cb=copy_used,
833 no_hardlink_cb=copy_used,
834 check_fs_hardlink=False,
834 check_fs_hardlink=False,
835 )
835 )
836 progress.increment()
836 progress.increment()
837 return hardlink[0]
837 return hardlink[0]
838
838
839
839
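
# Editor's note: stdlib-only sketch of the "hardlink if possible, otherwise
# fall back to a plain copy" behaviour that util.copyfile(...,
# no_hardlink_cb=...) provides in _copy_files above; the helper name is
# hypothetical and the error handling is deliberately coarse.
import os
import shutil

def _sketch_link_or_copy(src, dst):
    """Return True if dst was hardlinked to src, False if it was copied."""
    try:
        os.link(src, dst)
        return True
    except OSError:
        # e.g. cross-device link or an unsupported filesystem
        shutil.copyfile(src, dst)
        return False
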
840 def local_copy(src_repo, dest_repo):
840 def local_copy(src_repo, dest_repo):
841 """copy all content from one local repository to another
841 """copy all content from one local repository to another
842
842
843 This is useful for local clone"""
843 This is useful for local clone"""
844 src_store_requirements = {
844 src_store_requirements = {
845 r
845 r
846 for r in src_repo.requirements
846 for r in src_repo.requirements
847 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
847 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
848 }
848 }
849 dest_store_requirements = {
849 dest_store_requirements = {
850 r
850 r
851 for r in dest_repo.requirements
851 for r in dest_repo.requirements
852 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
852 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
853 }
853 }
854 assert src_store_requirements == dest_store_requirements
854 assert src_store_requirements == dest_store_requirements
855
855
856 with dest_repo.lock():
856 with dest_repo.lock():
857 with src_repo.lock():
857 with src_repo.lock():
858
858
859 # bookmarks are not integrated into the streaming as they might use the
859 # bookmarks are not integrated into the streaming as they might use the
860 # `repo.vfs`, and there is too much sensitive data accessible
860 # `repo.vfs`, and there is too much sensitive data accessible
861 # through `repo.vfs` to expose it to streaming clone.
861 # through `repo.vfs` to expose it to streaming clone.
862 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
862 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
863 srcbookmarks = src_book_vfs.join(b'bookmarks')
863 srcbookmarks = src_book_vfs.join(b'bookmarks')
864 bm_count = 0
864 bm_count = 0
865 if os.path.exists(srcbookmarks):
865 if os.path.exists(srcbookmarks):
866 bm_count = 1
866 bm_count = 1
867
867
868 entries, totalfilesize = _v2_walk(
868 entries, totalfilesize = _v2_walk(
869 src_repo,
869 src_repo,
870 includes=None,
870 includes=None,
871 excludes=None,
871 excludes=None,
872 includeobsmarkers=True,
872 includeobsmarkers=True,
873 )
873 )
874 src_vfs_map = _makemap(src_repo)
874 src_vfs_map = _makemap(src_repo)
875 dest_vfs_map = _makemap(dest_repo)
875 dest_vfs_map = _makemap(dest_repo)
876 progress = src_repo.ui.makeprogress(
876 progress = src_repo.ui.makeprogress(
877 topic=_(b'linking'),
877 topic=_(b'linking'),
878 total=len(entries) + bm_count,
878 total=len(entries) + bm_count,
879 unit=_(b'files'),
879 unit=_(b'files'),
880 )
880 )
881 # copy files
881 # copy files
882 #
882 #
883 # We could copy the full file while the source repository is locked
883 # We could copy the full file while the source repository is locked
884 # and the other one without the lock. However, in the linking case,
884 # and the other one without the lock. However, in the linking case,
885 # this would also require checks that nobody is appending any data
885 # this would also require checks that nobody is appending any data
886 # to the files while we do the clone, so this is not done yet. We
886 # to the files while we do the clone, so this is not done yet. We
887 # could do this blindly when copying files.
887 # could do this blindly when copying files.
888 files = ((k, path, size) for k, path, ftype, size in entries)
888 files = ((k, path, size) for k, path, ftype, size in entries)
889 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
889 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
890
890
891 # copy bookmarks over
891 # copy bookmarks over
892 if bm_count:
892 if bm_count:
893 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
893 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
894 dstbookmarks = dst_book_vfs.join(b'bookmarks')
894 dstbookmarks = dst_book_vfs.join(b'bookmarks')
895 util.copyfile(srcbookmarks, dstbookmarks)
895 util.copyfile(srcbookmarks, dstbookmarks)
896 progress.complete()
896 progress.complete()
897 if hardlink:
897 if hardlink:
898 msg = b'linked %d files\n'
898 msg = b'linked %d files\n'
899 else:
899 else:
900 msg = b'copied %d files\n'
900 msg = b'copied %d files\n'
901 src_repo.ui.debug(msg % (len(entries) + bm_count))
901 src_repo.ui.debug(msg % (len(entries) + bm_count))
902
902
903 with dest_repo.transaction(b"localclone") as tr:
903 with dest_repo.transaction(b"localclone") as tr:
904 dest_repo.store.write(tr)
904 dest_repo.store.write(tr)
905
905
906 # clean up transaction files as they do not make sense
906 # clean up transaction files as they do not make sense
907 undo_files = [(dest_repo.svfs, b'undo.backupfiles')]
907 undo_files = [(dest_repo.svfs, b'undo.backupfiles')]
908 undo_files.extend(dest_repo.undofiles())
908 undo_files.extend(dest_repo.undofiles())
909 for undovfs, undofile in undo_files:
909 for undovfs, undofile in undo_files:
910 try:
910 try:
911 undovfs.unlink(undofile)
911 undovfs.unlink(undofile)
912 except OSError as e:
912 except OSError as e:
913 if e.errno != errno.ENOENT:
913 if e.errno != errno.ENOENT:
914 msg = _(b'error removing %s: %s\n')
914 msg = _(b'error removing %s: %s\n')
915 path = undovfs.join(undofile)
915 path = undovfs.join(undofile)
916 e_msg = stringutil.forcebytestr(e)
916 e_msg = stringutil.forcebytestr(e)
917 msg %= (path, e_msg)
917 msg %= (path, e_msg)
918 dest_repo.ui.warn(msg)
918 dest_repo.ui.warn(msg)
@@ -1,649 +1,649 b''
1 # upgrade.py - functions for in place upgrade of Mercurial repository
1 # upgrade.py - functions for in place upgrade of Mercurial repository
2 #
2 #
3 # Copyright (c) 2016-present, Gregory Szorc
3 # Copyright (c) 2016-present, Gregory Szorc
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import stat
10 import stat
11
11
12 from ..i18n import _
12 from ..i18n import _
13 from ..pycompat import getattr
13 from ..pycompat import getattr
14 from .. import (
14 from .. import (
15 changelog,
15 changelog,
16 error,
16 error,
17 filelog,
17 filelog,
18 manifest,
18 manifest,
19 metadata,
19 metadata,
20 pycompat,
20 pycompat,
21 requirements,
21 requirements,
22 scmutil,
22 scmutil,
23 store,
23 store,
24 util,
24 util,
25 vfs as vfsmod,
25 vfs as vfsmod,
26 )
26 )
27 from ..revlogutils import (
27 from ..revlogutils import (
28 constants as revlogconst,
28 constants as revlogconst,
29 flagutil,
29 flagutil,
30 nodemap,
30 nodemap,
31 sidedata as sidedatamod,
31 sidedata as sidedatamod,
32 )
32 )
33 from . import actions as upgrade_actions
33 from . import actions as upgrade_actions
34
34
35
35
36 def get_sidedata_helpers(srcrepo, dstrepo):
36 def get_sidedata_helpers(srcrepo, dstrepo):
37 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
37 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
38 sequential = pycompat.iswindows or not use_w
38 sequential = pycompat.iswindows or not use_w
39 if not sequential:
39 if not sequential:
40 srcrepo.register_sidedata_computer(
40 srcrepo.register_sidedata_computer(
41 revlogconst.KIND_CHANGELOG,
41 revlogconst.KIND_CHANGELOG,
42 sidedatamod.SD_FILES,
42 sidedatamod.SD_FILES,
43 (sidedatamod.SD_FILES,),
43 (sidedatamod.SD_FILES,),
44 metadata._get_worker_sidedata_adder(srcrepo, dstrepo),
44 metadata._get_worker_sidedata_adder(srcrepo, dstrepo),
45 flagutil.REVIDX_HASCOPIESINFO,
45 flagutil.REVIDX_HASCOPIESINFO,
46 replace=True,
46 replace=True,
47 )
47 )
48 return sidedatamod.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
48 return sidedatamod.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
49
49
50
50
51 def _revlogfrompath(repo, rl_type, path):
51 def _revlogfrompath(repo, rl_type, path):
52 """Obtain a revlog from a repo path.
52 """Obtain a revlog from a repo path.
53
53
54 An instance of the appropriate class is returned.
54 An instance of the appropriate class is returned.
55 """
55 """
56 if rl_type & store.FILEFLAGS_CHANGELOG:
56 if rl_type & store.FILEFLAGS_CHANGELOG:
57 return changelog.changelog(repo.svfs)
57 return changelog.changelog(repo.svfs)
58 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
58 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
59 mandir = b''
59 mandir = b''
60 if b'/' in path:
60 if b'/' in path:
61 mandir = path.rsplit(b'/', 1)[0]
61 mandir = path.rsplit(b'/', 1)[0]
62 return manifest.manifestrevlog(
62 return manifest.manifestrevlog(
63 repo.nodeconstants, repo.svfs, tree=mandir
63 repo.nodeconstants, repo.svfs, tree=mandir
64 )
64 )
65 else:
65 else:
66 # drop the extension and the `data/` prefix
66 # drop the extension and the `data/` prefix
67 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
67 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
68 if len(path_part) < 2:
68 if len(path_part) < 2:
69 msg = _(b'cannot recognize revlog from filename: %s')
69 msg = _(b'cannot recognize revlog from filename: %s')
70 msg %= path
70 msg %= path
71 raise error.Abort(msg)
71 raise error.Abort(msg)
72 path = path_part[1]
72 path = path_part[1]
73 return filelog.filelog(repo.svfs, path)
73 return filelog.filelog(repo.svfs, path)
74
74
75
75
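
# Editor's note: illustrative sketch (not Mercurial API) of the filelog path
# handling in the else-branch above: drop the revlog extension and the
# leading `data/` component to recover the tracked file's path.
def _sketch_filelog_path(store_path):
    parts = store_path.rsplit(b'.', 1)[0].split(b'/', 1)
    if len(parts) < 2:
        raise ValueError(b'cannot recognize revlog from filename: %s' % store_path)
    return parts[1]

# example: _sketch_filelog_path(b'data/foo/bar.txt.i') -> b'foo/bar.txt'
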
76 def _copyrevlog(tr, destrepo, oldrl, rl_type, unencodedname):
76 def _copyrevlog(tr, destrepo, oldrl, rl_type, unencodedname):
77 """copy all relevant files for `oldrl` into `destrepo` store
77 """copy all relevant files for `oldrl` into `destrepo` store
78
78
79 Files are copied "as is" without any transformation. The copy is performed
79 Files are copied "as is" without any transformation. The copy is performed
80 without extra checks. Callers are responsible for making sure the copied
80 without extra checks. Callers are responsible for making sure the copied
81 content is compatible with format of the destination repository.
81 content is compatible with format of the destination repository.
82 """
82 """
83 oldrl = getattr(oldrl, '_revlog', oldrl)
83 oldrl = getattr(oldrl, '_revlog', oldrl)
84 newrl = _revlogfrompath(destrepo, rl_type, unencodedname)
84 newrl = _revlogfrompath(destrepo, rl_type, unencodedname)
85 newrl = getattr(newrl, '_revlog', newrl)
85 newrl = getattr(newrl, '_revlog', newrl)
86
86
87 oldvfs = oldrl.opener
87 oldvfs = oldrl.opener
88 newvfs = newrl.opener
88 newvfs = newrl.opener
89 oldindex = oldvfs.join(oldrl._indexfile)
89 oldindex = oldvfs.join(oldrl._indexfile)
90 newindex = newvfs.join(newrl._indexfile)
90 newindex = newvfs.join(newrl._indexfile)
91 olddata = oldvfs.join(oldrl._datafile)
91 olddata = oldvfs.join(oldrl._datafile)
92 newdata = newvfs.join(newrl._datafile)
92 newdata = newvfs.join(newrl._datafile)
93
93
94 with newvfs(newrl._indexfile, b'w'):
94 with newvfs(newrl._indexfile, b'w'):
95 pass # create all the directories
95 pass # create all the directories
96
96
97 util.copyfile(oldindex, newindex)
97 util.copyfile(oldindex, newindex)
98 copydata = oldrl.opener.exists(oldrl._datafile)
98 copydata = oldrl.opener.exists(oldrl._datafile)
99 if copydata:
99 if copydata:
100 util.copyfile(olddata, newdata)
100 util.copyfile(olddata, newdata)
101
101
102 if rl_type & store.FILEFLAGS_FILELOG:
102 if rl_type & store.FILEFLAGS_FILELOG:
103 destrepo.svfs.fncache.add(unencodedname)
103 destrepo.svfs.fncache.add(unencodedname)
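# if the revlog's data file was copied as well, register it in the fncache
# too; its name is the index name with the b'.i' suffix swapped for b'.d'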
104 if copydata:
104 if copydata:
105 destrepo.svfs.fncache.add(unencodedname[:-2] + b'.d')
105 destrepo.svfs.fncache.add(unencodedname[:-2] + b'.d')
106
106
107
107
108 UPGRADE_CHANGELOG = b"changelog"
108 UPGRADE_CHANGELOG = b"changelog"
109 UPGRADE_MANIFEST = b"manifest"
109 UPGRADE_MANIFEST = b"manifest"
110 UPGRADE_FILELOGS = b"all-filelogs"
110 UPGRADE_FILELOGS = b"all-filelogs"
111
111
112 UPGRADE_ALL_REVLOGS = frozenset(
112 UPGRADE_ALL_REVLOGS = frozenset(
113 [UPGRADE_CHANGELOG, UPGRADE_MANIFEST, UPGRADE_FILELOGS]
113 [UPGRADE_CHANGELOG, UPGRADE_MANIFEST, UPGRADE_FILELOGS]
114 )
114 )
115
115
116
116
117 def matchrevlog(revlogfilter, rl_type):
117 def matchrevlog(revlogfilter, rl_type):
118 """check if a revlog is selected for cloning.
118 """check if a revlog is selected for cloning.
119
119
120 In other words, determine whether any updates need to be done on the
120 In other words, determine whether any updates need to be done on the
121 revlog, or whether it can be blindly copied.
121 revlog, or whether it can be blindly copied.
122
122
123 The store entry is checked against the passed filter"""
123 The store entry is checked against the passed filter"""
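# for example, with revlogfilter == {UPGRADE_FILELOGS} only filelog entries
# are selected for re-processing; changelog and manifest revlogs would be
# copied blindly by the caller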
124 if rl_type & store.FILEFLAGS_CHANGELOG:
124 if rl_type & store.FILEFLAGS_CHANGELOG:
125 return UPGRADE_CHANGELOG in revlogfilter
125 return UPGRADE_CHANGELOG in revlogfilter
126 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
126 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
127 return UPGRADE_MANIFEST in revlogfilter
127 return UPGRADE_MANIFEST in revlogfilter
128 assert rl_type & store.FILEFLAGS_FILELOG
128 assert rl_type & store.FILEFLAGS_FILELOG
129 return UPGRADE_FILELOGS in revlogfilter
129 return UPGRADE_FILELOGS in revlogfilter
130
130
131
131
132 def _perform_clone(
132 def _perform_clone(
133 ui,
133 ui,
134 dstrepo,
134 dstrepo,
135 tr,
135 tr,
136 old_revlog,
136 old_revlog,
137 rl_type,
137 rl_type,
138 unencoded,
138 unencoded,
139 upgrade_op,
139 upgrade_op,
140 sidedata_helpers,
140 sidedata_helpers,
141 oncopiedrevision,
141 oncopiedrevision,
142 ):
142 ):
143 """returns the new revlog object created"""
143 """returns the new revlog object created"""
144 newrl = None
144 newrl = None
145 if matchrevlog(upgrade_op.revlogs_to_process, rl_type):
145 if matchrevlog(upgrade_op.revlogs_to_process, rl_type):
146 ui.note(
146 ui.note(
147 _(b'cloning %d revisions from %s\n') % (len(old_revlog), unencoded)
147 _(b'cloning %d revisions from %s\n') % (len(old_revlog), unencoded)
148 )
148 )
149 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
149 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
150 old_revlog.clone(
150 old_revlog.clone(
151 tr,
151 tr,
152 newrl,
152 newrl,
153 addrevisioncb=oncopiedrevision,
153 addrevisioncb=oncopiedrevision,
154 deltareuse=upgrade_op.delta_reuse_mode,
154 deltareuse=upgrade_op.delta_reuse_mode,
155 forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
155 forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
156 sidedata_helpers=sidedata_helpers,
156 sidedata_helpers=sidedata_helpers,
157 )
157 )
158 else:
158 else:
159 msg = _(b'blindly copying %s containing %i revisions\n')
159 msg = _(b'blindly copying %s containing %i revisions\n')
160 ui.note(msg % (unencoded, len(old_revlog)))
160 ui.note(msg % (unencoded, len(old_revlog)))
161 _copyrevlog(tr, dstrepo, old_revlog, rl_type, unencoded)
161 _copyrevlog(tr, dstrepo, old_revlog, rl_type, unencoded)
162
162
163 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
163 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
164 return newrl
164 return newrl
165
165
166
166
167 def _clonerevlogs(
167 def _clonerevlogs(
168 ui,
168 ui,
169 srcrepo,
169 srcrepo,
170 dstrepo,
170 dstrepo,
171 tr,
171 tr,
172 upgrade_op,
172 upgrade_op,
173 ):
173 ):
174 """Copy revlogs between 2 repos."""
174 """Copy revlogs between 2 repos."""
175 revcount = 0
175 revcount = 0
176 srcsize = 0
176 srcsize = 0
177 srcrawsize = 0
177 srcrawsize = 0
178 dstsize = 0
178 dstsize = 0
179 fcount = 0
179 fcount = 0
180 frevcount = 0
180 frevcount = 0
181 fsrcsize = 0
181 fsrcsize = 0
182 frawsize = 0
182 frawsize = 0
183 fdstsize = 0
183 fdstsize = 0
184 mcount = 0
184 mcount = 0
185 mrevcount = 0
185 mrevcount = 0
186 msrcsize = 0
186 msrcsize = 0
187 mrawsize = 0
187 mrawsize = 0
188 mdstsize = 0
188 mdstsize = 0
189 crevcount = 0
189 crevcount = 0
190 csrcsize = 0
190 csrcsize = 0
191 crawsize = 0
191 crawsize = 0
192 cdstsize = 0
192 cdstsize = 0
193
193
194 alldatafiles = list(srcrepo.store.walk())
194 alldatafiles = list(srcrepo.store.walk())
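# with this change, store.walk() yields (file_type, unencoded_name, size)
# entries; the encoded filename is no longer part of the tuple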
195 # mapping of data files which need to be cloned
195 # mapping of data files which need to be cloned
196 # key is the unencoded filename
196 # key is the unencoded filename
197 # value is a (revlog_type, revlog object from srcrepo) pair
197 # value is a (revlog_type, revlog object from srcrepo) pair
198 manifests = {}
198 manifests = {}
199 changelogs = {}
199 changelogs = {}
200 filelogs = {}
200 filelogs = {}
201
201
202 # Perform a pass to collect metadata. This validates we can open all
202 # Perform a pass to collect metadata. This validates we can open all
203 # source files and allows a unified progress bar to be displayed.
203 # source files and allows a unified progress bar to be displayed.
204 for rl_type, unencoded, encoded, size in alldatafiles:
204 for rl_type, unencoded, size in alldatafiles:
205 if not rl_type & store.FILEFLAGS_REVLOG_MAIN:
205 if not rl_type & store.FILEFLAGS_REVLOG_MAIN:
206 continue
206 continue
207
207
208 # the store.walk function will wrongly pick up transaction backups and
208 # the store.walk function will wrongly pick up transaction backups and
209 # get confused. As a quick fix for the 5.9 release, we ignore those.
209 # get confused. As a quick fix for the 5.9 release, we ignore those.
210 # (this is not a module constant because it seems better to keep the
210 # (this is not a module constant because it seems better to keep the
211 # hack together)
211 # hack together)
212 skip_undo = (
212 skip_undo = (
213 b'undo.backup.00changelog.i',
213 b'undo.backup.00changelog.i',
214 b'undo.backup.00manifest.i',
214 b'undo.backup.00manifest.i',
215 )
215 )
216 if unencoded in skip_undo:
216 if unencoded in skip_undo:
217 continue
217 continue
218
218
219 rl = _revlogfrompath(srcrepo, rl_type, unencoded)
219 rl = _revlogfrompath(srcrepo, rl_type, unencoded)
220
220
221 info = rl.storageinfo(
221 info = rl.storageinfo(
222 exclusivefiles=True,
222 exclusivefiles=True,
223 revisionscount=True,
223 revisionscount=True,
224 trackedsize=True,
224 trackedsize=True,
225 storedsize=True,
225 storedsize=True,
226 )
226 )
227
227
228 revcount += info[b'revisionscount'] or 0
228 revcount += info[b'revisionscount'] or 0
229 datasize = info[b'storedsize'] or 0
229 datasize = info[b'storedsize'] or 0
230 rawsize = info[b'trackedsize'] or 0
230 rawsize = info[b'trackedsize'] or 0
231
231
232 srcsize += datasize
232 srcsize += datasize
233 srcrawsize += rawsize
233 srcrawsize += rawsize
234
234
235 # This is for the separate progress bars.
235 # This is for the separate progress bars.
236 if rl_type & store.FILEFLAGS_CHANGELOG:
236 if rl_type & store.FILEFLAGS_CHANGELOG:
237 changelogs[unencoded] = (rl_type, rl)
237 changelogs[unencoded] = (rl_type, rl)
238 crevcount += len(rl)
238 crevcount += len(rl)
239 csrcsize += datasize
239 csrcsize += datasize
240 crawsize += rawsize
240 crawsize += rawsize
241 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
241 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
242 manifests[unencoded] = (rl_type, rl)
242 manifests[unencoded] = (rl_type, rl)
243 mcount += 1
243 mcount += 1
244 mrevcount += len(rl)
244 mrevcount += len(rl)
245 msrcsize += datasize
245 msrcsize += datasize
246 mrawsize += rawsize
246 mrawsize += rawsize
247 elif rl_type & store.FILEFLAGS_FILELOG:
247 elif rl_type & store.FILEFLAGS_FILELOG:
248 filelogs[unencoded] = (rl_type, rl)
248 filelogs[unencoded] = (rl_type, rl)
249 fcount += 1
249 fcount += 1
250 frevcount += len(rl)
250 frevcount += len(rl)
251 fsrcsize += datasize
251 fsrcsize += datasize
252 frawsize += rawsize
252 frawsize += rawsize
253 else:
253 else:
254 raise error.ProgrammingError(b'unknown revlog type')
254 raise error.ProgrammingError(b'unknown revlog type')
255
255
256 if not revcount:
256 if not revcount:
257 return
257 return
258
258
259 ui.status(
259 ui.status(
260 _(
260 _(
261 b'migrating %d total revisions (%d in filelogs, %d in manifests, '
261 b'migrating %d total revisions (%d in filelogs, %d in manifests, '
262 b'%d in changelog)\n'
262 b'%d in changelog)\n'
263 )
263 )
264 % (revcount, frevcount, mrevcount, crevcount)
264 % (revcount, frevcount, mrevcount, crevcount)
265 )
265 )
266 ui.status(
266 ui.status(
267 _(b'migrating %s in store; %s tracked data\n')
267 _(b'migrating %s in store; %s tracked data\n')
268 % ((util.bytecount(srcsize), util.bytecount(srcrawsize)))
268 % ((util.bytecount(srcsize), util.bytecount(srcrawsize)))
269 )
269 )
270
270
271 # Used to keep track of progress.
271 # Used to keep track of progress.
272 progress = None
272 progress = None
273
273
274 def oncopiedrevision(rl, rev, node):
274 def oncopiedrevision(rl, rev, node):
275 progress.increment()
275 progress.increment()
276
276
277 sidedata_helpers = get_sidedata_helpers(srcrepo, dstrepo)
277 sidedata_helpers = get_sidedata_helpers(srcrepo, dstrepo)
278
278
279 # Migrating filelogs
279 # Migrating filelogs
280 ui.status(
280 ui.status(
281 _(
281 _(
282 b'migrating %d filelogs containing %d revisions '
282 b'migrating %d filelogs containing %d revisions '
283 b'(%s in store; %s tracked data)\n'
283 b'(%s in store; %s tracked data)\n'
284 )
284 )
285 % (
285 % (
286 fcount,
286 fcount,
287 frevcount,
287 frevcount,
288 util.bytecount(fsrcsize),
288 util.bytecount(fsrcsize),
289 util.bytecount(frawsize),
289 util.bytecount(frawsize),
290 )
290 )
291 )
291 )
292 progress = srcrepo.ui.makeprogress(_(b'file revisions'), total=frevcount)
292 progress = srcrepo.ui.makeprogress(_(b'file revisions'), total=frevcount)
293 for unencoded, (rl_type, oldrl) in sorted(filelogs.items()):
293 for unencoded, (rl_type, oldrl) in sorted(filelogs.items()):
294 newrl = _perform_clone(
294 newrl = _perform_clone(
295 ui,
295 ui,
296 dstrepo,
296 dstrepo,
297 tr,
297 tr,
298 oldrl,
298 oldrl,
299 rl_type,
299 rl_type,
300 unencoded,
300 unencoded,
301 upgrade_op,
301 upgrade_op,
302 sidedata_helpers,
302 sidedata_helpers,
303 oncopiedrevision,
303 oncopiedrevision,
304 )
304 )
305 info = newrl.storageinfo(storedsize=True)
305 info = newrl.storageinfo(storedsize=True)
306 fdstsize += info[b'storedsize'] or 0
306 fdstsize += info[b'storedsize'] or 0
307 ui.status(
307 ui.status(
308 _(
308 _(
309 b'finished migrating %d filelog revisions across %d '
309 b'finished migrating %d filelog revisions across %d '
310 b'filelogs; change in size: %s\n'
310 b'filelogs; change in size: %s\n'
311 )
311 )
312 % (frevcount, fcount, util.bytecount(fdstsize - fsrcsize))
312 % (frevcount, fcount, util.bytecount(fdstsize - fsrcsize))
313 )
313 )
314
314
315 # Migrating manifests
315 # Migrating manifests
316 ui.status(
316 ui.status(
317 _(
317 _(
318 b'migrating %d manifests containing %d revisions '
318 b'migrating %d manifests containing %d revisions '
319 b'(%s in store; %s tracked data)\n'
319 b'(%s in store; %s tracked data)\n'
320 )
320 )
321 % (
321 % (
322 mcount,
322 mcount,
323 mrevcount,
323 mrevcount,
324 util.bytecount(msrcsize),
324 util.bytecount(msrcsize),
325 util.bytecount(mrawsize),
325 util.bytecount(mrawsize),
326 )
326 )
327 )
327 )
328 if progress:
328 if progress:
329 progress.complete()
329 progress.complete()
330 progress = srcrepo.ui.makeprogress(
330 progress = srcrepo.ui.makeprogress(
331 _(b'manifest revisions'), total=mrevcount
331 _(b'manifest revisions'), total=mrevcount
332 )
332 )
333 for unencoded, (rl_type, oldrl) in sorted(manifests.items()):
333 for unencoded, (rl_type, oldrl) in sorted(manifests.items()):
334 newrl = _perform_clone(
334 newrl = _perform_clone(
335 ui,
335 ui,
336 dstrepo,
336 dstrepo,
337 tr,
337 tr,
338 oldrl,
338 oldrl,
339 rl_type,
339 rl_type,
340 unencoded,
340 unencoded,
341 upgrade_op,
341 upgrade_op,
342 sidedata_helpers,
342 sidedata_helpers,
343 oncopiedrevision,
343 oncopiedrevision,
344 )
344 )
345 info = newrl.storageinfo(storedsize=True)
345 info = newrl.storageinfo(storedsize=True)
346 mdstsize += info[b'storedsize'] or 0
346 mdstsize += info[b'storedsize'] or 0
347 ui.status(
347 ui.status(
348 _(
348 _(
349 b'finished migrating %d manifest revisions across %d '
349 b'finished migrating %d manifest revisions across %d '
350 b'manifests; change in size: %s\n'
350 b'manifests; change in size: %s\n'
351 )
351 )
352 % (mrevcount, mcount, util.bytecount(mdstsize - msrcsize))
352 % (mrevcount, mcount, util.bytecount(mdstsize - msrcsize))
353 )
353 )
354
354
355 # Migrating changelog
355 # Migrating changelog
356 ui.status(
356 ui.status(
357 _(
357 _(
358 b'migrating changelog containing %d revisions '
358 b'migrating changelog containing %d revisions '
359 b'(%s in store; %s tracked data)\n'
359 b'(%s in store; %s tracked data)\n'
360 )
360 )
361 % (
361 % (
362 crevcount,
362 crevcount,
363 util.bytecount(csrcsize),
363 util.bytecount(csrcsize),
364 util.bytecount(crawsize),
364 util.bytecount(crawsize),
365 )
365 )
366 )
366 )
367 if progress:
367 if progress:
368 progress.complete()
368 progress.complete()
369 progress = srcrepo.ui.makeprogress(
369 progress = srcrepo.ui.makeprogress(
370 _(b'changelog revisions'), total=crevcount
370 _(b'changelog revisions'), total=crevcount
371 )
371 )
372 for unencoded, (rl_type, oldrl) in sorted(changelogs.items()):
372 for unencoded, (rl_type, oldrl) in sorted(changelogs.items()):
373 newrl = _perform_clone(
373 newrl = _perform_clone(
374 ui,
374 ui,
375 dstrepo,
375 dstrepo,
376 tr,
376 tr,
377 oldrl,
377 oldrl,
378 rl_type,
378 rl_type,
379 unencoded,
379 unencoded,
380 upgrade_op,
380 upgrade_op,
381 sidedata_helpers,
381 sidedata_helpers,
382 oncopiedrevision,
382 oncopiedrevision,
383 )
383 )
384 info = newrl.storageinfo(storedsize=True)
384 info = newrl.storageinfo(storedsize=True)
385 cdstsize += info[b'storedsize'] or 0
385 cdstsize += info[b'storedsize'] or 0
386 progress.complete()
386 progress.complete()
387 ui.status(
387 ui.status(
388 _(
388 _(
389 b'finished migrating %d changelog revisions; change in size: '
389 b'finished migrating %d changelog revisions; change in size: '
390 b'%s\n'
390 b'%s\n'
391 )
391 )
392 % (crevcount, util.bytecount(cdstsize - csrcsize))
392 % (crevcount, util.bytecount(cdstsize - csrcsize))
393 )
393 )
394
394
395 dstsize = fdstsize + mdstsize + cdstsize
395 dstsize = fdstsize + mdstsize + cdstsize
396 ui.status(
396 ui.status(
397 _(
397 _(
398 b'finished migrating %d total revisions; total change in store '
398 b'finished migrating %d total revisions; total change in store '
399 b'size: %s\n'
399 b'size: %s\n'
400 )
400 )
401 % (revcount, util.bytecount(dstsize - srcsize))
401 % (revcount, util.bytecount(dstsize - srcsize))
402 )
402 )
403
403
404
404
405 def _files_to_copy_post_revlog_clone(srcrepo):
405 def _files_to_copy_post_revlog_clone(srcrepo):
406 """yields files which should be copied to destination after revlogs
406 """yields files which should be copied to destination after revlogs
407 are cloned"""
407 are cloned"""
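# in practice this yields non-revlog store files such as b'phaseroots' or
# b'obsstore' (when present); revlogs, undo files, the lock and the fncache
# are skipped below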
408 for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)):
408 for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)):
409 # don't copy revlogs as they are already cloned
409 # don't copy revlogs as they are already cloned
410 if store.revlog_type(path) is not None:
410 if store.revlog_type(path) is not None:
411 continue
411 continue
412 # Skip transaction related files.
412 # Skip transaction related files.
413 if path.startswith(b'undo'):
413 if path.startswith(b'undo'):
414 continue
414 continue
415 # Only copy regular files.
415 # Only copy regular files.
416 if kind != stat.S_IFREG:
416 if kind != stat.S_IFREG:
417 continue
417 continue
418 # Skip other skipped files.
418 # Skip other skipped files.
419 if path in (b'lock', b'fncache'):
419 if path in (b'lock', b'fncache'):
420 continue
420 continue
421 # TODO: should we skip cache too?
421 # TODO: should we skip cache too?
422
422
423 yield path
423 yield path
424
424
425
425
426 def _replacestores(currentrepo, upgradedrepo, backupvfs, upgrade_op):
426 def _replacestores(currentrepo, upgradedrepo, backupvfs, upgrade_op):
427 """Replace the stores after current repository is upgraded
427 """Replace the stores after current repository is upgraded
428
428
429 Creates a backup of the current repository store at the backup path and
429 Creates a backup of the current repository store at the backup path and
430 replaces the store files in the current repo with those from the upgraded one
430 replaces the store files in the current repo with those from the upgraded one
431
431
432 Arguments:
432 Arguments:
433 currentrepo: repo object of current repository
433 currentrepo: repo object of current repository
434 upgradedrepo: repo object of the upgraded data
434 upgradedrepo: repo object of the upgraded data
435 backupvfs: vfs object for the backup path
435 backupvfs: vfs object for the backup path
436 upgrade_op: upgrade operation object
436 upgrade_op: upgrade operation object
437 to be used to decide what is upgraded
437 to be used to decide what is upgraded
438 """
438 """
439 # TODO: don't blindly rename everything in store
439 # TODO: don't blindly rename everything in store
440 # There can be upgrades where store is not touched at all
440 # There can be upgrades where store is not touched at all
441 if upgrade_op.backup_store:
441 if upgrade_op.backup_store:
442 util.rename(currentrepo.spath, backupvfs.join(b'store'))
442 util.rename(currentrepo.spath, backupvfs.join(b'store'))
443 else:
443 else:
444 currentrepo.vfs.rmtree(b'store', forcibly=True)
444 currentrepo.vfs.rmtree(b'store', forcibly=True)
445 util.rename(upgradedrepo.spath, currentrepo.spath)
445 util.rename(upgradedrepo.spath, currentrepo.spath)
446
446
447
447
448 def finishdatamigration(ui, srcrepo, dstrepo, requirements):
448 def finishdatamigration(ui, srcrepo, dstrepo, requirements):
449 """Hook point for extensions to perform additional actions during upgrade.
449 """Hook point for extensions to perform additional actions during upgrade.
450
450
451 This function is called after revlogs and store files have been copied but
451 This function is called after revlogs and store files have been copied but
452 before the new store is swapped into the original location.
452 before the new store is swapped into the original location.
453 """
453 """
454
454
455
455
456 def upgrade(ui, srcrepo, dstrepo, upgrade_op):
456 def upgrade(ui, srcrepo, dstrepo, upgrade_op):
457 """Do the low-level work of upgrading a repository.
457 """Do the low-level work of upgrading a repository.
458
458
459 The upgrade is effectively performed as a copy between a source
459 The upgrade is effectively performed as a copy between a source
460 repository and a temporary destination repository.
460 repository and a temporary destination repository.
461
461
462 The source repository is unmodified for as long as possible so the
462 The source repository is unmodified for as long as possible so the
463 upgrade can abort at any time without causing loss of service for
463 upgrade can abort at any time without causing loss of service for
464 readers and without corrupting the source repository.
464 readers and without corrupting the source repository.
465 """
465 """
466 assert srcrepo.currentwlock()
466 assert srcrepo.currentwlock()
467 assert dstrepo.currentwlock()
467 assert dstrepo.currentwlock()
468 backuppath = None
468 backuppath = None
469 backupvfs = None
469 backupvfs = None
470
470
471 ui.status(
471 ui.status(
472 _(
472 _(
473 b'(it is safe to interrupt this process any time before '
473 b'(it is safe to interrupt this process any time before '
474 b'data migration completes)\n'
474 b'data migration completes)\n'
475 )
475 )
476 )
476 )
477
477
478 if upgrade_actions.dirstatev2 in upgrade_op.upgrade_actions:
478 if upgrade_actions.dirstatev2 in upgrade_op.upgrade_actions:
479 ui.status(_(b'upgrading to dirstate-v2 from v1\n'))
479 ui.status(_(b'upgrading to dirstate-v2 from v1\n'))
480 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v1', b'v2')
480 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v1', b'v2')
481 upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatev2)
481 upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatev2)
482
482
483 if upgrade_actions.dirstatev2 in upgrade_op.removed_actions:
483 if upgrade_actions.dirstatev2 in upgrade_op.removed_actions:
484 ui.status(_(b'downgrading from dirstate-v2 to v1\n'))
484 ui.status(_(b'downgrading from dirstate-v2 to v1\n'))
485 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v2', b'v1')
485 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v2', b'v1')
486 upgrade_op.removed_actions.remove(upgrade_actions.dirstatev2)
486 upgrade_op.removed_actions.remove(upgrade_actions.dirstatev2)
487
487
488 if not (upgrade_op.upgrade_actions or upgrade_op.removed_actions):
488 if not (upgrade_op.upgrade_actions or upgrade_op.removed_actions):
489 return
489 return
490
490
491 if upgrade_op.requirements_only:
491 if upgrade_op.requirements_only:
492 ui.status(_(b'upgrading repository requirements\n'))
492 ui.status(_(b'upgrading repository requirements\n'))
493 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
493 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
494 # if there is only one action and that is persistent nodemap upgrade
494 # if there is only one action and that is persistent nodemap upgrade
495 # directly write the nodemap file and update requirements instead of going
495 # directly write the nodemap file and update requirements instead of going
496 # through the whole cloning process
496 # through the whole cloning process
497 elif (
497 elif (
498 len(upgrade_op.upgrade_actions) == 1
498 len(upgrade_op.upgrade_actions) == 1
499 and b'persistent-nodemap' in upgrade_op.upgrade_actions_names
499 and b'persistent-nodemap' in upgrade_op.upgrade_actions_names
500 and not upgrade_op.removed_actions
500 and not upgrade_op.removed_actions
501 ):
501 ):
502 ui.status(
502 ui.status(
503 _(b'upgrading repository to use persistent nodemap feature\n')
503 _(b'upgrading repository to use persistent nodemap feature\n')
504 )
504 )
505 with srcrepo.transaction(b'upgrade') as tr:
505 with srcrepo.transaction(b'upgrade') as tr:
506 unfi = srcrepo.unfiltered()
506 unfi = srcrepo.unfiltered()
507 cl = unfi.changelog
507 cl = unfi.changelog
508 nodemap.persist_nodemap(tr, cl, force=True)
508 nodemap.persist_nodemap(tr, cl, force=True)
509 # we want to directly operate on the underlying revlog to force
509 # we want to directly operate on the underlying revlog to force
510 # create a nodemap file. This is fine since this is upgrade code
510 # create a nodemap file. This is fine since this is upgrade code
511 # and it heavily relies on the repository being revlog based,
511 # and it heavily relies on the repository being revlog based,
512 # hence accessing private attributes can be justified
512 # hence accessing private attributes can be justified
513 nodemap.persist_nodemap(
513 nodemap.persist_nodemap(
514 tr, unfi.manifestlog._rootstore._revlog, force=True
514 tr, unfi.manifestlog._rootstore._revlog, force=True
515 )
515 )
516 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
516 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
517 elif (
517 elif (
518 len(upgrade_op.removed_actions) == 1
518 len(upgrade_op.removed_actions) == 1
519 and [
519 and [
520 x
520 x
521 for x in upgrade_op.removed_actions
521 for x in upgrade_op.removed_actions
522 if x.name == b'persistent-nodemap'
522 if x.name == b'persistent-nodemap'
523 ]
523 ]
524 and not upgrade_op.upgrade_actions
524 and not upgrade_op.upgrade_actions
525 ):
525 ):
526 ui.status(
526 ui.status(
527 _(b'downgrading repository to not use persistent nodemap feature\n')
527 _(b'downgrading repository to not use persistent nodemap feature\n')
528 )
528 )
529 with srcrepo.transaction(b'upgrade') as tr:
529 with srcrepo.transaction(b'upgrade') as tr:
530 unfi = srcrepo.unfiltered()
530 unfi = srcrepo.unfiltered()
531 cl = unfi.changelog
531 cl = unfi.changelog
532 nodemap.delete_nodemap(tr, srcrepo, cl)
532 nodemap.delete_nodemap(tr, srcrepo, cl)
533 # check comment 20 lines above for accessing private attributes
533 # check comment 20 lines above for accessing private attributes
534 nodemap.delete_nodemap(
534 nodemap.delete_nodemap(
535 tr, srcrepo, unfi.manifestlog._rootstore._revlog
535 tr, srcrepo, unfi.manifestlog._rootstore._revlog
536 )
536 )
537 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
537 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
538 else:
538 else:
539 with dstrepo.transaction(b'upgrade') as tr:
539 with dstrepo.transaction(b'upgrade') as tr:
540 _clonerevlogs(
540 _clonerevlogs(
541 ui,
541 ui,
542 srcrepo,
542 srcrepo,
543 dstrepo,
543 dstrepo,
544 tr,
544 tr,
545 upgrade_op,
545 upgrade_op,
546 )
546 )
547
547
548 # Now copy other files in the store directory.
548 # Now copy other files in the store directory.
549 for p in _files_to_copy_post_revlog_clone(srcrepo):
549 for p in _files_to_copy_post_revlog_clone(srcrepo):
550 srcrepo.ui.status(_(b'copying %s\n') % p)
550 srcrepo.ui.status(_(b'copying %s\n') % p)
551 src = srcrepo.store.rawvfs.join(p)
551 src = srcrepo.store.rawvfs.join(p)
552 dst = dstrepo.store.rawvfs.join(p)
552 dst = dstrepo.store.rawvfs.join(p)
553 util.copyfile(src, dst, copystat=True)
553 util.copyfile(src, dst, copystat=True)
554
554
555 finishdatamigration(ui, srcrepo, dstrepo, requirements)
555 finishdatamigration(ui, srcrepo, dstrepo, requirements)
556
556
557 ui.status(_(b'data fully upgraded in a temporary repository\n'))
557 ui.status(_(b'data fully upgraded in a temporary repository\n'))
558
558
559 if upgrade_op.backup_store:
559 if upgrade_op.backup_store:
560 backuppath = pycompat.mkdtemp(
560 backuppath = pycompat.mkdtemp(
561 prefix=b'upgradebackup.', dir=srcrepo.path
561 prefix=b'upgradebackup.', dir=srcrepo.path
562 )
562 )
563 backupvfs = vfsmod.vfs(backuppath)
563 backupvfs = vfsmod.vfs(backuppath)
564
564
565 # Make a backup of requires file first, as it is the first to be modified.
565 # Make a backup of requires file first, as it is the first to be modified.
566 util.copyfile(
566 util.copyfile(
567 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
567 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
568 )
568 )
569
569
570 # We install an arbitrary requirement that clients must not support
570 # We install an arbitrary requirement that clients must not support
571 # as a mechanism to lock out new clients during the data swap. This is
571 # as a mechanism to lock out new clients during the data swap. This is
572 # better than allowing a client to continue while the repository is in
572 # better than allowing a client to continue while the repository is in
573 # an inconsistent state.
573 # an inconsistent state.
574 ui.status(
574 ui.status(
575 _(
575 _(
576 b'marking source repository as being upgraded; clients will be '
576 b'marking source repository as being upgraded; clients will be '
577 b'unable to read from repository\n'
577 b'unable to read from repository\n'
578 )
578 )
579 )
579 )
580 scmutil.writereporequirements(
580 scmutil.writereporequirements(
581 srcrepo, srcrepo.requirements | {b'upgradeinprogress'}
581 srcrepo, srcrepo.requirements | {b'upgradeinprogress'}
582 )
582 )
583
583
584 ui.status(_(b'starting in-place swap of repository data\n'))
584 ui.status(_(b'starting in-place swap of repository data\n'))
585 if upgrade_op.backup_store:
585 if upgrade_op.backup_store:
586 ui.status(
586 ui.status(
587 _(b'replaced files will be backed up at %s\n') % backuppath
587 _(b'replaced files will be backed up at %s\n') % backuppath
588 )
588 )
589
589
590 # Now swap in the new store directory. Doing it as a rename should make
590 # Now swap in the new store directory. Doing it as a rename should make
591 # the operation nearly instantaneous and atomic (at least in well-behaved
591 # the operation nearly instantaneous and atomic (at least in well-behaved
592 # environments).
592 # environments).
593 ui.status(_(b'replacing store...\n'))
593 ui.status(_(b'replacing store...\n'))
594 tstart = util.timer()
594 tstart = util.timer()
595 _replacestores(srcrepo, dstrepo, backupvfs, upgrade_op)
595 _replacestores(srcrepo, dstrepo, backupvfs, upgrade_op)
596 elapsed = util.timer() - tstart
596 elapsed = util.timer() - tstart
597 ui.status(
597 ui.status(
598 _(
598 _(
599 b'store replacement complete; repository was inconsistent for '
599 b'store replacement complete; repository was inconsistent for '
600 b'%0.1fs\n'
600 b'%0.1fs\n'
601 )
601 )
602 % elapsed
602 % elapsed
603 )
603 )
604
604
605 # We first write the requirements file. Any new requirements will lock
605 # We first write the requirements file. Any new requirements will lock
606 # out legacy clients.
606 # out legacy clients.
607 ui.status(
607 ui.status(
608 _(
608 _(
609 b'finalizing requirements file and making repository readable '
609 b'finalizing requirements file and making repository readable '
610 b'again\n'
610 b'again\n'
611 )
611 )
612 )
612 )
613 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
613 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
614
614
615 if upgrade_op.backup_store:
615 if upgrade_op.backup_store:
616 # The lock file from the old store won't be removed because nothing has a
616 # The lock file from the old store won't be removed because nothing has a
617 # reference to its new location. So clean it up manually. Alternatively, we
617 # reference to its new location. So clean it up manually. Alternatively, we
618 # could update srcrepo.svfs and other variables to point to the new
618 # could update srcrepo.svfs and other variables to point to the new
619 # location. This is simpler.
619 # location. This is simpler.
620 assert backupvfs is not None # help pytype
620 assert backupvfs is not None # help pytype
621 backupvfs.unlink(b'store/lock')
621 backupvfs.unlink(b'store/lock')
622
622
623 return backuppath
623 return backuppath
624
624
625
625
626 def upgrade_dirstate(ui, srcrepo, upgrade_op, old, new):
626 def upgrade_dirstate(ui, srcrepo, upgrade_op, old, new):
627 if upgrade_op.backup_store:
627 if upgrade_op.backup_store:
628 backuppath = pycompat.mkdtemp(
628 backuppath = pycompat.mkdtemp(
629 prefix=b'upgradebackup.', dir=srcrepo.path
629 prefix=b'upgradebackup.', dir=srcrepo.path
630 )
630 )
631 ui.status(_(b'replaced files will be backed up at %s\n') % backuppath)
631 ui.status(_(b'replaced files will be backed up at %s\n') % backuppath)
632 backupvfs = vfsmod.vfs(backuppath)
632 backupvfs = vfsmod.vfs(backuppath)
633 util.copyfile(
633 util.copyfile(
634 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
634 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
635 )
635 )
636 util.copyfile(
636 util.copyfile(
637 srcrepo.vfs.join(b'dirstate'), backupvfs.join(b'dirstate')
637 srcrepo.vfs.join(b'dirstate'), backupvfs.join(b'dirstate')
638 )
638 )
639
639
640 assert srcrepo.dirstate._use_dirstate_v2 == (old == b'v2')
640 assert srcrepo.dirstate._use_dirstate_v2 == (old == b'v2')
641 srcrepo.dirstate._map._use_dirstate_tree = True
641 srcrepo.dirstate._map._use_dirstate_tree = True
642 srcrepo.dirstate._map.preload()
642 srcrepo.dirstate._map.preload()
643 srcrepo.dirstate._use_dirstate_v2 = new == b'v2'
643 srcrepo.dirstate._use_dirstate_v2 = new == b'v2'
644 srcrepo.dirstate._map._use_dirstate_v2 = srcrepo.dirstate._use_dirstate_v2
644 srcrepo.dirstate._map._use_dirstate_v2 = srcrepo.dirstate._use_dirstate_v2
645 srcrepo.dirstate._dirty = True
645 srcrepo.dirstate._dirty = True
646 srcrepo.vfs.unlink(b'dirstate')
646 srcrepo.vfs.unlink(b'dirstate')
647 srcrepo.dirstate.write(None)
647 srcrepo.dirstate.write(None)
648
648
649 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
649 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
@@ -1,586 +1,588 b''
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11
11
12 from .i18n import _
12 from .i18n import _
13 from .node import short
13 from .node import short
14 from .utils import stringutil
14 from .utils import stringutil
15
15
16 from . import (
16 from . import (
17 error,
17 error,
18 pycompat,
18 pycompat,
19 revlog,
19 revlog,
20 util,
20 util,
21 )
21 )
22
22
23 VERIFY_DEFAULT = 0
23 VERIFY_DEFAULT = 0
24 VERIFY_FULL = 1
24 VERIFY_FULL = 1
25
25
26
26
27 def verify(repo, level=None):
27 def verify(repo, level=None):
28 with repo.lock():
28 with repo.lock():
29 v = verifier(repo, level)
29 v = verifier(repo, level)
30 return v.verify()
30 return v.verify()
31
31
32
32
33 def _normpath(f):
33 def _normpath(f):
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 # converted repo may contain repeated slashes
35 # converted repo may contain repeated slashes
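# illustration: _normpath(b'foo//bar///baz') returns b'foo/bar/baz'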
36 while b'//' in f:
36 while b'//' in f:
37 f = f.replace(b'//', b'/')
37 f = f.replace(b'//', b'/')
38 return f
38 return f
39
39
40
40
41 HINT_FNCACHE = _(
41 HINT_FNCACHE = _(
42 b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
42 b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
43 )
43 )
44
44
45 WARN_PARENT_DIR_UNKNOWN_REV = _(
45 WARN_PARENT_DIR_UNKNOWN_REV = _(
46 b"parent-directory manifest refers to unknown revision %s"
46 b"parent-directory manifest refers to unknown revision %s"
47 )
47 )
48
48
49 WARN_UNKNOWN_COPY_SOURCE = _(
49 WARN_UNKNOWN_COPY_SOURCE = _(
50 b"warning: copy source of '%s' not in parents of %s"
50 b"warning: copy source of '%s' not in parents of %s"
51 )
51 )
52
52
53 WARN_NULLID_COPY_SOURCE = _(
53 WARN_NULLID_COPY_SOURCE = _(
54 b"warning: %s@%s: copy source revision is nullid %s:%s\n"
54 b"warning: %s@%s: copy source revision is nullid %s:%s\n"
55 )
55 )
56
56
57
57
58 class verifier(object):
58 class verifier(object):
59 def __init__(self, repo, level=None):
59 def __init__(self, repo, level=None):
60 self.repo = repo.unfiltered()
60 self.repo = repo.unfiltered()
61 self.ui = repo.ui
61 self.ui = repo.ui
62 self.match = repo.narrowmatch()
62 self.match = repo.narrowmatch()
63 if level is None:
63 if level is None:
64 level = VERIFY_DEFAULT
64 level = VERIFY_DEFAULT
65 self._level = level
65 self._level = level
66 self.badrevs = set()
66 self.badrevs = set()
67 self.errors = 0
67 self.errors = 0
68 self.warnings = 0
68 self.warnings = 0
69 self.havecl = len(repo.changelog) > 0
69 self.havecl = len(repo.changelog) > 0
70 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
70 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
71 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
71 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
72 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
72 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
73 self.refersmf = False
73 self.refersmf = False
74 self.fncachewarned = False
74 self.fncachewarned = False
75 # developer config: verify.skipflags
75 # developer config: verify.skipflags
76 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
76 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
77 self.warnorphanstorefiles = True
77 self.warnorphanstorefiles = True
78
78
79 def _warn(self, msg):
79 def _warn(self, msg):
80 """record a "warning" level issue"""
80 """record a "warning" level issue"""
81 self.ui.warn(msg + b"\n")
81 self.ui.warn(msg + b"\n")
82 self.warnings += 1
82 self.warnings += 1
83
83
84 def _err(self, linkrev, msg, filename=None):
84 def _err(self, linkrev, msg, filename=None):
85 """record a "error" level issue"""
85 """record a "error" level issue"""
86 if linkrev is not None:
86 if linkrev is not None:
87 self.badrevs.add(linkrev)
87 self.badrevs.add(linkrev)
88 linkrev = b"%d" % linkrev
88 linkrev = b"%d" % linkrev
89 else:
89 else:
90 linkrev = b'?'
90 linkrev = b'?'
91 msg = b"%s: %s" % (linkrev, msg)
91 msg = b"%s: %s" % (linkrev, msg)
92 if filename:
92 if filename:
93 msg = b"%s@%s" % (filename, msg)
93 msg = b"%s@%s" % (filename, msg)
94 self.ui.warn(b" " + msg + b"\n")
94 self.ui.warn(b" " + msg + b"\n")
95 self.errors += 1
95 self.errors += 1
96
96
97 def _exc(self, linkrev, msg, inst, filename=None):
97 def _exc(self, linkrev, msg, inst, filename=None):
98 """record exception raised during the verify process"""
98 """record exception raised during the verify process"""
99 fmsg = stringutil.forcebytestr(inst)
99 fmsg = stringutil.forcebytestr(inst)
100 if not fmsg:
100 if not fmsg:
101 fmsg = pycompat.byterepr(inst)
101 fmsg = pycompat.byterepr(inst)
102 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
102 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
103
103
104 def _checkrevlog(self, obj, name, linkrev):
104 def _checkrevlog(self, obj, name, linkrev):
105 """verify high level property of a revlog
105 """verify high level property of a revlog
106
106
107 - revlog is present,
107 - revlog is present,
108 - revlog is non-empty,
108 - revlog is non-empty,
109 - sizes (index and data) are correct,
109 - sizes (index and data) are correct,
110 - revlog's format version is correct.
110 - revlog's format version is correct.
111 """
111 """
112 if not len(obj) and (self.havecl or self.havemf):
112 if not len(obj) and (self.havecl or self.havemf):
113 self._err(linkrev, _(b"empty or missing %s") % name)
113 self._err(linkrev, _(b"empty or missing %s") % name)
114 return
114 return
115
115
116 d = obj.checksize()
116 d = obj.checksize()
117 if d[0]:
117 if d[0]:
118 self._err(None, _(b"data length off by %d bytes") % d[0], name)
118 self._err(None, _(b"data length off by %d bytes") % d[0], name)
119 if d[1]:
119 if d[1]:
120 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
120 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
121
121
122 if obj._format_version != revlog.REVLOGV0:
122 if obj._format_version != revlog.REVLOGV0:
123 if not self.revlogv1:
123 if not self.revlogv1:
124 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
124 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
125 elif self.revlogv1:
125 elif self.revlogv1:
126 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
126 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
127
127
128 def _checkentry(self, obj, i, node, seen, linkrevs, f):
128 def _checkentry(self, obj, i, node, seen, linkrevs, f):
129 """verify a single revlog entry
129 """verify a single revlog entry
130
130
131 arguments are:
131 arguments are:
132 - obj: the source revlog
132 - obj: the source revlog
133 - i: the revision number
133 - i: the revision number
134 - node: the revision node id
134 - node: the revision node id
135 - seen: nodes previously seen for this revlog
135 - seen: nodes previously seen for this revlog
136 - linkrevs: [changelog-revisions] introducing "node"
136 - linkrevs: [changelog-revisions] introducing "node"
137 - f: string label ("changelog", "manifest", or filename)
137 - f: string label ("changelog", "manifest", or filename)
138
138
139 Performs the following checks:
139 Performs the following checks:
140 - linkrev points to an existing changelog revision,
140 - linkrev points to an existing changelog revision,
141 - linkrev points to a changelog revision that introduces this revision,
141 - linkrev points to a changelog revision that introduces this revision,
142 - linkrev points to the lowest of these changesets,
142 - linkrev points to the lowest of these changesets,
143 - both parents exist in the revlog,
143 - both parents exist in the revlog,
144 - the revision is not duplicated.
144 - the revision is not duplicated.
145
145
146 Return the linkrev of the revision (or None for changelog's revisions).
146 Return the linkrev of the revision (or None for changelog's revisions).
147 """
147 """
148 lr = obj.linkrev(obj.rev(node))
148 lr = obj.linkrev(obj.rev(node))
149 if lr < 0 or (self.havecl and lr not in linkrevs):
149 if lr < 0 or (self.havecl and lr not in linkrevs):
150 if lr < 0 or lr >= len(self.repo.changelog):
150 if lr < 0 or lr >= len(self.repo.changelog):
151 msg = _(b"rev %d points to nonexistent changeset %d")
151 msg = _(b"rev %d points to nonexistent changeset %d")
152 else:
152 else:
153 msg = _(b"rev %d points to unexpected changeset %d")
153 msg = _(b"rev %d points to unexpected changeset %d")
154 self._err(None, msg % (i, lr), f)
154 self._err(None, msg % (i, lr), f)
155 if linkrevs:
155 if linkrevs:
156 if f and len(linkrevs) > 1:
156 if f and len(linkrevs) > 1:
157 try:
157 try:
158 # attempt to filter down to real linkrevs
158 # attempt to filter down to real linkrevs
159 linkrevs = [
159 linkrevs = [
160 lr for lr in linkrevs
160 lr for lr in linkrevs
161 if self.lrugetctx(lr)[f].filenode() == node
161 if self.lrugetctx(lr)[f].filenode() == node
162 ]
162 ]
163 except Exception:
163 except Exception:
164 pass
164 pass
165 msg = _(b" (expected %s)")
165 msg = _(b" (expected %s)")
166 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
166 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
167 self._warn(msg)
167 self._warn(msg)
168 lr = None # can't be trusted
168 lr = None # can't be trusted
169
169
170 try:
170 try:
171 p1, p2 = obj.parents(node)
171 p1, p2 = obj.parents(node)
172 if p1 not in seen and p1 != self.repo.nullid:
172 if p1 not in seen and p1 != self.repo.nullid:
173 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
173 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
174 self._err(lr, msg, f)
174 self._err(lr, msg, f)
175 if p2 not in seen and p2 != self.repo.nullid:
175 if p2 not in seen and p2 != self.repo.nullid:
176 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
176 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
177 self._err(lr, msg, f)
177 self._err(lr, msg, f)
178 except Exception as inst:
178 except Exception as inst:
179 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
179 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
180
180
181 if node in seen:
181 if node in seen:
182 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
182 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
183 seen[node] = i
183 seen[node] = i
184 return lr
184 return lr
185
185
186 def verify(self):
186 def verify(self):
187 """verify the content of the Mercurial repository
187 """verify the content of the Mercurial repository
188
188
189 This method runs all verifications, displaying issues as they are found.
189 This method runs all verifications, displaying issues as they are found.
190
190
191 Return 1 if any errors have been encountered, 0 otherwise."""
191 Return 1 if any errors have been encountered, 0 otherwise."""
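# usage sketch: `hg verify` eventually calls verify(repo), and the 0/1
# value returned below becomes the command's result code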
192 # initial validation and generic report
192 # initial validation and generic report
193 repo = self.repo
193 repo = self.repo
194 ui = repo.ui
194 ui = repo.ui
195 if not repo.url().startswith(b'file:'):
195 if not repo.url().startswith(b'file:'):
196 raise error.Abort(_(b"cannot verify bundle or remote repos"))
196 raise error.Abort(_(b"cannot verify bundle or remote repos"))
197
197
198 if os.path.exists(repo.sjoin(b"journal")):
198 if os.path.exists(repo.sjoin(b"journal")):
199 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
199 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
200
200
201 if ui.verbose or not self.revlogv1:
201 if ui.verbose or not self.revlogv1:
202 ui.status(
202 ui.status(
203 _(b"repository uses revlog format %d\n")
203 _(b"repository uses revlog format %d\n")
204 % (self.revlogv1 and 1 or 0)
204 % (self.revlogv1 and 1 or 0)
205 )
205 )
206
206
207 # data verification
207 # data verification
208 mflinkrevs, filelinkrevs = self._verifychangelog()
208 mflinkrevs, filelinkrevs = self._verifychangelog()
209 filenodes = self._verifymanifest(mflinkrevs)
209 filenodes = self._verifymanifest(mflinkrevs)
210 del mflinkrevs
210 del mflinkrevs
211 self._crosscheckfiles(filelinkrevs, filenodes)
211 self._crosscheckfiles(filelinkrevs, filenodes)
212 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
212 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
213
213
214 # final report
214 # final report
215 ui.status(
215 ui.status(
216 _(b"checked %d changesets with %d changes to %d files\n")
216 _(b"checked %d changesets with %d changes to %d files\n")
217 % (len(repo.changelog), filerevisions, totalfiles)
217 % (len(repo.changelog), filerevisions, totalfiles)
218 )
218 )
219 if self.warnings:
219 if self.warnings:
220 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
220 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
221 if self.fncachewarned:
221 if self.fncachewarned:
222 ui.warn(HINT_FNCACHE)
222 ui.warn(HINT_FNCACHE)
223 if self.errors:
223 if self.errors:
224 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
224 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
225 if self.badrevs:
225 if self.badrevs:
226 msg = _(b"(first damaged changeset appears to be %d)\n")
226 msg = _(b"(first damaged changeset appears to be %d)\n")
227 msg %= min(self.badrevs)
227 msg %= min(self.badrevs)
228 ui.warn(msg)
228 ui.warn(msg)
229 return 1
229 return 1
230 return 0
230 return 0
231
231
232 def _verifychangelog(self):
232 def _verifychangelog(self):
233 """verify the changelog of a repository
233 """verify the changelog of a repository
234
234
235 The following checks are performed:
235 The following checks are performed:
236 - all of `_checkrevlog` checks,
236 - all of `_checkrevlog` checks,
237 - all of `_checkentry` checks (for each revision),
237 - all of `_checkentry` checks (for each revision),
238 - each revision can be read.
238 - each revision can be read.
239
239
240 The function returns some of the data observed in the changesets as a
240 The function returns some of the data observed in the changesets as a
241 (mflinkrevs, filelinkrevs) tuple:
241 (mflinkrevs, filelinkrevs) tuple:
242 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
242 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
243 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
243 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
244
244
245 If a matcher was specified, filelinkrevs will only contain matched
245 If a matcher was specified, filelinkrevs will only contain matched
246 files.
246 files.
247 """
247 """
248 ui = self.ui
248 ui = self.ui
249 repo = self.repo
249 repo = self.repo
250 match = self.match
250 match = self.match
251 cl = repo.changelog
251 cl = repo.changelog
252
252
253 ui.status(_(b"checking changesets\n"))
253 ui.status(_(b"checking changesets\n"))
254 mflinkrevs = {}
254 mflinkrevs = {}
255 filelinkrevs = {}
255 filelinkrevs = {}
256 seen = {}
256 seen = {}
257 self._checkrevlog(cl, b"changelog", 0)
257 self._checkrevlog(cl, b"changelog", 0)
258 progress = ui.makeprogress(
258 progress = ui.makeprogress(
259 _(b'checking'), unit=_(b'changesets'), total=len(repo)
259 _(b'checking'), unit=_(b'changesets'), total=len(repo)
260 )
260 )
261 for i in repo:
261 for i in repo:
262 progress.update(i)
262 progress.update(i)
263 n = cl.node(i)
263 n = cl.node(i)
264 self._checkentry(cl, i, n, seen, [i], b"changelog")
264 self._checkentry(cl, i, n, seen, [i], b"changelog")
265
265
266 try:
266 try:
267 changes = cl.read(n)
267 changes = cl.read(n)
268 if changes[0] != self.repo.nullid:
268 if changes[0] != self.repo.nullid:
269 mflinkrevs.setdefault(changes[0], []).append(i)
269 mflinkrevs.setdefault(changes[0], []).append(i)
270 self.refersmf = True
270 self.refersmf = True
271 for f in changes[3]:
271 for f in changes[3]:
272 if match(f):
272 if match(f):
273 filelinkrevs.setdefault(_normpath(f), []).append(i)
273 filelinkrevs.setdefault(_normpath(f), []).append(i)
274 except Exception as inst:
274 except Exception as inst:
275 self.refersmf = True
275 self.refersmf = True
276 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
276 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
277 progress.complete()
277 progress.complete()
278 return mflinkrevs, filelinkrevs
278 return mflinkrevs, filelinkrevs
279
279
280 def _verifymanifest(
280 def _verifymanifest(
281 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
281 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
282 ):
282 ):
283 """verify the manifestlog content
283 """verify the manifestlog content
284
284
285 Inputs:
285 Inputs:
286 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
286 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
287 - dir: a subdirectory to check (for tree manifest repo)
287 - dir: a subdirectory to check (for tree manifest repo)
288 - storefiles: set of currently "orphan" files.
288 - storefiles: set of currently "orphan" files.
289 - subdirprogress: a progress object
289 - subdirprogress: a progress object
290
290
291 This function checks:
291 This function checks:
292 * all of `_checkrevlog` checks (for all manifest related revlogs)
292 * all of `_checkrevlog` checks (for all manifest related revlogs)
293 * all of `_checkentry` checks (for all manifest related revisions)
293 * all of `_checkentry` checks (for all manifest related revisions)
294 * nodes for each subdirectory exist in the sub-directory manifest
294 * nodes for each subdirectory exist in the sub-directory manifest
295 * each manifest entry has a file path
295 * each manifest entry has a file path
296 * each manifest node referred to in mflinkrevs exists in the manifest log
296 * each manifest node referred to in mflinkrevs exists in the manifest log
297
297
298 If tree manifests are in use and a matcher is specified, only the
298 If tree manifests are in use and a matcher is specified, only the
299 sub-directories matching it will be verified.
299 sub-directories matching it will be verified.
300
300
301 return a two level mapping:
301 return a two level mapping:
302 {"path" -> { filenode -> changelog-revision}}
302 {"path" -> { filenode -> changelog-revision}}
303
303
304 This mapping primarily contains entries for every file in the
304 This mapping primarily contains entries for every file in the
305 repository. In addition, when tree-manifest is used, it also contains
305 repository. In addition, when tree-manifest is used, it also contains
306 sub-directory entries.
306 sub-directory entries.
307
307
308 If a matcher is provided, only matching paths will be included.
308 If a matcher is provided, only matching paths will be included.
309 """
309 """
310 repo = self.repo
310 repo = self.repo
311 ui = self.ui
311 ui = self.ui
312 match = self.match
312 match = self.match
313 mfl = self.repo.manifestlog
313 mfl = self.repo.manifestlog
314 mf = mfl.getstorage(dir)
314 mf = mfl.getstorage(dir)
315
315
316 if not dir:
316 if not dir:
317 self.ui.status(_(b"checking manifests\n"))
317 self.ui.status(_(b"checking manifests\n"))
318
318
319 filenodes = {}
319 filenodes = {}
320 subdirnodes = {}
320 subdirnodes = {}
321 seen = {}
321 seen = {}
322 label = b"manifest"
322 label = b"manifest"
323 if dir:
323 if dir:
324 label = dir
324 label = dir
325 revlogfiles = mf.files()
325 revlogfiles = mf.files()
326 storefiles.difference_update(revlogfiles)
326 storefiles.difference_update(revlogfiles)
327 if subdirprogress: # should be true since we're in a subdirectory
327 if subdirprogress: # should be true since we're in a subdirectory
328 subdirprogress.increment()
328 subdirprogress.increment()
329 if self.refersmf:
329 if self.refersmf:
330 # Do not check manifest if there are only changelog entries with
330 # Do not check manifest if there are only changelog entries with
331 # null manifests.
331 # null manifests.
332 self._checkrevlog(mf._revlog, label, 0)
332 self._checkrevlog(mf._revlog, label, 0)
333 progress = ui.makeprogress(
333 progress = ui.makeprogress(
334 _(b'checking'), unit=_(b'manifests'), total=len(mf)
334 _(b'checking'), unit=_(b'manifests'), total=len(mf)
335 )
335 )
336 for i in mf:
336 for i in mf:
337 if not dir:
337 if not dir:
338 progress.update(i)
338 progress.update(i)
339 n = mf.node(i)
339 n = mf.node(i)
340 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
340 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
341 if n in mflinkrevs:
341 if n in mflinkrevs:
342 del mflinkrevs[n]
342 del mflinkrevs[n]
343 elif dir:
343 elif dir:
344 msg = _(b"%s not in parent-directory manifest") % short(n)
344 msg = _(b"%s not in parent-directory manifest") % short(n)
345 self._err(lr, msg, label)
345 self._err(lr, msg, label)
346 else:
346 else:
347 self._err(lr, _(b"%s not in changesets") % short(n), label)
347 self._err(lr, _(b"%s not in changesets") % short(n), label)
348
348
349 try:
349 try:
350 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
350 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
351 for f, fn, fl in mfdelta.iterentries():
351 for f, fn, fl in mfdelta.iterentries():
352 if not f:
352 if not f:
353 self._err(lr, _(b"entry without name in manifest"))
353 self._err(lr, _(b"entry without name in manifest"))
354 elif f == b"/dev/null": # ignore this in very old repos
354 elif f == b"/dev/null": # ignore this in very old repos
355 continue
355 continue
356 fullpath = dir + _normpath(f)
356 fullpath = dir + _normpath(f)
357 if fl == b't':
357 if fl == b't':
358 if not match.visitdir(fullpath):
358 if not match.visitdir(fullpath):
359 continue
359 continue
360 sdn = subdirnodes.setdefault(fullpath + b'/', {})
360 sdn = subdirnodes.setdefault(fullpath + b'/', {})
361 sdn.setdefault(fn, []).append(lr)
361 sdn.setdefault(fn, []).append(lr)
362 else:
362 else:
363 if not match(fullpath):
363 if not match(fullpath):
364 continue
364 continue
365 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
365 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
366 except Exception as inst:
366 except Exception as inst:
367 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
367 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
368 if self._level >= VERIFY_FULL:
368 if self._level >= VERIFY_FULL:
369 try:
369 try:
370 # Various issues can affect manifest. So we read each full
370 # Various issues can affect manifest. So we read each full
371 # text from storage. This triggers the checks from the core
371 # text from storage. This triggers the checks from the core
372 # code (eg: hash verification, filename are ordered, etc.)
372 # code (eg: hash verification, filename are ordered, etc.)
373 mfdelta = mfl.get(dir, n).read()
373 mfdelta = mfl.get(dir, n).read()
374 except Exception as inst:
374 except Exception as inst:
375 msg = _(b"reading full manifest %s") % short(n)
375 msg = _(b"reading full manifest %s") % short(n)
376 self._exc(lr, msg, inst, label)
376 self._exc(lr, msg, inst, label)
377
377
378 if not dir:
378 if not dir:
379 progress.complete()
379 progress.complete()
380
380
381 if self.havemf:
381 if self.havemf:
382 # since we delete entries in `mflinkrevs` during iteration, any
382 # since we delete entries in `mflinkrevs` during iteration, any
383 # remaining entries are "missing". We need to issue errors for them.
383 # remaining entries are "missing". We need to issue errors for them.
384 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
384 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
385 for c, m in sorted(changesetpairs):
385 for c, m in sorted(changesetpairs):
386 if dir:
386 if dir:
387 self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
387 self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
388 else:
388 else:
389 msg = _(b"changeset refers to unknown revision %s")
389 msg = _(b"changeset refers to unknown revision %s")
390 msg %= short(m)
390 msg %= short(m)
391 self._err(c, msg, label)
391 self._err(c, msg, label)
392
392
393 if not dir and subdirnodes:
393 if not dir and subdirnodes:
394 self.ui.status(_(b"checking directory manifests\n"))
394 self.ui.status(_(b"checking directory manifests\n"))
395 storefiles = set()
395 storefiles = set()
396 subdirs = set()
396 subdirs = set()
397 revlogv1 = self.revlogv1
397 revlogv1 = self.revlogv1
398 for t, f, f2, size in repo.store.datafiles():
399 if not f:
400 self._err(None, _(b"cannot decode filename '%s'") % f2)
401 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
402 storefiles.add(_normpath(f))
403 subdirs.add(os.path.dirname(f))
398 undecodable = []
399 for t, f, size in repo.store.datafiles(undecodable=undecodable):
400 if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
401 storefiles.add(_normpath(f))
402 subdirs.add(os.path.dirname(f))
403 for f in undecodable:
404 self._err(None, _(b"cannot decode filename '%s'") % f)
404 subdirprogress = ui.makeprogress(
405 subdirprogress = ui.makeprogress(
405 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
406 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
406 )
407 )
407
408
408 for subdir, linkrevs in pycompat.iteritems(subdirnodes):
409 for subdir, linkrevs in pycompat.iteritems(subdirnodes):
409 subdirfilenodes = self._verifymanifest(
410 subdirfilenodes = self._verifymanifest(
410 linkrevs, subdir, storefiles, subdirprogress
411 linkrevs, subdir, storefiles, subdirprogress
411 )
412 )
412 for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
413 for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
413 filenodes.setdefault(f, {}).update(onefilenodes)
414 filenodes.setdefault(f, {}).update(onefilenodes)
414
415
415 if not dir and subdirnodes:
416 if not dir and subdirnodes:
416 assert subdirprogress is not None # help pytype
417 assert subdirprogress is not None # help pytype
417 subdirprogress.complete()
418 subdirprogress.complete()
418 if self.warnorphanstorefiles:
419 if self.warnorphanstorefiles:
419 for f in sorted(storefiles):
420 for f in sorted(storefiles):
420 self._warn(_(b"warning: orphan data file '%s'") % f)
421 self._warn(_(b"warning: orphan data file '%s'") % f)
421
422
422 return filenodes
423 return filenodes
423
424
424 def _crosscheckfiles(self, filelinkrevs, filenodes):
425 def _crosscheckfiles(self, filelinkrevs, filenodes):
425 repo = self.repo
426 repo = self.repo
426 ui = self.ui
427 ui = self.ui
427 ui.status(_(b"crosschecking files in changesets and manifests\n"))
428 ui.status(_(b"crosschecking files in changesets and manifests\n"))
428
429
429 total = len(filelinkrevs) + len(filenodes)
430 total = len(filelinkrevs) + len(filenodes)
430 progress = ui.makeprogress(
431 progress = ui.makeprogress(
431 _(b'crosschecking'), unit=_(b'files'), total=total
432 _(b'crosschecking'), unit=_(b'files'), total=total
432 )
433 )
433 if self.havemf:
434 if self.havemf:
434 for f in sorted(filelinkrevs):
435 for f in sorted(filelinkrevs):
435 progress.increment()
436 progress.increment()
436 if f not in filenodes:
437 if f not in filenodes:
437 lr = filelinkrevs[f][0]
438 lr = filelinkrevs[f][0]
438 self._err(lr, _(b"in changeset but not in manifest"), f)
439 self._err(lr, _(b"in changeset but not in manifest"), f)
439
440
440 if self.havecl:
441 if self.havecl:
441 for f in sorted(filenodes):
442 for f in sorted(filenodes):
442 progress.increment()
443 progress.increment()
443 if f not in filelinkrevs:
444 if f not in filelinkrevs:
444 try:
445 try:
445 fl = repo.file(f)
446 fl = repo.file(f)
446 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
447 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
447 except Exception:
448 except Exception:
448 lr = None
449 lr = None
449 self._err(lr, _(b"in manifest but not in changeset"), f)
450 self._err(lr, _(b"in manifest but not in changeset"), f)
450
451
451 progress.complete()
452 progress.complete()
452
453
453 def _verifyfiles(self, filenodes, filelinkrevs):
454 def _verifyfiles(self, filenodes, filelinkrevs):
454 repo = self.repo
455 repo = self.repo
455 ui = self.ui
456 ui = self.ui
456 lrugetctx = self.lrugetctx
457 lrugetctx = self.lrugetctx
457 revlogv1 = self.revlogv1
458 revlogv1 = self.revlogv1
458 havemf = self.havemf
459 havemf = self.havemf
459 ui.status(_(b"checking files\n"))
460 ui.status(_(b"checking files\n"))
460
461
461 storefiles = set()
462 storefiles = set()
462 for rl_type, f, f2, size in repo.store.datafiles():
463 if not f:
464 self._err(None, _(b"cannot decode filename '%s'") % f2)
465 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
466 storefiles.add(_normpath(f))
463 undecodable = []
464 for t, f, size in repo.store.datafiles(undecodable=undecodable):
465 if (size > 0 or not revlogv1) and f.startswith(b'data/'):
466 storefiles.add(_normpath(f))
467 for f in undecodable:
468 self._err(None, _(b"cannot decode filename '%s'") % f)
467
469
468 state = {
470 state = {
469 # TODO this assumes revlog storage for changelog.
471 # TODO this assumes revlog storage for changelog.
470 b'expectedversion': self.repo.changelog._format_version,
472 b'expectedversion': self.repo.changelog._format_version,
471 b'skipflags': self.skipflags,
473 b'skipflags': self.skipflags,
472 # experimental config: censor.policy
474 # experimental config: censor.policy
473 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
475 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
474 }
476 }
475
477
476 files = sorted(set(filenodes) | set(filelinkrevs))
478 files = sorted(set(filenodes) | set(filelinkrevs))
477 revisions = 0
479 revisions = 0
478 progress = ui.makeprogress(
480 progress = ui.makeprogress(
479 _(b'checking'), unit=_(b'files'), total=len(files)
481 _(b'checking'), unit=_(b'files'), total=len(files)
480 )
482 )
481 for i, f in enumerate(files):
483 for i, f in enumerate(files):
482 progress.update(i, item=f)
484 progress.update(i, item=f)
483 try:
485 try:
484 linkrevs = filelinkrevs[f]
486 linkrevs = filelinkrevs[f]
485 except KeyError:
487 except KeyError:
486 # in manifest but not in changelog
488 # in manifest but not in changelog
487 linkrevs = []
489 linkrevs = []
488
490
489 if linkrevs:
491 if linkrevs:
490 lr = linkrevs[0]
492 lr = linkrevs[0]
491 else:
493 else:
492 lr = None
494 lr = None
493
495
494 try:
496 try:
495 fl = repo.file(f)
497 fl = repo.file(f)
496 except error.StorageError as e:
498 except error.StorageError as e:
497 self._err(lr, _(b"broken revlog! (%s)") % e, f)
499 self._err(lr, _(b"broken revlog! (%s)") % e, f)
498 continue
500 continue
499
501
500 for ff in fl.files():
502 for ff in fl.files():
501 try:
503 try:
502 storefiles.remove(ff)
504 storefiles.remove(ff)
503 except KeyError:
505 except KeyError:
504 if self.warnorphanstorefiles:
506 if self.warnorphanstorefiles:
505 msg = _(b" warning: revlog '%s' not in fncache!")
507 msg = _(b" warning: revlog '%s' not in fncache!")
506 self._warn(msg % ff)
508 self._warn(msg % ff)
507 self.fncachewarned = True
509 self.fncachewarned = True
508
510
509 if not len(fl) and (self.havecl or self.havemf):
511 if not len(fl) and (self.havecl or self.havemf):
510 self._err(lr, _(b"empty or missing %s") % f)
512 self._err(lr, _(b"empty or missing %s") % f)
511 else:
513 else:
512 # Guard against implementations not setting this.
514 # Guard against implementations not setting this.
513 state[b'skipread'] = set()
515 state[b'skipread'] = set()
514 state[b'safe_renamed'] = set()
516 state[b'safe_renamed'] = set()
515
517
516 for problem in fl.verifyintegrity(state):
518 for problem in fl.verifyintegrity(state):
517 if problem.node is not None:
519 if problem.node is not None:
518 linkrev = fl.linkrev(fl.rev(problem.node))
520 linkrev = fl.linkrev(fl.rev(problem.node))
519 else:
521 else:
520 linkrev = None
522 linkrev = None
521
523
522 if problem.warning:
524 if problem.warning:
523 self._warn(problem.warning)
525 self._warn(problem.warning)
524 elif problem.error:
526 elif problem.error:
525 linkrev_msg = linkrev if linkrev is not None else lr
527 linkrev_msg = linkrev if linkrev is not None else lr
526 self._err(linkrev_msg, problem.error, f)
528 self._err(linkrev_msg, problem.error, f)
527 else:
529 else:
528 raise error.ProgrammingError(
530 raise error.ProgrammingError(
529 b'problem instance does not set warning or error '
531 b'problem instance does not set warning or error '
530 b'attribute: %s' % problem.msg
532 b'attribute: %s' % problem.msg
531 )
533 )
532
534
533 seen = {}
535 seen = {}
534 for i in fl:
536 for i in fl:
535 revisions += 1
537 revisions += 1
536 n = fl.node(i)
538 n = fl.node(i)
537 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
539 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
538 if f in filenodes:
540 if f in filenodes:
539 if havemf and n not in filenodes[f]:
541 if havemf and n not in filenodes[f]:
540 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
542 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
541 else:
543 else:
542 del filenodes[f][n]
544 del filenodes[f][n]
543
545
544 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
546 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
545 continue
547 continue
546
548
547 # check renames
549 # check renames
548 try:
550 try:
549 # This requires resolving fulltext (at least on revlogs,
551 # This requires resolving fulltext (at least on revlogs,
550 # though not with LFS revisions). We may want
552 # though not with LFS revisions). We may want
551 # ``verifyintegrity()`` to pass a set of nodes with
553 # ``verifyintegrity()`` to pass a set of nodes with
552 # rename metadata as an optimization.
554 # rename metadata as an optimization.
553 rp = fl.renamed(n)
555 rp = fl.renamed(n)
554 if rp:
556 if rp:
555 if lr is not None and ui.verbose:
557 if lr is not None and ui.verbose:
556 ctx = lrugetctx(lr)
558 ctx = lrugetctx(lr)
557 if not any(rp[0] in pctx for pctx in ctx.parents()):
559 if not any(rp[0] in pctx for pctx in ctx.parents()):
558 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
560 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
559 fl2 = repo.file(rp[0])
561 fl2 = repo.file(rp[0])
560 if not len(fl2):
562 if not len(fl2):
561 m = _(b"empty or missing copy source revlog %s:%s")
563 m = _(b"empty or missing copy source revlog %s:%s")
562 self._err(lr, m % (rp[0], short(rp[1])), f)
564 self._err(lr, m % (rp[0], short(rp[1])), f)
563 elif rp[1] == self.repo.nullid:
565 elif rp[1] == self.repo.nullid:
564 msg = WARN_NULLID_COPY_SOURCE
566 msg = WARN_NULLID_COPY_SOURCE
565 msg %= (f, lr, rp[0], short(rp[1]))
567 msg %= (f, lr, rp[0], short(rp[1]))
566 ui.note(msg)
568 ui.note(msg)
567 else:
569 else:
568 fl2.rev(rp[1])
570 fl2.rev(rp[1])
569 except Exception as inst:
571 except Exception as inst:
570 self._exc(
572 self._exc(
571 lr, _(b"checking rename of %s") % short(n), inst, f
573 lr, _(b"checking rename of %s") % short(n), inst, f
572 )
574 )
573
575
574 # cross-check
576 # cross-check
575 if f in filenodes:
577 if f in filenodes:
576 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
578 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
577 for lr, node in sorted(fns):
579 for lr, node in sorted(fns):
578 msg = _(b"manifest refers to unknown revision %s")
580 msg = _(b"manifest refers to unknown revision %s")
579 self._err(lr, msg % short(node), f)
581 self._err(lr, msg % short(node), f)
580 progress.complete()
582 progress.complete()
581
583
582 if self.warnorphanstorefiles:
584 if self.warnorphanstorefiles:
583 for f in sorted(storefiles):
585 for f in sorted(storefiles):
584 self._warn(_(b"warning: orphan data file '%s'") % f)
586 self._warn(_(b"warning: orphan data file '%s'") % f)
585
587
586 return len(files), revisions
588 return len(files), revisions
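
The two hunks above change the store.datafiles() walk API used by the verifier: each entry now carries exactly one filename, and names the store could not decode are no longer signalled inline (an empty decoded name with the raw name in f2) but collected into a caller-supplied list and reported after the walk. A minimal sketch of the new calling pattern, assuming only what the hunks show (the error-reporting helper is hypothetical):

    undecodable = []   # filled in by the store while walking
    storefiles = set()
    for t, f, size in repo.store.datafiles(undecodable=undecodable):
        # One filename per entry; no second, possibly-empty decoded name.
        if f.startswith(b'data/'):
            storefiles.add(f)
    for f in undecodable:
        # Names the store could not decode are reported once the walk is done.
        report_error(b"cannot decode filename '%s'" % f)  # hypothetical helper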
@@ -1,1613 +1,1613 b''
1 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
1 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
2 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
2 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 #
3 #
4 # This software may be used and distributed according to the terms of the
4 # This software may be used and distributed according to the terms of the
5 # GNU General Public License version 2 or any later version.
5 # GNU General Public License version 2 or any later version.
6
6
7 from __future__ import absolute_import
7 from __future__ import absolute_import
8
8
9 import collections
9 import collections
10 import contextlib
10 import contextlib
11
11
12 from .i18n import _
12 from .i18n import _
13 from .node import hex
13 from .node import hex
14 from . import (
14 from . import (
15 discovery,
15 discovery,
16 encoding,
16 encoding,
17 error,
17 error,
18 match as matchmod,
18 match as matchmod,
19 narrowspec,
19 narrowspec,
20 pycompat,
20 pycompat,
21 streamclone,
21 streamclone,
22 templatefilters,
22 templatefilters,
23 util,
23 util,
24 wireprotoframing,
24 wireprotoframing,
25 wireprototypes,
25 wireprototypes,
26 )
26 )
27 from .interfaces import util as interfaceutil
27 from .interfaces import util as interfaceutil
28 from .utils import (
28 from .utils import (
29 cborutil,
29 cborutil,
30 hashutil,
30 hashutil,
31 stringutil,
31 stringutil,
32 )
32 )
33
33
34 FRAMINGTYPE = b'application/mercurial-exp-framing-0006'
34 FRAMINGTYPE = b'application/mercurial-exp-framing-0006'
35
35
36 HTTP_WIREPROTO_V2 = wireprototypes.HTTP_WIREPROTO_V2
36 HTTP_WIREPROTO_V2 = wireprototypes.HTTP_WIREPROTO_V2
37
37
38 COMMANDS = wireprototypes.commanddict()
38 COMMANDS = wireprototypes.commanddict()
39
39
40 # Value inserted into cache key computation function. Change the value to
40 # Value inserted into cache key computation function. Change the value to
41 # force new cache keys for every command request. This should be done when
41 # force new cache keys for every command request. This should be done when
42 # there is a change to how caching works, etc.
42 # there is a change to how caching works, etc.
43 GLOBAL_CACHE_VERSION = 1
43 GLOBAL_CACHE_VERSION = 1
44
44
45
45
46 def handlehttpv2request(rctx, req, res, checkperm, urlparts):
46 def handlehttpv2request(rctx, req, res, checkperm, urlparts):
47 from .hgweb import common as hgwebcommon
47 from .hgweb import common as hgwebcommon
48
48
49 # URL space looks like: <permissions>/<command>, where <permission> can
49 # URL space looks like: <permissions>/<command>, where <permission> can
50 # be ``ro`` or ``rw`` to signal read-only or read-write, respectively.
50 # be ``ro`` or ``rw`` to signal read-only or read-write, respectively.
51
51
52 # Root URL does nothing meaningful... yet.
52 # Root URL does nothing meaningful... yet.
53 if not urlparts:
53 if not urlparts:
54 res.status = b'200 OK'
54 res.status = b'200 OK'
55 res.headers[b'Content-Type'] = b'text/plain'
55 res.headers[b'Content-Type'] = b'text/plain'
56 res.setbodybytes(_(b'HTTP version 2 API handler'))
56 res.setbodybytes(_(b'HTTP version 2 API handler'))
57 return
57 return
58
58
59 if len(urlparts) == 1:
59 if len(urlparts) == 1:
60 res.status = b'404 Not Found'
60 res.status = b'404 Not Found'
61 res.headers[b'Content-Type'] = b'text/plain'
61 res.headers[b'Content-Type'] = b'text/plain'
62 res.setbodybytes(
62 res.setbodybytes(
63 _(b'do not know how to process %s\n') % req.dispatchpath
63 _(b'do not know how to process %s\n') % req.dispatchpath
64 )
64 )
65 return
65 return
66
66
67 permission, command = urlparts[0:2]
67 permission, command = urlparts[0:2]
68
68
69 if permission not in (b'ro', b'rw'):
69 if permission not in (b'ro', b'rw'):
70 res.status = b'404 Not Found'
70 res.status = b'404 Not Found'
71 res.headers[b'Content-Type'] = b'text/plain'
71 res.headers[b'Content-Type'] = b'text/plain'
72 res.setbodybytes(_(b'unknown permission: %s') % permission)
72 res.setbodybytes(_(b'unknown permission: %s') % permission)
73 return
73 return
74
74
75 if req.method != b'POST':
75 if req.method != b'POST':
76 res.status = b'405 Method Not Allowed'
76 res.status = b'405 Method Not Allowed'
77 res.headers[b'Allow'] = b'POST'
77 res.headers[b'Allow'] = b'POST'
78 res.setbodybytes(_(b'commands require POST requests'))
78 res.setbodybytes(_(b'commands require POST requests'))
79 return
79 return
80
80
81 # At some point we'll want to use our own API instead of recycling the
81 # At some point we'll want to use our own API instead of recycling the
82 # behavior of version 1 of the wire protocol...
82 # behavior of version 1 of the wire protocol...
83 # TODO return reasonable responses - not responses that overload the
83 # TODO return reasonable responses - not responses that overload the
84 # HTTP status line message for error reporting.
84 # HTTP status line message for error reporting.
85 try:
85 try:
86 checkperm(rctx, req, b'pull' if permission == b'ro' else b'push')
86 checkperm(rctx, req, b'pull' if permission == b'ro' else b'push')
87 except hgwebcommon.ErrorResponse as e:
87 except hgwebcommon.ErrorResponse as e:
88 res.status = hgwebcommon.statusmessage(
88 res.status = hgwebcommon.statusmessage(
89 e.code, stringutil.forcebytestr(e)
89 e.code, stringutil.forcebytestr(e)
90 )
90 )
91 for k, v in e.headers:
91 for k, v in e.headers:
92 res.headers[k] = v
92 res.headers[k] = v
93 res.setbodybytes(b'permission denied')
93 res.setbodybytes(b'permission denied')
94 return
94 return
95
95
96 # We have a special endpoint to reflect the request back at the client.
96 # We have a special endpoint to reflect the request back at the client.
97 if command == b'debugreflect':
97 if command == b'debugreflect':
98 _processhttpv2reflectrequest(rctx.repo.ui, rctx.repo, req, res)
98 _processhttpv2reflectrequest(rctx.repo.ui, rctx.repo, req, res)
99 return
99 return
100
100
101 # Extra commands that we handle that aren't really wire protocol
101 # Extra commands that we handle that aren't really wire protocol
102 # commands. Think extra hard before making this hackery available to
102 # commands. Think extra hard before making this hackery available to
103 # extensions.
103 # extensions.
104 extracommands = {b'multirequest'}
104 extracommands = {b'multirequest'}
105
105
106 if command not in COMMANDS and command not in extracommands:
106 if command not in COMMANDS and command not in extracommands:
107 res.status = b'404 Not Found'
107 res.status = b'404 Not Found'
108 res.headers[b'Content-Type'] = b'text/plain'
108 res.headers[b'Content-Type'] = b'text/plain'
109 res.setbodybytes(_(b'unknown wire protocol command: %s\n') % command)
109 res.setbodybytes(_(b'unknown wire protocol command: %s\n') % command)
110 return
110 return
111
111
112 repo = rctx.repo
112 repo = rctx.repo
113 ui = repo.ui
113 ui = repo.ui
114
114
115 proto = httpv2protocolhandler(req, ui)
115 proto = httpv2protocolhandler(req, ui)
116
116
117 if (
117 if (
118 not COMMANDS.commandavailable(command, proto)
118 not COMMANDS.commandavailable(command, proto)
119 and command not in extracommands
119 and command not in extracommands
120 ):
120 ):
121 res.status = b'404 Not Found'
121 res.status = b'404 Not Found'
122 res.headers[b'Content-Type'] = b'text/plain'
122 res.headers[b'Content-Type'] = b'text/plain'
123 res.setbodybytes(_(b'invalid wire protocol command: %s') % command)
123 res.setbodybytes(_(b'invalid wire protocol command: %s') % command)
124 return
124 return
125
125
126 # TODO consider cases where proxies may add additional Accept headers.
126 # TODO consider cases where proxies may add additional Accept headers.
127 if req.headers.get(b'Accept') != FRAMINGTYPE:
127 if req.headers.get(b'Accept') != FRAMINGTYPE:
128 res.status = b'406 Not Acceptable'
128 res.status = b'406 Not Acceptable'
129 res.headers[b'Content-Type'] = b'text/plain'
129 res.headers[b'Content-Type'] = b'text/plain'
130 res.setbodybytes(
130 res.setbodybytes(
131 _(b'client MUST specify Accept header with value: %s\n')
131 _(b'client MUST specify Accept header with value: %s\n')
132 % FRAMINGTYPE
132 % FRAMINGTYPE
133 )
133 )
134 return
134 return
135
135
136 if req.headers.get(b'Content-Type') != FRAMINGTYPE:
136 if req.headers.get(b'Content-Type') != FRAMINGTYPE:
137 res.status = b'415 Unsupported Media Type'
137 res.status = b'415 Unsupported Media Type'
138 # TODO we should send a response with appropriate media type,
138 # TODO we should send a response with appropriate media type,
139 # since the client does Accept it.
139 # since the client does Accept it.
140 res.headers[b'Content-Type'] = b'text/plain'
140 res.headers[b'Content-Type'] = b'text/plain'
141 res.setbodybytes(
141 res.setbodybytes(
142 _(b'client MUST send Content-Type header with value: %s\n')
142 _(b'client MUST send Content-Type header with value: %s\n')
143 % FRAMINGTYPE
143 % FRAMINGTYPE
144 )
144 )
145 return
145 return
146
146
147 _processhttpv2request(ui, repo, req, res, permission, command, proto)
147 _processhttpv2request(ui, repo, req, res, permission, command, proto)
148
148
149
149
150 def _processhttpv2reflectrequest(ui, repo, req, res):
150 def _processhttpv2reflectrequest(ui, repo, req, res):
151 """Reads unified frame protocol request and dumps out state to client.
151 """Reads unified frame protocol request and dumps out state to client.
152
152
153 This special endpoint can be used to help debug the wire protocol.
153 This special endpoint can be used to help debug the wire protocol.
154
154
155 Instead of routing the request through the normal dispatch mechanism,
155 Instead of routing the request through the normal dispatch mechanism,
156 we instead read all frames, decode them, and feed them into our state
156 we instead read all frames, decode them, and feed them into our state
157 tracker. We then dump the log of all that activity back out to the
157 tracker. We then dump the log of all that activity back out to the
158 client.
158 client.
159 """
159 """
160 # Reflection APIs have a history of being abused, accidentally disclosing
160 # Reflection APIs have a history of being abused, accidentally disclosing
161 # sensitive data, etc. So we have a config knob.
161 # sensitive data, etc. So we have a config knob.
162 if not ui.configbool(b'experimental', b'web.api.debugreflect'):
162 if not ui.configbool(b'experimental', b'web.api.debugreflect'):
163 res.status = b'404 Not Found'
163 res.status = b'404 Not Found'
164 res.headers[b'Content-Type'] = b'text/plain'
164 res.headers[b'Content-Type'] = b'text/plain'
165 res.setbodybytes(_(b'debugreflect service not available'))
165 res.setbodybytes(_(b'debugreflect service not available'))
166 return
166 return
167
167
168 # We assume we have a unified framing protocol request body.
168 # We assume we have a unified framing protocol request body.
169
169
170 reactor = wireprotoframing.serverreactor(ui)
170 reactor = wireprotoframing.serverreactor(ui)
171 states = []
171 states = []
172
172
173 while True:
173 while True:
174 frame = wireprotoframing.readframe(req.bodyfh)
174 frame = wireprotoframing.readframe(req.bodyfh)
175
175
176 if not frame:
176 if not frame:
177 states.append(b'received: <no frame>')
177 states.append(b'received: <no frame>')
178 break
178 break
179
179
180 states.append(
180 states.append(
181 b'received: %d %d %d %s'
181 b'received: %d %d %d %s'
182 % (frame.typeid, frame.flags, frame.requestid, frame.payload)
182 % (frame.typeid, frame.flags, frame.requestid, frame.payload)
183 )
183 )
184
184
185 action, meta = reactor.onframerecv(frame)
185 action, meta = reactor.onframerecv(frame)
186 states.append(templatefilters.json((action, meta)))
186 states.append(templatefilters.json((action, meta)))
187
187
188 action, meta = reactor.oninputeof()
188 action, meta = reactor.oninputeof()
189 meta[b'action'] = action
189 meta[b'action'] = action
190 states.append(templatefilters.json(meta))
190 states.append(templatefilters.json(meta))
191
191
192 res.status = b'200 OK'
192 res.status = b'200 OK'
193 res.headers[b'Content-Type'] = b'text/plain'
193 res.headers[b'Content-Type'] = b'text/plain'
194 res.setbodybytes(b'\n'.join(states))
194 res.setbodybytes(b'\n'.join(states))
195
195
196
196
197 def _processhttpv2request(ui, repo, req, res, authedperm, reqcommand, proto):
197 def _processhttpv2request(ui, repo, req, res, authedperm, reqcommand, proto):
198 """Post-validation handler for HTTPv2 requests.
198 """Post-validation handler for HTTPv2 requests.
199
199
200 Called when the HTTP request contains unified frame-based protocol
200 Called when the HTTP request contains unified frame-based protocol
201 frames for evaluation.
201 frames for evaluation.
202 """
202 """
203 # TODO Some HTTP clients are full duplex and can receive data before
203 # TODO Some HTTP clients are full duplex and can receive data before
204 # the entire request is transmitted. Figure out a way to indicate support
204 # the entire request is transmitted. Figure out a way to indicate support
205 # for that so we can opt into full duplex mode.
205 # for that so we can opt into full duplex mode.
206 reactor = wireprotoframing.serverreactor(ui, deferoutput=True)
206 reactor = wireprotoframing.serverreactor(ui, deferoutput=True)
207 seencommand = False
207 seencommand = False
208
208
209 outstream = None
209 outstream = None
210
210
211 while True:
211 while True:
212 frame = wireprotoframing.readframe(req.bodyfh)
212 frame = wireprotoframing.readframe(req.bodyfh)
213 if not frame:
213 if not frame:
214 break
214 break
215
215
216 action, meta = reactor.onframerecv(frame)
216 action, meta = reactor.onframerecv(frame)
217
217
218 if action == b'wantframe':
218 if action == b'wantframe':
219 # Need more data before we can do anything.
219 # Need more data before we can do anything.
220 continue
220 continue
221 elif action == b'runcommand':
221 elif action == b'runcommand':
222 # Defer creating output stream because we need to wait for
222 # Defer creating output stream because we need to wait for
223 # protocol settings frames so proper encoding can be applied.
223 # protocol settings frames so proper encoding can be applied.
224 if not outstream:
224 if not outstream:
225 outstream = reactor.makeoutputstream()
225 outstream = reactor.makeoutputstream()
226
226
227 sentoutput = _httpv2runcommand(
227 sentoutput = _httpv2runcommand(
228 ui,
228 ui,
229 repo,
229 repo,
230 req,
230 req,
231 res,
231 res,
232 authedperm,
232 authedperm,
233 reqcommand,
233 reqcommand,
234 reactor,
234 reactor,
235 outstream,
235 outstream,
236 meta,
236 meta,
237 issubsequent=seencommand,
237 issubsequent=seencommand,
238 )
238 )
239
239
240 if sentoutput:
240 if sentoutput:
241 return
241 return
242
242
243 seencommand = True
243 seencommand = True
244
244
245 elif action == b'error':
245 elif action == b'error':
246 # TODO define proper error mechanism.
246 # TODO define proper error mechanism.
247 res.status = b'200 OK'
247 res.status = b'200 OK'
248 res.headers[b'Content-Type'] = b'text/plain'
248 res.headers[b'Content-Type'] = b'text/plain'
249 res.setbodybytes(meta[b'message'] + b'\n')
249 res.setbodybytes(meta[b'message'] + b'\n')
250 return
250 return
251 else:
251 else:
252 raise error.ProgrammingError(
252 raise error.ProgrammingError(
253 b'unhandled action from frame processor: %s' % action
253 b'unhandled action from frame processor: %s' % action
254 )
254 )
255
255
256 action, meta = reactor.oninputeof()
256 action, meta = reactor.oninputeof()
257 if action == b'sendframes':
257 if action == b'sendframes':
258 # We assume we haven't started sending the response yet. If we're
258 # We assume we haven't started sending the response yet. If we're
259 # wrong, the response type will raise an exception.
259 # wrong, the response type will raise an exception.
260 res.status = b'200 OK'
260 res.status = b'200 OK'
261 res.headers[b'Content-Type'] = FRAMINGTYPE
261 res.headers[b'Content-Type'] = FRAMINGTYPE
262 res.setbodygen(meta[b'framegen'])
262 res.setbodygen(meta[b'framegen'])
263 elif action == b'noop':
263 elif action == b'noop':
264 pass
264 pass
265 else:
265 else:
266 raise error.ProgrammingError(
266 raise error.ProgrammingError(
267 b'unhandled action from frame processor: %s' % action
267 b'unhandled action from frame processor: %s' % action
268 )
268 )
269
269
270
270
271 def _httpv2runcommand(
271 def _httpv2runcommand(
272 ui,
272 ui,
273 repo,
273 repo,
274 req,
274 req,
275 res,
275 res,
276 authedperm,
276 authedperm,
277 reqcommand,
277 reqcommand,
278 reactor,
278 reactor,
279 outstream,
279 outstream,
280 command,
280 command,
281 issubsequent,
281 issubsequent,
282 ):
282 ):
283 """Dispatch a wire protocol command made from HTTPv2 requests.
283 """Dispatch a wire protocol command made from HTTPv2 requests.
284
284
285 The authenticated permission (``authedperm``) along with the original
285 The authenticated permission (``authedperm``) along with the original
286 command from the URL (``reqcommand``) are passed in.
286 command from the URL (``reqcommand``) are passed in.
287 """
287 """
288 # We already validated that the session has permissions to perform the
288 # We already validated that the session has permissions to perform the
289 # actions in ``authedperm``. In the unified frame protocol, the canonical
289 # actions in ``authedperm``. In the unified frame protocol, the canonical
290 # command to run is expressed in a frame. However, the URL also requested
290 # command to run is expressed in a frame. However, the URL also requested
291 # to run a specific command. We need to be careful that the command we
291 # to run a specific command. We need to be careful that the command we
292 # run doesn't have permissions requirements greater than what was granted
292 # run doesn't have permissions requirements greater than what was granted
293 # by ``authedperm``.
293 # by ``authedperm``.
294 #
294 #
295 # Our rule for this is we only allow one command per HTTP request and
295 # Our rule for this is we only allow one command per HTTP request and
296 # that command must match the command in the URL. However, we make
296 # that command must match the command in the URL. However, we make
297 # an exception for the ``multirequest`` URL. This URL is allowed to
297 # an exception for the ``multirequest`` URL. This URL is allowed to
298 # execute multiple commands. We double check permissions of each command
298 # execute multiple commands. We double check permissions of each command
299 # as it is invoked to ensure there is no privilege escalation.
299 # as it is invoked to ensure there is no privilege escalation.
300 # TODO consider allowing multiple commands to regular command URLs
300 # TODO consider allowing multiple commands to regular command URLs
301 # iff each command is the same.
301 # iff each command is the same.
302
302
303 proto = httpv2protocolhandler(req, ui, args=command[b'args'])
303 proto = httpv2protocolhandler(req, ui, args=command[b'args'])
304
304
305 if reqcommand == b'multirequest':
305 if reqcommand == b'multirequest':
306 if not COMMANDS.commandavailable(command[b'command'], proto):
306 if not COMMANDS.commandavailable(command[b'command'], proto):
307 # TODO proper error mechanism
307 # TODO proper error mechanism
308 res.status = b'200 OK'
308 res.status = b'200 OK'
309 res.headers[b'Content-Type'] = b'text/plain'
309 res.headers[b'Content-Type'] = b'text/plain'
310 res.setbodybytes(
310 res.setbodybytes(
311 _(b'wire protocol command not available: %s')
311 _(b'wire protocol command not available: %s')
312 % command[b'command']
312 % command[b'command']
313 )
313 )
314 return True
314 return True
315
315
316 # TODO don't use assert here, since it may be elided by -O.
316 # TODO don't use assert here, since it may be elided by -O.
317 assert authedperm in (b'ro', b'rw')
317 assert authedperm in (b'ro', b'rw')
318 wirecommand = COMMANDS[command[b'command']]
318 wirecommand = COMMANDS[command[b'command']]
319 assert wirecommand.permission in (b'push', b'pull')
319 assert wirecommand.permission in (b'push', b'pull')
320
320
321 if authedperm == b'ro' and wirecommand.permission != b'pull':
321 if authedperm == b'ro' and wirecommand.permission != b'pull':
322 # TODO proper error mechanism
322 # TODO proper error mechanism
323 res.status = b'403 Forbidden'
323 res.status = b'403 Forbidden'
324 res.headers[b'Content-Type'] = b'text/plain'
324 res.headers[b'Content-Type'] = b'text/plain'
325 res.setbodybytes(
325 res.setbodybytes(
326 _(b'insufficient permissions to execute command: %s')
326 _(b'insufficient permissions to execute command: %s')
327 % command[b'command']
327 % command[b'command']
328 )
328 )
329 return True
329 return True
330
330
331 # TODO should we also call checkperm() here? Maybe not if we're going
331 # TODO should we also call checkperm() here? Maybe not if we're going
332 # to overhaul that API. The granted scope from the URL check should
332 # to overhaul that API. The granted scope from the URL check should
333 # be good enough.
333 # be good enough.
334
334
335 else:
335 else:
336 # Don't allow multiple commands outside of ``multirequest`` URL.
336 # Don't allow multiple commands outside of ``multirequest`` URL.
337 if issubsequent:
337 if issubsequent:
338 # TODO proper error mechanism
338 # TODO proper error mechanism
339 res.status = b'200 OK'
339 res.status = b'200 OK'
340 res.headers[b'Content-Type'] = b'text/plain'
340 res.headers[b'Content-Type'] = b'text/plain'
341 res.setbodybytes(
341 res.setbodybytes(
342 _(b'multiple commands cannot be issued to this URL')
342 _(b'multiple commands cannot be issued to this URL')
343 )
343 )
344 return True
344 return True
345
345
346 if reqcommand != command[b'command']:
346 if reqcommand != command[b'command']:
347 # TODO define proper error mechanism
347 # TODO define proper error mechanism
348 res.status = b'200 OK'
348 res.status = b'200 OK'
349 res.headers[b'Content-Type'] = b'text/plain'
349 res.headers[b'Content-Type'] = b'text/plain'
350 res.setbodybytes(_(b'command in frame must match command in URL'))
350 res.setbodybytes(_(b'command in frame must match command in URL'))
351 return True
351 return True
352
352
353 res.status = b'200 OK'
353 res.status = b'200 OK'
354 res.headers[b'Content-Type'] = FRAMINGTYPE
354 res.headers[b'Content-Type'] = FRAMINGTYPE
355
355
356 try:
356 try:
357 objs = dispatch(repo, proto, command[b'command'], command[b'redirect'])
357 objs = dispatch(repo, proto, command[b'command'], command[b'redirect'])
358
358
359 action, meta = reactor.oncommandresponsereadyobjects(
359 action, meta = reactor.oncommandresponsereadyobjects(
360 outstream, command[b'requestid'], objs
360 outstream, command[b'requestid'], objs
361 )
361 )
362
362
363 except error.WireprotoCommandError as e:
363 except error.WireprotoCommandError as e:
364 action, meta = reactor.oncommanderror(
364 action, meta = reactor.oncommanderror(
365 outstream, command[b'requestid'], e.message, e.messageargs
365 outstream, command[b'requestid'], e.message, e.messageargs
366 )
366 )
367
367
368 except Exception as e:
368 except Exception as e:
369 action, meta = reactor.onservererror(
369 action, meta = reactor.onservererror(
370 outstream,
370 outstream,
371 command[b'requestid'],
371 command[b'requestid'],
372 _(b'exception when invoking command: %s')
372 _(b'exception when invoking command: %s')
373 % stringutil.forcebytestr(e),
373 % stringutil.forcebytestr(e),
374 )
374 )
375
375
376 if action == b'sendframes':
376 if action == b'sendframes':
377 res.setbodygen(meta[b'framegen'])
377 res.setbodygen(meta[b'framegen'])
378 return True
378 return True
379 elif action == b'noop':
379 elif action == b'noop':
380 return False
380 return False
381 else:
381 else:
382 raise error.ProgrammingError(
382 raise error.ProgrammingError(
383 b'unhandled event from reactor: %s' % action
383 b'unhandled event from reactor: %s' % action
384 )
384 )
385
385
386
386
387 def getdispatchrepo(repo, proto, command):
387 def getdispatchrepo(repo, proto, command):
388 viewconfig = repo.ui.config(b'server', b'view')
388 viewconfig = repo.ui.config(b'server', b'view')
389 return repo.filtered(viewconfig)
389 return repo.filtered(viewconfig)
390
390
391
391
392 def dispatch(repo, proto, command, redirect):
392 def dispatch(repo, proto, command, redirect):
393 """Run a wire protocol command.
393 """Run a wire protocol command.
394
394
395 Returns an iterable of objects that will be sent to the client.
395 Returns an iterable of objects that will be sent to the client.
396 """
396 """
397 repo = getdispatchrepo(repo, proto, command)
397 repo = getdispatchrepo(repo, proto, command)
398
398
399 entry = COMMANDS[command]
399 entry = COMMANDS[command]
400 func = entry.func
400 func = entry.func
401 spec = entry.args
401 spec = entry.args
402
402
403 args = proto.getargs(spec)
403 args = proto.getargs(spec)
404
404
405 # There is some duplicate boilerplate code here for calling the command and
405 # There is some duplicate boilerplate code here for calling the command and
406 # emitting objects. It is either that or a lot of indented code that looks
406 # emitting objects. It is either that or a lot of indented code that looks
407 # like a pyramid (since there are a lot of code paths that result in not
407 # like a pyramid (since there are a lot of code paths that result in not
408 # using the cacher).
408 # using the cacher).
409 callcommand = lambda: func(repo, proto, **pycompat.strkwargs(args))
409 callcommand = lambda: func(repo, proto, **pycompat.strkwargs(args))
410
410
411 # Request is not cacheable. Don't bother instantiating a cacher.
411 # Request is not cacheable. Don't bother instantiating a cacher.
412 if not entry.cachekeyfn:
412 if not entry.cachekeyfn:
413 for o in callcommand():
413 for o in callcommand():
414 yield o
414 yield o
415 return
415 return
416
416
417 if redirect:
417 if redirect:
418 redirecttargets = redirect[b'targets']
418 redirecttargets = redirect[b'targets']
419 redirecthashes = redirect[b'hashes']
419 redirecthashes = redirect[b'hashes']
420 else:
420 else:
421 redirecttargets = []
421 redirecttargets = []
422 redirecthashes = []
422 redirecthashes = []
423
423
424 cacher = makeresponsecacher(
424 cacher = makeresponsecacher(
425 repo,
425 repo,
426 proto,
426 proto,
427 command,
427 command,
428 args,
428 args,
429 cborutil.streamencode,
429 cborutil.streamencode,
430 redirecttargets=redirecttargets,
430 redirecttargets=redirecttargets,
431 redirecthashes=redirecthashes,
431 redirecthashes=redirecthashes,
432 )
432 )
433
433
434 # But we have no cacher. Do default handling.
434 # But we have no cacher. Do default handling.
435 if not cacher:
435 if not cacher:
436 for o in callcommand():
436 for o in callcommand():
437 yield o
437 yield o
438 return
438 return
439
439
440 with cacher:
440 with cacher:
441 cachekey = entry.cachekeyfn(
441 cachekey = entry.cachekeyfn(
442 repo, proto, cacher, **pycompat.strkwargs(args)
442 repo, proto, cacher, **pycompat.strkwargs(args)
443 )
443 )
444
444
445 # No cache key or the cacher doesn't like it. Do default handling.
445 # No cache key or the cacher doesn't like it. Do default handling.
446 if cachekey is None or not cacher.setcachekey(cachekey):
446 if cachekey is None or not cacher.setcachekey(cachekey):
447 for o in callcommand():
447 for o in callcommand():
448 yield o
448 yield o
449 return
449 return
450
450
451 # Serve it from the cache, if possible.
451 # Serve it from the cache, if possible.
452 cached = cacher.lookup()
452 cached = cacher.lookup()
453
453
454 if cached:
454 if cached:
455 for o in cached[b'objs']:
455 for o in cached[b'objs']:
456 yield o
456 yield o
457 return
457 return
458
458
459 # Else call the command and feed its output into the cacher, allowing
459 # Else call the command and feed its output into the cacher, allowing
460 # the cacher to buffer/mutate objects as it desires.
460 # the cacher to buffer/mutate objects as it desires.
461 for o in callcommand():
461 for o in callcommand():
462 for o in cacher.onobject(o):
462 for o in cacher.onobject(o):
463 yield o
463 yield o
464
464
465 for o in cacher.onfinished():
465 for o in cacher.onfinished():
466 yield o
466 yield o
467
467
468
468
469 @interfaceutil.implementer(wireprototypes.baseprotocolhandler)
469 @interfaceutil.implementer(wireprototypes.baseprotocolhandler)
470 class httpv2protocolhandler(object):
470 class httpv2protocolhandler(object):
471 def __init__(self, req, ui, args=None):
471 def __init__(self, req, ui, args=None):
472 self._req = req
472 self._req = req
473 self._ui = ui
473 self._ui = ui
474 self._args = args
474 self._args = args
475
475
476 @property
476 @property
477 def name(self):
477 def name(self):
478 return HTTP_WIREPROTO_V2
478 return HTTP_WIREPROTO_V2
479
479
480 def getargs(self, args):
480 def getargs(self, args):
481 # First look for args that were passed but aren't registered on this
481 # First look for args that were passed but aren't registered on this
482 # command.
482 # command.
483 extra = set(self._args) - set(args)
483 extra = set(self._args) - set(args)
484 if extra:
484 if extra:
485 raise error.WireprotoCommandError(
485 raise error.WireprotoCommandError(
486 b'unsupported argument to command: %s'
486 b'unsupported argument to command: %s'
487 % b', '.join(sorted(extra))
487 % b', '.join(sorted(extra))
488 )
488 )
489
489
490 # And look for required arguments that are missing.
490 # And look for required arguments that are missing.
491 missing = {a for a in args if args[a][b'required']} - set(self._args)
491 missing = {a for a in args if args[a][b'required']} - set(self._args)
492
492
493 if missing:
493 if missing:
494 raise error.WireprotoCommandError(
494 raise error.WireprotoCommandError(
495 b'missing required arguments: %s' % b', '.join(sorted(missing))
495 b'missing required arguments: %s' % b', '.join(sorted(missing))
496 )
496 )
497
497
498 # Now derive the arguments to pass to the command, taking into
498 # Now derive the arguments to pass to the command, taking into
499 # account the arguments specified by the client.
499 # account the arguments specified by the client.
500 data = {}
500 data = {}
501 for k, meta in sorted(args.items()):
501 for k, meta in sorted(args.items()):
502 # This argument wasn't passed by the client.
502 # This argument wasn't passed by the client.
503 if k not in self._args:
503 if k not in self._args:
504 data[k] = meta[b'default']()
504 data[k] = meta[b'default']()
505 continue
505 continue
506
506
507 v = self._args[k]
507 v = self._args[k]
508
508
509 # Sets may be expressed as lists. Silently normalize.
509 # Sets may be expressed as lists. Silently normalize.
510 if meta[b'type'] == b'set' and isinstance(v, list):
510 if meta[b'type'] == b'set' and isinstance(v, list):
511 v = set(v)
511 v = set(v)
512
512
513 # TODO consider more/stronger type validation.
513 # TODO consider more/stronger type validation.
514
514
515 data[k] = v
515 data[k] = v
516
516
517 return data
517 return data
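
A hedged sketch of the normalization getargs() performs, with an invented argument spec and client payload; in real use the required/default fields come from the wireprotocommand() registration further down, and req/ui are the request and ui objects the handler was built with:

    spec = {
        b'nodes': {b'type': b'set', b'required': True},
        b'depth': {b'type': b'int', b'required': False,
                   b'default': lambda: None},
    }
    # The client sent 'nodes' as a list and omitted 'depth' entirely.
    handler = httpv2protocolhandler(req, ui, args={b'nodes': [b'aa', b'bb']})
    args = handler.getargs(spec)
    # args == {b'nodes': {b'aa', b'bb'},  # list silently normalized to a set
    #          b'depth': None}            # default() filled in for the omission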
518
518
519 def getprotocaps(self):
519 def getprotocaps(self):
520 # Protocol capabilities are currently not implemented for HTTP V2.
520 # Protocol capabilities are currently not implemented for HTTP V2.
521 return set()
521 return set()
522
522
523 def getpayload(self):
523 def getpayload(self):
524 raise NotImplementedError
524 raise NotImplementedError
525
525
526 @contextlib.contextmanager
526 @contextlib.contextmanager
527 def mayberedirectstdio(self):
527 def mayberedirectstdio(self):
528 raise NotImplementedError
528 raise NotImplementedError
529
529
530 def client(self):
530 def client(self):
531 raise NotImplementedError
531 raise NotImplementedError
532
532
533 def addcapabilities(self, repo, caps):
533 def addcapabilities(self, repo, caps):
534 return caps
534 return caps
535
535
536 def checkperm(self, perm):
536 def checkperm(self, perm):
537 raise NotImplementedError
537 raise NotImplementedError
538
538
539
539
540 def httpv2apidescriptor(req, repo):
540 def httpv2apidescriptor(req, repo):
541 proto = httpv2protocolhandler(req, repo.ui)
541 proto = httpv2protocolhandler(req, repo.ui)
542
542
543 return _capabilitiesv2(repo, proto)
543 return _capabilitiesv2(repo, proto)
544
544
545
545
546 def _capabilitiesv2(repo, proto):
546 def _capabilitiesv2(repo, proto):
547 """Obtain the set of capabilities for version 2 transports.
547 """Obtain the set of capabilities for version 2 transports.
548
548
549 These capabilities are distinct from the capabilities for version 1
549 These capabilities are distinct from the capabilities for version 1
550 transports.
550 transports.
551 """
551 """
552 caps = {
552 caps = {
553 b'commands': {},
553 b'commands': {},
554 b'framingmediatypes': [FRAMINGTYPE],
554 b'framingmediatypes': [FRAMINGTYPE],
555 b'pathfilterprefixes': set(narrowspec.VALID_PREFIXES),
555 b'pathfilterprefixes': set(narrowspec.VALID_PREFIXES),
556 }
556 }
557
557
558 for command, entry in COMMANDS.items():
558 for command, entry in COMMANDS.items():
559 args = {}
559 args = {}
560
560
561 for arg, meta in entry.args.items():
561 for arg, meta in entry.args.items():
562 args[arg] = {
562 args[arg] = {
563 # TODO should this be a normalized type using CBOR's
563 # TODO should this be a normalized type using CBOR's
564 # terminology?
564 # terminology?
565 b'type': meta[b'type'],
565 b'type': meta[b'type'],
566 b'required': meta[b'required'],
566 b'required': meta[b'required'],
567 }
567 }
568
568
569 if not meta[b'required']:
569 if not meta[b'required']:
570 args[arg][b'default'] = meta[b'default']()
570 args[arg][b'default'] = meta[b'default']()
571
571
572 if meta[b'validvalues']:
572 if meta[b'validvalues']:
573 args[arg][b'validvalues'] = meta[b'validvalues']
573 args[arg][b'validvalues'] = meta[b'validvalues']
574
574
575 # TODO this type of check should be defined in a per-command callback.
575 # TODO this type of check should be defined in a per-command callback.
576 if (
576 if (
577 command == b'rawstorefiledata'
577 command == b'rawstorefiledata'
578 and not streamclone.allowservergeneration(repo)
578 and not streamclone.allowservergeneration(repo)
579 ):
579 ):
580 continue
580 continue
581
581
582 caps[b'commands'][command] = {
582 caps[b'commands'][command] = {
583 b'args': args,
583 b'args': args,
584 b'permissions': [entry.permission],
584 b'permissions': [entry.permission],
585 }
585 }
586
586
587 if entry.extracapabilitiesfn:
587 if entry.extracapabilitiesfn:
588 extracaps = entry.extracapabilitiesfn(repo, proto)
588 extracaps = entry.extracapabilitiesfn(repo, proto)
589 caps[b'commands'][command].update(extracaps)
589 caps[b'commands'][command].update(extracaps)
590
590
591 caps[b'rawrepoformats'] = sorted(repo.requirements & repo.supportedformats)
591 caps[b'rawrepoformats'] = sorted(repo.requirements & repo.supportedformats)
592
592
593 targets = getadvertisedredirecttargets(repo, proto)
593 targets = getadvertisedredirecttargets(repo, proto)
594 if targets:
594 if targets:
595 caps[b'redirect'] = {
595 caps[b'redirect'] = {
596 b'targets': [],
596 b'targets': [],
597 b'hashes': [b'sha256', b'sha1'],
597 b'hashes': [b'sha256', b'sha1'],
598 }
598 }
599
599
600 for target in targets:
600 for target in targets:
601 entry = {
601 entry = {
602 b'name': target[b'name'],
602 b'name': target[b'name'],
603 b'protocol': target[b'protocol'],
603 b'protocol': target[b'protocol'],
604 b'uris': target[b'uris'],
604 b'uris': target[b'uris'],
605 }
605 }
606
606
607 for key in (b'snirequired', b'tlsversions'):
607 for key in (b'snirequired', b'tlsversions'):
608 if key in target:
608 if key in target:
609 entry[key] = target[key]
609 entry[key] = target[key]
610
610
611 caps[b'redirect'][b'targets'].append(entry)
611 caps[b'redirect'][b'targets'].append(entry)
612
612
613 return proto.addcapabilities(repo, caps)
613 return proto.addcapabilities(repo, caps)
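
For orientation, the mapping returned here has roughly the following shape; the command, argument and format names are illustrative rather than an authoritative listing:

    {
        b'commands': {
            b'heads': {
                b'args': {b'publiconly': {b'type': b'bool',
                                          b'required': False,
                                          b'default': False}},
                b'permissions': [b'pull'],
            },
            # ... one entry per registered command ...
        },
        b'framingmediatypes': [b'application/mercurial-exp-framing-0006'],
        b'pathfilterprefixes': {b'path:', b'rootfilesin:'},
        b'rawrepoformats': [b'generaldelta', b'revlogv1'],
        # plus a b'redirect' key when getadvertisedredirecttargets()
        # returns any targets
    }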
614
614
615
615
616 def getadvertisedredirecttargets(repo, proto):
616 def getadvertisedredirecttargets(repo, proto):
617 """Obtain a list of content redirect targets.
617 """Obtain a list of content redirect targets.
618
618
619 Returns a list containing potential redirect targets that will be
619 Returns a list containing potential redirect targets that will be
620 advertised in capabilities data. Each dict MUST have the following
620 advertised in capabilities data. Each dict MUST have the following
621 keys:
621 keys:
622
622
623 name
623 name
624 The name of this redirect target. This is the identifier clients use
624 The name of this redirect target. This is the identifier clients use
625 to refer to a target. It is transferred as part of every command
625 to refer to a target. It is transferred as part of every command
626 request.
626 request.
627
627
628 protocol
628 protocol
629 Network protocol used by this target. Typically this is the string
629 Network protocol used by this target. Typically this is the string
630 in front of the ``://`` in a URL. e.g. ``https``.
630 in front of the ``://`` in a URL. e.g. ``https``.
631
631
632 uris
632 uris
633 List of representative URIs for this target. Clients can use the
633 List of representative URIs for this target. Clients can use the
634 URIs to test parsing for compatibility or for ordering preference
634 URIs to test parsing for compatibility or for ordering preference
635 for which target to use.
635 for which target to use.
636
636
637 The following optional keys are recognized:
637 The following optional keys are recognized:
638
638
639 snirequired
639 snirequired
640 Bool indicating if Server Name Indication (SNI) is required to
640 Bool indicating if Server Name Indication (SNI) is required to
641 connect to this target.
641 connect to this target.
642
642
643 tlsversions
643 tlsversions
644 List of bytes indicating which TLS versions are supported by this
644 List of bytes indicating which TLS versions are supported by this
645 target.
645 target.
646
646
647 By default, clients reflect the target order advertised by servers
647 By default, clients reflect the target order advertised by servers
648 and servers will use the first client-advertised target when picking
648 and servers will use the first client-advertised target when picking
649 a redirect target. So targets should be advertised in the order the
649 a redirect target. So targets should be advertised in the order the
650 server prefers they be used.
650 server prefers they be used.
651 """
651 """
652 return []
652 return []
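
An extension overriding this hook might return something like the following; the target name, URIs and TLS versions are purely hypothetical and only follow the key layout documented above:

    def getadvertisedredirecttargets(repo, proto):
        return [
            {
                b'name': b'cdn',
                b'protocol': b'https',
                b'uris': [b'https://cdn.example.com/hg/'],
                # optional keys
                b'snirequired': True,
                b'tlsversions': [b'1.2', b'1.3'],
            }
        ]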
653
653
654
654
655 def wireprotocommand(
655 def wireprotocommand(
656 name,
656 name,
657 args=None,
657 args=None,
658 permission=b'push',
658 permission=b'push',
659 cachekeyfn=None,
659 cachekeyfn=None,
660 extracapabilitiesfn=None,
660 extracapabilitiesfn=None,
661 ):
661 ):
662 """Decorator to declare a wire protocol command.
662 """Decorator to declare a wire protocol command.
663
663
664 ``name`` is the name of the wire protocol command being provided.
664 ``name`` is the name of the wire protocol command being provided.
665
665
666 ``args`` is a dict defining arguments accepted by the command. Keys are
666 ``args`` is a dict defining arguments accepted by the command. Keys are
667 the argument name. Values are dicts with the following keys:
667 the argument name. Values are dicts with the following keys:
668
668
669 ``type``
669 ``type``
670 The argument data type. Must be one of the following string
670 The argument data type. Must be one of the following string
671 literals: ``bytes``, ``int``, ``list``, ``dict``, ``set``,
671 literals: ``bytes``, ``int``, ``list``, ``dict``, ``set``,
672 or ``bool``.
672 or ``bool``.
673
673
674 ``default``
674 ``default``
675 A callable returning the default value for this argument. If not
675 A callable returning the default value for this argument. If not
676 specified, ``None`` will be the default value.
676 specified, ``None`` will be the default value.
677
677
678 ``example``
678 ``example``
679 An example value for this argument.
679 An example value for this argument.
680
680
681 ``validvalues``
681 ``validvalues``
682 Set of recognized values for this argument.
682 Set of recognized values for this argument.
683
683
684 ``permission`` defines the permission type needed to run this command.
684 ``permission`` defines the permission type needed to run this command.
685 Can be ``push`` or ``pull``. These roughly map to read-write and read-only,
685 Can be ``push`` or ``pull``. These roughly map to read-write and read-only,
686 respectively. Default is to assume command requires ``push`` permissions
686 respectively. Default is to assume command requires ``push`` permissions
687 because otherwise commands not declaring their permissions could modify
687 because otherwise commands not declaring their permissions could modify
688 a repository that is supposed to be read-only.
688 a repository that is supposed to be read-only.
689
689
690 ``cachekeyfn`` defines an optional callable that can derive the
690 ``cachekeyfn`` defines an optional callable that can derive the
691 cache key for this request.
691 cache key for this request.
692
692
693 ``extracapabilitiesfn`` defines an optional callable that defines extra
693 ``extracapabilitiesfn`` defines an optional callable that defines extra
694 command capabilities/parameters that are advertised next to the command
694 command capabilities/parameters that are advertised next to the command
695 in the capabilities data structure describing the server. The callable
695 in the capabilities data structure describing the server. The callable
696 receives as arguments the repository and protocol objects. It returns
696 receives as arguments the repository and protocol objects. It returns
697 a dict of extra fields to add to the command descriptor.
697 a dict of extra fields to add to the command descriptor.
698
698
699 Wire protocol commands are generators of objects to be serialized and
699 Wire protocol commands are generators of objects to be serialized and
700 sent to the client.
700 sent to the client.
701
701
702 If a command raises an uncaught exception, this will be translated into
702 If a command raises an uncaught exception, this will be translated into
703 a command error.
703 a command error.
704
704
705 All commands can opt in to being cacheable by defining a function
705 All commands can opt in to being cacheable by defining a function
706 (``cachekeyfn``) that is called to derive a cache key. This function
706 (``cachekeyfn``) that is called to derive a cache key. This function
707 receives the same arguments as the command itself plus a ``cacher``
707 receives the same arguments as the command itself plus a ``cacher``
708 argument containing the active cacher for the request, and returns the bytes
708 argument containing the active cacher for the request, and returns the bytes
709 to use as the key under which the response to this command may be
709 to use as the key under which the response to this command may be
710 cached.
710 cached.
711 """
711 """
712 transports = {
712 transports = {
713 k for k, v in wireprototypes.TRANSPORTS.items() if v[b'version'] == 2
713 k for k, v in wireprototypes.TRANSPORTS.items() if v[b'version'] == 2
714 }
714 }
715
715
716 if permission not in (b'push', b'pull'):
716 if permission not in (b'push', b'pull'):
717 raise error.ProgrammingError(
717 raise error.ProgrammingError(
718 b'invalid wire protocol permission; '
718 b'invalid wire protocol permission; '
719 b'got %s; expected "push" or "pull"' % permission
719 b'got %s; expected "push" or "pull"' % permission
720 )
720 )
721
721
722 if args is None:
722 if args is None:
723 args = {}
723 args = {}
724
724
725 if not isinstance(args, dict):
725 if not isinstance(args, dict):
726 raise error.ProgrammingError(
726 raise error.ProgrammingError(
727 b'arguments for version 2 commands must be declared as dicts'
727 b'arguments for version 2 commands must be declared as dicts'
728 )
728 )
729
729
730 for arg, meta in args.items():
730 for arg, meta in args.items():
731 if arg == b'*':
731 if arg == b'*':
732 raise error.ProgrammingError(
732 raise error.ProgrammingError(
733 b'* argument name not allowed on version 2 commands'
733 b'* argument name not allowed on version 2 commands'
734 )
734 )
735
735
736 if not isinstance(meta, dict):
736 if not isinstance(meta, dict):
737 raise error.ProgrammingError(
737 raise error.ProgrammingError(
738 b'arguments for version 2 commands '
738 b'arguments for version 2 commands '
739 b'must declare metadata as a dict'
739 b'must declare metadata as a dict'
740 )
740 )
741
741
742 if b'type' not in meta:
742 if b'type' not in meta:
743 raise error.ProgrammingError(
743 raise error.ProgrammingError(
744 b'%s argument for command %s does not '
744 b'%s argument for command %s does not '
745 b'declare type field' % (arg, name)
745 b'declare type field' % (arg, name)
746 )
746 )
747
747
748 if meta[b'type'] not in (
748 if meta[b'type'] not in (
749 b'bytes',
749 b'bytes',
750 b'int',
750 b'int',
751 b'list',
751 b'list',
752 b'dict',
752 b'dict',
753 b'set',
753 b'set',
754 b'bool',
754 b'bool',
755 ):
755 ):
756 raise error.ProgrammingError(
756 raise error.ProgrammingError(
757 b'%s argument for command %s has '
757 b'%s argument for command %s has '
758 b'illegal type: %s' % (arg, name, meta[b'type'])
758 b'illegal type: %s' % (arg, name, meta[b'type'])
759 )
759 )
760
760
761 if b'example' not in meta:
761 if b'example' not in meta:
762 raise error.ProgrammingError(
762 raise error.ProgrammingError(
763 b'%s argument for command %s does not '
763 b'%s argument for command %s does not '
764 b'declare example field' % (arg, name)
764 b'declare example field' % (arg, name)
765 )
765 )
766
766
767 meta[b'required'] = b'default' not in meta
767 meta[b'required'] = b'default' not in meta
768
768
769 meta.setdefault(b'default', lambda: None)
769 meta.setdefault(b'default', lambda: None)
770 meta.setdefault(b'validvalues', None)
770 meta.setdefault(b'validvalues', None)
771
771
772 def register(func):
772 def register(func):
773 if name in COMMANDS:
773 if name in COMMANDS:
774 raise error.ProgrammingError(
774 raise error.ProgrammingError(
775 b'%s command already registered for version 2' % name
775 b'%s command already registered for version 2' % name
776 )
776 )
777
777
778 COMMANDS[name] = wireprototypes.commandentry(
778 COMMANDS[name] = wireprototypes.commandentry(
779 func,
779 func,
780 args=args,
780 args=args,
781 transports=transports,
781 transports=transports,
782 permission=permission,
782 permission=permission,
783 cachekeyfn=cachekeyfn,
783 cachekeyfn=cachekeyfn,
784 extracapabilitiesfn=extracapabilitiesfn,
784 extracapabilitiesfn=extracapabilitiesfn,
785 )
785 )
786
786
787 return func
787 return func
788
788
789 return register
789 return register
790
790
791
791
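# Illustrative sketch: how an extension might use the decorator above to
# register a version 2 command. The command name, argument, and behaviour
# here are hypothetical, not part of this module.
@wireprotocommand(
    b'exampleecho',
    args={
        b'message': {
            b'type': b'bytes',
            b'default': lambda: b'',
            b'example': b'hello',
        },
    },
    permission=b'pull',
)
def exampleecho(repo, proto, message):
    # Commands are generators of objects that get serialized and sent back.
    yield {b'echo': message}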
792 def makecommandcachekeyfn(command, localversion=None, allargs=False):
792 def makecommandcachekeyfn(command, localversion=None, allargs=False):
793 """Construct a cache key derivation function with common features.
793 """Construct a cache key derivation function with common features.
794
794
795 By default, the cache key is a hash of:
795 By default, the cache key is a hash of:
796
796
797 * The command name.
797 * The command name.
798 * A global cache version number.
798 * A global cache version number.
799 * A local cache version number (passed via ``localversion``).
799 * A local cache version number (passed via ``localversion``).
800 * All the arguments passed to the command.
800 * All the arguments passed to the command.
801 * The media type used.
801 * The media type used.
802 * Wire protocol version string.
802 * Wire protocol version string.
803 * The repository path.
803 * The repository path.
804 """
804 """
805 if not allargs:
805 if not allargs:
806 raise error.ProgrammingError(
806 raise error.ProgrammingError(
807 b'only allargs=True is currently supported'
807 b'only allargs=True is currently supported'
808 )
808 )
809
809
810 if localversion is None:
810 if localversion is None:
811 raise error.ProgrammingError(b'must set localversion argument value')
811 raise error.ProgrammingError(b'must set localversion argument value')
812
812
813 def cachekeyfn(repo, proto, cacher, **args):
813 def cachekeyfn(repo, proto, cacher, **args):
814 spec = COMMANDS[command]
814 spec = COMMANDS[command]
815
815
816 # Commands that mutate the repo can not be cached.
816 # Commands that mutate the repo can not be cached.
817 if spec.permission == b'push':
817 if spec.permission == b'push':
818 return None
818 return None
819
819
820 # TODO config option to disable caching.
820 # TODO config option to disable caching.
821
821
822 # Our key derivation strategy is to construct a data structure
822 # Our key derivation strategy is to construct a data structure
823 # holding everything that could influence cacheability and to hash
823 # holding everything that could influence cacheability and to hash
824 # the CBOR representation of that. Using CBOR seems like it might
824 # the CBOR representation of that. Using CBOR seems like it might
825 # be overkill. However, simpler hashing mechanisms are prone to
825 # be overkill. However, simpler hashing mechanisms are prone to
826 # duplicate input issues. e.g. if you just concatenate two values,
826 # duplicate input issues. e.g. if you just concatenate two values,
827 # "foo"+"bar" is identical to "fo"+"obar". Using CBOR provides
827 # "foo"+"bar" is identical to "fo"+"obar". Using CBOR provides
828 # "padding" between values and prevents these problems.
828 # "padding" between values and prevents these problems.
829
829
830 # Seed the hash with various data.
830 # Seed the hash with various data.
831 state = {
831 state = {
832 # To invalidate all cache keys.
832 # To invalidate all cache keys.
833 b'globalversion': GLOBAL_CACHE_VERSION,
833 b'globalversion': GLOBAL_CACHE_VERSION,
834 # More granular cache key invalidation.
834 # More granular cache key invalidation.
835 b'localversion': localversion,
835 b'localversion': localversion,
836 # Cache keys are segmented by command.
836 # Cache keys are segmented by command.
837 b'command': command,
837 b'command': command,
838 # Throw in the media type and API version strings so changes
838 # Throw in the media type and API version strings so changes
839 # to exchange semantics invalidate the cache.
839 # to exchange semantics invalidate the cache.
840 b'mediatype': FRAMINGTYPE,
840 b'mediatype': FRAMINGTYPE,
841 b'version': HTTP_WIREPROTO_V2,
841 b'version': HTTP_WIREPROTO_V2,
842 # So same requests for different repos don't share cache keys.
842 # So same requests for different repos don't share cache keys.
843 b'repo': repo.root,
843 b'repo': repo.root,
844 }
844 }
845
845
846 # The arguments passed to us will have already been normalized.
846 # The arguments passed to us will have already been normalized.
847 # Default values will be set, etc. This is important because it
847 # Default values will be set, etc. This is important because it
848 # means that it doesn't matter if clients send an explicit argument
848 # means that it doesn't matter if clients send an explicit argument
849 # or rely on the default value: it will all normalize to the same
849 # or rely on the default value: it will all normalize to the same
850 # set of arguments on the server and therefore the same cache key.
850 # set of arguments on the server and therefore the same cache key.
851 #
851 #
852 # Arguments by their very nature must support being encoded to CBOR.
852 # Arguments by their very nature must support being encoded to CBOR.
853 # And the CBOR encoder is deterministic. So we hash the arguments
853 # And the CBOR encoder is deterministic. So we hash the arguments
854 # by feeding the CBOR of their representation into the hasher.
854 # by feeding the CBOR of their representation into the hasher.
855 if allargs:
855 if allargs:
856 state[b'args'] = pycompat.byteskwargs(args)
856 state[b'args'] = pycompat.byteskwargs(args)
857
857
858 cacher.adjustcachekeystate(state)
858 cacher.adjustcachekeystate(state)
859
859
860 hasher = hashutil.sha1()
860 hasher = hashutil.sha1()
861 for chunk in cborutil.streamencode(state):
861 for chunk in cborutil.streamencode(state):
862 hasher.update(chunk)
862 hasher.update(chunk)
863
863
864 return pycompat.sysbytes(hasher.hexdigest())
864 return pycompat.sysbytes(hasher.hexdigest())
865
865
866 return cachekeyfn
866 return cachekeyfn
867
867
868
868
869 def makeresponsecacher(
869 def makeresponsecacher(
870 repo, proto, command, args, objencoderfn, redirecttargets, redirecthashes
870 repo, proto, command, args, objencoderfn, redirecttargets, redirecthashes
871 ):
871 ):
872 """Construct a cacher for a cacheable command.
872 """Construct a cacher for a cacheable command.
873
873
874 Returns an ``iwireprotocolcommandcacher`` instance.
874 Returns an ``iwireprotocolcommandcacher`` instance.
875
875
876 Extensions can monkeypatch this function to provide custom caching
876 Extensions can monkeypatch this function to provide custom caching
877 backends.
877 backends.
878 """
878 """
879 return None
879 return None
880
880
881
881
882 def resolvenodes(repo, revisions):
882 def resolvenodes(repo, revisions):
883 """Resolve nodes from a revisions specifier data structure."""
883 """Resolve nodes from a revisions specifier data structure."""
884 cl = repo.changelog
884 cl = repo.changelog
885 clhasnode = cl.hasnode
885 clhasnode = cl.hasnode
886
886
887 seen = set()
887 seen = set()
888 nodes = []
888 nodes = []
889
889
890 if not isinstance(revisions, list):
890 if not isinstance(revisions, list):
891 raise error.WireprotoCommandError(
891 raise error.WireprotoCommandError(
892 b'revisions must be defined as an array'
892 b'revisions must be defined as an array'
893 )
893 )
894
894
895 for spec in revisions:
895 for spec in revisions:
896 if b'type' not in spec:
896 if b'type' not in spec:
897 raise error.WireprotoCommandError(
897 raise error.WireprotoCommandError(
898 b'type key not present in revision specifier'
898 b'type key not present in revision specifier'
899 )
899 )
900
900
901 typ = spec[b'type']
901 typ = spec[b'type']
902
902
903 if typ == b'changesetexplicit':
903 if typ == b'changesetexplicit':
904 if b'nodes' not in spec:
904 if b'nodes' not in spec:
905 raise error.WireprotoCommandError(
905 raise error.WireprotoCommandError(
906 b'nodes key not present in changesetexplicit revision '
906 b'nodes key not present in changesetexplicit revision '
907 b'specifier'
907 b'specifier'
908 )
908 )
909
909
910 for node in spec[b'nodes']:
910 for node in spec[b'nodes']:
911 if node not in seen:
911 if node not in seen:
912 nodes.append(node)
912 nodes.append(node)
913 seen.add(node)
913 seen.add(node)
914
914
915 elif typ == b'changesetexplicitdepth':
915 elif typ == b'changesetexplicitdepth':
916 for key in (b'nodes', b'depth'):
916 for key in (b'nodes', b'depth'):
917 if key not in spec:
917 if key not in spec:
918 raise error.WireprotoCommandError(
918 raise error.WireprotoCommandError(
919 b'%s key not present in changesetexplicitdepth revision '
919 b'%s key not present in changesetexplicitdepth revision '
920 b'specifier',
920 b'specifier',
921 (key,),
921 (key,),
922 )
922 )
923
923
924 for rev in repo.revs(
924 for rev in repo.revs(
925 b'ancestors(%ln, %s)', spec[b'nodes'], spec[b'depth'] - 1
925 b'ancestors(%ln, %s)', spec[b'nodes'], spec[b'depth'] - 1
926 ):
926 ):
927 node = cl.node(rev)
927 node = cl.node(rev)
928
928
929 if node not in seen:
929 if node not in seen:
930 nodes.append(node)
930 nodes.append(node)
931 seen.add(node)
931 seen.add(node)
932
932
933 elif typ == b'changesetdagrange':
933 elif typ == b'changesetdagrange':
934 for key in (b'roots', b'heads'):
934 for key in (b'roots', b'heads'):
935 if key not in spec:
935 if key not in spec:
936 raise error.WireprotoCommandError(
936 raise error.WireprotoCommandError(
937 b'%s key not present in changesetdagrange revision '
937 b'%s key not present in changesetdagrange revision '
938 b'specifier',
938 b'specifier',
939 (key,),
939 (key,),
940 )
940 )
941
941
942 if not spec[b'heads']:
942 if not spec[b'heads']:
943 raise error.WireprotoCommandError(
943 raise error.WireprotoCommandError(
944 b'heads key in changesetdagrange cannot be empty'
944 b'heads key in changesetdagrange cannot be empty'
945 )
945 )
946
946
947 if spec[b'roots']:
947 if spec[b'roots']:
948 common = [n for n in spec[b'roots'] if clhasnode(n)]
948 common = [n for n in spec[b'roots'] if clhasnode(n)]
949 else:
949 else:
950 common = [repo.nullid]
950 common = [repo.nullid]
951
951
952 for n in discovery.outgoing(repo, common, spec[b'heads']).missing:
952 for n in discovery.outgoing(repo, common, spec[b'heads']).missing:
953 if n not in seen:
953 if n not in seen:
954 nodes.append(n)
954 nodes.append(n)
955 seen.add(n)
955 seen.add(n)
956
956
957 else:
957 else:
958 raise error.WireprotoCommandError(
958 raise error.WireprotoCommandError(
959 b'unknown revision specifier type: %s', (typ,)
959 b'unknown revision specifier type: %s', (typ,)
960 )
960 )
961
961
962 return nodes
962 return nodes
963
963
964
964
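# For reference, a hedged example of the ``revisions`` structure handled
# above (node values are placeholders):
#
#   [
#       {b'type': b'changesetexplicit', b'nodes': [node1]},
#       {b'type': b'changesetexplicitdepth', b'nodes': [node2], b'depth': 2},
#       {b'type': b'changesetdagrange', b'roots': [node1], b'heads': [node3]},
#   ]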
965 @wireprotocommand(b'branchmap', permission=b'pull')
965 @wireprotocommand(b'branchmap', permission=b'pull')
966 def branchmapv2(repo, proto):
966 def branchmapv2(repo, proto):
967 yield {
967 yield {
968 encoding.fromlocal(k): v
968 encoding.fromlocal(k): v
969 for k, v in pycompat.iteritems(repo.branchmap())
969 for k, v in pycompat.iteritems(repo.branchmap())
970 }
970 }
971
971
972
972
973 @wireprotocommand(b'capabilities', permission=b'pull')
973 @wireprotocommand(b'capabilities', permission=b'pull')
974 def capabilitiesv2(repo, proto):
974 def capabilitiesv2(repo, proto):
975 yield _capabilitiesv2(repo, proto)
975 yield _capabilitiesv2(repo, proto)
976
976
977
977
978 @wireprotocommand(
978 @wireprotocommand(
979 b'changesetdata',
979 b'changesetdata',
980 args={
980 args={
981 b'revisions': {
981 b'revisions': {
982 b'type': b'list',
982 b'type': b'list',
983 b'example': [
983 b'example': [
984 {
984 {
985 b'type': b'changesetexplicit',
985 b'type': b'changesetexplicit',
986 b'nodes': [b'abcdef...'],
986 b'nodes': [b'abcdef...'],
987 }
987 }
988 ],
988 ],
989 },
989 },
990 b'fields': {
990 b'fields': {
991 b'type': b'set',
991 b'type': b'set',
992 b'default': set,
992 b'default': set,
993 b'example': {b'parents', b'revision'},
993 b'example': {b'parents', b'revision'},
994 b'validvalues': {b'bookmarks', b'parents', b'phase', b'revision'},
994 b'validvalues': {b'bookmarks', b'parents', b'phase', b'revision'},
995 },
995 },
996 },
996 },
997 permission=b'pull',
997 permission=b'pull',
998 )
998 )
999 def changesetdata(repo, proto, revisions, fields):
999 def changesetdata(repo, proto, revisions, fields):
1000 # TODO look for unknown fields and abort when they can't be serviced.
1000 # TODO look for unknown fields and abort when they can't be serviced.
1001 # This could probably be validated by dispatcher using validvalues.
1001 # This could probably be validated by dispatcher using validvalues.
1002
1002
1003 cl = repo.changelog
1003 cl = repo.changelog
1004 outgoing = resolvenodes(repo, revisions)
1004 outgoing = resolvenodes(repo, revisions)
1005 publishing = repo.publishing()
1005 publishing = repo.publishing()
1006
1006
1007 if outgoing:
1007 if outgoing:
1008 repo.hook(b'preoutgoing', throw=True, source=b'serve')
1008 repo.hook(b'preoutgoing', throw=True, source=b'serve')
1009
1009
1010 yield {
1010 yield {
1011 b'totalitems': len(outgoing),
1011 b'totalitems': len(outgoing),
1012 }
1012 }
1013
1013
1014 # The phases of nodes already transferred to the client may have changed
1014 # The phases of nodes already transferred to the client may have changed
1015 # since the client last requested data. We send phase-only records
1015 # since the client last requested data. We send phase-only records
1016 # for these revisions, if requested.
1016 # for these revisions, if requested.
1017 # TODO actually do this. We'll probably want to emit phase heads
1017 # TODO actually do this. We'll probably want to emit phase heads
1018 # in the ancestry set of the outgoing revisions. This will ensure
1018 # in the ancestry set of the outgoing revisions. This will ensure
1019 # that phase updates within that set are seen.
1019 # that phase updates within that set are seen.
1020 if b'phase' in fields:
1020 if b'phase' in fields:
1021 pass
1021 pass
1022
1022
1023 nodebookmarks = {}
1023 nodebookmarks = {}
1024 for mark, node in repo._bookmarks.items():
1024 for mark, node in repo._bookmarks.items():
1025 nodebookmarks.setdefault(node, set()).add(mark)
1025 nodebookmarks.setdefault(node, set()).add(mark)
1026
1026
1027 # It is already topologically sorted by revision number.
1027 # It is already topologically sorted by revision number.
1028 for node in outgoing:
1028 for node in outgoing:
1029 d = {
1029 d = {
1030 b'node': node,
1030 b'node': node,
1031 }
1031 }
1032
1032
1033 if b'parents' in fields:
1033 if b'parents' in fields:
1034 d[b'parents'] = cl.parents(node)
1034 d[b'parents'] = cl.parents(node)
1035
1035
1036 if b'phase' in fields:
1036 if b'phase' in fields:
1037 if publishing:
1037 if publishing:
1038 d[b'phase'] = b'public'
1038 d[b'phase'] = b'public'
1039 else:
1039 else:
1040 ctx = repo[node]
1040 ctx = repo[node]
1041 d[b'phase'] = ctx.phasestr()
1041 d[b'phase'] = ctx.phasestr()
1042
1042
1043 if b'bookmarks' in fields and node in nodebookmarks:
1043 if b'bookmarks' in fields and node in nodebookmarks:
1044 d[b'bookmarks'] = sorted(nodebookmarks[node])
1044 d[b'bookmarks'] = sorted(nodebookmarks[node])
1045 del nodebookmarks[node]
1045 del nodebookmarks[node]
1046
1046
1047 followingmeta = []
1047 followingmeta = []
1048 followingdata = []
1048 followingdata = []
1049
1049
1050 if b'revision' in fields:
1050 if b'revision' in fields:
1051 revisiondata = cl.revision(node)
1051 revisiondata = cl.revision(node)
1052 followingmeta.append((b'revision', len(revisiondata)))
1052 followingmeta.append((b'revision', len(revisiondata)))
1053 followingdata.append(revisiondata)
1053 followingdata.append(revisiondata)
1054
1054
1055 # TODO make it possible for extensions to wrap a function or register
1055 # TODO make it possible for extensions to wrap a function or register
1056 # a handler to service custom fields.
1056 # a handler to service custom fields.
1057
1057
1058 if followingmeta:
1058 if followingmeta:
1059 d[b'fieldsfollowing'] = followingmeta
1059 d[b'fieldsfollowing'] = followingmeta
1060
1060
1061 yield d
1061 yield d
1062
1062
1063 for extra in followingdata:
1063 for extra in followingdata:
1064 yield extra
1064 yield extra
1065
1065
1066 # If requested, send bookmarks from nodes that didn't have revision
1066 # If requested, send bookmarks from nodes that didn't have revision
1067 # data sent so receiver is aware of any bookmark updates.
1067 # data sent so receiver is aware of any bookmark updates.
1068 if b'bookmarks' in fields:
1068 if b'bookmarks' in fields:
1069 for node, marks in sorted(pycompat.iteritems(nodebookmarks)):
1069 for node, marks in sorted(pycompat.iteritems(nodebookmarks)):
1070 yield {
1070 yield {
1071 b'node': node,
1071 b'node': node,
1072 b'bookmarks': sorted(marks),
1072 b'bookmarks': sorted(marks),
1073 }
1073 }
1074
1074
1075
1075
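# A hedged sketch of what changesetdata emits for a single node when
# ``revision`` is requested (size and values are placeholders): the metadata
# dict announces the length, then the raw data follows as its own object.
#
#   {b'node': node, b'fieldsfollowing': [(b'revision', 142)]}
#   b'<142 bytes of changelog revision text>'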
1076 class FileAccessError(Exception):
1076 class FileAccessError(Exception):
1077 """Represents an error accessing a specific file."""
1077 """Represents an error accessing a specific file."""
1078
1078
1079 def __init__(self, path, msg, args):
1079 def __init__(self, path, msg, args):
1080 self.path = path
1080 self.path = path
1081 self.msg = msg
1081 self.msg = msg
1082 self.args = args
1082 self.args = args
1083
1083
1084
1084
1085 def getfilestore(repo, proto, path):
1085 def getfilestore(repo, proto, path):
1086 """Obtain a file storage object for use with wire protocol.
1086 """Obtain a file storage object for use with wire protocol.
1087
1087
1088 Exists as a standalone function so extensions can monkeypatch to add
1088 Exists as a standalone function so extensions can monkeypatch to add
1089 access control.
1089 access control.
1090 """
1090 """
1091 # This seems to work even if the file doesn't exist. So catch
1091 # This seems to work even if the file doesn't exist. So catch
1092 # "empty" files and return an error.
1092 # "empty" files and return an error.
1093 fl = repo.file(path)
1093 fl = repo.file(path)
1094
1094
1095 if not len(fl):
1095 if not len(fl):
1096 raise FileAccessError(path, b'unknown file: %s', (path,))
1096 raise FileAccessError(path, b'unknown file: %s', (path,))
1097
1097
1098 return fl
1098 return fl
1099
1099
1100
1100
1101 def emitfilerevisions(repo, path, revisions, linknodes, fields):
1101 def emitfilerevisions(repo, path, revisions, linknodes, fields):
1102 for revision in revisions:
1102 for revision in revisions:
1103 d = {
1103 d = {
1104 b'node': revision.node,
1104 b'node': revision.node,
1105 }
1105 }
1106
1106
1107 if b'parents' in fields:
1107 if b'parents' in fields:
1108 d[b'parents'] = [revision.p1node, revision.p2node]
1108 d[b'parents'] = [revision.p1node, revision.p2node]
1109
1109
1110 if b'linknode' in fields:
1110 if b'linknode' in fields:
1111 d[b'linknode'] = linknodes[revision.node]
1111 d[b'linknode'] = linknodes[revision.node]
1112
1112
1113 followingmeta = []
1113 followingmeta = []
1114 followingdata = []
1114 followingdata = []
1115
1115
1116 if b'revision' in fields:
1116 if b'revision' in fields:
1117 if revision.revision is not None:
1117 if revision.revision is not None:
1118 followingmeta.append((b'revision', len(revision.revision)))
1118 followingmeta.append((b'revision', len(revision.revision)))
1119 followingdata.append(revision.revision)
1119 followingdata.append(revision.revision)
1120 else:
1120 else:
1121 d[b'deltabasenode'] = revision.basenode
1121 d[b'deltabasenode'] = revision.basenode
1122 followingmeta.append((b'delta', len(revision.delta)))
1122 followingmeta.append((b'delta', len(revision.delta)))
1123 followingdata.append(revision.delta)
1123 followingdata.append(revision.delta)
1124
1124
1125 if followingmeta:
1125 if followingmeta:
1126 d[b'fieldsfollowing'] = followingmeta
1126 d[b'fieldsfollowing'] = followingmeta
1127
1127
1128 yield d
1128 yield d
1129
1129
1130 for extra in followingdata:
1130 for extra in followingdata:
1131 yield extra
1131 yield extra
1132
1132
1133
1133
1134 def makefilematcher(repo, pathfilter):
1134 def makefilematcher(repo, pathfilter):
1135 """Construct a matcher from a path filter dict."""
1135 """Construct a matcher from a path filter dict."""
1136
1136
1137 # Validate values.
1137 # Validate values.
1138 if pathfilter:
1138 if pathfilter:
1139 for key in (b'include', b'exclude'):
1139 for key in (b'include', b'exclude'):
1140 for pattern in pathfilter.get(key, []):
1140 for pattern in pathfilter.get(key, []):
1141 if not pattern.startswith((b'path:', b'rootfilesin:')):
1141 if not pattern.startswith((b'path:', b'rootfilesin:')):
1142 raise error.WireprotoCommandError(
1142 raise error.WireprotoCommandError(
1143 b'%s pattern must begin with `path:` or `rootfilesin:`; '
1143 b'%s pattern must begin with `path:` or `rootfilesin:`; '
1144 b'got %s',
1144 b'got %s',
1145 (key, pattern),
1145 (key, pattern),
1146 )
1146 )
1147
1147
1148 if pathfilter:
1148 if pathfilter:
1149 matcher = matchmod.match(
1149 matcher = matchmod.match(
1150 repo.root,
1150 repo.root,
1151 b'',
1151 b'',
1152 include=pathfilter.get(b'include', []),
1152 include=pathfilter.get(b'include', []),
1153 exclude=pathfilter.get(b'exclude', []),
1153 exclude=pathfilter.get(b'exclude', []),
1154 )
1154 )
1155 else:
1155 else:
1156 matcher = matchmod.match(repo.root, b'')
1156 matcher = matchmod.match(repo.root, b'')
1157
1157
1158 # Requested patterns could include files not in the local store. So
1158 # Requested patterns could include files not in the local store. So
1159 # filter those out.
1159 # filter those out.
1160 return repo.narrowmatch(matcher)
1160 return repo.narrowmatch(matcher)
1161
1161
1162
1162
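# A hedged usage sketch for the helper above (paths are hypothetical):
#
#   matcher = makefilematcher(repo, {b'include': [b'path:tests']})
#   matcher(b'tests/test-foo.t')  # True, provided the narrowspec allows it
#   matcher(b'setup.py')          # False, not under the include pattern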
1163 @wireprotocommand(
1163 @wireprotocommand(
1164 b'filedata',
1164 b'filedata',
1165 args={
1165 args={
1166 b'haveparents': {
1166 b'haveparents': {
1167 b'type': b'bool',
1167 b'type': b'bool',
1168 b'default': lambda: False,
1168 b'default': lambda: False,
1169 b'example': True,
1169 b'example': True,
1170 },
1170 },
1171 b'nodes': {
1171 b'nodes': {
1172 b'type': b'list',
1172 b'type': b'list',
1173 b'example': [b'0123456...'],
1173 b'example': [b'0123456...'],
1174 },
1174 },
1175 b'fields': {
1175 b'fields': {
1176 b'type': b'set',
1176 b'type': b'set',
1177 b'default': set,
1177 b'default': set,
1178 b'example': {b'parents', b'revision'},
1178 b'example': {b'parents', b'revision'},
1179 b'validvalues': {b'parents', b'revision', b'linknode'},
1179 b'validvalues': {b'parents', b'revision', b'linknode'},
1180 },
1180 },
1181 b'path': {
1181 b'path': {
1182 b'type': b'bytes',
1182 b'type': b'bytes',
1183 b'example': b'foo.txt',
1183 b'example': b'foo.txt',
1184 },
1184 },
1185 },
1185 },
1186 permission=b'pull',
1186 permission=b'pull',
1187 # TODO censoring a file revision won't invalidate the cache.
1187 # TODO censoring a file revision won't invalidate the cache.
1188 # Figure out a way to take censoring into account when deriving
1188 # Figure out a way to take censoring into account when deriving
1189 # the cache key.
1189 # the cache key.
1190 cachekeyfn=makecommandcachekeyfn(b'filedata', 1, allargs=True),
1190 cachekeyfn=makecommandcachekeyfn(b'filedata', 1, allargs=True),
1191 )
1191 )
1192 def filedata(repo, proto, haveparents, nodes, fields, path):
1192 def filedata(repo, proto, haveparents, nodes, fields, path):
1193 # TODO this API allows access to file revisions that are attached to
1193 # TODO this API allows access to file revisions that are attached to
1194 # secret changesets. filesdata does not have this problem. Maybe this
1194 # secret changesets. filesdata does not have this problem. Maybe this
1195 # API should be deleted?
1195 # API should be deleted?
1196
1196
1197 try:
1197 try:
1198 # Extensions may wish to access the protocol handler.
1198 # Extensions may wish to access the protocol handler.
1199 store = getfilestore(repo, proto, path)
1199 store = getfilestore(repo, proto, path)
1200 except FileAccessError as e:
1200 except FileAccessError as e:
1201 raise error.WireprotoCommandError(e.msg, e.args)
1201 raise error.WireprotoCommandError(e.msg, e.args)
1202
1202
1203 clnode = repo.changelog.node
1203 clnode = repo.changelog.node
1204 linknodes = {}
1204 linknodes = {}
1205
1205
1206 # Validate requested nodes.
1206 # Validate requested nodes.
1207 for node in nodes:
1207 for node in nodes:
1208 try:
1208 try:
1209 store.rev(node)
1209 store.rev(node)
1210 except error.LookupError:
1210 except error.LookupError:
1211 raise error.WireprotoCommandError(
1211 raise error.WireprotoCommandError(
1212 b'unknown file node: %s', (hex(node),)
1212 b'unknown file node: %s', (hex(node),)
1213 )
1213 )
1214
1214
1215 # TODO by creating the filectx against a specific file revision
1215 # TODO by creating the filectx against a specific file revision
1216 # instead of changeset, linkrev() is always used. This is wrong for
1216 # instead of changeset, linkrev() is always used. This is wrong for
1217 # cases where linkrev() may refer to a hidden changeset. But since this
1217 # cases where linkrev() may refer to a hidden changeset. But since this
1218 # API doesn't know anything about changesets, we're not sure how to
1218 # API doesn't know anything about changesets, we're not sure how to
1219 # disambiguate the linknode. Perhaps we should delete this API?
1219 # disambiguate the linknode. Perhaps we should delete this API?
1220 fctx = repo.filectx(path, fileid=node)
1220 fctx = repo.filectx(path, fileid=node)
1221 linknodes[node] = clnode(fctx.introrev())
1221 linknodes[node] = clnode(fctx.introrev())
1222
1222
1223 revisions = store.emitrevisions(
1223 revisions = store.emitrevisions(
1224 nodes,
1224 nodes,
1225 revisiondata=b'revision' in fields,
1225 revisiondata=b'revision' in fields,
1226 assumehaveparentrevisions=haveparents,
1226 assumehaveparentrevisions=haveparents,
1227 )
1227 )
1228
1228
1229 yield {
1229 yield {
1230 b'totalitems': len(nodes),
1230 b'totalitems': len(nodes),
1231 }
1231 }
1232
1232
1233 for o in emitfilerevisions(repo, path, revisions, linknodes, fields):
1233 for o in emitfilerevisions(repo, path, revisions, linknodes, fields):
1234 yield o
1234 yield o
1235
1235
1236
1236
1237 def filesdatacapabilities(repo, proto):
1237 def filesdatacapabilities(repo, proto):
1238 batchsize = repo.ui.configint(
1238 batchsize = repo.ui.configint(
1239 b'experimental', b'server.filesdata.recommended-batch-size'
1239 b'experimental', b'server.filesdata.recommended-batch-size'
1240 )
1240 )
1241 return {
1241 return {
1242 b'recommendedbatchsize': batchsize,
1242 b'recommendedbatchsize': batchsize,
1243 }
1243 }
1244
1244
1245
1245
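# The recommended batch size above is driven by configuration; a server
# operator could tune it with something like (the value is only an example):
#
#   [experimental]
#   server.filesdata.recommended-batch-size = 50000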
1246 @wireprotocommand(
1246 @wireprotocommand(
1247 b'filesdata',
1247 b'filesdata',
1248 args={
1248 args={
1249 b'haveparents': {
1249 b'haveparents': {
1250 b'type': b'bool',
1250 b'type': b'bool',
1251 b'default': lambda: False,
1251 b'default': lambda: False,
1252 b'example': True,
1252 b'example': True,
1253 },
1253 },
1254 b'fields': {
1254 b'fields': {
1255 b'type': b'set',
1255 b'type': b'set',
1256 b'default': set,
1256 b'default': set,
1257 b'example': {b'parents', b'revision'},
1257 b'example': {b'parents', b'revision'},
1258 b'validvalues': {
1258 b'validvalues': {
1259 b'firstchangeset',
1259 b'firstchangeset',
1260 b'linknode',
1260 b'linknode',
1261 b'parents',
1261 b'parents',
1262 b'revision',
1262 b'revision',
1263 },
1263 },
1264 },
1264 },
1265 b'pathfilter': {
1265 b'pathfilter': {
1266 b'type': b'dict',
1266 b'type': b'dict',
1267 b'default': lambda: None,
1267 b'default': lambda: None,
1268 b'example': {b'include': [b'path:tests']},
1268 b'example': {b'include': [b'path:tests']},
1269 },
1269 },
1270 b'revisions': {
1270 b'revisions': {
1271 b'type': b'list',
1271 b'type': b'list',
1272 b'example': [
1272 b'example': [
1273 {
1273 {
1274 b'type': b'changesetexplicit',
1274 b'type': b'changesetexplicit',
1275 b'nodes': [b'abcdef...'],
1275 b'nodes': [b'abcdef...'],
1276 }
1276 }
1277 ],
1277 ],
1278 },
1278 },
1279 },
1279 },
1280 permission=b'pull',
1280 permission=b'pull',
1281 # TODO censoring a file revision won't invalidate the cache.
1281 # TODO censoring a file revision won't invalidate the cache.
1282 # Figure out a way to take censoring into account when deriving
1282 # Figure out a way to take censoring into account when deriving
1283 # the cache key.
1283 # the cache key.
1284 cachekeyfn=makecommandcachekeyfn(b'filesdata', 1, allargs=True),
1284 cachekeyfn=makecommandcachekeyfn(b'filesdata', 1, allargs=True),
1285 extracapabilitiesfn=filesdatacapabilities,
1285 extracapabilitiesfn=filesdatacapabilities,
1286 )
1286 )
1287 def filesdata(repo, proto, haveparents, fields, pathfilter, revisions):
1287 def filesdata(repo, proto, haveparents, fields, pathfilter, revisions):
1288 # TODO This should operate on a repo that exposes obsolete changesets. There
1288 # TODO This should operate on a repo that exposes obsolete changesets. There
1289 # is a race between a client making a push that obsoletes a changeset and
1289 # is a race between a client making a push that obsoletes a changeset and
1290 # another client fetching files data for that changeset. If a client has a
1290 # another client fetching files data for that changeset. If a client has a
1291 # changeset, it should probably be allowed to access files data for that
1291 # changeset, it should probably be allowed to access files data for that
1292 # changeset.
1292 # changeset.
1293
1293
1294 outgoing = resolvenodes(repo, revisions)
1294 outgoing = resolvenodes(repo, revisions)
1295 filematcher = makefilematcher(repo, pathfilter)
1295 filematcher = makefilematcher(repo, pathfilter)
1296
1296
1297 # path -> {fnode: linknode}
1297 # path -> {fnode: linknode}
1298 fnodes = collections.defaultdict(dict)
1298 fnodes = collections.defaultdict(dict)
1299
1299
1300 # We collect the set of relevant file revisions by iterating the changeset
1300 # We collect the set of relevant file revisions by iterating the changeset
1301 # revisions and either walking the set of files recorded in the changeset
1301 # revisions and either walking the set of files recorded in the changeset
1302 # or by walking the manifest at that revision. There is probably room for a
1302 # or by walking the manifest at that revision. There is probably room for a
1303 # storage-level API to request this data, as it can be expensive to compute
1303 # storage-level API to request this data, as it can be expensive to compute
1304 # and would benefit from caching or alternate storage from what revlogs
1304 # and would benefit from caching or alternate storage from what revlogs
1305 # provide.
1305 # provide.
1306 for node in outgoing:
1306 for node in outgoing:
1307 ctx = repo[node]
1307 ctx = repo[node]
1308 mctx = ctx.manifestctx()
1308 mctx = ctx.manifestctx()
1309 md = mctx.read()
1309 md = mctx.read()
1310
1310
1311 if haveparents:
1311 if haveparents:
1312 checkpaths = ctx.files()
1312 checkpaths = ctx.files()
1313 else:
1313 else:
1314 checkpaths = md.keys()
1314 checkpaths = md.keys()
1315
1315
1316 for path in checkpaths:
1316 for path in checkpaths:
1317 fnode = md[path]
1317 fnode = md[path]
1318
1318
1319 if path in fnodes and fnode in fnodes[path]:
1319 if path in fnodes and fnode in fnodes[path]:
1320 continue
1320 continue
1321
1321
1322 if not filematcher(path):
1322 if not filematcher(path):
1323 continue
1323 continue
1324
1324
1325 fnodes[path].setdefault(fnode, node)
1325 fnodes[path].setdefault(fnode, node)
1326
1326
1327 yield {
1327 yield {
1328 b'totalpaths': len(fnodes),
1328 b'totalpaths': len(fnodes),
1329 b'totalitems': sum(len(v) for v in fnodes.values()),
1329 b'totalitems': sum(len(v) for v in fnodes.values()),
1330 }
1330 }
1331
1331
1332 for path, filenodes in sorted(fnodes.items()):
1332 for path, filenodes in sorted(fnodes.items()):
1333 try:
1333 try:
1334 store = getfilestore(repo, proto, path)
1334 store = getfilestore(repo, proto, path)
1335 except FileAccessError as e:
1335 except FileAccessError as e:
1336 raise error.WireprotoCommandError(e.msg, e.args)
1336 raise error.WireprotoCommandError(e.msg, e.args)
1337
1337
1338 yield {
1338 yield {
1339 b'path': path,
1339 b'path': path,
1340 b'totalitems': len(filenodes),
1340 b'totalitems': len(filenodes),
1341 }
1341 }
1342
1342
1343 revisions = store.emitrevisions(
1343 revisions = store.emitrevisions(
1344 filenodes.keys(),
1344 filenodes.keys(),
1345 revisiondata=b'revision' in fields,
1345 revisiondata=b'revision' in fields,
1346 assumehaveparentrevisions=haveparents,
1346 assumehaveparentrevisions=haveparents,
1347 )
1347 )
1348
1348
1349 for o in emitfilerevisions(repo, path, revisions, filenodes, fields):
1349 for o in emitfilerevisions(repo, path, revisions, filenodes, fields):
1350 yield o
1350 yield o
1351
1351
1352
1352
1353 @wireprotocommand(
1353 @wireprotocommand(
1354 b'heads',
1354 b'heads',
1355 args={
1355 args={
1356 b'publiconly': {
1356 b'publiconly': {
1357 b'type': b'bool',
1357 b'type': b'bool',
1358 b'default': lambda: False,
1358 b'default': lambda: False,
1359 b'example': False,
1359 b'example': False,
1360 },
1360 },
1361 },
1361 },
1362 permission=b'pull',
1362 permission=b'pull',
1363 )
1363 )
1364 def headsv2(repo, proto, publiconly):
1364 def headsv2(repo, proto, publiconly):
1365 if publiconly:
1365 if publiconly:
1366 repo = repo.filtered(b'immutable')
1366 repo = repo.filtered(b'immutable')
1367
1367
1368 yield repo.heads()
1368 yield repo.heads()
1369
1369
1370
1370
1371 @wireprotocommand(
1371 @wireprotocommand(
1372 b'known',
1372 b'known',
1373 args={
1373 args={
1374 b'nodes': {
1374 b'nodes': {
1375 b'type': b'list',
1375 b'type': b'list',
1376 b'default': list,
1376 b'default': list,
1377 b'example': [b'deadbeef'],
1377 b'example': [b'deadbeef'],
1378 },
1378 },
1379 },
1379 },
1380 permission=b'pull',
1380 permission=b'pull',
1381 )
1381 )
1382 def knownv2(repo, proto, nodes):
1382 def knownv2(repo, proto, nodes):
1383 result = b''.join(b'1' if n else b'0' for n in repo.known(nodes))
1383 result = b''.join(b'1' if n else b'0' for n in repo.known(nodes))
1384 yield result
1384 yield result
1385
1385
1386
1386
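# For example (hypothetical input), if the first and third queried nodes are
# known locally:
#
#   repo.known([n1, n2, n3])  ->  [True, False, True]
#   result                    ==  b'101'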
1387 @wireprotocommand(
1387 @wireprotocommand(
1388 b'listkeys',
1388 b'listkeys',
1389 args={
1389 args={
1390 b'namespace': {
1390 b'namespace': {
1391 b'type': b'bytes',
1391 b'type': b'bytes',
1392 b'example': b'ns',
1392 b'example': b'ns',
1393 },
1393 },
1394 },
1394 },
1395 permission=b'pull',
1395 permission=b'pull',
1396 )
1396 )
1397 def listkeysv2(repo, proto, namespace):
1397 def listkeysv2(repo, proto, namespace):
1398 keys = repo.listkeys(encoding.tolocal(namespace))
1398 keys = repo.listkeys(encoding.tolocal(namespace))
1399 keys = {
1399 keys = {
1400 encoding.fromlocal(k): encoding.fromlocal(v)
1400 encoding.fromlocal(k): encoding.fromlocal(v)
1401 for k, v in pycompat.iteritems(keys)
1401 for k, v in pycompat.iteritems(keys)
1402 }
1402 }
1403
1403
1404 yield keys
1404 yield keys
1405
1405
1406
1406
1407 @wireprotocommand(
1407 @wireprotocommand(
1408 b'lookup',
1408 b'lookup',
1409 args={
1409 args={
1410 b'key': {
1410 b'key': {
1411 b'type': b'bytes',
1411 b'type': b'bytes',
1412 b'example': b'foo',
1412 b'example': b'foo',
1413 },
1413 },
1414 },
1414 },
1415 permission=b'pull',
1415 permission=b'pull',
1416 )
1416 )
1417 def lookupv2(repo, proto, key):
1417 def lookupv2(repo, proto, key):
1418 key = encoding.tolocal(key)
1418 key = encoding.tolocal(key)
1419
1419
1420 # TODO handle exception.
1420 # TODO handle exception.
1421 node = repo.lookup(key)
1421 node = repo.lookup(key)
1422
1422
1423 yield node
1423 yield node
1424
1424
1425
1425
1426 def manifestdatacapabilities(repo, proto):
1426 def manifestdatacapabilities(repo, proto):
1427 batchsize = repo.ui.configint(
1427 batchsize = repo.ui.configint(
1428 b'experimental', b'server.manifestdata.recommended-batch-size'
1428 b'experimental', b'server.manifestdata.recommended-batch-size'
1429 )
1429 )
1430
1430
1431 return {
1431 return {
1432 b'recommendedbatchsize': batchsize,
1432 b'recommendedbatchsize': batchsize,
1433 }
1433 }
1434
1434
1435
1435
1436 @wireprotocommand(
1436 @wireprotocommand(
1437 b'manifestdata',
1437 b'manifestdata',
1438 args={
1438 args={
1439 b'nodes': {
1439 b'nodes': {
1440 b'type': b'list',
1440 b'type': b'list',
1441 b'example': [b'0123456...'],
1441 b'example': [b'0123456...'],
1442 },
1442 },
1443 b'haveparents': {
1443 b'haveparents': {
1444 b'type': b'bool',
1444 b'type': b'bool',
1445 b'default': lambda: False,
1445 b'default': lambda: False,
1446 b'example': True,
1446 b'example': True,
1447 },
1447 },
1448 b'fields': {
1448 b'fields': {
1449 b'type': b'set',
1449 b'type': b'set',
1450 b'default': set,
1450 b'default': set,
1451 b'example': {b'parents', b'revision'},
1451 b'example': {b'parents', b'revision'},
1452 b'validvalues': {b'parents', b'revision'},
1452 b'validvalues': {b'parents', b'revision'},
1453 },
1453 },
1454 b'tree': {
1454 b'tree': {
1455 b'type': b'bytes',
1455 b'type': b'bytes',
1456 b'example': b'',
1456 b'example': b'',
1457 },
1457 },
1458 },
1458 },
1459 permission=b'pull',
1459 permission=b'pull',
1460 cachekeyfn=makecommandcachekeyfn(b'manifestdata', 1, allargs=True),
1460 cachekeyfn=makecommandcachekeyfn(b'manifestdata', 1, allargs=True),
1461 extracapabilitiesfn=manifestdatacapabilities,
1461 extracapabilitiesfn=manifestdatacapabilities,
1462 )
1462 )
1463 def manifestdata(repo, proto, haveparents, nodes, fields, tree):
1463 def manifestdata(repo, proto, haveparents, nodes, fields, tree):
1464 store = repo.manifestlog.getstorage(tree)
1464 store = repo.manifestlog.getstorage(tree)
1465
1465
1466 # Validate the node is known and abort on unknown revisions.
1466 # Validate the node is known and abort on unknown revisions.
1467 for node in nodes:
1467 for node in nodes:
1468 try:
1468 try:
1469 store.rev(node)
1469 store.rev(node)
1470 except error.LookupError:
1470 except error.LookupError:
1471 raise error.WireprotoCommandError(b'unknown node: %s', (node,))
1471 raise error.WireprotoCommandError(b'unknown node: %s', (node,))
1472
1472
1473 revisions = store.emitrevisions(
1473 revisions = store.emitrevisions(
1474 nodes,
1474 nodes,
1475 revisiondata=b'revision' in fields,
1475 revisiondata=b'revision' in fields,
1476 assumehaveparentrevisions=haveparents,
1476 assumehaveparentrevisions=haveparents,
1477 )
1477 )
1478
1478
1479 yield {
1479 yield {
1480 b'totalitems': len(nodes),
1480 b'totalitems': len(nodes),
1481 }
1481 }
1482
1482
1483 for revision in revisions:
1483 for revision in revisions:
1484 d = {
1484 d = {
1485 b'node': revision.node,
1485 b'node': revision.node,
1486 }
1486 }
1487
1487
1488 if b'parents' in fields:
1488 if b'parents' in fields:
1489 d[b'parents'] = [revision.p1node, revision.p2node]
1489 d[b'parents'] = [revision.p1node, revision.p2node]
1490
1490
1491 followingmeta = []
1491 followingmeta = []
1492 followingdata = []
1492 followingdata = []
1493
1493
1494 if b'revision' in fields:
1494 if b'revision' in fields:
1495 if revision.revision is not None:
1495 if revision.revision is not None:
1496 followingmeta.append((b'revision', len(revision.revision)))
1496 followingmeta.append((b'revision', len(revision.revision)))
1497 followingdata.append(revision.revision)
1497 followingdata.append(revision.revision)
1498 else:
1498 else:
1499 d[b'deltabasenode'] = revision.basenode
1499 d[b'deltabasenode'] = revision.basenode
1500 followingmeta.append((b'delta', len(revision.delta)))
1500 followingmeta.append((b'delta', len(revision.delta)))
1501 followingdata.append(revision.delta)
1501 followingdata.append(revision.delta)
1502
1502
1503 if followingmeta:
1503 if followingmeta:
1504 d[b'fieldsfollowing'] = followingmeta
1504 d[b'fieldsfollowing'] = followingmeta
1505
1505
1506 yield d
1506 yield d
1507
1507
1508 for extra in followingdata:
1508 for extra in followingdata:
1509 yield extra
1509 yield extra
1510
1510
1511
1511
1512 @wireprotocommand(
1512 @wireprotocommand(
1513 b'pushkey',
1513 b'pushkey',
1514 args={
1514 args={
1515 b'namespace': {
1515 b'namespace': {
1516 b'type': b'bytes',
1516 b'type': b'bytes',
1517 b'example': b'ns',
1517 b'example': b'ns',
1518 },
1518 },
1519 b'key': {
1519 b'key': {
1520 b'type': b'bytes',
1520 b'type': b'bytes',
1521 b'example': b'key',
1521 b'example': b'key',
1522 },
1522 },
1523 b'old': {
1523 b'old': {
1524 b'type': b'bytes',
1524 b'type': b'bytes',
1525 b'example': b'old',
1525 b'example': b'old',
1526 },
1526 },
1527 b'new': {
1527 b'new': {
1528 b'type': b'bytes',
1528 b'type': b'bytes',
1529 b'example': b'new',
1529 b'example': b'new',
1530 },
1530 },
1531 },
1531 },
1532 permission=b'push',
1532 permission=b'push',
1533 )
1533 )
1534 def pushkeyv2(repo, proto, namespace, key, old, new):
1534 def pushkeyv2(repo, proto, namespace, key, old, new):
1535 # TODO handle ui output redirection
1535 # TODO handle ui output redirection
1536 yield repo.pushkey(
1536 yield repo.pushkey(
1537 encoding.tolocal(namespace),
1537 encoding.tolocal(namespace),
1538 encoding.tolocal(key),
1538 encoding.tolocal(key),
1539 encoding.tolocal(old),
1539 encoding.tolocal(old),
1540 encoding.tolocal(new),
1540 encoding.tolocal(new),
1541 )
1541 )
1542
1542
1543
1543
1544 @wireprotocommand(
1544 @wireprotocommand(
1545 b'rawstorefiledata',
1545 b'rawstorefiledata',
1546 args={
1546 args={
1547 b'files': {
1547 b'files': {
1548 b'type': b'list',
1548 b'type': b'list',
1549 b'example': [b'changelog', b'manifestlog'],
1549 b'example': [b'changelog', b'manifestlog'],
1550 },
1550 },
1551 b'pathfilter': {
1551 b'pathfilter': {
1552 b'type': b'list',
1552 b'type': b'list',
1553 b'default': lambda: None,
1553 b'default': lambda: None,
1554 b'example': {b'include': [b'path:tests']},
1554 b'example': {b'include': [b'path:tests']},
1555 },
1555 },
1556 },
1556 },
1557 permission=b'pull',
1557 permission=b'pull',
1558 )
1558 )
1559 def rawstorefiledata(repo, proto, files, pathfilter):
1559 def rawstorefiledata(repo, proto, files, pathfilter):
1560 if not streamclone.allowservergeneration(repo):
1560 if not streamclone.allowservergeneration(repo):
1561 raise error.WireprotoCommandError(b'stream clone is disabled')
1561 raise error.WireprotoCommandError(b'stream clone is disabled')
1562
1562
1563 # TODO support dynamically advertising what store files "sets" are
1563 # TODO support dynamically advertising what store files "sets" are
1564 # available. For now, we support changelog, manifestlog, and files.
1564 # available. For now, we support changelog, manifestlog, and files.
1565 files = set(files)
1565 files = set(files)
1566 allowedfiles = {b'changelog', b'manifestlog'}
1566 allowedfiles = {b'changelog', b'manifestlog'}
1567
1567
1568 unsupported = files - allowedfiles
1568 unsupported = files - allowedfiles
1569 if unsupported:
1569 if unsupported:
1570 raise error.WireprotoCommandError(
1570 raise error.WireprotoCommandError(
1571 b'unknown file type: %s', (b', '.join(sorted(unsupported)),)
1571 b'unknown file type: %s', (b', '.join(sorted(unsupported)),)
1572 )
1572 )
1573
1573
1574 with repo.lock():
1574 with repo.lock():
1575 topfiles = list(repo.store.topfiles())
1575 topfiles = list(repo.store.topfiles())
1576
1576
1577 sendfiles = []
1577 sendfiles = []
1578 totalsize = 0
1578 totalsize = 0
1579
1579
1580 # TODO this violates a bunch of storage layer interface abstractions because
1580 # TODO this violates a bunch of storage layer interface abstractions because
1581 # it assumes revlogs.
1581 # it assumes revlogs.
1582 for rl_type, name, encodedname, size in topfiles:
1582 for rl_type, name, size in topfiles:
1583 # XXX use the `rl_type` for that
1583 # XXX use the `rl_type` for that
1584 if b'changelog' in files and name.startswith(b'00changelog'):
1584 if b'changelog' in files and name.startswith(b'00changelog'):
1585 pass
1585 pass
1586 elif b'manifestlog' in files and name.startswith(b'00manifest'):
1586 elif b'manifestlog' in files and name.startswith(b'00manifest'):
1587 pass
1587 pass
1588 else:
1588 else:
1589 continue
1589 continue
1590
1590
1591 sendfiles.append((b'store', name, size))
1591 sendfiles.append((b'store', name, size))
1592 totalsize += size
1592 totalsize += size
1593
1593
1594 yield {
1594 yield {
1595 b'filecount': len(sendfiles),
1595 b'filecount': len(sendfiles),
1596 b'totalsize': totalsize,
1596 b'totalsize': totalsize,
1597 }
1597 }
1598
1598
1599 for location, name, size in sendfiles:
1599 for location, name, size in sendfiles:
1600 yield {
1600 yield {
1601 b'location': location,
1601 b'location': location,
1602 b'path': name,
1602 b'path': name,
1603 b'size': size,
1603 b'size': size,
1604 }
1604 }
1605
1605
1606 # We have to use a closure for this to ensure the context manager is
1606 # We have to use a closure for this to ensure the context manager is
1607 # closed only after sending the final chunk.
1607 # closed only after sending the final chunk.
1608 def getfiledata():
1608 def getfiledata():
1609 with repo.svfs(name, b'rb', auditpath=False) as fh:
1609 with repo.svfs(name, b'rb', auditpath=False) as fh:
1610 for chunk in util.filechunkiter(fh, limit=size):
1610 for chunk in util.filechunkiter(fh, limit=size):
1611 yield chunk
1611 yield chunk
1612
1612
1613 yield wireprototypes.indefinitebytestringresponse(getfiledata())
1613 yield wireprototypes.indefinitebytestringresponse(getfiledata())
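# Taken together, the stream emitted by rawstorefiledata looks roughly like
# this (file names and sizes are placeholders):
#
#   {b'filecount': 2, b'totalsize': 12345}
#   {b'location': b'store', b'path': b'00changelog.i', b'size': 8192}
#   <indefinite length bytestring frames with that file's raw data>
#   {b'location': b'store', b'path': b'00manifest.i', b'size': 4153}
#   <indefinite length bytestring frames with that file's raw data>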
@@ -1,741 +1,745 b''
1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 #
2 #
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 # To use this with the test suite:
8 # To use this with the test suite:
9 #
9 #
10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12
12
13 from __future__ import absolute_import
13 from __future__ import absolute_import
14
14
15 import stat
15 import stat
16
16
17 from mercurial.i18n import _
17 from mercurial.i18n import _
18 from mercurial.node import (
18 from mercurial.node import (
19 bin,
19 bin,
20 hex,
20 hex,
21 nullrev,
21 nullrev,
22 )
22 )
23 from mercurial.thirdparty import attr
23 from mercurial.thirdparty import attr
24 from mercurial import (
24 from mercurial import (
25 ancestor,
25 ancestor,
26 bundlerepo,
26 bundlerepo,
27 error,
27 error,
28 extensions,
28 extensions,
29 localrepo,
29 localrepo,
30 mdiff,
30 mdiff,
31 pycompat,
31 pycompat,
32 revlog,
32 revlog,
33 store,
33 store,
34 verify,
34 verify,
35 )
35 )
36 from mercurial.interfaces import (
36 from mercurial.interfaces import (
37 repository,
37 repository,
38 util as interfaceutil,
38 util as interfaceutil,
39 )
39 )
40 from mercurial.utils import (
40 from mercurial.utils import (
41 cborutil,
41 cborutil,
42 storageutil,
42 storageutil,
43 )
43 )
44 from mercurial.revlogutils import flagutil
44 from mercurial.revlogutils import flagutil
45
45
46 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
46 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
47 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
47 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
48 # be specifying the version(s) of Mercurial they are tested with, or
48 # be specifying the version(s) of Mercurial they are tested with, or
49 # leave the attribute unspecified.
49 # leave the attribute unspecified.
50 testedwith = b'ships-with-hg-core'
50 testedwith = b'ships-with-hg-core'
51
51
52 REQUIREMENT = b'testonly-simplestore'
52 REQUIREMENT = b'testonly-simplestore'
53
53
54
54
55 def validatenode(node):
55 def validatenode(node):
56 if isinstance(node, int):
56 if isinstance(node, int):
57 raise ValueError('expected node; got int')
57 raise ValueError('expected node; got int')
58
58
59 if len(node) != 20:
59 if len(node) != 20:
60 raise ValueError('expected 20 byte node')
60 raise ValueError('expected 20 byte node')
61
61
62
62
63 def validaterev(rev):
63 def validaterev(rev):
64 if not isinstance(rev, int):
64 if not isinstance(rev, int):
65 raise ValueError('expected int')
65 raise ValueError('expected int')
66
66
67
67
68 class simplestoreerror(error.StorageError):
68 class simplestoreerror(error.StorageError):
69 pass
69 pass
70
70
71
71
72 @interfaceutil.implementer(repository.irevisiondelta)
72 @interfaceutil.implementer(repository.irevisiondelta)
73 @attr.s(slots=True)
73 @attr.s(slots=True)
74 class simplestorerevisiondelta(object):
74 class simplestorerevisiondelta(object):
75 node = attr.ib()
75 node = attr.ib()
76 p1node = attr.ib()
76 p1node = attr.ib()
77 p2node = attr.ib()
77 p2node = attr.ib()
78 basenode = attr.ib()
78 basenode = attr.ib()
79 flags = attr.ib()
79 flags = attr.ib()
80 baserevisionsize = attr.ib()
80 baserevisionsize = attr.ib()
81 revision = attr.ib()
81 revision = attr.ib()
82 delta = attr.ib()
82 delta = attr.ib()
83 linknode = attr.ib(default=None)
83 linknode = attr.ib(default=None)
84
84
85
85
86 @interfaceutil.implementer(repository.iverifyproblem)
86 @interfaceutil.implementer(repository.iverifyproblem)
87 @attr.s(frozen=True)
87 @attr.s(frozen=True)
88 class simplefilestoreproblem(object):
88 class simplefilestoreproblem(object):
89 warning = attr.ib(default=None)
89 warning = attr.ib(default=None)
90 error = attr.ib(default=None)
90 error = attr.ib(default=None)
91 node = attr.ib(default=None)
91 node = attr.ib(default=None)
92
92
93
93
94 @interfaceutil.implementer(repository.ifilestorage)
94 @interfaceutil.implementer(repository.ifilestorage)
95 class filestorage(object):
95 class filestorage(object):
96 """Implements storage for a tracked path.
96 """Implements storage for a tracked path.
97
97
98 Data is stored in the VFS in a directory corresponding to the tracked
98 Data is stored in the VFS in a directory corresponding to the tracked
99 path.
99 path.
100
100
101 Index data is stored in an ``index`` file using CBOR.
101 Index data is stored in an ``index`` file using CBOR.
102
102
103 Fulltext data is stored in files having names of the node.
103 Fulltext data is stored in files having names of the node.
104 """
104 """
105
105
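# A hedged sketch of the resulting on-disk layout for a tracked path
# ``foo/bar.txt`` (naming the per-revision file by hex node is an assumption
# made for illustration):
#
#   data/foo/bar.txt/index         CBOR-encoded index entries
#   data/foo/bar.txt/<hex node>    fulltext of one revision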
106 _flagserrorclass = simplestoreerror
106 _flagserrorclass = simplestoreerror
107
107
108 def __init__(self, repo, svfs, path):
108 def __init__(self, repo, svfs, path):
109 self.nullid = repo.nullid
109 self.nullid = repo.nullid
110 self._repo = repo
110 self._repo = repo
111 self._svfs = svfs
111 self._svfs = svfs
112 self._path = path
112 self._path = path
113
113
114 self._storepath = b'/'.join([b'data', path])
114 self._storepath = b'/'.join([b'data', path])
115 self._indexpath = b'/'.join([self._storepath, b'index'])
115 self._indexpath = b'/'.join([self._storepath, b'index'])
116
116
117 indexdata = self._svfs.tryread(self._indexpath)
117 indexdata = self._svfs.tryread(self._indexpath)
118 if indexdata:
118 if indexdata:
119 indexdata = cborutil.decodeall(indexdata)
119 indexdata = cborutil.decodeall(indexdata)
120
120
121 self._indexdata = indexdata or []
121 self._indexdata = indexdata or []
122 self._indexbynode = {}
122 self._indexbynode = {}
123 self._indexbyrev = {}
123 self._indexbyrev = {}
124 self._index = []
124 self._index = []
125 self._refreshindex()
125 self._refreshindex()
126
126
127 self._flagprocessors = dict(flagutil.flagprocessors)
127 self._flagprocessors = dict(flagutil.flagprocessors)
128
128
129 def _refreshindex(self):
129 def _refreshindex(self):
130 self._indexbynode.clear()
130 self._indexbynode.clear()
131 self._indexbyrev.clear()
131 self._indexbyrev.clear()
132 self._index = []
132 self._index = []
133
133
134 for i, entry in enumerate(self._indexdata):
134 for i, entry in enumerate(self._indexdata):
135 self._indexbynode[entry[b'node']] = entry
135 self._indexbynode[entry[b'node']] = entry
136 self._indexbyrev[i] = entry
136 self._indexbyrev[i] = entry
137
137
138 self._indexbynode[self._repo.nullid] = {
138 self._indexbynode[self._repo.nullid] = {
139 b'node': self._repo.nullid,
139 b'node': self._repo.nullid,
140 b'p1': self._repo.nullid,
140 b'p1': self._repo.nullid,
141 b'p2': self._repo.nullid,
141 b'p2': self._repo.nullid,
142 b'linkrev': nullrev,
142 b'linkrev': nullrev,
143 b'flags': 0,
143 b'flags': 0,
144 }
144 }
145
145
146 self._indexbyrev[nullrev] = {
146 self._indexbyrev[nullrev] = {
147 b'node': self._repo.nullid,
147 b'node': self._repo.nullid,
148 b'p1': self._repo.nullid,
148 b'p1': self._repo.nullid,
149 b'p2': self._repo.nullid,
149 b'p2': self._repo.nullid,
150 b'linkrev': nullrev,
150 b'linkrev': nullrev,
151 b'flags': 0,
151 b'flags': 0,
152 }
152 }
153
153
154 for i, entry in enumerate(self._indexdata):
154 for i, entry in enumerate(self._indexdata):
155 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
155 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
156
156
157 # start, length, rawsize, chainbase, linkrev, p1, p2, node
157 # start, length, rawsize, chainbase, linkrev, p1, p2, node
158 self._index.append(
158 self._index.append(
159 (0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev, entry[b'node'])
159 (0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev, entry[b'node'])
160 )
160 )
161
161
162 self._index.append((0, 0, 0, -1, -1, -1, -1, self._repo.nullid))
162 self._index.append((0, 0, 0, -1, -1, -1, -1, self._repo.nullid))
163
163
164 def __len__(self):
164 def __len__(self):
165 return len(self._indexdata)
165 return len(self._indexdata)
166
166
167 def __iter__(self):
167 def __iter__(self):
168 return iter(range(len(self)))
168 return iter(range(len(self)))
169
169
170 def revs(self, start=0, stop=None):
170 def revs(self, start=0, stop=None):
171 step = 1
171 step = 1
172 if stop is not None:
172 if stop is not None:
173 if start > stop:
173 if start > stop:
174 step = -1
174 step = -1
175
175
176 stop += step
176 stop += step
177 else:
177 else:
178 stop = len(self)
178 stop = len(self)
179
179
180 return range(start, stop, step)
180 return range(start, stop, step)
181
181
182 def parents(self, node):
182 def parents(self, node):
183 validatenode(node)
183 validatenode(node)
184
184
185 if node not in self._indexbynode:
185 if node not in self._indexbynode:
186 raise KeyError('unknown node')
186 raise KeyError('unknown node')
187
187
188 entry = self._indexbynode[node]
188 entry = self._indexbynode[node]
189
189
190 return entry[b'p1'], entry[b'p2']
190 return entry[b'p1'], entry[b'p2']
191
191
192 def parentrevs(self, rev):
192 def parentrevs(self, rev):
193 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
193 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
194 return self.rev(p1), self.rev(p2)
194 return self.rev(p1), self.rev(p2)
195
195
196 def rev(self, node):
196 def rev(self, node):
197 validatenode(node)
197 validatenode(node)
198
198
199 try:
199 try:
200 self._indexbynode[node]
200 self._indexbynode[node]
201 except KeyError:
201 except KeyError:
202 raise error.LookupError(node, self._indexpath, _('no node'))
202 raise error.LookupError(node, self._indexpath, _('no node'))
203
203
204 for rev, entry in self._indexbyrev.items():
204 for rev, entry in self._indexbyrev.items():
205 if entry[b'node'] == node:
205 if entry[b'node'] == node:
206 return rev
206 return rev
207
207
208 raise error.ProgrammingError(b'this should not occur')
208 raise error.ProgrammingError(b'this should not occur')
209
209
210 def node(self, rev):
210 def node(self, rev):
211 validaterev(rev)
211 validaterev(rev)
212
212
213 return self._indexbyrev[rev][b'node']
213 return self._indexbyrev[rev][b'node']
214
214
215 def hasnode(self, node):
215 def hasnode(self, node):
216 validatenode(node)
216 validatenode(node)
217 return node in self._indexbynode
217 return node in self._indexbynode
218
218
219 def censorrevision(self, tr, censornode, tombstone=b''):
219 def censorrevision(self, tr, censornode, tombstone=b''):
220 raise NotImplementedError('TODO')
220 raise NotImplementedError('TODO')
221
221
222 def lookup(self, node):
222 def lookup(self, node):
223 if isinstance(node, int):
223 if isinstance(node, int):
224 return self.node(node)
224 return self.node(node)
225
225
226 if len(node) == 20:
226 if len(node) == 20:
227 self.rev(node)
227 self.rev(node)
228 return node
228 return node
229
229
230 try:
230 try:
231 rev = int(node)
231 rev = int(node)
232 if '%d' % rev != node:
232 if '%d' % rev != node:
233 raise ValueError
233 raise ValueError
234
234
235 if rev < 0:
235 if rev < 0:
236 rev = len(self) + rev
236 rev = len(self) + rev
237 if rev < 0 or rev >= len(self):
237 if rev < 0 or rev >= len(self):
238 raise ValueError
238 raise ValueError
239
239
240 return self.node(rev)
240 return self.node(rev)
241 except (ValueError, OverflowError):
241 except (ValueError, OverflowError):
242 pass
242 pass
243
243
244 if len(node) == 40:
244 if len(node) == 40:
245 try:
245 try:
246 rawnode = bin(node)
246 rawnode = bin(node)
247 self.rev(rawnode)
247 self.rev(rawnode)
248 return rawnode
248 return rawnode
249 except TypeError:
249 except TypeError:
250 pass
250 pass
251
251
252 raise error.LookupError(node, self._path, _('invalid lookup input'))
252 raise error.LookupError(node, self._path, _('invalid lookup input'))
253
253
254 def linkrev(self, rev):
254 def linkrev(self, rev):
255 validaterev(rev)
255 validaterev(rev)
256
256
257 return self._indexbyrev[rev][b'linkrev']
257 return self._indexbyrev[rev][b'linkrev']
258
258
259 def _flags(self, rev):
259 def _flags(self, rev):
260 validaterev(rev)
260 validaterev(rev)
261
261
262 return self._indexbyrev[rev][b'flags']
262 return self._indexbyrev[rev][b'flags']
263
263
264 def _candelta(self, baserev, rev):
264 def _candelta(self, baserev, rev):
265 validaterev(baserev)
265 validaterev(baserev)
266 validaterev(rev)
266 validaterev(rev)
267
267
268 if (self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS) or (
268 if (self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS) or (
269 self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS
269 self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS
270 ):
270 ):
271 return False
271 return False
272
272
273 return True
273 return True
274
274
275 def checkhash(self, text, node, p1=None, p2=None, rev=None):
275 def checkhash(self, text, node, p1=None, p2=None, rev=None):
276 if p1 is None and p2 is None:
276 if p1 is None and p2 is None:
277 p1, p2 = self.parents(node)
277 p1, p2 = self.parents(node)
278 if node != storageutil.hashrevisionsha1(text, p1, p2):
278 if node != storageutil.hashrevisionsha1(text, p1, p2):
279 raise simplestoreerror(
279 raise simplestoreerror(
280 _("integrity check failed on %s") % self._path
280 _("integrity check failed on %s") % self._path
281 )
281 )
282
282
283 def revision(self, nodeorrev, raw=False):
283 def revision(self, nodeorrev, raw=False):
284 if isinstance(nodeorrev, int):
284 if isinstance(nodeorrev, int):
285 node = self.node(nodeorrev)
285 node = self.node(nodeorrev)
286 else:
286 else:
287 node = nodeorrev
287 node = nodeorrev
288 validatenode(node)
288 validatenode(node)
289
289
290 if node == self._repo.nullid:
290 if node == self._repo.nullid:
291 return b''
291 return b''
292
292
293 rev = self.rev(node)
293 rev = self.rev(node)
294 flags = self._flags(rev)
294 flags = self._flags(rev)
295
295
296 path = b'/'.join([self._storepath, hex(node)])
296 path = b'/'.join([self._storepath, hex(node)])
297 rawtext = self._svfs.read(path)
297 rawtext = self._svfs.read(path)
298
298
299 if raw:
299 if raw:
300 validatehash = flagutil.processflagsraw(self, rawtext, flags)
300 validatehash = flagutil.processflagsraw(self, rawtext, flags)
301 text = rawtext
301 text = rawtext
302 else:
302 else:
303 r = flagutil.processflagsread(self, rawtext, flags)
303 r = flagutil.processflagsread(self, rawtext, flags)
304 text, validatehash = r
304 text, validatehash = r
305 if validatehash:
305 if validatehash:
306 self.checkhash(text, node, rev=rev)
306 self.checkhash(text, node, rev=rev)
307
307
308 return text
308 return text
309
309
310 def rawdata(self, nodeorrev):
310 def rawdata(self, nodeorrev):
311 return self.revision(nodeorrev, raw=True)
311 return self.revision(nodeorrev, raw=True)
312
312
313 def read(self, node):
313 def read(self, node):
314 validatenode(node)
314 validatenode(node)
315
315
316 revision = self.revision(node)
316 revision = self.revision(node)
317
317
318 if not revision.startswith(b'\1\n'):
318 if not revision.startswith(b'\1\n'):
319 return revision
319 return revision
320
320
321 start = revision.index(b'\1\n', 2)
321 start = revision.index(b'\1\n', 2)
322 return revision[start + 2 :]
322 return revision[start + 2 :]
323
323
324 def renamed(self, node):
324 def renamed(self, node):
325 validatenode(node)
325 validatenode(node)
326
326
327 if self.parents(node)[0] != self._repo.nullid:
327 if self.parents(node)[0] != self._repo.nullid:
328 return False
328 return False
329
329
330 fulltext = self.revision(node)
330 fulltext = self.revision(node)
331 m = storageutil.parsemeta(fulltext)[0]
331 m = storageutil.parsemeta(fulltext)[0]
332
332
333 if m and 'copy' in m:
333 if m and 'copy' in m:
334 return m['copy'], bin(m['copyrev'])
334 return m['copy'], bin(m['copyrev'])
335
335
336 return False
336 return False
337
337
338 def cmp(self, node, text):
338 def cmp(self, node, text):
339 validatenode(node)
339 validatenode(node)
340
340
341 t = text
341 t = text
342
342
343 if text.startswith(b'\1\n'):
343 if text.startswith(b'\1\n'):
344 t = b'\1\n\1\n' + text
344 t = b'\1\n\1\n' + text
345
345
346 p1, p2 = self.parents(node)
346 p1, p2 = self.parents(node)
347
347
348 if storageutil.hashrevisionsha1(t, p1, p2) == node:
348 if storageutil.hashrevisionsha1(t, p1, p2) == node:
349 return False
349 return False
350
350
351 if self.iscensored(self.rev(node)):
351 if self.iscensored(self.rev(node)):
352 return text != b''
352 return text != b''
353
353
354 if self.renamed(node):
354 if self.renamed(node):
355 t2 = self.read(node)
355 t2 = self.read(node)
356 return t2 != text
356 return t2 != text
357
357
358 return True
358 return True
359
359
360 def size(self, rev):
360 def size(self, rev):
361 validaterev(rev)
361 validaterev(rev)
362
362
363 node = self._indexbyrev[rev][b'node']
363 node = self._indexbyrev[rev][b'node']
364
364
365 if self.renamed(node):
365 if self.renamed(node):
366 return len(self.read(node))
366 return len(self.read(node))
367
367
368 if self.iscensored(rev):
368 if self.iscensored(rev):
369 return 0
369 return 0
370
370
371 return len(self.revision(node))
371 return len(self.revision(node))
372
372
373 def iscensored(self, rev):
373 def iscensored(self, rev):
374 validaterev(rev)
374 validaterev(rev)
375
375
376 return self._flags(rev) & repository.REVISION_FLAG_CENSORED
376 return self._flags(rev) & repository.REVISION_FLAG_CENSORED
377
377
378 def commonancestorsheads(self, a, b):
378 def commonancestorsheads(self, a, b):
379 validatenode(a)
379 validatenode(a)
380 validatenode(b)
380 validatenode(b)
381
381
382 a = self.rev(a)
382 a = self.rev(a)
383 b = self.rev(b)
383 b = self.rev(b)
384
384
385 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
385 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
386 return pycompat.maplist(self.node, ancestors)
386 return pycompat.maplist(self.node, ancestors)
387
387
388 def descendants(self, revs):
388 def descendants(self, revs):
389 # This is a copy of revlog.descendants()
389 # This is a copy of revlog.descendants()
390 first = min(revs)
390 first = min(revs)
391 if first == nullrev:
391 if first == nullrev:
392 for i in self:
392 for i in self:
393 yield i
393 yield i
394 return
394 return
395
395
396 seen = set(revs)
396 seen = set(revs)
397 for i in self.revs(start=first + 1):
397 for i in self.revs(start=first + 1):
398 for x in self.parentrevs(i):
398 for x in self.parentrevs(i):
399 if x != nullrev and x in seen:
399 if x != nullrev and x in seen:
400 seen.add(i)
400 seen.add(i)
401 yield i
401 yield i
402 break
402 break
403
403
404 # Required by verify.
404 # Required by verify.
405 def files(self):
405 def files(self):
406 entries = self._svfs.listdir(self._storepath)
406 entries = self._svfs.listdir(self._storepath)
407
407
408 # Strip out undo.backup.* files created as part of transaction
408 # Strip out undo.backup.* files created as part of transaction
409 # recording.
409 # recording.
410 entries = [f for f in entries if not f.startswith('undo.backup.')]
410 entries = [f for f in entries if not f.startswith('undo.backup.')]
411
411
412 return [b'/'.join((self._storepath, f)) for f in entries]
412 return [b'/'.join((self._storepath, f)) for f in entries]
413
413
414 def storageinfo(
414 def storageinfo(
415 self,
415 self,
416 exclusivefiles=False,
416 exclusivefiles=False,
417 sharedfiles=False,
417 sharedfiles=False,
418 revisionscount=False,
418 revisionscount=False,
419 trackedsize=False,
419 trackedsize=False,
420 storedsize=False,
420 storedsize=False,
421 ):
421 ):
422 # TODO do a real implementation of this
422 # TODO do a real implementation of this
423 return {
423 return {
424 'exclusivefiles': [],
424 'exclusivefiles': [],
425 'sharedfiles': [],
425 'sharedfiles': [],
426 'revisionscount': len(self),
426 'revisionscount': len(self),
427 'trackedsize': 0,
427 'trackedsize': 0,
428 'storedsize': None,
428 'storedsize': None,
429 }
429 }
430
430
431 def verifyintegrity(self, state):
431 def verifyintegrity(self, state):
432 state['skipread'] = set()
432 state['skipread'] = set()
433 for rev in self:
433 for rev in self:
434 node = self.node(rev)
434 node = self.node(rev)
435 try:
435 try:
436 self.revision(node)
436 self.revision(node)
437 except Exception as e:
437 except Exception as e:
438 yield simplefilestoreproblem(
438 yield simplefilestoreproblem(
439 error='unpacking %s: %s' % (node, e), node=node
439 error='unpacking %s: %s' % (node, e), node=node
440 )
440 )
441 state['skipread'].add(node)
441 state['skipread'].add(node)
442
442
443 def emitrevisions(
443 def emitrevisions(
444 self,
444 self,
445 nodes,
445 nodes,
446 nodesorder=None,
446 nodesorder=None,
447 revisiondata=False,
447 revisiondata=False,
448 assumehaveparentrevisions=False,
448 assumehaveparentrevisions=False,
449 deltamode=repository.CG_DELTAMODE_STD,
449 deltamode=repository.CG_DELTAMODE_STD,
450 sidedata_helpers=None,
450 sidedata_helpers=None,
451 ):
451 ):
452 # TODO this will probably break on some ordering options.
452 # TODO this will probably break on some ordering options.
453 nodes = [n for n in nodes if n != self._repo.nullid]
453 nodes = [n for n in nodes if n != self._repo.nullid]
454 if not nodes:
454 if not nodes:
455 return
455 return
456 for delta in storageutil.emitrevisions(
456 for delta in storageutil.emitrevisions(
457 self,
457 self,
458 nodes,
458 nodes,
459 nodesorder,
459 nodesorder,
460 simplestorerevisiondelta,
460 simplestorerevisiondelta,
461 revisiondata=revisiondata,
461 revisiondata=revisiondata,
462 assumehaveparentrevisions=assumehaveparentrevisions,
462 assumehaveparentrevisions=assumehaveparentrevisions,
463 deltamode=deltamode,
463 deltamode=deltamode,
464 sidedata_helpers=sidedata_helpers,
464 sidedata_helpers=sidedata_helpers,
465 ):
465 ):
466 yield delta
466 yield delta
467
467
468 def add(self, text, meta, transaction, linkrev, p1, p2):
468 def add(self, text, meta, transaction, linkrev, p1, p2):
469 if meta or text.startswith(b'\1\n'):
469 if meta or text.startswith(b'\1\n'):
470 text = storageutil.packmeta(meta, text)
470 text = storageutil.packmeta(meta, text)
471
471
472 return self.addrevision(text, transaction, linkrev, p1, p2)
472 return self.addrevision(text, transaction, linkrev, p1, p2)
473
473
474 def addrevision(
474 def addrevision(
475 self,
475 self,
476 text,
476 text,
477 transaction,
477 transaction,
478 linkrev,
478 linkrev,
479 p1,
479 p1,
480 p2,
480 p2,
481 node=None,
481 node=None,
482 flags=revlog.REVIDX_DEFAULT_FLAGS,
482 flags=revlog.REVIDX_DEFAULT_FLAGS,
483 cachedelta=None,
483 cachedelta=None,
484 ):
484 ):
485 validatenode(p1)
485 validatenode(p1)
486 validatenode(p2)
486 validatenode(p2)
487
487
488 if flags:
488 if flags:
489 node = node or storageutil.hashrevisionsha1(text, p1, p2)
489 node = node or storageutil.hashrevisionsha1(text, p1, p2)
490
490
491 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
491 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
492
492
493 node = node or storageutil.hashrevisionsha1(text, p1, p2)
493 node = node or storageutil.hashrevisionsha1(text, p1, p2)
494
494
495 if node in self._indexbynode:
495 if node in self._indexbynode:
496 return node
496 return node
497
497
498 if validatehash:
498 if validatehash:
499 self.checkhash(rawtext, node, p1=p1, p2=p2)
499 self.checkhash(rawtext, node, p1=p1, p2=p2)
500
500
501 return self._addrawrevision(
501 return self._addrawrevision(
502 node, rawtext, transaction, linkrev, p1, p2, flags
502 node, rawtext, transaction, linkrev, p1, p2, flags
503 )
503 )
504
504
505 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
505 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
506 transaction.addbackup(self._indexpath)
506 transaction.addbackup(self._indexpath)
507
507
508 path = b'/'.join([self._storepath, hex(node)])
508 path = b'/'.join([self._storepath, hex(node)])
509
509
510 self._svfs.write(path, rawtext)
510 self._svfs.write(path, rawtext)
511
511
512 self._indexdata.append(
512 self._indexdata.append(
513 {
513 {
514 b'node': node,
514 b'node': node,
515 b'p1': p1,
515 b'p1': p1,
516 b'p2': p2,
516 b'p2': p2,
517 b'linkrev': link,
517 b'linkrev': link,
518 b'flags': flags,
518 b'flags': flags,
519 }
519 }
520 )
520 )
521
521
522 self._reflectindexupdate()
522 self._reflectindexupdate()
523
523
524 return node
524 return node
525
525
526 def _reflectindexupdate(self):
526 def _reflectindexupdate(self):
527 self._refreshindex()
527 self._refreshindex()
528 self._svfs.write(
528 self._svfs.write(
529 self._indexpath, ''.join(cborutil.streamencode(self._indexdata))
529 self._indexpath, ''.join(cborutil.streamencode(self._indexdata))
530 )
530 )
531
531
532 def addgroup(
532 def addgroup(
533 self,
533 self,
534 deltas,
534 deltas,
535 linkmapper,
535 linkmapper,
536 transaction,
536 transaction,
537 addrevisioncb=None,
537 addrevisioncb=None,
538 duplicaterevisioncb=None,
538 duplicaterevisioncb=None,
539 maybemissingparents=False,
539 maybemissingparents=False,
540 ):
540 ):
541 if maybemissingparents:
541 if maybemissingparents:
542 raise error.Abort(
542 raise error.Abort(
543 _('simple store does not support missing parents ' 'write mode')
543 _('simple store does not support missing parents ' 'write mode')
544 )
544 )
545
545
546 empty = True
546 empty = True
547
547
548 transaction.addbackup(self._indexpath)
548 transaction.addbackup(self._indexpath)
549
549
550 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
550 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
551 linkrev = linkmapper(linknode)
551 linkrev = linkmapper(linknode)
552 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
552 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
553
553
554 if node in self._indexbynode:
554 if node in self._indexbynode:
555 if duplicaterevisioncb:
555 if duplicaterevisioncb:
556 duplicaterevisioncb(self, self.rev(node))
556 duplicaterevisioncb(self, self.rev(node))
557 empty = False
557 empty = False
558 continue
558 continue
559
559
560 # Need to resolve the fulltext from the delta base.
560 # Need to resolve the fulltext from the delta base.
561 if deltabase == self._repo.nullid:
561 if deltabase == self._repo.nullid:
562 text = mdiff.patch(b'', delta)
562 text = mdiff.patch(b'', delta)
563 else:
563 else:
564 text = mdiff.patch(self.revision(deltabase), delta)
564 text = mdiff.patch(self.revision(deltabase), delta)
565
565
566 rev = self._addrawrevision(
566 rev = self._addrawrevision(
567 node, text, transaction, linkrev, p1, p2, flags
567 node, text, transaction, linkrev, p1, p2, flags
568 )
568 )
569
569
570 if addrevisioncb:
570 if addrevisioncb:
571 addrevisioncb(self, rev)
571 addrevisioncb(self, rev)
572 empty = False
572 empty = False
573 return not empty
573 return not empty
574
574
575 def _headrevs(self):
575 def _headrevs(self):
576 # Assume all revisions are heads by default.
576 # Assume all revisions are heads by default.
577 revishead = {rev: True for rev in self._indexbyrev}
577 revishead = {rev: True for rev in self._indexbyrev}
578
578
579 for rev, entry in self._indexbyrev.items():
579 for rev, entry in self._indexbyrev.items():
580 # Unset head flag for all seen parents.
580 # Unset head flag for all seen parents.
581 revishead[self.rev(entry[b'p1'])] = False
581 revishead[self.rev(entry[b'p1'])] = False
582 revishead[self.rev(entry[b'p2'])] = False
582 revishead[self.rev(entry[b'p2'])] = False
583
583
584 return [rev for rev, ishead in sorted(revishead.items()) if ishead]
584 return [rev for rev, ishead in sorted(revishead.items()) if ishead]
585
585
586 def heads(self, start=None, stop=None):
586 def heads(self, start=None, stop=None):
587 # This is copied from revlog.py.
587 # This is copied from revlog.py.
588 if start is None and stop is None:
588 if start is None and stop is None:
589 if not len(self):
589 if not len(self):
590 return [self._repo.nullid]
590 return [self._repo.nullid]
591 return [self.node(r) for r in self._headrevs()]
591 return [self.node(r) for r in self._headrevs()]
592
592
593 if start is None:
593 if start is None:
594 start = self._repo.nullid
594 start = self._repo.nullid
595 if stop is None:
595 if stop is None:
596 stop = []
596 stop = []
597 stoprevs = {self.rev(n) for n in stop}
597 stoprevs = {self.rev(n) for n in stop}
598 startrev = self.rev(start)
598 startrev = self.rev(start)
599 reachable = {startrev}
599 reachable = {startrev}
600 heads = {startrev}
600 heads = {startrev}
601
601
602 parentrevs = self.parentrevs
602 parentrevs = self.parentrevs
603 for r in self.revs(start=startrev + 1):
603 for r in self.revs(start=startrev + 1):
604 for p in parentrevs(r):
604 for p in parentrevs(r):
605 if p in reachable:
605 if p in reachable:
606 if r not in stoprevs:
606 if r not in stoprevs:
607 reachable.add(r)
607 reachable.add(r)
608 heads.add(r)
608 heads.add(r)
609 if p in heads and p not in stoprevs:
609 if p in heads and p not in stoprevs:
610 heads.remove(p)
610 heads.remove(p)
611
611
612 return [self.node(r) for r in heads]
612 return [self.node(r) for r in heads]
613
613
614 def children(self, node):
614 def children(self, node):
615 validatenode(node)
615 validatenode(node)
616
616
617 # This is a copy of revlog.children().
617 # This is a copy of revlog.children().
618 c = []
618 c = []
619 p = self.rev(node)
619 p = self.rev(node)
620 for r in self.revs(start=p + 1):
620 for r in self.revs(start=p + 1):
621 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
621 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
622 if prevs:
622 if prevs:
623 for pr in prevs:
623 for pr in prevs:
624 if pr == p:
624 if pr == p:
625 c.append(self.node(r))
625 c.append(self.node(r))
626 elif p == nullrev:
626 elif p == nullrev:
627 c.append(self.node(r))
627 c.append(self.node(r))
628 return c
628 return c
629
629
630 def getstrippoint(self, minlink):
630 def getstrippoint(self, minlink):
631 return storageutil.resolvestripinfo(
631 return storageutil.resolvestripinfo(
632 minlink,
632 minlink,
633 len(self) - 1,
633 len(self) - 1,
634 self._headrevs(),
634 self._headrevs(),
635 self.linkrev,
635 self.linkrev,
636 self.parentrevs,
636 self.parentrevs,
637 )
637 )
638
638
639 def strip(self, minlink, transaction):
639 def strip(self, minlink, transaction):
640 if not len(self):
640 if not len(self):
641 return
641 return
642
642
643 rev, _ignored = self.getstrippoint(minlink)
643 rev, _ignored = self.getstrippoint(minlink)
644 if rev == len(self):
644 if rev == len(self):
645 return
645 return
646
646
647 # Purge index data starting at the requested revision.
647 # Purge index data starting at the requested revision.
648 self._indexdata[rev:] = []
648 self._indexdata[rev:] = []
649 self._reflectindexupdate()
649 self._reflectindexupdate()
650
650
651
651
652 def issimplestorefile(f, kind, st):
652 def issimplestorefile(f, kind, st):
653 if kind != stat.S_IFREG:
653 if kind != stat.S_IFREG:
654 return False
654 return False
655
655
656 if store.isrevlog(f, kind, st):
656 if store.isrevlog(f, kind, st):
657 return False
657 return False
658
658
659 # Ignore transaction undo files.
659 # Ignore transaction undo files.
660 if f.startswith('undo.'):
660 if f.startswith('undo.'):
661 return False
661 return False
662
662
663 # Otherwise assume it belongs to the simple store.
663 # Otherwise assume it belongs to the simple store.
664 return True
664 return True
665
665
666
666
667 class simplestore(store.encodedstore):
667 class simplestore(store.encodedstore):
668 def datafiles(self):
668 def datafiles(self, undecodable=None):
669 for x in super(simplestore, self).datafiles():
669 for x in super(simplestore, self).datafiles():
670 yield x
670 yield x
671
671
672 # Supplement with non-revlog files.
672 # Supplement with non-revlog files.
673 extrafiles = self._walk('data', True, filefilter=issimplestorefile)
673 extrafiles = self._walk('data', True, filefilter=issimplestorefile)
674
674
675 for unencoded, encoded, size in extrafiles:
675 for f1, size in extrafiles:
676 try:
676 try:
677 unencoded = store.decodefilename(unencoded)
677 f2 = store.decodefilename(f1)
678 except KeyError:
678 except KeyError:
679 unencoded = None
679 if undecodable is None:
680 raise error.StorageError(b'undecodable revlog name %s' % f1)
681 else:
682 undecodable.append(f1)
683 continue
680
684
681 yield unencoded, encoded, size
685 yield f2, size
682
686
683
687
684 def reposetup(ui, repo):
688 def reposetup(ui, repo):
685 if not repo.local():
689 if not repo.local():
686 return
690 return
687
691
688 if isinstance(repo, bundlerepo.bundlerepository):
692 if isinstance(repo, bundlerepo.bundlerepository):
689 raise error.Abort(_('cannot use simple store with bundlerepo'))
693 raise error.Abort(_('cannot use simple store with bundlerepo'))
690
694
691 class simplestorerepo(repo.__class__):
695 class simplestorerepo(repo.__class__):
692 def file(self, f):
696 def file(self, f):
693 return filestorage(repo, self.svfs, f)
697 return filestorage(repo, self.svfs, f)
694
698
695 repo.__class__ = simplestorerepo
699 repo.__class__ = simplestorerepo
696
700
697
701
698 def featuresetup(ui, supported):
702 def featuresetup(ui, supported):
699 supported.add(REQUIREMENT)
703 supported.add(REQUIREMENT)
700
704
701
705
702 def newreporequirements(orig, ui, createopts):
706 def newreporequirements(orig, ui, createopts):
703 """Modifies default requirements for new repos to use the simple store."""
707 """Modifies default requirements for new repos to use the simple store."""
704 requirements = orig(ui, createopts)
708 requirements = orig(ui, createopts)
705
709
706 # These requirements are only used to affect creation of the store
710 # These requirements are only used to affect creation of the store
707 # object. We have our own store. So we can remove them.
711 # object. We have our own store. So we can remove them.
708 # TODO do this once we feel like taking the test hit.
712 # TODO do this once we feel like taking the test hit.
709 # if 'fncache' in requirements:
713 # if 'fncache' in requirements:
710 # requirements.remove('fncache')
714 # requirements.remove('fncache')
711 # if 'dotencode' in requirements:
715 # if 'dotencode' in requirements:
712 # requirements.remove('dotencode')
716 # requirements.remove('dotencode')
713
717
714 requirements.add(REQUIREMENT)
718 requirements.add(REQUIREMENT)
715
719
716 return requirements
720 return requirements
717
721
718
722
719 def makestore(orig, requirements, path, vfstype):
723 def makestore(orig, requirements, path, vfstype):
720 if REQUIREMENT not in requirements:
724 if REQUIREMENT not in requirements:
721 return orig(requirements, path, vfstype)
725 return orig(requirements, path, vfstype)
722
726
723 return simplestore(path, vfstype)
727 return simplestore(path, vfstype)
724
728
725
729
726 def verifierinit(orig, self, *args, **kwargs):
730 def verifierinit(orig, self, *args, **kwargs):
727 orig(self, *args, **kwargs)
731 orig(self, *args, **kwargs)
728
732
729 # We don't care that files in the store don't align with what is
733 # We don't care that files in the store don't align with what is
730 # advertised. So suppress these warnings.
734 # advertised. So suppress these warnings.
731 self.warnorphanstorefiles = False
735 self.warnorphanstorefiles = False
732
736
733
737
734 def extsetup(ui):
738 def extsetup(ui):
735 localrepo.featuresetupfuncs.add(featuresetup)
739 localrepo.featuresetupfuncs.add(featuresetup)
736
740
737 extensions.wrapfunction(
741 extensions.wrapfunction(
738 localrepo, 'newreporequirements', newreporequirements
742 localrepo, 'newreporequirements', newreporequirements
739 )
743 )
740 extensions.wrapfunction(localrepo, 'makestore', makestore)
744 extensions.wrapfunction(localrepo, 'makestore', makestore)
741 extensions.wrapfunction(verify.verifier, '__init__', verifierinit)
745 extensions.wrapfunction(verify.verifier, '__init__', verifierinit)
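The datafiles() hunk above is where this changeset lands for the simple store: the walk now yields (filename, size) pairs instead of (unencoded, encoded, size) triples, and a name that store.decodefilename() cannot decode is either appended to the caller-supplied undecodable list or, when no list is passed, reported as a StorageError. A minimal sketch of a caller written against the new contract follows; the summarize_store helper, its ui/store arguments, and the idea of summing sizes are illustrative assumptions, not part of the changeset.

# Illustrative sketch only: consume datafiles() under the new one-filename
# contract. The helper name and the reporting format are assumptions made
# for this example, not code from the changeset.
def summarize_store(ui, store):
    undecodable = []  # filenames that store.decodefilename() rejected
    total = 0
    for name, size in store.datafiles(undecodable=undecodable):
        # each entry is now a single decoded filename plus its size
        total += size or 0
    for name in undecodable:
        ui.warn(b'undecodable store entry: %s\n' % name)
    return total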