store: return just one filename in walk functions...
Valentin Gatien-Baron
r48691:2174f54a default
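This changeset drops the encoded filename from the tuples yielded by the store walk functions (such as store.datafiles()), so callers now unpack three values instead of four. A minimal sketch of an updated caller, using a hypothetical helper name list_revlog_indexes; only the tuple shape is taken from this diff:

def list_revlog_indexes(repo):
    # old shape: (file_type, name, encoded_name, size)
    # new shape: (file_type, name, size) -- just one filename per entry
    paths = []
    for file_type, name, size in repo.store.datafiles():
        if name.endswith(b'.i'):  # keep only revlog index files
            paths.append((name, size))
    return paths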
@@ -1,694 +1,694 @@ narrowcommands.py
1 1 # narrowcommands.py - command modifications for narrowhg extension
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import itertools
10 10 import os
11 11
12 12 from mercurial.i18n import _
13 13 from mercurial.node import (
14 14 hex,
15 15 short,
16 16 )
17 17 from mercurial import (
18 18 bundle2,
19 19 cmdutil,
20 20 commands,
21 21 discovery,
22 22 encoding,
23 23 error,
24 24 exchange,
25 25 extensions,
26 26 hg,
27 27 narrowspec,
28 28 pathutil,
29 29 pycompat,
30 30 registrar,
31 31 repair,
32 32 repoview,
33 33 requirements,
34 34 sparse,
35 35 util,
36 36 wireprototypes,
37 37 )
38 38 from mercurial.utils import (
39 39 urlutil,
40 40 )
41 41
42 42 table = {}
43 43 command = registrar.command(table)
44 44
45 45
46 46 def setup():
47 47 """Wraps user-facing mercurial commands with narrow-aware versions."""
48 48
49 49 entry = extensions.wrapcommand(commands.table, b'clone', clonenarrowcmd)
50 50 entry[1].append(
51 51 (b'', b'narrow', None, _(b"create a narrow clone of select files"))
52 52 )
53 53 entry[1].append(
54 54 (
55 55 b'',
56 56 b'depth',
57 57 b'',
58 58 _(b"limit the history fetched by distance from heads"),
59 59 )
60 60 )
61 61 entry[1].append((b'', b'narrowspec', b'', _(b"read narrowspecs from file")))
62 62 # TODO(durin42): unify sparse/narrow --include/--exclude logic a bit
63 63 if b'sparse' not in extensions.enabled():
64 64 entry[1].append(
65 65 (b'', b'include', [], _(b"specifically fetch this file/directory"))
66 66 )
67 67 entry[1].append(
68 68 (
69 69 b'',
70 70 b'exclude',
71 71 [],
72 72 _(b"do not fetch this file/directory, even if included"),
73 73 )
74 74 )
75 75
76 76 entry = extensions.wrapcommand(commands.table, b'pull', pullnarrowcmd)
77 77 entry[1].append(
78 78 (
79 79 b'',
80 80 b'depth',
81 81 b'',
82 82 _(b"limit the history fetched by distance from heads"),
83 83 )
84 84 )
85 85
86 86 extensions.wrapcommand(commands.table, b'archive', archivenarrowcmd)
87 87
88 88
89 89 def clonenarrowcmd(orig, ui, repo, *args, **opts):
90 90 """Wraps clone command, so 'hg clone' first wraps localrepo.clone()."""
91 91 opts = pycompat.byteskwargs(opts)
92 92 wrappedextraprepare = util.nullcontextmanager()
93 93 narrowspecfile = opts[b'narrowspec']
94 94
95 95 if narrowspecfile:
96 96 filepath = os.path.join(encoding.getcwd(), narrowspecfile)
97 97 ui.status(_(b"reading narrowspec from '%s'\n") % filepath)
98 98 try:
99 99 fdata = util.readfile(filepath)
100 100 except IOError as inst:
101 101 raise error.Abort(
102 102 _(b"cannot read narrowspecs from '%s': %s")
103 103 % (filepath, encoding.strtolocal(inst.strerror))
104 104 )
105 105
106 106 includes, excludes, profiles = sparse.parseconfig(ui, fdata, b'narrow')
107 107 if profiles:
108 108 raise error.ConfigError(
109 109 _(
110 110 b"cannot specify other files using '%include' in"
111 111 b" narrowspec"
112 112 )
113 113 )
114 114
115 115 narrowspec.validatepatterns(includes)
116 116 narrowspec.validatepatterns(excludes)
117 117
118 118 # narrowspec is passed so we should assume that user wants narrow clone
119 119 opts[b'narrow'] = True
120 120 opts[b'include'].extend(includes)
121 121 opts[b'exclude'].extend(excludes)
122 122
123 123 if opts[b'narrow']:
124 124
125 125 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
126 126 orig(pullop, kwargs)
127 127
128 128 if opts.get(b'depth'):
129 129 kwargs[b'depth'] = opts[b'depth']
130 130
131 131 wrappedextraprepare = extensions.wrappedfunction(
132 132 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
133 133 )
134 134
135 135 with wrappedextraprepare:
136 136 return orig(ui, repo, *args, **pycompat.strkwargs(opts))
137 137
138 138
139 139 def pullnarrowcmd(orig, ui, repo, *args, **opts):
140 140 """Wraps pull command to allow modifying narrow spec."""
141 141 wrappedextraprepare = util.nullcontextmanager()
142 142 if requirements.NARROW_REQUIREMENT in repo.requirements:
143 143
144 144 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
145 145 orig(pullop, kwargs)
146 146 if opts.get('depth'):
147 147 kwargs[b'depth'] = opts['depth']
148 148
149 149 wrappedextraprepare = extensions.wrappedfunction(
150 150 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
151 151 )
152 152
153 153 with wrappedextraprepare:
154 154 return orig(ui, repo, *args, **opts)
155 155
156 156
157 157 def archivenarrowcmd(orig, ui, repo, *args, **opts):
158 158 """Wraps archive command to narrow the default includes."""
159 159 if requirements.NARROW_REQUIREMENT in repo.requirements:
160 160 repo_includes, repo_excludes = repo.narrowpats
161 161 includes = set(opts.get('include', []))
162 162 excludes = set(opts.get('exclude', []))
163 163 includes, excludes, unused_invalid = narrowspec.restrictpatterns(
164 164 includes, excludes, repo_includes, repo_excludes
165 165 )
166 166 if includes:
167 167 opts['include'] = includes
168 168 if excludes:
169 169 opts['exclude'] = excludes
170 170 return orig(ui, repo, *args, **opts)
171 171
172 172
173 173 def pullbundle2extraprepare(orig, pullop, kwargs):
174 174 repo = pullop.repo
175 175 if requirements.NARROW_REQUIREMENT not in repo.requirements:
176 176 return orig(pullop, kwargs)
177 177
178 178 if wireprototypes.NARROWCAP not in pullop.remote.capabilities():
179 179 raise error.Abort(_(b"server does not support narrow clones"))
180 180 orig(pullop, kwargs)
181 181 kwargs[b'narrow'] = True
182 182 include, exclude = repo.narrowpats
183 183 kwargs[b'oldincludepats'] = include
184 184 kwargs[b'oldexcludepats'] = exclude
185 185 if include:
186 186 kwargs[b'includepats'] = include
187 187 if exclude:
188 188 kwargs[b'excludepats'] = exclude
189 189 # calculate known nodes only in ellipses cases because in non-ellipses cases
190 190 # we have all the nodes
191 191 if wireprototypes.ELLIPSESCAP1 in pullop.remote.capabilities():
192 192 kwargs[b'known'] = [
193 193 hex(ctx.node())
194 194 for ctx in repo.set(b'::%ln', pullop.common)
195 195 if ctx.node() != repo.nullid
196 196 ]
197 197 if not kwargs[b'known']:
198 198 # Mercurial serializes an empty list as '' and deserializes it as
199 199 # [''], so delete it instead to avoid handling the empty string on
200 200 # the server.
201 201 del kwargs[b'known']
202 202
203 203
204 204 extensions.wrapfunction(
205 205 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare
206 206 )
207 207
208 208
209 209 def _narrow(
210 210 ui,
211 211 repo,
212 212 remote,
213 213 commoninc,
214 214 oldincludes,
215 215 oldexcludes,
216 216 newincludes,
217 217 newexcludes,
218 218 force,
219 219 backup,
220 220 ):
221 221 oldmatch = narrowspec.match(repo.root, oldincludes, oldexcludes)
222 222 newmatch = narrowspec.match(repo.root, newincludes, newexcludes)
223 223
224 224 # This is essentially doing "hg outgoing" to find all local-only
225 225 # commits. We will then check that the local-only commits don't
226 226 # have any changes to files that will be untracked.
227 227 unfi = repo.unfiltered()
228 228 outgoing = discovery.findcommonoutgoing(unfi, remote, commoninc=commoninc)
229 229 ui.status(_(b'looking for local changes to affected paths\n'))
230 230 progress = ui.makeprogress(
231 231 topic=_(b'changesets'),
232 232 unit=_(b'changesets'),
233 233 total=len(outgoing.missing) + len(outgoing.excluded),
234 234 )
235 235 localnodes = []
236 236 with progress:
237 237 for n in itertools.chain(outgoing.missing, outgoing.excluded):
238 238 progress.increment()
239 239 if any(oldmatch(f) and not newmatch(f) for f in unfi[n].files()):
240 240 localnodes.append(n)
241 241 revstostrip = unfi.revs(b'descendants(%ln)', localnodes)
242 242 hiddenrevs = repoview.filterrevs(repo, b'visible')
243 243 visibletostrip = list(
244 244 repo.changelog.node(r) for r in (revstostrip - hiddenrevs)
245 245 )
246 246 if visibletostrip:
247 247 ui.status(
248 248 _(
249 249 b'The following changeset(s) or their ancestors have '
250 250 b'local changes not on the remote:\n'
251 251 )
252 252 )
253 253 maxnodes = 10
254 254 if ui.verbose or len(visibletostrip) <= maxnodes:
255 255 for n in visibletostrip:
256 256 ui.status(b'%s\n' % short(n))
257 257 else:
258 258 for n in visibletostrip[:maxnodes]:
259 259 ui.status(b'%s\n' % short(n))
260 260 ui.status(
261 261 _(b'...and %d more, use --verbose to list all\n')
262 262 % (len(visibletostrip) - maxnodes)
263 263 )
264 264 if not force:
265 265 raise error.StateError(
266 266 _(b'local changes found'),
267 267 hint=_(b'use --force-delete-local-changes to ignore'),
268 268 )
269 269
270 270 with ui.uninterruptible():
271 271 if revstostrip:
272 272 tostrip = [unfi.changelog.node(r) for r in revstostrip]
273 273 if repo[b'.'].node() in tostrip:
274 274 # stripping working copy, so move to a different commit first
275 275 urev = max(
276 276 repo.revs(
277 277 b'(::%n) - %ln + null',
278 278 repo[b'.'].node(),
279 279 visibletostrip,
280 280 )
281 281 )
282 282 hg.clean(repo, urev)
283 283 overrides = {(b'devel', b'strip-obsmarkers'): False}
284 284 if backup:
285 285 ui.status(_(b'moving unwanted changesets to backup\n'))
286 286 else:
287 287 ui.status(_(b'deleting unwanted changesets\n'))
288 288 with ui.configoverride(overrides, b'narrow'):
289 289 repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
290 290
291 291 todelete = []
292 for t, f, f2, size in repo.store.datafiles():
292 for t, f, size in repo.store.datafiles():
293 293 if f.startswith(b'data/'):
294 294 file = f[5:-2]
295 295 if not newmatch(file):
296 296 todelete.append(f)
297 297 elif f.startswith(b'meta/'):
298 298 dir = f[5:-13]
299 299 dirs = sorted(pathutil.dirs({dir})) + [dir]
300 300 include = True
301 301 for d in dirs:
302 302 visit = newmatch.visitdir(d)
303 303 if not visit:
304 304 include = False
305 305 break
306 306 if visit == b'all':
307 307 break
308 308 if not include:
309 309 todelete.append(f)
310 310
311 311 repo.destroying()
312 312
313 313 with repo.transaction(b'narrowing'):
314 314 # Update narrowspec before removing revlogs, so repo won't be
315 315 # corrupt in case of crash
316 316 repo.setnarrowpats(newincludes, newexcludes)
317 317
318 318 for f in todelete:
319 319 ui.status(_(b'deleting %s\n') % f)
320 320 util.unlinkpath(repo.svfs.join(f))
321 321 repo.store.markremoved(f)
322 322
323 323 ui.status(_(b'deleting unwanted files from working copy\n'))
324 324 with repo.dirstate.parentchange():
325 325 narrowspec.updateworkingcopy(repo, assumeclean=True)
326 326 narrowspec.copytoworkingcopy(repo)
327 327
328 328 repo.destroyed()
329 329
330 330
331 331 def _widen(
332 332 ui,
333 333 repo,
334 334 remote,
335 335 commoninc,
336 336 oldincludes,
337 337 oldexcludes,
338 338 newincludes,
339 339 newexcludes,
340 340 ):
341 341 # for now we assume that if a server has ellipses enabled, we will be
342 342 # exchanging ellipses nodes. In future we should add ellipses as a client
344 344 # side requirement (maybe) to distinguish whether a client is shallow or not and
344 344 # then send that information to server whether we want ellipses or not.
345 345 # Theoretically a non-ellipses repo should be able to use narrow
346 346 # functionality from an ellipses enabled server
347 347 remotecap = remote.capabilities()
348 348 ellipsesremote = any(
349 349 cap in remotecap for cap in wireprototypes.SUPPORTED_ELLIPSESCAP
350 350 )
351 351
352 352 # check whether we are talking to a server which supports old version of
353 353 # ellipses capabilities
354 354 isoldellipses = (
355 355 ellipsesremote
356 356 and wireprototypes.ELLIPSESCAP1 in remotecap
357 357 and wireprototypes.ELLIPSESCAP not in remotecap
358 358 )
359 359
360 360 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
361 361 orig(pullop, kwargs)
362 362 # The old{in,ex}cludepats have already been set by orig()
363 363 kwargs[b'includepats'] = newincludes
364 364 kwargs[b'excludepats'] = newexcludes
365 365
366 366 wrappedextraprepare = extensions.wrappedfunction(
367 367 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
368 368 )
369 369
370 370 # define a function that narrowbundle2 can call after creating the
371 371 # backup bundle, but before applying the bundle from the server
372 372 def setnewnarrowpats():
373 373 repo.setnarrowpats(newincludes, newexcludes)
374 374
375 375 repo.setnewnarrowpats = setnewnarrowpats
376 376 # silence the devel-warning of applying an empty changegroup
377 377 overrides = {(b'devel', b'all-warnings'): False}
378 378
379 379 common = commoninc[0]
380 380 with ui.uninterruptible():
381 381 if ellipsesremote:
382 382 ds = repo.dirstate
383 383 p1, p2 = ds.p1(), ds.p2()
384 384 with ds.parentchange():
385 385 ds.setparents(repo.nullid, repo.nullid)
386 386 if isoldellipses:
387 387 with wrappedextraprepare:
388 388 exchange.pull(repo, remote, heads=common)
389 389 else:
390 390 known = []
391 391 if ellipsesremote:
392 392 known = [
393 393 ctx.node()
394 394 for ctx in repo.set(b'::%ln', common)
395 395 if ctx.node() != repo.nullid
396 396 ]
397 397 with remote.commandexecutor() as e:
398 398 bundle = e.callcommand(
399 399 b'narrow_widen',
400 400 {
401 401 b'oldincludes': oldincludes,
402 402 b'oldexcludes': oldexcludes,
403 403 b'newincludes': newincludes,
404 404 b'newexcludes': newexcludes,
405 405 b'cgversion': b'03',
406 406 b'commonheads': common,
407 407 b'known': known,
408 408 b'ellipses': ellipsesremote,
409 409 },
410 410 ).result()
411 411
412 412 trmanager = exchange.transactionmanager(
413 413 repo, b'widen', remote.url()
414 414 )
415 415 with trmanager, repo.ui.configoverride(overrides, b'widen'):
416 416 op = bundle2.bundleoperation(
417 417 repo, trmanager.transaction, source=b'widen'
418 418 )
419 419 # TODO: we should catch error.Abort here
420 420 bundle2.processbundle(repo, bundle, op=op)
421 421
422 422 if ellipsesremote:
423 423 with ds.parentchange():
424 424 ds.setparents(p1, p2)
425 425
426 426 with repo.transaction(b'widening'), repo.dirstate.parentchange():
427 427 repo.setnewnarrowpats()
428 428 narrowspec.updateworkingcopy(repo)
429 429 narrowspec.copytoworkingcopy(repo)
430 430
431 431
432 432 # TODO(rdamazio): Make new matcher format and update description
433 433 @command(
434 434 b'tracked',
435 435 [
436 436 (b'', b'addinclude', [], _(b'new paths to include')),
437 437 (b'', b'removeinclude', [], _(b'old paths to no longer include')),
438 438 (
439 439 b'',
440 440 b'auto-remove-includes',
441 441 False,
442 442 _(b'automatically choose unused includes to remove'),
443 443 ),
444 444 (b'', b'addexclude', [], _(b'new paths to exclude')),
445 445 (b'', b'import-rules', b'', _(b'import narrowspecs from a file')),
446 446 (b'', b'removeexclude', [], _(b'old paths to no longer exclude')),
447 447 (
448 448 b'',
449 449 b'clear',
450 450 False,
451 451 _(b'whether to replace the existing narrowspec'),
452 452 ),
453 453 (
454 454 b'',
455 455 b'force-delete-local-changes',
456 456 False,
457 457 _(b'forces deletion of local changes when narrowing'),
458 458 ),
459 459 (
460 460 b'',
461 461 b'backup',
462 462 True,
463 463 _(b'back up local changes when narrowing'),
464 464 ),
465 465 (
466 466 b'',
467 467 b'update-working-copy',
468 468 False,
469 469 _(b'update working copy when the store has changed'),
470 470 ),
471 471 ]
472 472 + commands.remoteopts,
473 473 _(b'[OPTIONS]... [REMOTE]'),
474 474 inferrepo=True,
475 475 helpcategory=command.CATEGORY_MAINTENANCE,
476 476 )
477 477 def trackedcmd(ui, repo, remotepath=None, *pats, **opts):
478 478 """show or change the current narrowspec
479 479
480 480 With no argument, shows the current narrowspec entries, one per line. Each
481 481 line will be prefixed with 'I' or 'X' for included or excluded patterns,
482 482 respectively.
483 483
484 484 The narrowspec is comprised of expressions to match remote files and/or
485 485 directories that should be pulled into your client.
486 486 The narrowspec has *include* and *exclude* expressions, with excludes always
487 487 trumping includes: that is, if a file matches an exclude expression, it will
488 488 be excluded even if it also matches an include expression.
489 489 Excluding files that were never included has no effect.
490 490
491 491 Each included or excluded entry is in the format described by
492 492 'hg help patterns'.
493 493
494 494 The options allow you to add or remove included and excluded expressions.
495 495
496 496 If --clear is specified, then all previous includes and excludes are DROPPED
497 497 and replaced by the new ones specified to --addinclude and --addexclude.
498 498 If --clear is specified without any further options, the narrowspec will be
499 499 empty and will not match any files.
500 500
501 501 If --auto-remove-includes is specified, then those includes that don't match
502 502 any files modified by currently visible local commits (those not shared by
503 503 the remote) will be added to the set of explicitly specified includes to
504 504 remove.
505 505
506 506 --import-rules accepts a path to a file containing rules, allowing you to
507 507 add --addinclude, --addexclude rules in bulk. Like the other include and
508 508 exclude switches, the changes are applied immediately.
509 509 """
510 510 opts = pycompat.byteskwargs(opts)
511 511 if requirements.NARROW_REQUIREMENT not in repo.requirements:
512 512 raise error.InputError(
513 513 _(
514 514 b'the tracked command is only supported on '
515 515 b'repositories cloned with --narrow'
516 516 )
517 517 )
518 518
519 519 # Before supporting it, decide whether "hg tracked --clear" should mean
520 520 # tracking no paths or all paths.
521 521 if opts[b'clear']:
522 522 raise error.InputError(_(b'the --clear option is not yet supported'))
523 523
524 524 # import rules from a file
525 525 newrules = opts.get(b'import_rules')
526 526 if newrules:
527 527 try:
528 528 filepath = os.path.join(encoding.getcwd(), newrules)
529 529 fdata = util.readfile(filepath)
530 530 except IOError as inst:
531 531 raise error.StorageError(
532 532 _(b"cannot read narrowspecs from '%s': %s")
533 533 % (filepath, encoding.strtolocal(inst.strerror))
534 534 )
535 535 includepats, excludepats, profiles = sparse.parseconfig(
536 536 ui, fdata, b'narrow'
537 537 )
538 538 if profiles:
539 539 raise error.InputError(
540 540 _(
541 541 b"including other spec files using '%include' "
542 542 b"is not supported in narrowspec"
543 543 )
544 544 )
545 545 opts[b'addinclude'].extend(includepats)
546 546 opts[b'addexclude'].extend(excludepats)
547 547
548 548 addedincludes = narrowspec.parsepatterns(opts[b'addinclude'])
549 549 removedincludes = narrowspec.parsepatterns(opts[b'removeinclude'])
550 550 addedexcludes = narrowspec.parsepatterns(opts[b'addexclude'])
551 551 removedexcludes = narrowspec.parsepatterns(opts[b'removeexclude'])
552 552 autoremoveincludes = opts[b'auto_remove_includes']
553 553
554 554 update_working_copy = opts[b'update_working_copy']
555 555 only_show = not (
556 556 addedincludes
557 557 or removedincludes
558 558 or addedexcludes
559 559 or removedexcludes
560 560 or newrules
561 561 or autoremoveincludes
562 562 or update_working_copy
563 563 )
564 564
565 565 oldincludes, oldexcludes = repo.narrowpats
566 566
567 567 # filter the user passed additions and deletions into actual additions and
568 568 # deletions of excludes and includes
569 569 addedincludes -= oldincludes
570 570 removedincludes &= oldincludes
571 571 addedexcludes -= oldexcludes
572 572 removedexcludes &= oldexcludes
573 573
574 574 widening = addedincludes or removedexcludes
575 575 narrowing = removedincludes or addedexcludes
576 576
577 577 # Only print the current narrowspec.
578 578 if only_show:
579 579 ui.pager(b'tracked')
580 580 fm = ui.formatter(b'narrow', opts)
581 581 for i in sorted(oldincludes):
582 582 fm.startitem()
583 583 fm.write(b'status', b'%s ', b'I', label=b'narrow.included')
584 584 fm.write(b'pat', b'%s\n', i, label=b'narrow.included')
585 585 for i in sorted(oldexcludes):
586 586 fm.startitem()
587 587 fm.write(b'status', b'%s ', b'X', label=b'narrow.excluded')
588 588 fm.write(b'pat', b'%s\n', i, label=b'narrow.excluded')
589 589 fm.end()
590 590 return 0
591 591
592 592 if update_working_copy:
593 593 with repo.wlock(), repo.lock(), repo.transaction(
594 594 b'narrow-wc'
595 595 ), repo.dirstate.parentchange():
596 596 narrowspec.updateworkingcopy(repo)
597 597 narrowspec.copytoworkingcopy(repo)
598 598 return 0
599 599
600 600 if not (widening or narrowing or autoremoveincludes):
601 601 ui.status(_(b"nothing to widen or narrow\n"))
602 602 return 0
603 603
604 604 with repo.wlock(), repo.lock():
605 605 cmdutil.bailifchanged(repo)
606 606
607 607 # Find the revisions we have in common with the remote. These will
608 608 # be used for finding local-only changes for narrowing. They will
609 609 # also define the set of revisions to update for widening.
610 610 r = urlutil.get_unique_pull_path(b'tracked', repo, ui, remotepath)
611 611 url, branches = r
612 612 ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(url))
613 613 remote = hg.peer(repo, opts, url)
614 614
615 615 try:
616 616 # check narrow support before doing anything if widening needs to be
617 617 # performed. In future we should also abort if client is ellipses and
618 618 # server does not support ellipses
619 619 if (
620 620 widening
621 621 and wireprototypes.NARROWCAP not in remote.capabilities()
622 622 ):
623 623 raise error.Abort(_(b"server does not support narrow clones"))
624 624
625 625 commoninc = discovery.findcommonincoming(repo, remote)
626 626
627 627 if autoremoveincludes:
628 628 outgoing = discovery.findcommonoutgoing(
629 629 repo, remote, commoninc=commoninc
630 630 )
631 631 ui.status(_(b'looking for unused includes to remove\n'))
632 632 localfiles = set()
633 633 for n in itertools.chain(outgoing.missing, outgoing.excluded):
634 634 localfiles.update(repo[n].files())
635 635 suggestedremovals = []
636 636 for include in sorted(oldincludes):
637 637 match = narrowspec.match(repo.root, [include], oldexcludes)
638 638 if not any(match(f) for f in localfiles):
639 639 suggestedremovals.append(include)
640 640 if suggestedremovals:
641 641 for s in suggestedremovals:
642 642 ui.status(b'%s\n' % s)
643 643 if (
644 644 ui.promptchoice(
645 645 _(
646 646 b'remove these unused includes (yn)?'
647 647 b'$$ &Yes $$ &No'
648 648 )
649 649 )
650 650 == 0
651 651 ):
652 652 removedincludes.update(suggestedremovals)
653 653 narrowing = True
654 654 else:
655 655 ui.status(_(b'found no unused includes\n'))
656 656
657 657 if narrowing:
658 658 newincludes = oldincludes - removedincludes
659 659 newexcludes = oldexcludes | addedexcludes
660 660 _narrow(
661 661 ui,
662 662 repo,
663 663 remote,
664 664 commoninc,
665 665 oldincludes,
666 666 oldexcludes,
667 667 newincludes,
668 668 newexcludes,
669 669 opts[b'force_delete_local_changes'],
670 670 opts[b'backup'],
671 671 )
672 672 # _narrow() updated the narrowspec and _widen() below needs to
673 673 # use the updated values as its base (otherwise removed includes
674 674 # and addedexcludes will be lost in the resulting narrowspec)
675 675 oldincludes = newincludes
676 676 oldexcludes = newexcludes
677 677
678 678 if widening:
679 679 newincludes = oldincludes | addedincludes
680 680 newexcludes = oldexcludes - removedexcludes
681 681 _widen(
682 682 ui,
683 683 repo,
684 684 remote,
685 685 commoninc,
686 686 oldincludes,
687 687 oldexcludes,
688 688 newincludes,
689 689 newexcludes,
690 690 )
691 691 finally:
692 692 remote.close()
693 693
694 694 return 0
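For reference, the slicing kept in _narrow() above maps store paths yielded by datafiles() back to tracked names; a small illustration with made-up paths (the byte strings below are examples, not taken from a real repository):

# 'data/' is 5 bytes, and the trailing '.i'/'.d' suffix is 2 bytes:
f = b'data/foo/bar.txt.i'
assert f[5:-2] == b'foo/bar.txt'
# 'meta/' is 5 bytes, and the trailing '/00manifest.i' is 13 bytes:
m = b'meta/some/dir/00manifest.i'
assert m[5:-13] == b'some/dir'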
@@ -1,399 +1,399 @@ contentstore.py
1 1 from __future__ import absolute_import
2 2
3 3 import threading
4 4
5 5 from mercurial.node import (
6 6 hex,
7 7 sha1nodeconstants,
8 8 )
9 9 from mercurial.pycompat import getattr
10 10 from mercurial import (
11 11 mdiff,
12 12 pycompat,
13 13 revlog,
14 14 )
15 15 from . import (
16 16 basestore,
17 17 constants,
18 18 shallowutil,
19 19 )
20 20
21 21
22 22 class ChainIndicies(object):
23 23 """A static class for easy reference to the delta chain indicies."""
24 24
25 25 # The filename of this revision delta
26 26 NAME = 0
27 27 # The mercurial file node for this revision delta
28 28 NODE = 1
29 29 # The filename of the delta base's revision. This is useful when delta
30 30 # between different files (like in the case of a move or copy, we can delta
31 31 # against the original file content).
32 32 BASENAME = 2
33 33 # The mercurial file node for the delta base revision. This is the nullid if
34 34 # this delta is a full text.
35 35 BASENODE = 3
36 36 # The actual delta or full text data.
37 37 DATA = 4
38 38
39 39
40 40 class unioncontentstore(basestore.baseunionstore):
41 41 def __init__(self, *args, **kwargs):
42 42 super(unioncontentstore, self).__init__(*args, **kwargs)
43 43
44 44 self.stores = args
45 45 self.writestore = kwargs.get('writestore')
46 46
47 47 # If allowincomplete==True then the union store can return partial
48 48 # delta chains, otherwise it will throw a KeyError if a full
49 49 # deltachain can't be found.
50 50 self.allowincomplete = kwargs.get('allowincomplete', False)
51 51
52 52 def get(self, name, node):
53 53 """Fetches the full text revision contents of the given name+node pair.
54 54 If the full text doesn't exist, throws a KeyError.
55 55
56 56 Under the hood, this uses getdeltachain() across all the stores to build
57 57 up a full chain to produce the full text.
58 58 """
59 59 chain = self.getdeltachain(name, node)
60 60
61 61 if chain[-1][ChainIndicies.BASENODE] != sha1nodeconstants.nullid:
62 62 # If we didn't receive a full chain, throw
63 63 raise KeyError((name, hex(node)))
64 64
65 65 # The last entry in the chain is a full text, so we start our delta
66 66 # applies with that.
67 67 fulltext = chain.pop()[ChainIndicies.DATA]
68 68
69 69 text = fulltext
70 70 while chain:
71 71 delta = chain.pop()[ChainIndicies.DATA]
72 72 text = mdiff.patches(text, [delta])
73 73
74 74 return text
75 75
76 76 @basestore.baseunionstore.retriable
77 77 def getdelta(self, name, node):
78 78 """Return the single delta entry for the given name/node pair."""
79 79 for store in self.stores:
80 80 try:
81 81 return store.getdelta(name, node)
82 82 except KeyError:
83 83 pass
84 84
85 85 raise KeyError((name, hex(node)))
86 86
87 87 def getdeltachain(self, name, node):
88 88 """Returns the deltachain for the given name/node pair.
89 89
90 90 Returns an ordered list of:
91 91
92 92 [(name, node, deltabasename, deltabasenode, deltacontent),...]
93 93
94 94 where the chain is terminated by a full text entry with a nullid
95 95 deltabasenode.
96 96 """
97 97 chain = self._getpartialchain(name, node)
98 98 while chain[-1][ChainIndicies.BASENODE] != sha1nodeconstants.nullid:
99 99 x, x, deltabasename, deltabasenode, x = chain[-1]
100 100 try:
101 101 morechain = self._getpartialchain(deltabasename, deltabasenode)
102 102 chain.extend(morechain)
103 103 except KeyError:
104 104 # If we allow incomplete chains, don't throw.
105 105 if not self.allowincomplete:
106 106 raise
107 107 break
108 108
109 109 return chain
110 110
111 111 @basestore.baseunionstore.retriable
112 112 def getmeta(self, name, node):
113 113 """Returns the metadata dict for given node."""
114 114 for store in self.stores:
115 115 try:
116 116 return store.getmeta(name, node)
117 117 except KeyError:
118 118 pass
119 119 raise KeyError((name, hex(node)))
120 120
121 121 def getmetrics(self):
122 122 metrics = [s.getmetrics() for s in self.stores]
123 123 return shallowutil.sumdicts(*metrics)
124 124
125 125 @basestore.baseunionstore.retriable
126 126 def _getpartialchain(self, name, node):
127 127 """Returns a partial delta chain for the given name/node pair.
128 128
129 129 A partial chain is a chain that may not be terminated in a full-text.
130 130 """
131 131 for store in self.stores:
132 132 try:
133 133 return store.getdeltachain(name, node)
134 134 except KeyError:
135 135 pass
136 136
137 137 raise KeyError((name, hex(node)))
138 138
139 139 def add(self, name, node, data):
140 140 raise RuntimeError(
141 141 b"cannot add content only to remotefilelog contentstore"
142 142 )
143 143
144 144 def getmissing(self, keys):
145 145 missing = keys
146 146 for store in self.stores:
147 147 if missing:
148 148 missing = store.getmissing(missing)
149 149 return missing
150 150
151 151 def addremotefilelognode(self, name, node, data):
152 152 if self.writestore:
153 153 self.writestore.addremotefilelognode(name, node, data)
154 154 else:
155 155 raise RuntimeError(b"no writable store configured")
156 156
157 157 def markledger(self, ledger, options=None):
158 158 for store in self.stores:
159 159 store.markledger(ledger, options)
160 160
161 161
162 162 class remotefilelogcontentstore(basestore.basestore):
163 163 def __init__(self, *args, **kwargs):
164 164 super(remotefilelogcontentstore, self).__init__(*args, **kwargs)
165 165 self._threaddata = threading.local()
166 166
167 167 def get(self, name, node):
168 168 # return raw revision text
169 169 data = self._getdata(name, node)
170 170
171 171 offset, size, flags = shallowutil.parsesizeflags(data)
172 172 content = data[offset : offset + size]
173 173
174 174 ancestormap = shallowutil.ancestormap(data)
175 175 p1, p2, linknode, copyfrom = ancestormap[node]
176 176 copyrev = None
177 177 if copyfrom:
178 178 copyrev = hex(p1)
179 179
180 180 self._updatemetacache(node, size, flags)
181 181
182 182 # lfs tracks renames in its own metadata, remove hg copy metadata,
183 183 # because copy metadata will be re-added by lfs flag processor.
184 184 if flags & revlog.REVIDX_EXTSTORED:
185 185 copyrev = copyfrom = None
186 186 revision = shallowutil.createrevlogtext(content, copyfrom, copyrev)
187 187 return revision
188 188
189 189 def getdelta(self, name, node):
190 190 # Since remotefilelog content stores only contain full texts, just
191 191 # return that.
192 192 revision = self.get(name, node)
193 193 return (
194 194 revision,
195 195 name,
196 196 sha1nodeconstants.nullid,
197 197 self.getmeta(name, node),
198 198 )
199 199
200 200 def getdeltachain(self, name, node):
201 201 # Since remotefilelog content stores just contain full texts, we return
202 202 # a fake delta chain that just consists of a single full text revision.
203 203 # The nullid in the deltabasenode slot indicates that the revision is a
204 204 # fulltext.
205 205 revision = self.get(name, node)
206 206 return [(name, node, None, sha1nodeconstants.nullid, revision)]
207 207
208 208 def getmeta(self, name, node):
209 209 self._sanitizemetacache()
210 210 if node != self._threaddata.metacache[0]:
211 211 data = self._getdata(name, node)
212 212 offset, size, flags = shallowutil.parsesizeflags(data)
213 213 self._updatemetacache(node, size, flags)
214 214 return self._threaddata.metacache[1]
215 215
216 216 def add(self, name, node, data):
217 217 raise RuntimeError(
218 218 b"cannot add content only to remotefilelog contentstore"
219 219 )
220 220
221 221 def _sanitizemetacache(self):
222 222 metacache = getattr(self._threaddata, 'metacache', None)
223 223 if metacache is None:
224 224 self._threaddata.metacache = (None, None) # (node, meta)
225 225
226 226 def _updatemetacache(self, node, size, flags):
227 227 self._sanitizemetacache()
228 228 if node == self._threaddata.metacache[0]:
229 229 return
230 230 meta = {constants.METAKEYFLAG: flags, constants.METAKEYSIZE: size}
231 231 self._threaddata.metacache = (node, meta)
232 232
233 233
234 234 class remotecontentstore(object):
235 235 def __init__(self, ui, fileservice, shared):
236 236 self._fileservice = fileservice
237 237 # type(shared) is usually remotefilelogcontentstore
238 238 self._shared = shared
239 239
240 240 def get(self, name, node):
241 241 self._fileservice.prefetch(
242 242 [(name, hex(node))], force=True, fetchdata=True
243 243 )
244 244 return self._shared.get(name, node)
245 245
246 246 def getdelta(self, name, node):
247 247 revision = self.get(name, node)
248 248 return (
249 249 revision,
250 250 name,
251 251 sha1nodeconstants.nullid,
252 252 self._shared.getmeta(name, node),
253 253 )
254 254
255 255 def getdeltachain(self, name, node):
256 256 # Since our remote content stores just contain full texts, we return a
257 257 # fake delta chain that just consists of a single full text revision.
258 258 # The nullid in the deltabasenode slot indicates that the revision is a
259 259 # fulltext.
260 260 revision = self.get(name, node)
261 261 return [(name, node, None, sha1nodeconstants.nullid, revision)]
262 262
263 263 def getmeta(self, name, node):
264 264 self._fileservice.prefetch(
265 265 [(name, hex(node))], force=True, fetchdata=True
266 266 )
267 267 return self._shared.getmeta(name, node)
268 268
269 269 def add(self, name, node, data):
270 270 raise RuntimeError(b"cannot add to a remote store")
271 271
272 272 def getmissing(self, keys):
273 273 return keys
274 274
275 275 def markledger(self, ledger, options=None):
276 276 pass
277 277
278 278
279 279 class manifestrevlogstore(object):
280 280 def __init__(self, repo):
281 281 self._store = repo.store
282 282 self._svfs = repo.svfs
283 283 self._revlogs = dict()
284 284 self._cl = revlog.revlog(self._svfs, radix=b'00changelog.i')
285 285 self._repackstartlinkrev = 0
286 286
287 287 def get(self, name, node):
288 288 return self._revlog(name).rawdata(node)
289 289
290 290 def getdelta(self, name, node):
291 291 revision = self.get(name, node)
292 292 return revision, name, self._cl.nullid, self.getmeta(name, node)
293 293
294 294 def getdeltachain(self, name, node):
295 295 revision = self.get(name, node)
296 296 return [(name, node, None, self._cl.nullid, revision)]
297 297
298 298 def getmeta(self, name, node):
299 299 rl = self._revlog(name)
300 300 rev = rl.rev(node)
301 301 return {
302 302 constants.METAKEYFLAG: rl.flags(rev),
303 303 constants.METAKEYSIZE: rl.rawsize(rev),
304 304 }
305 305
306 306 def getancestors(self, name, node, known=None):
307 307 if known is None:
308 308 known = set()
309 309 if node in known:
310 310 return []
311 311
312 312 rl = self._revlog(name)
313 313 ancestors = {}
314 314 missing = {node}
315 315 for ancrev in rl.ancestors([rl.rev(node)], inclusive=True):
316 316 ancnode = rl.node(ancrev)
317 317 missing.discard(ancnode)
318 318
319 319 p1, p2 = rl.parents(ancnode)
320 320 if p1 != self._cl.nullid and p1 not in known:
321 321 missing.add(p1)
322 322 if p2 != self._cl.nullid and p2 not in known:
323 323 missing.add(p2)
324 324
325 325 linknode = self._cl.node(rl.linkrev(ancrev))
326 326 ancestors[rl.node(ancrev)] = (p1, p2, linknode, b'')
327 327 if not missing:
328 328 break
329 329 return ancestors
330 330
331 331 def getnodeinfo(self, name, node):
332 332 cl = self._cl
333 333 rl = self._revlog(name)
334 334 parents = rl.parents(node)
335 335 linkrev = rl.linkrev(rl.rev(node))
336 336 return (parents[0], parents[1], cl.node(linkrev), None)
337 337
338 338 def add(self, *args):
339 339 raise RuntimeError(b"cannot add to a revlog store")
340 340
341 341 def _revlog(self, name):
342 342 rl = self._revlogs.get(name)
343 343 if rl is None:
344 344 revlogname = b'00manifesttree'
345 345 if name != b'':
346 346 revlogname = b'meta/%s/00manifest' % name
347 347 rl = revlog.revlog(self._svfs, radix=revlogname)
348 348 self._revlogs[name] = rl
349 349 return rl
350 350
351 351 def getmissing(self, keys):
352 352 missing = []
353 353 for name, node in keys:
354 354 mfrevlog = self._revlog(name)
355 355 if node not in mfrevlog.nodemap:
356 356 missing.append((name, node))
357 357
358 358 return missing
359 359
360 360 def setrepacklinkrevrange(self, startrev, endrev):
361 361 self._repackstartlinkrev = startrev
362 362 self._repackendlinkrev = endrev
363 363
364 364 def markledger(self, ledger, options=None):
365 365 if options and options.get(constants.OPTION_PACKSONLY):
366 366 return
367 367 treename = b''
368 368 rl = revlog.revlog(self._svfs, radix=b'00manifesttree')
369 369 startlinkrev = self._repackstartlinkrev
370 370 endlinkrev = self._repackendlinkrev
371 371 for rev in pycompat.xrange(len(rl) - 1, -1, -1):
372 372 linkrev = rl.linkrev(rev)
373 373 if linkrev < startlinkrev:
374 374 break
375 375 if linkrev > endlinkrev:
376 376 continue
377 377 node = rl.node(rev)
378 378 ledger.markdataentry(self, treename, node)
379 379 ledger.markhistoryentry(self, treename, node)
380 380
381 for t, path, encoded, size in self._store.datafiles():
381 for t, path, size in self._store.datafiles():
382 382 if path[:5] != b'meta/' or path[-2:] != b'.i':
383 383 continue
384 384
385 385 treename = path[5 : -len(b'/00manifest')]
386 386
387 387 rl = revlog.revlog(self._svfs, indexfile=path[:-2])
388 388 for rev in pycompat.xrange(len(rl) - 1, -1, -1):
389 389 linkrev = rl.linkrev(rev)
390 390 if linkrev < startlinkrev:
391 391 break
392 392 if linkrev > endlinkrev:
393 393 continue
394 394 node = rl.node(rev)
395 395 ledger.markdataentry(self, treename, node)
396 396 ledger.markhistoryentry(self, treename, node)
397 397
398 398 def cleanup(self, ledger):
399 399 pass
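As the unioncontentstore.get() docstring above describes, a full text is rebuilt by fetching a delta chain that ends in a full-text entry and applying the deltas in reverse order; a standalone sketch of that loop (resolve_chain is a hypothetical helper, the chain layout is the one documented for getdeltachain()):

from mercurial import mdiff

def resolve_chain(chain):
    # chain entries are (name, node, deltabasename, deltabasenode, data),
    # newest first, terminated by a full-text entry whose base is nullid.
    text = chain.pop()[-1]        # the last entry carries the full text
    while chain:
        delta = chain.pop()[-1]   # apply the remaining deltas in order
        text = mdiff.patches(text, [delta])
    return text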
@@ -1,441 +1,441 @@ remotefilelogserver.py
1 1 # remotefilelogserver.py - server logic for a remotefilelog server
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import errno
10 10 import os
11 11 import stat
12 12 import time
13 13 import zlib
14 14
15 15 from mercurial.i18n import _
16 16 from mercurial.node import bin, hex
17 17 from mercurial.pycompat import open
18 18 from mercurial import (
19 19 changegroup,
20 20 changelog,
21 21 context,
22 22 error,
23 23 extensions,
24 24 match,
25 25 pycompat,
26 26 scmutil,
27 27 store,
28 28 streamclone,
29 29 util,
30 30 wireprotoserver,
31 31 wireprototypes,
32 32 wireprotov1server,
33 33 )
34 34 from . import (
35 35 constants,
36 36 shallowutil,
37 37 )
38 38
39 39 _sshv1server = wireprotoserver.sshv1protocolhandler
40 40
41 41
42 42 def setupserver(ui, repo):
43 43 """Sets up a normal Mercurial repo so it can serve files to shallow repos."""
44 44 onetimesetup(ui)
45 45
46 46 # don't send files to shallow clients during pulls
47 47 def generatefiles(
48 48 orig, self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
49 49 ):
50 50 caps = self._bundlecaps or []
51 51 if constants.BUNDLE2_CAPABLITY in caps:
52 52 # only send files that don't match the specified patterns
53 53 includepattern = None
54 54 excludepattern = None
55 55 for cap in self._bundlecaps or []:
56 56 if cap.startswith(b"includepattern="):
57 57 includepattern = cap[len(b"includepattern=") :].split(b'\0')
58 58 elif cap.startswith(b"excludepattern="):
59 59 excludepattern = cap[len(b"excludepattern=") :].split(b'\0')
60 60
61 61 m = match.always()
62 62 if includepattern or excludepattern:
63 63 m = match.match(
64 64 repo.root, b'', None, includepattern, excludepattern
65 65 )
66 66
67 67 changedfiles = list([f for f in changedfiles if not m(f)])
68 68 return orig(
69 69 self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
70 70 )
71 71
72 72 extensions.wrapfunction(
73 73 changegroup.cgpacker, b'generatefiles', generatefiles
74 74 )
75 75
76 76
77 77 onetime = False
78 78
79 79
80 80 def onetimesetup(ui):
81 81 """Configures the wireprotocol for both clients and servers."""
82 82 global onetime
83 83 if onetime:
84 84 return
85 85 onetime = True
86 86
87 87 # support file content requests
88 88 wireprotov1server.wireprotocommand(
89 89 b'x_rfl_getflogheads', b'path', permission=b'pull'
90 90 )(getflogheads)
91 91 wireprotov1server.wireprotocommand(
92 92 b'x_rfl_getfiles', b'', permission=b'pull'
93 93 )(getfiles)
94 94 wireprotov1server.wireprotocommand(
95 95 b'x_rfl_getfile', b'file node', permission=b'pull'
96 96 )(getfile)
97 97
98 98 class streamstate(object):
99 99 match = None
100 100 shallowremote = False
101 101 noflatmf = False
102 102
103 103 state = streamstate()
104 104
105 105 def stream_out_shallow(repo, proto, other):
106 106 includepattern = None
107 107 excludepattern = None
108 108 raw = other.get(b'includepattern')
109 109 if raw:
110 110 includepattern = raw.split(b'\0')
111 111 raw = other.get(b'excludepattern')
112 112 if raw:
113 113 excludepattern = raw.split(b'\0')
114 114
115 115 oldshallow = state.shallowremote
116 116 oldmatch = state.match
117 117 oldnoflatmf = state.noflatmf
118 118 try:
119 119 state.shallowremote = True
120 120 state.match = match.always()
121 121 state.noflatmf = other.get(b'noflatmanifest') == b'True'
122 122 if includepattern or excludepattern:
123 123 state.match = match.match(
124 124 repo.root, b'', None, includepattern, excludepattern
125 125 )
126 126 streamres = wireprotov1server.stream(repo, proto)
127 127
128 128 # Force the first value to execute, so the file list is computed
129 129 # within the try/finally scope
130 130 first = next(streamres.gen)
131 131 second = next(streamres.gen)
132 132
133 133 def gen():
134 134 yield first
135 135 yield second
136 136 for value in streamres.gen:
137 137 yield value
138 138
139 139 return wireprototypes.streamres(gen())
140 140 finally:
141 141 state.shallowremote = oldshallow
142 142 state.match = oldmatch
143 143 state.noflatmf = oldnoflatmf
144 144
145 145 wireprotov1server.commands[b'stream_out_shallow'] = (
146 146 stream_out_shallow,
147 147 b'*',
148 148 )
149 149
150 150 # don't clone filelogs to shallow clients
151 151 def _walkstreamfiles(orig, repo, matcher=None):
152 152 if state.shallowremote:
153 153 # if we are shallow ourselves, stream our local commits
154 154 if shallowutil.isenabled(repo):
155 155 striplen = len(repo.store.path) + 1
156 156 readdir = repo.store.rawvfs.readdir
157 157 visit = [os.path.join(repo.store.path, b'data')]
158 158 while visit:
159 159 p = visit.pop()
160 160 for f, kind, st in readdir(p, stat=True):
161 161 fp = p + b'/' + f
162 162 if kind == stat.S_IFREG:
163 163 if not fp.endswith(b'.i') and not fp.endswith(
164 164 b'.d'
165 165 ):
166 166 n = util.pconvert(fp[striplen:])
167 167 d = store.decodedir(n)
168 168 t = store.FILETYPE_OTHER
169 yield (t, d, n, st.st_size)
169 yield (t, d, st.st_size)
170 170 if kind == stat.S_IFDIR:
171 171 visit.append(fp)
172 172
173 173 if scmutil.istreemanifest(repo):
174 for (t, u, e, s) in repo.store.datafiles():
174 for (t, u, s) in repo.store.datafiles():
175 175 if u.startswith(b'meta/') and (
176 176 u.endswith(b'.i') or u.endswith(b'.d')
177 177 ):
178 yield (t, u, e, s)
178 yield (t, u, s)
179 179
180 180 # Return .d and .i files that do not match the shallow pattern
181 181 match = state.match
182 182 if match and not match.always():
183 for (t, u, e, s) in repo.store.datafiles():
183 for (t, u, s) in repo.store.datafiles():
184 184 f = u[5:-2] # trim data/... and .i/.d
185 185 if not state.match(f):
186 yield (t, u, e, s)
186 yield (t, u, s)
187 187
188 188 for x in repo.store.topfiles():
189 189 if state.noflatmf and x[1][:11] == b'00manifest.':
190 190 continue
191 191 yield x
192 192
193 193 elif shallowutil.isenabled(repo):
194 194 # don't allow cloning from a shallow repo to a full repo
195 195 # since it would require fetching every version of every
196 196 # file in order to create the revlogs.
197 197 raise error.Abort(
198 198 _(b"Cannot clone from a shallow repo to a full repo.")
199 199 )
200 200 else:
201 201 for x in orig(repo, matcher):
202 202 yield x
203 203
204 204 extensions.wrapfunction(streamclone, b'_walkstreamfiles', _walkstreamfiles)
205 205
206 206 # expose remotefilelog capabilities
207 207 def _capabilities(orig, repo, proto):
208 208 caps = orig(repo, proto)
209 209 if shallowutil.isenabled(repo) or ui.configbool(
210 210 b'remotefilelog', b'server'
211 211 ):
212 212 if isinstance(proto, _sshv1server):
213 213 # legacy getfiles method which only works over ssh
214 214 caps.append(constants.NETWORK_CAP_LEGACY_SSH_GETFILES)
215 215 caps.append(b'x_rfl_getflogheads')
216 216 caps.append(b'x_rfl_getfile')
217 217 return caps
218 218
219 219 extensions.wrapfunction(wireprotov1server, b'_capabilities', _capabilities)
220 220
221 221 def _adjustlinkrev(orig, self, *args, **kwargs):
222 222 # When generating file blobs, taking the real path is too slow on large
223 223 # repos, so force it to just return the linkrev directly.
224 224 repo = self._repo
225 225 if util.safehasattr(repo, b'forcelinkrev') and repo.forcelinkrev:
226 226 return self._filelog.linkrev(self._filelog.rev(self._filenode))
227 227 return orig(self, *args, **kwargs)
228 228
229 229 extensions.wrapfunction(
230 230 context.basefilectx, b'_adjustlinkrev', _adjustlinkrev
231 231 )
232 232
233 233 def _iscmd(orig, cmd):
234 234 if cmd == b'x_rfl_getfiles':
235 235 return False
236 236 return orig(cmd)
237 237
238 238 extensions.wrapfunction(wireprotoserver, b'iscmd', _iscmd)
239 239
240 240
241 241 def _loadfileblob(repo, cachepath, path, node):
242 242 filecachepath = os.path.join(cachepath, path, hex(node))
243 243 if not os.path.exists(filecachepath) or os.path.getsize(filecachepath) == 0:
244 244 filectx = repo.filectx(path, fileid=node)
245 245 if filectx.node() == repo.nullid:
246 246 repo.changelog = changelog.changelog(repo.svfs)
247 247 filectx = repo.filectx(path, fileid=node)
248 248
249 249 text = createfileblob(filectx)
250 250 # TODO configurable compression engines
251 251 text = zlib.compress(text)
252 252
253 253 # everything should be user & group read/writable
254 254 oldumask = os.umask(0o002)
255 255 try:
256 256 dirname = os.path.dirname(filecachepath)
257 257 if not os.path.exists(dirname):
258 258 try:
259 259 os.makedirs(dirname)
260 260 except OSError as ex:
261 261 if ex.errno != errno.EEXIST:
262 262 raise
263 263
264 264 f = None
265 265 try:
266 266 f = util.atomictempfile(filecachepath, b"wb")
267 267 f.write(text)
268 268 except (IOError, OSError):
269 269 # Don't abort if the user only has permission to read,
270 270 # and not write.
271 271 pass
272 272 finally:
273 273 if f:
274 274 f.close()
275 275 finally:
276 276 os.umask(oldumask)
277 277 else:
278 278 with open(filecachepath, b"rb") as f:
279 279 text = f.read()
280 280 return text
281 281
282 282
283 283 def getflogheads(repo, proto, path):
284 284 """A server api for requesting a filelog's heads"""
285 285 flog = repo.file(path)
286 286 heads = flog.heads()
287 287 return b'\n'.join((hex(head) for head in heads if head != repo.nullid))
288 288
289 289
290 290 def getfile(repo, proto, file, node):
291 291 """A server api for requesting a particular version of a file. Can be used
292 292 in batches to request many files at once. The return protocol is:
293 293 <errorcode>\0<data/errormsg> where <errorcode> is 0 for success or
294 294 non-zero for an error.
295 295
296 296 data is a compressed blob with revlog flag and ancestors information. See
297 297 createfileblob for its content.
298 298 """
299 299 if shallowutil.isenabled(repo):
300 300 return b'1\0' + _(b'cannot fetch remote files from shallow repo')
301 301 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
302 302 if not cachepath:
303 303 cachepath = os.path.join(repo.path, b"remotefilelogcache")
304 304 node = bin(node.strip())
305 305 if node == repo.nullid:
306 306 return b'0\0'
307 307 return b'0\0' + _loadfileblob(repo, cachepath, file, node)
308 308
309 309
310 310 def getfiles(repo, proto):
311 311 """A server api for requesting particular versions of particular files."""
312 312 if shallowutil.isenabled(repo):
313 313 raise error.Abort(_(b'cannot fetch remote files from shallow repo'))
314 314 if not isinstance(proto, _sshv1server):
315 315 raise error.Abort(_(b'cannot fetch remote files over non-ssh protocol'))
316 316
317 317 def streamer():
318 318 fin = proto._fin
319 319
320 320 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
321 321 if not cachepath:
322 322 cachepath = os.path.join(repo.path, b"remotefilelogcache")
323 323
324 324 while True:
325 325 request = fin.readline()[:-1]
326 326 if not request:
327 327 break
328 328
329 329 node = bin(request[:40])
330 330 if node == repo.nullid:
331 331 yield b'0\n'
332 332 continue
333 333
334 334 path = request[40:]
335 335
336 336 text = _loadfileblob(repo, cachepath, path, node)
337 337
338 338 yield b'%d\n%s' % (len(text), text)
339 339
340 340 # it would be better to only flush after processing a whole batch
341 341 # but currently we don't know if there are more requests coming
342 342 proto._fout.flush()
343 343
344 344 return wireprototypes.streamres(streamer())
345 345
346 346
347 347 def createfileblob(filectx):
348 348 """
349 349 format:
350 350 v0:
351 351 str(len(rawtext)) + '\0' + rawtext + ancestortext
352 352 v1:
353 353 'v1' + '\n' + metalist + '\0' + rawtext + ancestortext
354 354 metalist := metalist + '\n' + meta | meta
355 355 meta := sizemeta | flagmeta
356 356 sizemeta := METAKEYSIZE + str(len(rawtext))
357 357 flagmeta := METAKEYFLAG + str(flag)
358 358
359 359 note: sizemeta must exist. METAKEYFLAG and METAKEYSIZE must have a
360 360 length of 1.
361 361 """
362 362 flog = filectx.filelog()
363 363 frev = filectx.filerev()
364 364 revlogflags = flog._revlog.flags(frev)
365 365 if revlogflags == 0:
366 366 # normal files
367 367 text = filectx.data()
368 368 else:
369 369 # lfs, read raw revision data
370 370 text = flog.rawdata(frev)
371 371
372 372 repo = filectx._repo
373 373
374 374 ancestors = [filectx]
375 375
376 376 try:
377 377 repo.forcelinkrev = True
378 378 ancestors.extend([f for f in filectx.ancestors()])
379 379
380 380 ancestortext = b""
381 381 for ancestorctx in ancestors:
382 382 parents = ancestorctx.parents()
383 383 p1 = repo.nullid
384 384 p2 = repo.nullid
385 385 if len(parents) > 0:
386 386 p1 = parents[0].filenode()
387 387 if len(parents) > 1:
388 388 p2 = parents[1].filenode()
389 389
390 390 copyname = b""
391 391 rename = ancestorctx.renamed()
392 392 if rename:
393 393 copyname = rename[0]
394 394 linknode = ancestorctx.node()
395 395 ancestortext += b"%s%s%s%s%s\0" % (
396 396 ancestorctx.filenode(),
397 397 p1,
398 398 p2,
399 399 linknode,
400 400 copyname,
401 401 )
402 402 finally:
403 403 repo.forcelinkrev = False
404 404
405 405 header = shallowutil.buildfileblobheader(len(text), revlogflags)
406 406
407 407 return b"%s\0%s%s" % (header, text, ancestortext)
408 408
409 409
410 410 def gcserver(ui, repo):
411 411 if not repo.ui.configbool(b"remotefilelog", b"server"):
412 412 return
413 413
414 414 neededfiles = set()
415 415 heads = repo.revs(b"heads(tip~25000:) - null")
416 416
417 417 cachepath = repo.vfs.join(b"remotefilelogcache")
418 418 for head in heads:
419 419 mf = repo[head].manifest()
420 420 for filename, filenode in pycompat.iteritems(mf):
421 421 filecachepath = os.path.join(cachepath, filename, hex(filenode))
422 422 neededfiles.add(filecachepath)
423 423
424 424 # delete unneeded older files
425 425 days = repo.ui.configint(b"remotefilelog", b"serverexpiration")
426 426 expiration = time.time() - (days * 24 * 60 * 60)
427 427
428 428 progress = ui.makeprogress(_(b"removing old server cache"), unit=b"files")
429 429 progress.update(0)
430 430 for root, dirs, files in os.walk(cachepath):
431 431 for file in files:
432 432 filepath = os.path.join(root, file)
433 433 progress.increment()
434 434 if filepath in neededfiles:
435 435 continue
436 436
437 437 stat = os.stat(filepath)
438 438 if stat.st_mtime < expiration:
439 439 os.remove(filepath)
440 440
441 441 progress.complete()
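The createfileblob() docstring above spells out the blob layout; as a toy illustration, the simpler v0 form can be split like this (split_v0_fileblob is a hypothetical name, and this ignores the v1 header that shallowutil.buildfileblobheader() actually emits):

def split_v0_fileblob(blob):
    # v0 layout: str(len(rawtext)) + '\0' + rawtext + ancestortext
    header, rest = blob.split(b'\0', 1)
    size = int(header)
    return rest[:size], rest[size:]  # (rawtext, ancestortext)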
@@ -1,563 +1,563 @@ repair.py
1 1 # repair.py - functions for repository repair for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Chris Mason <mason@suse.com>
4 4 # Copyright 2007 Olivia Mackall
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12
13 13 from .i18n import _
14 14 from .node import (
15 15 hex,
16 16 short,
17 17 )
18 18 from . import (
19 19 bundle2,
20 20 changegroup,
21 21 discovery,
22 22 error,
23 23 exchange,
24 24 obsolete,
25 25 obsutil,
26 26 pathutil,
27 27 phases,
28 28 pycompat,
29 29 requirements,
30 30 scmutil,
31 31 util,
32 32 )
33 33 from .utils import (
34 34 hashutil,
35 35 stringutil,
36 36 urlutil,
37 37 )
38 38
39 39
40 40 def backupbundle(
41 41 repo, bases, heads, node, suffix, compress=True, obsolescence=True
42 42 ):
43 43 """create a bundle with the specified revisions as a backup"""
44 44
45 45 backupdir = b"strip-backup"
46 46 vfs = repo.vfs
47 47 if not vfs.isdir(backupdir):
48 48 vfs.mkdir(backupdir)
49 49
50 50 # Include a hash of all the nodes in the filename for uniqueness
51 51 allcommits = repo.set(b'%ln::%ln', bases, heads)
52 52 allhashes = sorted(c.hex() for c in allcommits)
53 53 totalhash = hashutil.sha1(b''.join(allhashes)).digest()
54 54 name = b"%s/%s-%s-%s.hg" % (
55 55 backupdir,
56 56 short(node),
57 57 hex(totalhash[:4]),
58 58 suffix,
59 59 )
60 60
61 61 cgversion = changegroup.localversion(repo)
62 62 comp = None
63 63 if cgversion != b'01':
64 64 bundletype = b"HG20"
65 65 if compress:
66 66 comp = b'BZ'
67 67 elif compress:
68 68 bundletype = b"HG10BZ"
69 69 else:
70 70 bundletype = b"HG10UN"
71 71
72 72 outgoing = discovery.outgoing(repo, missingroots=bases, ancestorsof=heads)
73 73 contentopts = {
74 74 b'cg.version': cgversion,
75 75 b'obsolescence': obsolescence,
76 76 b'phases': True,
77 77 }
78 78 return bundle2.writenewbundle(
79 79 repo.ui,
80 80 repo,
81 81 b'strip',
82 82 name,
83 83 bundletype,
84 84 outgoing,
85 85 contentopts,
86 86 vfs,
87 87 compression=comp,
88 88 )
89 89
90 90
91 91 def _collectfiles(repo, striprev):
92 92 """find out the filelogs affected by the strip"""
93 93 files = set()
94 94
95 95 for x in pycompat.xrange(striprev, len(repo)):
96 96 files.update(repo[x].files())
97 97
98 98 return sorted(files)
99 99
100 100
101 101 def _collectrevlog(revlog, striprev):
102 102 _, brokenset = revlog.getstrippoint(striprev)
103 103 return [revlog.linkrev(r) for r in brokenset]
104 104
105 105
106 106 def _collectbrokencsets(repo, files, striprev):
107 107 """return the changesets which will be broken by the truncation"""
108 108 s = set()
109 109
110 110 for revlog in manifestrevlogs(repo):
111 111 s.update(_collectrevlog(revlog, striprev))
112 112 for fname in files:
113 113 s.update(_collectrevlog(repo.file(fname), striprev))
114 114
115 115 return s
116 116
117 117
118 118 def strip(ui, repo, nodelist, backup=True, topic=b'backup'):
119 119 # This function requires the caller to lock the repo, but it operates
120 120 # within a transaction of its own, and thus requires there to be no current
121 121 # transaction when it is called.
122 122 if repo.currenttransaction() is not None:
123 123 raise error.ProgrammingError(b'cannot strip from inside a transaction')
124 124
125 125 # Simple way to maintain backwards compatibility for this
126 126 # argument.
127 127 if backup in [b'none', b'strip']:
128 128 backup = False
129 129
130 130 repo = repo.unfiltered()
131 131 repo.destroying()
132 132 vfs = repo.vfs
133 133 # load bookmark before changelog to avoid side effect from outdated
134 134 # changelog (see repo._refreshchangelog)
135 135 repo._bookmarks
136 136 cl = repo.changelog
137 137
138 138 # TODO handle undo of merge sets
139 139 if isinstance(nodelist, bytes):
140 140 nodelist = [nodelist]
141 141 striplist = [cl.rev(node) for node in nodelist]
142 142 striprev = min(striplist)
143 143
144 144 files = _collectfiles(repo, striprev)
145 145 saverevs = _collectbrokencsets(repo, files, striprev)
146 146
147 147 # Some revisions with rev > striprev may not be descendants of striprev.
148 148 # We have to find these revisions and put them in a bundle, so that
149 149 # we can restore them after the truncations.
150 150 # To create the bundle we use repo.changegroupsubset which requires
151 151 # the list of heads and bases of the set of interesting revisions.
152 152 # (head = revision in the set that has no descendant in the set;
153 153 # base = revision in the set that has no ancestor in the set)
154 154 tostrip = set(striplist)
155 155 saveheads = set(saverevs)
156 156 for r in cl.revs(start=striprev + 1):
157 157 if any(p in tostrip for p in cl.parentrevs(r)):
158 158 tostrip.add(r)
159 159
160 160 if r not in tostrip:
161 161 saverevs.add(r)
162 162 saveheads.difference_update(cl.parentrevs(r))
163 163 saveheads.add(r)
164 164 saveheads = [cl.node(r) for r in saveheads]
165 165
166 166 # compute base nodes
167 167 if saverevs:
168 168 descendants = set(cl.descendants(saverevs))
169 169 saverevs.difference_update(descendants)
170 170 savebases = [cl.node(r) for r in saverevs]
171 171 stripbases = [cl.node(r) for r in tostrip]
172 172
173 173 stripobsidx = obsmarkers = ()
174 174 if repo.ui.configbool(b'devel', b'strip-obsmarkers'):
175 175 obsmarkers = obsutil.exclusivemarkers(repo, stripbases)
176 176 if obsmarkers:
177 177 stripobsidx = [
178 178 i for i, m in enumerate(repo.obsstore) if m in obsmarkers
179 179 ]
180 180
181 181 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
182 182
183 183 backupfile = None
184 184 node = nodelist[-1]
185 185 if backup:
186 186 backupfile = _createstripbackup(repo, stripbases, node, topic)
187 187 # create a changegroup for all the branches we need to keep
188 188 tmpbundlefile = None
189 189 if saveheads:
190 190         # do not compress the temporary bundle since we will remove it from disk later
191 191 #
192 192         # We do not include obsolescence markers, as they might re-introduce
193 193         # prune markers we are trying to strip. This is harmless since the
194 194         # stripped markers are already backed up and we did not touch the
195 195         # markers for the saved changesets.
196 196 tmpbundlefile = backupbundle(
197 197 repo,
198 198 savebases,
199 199 saveheads,
200 200 node,
201 201 b'temp',
202 202 compress=False,
203 203 obsolescence=False,
204 204 )
205 205
206 206 with ui.uninterruptible():
207 207 try:
208 208 with repo.transaction(b"strip") as tr:
209 209 # TODO this code violates the interface abstraction of the
210 210 # transaction and makes assumptions that file storage is
211 211 # using append-only files. We'll need some kind of storage
212 212 # API to handle stripping for us.
213 213 oldfiles = set(tr._offsetmap.keys())
214 214 oldfiles.update(tr._newfiles)
215 215
216 216 tr.startgroup()
217 217 cl.strip(striprev, tr)
218 218 stripmanifest(repo, striprev, tr, files)
219 219
220 220 for fn in files:
221 221 repo.file(fn).strip(striprev, tr)
222 222 tr.endgroup()
223 223
224 224 entries = tr.readjournal()
225 225
226 226 for file, troffset in entries:
227 227 if file in oldfiles:
228 228 continue
229 229 with repo.svfs(file, b'a', checkambig=True) as fp:
230 230 fp.truncate(troffset)
231 231 if troffset == 0:
232 232 repo.store.markremoved(file)
233 233
234 234 deleteobsmarkers(repo.obsstore, stripobsidx)
235 235 del repo.obsstore
236 236 repo.invalidatevolatilesets()
237 237 repo._phasecache.filterunknown(repo)
238 238
239 239 if tmpbundlefile:
240 240 ui.note(_(b"adding branch\n"))
241 241 f = vfs.open(tmpbundlefile, b"rb")
242 242 gen = exchange.readbundle(ui, f, tmpbundlefile, vfs)
243 243 # silence internal shuffling chatter
244 244 maybe_silent = (
245 245 repo.ui.silent()
246 246 if not repo.ui.verbose
247 247 else util.nullcontextmanager()
248 248 )
249 249 with maybe_silent:
250 250 tmpbundleurl = b'bundle:' + vfs.join(tmpbundlefile)
251 251 txnname = b'strip'
252 252 if not isinstance(gen, bundle2.unbundle20):
253 253 txnname = b"strip\n%s" % urlutil.hidepassword(
254 254 tmpbundleurl
255 255 )
256 256 with repo.transaction(txnname) as tr:
257 257 bundle2.applybundle(
258 258 repo, gen, tr, source=b'strip', url=tmpbundleurl
259 259 )
260 260 f.close()
261 261
262 262 with repo.transaction(b'repair') as tr:
263 263 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
264 264 repo._bookmarks.applychanges(repo, tr, bmchanges)
265 265
266 266 # remove undo files
267 267 for undovfs, undofile in repo.undofiles():
268 268 try:
269 269 undovfs.unlink(undofile)
270 270 except OSError as e:
271 271 if e.errno != errno.ENOENT:
272 272 ui.warn(
273 273 _(b'error removing %s: %s\n')
274 274 % (
275 275 undovfs.join(undofile),
276 276 stringutil.forcebytestr(e),
277 277 )
278 278 )
279 279
280 280 except: # re-raises
281 281 if backupfile:
282 282 ui.warn(
283 283 _(b"strip failed, backup bundle stored in '%s'\n")
284 284 % vfs.join(backupfile)
285 285 )
286 286 if tmpbundlefile:
287 287 ui.warn(
288 288 _(b"strip failed, unrecovered changes stored in '%s'\n")
289 289 % vfs.join(tmpbundlefile)
290 290 )
291 291 ui.warn(
292 292 _(
293 293 b"(fix the problem, then recover the changesets with "
294 294 b"\"hg unbundle '%s'\")\n"
295 295 )
296 296 % vfs.join(tmpbundlefile)
297 297 )
298 298 raise
299 299 else:
300 300 if tmpbundlefile:
301 301 # Remove temporary bundle only if there were no exceptions
302 302 vfs.unlink(tmpbundlefile)
303 303
304 304 repo.destroyed()
305 305 # return the backup file path (or None if 'backup' was False) so
306 306 # extensions can use it
307 307 return backupfile
308 308
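A minimal usage sketch for the function above (names such as `nodes` are
placeholders for whatever changesets the caller wants removed); the repo lock
must be held and no transaction may be open:

    with repo.lock():
        backupfile = strip(ui, repo, nodes, backup=True, topic=b'backup')
        # backupfile is the vfs-relative path of the backup bundle, or None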
309 309
310 310 def softstrip(ui, repo, nodelist, backup=True, topic=b'backup'):
311 311 """perform a "soft" strip using the archived phase"""
312 312 tostrip = [c.node() for c in repo.set(b'sort(%ln::)', nodelist)]
313 313 if not tostrip:
314 314 return None
315 315
316 316 backupfile = None
317 317 if backup:
318 318 node = tostrip[0]
319 319 backupfile = _createstripbackup(repo, tostrip, node, topic)
320 320
321 321 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
322 322 with repo.transaction(b'strip') as tr:
323 323 phases.retractboundary(repo, tr, phases.archived, tostrip)
324 324 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
325 325 repo._bookmarks.applychanges(repo, tr, bmchanges)
326 326 return backupfile
327 327
328 328
329 329 def _bookmarkmovements(repo, tostrip):
330 330 # compute necessary bookmark movement
331 331 bm = repo._bookmarks
332 332 updatebm = []
333 333 for m in bm:
334 334 rev = repo[bm[m]].rev()
335 335 if rev in tostrip:
336 336 updatebm.append(m)
337 337 newbmtarget = None
338 338 # If we need to move bookmarks, compute bookmark
339 339 # targets. Otherwise we can skip doing this logic.
340 340 if updatebm:
341 341 # For a set s, max(parents(s) - s) is the same as max(heads(::s - s)),
342 342 # but is much faster
343 343 newbmtarget = repo.revs(b'max(parents(%ld) - (%ld))', tostrip, tostrip)
344 344 if newbmtarget:
345 345 newbmtarget = repo[newbmtarget.first()].node()
346 346 else:
347 347 newbmtarget = b'.'
348 348 return newbmtarget, updatebm
349 349
350 350
351 351 def _createstripbackup(repo, stripbases, node, topic):
352 352 # backup the changeset we are about to strip
353 353 vfs = repo.vfs
354 354 cl = repo.changelog
355 355 backupfile = backupbundle(repo, stripbases, cl.heads(), node, topic)
356 356 repo.ui.status(_(b"saved backup bundle to %s\n") % vfs.join(backupfile))
357 357 repo.ui.log(
358 358 b"backupbundle", b"saved backup bundle to %s\n", vfs.join(backupfile)
359 359 )
360 360 return backupfile
361 361
362 362
363 363 def safestriproots(ui, repo, nodes):
364 364 """return list of roots of nodes where descendants are covered by nodes"""
365 365 torev = repo.unfiltered().changelog.rev
366 366 revs = {torev(n) for n in nodes}
367 367 # tostrip = wanted - unsafe = wanted - ancestors(orphaned)
368 368 # orphaned = affected - wanted
369 369 # affected = descendants(roots(wanted))
370 370 # wanted = revs
371 371 revset = b'%ld - ( ::( (roots(%ld):: and not _phase(%s)) -%ld) )'
372 372 tostrip = set(repo.revs(revset, revs, revs, phases.internal, revs))
373 373 notstrip = revs - tostrip
374 374 if notstrip:
375 375 nodestr = b', '.join(sorted(short(repo[n].node()) for n in notstrip))
376 376 ui.warn(
377 377 _(b'warning: orphaned descendants detected, not stripping %s\n')
378 378 % nodestr
379 379 )
380 380 return [c.node() for c in repo.set(b'roots(%ld)', tostrip)]
381 381
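One concrete reading of the set algebra above, assuming a linear history
0 <- 1 <- 2 <- 3 and a request to strip revisions {1, 3}:

    # affected = descendants(roots({1, 3})) = {1, 2, 3}
    # orphaned = affected - wanted          = {2}
    # unsafe   = ancestors(orphaned)        = {0, 1, 2}
    # tostrip  = wanted - unsafe            = {3}
    # a warning is printed for revision 1 and only the root of {3} is returned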
382 382
383 383 class stripcallback(object):
384 384 """used as a transaction postclose callback"""
385 385
386 386 def __init__(self, ui, repo, backup, topic):
387 387 self.ui = ui
388 388 self.repo = repo
389 389 self.backup = backup
390 390 self.topic = topic or b'backup'
391 391 self.nodelist = []
392 392
393 393 def addnodes(self, nodes):
394 394 self.nodelist.extend(nodes)
395 395
396 396 def __call__(self, tr):
397 397 roots = safestriproots(self.ui, self.repo, self.nodelist)
398 398 if roots:
399 399 strip(self.ui, self.repo, roots, self.backup, self.topic)
400 400
401 401
402 402 def delayedstrip(ui, repo, nodelist, topic=None, backup=True):
403 403     """like strip, but works inside a transaction and won't strip irrelevant revs
404 404
405 405 nodelist must explicitly contain all descendants. Otherwise a warning will
406 406 be printed that some nodes are not stripped.
407 407
408 408 Will do a backup if `backup` is True. The last non-None "topic" will be
409 409 used as the backup topic name. The default backup topic name is "backup".
410 410 """
411 411 tr = repo.currenttransaction()
412 412 if not tr:
413 413 nodes = safestriproots(ui, repo, nodelist)
414 414 return strip(ui, repo, nodes, backup=backup, topic=topic)
415 415     # transaction postclose callbacks are called in alphabetical order.
416 416 # use '\xff' as prefix so we are likely to be called last.
417 417 callback = tr.getpostclose(b'\xffstrip')
418 418 if callback is None:
419 419 callback = stripcallback(ui, repo, backup=backup, topic=topic)
420 420 tr.addpostclose(b'\xffstrip', callback)
421 421 if topic:
422 422 callback.topic = topic
423 423 callback.addnodes(nodelist)
424 424
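A minimal sketch of calling the helper above from inside an open transaction
(`nodes` is assumed to contain the doomed changesets and all their
descendants):

    with repo.transaction(b'rewrite') as tr:
        # ... rewrite history ...
        delayedstrip(ui, repo, nodes, topic=b'rewrite')
    # the actual strip happens in the transaction's postclose callback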
425 425
426 426 def stripmanifest(repo, striprev, tr, files):
427 427 for revlog in manifestrevlogs(repo):
428 428 revlog.strip(striprev, tr)
429 429
430 430
431 431 def manifestrevlogs(repo):
432 432 yield repo.manifestlog.getstorage(b'')
433 433 if scmutil.istreemanifest(repo):
434 434 # This logic is safe if treemanifest isn't enabled, but also
435 435 # pointless, so we skip it if treemanifest isn't enabled.
436 for t, unencoded, encoded, size in repo.store.datafiles():
436 for t, unencoded, size in repo.store.datafiles():
437 437 if unencoded.startswith(b'meta/') and unencoded.endswith(
438 438 b'00manifest.i'
439 439 ):
440 440 dir = unencoded[5:-12]
441 441 yield repo.manifestlog.getstorage(dir)
442 442
443 443
444 444 def rebuildfncache(ui, repo, only_data=False):
445 445 """Rebuilds the fncache file from repo history.
446 446
447 447 Missing entries will be added. Extra entries will be removed.
448 448 """
449 449 repo = repo.unfiltered()
450 450
451 451 if requirements.FNCACHE_REQUIREMENT not in repo.requirements:
452 452 ui.warn(
453 453 _(
454 454 b'(not rebuilding fncache because repository does not '
455 455 b'support fncache)\n'
456 456 )
457 457 )
458 458 return
459 459
460 460 with repo.lock():
461 461 fnc = repo.store.fncache
462 462 fnc.ensureloaded(warn=ui.warn)
463 463
464 464 oldentries = set(fnc.entries)
465 465 newentries = set()
466 466 seenfiles = set()
467 467
468 468 if only_data:
469 469 # Trust the listing of .i from the fncache, but not the .d. This is
470 470             # much faster, because we only need to stat every possible .d file,
471 471 # instead of reading the full changelog
472 472 for f in fnc:
473 473 if f[:5] == b'data/' and f[-2:] == b'.i':
474 474 seenfiles.add(f[5:-2])
475 475 newentries.add(f)
476 476 dataf = f[:-2] + b'.d'
477 477 if repo.store._exists(dataf):
478 478 newentries.add(dataf)
479 479 else:
480 480 progress = ui.makeprogress(
481 481 _(b'rebuilding'), unit=_(b'changesets'), total=len(repo)
482 482 )
483 483 for rev in repo:
484 484 progress.update(rev)
485 485
486 486 ctx = repo[rev]
487 487 for f in ctx.files():
488 488 # This is to minimize I/O.
489 489 if f in seenfiles:
490 490 continue
491 491 seenfiles.add(f)
492 492
493 493 i = b'data/%s.i' % f
494 494 d = b'data/%s.d' % f
495 495
496 496 if repo.store._exists(i):
497 497 newentries.add(i)
498 498 if repo.store._exists(d):
499 499 newentries.add(d)
500 500
501 501 progress.complete()
502 502
503 503 if requirements.TREEMANIFEST_REQUIREMENT in repo.requirements:
504 504 # This logic is safe if treemanifest isn't enabled, but also
505 505 # pointless, so we skip it if treemanifest isn't enabled.
506 506 for dir in pathutil.dirs(seenfiles):
507 507 i = b'meta/%s/00manifest.i' % dir
508 508 d = b'meta/%s/00manifest.d' % dir
509 509
510 510 if repo.store._exists(i):
511 511 newentries.add(i)
512 512 if repo.store._exists(d):
513 513 newentries.add(d)
514 514
515 515 addcount = len(newentries - oldentries)
516 516 removecount = len(oldentries - newentries)
517 517 for p in sorted(oldentries - newentries):
518 518 ui.write(_(b'removing %s\n') % p)
519 519 for p in sorted(newentries - oldentries):
520 520 ui.write(_(b'adding %s\n') % p)
521 521
522 522 if addcount or removecount:
523 523 ui.write(
524 524 _(b'%d items added, %d removed from fncache\n')
525 525 % (addcount, removecount)
526 526 )
527 527 fnc.entries = newentries
528 528 fnc._dirty = True
529 529
530 530 with repo.transaction(b'fncache') as tr:
531 531 fnc.write(tr)
532 532 else:
533 533 ui.write(_(b'fncache already up to date\n'))
534 534
535 535
536 536 def deleteobsmarkers(obsstore, indices):
537 537 """Delete some obsmarkers from obsstore and return how many were deleted
538 538
539 539 'indices' is a list of ints which are the indices
540 540 of the markers to be deleted.
541 541
542 542 Every invocation of this function completely rewrites the obsstore file,
543 543 skipping the markers we want to be removed. The new temporary file is
544 544 created, remaining markers are written there and on .close() this file
545 545 gets atomically renamed to obsstore, thus guaranteeing consistency."""
546 546 if not indices:
547 547 # we don't want to rewrite the obsstore with the same content
548 548 return
549 549
550 550 left = []
551 551 current = obsstore._all
552 552 n = 0
553 553 for i, m in enumerate(current):
554 554 if i in indices:
555 555 n += 1
556 556 continue
557 557 left.append(m)
558 558
559 559 newobsstorefile = obsstore.svfs(b'obsstore', b'w', atomictemp=True)
560 560 for bytes in obsolete.encodemarkers(left, True, obsstore._version):
561 561 newobsstorefile.write(bytes)
562 562 newobsstorefile.close()
563 563 return n
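A small usage sketch, assuming the repo is locked and the hypothetical marker
indices 0 and 3 exist in the obsstore:

    with repo.lock():
        removed = deleteobsmarkers(repo.obsstore, [0, 3])
        # obsstore was rewritten atomically without those markers; removed == 2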
@@ -1,886 +1,886 b''
1 1 # censor code related to censoring revision
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import binascii
11 11 import contextlib
12 12 import os
13 13 import struct
14 14
15 15 from ..node import (
16 16 nullrev,
17 17 )
18 18 from .constants import (
19 19 COMP_MODE_PLAIN,
20 20 ENTRY_DATA_COMPRESSED_LENGTH,
21 21 ENTRY_DATA_COMPRESSION_MODE,
22 22 ENTRY_DATA_OFFSET,
23 23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
24 24 ENTRY_DELTA_BASE,
25 25 ENTRY_LINK_REV,
26 26 ENTRY_NODE_ID,
27 27 ENTRY_PARENT_1,
28 28 ENTRY_PARENT_2,
29 29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
30 30 ENTRY_SIDEDATA_COMPRESSION_MODE,
31 31 ENTRY_SIDEDATA_OFFSET,
32 32 REVIDX_ISCENSORED,
33 33 REVLOGV0,
34 34 REVLOGV1,
35 35 )
36 36 from ..i18n import _
37 37
38 38 from .. import (
39 39 error,
40 40 mdiff,
41 41 pycompat,
42 42 revlogutils,
43 43 util,
44 44 )
45 45 from ..utils import (
46 46 storageutil,
47 47 )
48 48 from . import (
49 49 constants,
50 50 deltas,
51 51 )
52 52
53 53
54 54 def v1_censor(rl, tr, censornode, tombstone=b''):
55 55 """censors a revision in a "version 1" revlog"""
56 56 assert rl._format_version == constants.REVLOGV1, rl._format_version
57 57
58 58 # avoid cycle
59 59 from .. import revlog
60 60
61 61 censorrev = rl.rev(censornode)
62 62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
63 63
64 64 # Rewriting the revlog in place is hard. Our strategy for censoring is
65 65 # to create a new revlog, copy all revisions to it, then replace the
66 66 # revlogs on transaction close.
67 67 #
68 68 # This is a bit dangerous. We could easily have a mismatch of state.
69 69 newrl = revlog.revlog(
70 70 rl.opener,
71 71 target=rl.target,
72 72 radix=rl.radix,
73 73 postfix=b'tmpcensored',
74 74 censorable=True,
75 75 )
76 76 newrl._format_version = rl._format_version
77 77 newrl._format_flags = rl._format_flags
78 78 newrl._generaldelta = rl._generaldelta
79 79 newrl._parse_index = rl._parse_index
80 80
81 81 for rev in rl.revs():
82 82 node = rl.node(rev)
83 83 p1, p2 = rl.parents(node)
84 84
85 85 if rev == censorrev:
86 86 newrl.addrawrevision(
87 87 tombstone,
88 88 tr,
89 89 rl.linkrev(censorrev),
90 90 p1,
91 91 p2,
92 92 censornode,
93 93 constants.REVIDX_ISCENSORED,
94 94 )
95 95
96 96 if newrl.deltaparent(rev) != nullrev:
97 97 m = _(b'censored revision stored as delta; cannot censor')
98 98 h = _(
99 99 b'censoring of revlogs is not fully implemented;'
100 100 b' please report this bug'
101 101 )
102 102 raise error.Abort(m, hint=h)
103 103 continue
104 104
105 105 if rl.iscensored(rev):
106 106 if rl.deltaparent(rev) != nullrev:
107 107 m = _(
108 108 b'cannot censor due to censored '
109 109 b'revision having delta stored'
110 110 )
111 111 raise error.Abort(m)
112 112 rawtext = rl._chunk(rev)
113 113 else:
114 114 rawtext = rl.rawdata(rev)
115 115
116 116 newrl.addrawrevision(
117 117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
118 118 )
119 119
120 120 tr.addbackup(rl._indexfile, location=b'store')
121 121 if not rl._inline:
122 122 tr.addbackup(rl._datafile, location=b'store')
123 123
124 124 rl.opener.rename(newrl._indexfile, rl._indexfile)
125 125 if not rl._inline:
126 126 rl.opener.rename(newrl._datafile, rl._datafile)
127 127
128 128 rl.clearcaches()
129 129 rl._loadindex()
130 130
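A sketch of driving the helper above directly, assuming `fl` is the filelog
that contains `censornode` and that the caller holds the store lock:

    with repo.transaction(b'censor') as tr:
        v1_censor(fl._revlog, tr, censornode, tombstone=b'removed by request')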
131 131
132 132 def v2_censor(revlog, tr, censornode, tombstone=b''):
133 133 """censors a revision in a "version 2" revlog"""
134 134 assert revlog._format_version != REVLOGV0, revlog._format_version
135 135 assert revlog._format_version != REVLOGV1, revlog._format_version
136 136
137 137 censor_revs = {revlog.rev(censornode)}
138 138 _rewrite_v2(revlog, tr, censor_revs, tombstone)
139 139
140 140
141 141 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
142 142 """rewrite a revlog to censor some of its content
143 143
144 144 General principle
145 145
146 146 We create new revlog files (index/data/sidedata) to copy the content of
147 147 the existing data without the censored data.
148 148
149 149 We need to recompute new delta for any revision that used the censored
150 150 revision as delta base. As the cumulative size of the new delta may be
151 151 large, we store them in a temporary file until they are stored in their
152 152 final destination.
153 153
154 154 All data before the censored data can be blindly copied. The rest needs
155 155     to be copied as we go and the associated index entry needs adjustment.
156 156 """
157 157 assert revlog._format_version != REVLOGV0, revlog._format_version
158 158 assert revlog._format_version != REVLOGV1, revlog._format_version
159 159
160 160 old_index = revlog.index
161 161 docket = revlog._docket
162 162
163 163 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
164 164
165 165 first_excl_rev = min(censor_revs)
166 166
167 167 first_excl_entry = revlog.index[first_excl_rev]
168 168 index_cutoff = revlog.index.entry_size * first_excl_rev
169 169 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
170 170 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
171 171
172 172 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
173 173 # rev β†’ (new_base, data_start, data_end, compression_mode)
174 174 rewritten_entries = _precompute_rewritten_delta(
175 175 revlog,
176 176 old_index,
177 177 censor_revs,
178 178 tmp_storage,
179 179 )
180 180
181 181 all_files = _setup_new_files(
182 182 revlog,
183 183 index_cutoff,
184 184 data_cutoff,
185 185 sidedata_cutoff,
186 186 )
187 187
188 188         # we don't need to open the old index file since its content already
189 189         # exists in a usable form in `old_index`.
190 190 with all_files() as open_files:
191 191 (
192 192 old_data_file,
193 193 old_sidedata_file,
194 194 new_index_file,
195 195 new_data_file,
196 196 new_sidedata_file,
197 197 ) = open_files
198 198
199 199 # writing the censored revision
200 200
201 201 # Writing all subsequent revisions
202 202 for rev in range(first_excl_rev, len(old_index)):
203 203 if rev in censor_revs:
204 204 _rewrite_censor(
205 205 revlog,
206 206 old_index,
207 207 open_files,
208 208 rev,
209 209 tombstone,
210 210 )
211 211 else:
212 212 _rewrite_simple(
213 213 revlog,
214 214 old_index,
215 215 open_files,
216 216 rev,
217 217 rewritten_entries,
218 218 tmp_storage,
219 219 )
220 220 docket.write(transaction=None, stripping=True)
221 221
222 222
223 223 def _precompute_rewritten_delta(
224 224 revlog,
225 225 old_index,
226 226 excluded_revs,
227 227 tmp_storage,
228 228 ):
229 229 """Compute new delta for revisions whose delta is based on revision that
230 230 will not survive as is.
231 231
232 232     Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
233 233 """
234 234 dc = deltas.deltacomputer(revlog)
235 235 rewritten_entries = {}
236 236 first_excl_rev = min(excluded_revs)
237 237 with revlog._segmentfile._open_read() as dfh:
238 238 for rev in range(first_excl_rev, len(old_index)):
239 239 if rev in excluded_revs:
240 240                 # this revision is among the excluded (censored) ones and will
241 241                 # be rewritten, so no delta needs to be recomputed for it here.
242 242 continue
243 243 entry = old_index[rev]
244 244 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
245 245 continue
246 246             # This is a revision that uses the censored revision as the base
247 247             # for its delta. We need to compute a new delta for it.
248 248 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
249 249 # this revision is empty, we can delta against nullrev
250 250 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
251 251 else:
252 252
253 253 text = revlog.rawdata(rev, _df=dfh)
254 254 info = revlogutils.revisioninfo(
255 255 node=entry[ENTRY_NODE_ID],
256 256 p1=revlog.node(entry[ENTRY_PARENT_1]),
257 257 p2=revlog.node(entry[ENTRY_PARENT_2]),
258 258 btext=[text],
259 259 textlen=len(text),
260 260 cachedelta=None,
261 261 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
262 262 )
263 263 d = dc.finddeltainfo(
264 264 info, dfh, excluded_bases=excluded_revs, target_rev=rev
265 265 )
266 266 default_comp = revlog._docket.default_compression_header
267 267 comp_mode, d = deltas.delta_compression(default_comp, d)
268 268 # using `tell` is a bit lazy, but we are not here for speed
269 269 start = tmp_storage.tell()
270 270 tmp_storage.write(d.data[1])
271 271 end = tmp_storage.tell()
272 272 rewritten_entries[rev] = (d.base, start, end, comp_mode)
273 273 return rewritten_entries
274 274
275 275
276 276 def _setup_new_files(
277 277 revlog,
278 278 index_cutoff,
279 279 data_cutoff,
280 280 sidedata_cutoff,
281 281 ):
282 282 """
283 283
284 284 return a context manager to open all the relevant files:
285 285 - old_data_file,
286 286 - old_sidedata_file,
287 287 - new_index_file,
288 288 - new_data_file,
289 289 - new_sidedata_file,
290 290
291 291 The old_index_file is not here because it is accessed through the
292 292     `old_index` object of the calling function.
293 293 """
294 294 docket = revlog._docket
295 295 old_index_filepath = revlog.opener.join(docket.index_filepath())
296 296 old_data_filepath = revlog.opener.join(docket.data_filepath())
297 297 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
298 298
299 299 new_index_filepath = revlog.opener.join(docket.new_index_file())
300 300 new_data_filepath = revlog.opener.join(docket.new_data_file())
301 301 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
302 302
303 303 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
304 304 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
305 305 util.copyfile(
306 306 old_sidedata_filepath,
307 307 new_sidedata_filepath,
308 308 nb_bytes=sidedata_cutoff,
309 309 )
310 310 revlog.opener.register_file(docket.index_filepath())
311 311 revlog.opener.register_file(docket.data_filepath())
312 312 revlog.opener.register_file(docket.sidedata_filepath())
313 313
314 314 docket.index_end = index_cutoff
315 315 docket.data_end = data_cutoff
316 316 docket.sidedata_end = sidedata_cutoff
317 317
318 318 # reload the revlog internal information
319 319 revlog.clearcaches()
320 320 revlog._loadindex(docket=docket)
321 321
322 322 @contextlib.contextmanager
323 323 def all_files_opener():
324 324         # hide the opening in a helper function to please check-code, black
325 325         # and various python versions at the same time
326 326 with open(old_data_filepath, 'rb') as old_data_file:
327 327 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
328 328 with open(new_index_filepath, 'r+b') as new_index_file:
329 329 with open(new_data_filepath, 'r+b') as new_data_file:
330 330 with open(
331 331 new_sidedata_filepath, 'r+b'
332 332 ) as new_sidedata_file:
333 333 new_index_file.seek(0, os.SEEK_END)
334 334 assert new_index_file.tell() == index_cutoff
335 335 new_data_file.seek(0, os.SEEK_END)
336 336 assert new_data_file.tell() == data_cutoff
337 337 new_sidedata_file.seek(0, os.SEEK_END)
338 338 assert new_sidedata_file.tell() == sidedata_cutoff
339 339 yield (
340 340 old_data_file,
341 341 old_sidedata_file,
342 342 new_index_file,
343 343 new_data_file,
344 344 new_sidedata_file,
345 345 )
346 346
347 347 return all_files_opener
348 348
349 349
350 350 def _rewrite_simple(
351 351 revlog,
352 352 old_index,
353 353 all_files,
354 354 rev,
355 355 rewritten_entries,
356 356 tmp_storage,
357 357 ):
358 358 """append a normal revision to the index after the rewritten one(s)"""
359 359 (
360 360 old_data_file,
361 361 old_sidedata_file,
362 362 new_index_file,
363 363 new_data_file,
364 364 new_sidedata_file,
365 365 ) = all_files
366 366 entry = old_index[rev]
367 367 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
368 368 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
369 369
370 370 if rev not in rewritten_entries:
371 371 old_data_file.seek(old_data_offset)
372 372 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
373 373 new_data = old_data_file.read(new_data_size)
374 374 data_delta_base = entry[ENTRY_DELTA_BASE]
375 375 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
376 376 else:
377 377 (
378 378 data_delta_base,
379 379 start,
380 380 end,
381 381 d_comp_mode,
382 382 ) = rewritten_entries[rev]
383 383 new_data_size = end - start
384 384 tmp_storage.seek(start)
385 385 new_data = tmp_storage.read(new_data_size)
386 386
387 387     # It might be faster to group contiguous read/write operations;
388 388     # however, this is censor, an operation that is not focused
389 389     # on stellar performance, so this optimisation has not been
390 390     # written yet.
391 391 new_data_offset = new_data_file.tell()
392 392 new_data_file.write(new_data)
393 393
394 394 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
395 395 new_sidedata_offset = new_sidedata_file.tell()
396 396 if 0 < sidedata_size:
397 397 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
398 398 old_sidedata_file.seek(old_sidedata_offset)
399 399 new_sidedata = old_sidedata_file.read(sidedata_size)
400 400 new_sidedata_file.write(new_sidedata)
401 401
402 402 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
403 403 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
404 404 assert data_delta_base <= rev, (data_delta_base, rev)
405 405
406 406 new_entry = revlogutils.entry(
407 407 flags=flags,
408 408 data_offset=new_data_offset,
409 409 data_compressed_length=new_data_size,
410 410 data_uncompressed_length=data_uncompressed_length,
411 411 data_delta_base=data_delta_base,
412 412 link_rev=entry[ENTRY_LINK_REV],
413 413 parent_rev_1=entry[ENTRY_PARENT_1],
414 414 parent_rev_2=entry[ENTRY_PARENT_2],
415 415 node_id=entry[ENTRY_NODE_ID],
416 416 sidedata_offset=new_sidedata_offset,
417 417 sidedata_compressed_length=sidedata_size,
418 418 data_compression_mode=d_comp_mode,
419 419 sidedata_compression_mode=sd_com_mode,
420 420 )
421 421 revlog.index.append(new_entry)
422 422 entry_bin = revlog.index.entry_binary(rev)
423 423 new_index_file.write(entry_bin)
424 424
425 425 revlog._docket.index_end = new_index_file.tell()
426 426 revlog._docket.data_end = new_data_file.tell()
427 427 revlog._docket.sidedata_end = new_sidedata_file.tell()
428 428
429 429
430 430 def _rewrite_censor(
431 431 revlog,
432 432 old_index,
433 433 all_files,
434 434 rev,
435 435 tombstone,
436 436 ):
437 437 """rewrite and append a censored revision"""
438 438 (
439 439 old_data_file,
440 440 old_sidedata_file,
441 441 new_index_file,
442 442 new_data_file,
443 443 new_sidedata_file,
444 444 ) = all_files
445 445 entry = old_index[rev]
446 446
447 447 # XXX consider trying the default compression too
448 448 new_data_size = len(tombstone)
449 449 new_data_offset = new_data_file.tell()
450 450 new_data_file.write(tombstone)
451 451
452 452 # we are not adding any sidedata as they might leak info about the censored version
453 453
454 454 link_rev = entry[ENTRY_LINK_REV]
455 455
456 456 p1 = entry[ENTRY_PARENT_1]
457 457 p2 = entry[ENTRY_PARENT_2]
458 458
459 459 new_entry = revlogutils.entry(
460 460 flags=constants.REVIDX_ISCENSORED,
461 461 data_offset=new_data_offset,
462 462 data_compressed_length=new_data_size,
463 463 data_uncompressed_length=new_data_size,
464 464 data_delta_base=rev,
465 465 link_rev=link_rev,
466 466 parent_rev_1=p1,
467 467 parent_rev_2=p2,
468 468 node_id=entry[ENTRY_NODE_ID],
469 469 sidedata_offset=0,
470 470 sidedata_compressed_length=0,
471 471 data_compression_mode=COMP_MODE_PLAIN,
472 472 sidedata_compression_mode=COMP_MODE_PLAIN,
473 473 )
474 474 revlog.index.append(new_entry)
475 475 entry_bin = revlog.index.entry_binary(rev)
476 476 new_index_file.write(entry_bin)
477 477 revlog._docket.index_end = new_index_file.tell()
478 478 revlog._docket.data_end = new_data_file.tell()
479 479
480 480
481 481 def _get_filename_from_filelog_index(path):
482 482 # Drop the extension and the `data/` prefix
483 483 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
484 484 if len(path_part) < 2:
485 485 msg = _(b"cannot recognize filelog from filename: '%s'")
486 486 msg %= path
487 487 raise error.Abort(msg)
488 488
489 489 return path_part[1]
490 490
491 491
492 492 def _filelog_from_filename(repo, path):
493 493 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
494 494
495 495 from .. import filelog # avoid cycle
496 496
497 497 fl = filelog.filelog(repo.svfs, path)
498 498 return fl
499 499
500 500
501 501 def _write_swapped_parents(repo, rl, rev, offset, fp):
502 502 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
503 503 from ..pure import parsers # avoid cycle
504 504
505 505 if repo._currentlock(repo._lockref) is None:
506 506 # Let's be paranoid about it
507 507 msg = "repo needs to be locked to rewrite parents"
508 508 raise error.ProgrammingError(msg)
509 509
510 510 index_format = parsers.IndexObject.index_format
511 511 entry = rl.index[rev]
512 512 new_entry = list(entry)
513 513 new_entry[5], new_entry[6] = entry[6], entry[5]
514 514 packed = index_format.pack(*new_entry[:8])
515 515 fp.seek(offset)
516 516 fp.write(packed)
517 517
518 518
519 519 def _reorder_filelog_parents(repo, fl, to_fix):
520 520 """
521 521 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
522 522 new version to disk, overwriting the old one with a rename.
523 523 """
524 524 from ..pure import parsers # avoid cycle
525 525
526 526 ui = repo.ui
527 527 assert len(to_fix) > 0
528 528 rl = fl._revlog
529 529 if rl._format_version != constants.REVLOGV1:
530 530 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
531 531 raise error.ProgrammingError(msg)
532 532
533 533 index_file = rl._indexfile
534 534 new_file_path = index_file + b'.tmp-parents-fix'
535 535 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
536 536
537 537 with ui.uninterruptible():
538 538 try:
539 539 util.copyfile(
540 540 rl.opener.join(index_file),
541 541 rl.opener.join(new_file_path),
542 542 checkambig=rl._checkambig,
543 543 )
544 544
545 545 with rl.opener(new_file_path, mode=b"r+") as fp:
546 546 if rl._inline:
547 547 index = parsers.InlinedIndexObject(fp.read())
548 548 for rev in fl.revs():
549 549 if rev in to_fix:
550 550 offset = index._calculate_index(rev)
551 551 _write_swapped_parents(repo, rl, rev, offset, fp)
552 552 ui.write(repaired_msg % (rev, index_file))
553 553 else:
554 554 index_format = parsers.IndexObject.index_format
555 555 for rev in to_fix:
556 556 offset = rev * index_format.size
557 557 _write_swapped_parents(repo, rl, rev, offset, fp)
558 558 ui.write(repaired_msg % (rev, index_file))
559 559
560 560 rl.opener.rename(new_file_path, index_file)
561 561 rl.clearcaches()
562 562 rl._loadindex()
563 563 finally:
564 564 util.tryunlink(new_file_path)
565 565
566 566
567 567 def _is_revision_affected(fl, filerev, metadata_cache=None):
568 568 full_text = lambda: fl._revlog.rawdata(filerev)
569 569 parent_revs = lambda: fl._revlog.parentrevs(filerev)
570 570 return _is_revision_affected_inner(
571 571 full_text, parent_revs, filerev, metadata_cache
572 572 )
573 573
574 574
575 575 def _is_revision_affected_inner(
576 576 full_text,
577 577 parents_revs,
578 578 filerev,
579 579 metadata_cache=None,
580 580 ):
581 581     """Mercurial currently (5.9rc0) gives `p1 == nullrev and p2 != nullrev` a
582 582     special meaning (compared to the reverse ordering) in the context of
583 583     filelog-based copytracing. issue6528 exists because new code assumed that
584 584     parent ordering didn't matter, so this detects whether the revision contains
585 585     metadata (since it is only used for filelog-based copytracing) and whether
586 586     its parents are in the "wrong" order."""
587 587 try:
588 588 raw_text = full_text()
589 589 except error.CensoredNodeError:
590 590 # We don't care about censored nodes as they never carry metadata
591 591 return False
592 592 has_meta = raw_text.startswith(b'\x01\n')
593 593 if metadata_cache is not None:
594 594 metadata_cache[filerev] = has_meta
595 595 if has_meta:
596 596 (p1, p2) = parents_revs()
597 597 if p1 != nullrev and p2 == nullrev:
598 598 return True
599 599 return False
600 600
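A minimal illustration of the predicate above with hand-built callables; the
`\x01\n` pair is the metadata marker and -1 stands for nullrev (the copy
metadata content is a placeholder):

    raw = b'\x01\ncopy: old-name\ncopyrev: <hex>\n\x01\nfile content'
    cache = {}
    assert _is_revision_affected_inner(
        lambda: raw,      # full text starts with the metadata marker
        lambda: (4, -1),  # p1 != nullrev, p2 == nullrev: the "wrong" order
        7,                # filerev, only used as the cache key
        cache,
    )
    assert cache[7] is True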
601 601
602 602 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
603 603 rl = fl._revlog
604 604 is_censored = lambda: rl.iscensored(filerev)
605 605 delta_base = lambda: rl.deltaparent(filerev)
606 606 delta = lambda: rl._chunk(filerev)
607 607 full_text = lambda: rl.rawdata(filerev)
608 608 parent_revs = lambda: rl.parentrevs(filerev)
609 609 return _is_revision_affected_fast_inner(
610 610 is_censored,
611 611 delta_base,
612 612 delta,
613 613 full_text,
614 614 parent_revs,
615 615 filerev,
616 616 metadata_cache,
617 617 )
618 618
619 619
620 620 def _is_revision_affected_fast_inner(
621 621 is_censored,
622 622 delta_base,
623 623 delta,
624 624 full_text,
625 625 parent_revs,
626 626 filerev,
627 627 metadata_cache,
628 628 ):
629 629 """Optimization fast-path for `_is_revision_affected`.
630 630
631 631     `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
632 632     revision to check whether its delta base has metadata; this saves computing
633 633     the full text by looking only at the current delta.
634 634
635 635 This optimization only works if the revisions are looked at in order."""
636 636
637 637 if is_censored():
638 638 # Censored revisions don't contain metadata, so they cannot be affected
639 639 metadata_cache[filerev] = False
640 640 return False
641 641
642 642 p1, p2 = parent_revs()
643 643 if p1 == nullrev or p2 != nullrev:
644 644 return False
645 645
646 646 delta_parent = delta_base()
647 647 parent_has_metadata = metadata_cache.get(delta_parent)
648 648 if parent_has_metadata is None:
649 649 return _is_revision_affected_inner(
650 650 full_text,
651 651 parent_revs,
652 652 filerev,
653 653 metadata_cache,
654 654 )
655 655
656 656 chunk = delta()
657 657 if not len(chunk):
658 658 # No diff for this revision
659 659 return parent_has_metadata
660 660
661 661 header_length = 12
662 662 if len(chunk) < header_length:
663 663 raise error.Abort(_(b"patch cannot be decoded"))
664 664
665 665 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
666 666
667 667 if start < 2: # len(b'\x01\n') == 2
668 668 # This delta does *something* to the metadata marker (if any).
669 669 # Check it the slow way
670 670 is_affected = _is_revision_affected_inner(
671 671 full_text,
672 672 parent_revs,
673 673 filerev,
674 674 metadata_cache,
675 675 )
676 676 return is_affected
677 677
678 678 # The diff did not remove or add the metadata header, it's then in the same
679 679 # situation as its parent
680 680 metadata_cache[filerev] = parent_has_metadata
681 681 return parent_has_metadata
682 682
683 683
684 684 def _from_report(ui, repo, context, from_report, dry_run):
685 685 """
686 686     Fix the revisions given in the `from_report` file, but still check whether the
687 687 revisions are indeed affected to prevent an unfortunate cyclic situation
688 688 where we'd swap well-ordered parents again.
689 689
690 690 See the doc for `debug_fix_issue6528` for the format documentation.
691 691 """
692 692 ui.write(_(b"loading report file '%s'\n") % from_report)
693 693
694 694 with context(), open(from_report, mode='rb') as f:
695 695 for line in f.read().split(b'\n'):
696 696 if not line:
697 697 continue
698 698 filenodes, filename = line.split(b' ', 1)
699 699 fl = _filelog_from_filename(repo, filename)
700 700 to_fix = set(
701 701 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
702 702 )
703 703 excluded = set()
704 704
705 705 for filerev in to_fix:
706 706 if _is_revision_affected(fl, filerev):
707 707 msg = b"found affected revision %d for filelog '%s'\n"
708 708 ui.warn(msg % (filerev, filename))
709 709 else:
710 710 msg = _(b"revision %s of file '%s' is not affected\n")
711 711 msg %= (binascii.hexlify(fl.node(filerev)), filename)
712 712 ui.warn(msg)
713 713 excluded.add(filerev)
714 714
715 715 to_fix = to_fix - excluded
716 716 if not to_fix:
717 717 msg = _(b"no affected revisions were found for '%s'\n")
718 718 ui.write(msg % filename)
719 719 continue
720 720 if not dry_run:
721 721 _reorder_filelog_parents(repo, fl, sorted(to_fix))
722 722
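For reference, the report file parsed here (and produced by
`repair_issue6528` below) is plain text, one line per affected filelog:
comma-separated hex filenodes, a single space, then the tracked filename,
e.g.:

    <hex-filenode-1>,<hex-filenode-2> dir/affected-file.txt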
723 723
724 724 def filter_delta_issue6528(revlog, deltas_iter):
725 725     """filter incoming deltas to repair issue 6528 on the fly"""
726 726 metadata_cache = {}
727 727
728 728 deltacomputer = deltas.deltacomputer(revlog)
729 729
730 730 for rev, d in enumerate(deltas_iter, len(revlog)):
731 731 (
732 732 node,
733 733 p1_node,
734 734 p2_node,
735 735 linknode,
736 736 deltabase,
737 737 delta,
738 738 flags,
739 739 sidedata,
740 740 ) = d
741 741
742 742 if not revlog.index.has_node(deltabase):
743 743 raise error.LookupError(
744 744 deltabase, revlog.radix, _(b'unknown parent')
745 745 )
746 746 base_rev = revlog.rev(deltabase)
747 747 if not revlog.index.has_node(p1_node):
748 748 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
749 749 p1_rev = revlog.rev(p1_node)
750 750 if not revlog.index.has_node(p2_node):
751 751 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
752 752 p2_rev = revlog.rev(p2_node)
753 753
754 754 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
755 755 delta_base = lambda: revlog.rev(delta_base)
756 756 delta_base = lambda: base_rev
757 757 parent_revs = lambda: (p1_rev, p2_rev)
758 758
759 759 def full_text():
760 760 # note: being able to reuse the full text computation in the
761 761             # underlying addrevision would be useful; however, this is a bit too
762 762             # intrusive for the "quick" issue6528 fix we are writing before the
763 763 # 5.8 release
764 764 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
765 765
766 766 revinfo = revlogutils.revisioninfo(
767 767 node,
768 768 p1_node,
769 769 p2_node,
770 770 [None],
771 771 textlen,
772 772 (base_rev, delta),
773 773 flags,
774 774 )
775 775 # cached by the global "writing" context
776 776 assert revlog._writinghandles is not None
777 777 if revlog._inline:
778 778 fh = revlog._writinghandles[0]
779 779 else:
780 780 fh = revlog._writinghandles[1]
781 781 return deltacomputer.buildtext(revinfo, fh)
782 782
783 783 is_affected = _is_revision_affected_fast_inner(
784 784 is_censored,
785 785 delta_base,
786 786 lambda: delta,
787 787 full_text,
788 788 parent_revs,
789 789 rev,
790 790 metadata_cache,
791 791 )
792 792 if is_affected:
793 793 d = (
794 794 node,
795 795 p2_node,
796 796 p1_node,
797 797 linknode,
798 798 deltabase,
799 799 delta,
800 800 flags,
801 801 sidedata,
802 802 )
803 803 yield d
804 804
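A sketch of how this generator could wrap an incoming delta stream, assuming
`deltas_iter` yields the same 8-tuples the loop above consumes and that the
revlog is inside a writing context:

    for entry in filter_delta_issue6528(fl._revlog, deltas_iter):
        node, p1, p2, linknode, deltabase, delta, flags, sidedata = entry
        # affected revisions come back with p1/p2 swapped into the safe order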
805 805
806 806 def repair_issue6528(
807 807 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
808 808 ):
809 809 from .. import store # avoid cycle
810 810
811 811 @contextlib.contextmanager
812 812 def context():
813 813 if dry_run or to_report: # No need for locking
814 814 yield
815 815 else:
816 816 with repo.wlock(), repo.lock():
817 817 yield
818 818
819 819 if from_report:
820 820 return _from_report(ui, repo, context, from_report, dry_run)
821 821
822 822 report_entries = []
823 823
824 824 with context():
825 825 files = list(
826 826 (file_type, path)
827 for (file_type, path, _e, _s) in repo.store.datafiles()
827 for (file_type, path, _s) in repo.store.datafiles()
828 828 if path.endswith(b'.i') and file_type & store.FILEFLAGS_FILELOG
829 829 )
830 830
831 831 progress = ui.makeprogress(
832 832 _(b"looking for affected revisions"),
833 833 unit=_(b"filelogs"),
834 834 total=len(files),
835 835 )
836 836 found_nothing = True
837 837
838 838 for file_type, path in files:
839 839 if (
840 840 not path.endswith(b'.i')
841 841 or not file_type & store.FILEFLAGS_FILELOG
842 842 ):
843 843 continue
844 844 progress.increment()
845 845 filename = _get_filename_from_filelog_index(path)
846 846 fl = _filelog_from_filename(repo, filename)
847 847
848 848 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
849 849 to_fix = set()
850 850 metadata_cache = {}
851 851 for filerev in fl.revs():
852 852 affected = _is_revision_affected_fast(
853 853 repo, fl, filerev, metadata_cache
854 854 )
855 855 if paranoid:
856 856 slow = _is_revision_affected(fl, filerev)
857 857 if slow != affected:
858 858 msg = _(b"paranoid check failed for '%s' at node %s")
859 859 node = binascii.hexlify(fl.node(filerev))
860 860 raise error.Abort(msg % (filename, node))
861 861 if affected:
862 862 msg = b"found affected revision %d for filelog '%s'\n"
863 863 ui.warn(msg % (filerev, path))
864 864 found_nothing = False
865 865 if not dry_run:
866 866 if to_report:
867 867 to_fix.add(binascii.hexlify(fl.node(filerev)))
868 868 else:
869 869 to_fix.add(filerev)
870 870
871 871 if to_fix:
872 872 to_fix = sorted(to_fix)
873 873 if to_report:
874 874 report_entries.append((filename, to_fix))
875 875 else:
876 876 _reorder_filelog_parents(repo, fl, to_fix)
877 877
878 878 if found_nothing:
879 879 ui.write(_(b"no affected revisions were found\n"))
880 880
881 881 if to_report and report_entries:
882 882 with open(to_report, mode="wb") as f:
883 883 for path, to_fix in report_entries:
884 884 f.write(b"%s %s\n" % (b",".join(to_fix), path))
885 885
886 886 progress.complete()
@@ -1,839 +1,850 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import functools
12 12 import os
13 13 import re
14 14 import stat
15 15
16 16 from .i18n import _
17 17 from .pycompat import getattr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 manifest,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 28 from .utils import hashutil
29 29
30 30 parsers = policy.importmod('parsers')
31 31 # how many bytes should be read from the fncache in one read
32 32 # It is done to prevent loading large fncache files into memory
33 33 fncache_chunksize = 10 ** 6
34 34
35 35
36 36 def _matchtrackedpath(path, matcher):
37 37 """parses a fncache entry and returns whether the entry is tracking a path
38 38 matched by matcher or not.
39 39
40 40 If matcher is None, returns True"""
41 41
42 42 if matcher is None:
43 43 return True
44 44 path = decodedir(path)
45 45 if path.startswith(b'data/'):
46 46 return matcher(path[len(b'data/') : -len(b'.i')])
47 47 elif path.startswith(b'meta/'):
48 48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
49 49
50 50 raise error.ProgrammingError(b"cannot decode path %s" % path)
51 51
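A small sketch of the helper above; for 'data/' entries only the matcher's
call operator is used, so a plain callable is enough for illustration (a real
narrow matcher would also provide visitdir() for 'meta/' entries):

    assert _matchtrackedpath(b'data/foo/bar.txt.i', None)   # no matcher: keep
    only_foo = lambda path: path.startswith(b'foo/')
    assert _matchtrackedpath(b'data/foo/bar.txt.i', only_foo)
    assert not _matchtrackedpath(b'data/baz.txt.i', only_foo)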
52 52
53 53 # This avoids a collision between a file named foo and a dir named
54 54 # foo.i or foo.d
55 55 def _encodedir(path):
56 56 """
57 57 >>> _encodedir(b'data/foo.i')
58 58 'data/foo.i'
59 59 >>> _encodedir(b'data/foo.i/bla.i')
60 60 'data/foo.i.hg/bla.i'
61 61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 62 'data/foo.i.hg.hg/bla.i'
63 63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 65 """
66 66 return (
67 67 path.replace(b".hg/", b".hg.hg/")
68 68 .replace(b".i/", b".i.hg/")
69 69 .replace(b".d/", b".d.hg/")
70 70 )
71 71
72 72
73 73 encodedir = getattr(parsers, 'encodedir', _encodedir)
74 74
75 75
76 76 def decodedir(path):
77 77 """
78 78 >>> decodedir(b'data/foo.i')
79 79 'data/foo.i'
80 80 >>> decodedir(b'data/foo.i.hg/bla.i')
81 81 'data/foo.i/bla.i'
82 82 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
83 83 'data/foo.i.hg/bla.i'
84 84 """
85 85 if b".hg/" not in path:
86 86 return path
87 87 return (
88 88 path.replace(b".d.hg/", b".d/")
89 89 .replace(b".i.hg/", b".i/")
90 90 .replace(b".hg.hg/", b".hg/")
91 91 )
92 92
93 93
94 94 def _reserved():
95 95 """characters that are problematic for filesystems
96 96
97 97 * ascii escapes (0..31)
98 98 * ascii hi (126..255)
99 99 * windows specials
100 100
101 101 these characters will be escaped by encodefunctions
102 102 """
103 103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 104 for x in range(32):
105 105 yield x
106 106 for x in range(126, 256):
107 107 yield x
108 108 for x in winreserved:
109 109 yield x
110 110
111 111
112 112 def _buildencodefun():
113 113 """
114 114 >>> enc, dec = _buildencodefun()
115 115
116 116 >>> enc(b'nothing/special.txt')
117 117 'nothing/special.txt'
118 118 >>> dec(b'nothing/special.txt')
119 119 'nothing/special.txt'
120 120
121 121 >>> enc(b'HELLO')
122 122 '_h_e_l_l_o'
123 123 >>> dec(b'_h_e_l_l_o')
124 124 'HELLO'
125 125
126 126 >>> enc(b'hello:world?')
127 127 'hello~3aworld~3f'
128 128 >>> dec(b'hello~3aworld~3f')
129 129 'hello:world?'
130 130
131 131 >>> enc(b'the\\x07quick\\xADshot')
132 132 'the~07quick~adshot'
133 133 >>> dec(b'the~07quick~adshot')
134 134 'the\\x07quick\\xadshot'
135 135 """
136 136 e = b'_'
137 137 xchr = pycompat.bytechr
138 138 asciistr = list(map(xchr, range(127)))
139 139 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
140 140
141 141 cmap = {x: x for x in asciistr}
142 142 for x in _reserved():
143 143 cmap[xchr(x)] = b"~%02x" % x
144 144 for x in capitals + [ord(e)]:
145 145 cmap[xchr(x)] = e + xchr(x).lower()
146 146
147 147 dmap = {}
148 148 for k, v in pycompat.iteritems(cmap):
149 149 dmap[v] = k
150 150
151 151 def decode(s):
152 152 i = 0
153 153 while i < len(s):
154 154 for l in pycompat.xrange(1, 4):
155 155 try:
156 156 yield dmap[s[i : i + l]]
157 157 i += l
158 158 break
159 159 except KeyError:
160 160 pass
161 161 else:
162 162 raise KeyError
163 163
164 164 return (
165 165 lambda s: b''.join(
166 166 [cmap[s[c : c + 1]] for c in pycompat.xrange(len(s))]
167 167 ),
168 168 lambda s: b''.join(list(decode(s))),
169 169 )
170 170
171 171
172 172 _encodefname, _decodefname = _buildencodefun()
173 173
174 174
175 175 def encodefilename(s):
176 176 """
177 177 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
178 178 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
179 179 """
180 180 return _encodefname(encodedir(s))
181 181
182 182
183 183 def decodefilename(s):
184 184 """
185 185 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
186 186 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
187 187 """
188 188 return decodedir(_decodefname(s))
189 189
190 190
191 191 def _buildlowerencodefun():
192 192 """
193 193 >>> f = _buildlowerencodefun()
194 194 >>> f(b'nothing/special.txt')
195 195 'nothing/special.txt'
196 196 >>> f(b'HELLO')
197 197 'hello'
198 198 >>> f(b'hello:world?')
199 199 'hello~3aworld~3f'
200 200 >>> f(b'the\\x07quick\\xADshot')
201 201 'the~07quick~adshot'
202 202 """
203 203 xchr = pycompat.bytechr
204 204 cmap = {xchr(x): xchr(x) for x in pycompat.xrange(127)}
205 205 for x in _reserved():
206 206 cmap[xchr(x)] = b"~%02x" % x
207 207 for x in range(ord(b"A"), ord(b"Z") + 1):
208 208 cmap[xchr(x)] = xchr(x).lower()
209 209
210 210 def lowerencode(s):
211 211 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
212 212
213 213 return lowerencode
214 214
215 215
216 216 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
217 217
218 218 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
219 219 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
220 220 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
221 221
222 222
223 223 def _auxencode(path, dotencode):
224 224 """
225 225 Encodes filenames containing names reserved by Windows or which end in
226 226 period or space. Does not touch other single reserved characters c.
227 227 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
228 228 Additionally encodes space or period at the beginning, if dotencode is
229 229 True. Parameter path is assumed to be all lowercase.
230 230 A segment only needs encoding if a reserved name appears as a
231 231 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
232 232 doesn't need encoding.
233 233
234 234 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
235 235 >>> _auxencode(s.split(b'/'), True)
236 236 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
237 237 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
238 238 >>> _auxencode(s.split(b'/'), False)
239 239 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
240 240 >>> _auxencode([b'foo. '], True)
241 241 ['foo.~20']
242 242 >>> _auxencode([b' .foo'], True)
243 243 ['~20.foo']
244 244 """
245 245 for i, n in enumerate(path):
246 246 if not n:
247 247 continue
248 248 if dotencode and n[0] in b'. ':
249 249 n = b"~%02x" % ord(n[0:1]) + n[1:]
250 250 path[i] = n
251 251 else:
252 252 l = n.find(b'.')
253 253 if l == -1:
254 254 l = len(n)
255 255 if (l == 3 and n[:3] in _winres3) or (
256 256 l == 4
257 257 and n[3:4] <= b'9'
258 258 and n[3:4] >= b'1'
259 259 and n[:3] in _winres4
260 260 ):
261 261 # encode third letter ('aux' -> 'au~78')
262 262 ec = b"~%02x" % ord(n[2:3])
263 263 n = n[0:2] + ec + n[3:]
264 264 path[i] = n
265 265 if n[-1] in b'. ':
266 266 # encode last period or space ('foo...' -> 'foo..~2e')
267 267 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
268 268 return path
269 269
270 270
271 271 _maxstorepathlen = 120
272 272 _dirprefixlen = 8
273 273 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
274 274
275 275
276 276 def _hashencode(path, dotencode):
277 277 digest = hex(hashutil.sha1(path).digest())
278 278 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
279 279 parts = _auxencode(le, dotencode)
280 280 basename = parts[-1]
281 281 _root, ext = os.path.splitext(basename)
282 282 sdirs = []
283 283 sdirslen = 0
284 284 for p in parts[:-1]:
285 285 d = p[:_dirprefixlen]
286 286 if d[-1] in b'. ':
287 287 # Windows can't access dirs ending in period or space
288 288 d = d[:-1] + b'_'
289 289 if sdirslen == 0:
290 290 t = len(d)
291 291 else:
292 292 t = sdirslen + 1 + len(d)
293 293 if t > _maxshortdirslen:
294 294 break
295 295 sdirs.append(d)
296 296 sdirslen = t
297 297 dirs = b'/'.join(sdirs)
298 298 if len(dirs) > 0:
299 299 dirs += b'/'
300 300 res = b'dh/' + dirs + digest + ext
301 301 spaceleft = _maxstorepathlen - len(res)
302 302 if spaceleft > 0:
303 303 filler = basename[:spaceleft]
304 304 res = b'dh/' + dirs + filler + digest + ext
305 305 return res
306 306
307 307
308 308 def _hybridencode(path, dotencode):
309 309 """encodes path with a length limit
310 310
311 311 Encodes all paths that begin with 'data/', according to the following.
312 312
313 313 Default encoding (reversible):
314 314
315 315 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
316 316 characters are encoded as '~xx', where xx is the two digit hex code
317 317 of the character (see encodefilename).
318 318 Relevant path components consisting of Windows reserved filenames are
319 319 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
320 320
321 321 Hashed encoding (not reversible):
322 322
323 323 If the default-encoded path is longer than _maxstorepathlen, a
324 324 non-reversible hybrid hashing of the path is done instead.
325 325 This encoding uses up to _dirprefixlen characters of all directory
326 326 levels of the lowerencoded path, but not more levels than can fit into
327 327 _maxshortdirslen.
328 328 Then follows the filler followed by the sha digest of the full path.
329 329 The filler is the beginning of the basename of the lowerencoded path
330 330 (the basename is everything after the last path separator). The filler
331 331 is as long as possible, filling in characters from the basename until
332 332 the encoded path has _maxstorepathlen characters (or all chars of the
333 333 basename have been taken).
334 334 The extension (e.g. '.i' or '.d') is preserved.
335 335
336 336 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
337 337 encoding was used.
338 338 """
339 339 path = encodedir(path)
340 340 ef = _encodefname(path).split(b'/')
341 341 res = b'/'.join(_auxencode(ef, dotencode))
342 342 if len(res) > _maxstorepathlen:
343 343 res = _hashencode(path, dotencode)
344 344 return res
345 345
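A sketch of the two regimes described above: short paths keep the reversible
encoding, while over-long paths fall back to the hashed 'dh/' form (the exact
digest depends on the input and is not shown):

    assert _hybridencode(b'data/FOO.txt.i', True) == b'data/_f_o_o.txt.i'
    long_path = b'data/' + b'x' * 200 + b'.i'
    hashed = _hybridencode(long_path, True)
    assert hashed.startswith(b'dh/') and len(hashed) <= _maxstorepathlen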
346 346
347 347 def _pathencode(path):
348 348 de = encodedir(path)
349 349 if len(path) > _maxstorepathlen:
350 350 return _hashencode(de, True)
351 351 ef = _encodefname(de).split(b'/')
352 352 res = b'/'.join(_auxencode(ef, True))
353 353 if len(res) > _maxstorepathlen:
354 354 return _hashencode(de, True)
355 355 return res
356 356
357 357
358 358 _pathencode = getattr(parsers, 'pathencode', _pathencode)
359 359
360 360
361 361 def _plainhybridencode(f):
362 362 return _hybridencode(f, False)
363 363
364 364
365 365 def _calcmode(vfs):
366 366 try:
367 367 # files in .hg/ will be created using this mode
368 368 mode = vfs.stat().st_mode
369 369 # avoid some useless chmods
370 370 if (0o777 & ~util.umask) == (0o777 & mode):
371 371 mode = None
372 372 except OSError:
373 373 mode = None
374 374 return mode
375 375
376 376
377 377 _data = [
378 378 b'bookmarks',
379 379 b'narrowspec',
380 380 b'data',
381 381 b'meta',
382 382 b'00manifest.d',
383 383 b'00manifest.i',
384 384 b'00changelog.d',
385 385 b'00changelog.i',
386 386 b'phaseroots',
387 387 b'obsstore',
388 388 b'requires',
389 389 ]
390 390
391 391 REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
392 392 REVLOG_FILES_OTHER_EXT = (
393 393 b'.idx',
394 394 b'.d',
395 395 b'.dat',
396 396 b'.n',
397 397 b'.nd',
398 398 b'.sda',
399 399 b'd.tmpcensored',
400 400 )
401 401 # files that are "volatile" and might change between listing and streaming
402 402 #
403 403 # note: the ".nd" file are nodemap data and won't "change" but they might be
404 404 # deleted.
405 405 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
406 406
407 407 # some exception to the above matching
408 408 #
409 409 # XXX This is currently not in use because of issue6542
410 410 EXCLUDED = re.compile(b'.*undo\.[^/]+\.(nd?|i)$')
411 411
412 412
413 413 def is_revlog(f, kind, st):
414 414 if kind != stat.S_IFREG:
415 415 return None
416 416 return revlog_type(f)
417 417
418 418
419 419 def revlog_type(f):
420 420     # XXX we need to filter `undo.` files created by the transaction here; however,
421 421     # being naive about it also filters revlogs for `undo.*` files, leading to
422 422 # issue6542. So we no longer use EXCLUDED.
423 423 if f.endswith(REVLOG_FILES_MAIN_EXT):
424 424 return FILEFLAGS_REVLOG_MAIN
425 425 elif f.endswith(REVLOG_FILES_OTHER_EXT):
426 426 t = FILETYPE_FILELOG_OTHER
427 427 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
428 428 t |= FILEFLAGS_VOLATILE
429 429 return t
430 430 return None
431 431
432 432
433 433 # the file is part of changelog data
434 434 FILEFLAGS_CHANGELOG = 1 << 13
435 435 # the file is part of manifest data
436 436 FILEFLAGS_MANIFESTLOG = 1 << 12
437 437 # the file is part of filelog data
438 438 FILEFLAGS_FILELOG = 1 << 11
439 439 # files that are not directly part of a revlog
440 440 FILEFLAGS_OTHER = 1 << 10
441 441
442 442 # the main entry point for a revlog
443 443 FILEFLAGS_REVLOG_MAIN = 1 << 1
444 444 # a secondary file for a revlog
445 445 FILEFLAGS_REVLOG_OTHER = 1 << 0
446 446
447 447 # files that are "volatile" and might change between listing and streaming
448 448 FILEFLAGS_VOLATILE = 1 << 20
449 449
450 450 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
451 451 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
452 452 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
453 453 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
454 454 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
455 455 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
456 456 FILETYPE_OTHER = FILEFLAGS_OTHER
457 457
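# A small illustrative sketch (not part of the module) of how the flag bits
# above combine: the composite FILETYPE_* values are plain bitwise ORs, so
# consumers test membership with `&`.
def _fileflags_example():
    t = FILETYPE_FILELOG_MAIN
    assert t & FILEFLAGS_FILELOG
    assert t & FILEFLAGS_REVLOG_MAIN
    assert not t & FILEFLAGS_VOLATILE
    # revlog_type() only returns the REVLOG_* bits; datafiles() adds FILELOG
    assert revlog_type(b'data/foo.txt.i') == FILEFLAGS_REVLOG_MAIN
    return t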
458 458
459 459 class basicstore(object):
460 460 '''base class for local repository stores'''
461 461
462 462 def __init__(self, path, vfstype):
463 463 vfs = vfstype(path)
464 464 self.path = vfs.base
465 465 self.createmode = _calcmode(vfs)
466 466 vfs.createmode = self.createmode
467 467 self.rawvfs = vfs
468 468 self.vfs = vfsmod.filtervfs(vfs, encodedir)
469 469 self.opener = self.vfs
470 470
471 471 def join(self, f):
472 472 return self.path + b'/' + encodedir(f)
473 473
474 474 def _walk(self, relpath, recurse):
475 '''yields (unencoded, encoded, size)'''
475 '''yields (revlog_type, unencoded, size)'''
476 476 path = self.path
477 477 if relpath:
478 478 path += b'/' + relpath
479 479 striplen = len(self.path) + 1
480 480 l = []
481 481 if self.rawvfs.isdir(path):
482 482 visit = [path]
483 483 readdir = self.rawvfs.readdir
484 484 while visit:
485 485 p = visit.pop()
486 486 for f, kind, st in readdir(p, stat=True):
487 487 fp = p + b'/' + f
488 488 rl_type = is_revlog(f, kind, st)
489 489 if rl_type is not None:
490 490 n = util.pconvert(fp[striplen:])
491 l.append((rl_type, decodedir(n), n, st.st_size))
491 l.append((rl_type, decodedir(n), st.st_size))
492 492 elif kind == stat.S_IFDIR and recurse:
493 493 visit.append(fp)
494 494 l.sort()
495 495 return l
496 496
497 497 def changelog(self, trypending, concurrencychecker=None):
498 498 return changelog.changelog(
499 499 self.vfs,
500 500 trypending=trypending,
501 501 concurrencychecker=concurrencychecker,
502 502 )
503 503
504 504 def manifestlog(self, repo, storenarrowmatch):
505 505 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
506 506 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
507 507
508 def datafiles(self, matcher=None):
508 def datafiles(self, matcher=None, undecodable=None):
509 """Like walk, but excluding the changelog and root manifest.
510
511 When [undecodable] is None, revlog names that can't be
512 decoded cause an exception. When it is provided, it should
513 be a list and the filenames that can't be decoded are added
514 to it instead. This is very rarely needed."""
509 515 files = self._walk(b'data', True) + self._walk(b'meta', True)
510 for (t, u, e, s) in files:
511 yield (FILEFLAGS_FILELOG | t, u, e, s)
516 for (t, u, s) in files:
517 yield (FILEFLAGS_FILELOG | t, u, s)
512 518
513 519 def topfiles(self):
514 520 # yield manifest before changelog
515 521 files = reversed(self._walk(b'', False))
516 for (t, u, e, s) in files:
522 for (t, u, s) in files:
517 523 if u.startswith(b'00changelog'):
518 yield (FILEFLAGS_CHANGELOG | t, u, e, s)
524 yield (FILEFLAGS_CHANGELOG | t, u, s)
519 525 elif u.startswith(b'00manifest'):
520 yield (FILEFLAGS_MANIFESTLOG | t, u, e, s)
526 yield (FILEFLAGS_MANIFESTLOG | t, u, s)
521 527 else:
522 yield (FILETYPE_OTHER | t, u, e, s)
528 yield (FILETYPE_OTHER | t, u, s)
523 529
524 530 def walk(self, matcher=None):
525 531 """return file related to data storage (ie: revlogs)
526 532
527 yields (file_type, unencoded, encoded, size)
533 yields (file_type, unencoded, size)
528 534
529 535 if a matcher is passed, only storage files of tracked paths
530 536 matching the matcher are yielded
531 537 """
532 538 # yield data files first
533 539 for x in self.datafiles(matcher):
534 540 yield x
535 541 for x in self.topfiles():
536 542 yield x
537 543
538 544 def copylist(self):
539 545 return _data
540 546
541 547 def write(self, tr):
542 548 pass
543 549
544 550 def invalidatecaches(self):
545 551 pass
546 552
547 553 def markremoved(self, fn):
548 554 pass
549 555
550 556 def __contains__(self, path):
551 557 '''Checks if the store contains path'''
552 558 path = b"/".join((b"data", path))
553 559 # file?
554 560 if self.vfs.exists(path + b".i"):
555 561 return True
556 562 # dir?
557 563 if not path.endswith(b"/"):
558 564 path = path + b"/"
559 565 return self.vfs.exists(path)
560 566
561 567
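# A hedged consumption sketch (hypothetical helper, assumes a store instance):
# walk() and datafiles() now yield 3-tuples (file_type, unencoded_name, size)
# instead of the previous 4-tuples that also carried the encoded name.
def _walk_example(store_obj):
    total = 0
    for file_type, name, size in store_obj.walk():
        if file_type & FILEFLAGS_REVLOG_MAIN:
            total += size
    return total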
562 568 class encodedstore(basicstore):
563 569 def __init__(self, path, vfstype):
564 570 vfs = vfstype(path + b'/store')
565 571 self.path = vfs.base
566 572 self.createmode = _calcmode(vfs)
567 573 vfs.createmode = self.createmode
568 574 self.rawvfs = vfs
569 575 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
570 576 self.opener = self.vfs
571 577
572 578 # note: topfiles would also need a decode phase. It is just that in
573 579 # practice we do not have any file outside of `data/` that needs encoding.
574 580 # However that might change, so we should probably add a test and
575 581 # encoding/decoding support for it too. see issue6548
576 582
577 def datafiles(self, matcher=None):
578 for t, a, b, size in super(encodedstore, self).datafiles():
583 def datafiles(self, matcher=None, undecodable=None):
584 for t, f1, size in super(encodedstore, self).datafiles():
579 585 try:
580 a = decodefilename(a)
586 f2 = decodefilename(f1)
581 587 except KeyError:
582 a = None
583 if a is not None and not _matchtrackedpath(a, matcher):
588 if undecodable is None:
589 msg = _(b'undecodable revlog name %s') % f1
590 raise error.StorageError(msg)
591 else:
592 undecodable.append(f1)
593 continue
594 if not _matchtrackedpath(f2, matcher):
584 595 continue
585 yield t, a, b, size
596 yield t, f2, size
586 597
587 598 def join(self, f):
588 599 return self.path + b'/' + encodefilename(f)
589 600
590 601 def copylist(self):
591 602 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
592 603
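# A hedged usage sketch of the new `undecodable` parameter (hypothetical
# helper, assumes a localrepo instance): for stores that encode filenames,
# undecodable names are collected instead of raising StorageError on the
# first one.
def _undecodable_example(repo):
    bad_names = []
    for file_type, name, size in repo.store.datafiles(undecodable=bad_names):
        pass  # `name` is the unencoded path, `size` its on-disk size
    if bad_names:
        repo.ui.warn(b'undecodable revlog names: %s\n' % b', '.join(bad_names))
    return bad_names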
593 604
594 605 class fncache(object):
595 606 # the filename used to be partially encoded
596 607 # hence the encodedir/decodedir dance
597 608 def __init__(self, vfs):
598 609 self.vfs = vfs
599 610 self.entries = None
600 611 self._dirty = False
601 612 # set of new additions to fncache
602 613 self.addls = set()
603 614
604 615 def ensureloaded(self, warn=None):
605 616 """read the fncache file if not already read.
606 617
607 618 If the file on disk is corrupted, raise. If warn is provided,
608 619 warn and keep going instead."""
609 620 if self.entries is None:
610 621 self._load(warn)
611 622
612 623 def _load(self, warn=None):
613 624 '''fill the entries from the fncache file'''
614 625 self._dirty = False
615 626 try:
616 627 fp = self.vfs(b'fncache', mode=b'rb')
617 628 except IOError:
618 629 # skip nonexistent file
619 630 self.entries = set()
620 631 return
621 632
622 633 self.entries = set()
623 634 chunk = b''
624 635 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
625 636 chunk += c
626 637 try:
627 638 p = chunk.rindex(b'\n')
628 639 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
629 640 chunk = chunk[p + 1 :]
630 641 except ValueError:
631 642 # substring '\n' not found, maybe the entry is bigger than the
632 643 # chunksize, so let's keep iterating
633 644 pass
634 645
635 646 if chunk:
636 647 msg = _(b"fncache does not ends with a newline")
637 648 if warn:
638 649 warn(msg + b'\n')
639 650 else:
640 651 raise error.Abort(
641 652 msg,
642 653 hint=_(
643 654 b"use 'hg debugrebuildfncache' to "
644 655 b"rebuild the fncache"
645 656 ),
646 657 )
647 658 self._checkentries(fp, warn)
648 659 fp.close()
649 660
650 661 def _checkentries(self, fp, warn):
651 662 """make sure there is no empty string in entries"""
652 663 if b'' in self.entries:
653 664 fp.seek(0)
654 665 for n, line in enumerate(util.iterfile(fp)):
655 666 if not line.rstrip(b'\n'):
656 667 t = _(b'invalid entry in fncache, line %d') % (n + 1)
657 668 if warn:
658 669 warn(t + b'\n')
659 670 else:
660 671 raise error.Abort(t)
661 672
662 673 def write(self, tr):
663 674 if self._dirty:
664 675 assert self.entries is not None
665 676 self.entries = self.entries | self.addls
666 677 self.addls = set()
667 678 tr.addbackup(b'fncache')
668 679 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
669 680 if self.entries:
670 681 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
671 682 fp.close()
672 683 self._dirty = False
673 684 if self.addls:
674 685 # if we have just new entries, let's append them to the fncache
675 686 tr.addbackup(b'fncache')
676 687 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
677 688 if self.addls:
678 689 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
679 690 fp.close()
680 691 self.entries = None
681 692 self.addls = set()
682 693
683 694 def add(self, fn):
684 695 if self.entries is None:
685 696 self._load()
686 697 if fn not in self.entries:
687 698 self.addls.add(fn)
688 699
689 700 def remove(self, fn):
690 701 if self.entries is None:
691 702 self._load()
692 703 if fn in self.addls:
693 704 self.addls.remove(fn)
694 705 return
695 706 try:
696 707 self.entries.remove(fn)
697 708 self._dirty = True
698 709 except KeyError:
699 710 pass
700 711
701 712 def __contains__(self, fn):
702 713 if fn in self.addls:
703 714 return True
704 715 if self.entries is None:
705 716 self._load()
706 717 return fn in self.entries
707 718
708 719 def __iter__(self):
709 720 if self.entries is None:
710 721 self._load()
711 722 return iter(self.entries | self.addls)
712 723
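# A hedged lifecycle sketch (hypothetical helper, assumes a fncache-backed
# repository; purely illustrative, it would persist a bogus entry): additions
# are buffered in `addls` and only written to .hg/store/fncache by write()
# inside a transaction.
def _fncache_example(repo):
    fnc = repo.store.fncache
    fnc.add(b'data/newfile.txt.i')
    assert b'data/newfile.txt.i' in fnc
    # 'fncache-example' is a hypothetical transaction name
    with repo.lock(), repo.transaction(b'fncache-example') as tr:
        fnc.write(tr)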
713 724
714 725 class _fncachevfs(vfsmod.proxyvfs):
715 726 def __init__(self, vfs, fnc, encode):
716 727 vfsmod.proxyvfs.__init__(self, vfs)
717 728 self.fncache = fnc
718 729 self.encode = encode
719 730
720 731 def __call__(self, path, mode=b'r', *args, **kw):
721 732 encoded = self.encode(path)
722 733 if mode not in (b'r', b'rb') and (
723 734 path.startswith(b'data/') or path.startswith(b'meta/')
724 735 ):
725 736 # do not trigger a fncache load when adding a file that already is
726 737 # known to exist.
727 738 notload = self.fncache.entries is None and self.vfs.exists(encoded)
728 739 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
729 740 # when appending to an existing file, if the file has size zero,
730 741 # it should be considered as missing. Such zero-size files are
731 742 # the result of truncation when a transaction is aborted.
732 743 notload = False
733 744 if not notload:
734 745 self.fncache.add(path)
735 746 return self.vfs(encoded, mode, *args, **kw)
736 747
737 748 def join(self, path):
738 749 if path:
739 750 return self.vfs.join(self.encode(path))
740 751 else:
741 752 return self.vfs.join(path)
742 753
743 754 def register_file(self, path):
744 755 """generic hook point to lets fncache steer its stew"""
745 756 if path.startswith(b'data/') or path.startswith(b'meta/'):
746 757 self.fncache.add(path)
747 758
748 759
749 760 class fncachestore(basicstore):
750 761 def __init__(self, path, vfstype, dotencode):
751 762 if dotencode:
752 763 encode = _pathencode
753 764 else:
754 765 encode = _plainhybridencode
755 766 self.encode = encode
756 767 vfs = vfstype(path + b'/store')
757 768 self.path = vfs.base
758 769 self.pathsep = self.path + b'/'
759 770 self.createmode = _calcmode(vfs)
760 771 vfs.createmode = self.createmode
761 772 self.rawvfs = vfs
762 773 fnc = fncache(vfs)
763 774 self.fncache = fnc
764 775 self.vfs = _fncachevfs(vfs, fnc, encode)
765 776 self.opener = self.vfs
766 777
767 778 def join(self, f):
768 779 return self.pathsep + self.encode(f)
769 780
770 781 def getsize(self, path):
771 782 return self.rawvfs.stat(path).st_size
772 783
773 def datafiles(self, matcher=None):
784 def datafiles(self, matcher=None, undecodable=None):
774 785 for f in sorted(self.fncache):
775 786 if not _matchtrackedpath(f, matcher):
776 787 continue
777 788 ef = self.encode(f)
778 789 try:
779 790 t = revlog_type(f)
780 791 assert t is not None, f
781 792 t |= FILEFLAGS_FILELOG
782 yield t, f, ef, self.getsize(ef)
793 yield t, f, self.getsize(ef)
783 794 except OSError as err:
784 795 if err.errno != errno.ENOENT:
785 796 raise
786 797
787 798 def copylist(self):
788 799 d = (
789 800 b'bookmarks',
790 801 b'narrowspec',
791 802 b'data',
792 803 b'meta',
793 804 b'dh',
794 805 b'fncache',
795 806 b'phaseroots',
796 807 b'obsstore',
797 808 b'00manifest.d',
798 809 b'00manifest.i',
799 810 b'00changelog.d',
800 811 b'00changelog.i',
801 812 b'requires',
802 813 )
803 814 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
804 815
805 816 def write(self, tr):
806 817 self.fncache.write(tr)
807 818
808 819 def invalidatecaches(self):
809 820 self.fncache.entries = None
810 821 self.fncache.addls = set()
811 822
812 823 def markremoved(self, fn):
813 824 self.fncache.remove(fn)
814 825
815 826 def _exists(self, f):
816 827 ef = self.encode(f)
817 828 try:
818 829 self.getsize(ef)
819 830 return True
820 831 except OSError as err:
821 832 if err.errno != errno.ENOENT:
822 833 raise
823 834 # nonexistent entry
824 835 return False
825 836
826 837 def __contains__(self, path):
827 838 '''Checks if the store contains path'''
828 839 path = b"/".join((b"data", path))
829 840 # check for files (exact match)
830 841 e = path + b'.i'
831 842 if e in self.fncache and self._exists(e):
832 843 return True
833 844 # now check for directories (prefix match)
834 845 if not path.endswith(b'/'):
835 846 path += b'/'
836 847 for e in self.fncache:
837 848 if e.startswith(path) and self._exists(e):
838 849 return True
839 850 return False
@@ -1,918 +1,918 b''
1 1 # streamclone.py - producing and consuming streaming repository data
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import contextlib
11 11 import errno
12 12 import os
13 13 import struct
14 14
15 15 from .i18n import _
16 16 from .pycompat import open
17 17 from .interfaces import repository
18 18 from . import (
19 19 bookmarks,
20 20 cacheutil,
21 21 error,
22 22 narrowspec,
23 23 phases,
24 24 pycompat,
25 25 requirements as requirementsmod,
26 26 scmutil,
27 27 store,
28 28 util,
29 29 )
30 30 from .utils import (
31 31 stringutil,
32 32 )
33 33
34 34
35 35 def canperformstreamclone(pullop, bundle2=False):
36 36 """Whether it is possible to perform a streaming clone as part of pull.
37 37
38 38 ``bundle2`` will cause the function to consider stream clone through
39 39 bundle2 and only through bundle2.
40 40
41 41 Returns a tuple of (supported, requirements). ``supported`` is True if
42 42 streaming clone is supported and False otherwise. ``requirements`` is
43 43 a set of repo requirements from the remote, or ``None`` if stream clone
44 44 isn't supported.
45 45 """
46 46 repo = pullop.repo
47 47 remote = pullop.remote
48 48
49 49 bundle2supported = False
50 50 if pullop.canusebundle2:
51 51 if b'v2' in pullop.remotebundle2caps.get(b'stream', []):
52 52 bundle2supported = True
53 53 # else
54 54 # Server doesn't support bundle2 stream clone or doesn't support
55 55 # the versions we support. Fall back and possibly allow legacy.
56 56
57 57 # Ensures legacy code path uses available bundle2.
58 58 if bundle2supported and not bundle2:
59 59 return False, None
60 60 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
61 61 elif bundle2 and not bundle2supported:
62 62 return False, None
63 63
64 64 # Streaming clone only works on empty repositories.
65 65 if len(repo):
66 66 return False, None
67 67
68 68 # Streaming clone only works if all data is being requested.
69 69 if pullop.heads:
70 70 return False, None
71 71
72 72 streamrequested = pullop.streamclonerequested
73 73
74 74 # If we don't have a preference, let the server decide for us. This
75 75 # likely only comes into play in LANs.
76 76 if streamrequested is None:
77 77 # The server can advertise whether to prefer streaming clone.
78 78 streamrequested = remote.capable(b'stream-preferred')
79 79
80 80 if not streamrequested:
81 81 return False, None
82 82
83 83 # In order for stream clone to work, the client has to support all the
84 84 # requirements advertised by the server.
85 85 #
86 86 # The server advertises its requirements via the "stream" and "streamreqs"
87 87 # capability. "stream" (a value-less capability) is advertised if and only
88 88 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
89 89 # is advertised and contains a comma-delimited list of requirements.
90 90 requirements = set()
91 91 if remote.capable(b'stream'):
92 92 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
93 93 else:
94 94 streamreqs = remote.capable(b'streamreqs')
95 95 # This is weird and shouldn't happen with modern servers.
96 96 if not streamreqs:
97 97 pullop.repo.ui.warn(
98 98 _(
99 99 b'warning: stream clone requested but server has them '
100 100 b'disabled\n'
101 101 )
102 102 )
103 103 return False, None
104 104
105 105 streamreqs = set(streamreqs.split(b','))
106 106 # Server requires something we don't support. Bail.
107 107 missingreqs = streamreqs - repo.supportedformats
108 108 if missingreqs:
109 109 pullop.repo.ui.warn(
110 110 _(
111 111 b'warning: stream clone requested but client is missing '
112 112 b'requirements: %s\n'
113 113 )
114 114 % b', '.join(sorted(missingreqs))
115 115 )
116 116 pullop.repo.ui.warn(
117 117 _(
118 118 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
119 119 b'for more information)\n'
120 120 )
121 121 )
122 122 return False, None
123 123 requirements = streamreqs
124 124
125 125 return True, requirements
126 126
127 127
128 128 def maybeperformlegacystreamclone(pullop):
129 129 """Possibly perform a legacy stream clone operation.
130 130
131 131 Legacy stream clones are performed as part of pull but before all other
132 132 operations.
133 133
134 134 A legacy stream clone will not be performed if a bundle2 stream clone is
135 135 supported.
136 136 """
137 137 from . import localrepo
138 138
139 139 supported, requirements = canperformstreamclone(pullop)
140 140
141 141 if not supported:
142 142 return
143 143
144 144 repo = pullop.repo
145 145 remote = pullop.remote
146 146
147 147 # Save remote branchmap. We will use it later to speed up branchcache
148 148 # creation.
149 149 rbranchmap = None
150 150 if remote.capable(b'branchmap'):
151 151 with remote.commandexecutor() as e:
152 152 rbranchmap = e.callcommand(b'branchmap', {}).result()
153 153
154 154 repo.ui.status(_(b'streaming all changes\n'))
155 155
156 156 with remote.commandexecutor() as e:
157 157 fp = e.callcommand(b'stream_out', {}).result()
158 158
159 159 # TODO strictly speaking, this code should all be inside the context
160 160 # manager because the context manager is supposed to ensure all wire state
161 161 # is flushed when exiting. But the legacy peers don't do this, so it
162 162 # doesn't matter.
163 163 l = fp.readline()
164 164 try:
165 165 resp = int(l)
166 166 except ValueError:
167 167 raise error.ResponseError(
168 168 _(b'unexpected response from remote server:'), l
169 169 )
170 170 if resp == 1:
171 171 raise error.Abort(_(b'operation forbidden by server'))
172 172 elif resp == 2:
173 173 raise error.Abort(_(b'locking the remote repository failed'))
174 174 elif resp != 0:
175 175 raise error.Abort(_(b'the server sent an unknown error code'))
176 176
177 177 l = fp.readline()
178 178 try:
179 179 filecount, bytecount = map(int, l.split(b' ', 1))
180 180 except (ValueError, TypeError):
181 181 raise error.ResponseError(
182 182 _(b'unexpected response from remote server:'), l
183 183 )
184 184
185 185 with repo.lock():
186 186 consumev1(repo, fp, filecount, bytecount)
187 187
188 188 # new requirements = old non-format requirements +
189 189 # new format-related remote requirements
190 190 # requirements from the streamed-in repository
191 191 repo.requirements = requirements | (
192 192 repo.requirements - repo.supportedformats
193 193 )
194 194 repo.svfs.options = localrepo.resolvestorevfsoptions(
195 195 repo.ui, repo.requirements, repo.features
196 196 )
197 197 scmutil.writereporequirements(repo)
198 198
199 199 if rbranchmap:
200 200 repo._branchcaches.replace(repo, rbranchmap)
201 201
202 202 repo.invalidate()
203 203
204 204
205 205 def allowservergeneration(repo):
206 206 """Whether streaming clones are allowed from the server."""
207 207 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
208 208 return False
209 209
210 210 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
211 211 return False
212 212
213 213 # The way stream clone works makes it impossible to hide secret changesets.
214 214 # So don't allow this by default.
215 215 secret = phases.hassecret(repo)
216 216 if secret:
217 217 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
218 218
219 219 return True
220 220
221 221
222 222 # This is it's own function so extensions can override it.
223 223 def _walkstreamfiles(repo, matcher=None):
224 224 return repo.store.walk(matcher)
225 225
226 226
227 227 def generatev1(repo):
228 228 """Emit content for version 1 of a streaming clone.
229 229
230 230 This returns a 3-tuple of (file count, byte size, data iterator).
231 231
232 232 The data iterator consists of N entries for each file being transferred.
233 233 Each file entry starts as a line with the file name and integer size
234 234 delimited by a null byte.
235 235
236 236 The raw file data follows. Following the raw file data is the next file
237 237 entry, or EOF.
238 238
239 239 When used on the wire protocol, an additional line indicating protocol
240 240 success will be prepended to the stream. This function is not responsible
241 241 for adding it.
242 242
243 243 This function will obtain a repository lock to ensure a consistent view of
244 244 the store is captured. It therefore may raise LockError.
245 245 """
246 246 entries = []
247 247 total_bytes = 0
248 248 # Get consistent snapshot of repo, lock during scan.
249 249 with repo.lock():
250 250 repo.ui.debug(b'scanning\n')
251 for file_type, name, ename, size in _walkstreamfiles(repo):
251 for file_type, name, size in _walkstreamfiles(repo):
252 252 if size:
253 253 entries.append((name, size))
254 254 total_bytes += size
255 255 _test_sync_point_walk_1(repo)
256 256 _test_sync_point_walk_2(repo)
257 257
258 258 repo.ui.debug(
259 259 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
260 260 )
261 261
262 262 svfs = repo.svfs
263 263 debugflag = repo.ui.debugflag
264 264
265 265 def emitrevlogdata():
266 266 for name, size in entries:
267 267 if debugflag:
268 268 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
269 269 # partially encode name over the wire for backwards compat
270 270 yield b'%s\0%d\n' % (store.encodedir(name), size)
271 271 # auditing at this stage is both pointless (paths are already
272 272 # trusted by the local repo) and expensive
273 273 with svfs(name, b'rb', auditpath=False) as fp:
274 274 if size <= 65536:
275 275 yield fp.read(size)
276 276 else:
277 277 for chunk in util.filechunkiter(fp, limit=size):
278 278 yield chunk
279 279
280 280 return len(entries), total_bytes, emitrevlogdata()
281 281
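# A hedged decoding sketch mirroring the framing described above (hypothetical
# helper; `fp` is assumed to be positioned at the first entry): each entry is
# "<encoded name>\0<size>\n" followed by exactly <size> bytes of revlog data.
def _iter_v1_entries(fp, filecount):
    for _i in range(filecount):
        header = fp.readline()
        name, size = header.split(b'\0', 1)
        size = int(size)
        # names are partially encoded on the wire, as in consumev1() below
        yield store.decodedir(name), fp.read(size)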
282 282
283 283 def generatev1wireproto(repo):
284 284 """Emit content for version 1 of streaming clone suitable for the wire.
285 285
286 286 This is the data output from ``generatev1()`` with 2 header lines. The
287 287 first line indicates overall success. The 2nd contains the file count and
288 288 byte size of payload.
289 289
290 290 The success line contains "0" for success, "1" for stream generation not
291 291 allowed, and "2" for error locking the repository (possibly indicating
292 292 a permissions error for the server process).
293 293 """
294 294 if not allowservergeneration(repo):
295 295 yield b'1\n'
296 296 return
297 297
298 298 try:
299 299 filecount, bytecount, it = generatev1(repo)
300 300 except error.LockError:
301 301 yield b'2\n'
302 302 return
303 303
304 304 # Indicates successful response.
305 305 yield b'0\n'
306 306 yield b'%d %d\n' % (filecount, bytecount)
307 307 for chunk in it:
308 308 yield chunk
309 309
310 310
311 311 def generatebundlev1(repo, compression=b'UN'):
312 312 """Emit content for version 1 of a stream clone bundle.
313 313
314 314 The first 4 bytes of the output ("HGS1") denote this as stream clone
315 315 bundle version 1.
316 316
317 317 The next 2 bytes indicate the compression type. Only "UN" is currently
318 318 supported.
319 319
320 320 The next 16 bytes are two 64-bit big endian unsigned integers indicating
321 321 file count and byte count, respectively.
322 322
323 323 The next 2 bytes is a 16-bit big endian unsigned short declaring the length
324 324 of the requirements string, including a trailing \0. The following N bytes
325 325 are the requirements string, which is ASCII containing a comma-delimited
326 326 list of repo requirements that are needed to support the data.
327 327
328 328 The remaining content is the output of ``generatev1()`` (which may be
329 329 compressed in the future).
330 330
331 331 Returns a tuple of (requirements, data generator).
332 332 """
333 333 if compression != b'UN':
334 334 raise ValueError(b'we do not support the compression argument yet')
335 335
336 336 requirements = repo.requirements & repo.supportedformats
337 337 requires = b','.join(sorted(requirements))
338 338
339 339 def gen():
340 340 yield b'HGS1'
341 341 yield compression
342 342
343 343 filecount, bytecount, it = generatev1(repo)
344 344 repo.ui.status(
345 345 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
346 346 )
347 347
348 348 yield struct.pack(b'>QQ', filecount, bytecount)
349 349 yield struct.pack(b'>H', len(requires) + 1)
350 350 yield requires + b'\0'
351 351
352 352 # This is where we'll add compression in the future.
353 353 assert compression == b'UN'
354 354
355 355 progress = repo.ui.makeprogress(
356 356 _(b'bundle'), total=bytecount, unit=_(b'bytes')
357 357 )
358 358 progress.update(0)
359 359
360 360 for chunk in it:
361 361 progress.increment(step=len(chunk))
362 362 yield chunk
363 363
364 364 progress.complete()
365 365
366 366 return requirements, gen()
367 367
368 368
369 369 def consumev1(repo, fp, filecount, bytecount):
370 370 """Apply the contents from version 1 of a streaming clone file handle.
371 371
372 372 This takes the output from "stream_out" and applies it to the specified
373 373 repository.
374 374
375 375 Like "stream_out," the status line added by the wire protocol is not
376 376 handled by this function.
377 377 """
378 378 with repo.lock():
379 379 repo.ui.status(
380 380 _(b'%d files to transfer, %s of data\n')
381 381 % (filecount, util.bytecount(bytecount))
382 382 )
383 383 progress = repo.ui.makeprogress(
384 384 _(b'clone'), total=bytecount, unit=_(b'bytes')
385 385 )
386 386 progress.update(0)
387 387 start = util.timer()
388 388
389 389 # TODO: get rid of (potential) inconsistency
390 390 #
391 391 # If transaction is started and any @filecache property is
392 392 # changed at this point, it causes inconsistency between
393 393 # in-memory cached property and streamclone-ed file on the
394 394 # disk. Nested transaction prevents transaction scope "clone"
395 395 # below from writing in-memory changes out at the end of it,
396 396 # even though in-memory changes are discarded at the end of it
397 397 # regardless of transaction nesting.
398 398 #
399 399 # But transaction nesting can't be simply prohibited, because
400 400 # nesting occurs also in ordinary case (e.g. enabling
401 401 # clonebundles).
402 402
403 403 with repo.transaction(b'clone'):
404 404 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
405 405 for i in pycompat.xrange(filecount):
406 406 # XXX doesn't support '\n' or '\r' in filenames
407 407 l = fp.readline()
408 408 try:
409 409 name, size = l.split(b'\0', 1)
410 410 size = int(size)
411 411 except (ValueError, TypeError):
412 412 raise error.ResponseError(
413 413 _(b'unexpected response from remote server:'), l
414 414 )
415 415 if repo.ui.debugflag:
416 416 repo.ui.debug(
417 417 b'adding %s (%s)\n' % (name, util.bytecount(size))
418 418 )
419 419 # for backwards compat, name was partially encoded
420 420 path = store.decodedir(name)
421 421 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
422 422 for chunk in util.filechunkiter(fp, limit=size):
423 423 progress.increment(step=len(chunk))
424 424 ofp.write(chunk)
425 425
426 426 # force @filecache properties to be reloaded from
427 427 # streamclone-ed file at next access
428 428 repo.invalidate(clearfilecache=True)
429 429
430 430 elapsed = util.timer() - start
431 431 if elapsed <= 0:
432 432 elapsed = 0.001
433 433 progress.complete()
434 434 repo.ui.status(
435 435 _(b'transferred %s in %.1f seconds (%s/sec)\n')
436 436 % (
437 437 util.bytecount(bytecount),
438 438 elapsed,
439 439 util.bytecount(bytecount / elapsed),
440 440 )
441 441 )
442 442
443 443
444 444 def readbundle1header(fp):
445 445 compression = fp.read(2)
446 446 if compression != b'UN':
447 447 raise error.Abort(
448 448 _(
449 449 b'only uncompressed stream clone bundles are '
450 450 b'supported; got %s'
451 451 )
452 452 % compression
453 453 )
454 454
455 455 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
456 456 requireslen = struct.unpack(b'>H', fp.read(2))[0]
457 457 requires = fp.read(requireslen)
458 458
459 459 if not requires.endswith(b'\0'):
460 460 raise error.Abort(
461 461 _(
462 462 b'malformed stream clone bundle: '
463 463 b'requirements not properly encoded'
464 464 )
465 465 )
466 466
467 467 requirements = set(requires.rstrip(b'\0').split(b','))
468 468
469 469 return filecount, bytecount, requirements
470 470
471 471
472 472 def applybundlev1(repo, fp):
473 473 """Apply the content from a stream clone bundle version 1.
474 474
475 475 We assume the 4 byte header has been read and validated and the file handle
476 476 is at the 2 byte compression identifier.
477 477 """
478 478 if len(repo):
479 479 raise error.Abort(
480 480 _(b'cannot apply stream clone bundle on non-empty repo')
481 481 )
482 482
483 483 filecount, bytecount, requirements = readbundle1header(fp)
484 484 missingreqs = requirements - repo.supportedformats
485 485 if missingreqs:
486 486 raise error.Abort(
487 487 _(b'unable to apply stream clone: unsupported format: %s')
488 488 % b', '.join(sorted(missingreqs))
489 489 )
490 490
491 491 consumev1(repo, fp, filecount, bytecount)
492 492
493 493
494 494 class streamcloneapplier(object):
495 495 """Class to manage applying streaming clone bundles.
496 496
497 497 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
498 498 readers to perform bundle type-specific functionality.
499 499 """
500 500
501 501 def __init__(self, fh):
502 502 self._fh = fh
503 503
504 504 def apply(self, repo):
505 505 return applybundlev1(repo, self._fh)
506 506
507 507
508 508 # type of file to stream
509 509 _fileappend = 0 # append only file
510 510 _filefull = 1 # full snapshot file
511 511
512 512 # Source of the file
513 513 _srcstore = b's' # store (svfs)
514 514 _srccache = b'c' # cache (cache)
515 515
516 516 # This is its own function so extensions can override it.
517 517 def _walkstreamfullstorefiles(repo):
518 518 """list snapshot file from the store"""
519 519 fnames = []
520 520 if not repo.publishing():
521 521 fnames.append(b'phaseroots')
522 522 return fnames
523 523
524 524
525 525 def _filterfull(entry, copy, vfsmap):
526 526 """actually copy the snapshot files"""
527 527 src, name, ftype, data = entry
528 528 if ftype != _filefull:
529 529 return entry
530 530 return (src, name, ftype, copy(vfsmap[src].join(name)))
531 531
532 532
533 533 @contextlib.contextmanager
534 534 def maketempcopies():
535 535 """return a function to temporary copy file"""
536 536 files = []
537 537 try:
538 538
539 539 def copy(src):
540 540 fd, dst = pycompat.mkstemp()
541 541 os.close(fd)
542 542 files.append(dst)
543 543 util.copyfiles(src, dst, hardlink=True)
544 544 return dst
545 545
546 546 yield copy
547 547 finally:
548 548 for tmp in files:
549 549 util.tryunlink(tmp)
550 550
551 551
552 552 def _makemap(repo):
553 553 """make a (src -> vfs) map for the repo"""
554 554 vfsmap = {
555 555 _srcstore: repo.svfs,
556 556 _srccache: repo.cachevfs,
557 557 }
558 558 # we keep repo.vfs out of the map on purpose, there are too many dangers there
559 559 # (eg: .hg/hgrc)
560 560 assert repo.vfs not in vfsmap.values()
561 561
562 562 return vfsmap
563 563
564 564
565 565 def _emit2(repo, entries, totalfilesize):
566 566 """actually emit the stream bundle"""
567 567 vfsmap = _makemap(repo)
568 568 # we keep repo.vfs out of the map on purpose, there are too many dangers there
569 569 # (eg: .hg/hgrc),
570 570 #
571 571 # this assert is duplicated (from _makemap) as author might think this is
572 572 # fine, while this is really not fine.
573 573 if repo.vfs in vfsmap.values():
574 574 raise error.ProgrammingError(
575 575 b'repo.vfs must not be added to vfsmap for security reasons'
576 576 )
577 577
578 578 progress = repo.ui.makeprogress(
579 579 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
580 580 )
581 581 progress.update(0)
582 582 with maketempcopies() as copy, progress:
583 583 # copy is delayed until we are in the try
584 584 entries = [_filterfull(e, copy, vfsmap) for e in entries]
585 585 yield None # this release the lock on the repository
586 586 totalbytecount = 0
587 587
588 588 for src, name, ftype, data in entries:
589 589 vfs = vfsmap[src]
590 590 yield src
591 591 yield util.uvarintencode(len(name))
592 592 if ftype == _fileappend:
593 593 fp = vfs(name)
594 594 size = data
595 595 elif ftype == _filefull:
596 596 fp = open(data, b'rb')
597 597 size = util.fstat(fp).st_size
598 598 bytecount = 0
599 599 try:
600 600 yield util.uvarintencode(size)
601 601 yield name
602 602 if size <= 65536:
603 603 chunks = (fp.read(size),)
604 604 else:
605 605 chunks = util.filechunkiter(fp, limit=size)
606 606 for chunk in chunks:
607 607 bytecount += len(chunk)
608 608 totalbytecount += len(chunk)
609 609 progress.update(totalbytecount)
610 610 yield chunk
611 611 if bytecount != size:
612 612 # Would most likely be caused by a race due to `hg strip` or
613 613 # a revlog split
614 614 raise error.Abort(
615 615 _(
616 616 b'clone could only read %d bytes from %s, but '
617 617 b'expected %d bytes'
618 618 )
619 619 % (bytecount, name, size)
620 620 )
621 621 finally:
622 622 fp.close()
623 623
624 624
625 625 def _test_sync_point_walk_1(repo):
626 626 """a function for synchronisation during tests"""
627 627
628 628
629 629 def _test_sync_point_walk_2(repo):
630 630 """a function for synchronisation during tests"""
631 631
632 632
633 633 def _v2_walk(repo, includes, excludes, includeobsmarkers):
634 634 """emit a seris of files information useful to clone a repo
635 635
636 636 return (entries, totalfilesize)
637 637
638 638 entries is a list of tuples (vfs-key, file-path, file-type, size)
639 639
640 640 - `vfs-key`: is a key to the right vfs to write the file (see _makemap)
641 641 - `name`: file path of the file to copy (to be fed to the vfs)
642 642 - `file-type`: does this file need to be copied while holding the source lock?
643 643 - `size`: the size of the file (or None)
644 644 """
645 645 assert repo._currentlock(repo._lockref) is not None
646 646 entries = []
647 647 totalfilesize = 0
648 648
649 649 matcher = None
650 650 if includes or excludes:
651 651 matcher = narrowspec.match(repo.root, includes, excludes)
652 652
653 for rl_type, name, ename, size in _walkstreamfiles(repo, matcher):
653 for rl_type, name, size in _walkstreamfiles(repo, matcher):
654 654 if size:
655 655 ft = _fileappend
656 656 if rl_type & store.FILEFLAGS_VOLATILE:
657 657 ft = _filefull
658 658 entries.append((_srcstore, name, ft, size))
659 659 totalfilesize += size
660 660 for name in _walkstreamfullstorefiles(repo):
661 661 if repo.svfs.exists(name):
662 662 totalfilesize += repo.svfs.lstat(name).st_size
663 663 entries.append((_srcstore, name, _filefull, None))
664 664 if includeobsmarkers and repo.svfs.exists(b'obsstore'):
665 665 totalfilesize += repo.svfs.lstat(b'obsstore').st_size
666 666 entries.append((_srcstore, b'obsstore', _filefull, None))
667 667 for name in cacheutil.cachetocopy(repo):
668 668 if repo.cachevfs.exists(name):
669 669 totalfilesize += repo.cachevfs.lstat(name).st_size
670 670 entries.append((_srccache, name, _filefull, None))
671 671 return entries, totalfilesize
672 672
673 673
674 674 def generatev2(repo, includes, excludes, includeobsmarkers):
675 675 """Emit content for version 2 of a streaming clone.
676 676
677 677 the data stream consists of the following entries:
678 678 1) A char representing the file destination (eg: store or cache)
679 679 2) A varint containing the length of the filename
680 680 3) A varint containing the length of file data
681 681 4) N bytes containing the filename (the internal, store-agnostic form)
682 682 5) N bytes containing the file data
683 683
684 684 Returns a 3-tuple of (file count, file size, data iterator).
685 685 """
686 686
687 687 with repo.lock():
688 688
689 689 repo.ui.debug(b'scanning\n')
690 690
691 691 entries, totalfilesize = _v2_walk(
692 692 repo,
693 693 includes=includes,
694 694 excludes=excludes,
695 695 includeobsmarkers=includeobsmarkers,
696 696 )
697 697
698 698 chunks = _emit2(repo, entries, totalfilesize)
699 699 first = next(chunks)
700 700 assert first is None
701 701 _test_sync_point_walk_1(repo)
702 702 _test_sync_point_walk_2(repo)
703 703
704 704 return len(entries), totalfilesize, chunks
705 705
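# A hedged framing sketch of a single v2 entry, mirroring _emit2() above
# (hypothetical helper): destination byte, uvarint(len(name)),
# uvarint(len(data)), then the name and the raw data.
def _frame_v2_entry(src, name, data):
    return b''.join(
        [
            src,  # e.g. _srcstore (b's') or _srccache (b'c')
            util.uvarintencode(len(name)),
            util.uvarintencode(len(data)),
            name,
            data,
        ]
    )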
706 706
707 707 @contextlib.contextmanager
708 708 def nested(*ctxs):
709 709 this = ctxs[0]
710 710 rest = ctxs[1:]
711 711 with this:
712 712 if rest:
713 713 with nested(*rest):
714 714 yield
715 715 else:
716 716 yield
717 717
718 718
719 719 def consumev2(repo, fp, filecount, filesize):
720 720 """Apply the contents from a version 2 streaming clone.
721 721
722 722 Data is read from an object that only needs to provide a ``read(size)``
723 723 method.
724 724 """
725 725 with repo.lock():
726 726 repo.ui.status(
727 727 _(b'%d files to transfer, %s of data\n')
728 728 % (filecount, util.bytecount(filesize))
729 729 )
730 730
731 731 start = util.timer()
732 732 progress = repo.ui.makeprogress(
733 733 _(b'clone'), total=filesize, unit=_(b'bytes')
734 734 )
735 735 progress.update(0)
736 736
737 737 vfsmap = _makemap(repo)
738 738 # we keep repo.vfs out of the map on purpose, there are too many dangers
739 739 # there (eg: .hg/hgrc),
740 740 #
741 741 # this assert is duplicated (from _makemap) as author might think this
742 742 # is fine, while this is really not fine.
743 743 if repo.vfs in vfsmap.values():
744 744 raise error.ProgrammingError(
745 745 b'repo.vfs must not be added to vfsmap for security reasons'
746 746 )
747 747
748 748 with repo.transaction(b'clone'):
749 749 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
750 750 with nested(*ctxs):
751 751 for i in range(filecount):
752 752 src = util.readexactly(fp, 1)
753 753 vfs = vfsmap[src]
754 754 namelen = util.uvarintdecodestream(fp)
755 755 datalen = util.uvarintdecodestream(fp)
756 756
757 757 name = util.readexactly(fp, namelen)
758 758
759 759 if repo.ui.debugflag:
760 760 repo.ui.debug(
761 761 b'adding [%s] %s (%s)\n'
762 762 % (src, name, util.bytecount(datalen))
763 763 )
764 764
765 765 with vfs(name, b'w') as ofp:
766 766 for chunk in util.filechunkiter(fp, limit=datalen):
767 767 progress.increment(step=len(chunk))
768 768 ofp.write(chunk)
769 769
770 770 # force @filecache properties to be reloaded from
771 771 # streamclone-ed file at next access
772 772 repo.invalidate(clearfilecache=True)
773 773
774 774 elapsed = util.timer() - start
775 775 if elapsed <= 0:
776 776 elapsed = 0.001
777 777 repo.ui.status(
778 778 _(b'transferred %s in %.1f seconds (%s/sec)\n')
779 779 % (
780 780 util.bytecount(progress.pos),
781 781 elapsed,
782 782 util.bytecount(progress.pos / elapsed),
783 783 )
784 784 )
785 785 progress.complete()
786 786
787 787
788 788 def applybundlev2(repo, fp, filecount, filesize, requirements):
789 789 from . import localrepo
790 790
791 791 missingreqs = [r for r in requirements if r not in repo.supported]
792 792 if missingreqs:
793 793 raise error.Abort(
794 794 _(b'unable to apply stream clone: unsupported format: %s')
795 795 % b', '.join(sorted(missingreqs))
796 796 )
797 797
798 798 consumev2(repo, fp, filecount, filesize)
799 799
800 800 # new requirements = old non-format requirements +
801 801 # new format-related remote requirements
802 802 # requirements from the streamed-in repository
803 803 repo.requirements = set(requirements) | (
804 804 repo.requirements - repo.supportedformats
805 805 )
806 806 repo.svfs.options = localrepo.resolvestorevfsoptions(
807 807 repo.ui, repo.requirements, repo.features
808 808 )
809 809 scmutil.writereporequirements(repo)
810 810
811 811
812 812 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
813 813 hardlink = [True]
814 814
815 815 def copy_used():
816 816 hardlink[0] = False
817 817 progress.topic = _(b'copying')
818 818
819 819 for k, path, size in entries:
820 820 src_vfs = src_vfs_map[k]
821 821 dst_vfs = dst_vfs_map[k]
822 822 src_path = src_vfs.join(path)
823 823 dst_path = dst_vfs.join(path)
824 824 dirname = dst_vfs.dirname(path)
825 825 if not dst_vfs.exists(dirname):
826 826 dst_vfs.makedirs(dirname)
827 827 dst_vfs.register_file(path)
828 828 # XXX we could use the #nb_bytes argument.
829 829 util.copyfile(
830 830 src_path,
831 831 dst_path,
832 832 hardlink=hardlink[0],
833 833 no_hardlink_cb=copy_used,
834 834 check_fs_hardlink=False,
835 835 )
836 836 progress.increment()
837 837 return hardlink[0]
838 838
839 839
840 840 def local_copy(src_repo, dest_repo):
841 841 """copy all content from one local repository to another
842 842
843 843 This is useful for local clone"""
844 844 src_store_requirements = {
845 845 r
846 846 for r in src_repo.requirements
847 847 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
848 848 }
849 849 dest_store_requirements = {
850 850 r
851 851 for r in dest_repo.requirements
852 852 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
853 853 }
854 854 assert src_store_requirements == dest_store_requirements
855 855
856 856 with dest_repo.lock():
857 857 with src_repo.lock():
858 858
859 859 # bookmarks are not integrated into the streaming as they might use the
860 860 # `repo.vfs`, and there is too much sensitive data accessible
861 861 # through `repo.vfs` to expose it to streaming clone.
862 862 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
863 863 srcbookmarks = src_book_vfs.join(b'bookmarks')
864 864 bm_count = 0
865 865 if os.path.exists(srcbookmarks):
866 866 bm_count = 1
867 867
868 868 entries, totalfilesize = _v2_walk(
869 869 src_repo,
870 870 includes=None,
871 871 excludes=None,
872 872 includeobsmarkers=True,
873 873 )
874 874 src_vfs_map = _makemap(src_repo)
875 875 dest_vfs_map = _makemap(dest_repo)
876 876 progress = src_repo.ui.makeprogress(
877 877 topic=_(b'linking'),
878 878 total=len(entries) + bm_count,
879 879 unit=_(b'files'),
880 880 )
881 881 # copy files
882 882 #
883 883 # We could copy the full file while the source repository is locked
884 884 # and the other one without the lock. However, in the linking case,
885 885 # this would also require checks that nobody is appending any data
886 886 # to the files while we do the clone, so this is not done yet. We
887 887 # could do this blindly when copying files.
888 888 files = ((k, path, size) for k, path, ftype, size in entries)
889 889 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
890 890
891 891 # copy bookmarks over
892 892 if bm_count:
893 893 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
894 894 dstbookmarks = dst_book_vfs.join(b'bookmarks')
895 895 util.copyfile(srcbookmarks, dstbookmarks)
896 896 progress.complete()
897 897 if hardlink:
898 898 msg = b'linked %d files\n'
899 899 else:
900 900 msg = b'copied %d files\n'
901 901 src_repo.ui.debug(msg % (len(entries) + bm_count))
902 902
903 903 with dest_repo.transaction(b"localclone") as tr:
904 904 dest_repo.store.write(tr)
905 905
906 906 # clean up transaction files as they do not make sense
907 907 undo_files = [(dest_repo.svfs, b'undo.backupfiles')]
908 908 undo_files.extend(dest_repo.undofiles())
909 909 for undovfs, undofile in undo_files:
910 910 try:
911 911 undovfs.unlink(undofile)
912 912 except OSError as e:
913 913 if e.errno != errno.ENOENT:
914 914 msg = _(b'error removing %s: %s\n')
915 915 path = undovfs.join(undofile)
916 916 e_msg = stringutil.forcebytestr(e)
917 917 msg %= (path, e_msg)
918 918 dest_repo.ui.warn(msg)
@@ -1,649 +1,649 b''
1 1 # upgrade.py - functions for in place upgrade of Mercurial repository
2 2 #
3 3 # Copyright (c) 2016-present, Gregory Szorc
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import stat
11 11
12 12 from ..i18n import _
13 13 from ..pycompat import getattr
14 14 from .. import (
15 15 changelog,
16 16 error,
17 17 filelog,
18 18 manifest,
19 19 metadata,
20 20 pycompat,
21 21 requirements,
22 22 scmutil,
23 23 store,
24 24 util,
25 25 vfs as vfsmod,
26 26 )
27 27 from ..revlogutils import (
28 28 constants as revlogconst,
29 29 flagutil,
30 30 nodemap,
31 31 sidedata as sidedatamod,
32 32 )
33 33 from . import actions as upgrade_actions
34 34
35 35
36 36 def get_sidedata_helpers(srcrepo, dstrepo):
37 37 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
38 38 sequential = pycompat.iswindows or not use_w
39 39 if not sequential:
40 40 srcrepo.register_sidedata_computer(
41 41 revlogconst.KIND_CHANGELOG,
42 42 sidedatamod.SD_FILES,
43 43 (sidedatamod.SD_FILES,),
44 44 metadata._get_worker_sidedata_adder(srcrepo, dstrepo),
45 45 flagutil.REVIDX_HASCOPIESINFO,
46 46 replace=True,
47 47 )
48 48 return sidedatamod.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
49 49
50 50
51 51 def _revlogfrompath(repo, rl_type, path):
52 52 """Obtain a revlog from a repo path.
53 53
54 54 An instance of the appropriate class is returned.
55 55 """
56 56 if rl_type & store.FILEFLAGS_CHANGELOG:
57 57 return changelog.changelog(repo.svfs)
58 58 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
59 59 mandir = b''
60 60 if b'/' in path:
61 61 mandir = path.rsplit(b'/', 1)[0]
62 62 return manifest.manifestrevlog(
63 63 repo.nodeconstants, repo.svfs, tree=mandir
64 64 )
65 65 else:
66 66 # drop the extension and the `data/` prefix
67 67 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
68 68 if len(path_part) < 2:
69 69 msg = _(b'cannot recognize revlog from filename: %s')
70 70 msg %= path
71 71 raise error.Abort(msg)
72 72 path = path_part[1]
73 73 return filelog.filelog(repo.svfs, path)
74 74
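# A small illustrative check of the filelog branch above (not part of the
# module): the extension and the 'data/' prefix are stripped to recover the
# tracked file path handed to filelog.filelog().
def _filelog_path_example():
    path = b'data/foo/bar.txt.i'
    assert path.rsplit(b'.', 1)[0].split(b'/', 1)[1] == b'foo/bar.txt'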
75 75
76 76 def _copyrevlog(tr, destrepo, oldrl, rl_type, unencodedname):
77 77 """copy all relevant files for `oldrl` into `destrepo` store
78 78
79 79 Files are copied "as is" without any transformation. The copy is performed
80 80 without extra checks. Callers are responsible for making sure the copied
81 81 content is compatible with format of the destination repository.
82 82 """
83 83 oldrl = getattr(oldrl, '_revlog', oldrl)
84 84 newrl = _revlogfrompath(destrepo, rl_type, unencodedname)
85 85 newrl = getattr(newrl, '_revlog', newrl)
86 86
87 87 oldvfs = oldrl.opener
88 88 newvfs = newrl.opener
89 89 oldindex = oldvfs.join(oldrl._indexfile)
90 90 newindex = newvfs.join(newrl._indexfile)
91 91 olddata = oldvfs.join(oldrl._datafile)
92 92 newdata = newvfs.join(newrl._datafile)
93 93
94 94 with newvfs(newrl._indexfile, b'w'):
95 95 pass # create all the directories
96 96
97 97 util.copyfile(oldindex, newindex)
98 98 copydata = oldrl.opener.exists(oldrl._datafile)
99 99 if copydata:
100 100 util.copyfile(olddata, newdata)
101 101
102 102 if rl_type & store.FILEFLAGS_FILELOG:
103 103 destrepo.svfs.fncache.add(unencodedname)
104 104 if copydata:
105 105 destrepo.svfs.fncache.add(unencodedname[:-2] + b'.d')
106 106
107 107
108 108 UPGRADE_CHANGELOG = b"changelog"
109 109 UPGRADE_MANIFEST = b"manifest"
110 110 UPGRADE_FILELOGS = b"all-filelogs"
111 111
112 112 UPGRADE_ALL_REVLOGS = frozenset(
113 113 [UPGRADE_CHANGELOG, UPGRADE_MANIFEST, UPGRADE_FILELOGS]
114 114 )
115 115
116 116
117 117 def matchrevlog(revlogfilter, rl_type):
118 118 """check if a revlog is selected for cloning.
119 119
120 120 In other words, does the revlog need any updates, or can it be
121 121 blindly copied?
122 122
123 123 The store entry is checked against the passed filter"""
124 124 if rl_type & store.FILEFLAGS_CHANGELOG:
125 125 return UPGRADE_CHANGELOG in revlogfilter
126 126 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
127 127 return UPGRADE_MANIFEST in revlogfilter
128 128 assert rl_type & store.FILEFLAGS_FILELOG
129 129 return UPGRADE_FILELOGS in revlogfilter
130 130
131 131
132 132 def _perform_clone(
133 133 ui,
134 134 dstrepo,
135 135 tr,
136 136 old_revlog,
137 137 rl_type,
138 138 unencoded,
139 139 upgrade_op,
140 140 sidedata_helpers,
141 141 oncopiedrevision,
142 142 ):
143 143 """returns the new revlog object created"""
144 144 newrl = None
145 145 if matchrevlog(upgrade_op.revlogs_to_process, rl_type):
146 146 ui.note(
147 147 _(b'cloning %d revisions from %s\n') % (len(old_revlog), unencoded)
148 148 )
149 149 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
150 150 old_revlog.clone(
151 151 tr,
152 152 newrl,
153 153 addrevisioncb=oncopiedrevision,
154 154 deltareuse=upgrade_op.delta_reuse_mode,
155 155 forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
156 156 sidedata_helpers=sidedata_helpers,
157 157 )
158 158 else:
159 159 msg = _(b'blindly copying %s containing %i revisions\n')
160 160 ui.note(msg % (unencoded, len(old_revlog)))
161 161 _copyrevlog(tr, dstrepo, old_revlog, rl_type, unencoded)
162 162
163 163 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
164 164 return newrl
165 165
166 166
167 167 def _clonerevlogs(
168 168 ui,
169 169 srcrepo,
170 170 dstrepo,
171 171 tr,
172 172 upgrade_op,
173 173 ):
174 174 """Copy revlogs between 2 repos."""
175 175 revcount = 0
176 176 srcsize = 0
177 177 srcrawsize = 0
178 178 dstsize = 0
179 179 fcount = 0
180 180 frevcount = 0
181 181 fsrcsize = 0
182 182 frawsize = 0
183 183 fdstsize = 0
184 184 mcount = 0
185 185 mrevcount = 0
186 186 msrcsize = 0
187 187 mrawsize = 0
188 188 mdstsize = 0
189 189 crevcount = 0
190 190 csrcsize = 0
191 191 crawsize = 0
192 192 cdstsize = 0
193 193
194 194 alldatafiles = list(srcrepo.store.walk())
195 195 # mapping of data files which needs to be cloned
196 196 # key is unencoded filename
197 197 # value is revlog_object_from_srcrepo
198 198 manifests = {}
199 199 changelogs = {}
200 200 filelogs = {}
201 201
202 202 # Perform a pass to collect metadata. This validates we can open all
203 203 # source files and allows a unified progress bar to be displayed.
204 for rl_type, unencoded, encoded, size in alldatafiles:
204 for rl_type, unencoded, size in alldatafiles:
205 205 if not rl_type & store.FILEFLAGS_REVLOG_MAIN:
206 206 continue
207 207
208 208 # the store.walk function will wrongly pick up transaction backups and
209 209 # get confused. As a quick fix for the 5.9 release, we ignore those.
210 210 # (this is not a module constant because it seems better to keep the
211 211 # hack together)
212 212 skip_undo = (
213 213 b'undo.backup.00changelog.i',
214 214 b'undo.backup.00manifest.i',
215 215 )
216 216 if unencoded in skip_undo:
217 217 continue
218 218
219 219 rl = _revlogfrompath(srcrepo, rl_type, unencoded)
220 220
221 221 info = rl.storageinfo(
222 222 exclusivefiles=True,
223 223 revisionscount=True,
224 224 trackedsize=True,
225 225 storedsize=True,
226 226 )
227 227
228 228 revcount += info[b'revisionscount'] or 0
229 229 datasize = info[b'storedsize'] or 0
230 230 rawsize = info[b'trackedsize'] or 0
231 231
232 232 srcsize += datasize
233 233 srcrawsize += rawsize
234 234
235 235 # This is for the separate progress bars.
236 236 if rl_type & store.FILEFLAGS_CHANGELOG:
237 237 changelogs[unencoded] = (rl_type, rl)
238 238 crevcount += len(rl)
239 239 csrcsize += datasize
240 240 crawsize += rawsize
241 241 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
242 242 manifests[unencoded] = (rl_type, rl)
243 243 mcount += 1
244 244 mrevcount += len(rl)
245 245 msrcsize += datasize
246 246 mrawsize += rawsize
247 247 elif rl_type & store.FILEFLAGS_FILELOG:
248 248 filelogs[unencoded] = (rl_type, rl)
249 249 fcount += 1
250 250 frevcount += len(rl)
251 251 fsrcsize += datasize
252 252 frawsize += rawsize
253 253 else:
254 254 raise error.ProgrammingError(b'unknown revlog type')
255 255
256 256 if not revcount:
257 257 return
258 258
259 259 ui.status(
260 260 _(
261 261 b'migrating %d total revisions (%d in filelogs, %d in manifests, '
262 262 b'%d in changelog)\n'
263 263 )
264 264 % (revcount, frevcount, mrevcount, crevcount)
265 265 )
266 266 ui.status(
267 267 _(b'migrating %s in store; %s tracked data\n')
268 268 % ((util.bytecount(srcsize), util.bytecount(srcrawsize)))
269 269 )
270 270
271 271 # Used to keep track of progress.
272 272 progress = None
273 273
274 274 def oncopiedrevision(rl, rev, node):
275 275 progress.increment()
276 276
277 277 sidedata_helpers = get_sidedata_helpers(srcrepo, dstrepo)
278 278
279 279 # Migrating filelogs
280 280 ui.status(
281 281 _(
282 282 b'migrating %d filelogs containing %d revisions '
283 283 b'(%s in store; %s tracked data)\n'
284 284 )
285 285 % (
286 286 fcount,
287 287 frevcount,
288 288 util.bytecount(fsrcsize),
289 289 util.bytecount(frawsize),
290 290 )
291 291 )
292 292 progress = srcrepo.ui.makeprogress(_(b'file revisions'), total=frevcount)
293 293 for unencoded, (rl_type, oldrl) in sorted(filelogs.items()):
294 294 newrl = _perform_clone(
295 295 ui,
296 296 dstrepo,
297 297 tr,
298 298 oldrl,
299 299 rl_type,
300 300 unencoded,
301 301 upgrade_op,
302 302 sidedata_helpers,
303 303 oncopiedrevision,
304 304 )
305 305 info = newrl.storageinfo(storedsize=True)
306 306 fdstsize += info[b'storedsize'] or 0
307 307 ui.status(
308 308 _(
309 309 b'finished migrating %d filelog revisions across %d '
310 310 b'filelogs; change in size: %s\n'
311 311 )
312 312 % (frevcount, fcount, util.bytecount(fdstsize - fsrcsize))
313 313 )
314 314
315 315 # Migrating manifests
316 316 ui.status(
317 317 _(
318 318 b'migrating %d manifests containing %d revisions '
319 319 b'(%s in store; %s tracked data)\n'
320 320 )
321 321 % (
322 322 mcount,
323 323 mrevcount,
324 324 util.bytecount(msrcsize),
325 325 util.bytecount(mrawsize),
326 326 )
327 327 )
328 328 if progress:
329 329 progress.complete()
330 330 progress = srcrepo.ui.makeprogress(
331 331 _(b'manifest revisions'), total=mrevcount
332 332 )
333 333 for unencoded, (rl_type, oldrl) in sorted(manifests.items()):
334 334 newrl = _perform_clone(
335 335 ui,
336 336 dstrepo,
337 337 tr,
338 338 oldrl,
339 339 rl_type,
340 340 unencoded,
341 341 upgrade_op,
342 342 sidedata_helpers,
343 343 oncopiedrevision,
344 344 )
345 345 info = newrl.storageinfo(storedsize=True)
346 346 mdstsize += info[b'storedsize'] or 0
347 347 ui.status(
348 348 _(
349 349 b'finished migrating %d manifest revisions across %d '
350 350 b'manifests; change in size: %s\n'
351 351 )
352 352 % (mrevcount, mcount, util.bytecount(mdstsize - msrcsize))
353 353 )
354 354
355 355 # Migrating changelog
356 356 ui.status(
357 357 _(
358 358 b'migrating changelog containing %d revisions '
359 359 b'(%s in store; %s tracked data)\n'
360 360 )
361 361 % (
362 362 crevcount,
363 363 util.bytecount(csrcsize),
364 364 util.bytecount(crawsize),
365 365 )
366 366 )
367 367 if progress:
368 368 progress.complete()
369 369 progress = srcrepo.ui.makeprogress(
370 370 _(b'changelog revisions'), total=crevcount
371 371 )
372 372 for unencoded, (rl_type, oldrl) in sorted(changelogs.items()):
373 373 newrl = _perform_clone(
374 374 ui,
375 375 dstrepo,
376 376 tr,
377 377 oldrl,
378 378 rl_type,
379 379 unencoded,
380 380 upgrade_op,
381 381 sidedata_helpers,
382 382 oncopiedrevision,
383 383 )
384 384 info = newrl.storageinfo(storedsize=True)
385 385 cdstsize += info[b'storedsize'] or 0
386 386 progress.complete()
387 387 ui.status(
388 388 _(
389 389 b'finished migrating %d changelog revisions; change in size: '
390 390 b'%s\n'
391 391 )
392 392 % (crevcount, util.bytecount(cdstsize - csrcsize))
393 393 )
394 394
395 395 dstsize = fdstsize + mdstsize + cdstsize
396 396 ui.status(
397 397 _(
398 398 b'finished migrating %d total revisions; total change in store '
399 399 b'size: %s\n'
400 400 )
401 401 % (revcount, util.bytecount(dstsize - srcsize))
402 402 )
403 403
404 404
405 405 def _files_to_copy_post_revlog_clone(srcrepo):
406 406 """yields files which should be copied to destination after revlogs
407 407 are cloned"""
408 408 for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)):
409 409 # don't copy revlogs as they are already cloned
410 410 if store.revlog_type(path) is not None:
411 411 continue
412 412 # Skip transaction related files.
413 413 if path.startswith(b'undo'):
414 414 continue
415 415 # Only copy regular files.
416 416 if kind != stat.S_IFREG:
417 417 continue
418 418 # Skip other skipped files.
419 419 if path in (b'lock', b'fncache'):
420 420 continue
421 421 # TODO: should we skip cache too?
422 422
423 423 yield path
424 424
425 425
426 426 def _replacestores(currentrepo, upgradedrepo, backupvfs, upgrade_op):
427 427 """Replace the stores after current repository is upgraded
428 428
429 429 Creates a backup of current repository store at backup path
430 430 Replaces upgraded store files in current repo from upgraded one
431 431
432 432 Arguments:
433 433 currentrepo: repo object of current repository
434 434 upgradedrepo: repo object of the upgraded data
435 435 backupvfs: vfs object for the backup path
436 436 upgrade_op: upgrade operation object
437 437 used to decide what is upgraded
438 438 """
439 439 # TODO: don't blindly rename everything in store
440 440 # There can be upgrades where store is not touched at all
441 441 if upgrade_op.backup_store:
442 442 util.rename(currentrepo.spath, backupvfs.join(b'store'))
443 443 else:
444 444 currentrepo.vfs.rmtree(b'store', forcibly=True)
445 445 util.rename(upgradedrepo.spath, currentrepo.spath)
446 446
447 447
448 448 def finishdatamigration(ui, srcrepo, dstrepo, requirements):
449 449 """Hook point for extensions to perform additional actions during upgrade.
450 450
451 451 This function is called after revlogs and store files have been copied but
452 452 before the new store is swapped into the original location.
453 453 """
454 454
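# Editor's note: a hedged sketch, not part of this changeset, showing how a
# third-party extension could hook the empty finishdatamigration() hook point
# above. The extension-specific file name 'myext-cache' and the import path of
# this engine module are assumptions made purely for illustration.
def _example_uisetup(ui):
    from mercurial import extensions
    from mercurial.upgrade_utils import engine as upgrade_engine  # assumed path

    def wrapped(orig, ui, srcrepo, dstrepo, requirements):
        orig(ui, srcrepo, dstrepo, requirements)
        # Copy an extension-specific store file into the temporary repository
        # so it survives the store swap performed later in upgrade().
        if srcrepo.svfs.exists(b'myext-cache'):
            dstrepo.svfs.write(b'myext-cache', srcrepo.svfs.read(b'myext-cache'))

    extensions.wrapfunction(upgrade_engine, 'finishdatamigration', wrapped)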
455 455
456 456 def upgrade(ui, srcrepo, dstrepo, upgrade_op):
457 457 """Do the low-level work of upgrading a repository.
458 458
459 459 The upgrade is effectively performed as a copy between a source
460 460 repository and a temporary destination repository.
461 461
462 462 The source repository is unmodified for as long as possible so the
463 463 upgrade can abort at any time without causing loss of service for
464 464 readers and without corrupting the source repository.
465 465 """
466 466 assert srcrepo.currentwlock()
467 467 assert dstrepo.currentwlock()
468 468 backuppath = None
469 469 backupvfs = None
470 470
471 471 ui.status(
472 472 _(
473 473 b'(it is safe to interrupt this process any time before '
474 474 b'data migration completes)\n'
475 475 )
476 476 )
477 477
478 478 if upgrade_actions.dirstatev2 in upgrade_op.upgrade_actions:
479 479 ui.status(_(b'upgrading to dirstate-v2 from v1\n'))
480 480 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v1', b'v2')
481 481 upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatev2)
482 482
483 483 if upgrade_actions.dirstatev2 in upgrade_op.removed_actions:
484 484 ui.status(_(b'downgrading from dirstate-v2 to v1\n'))
485 485 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v2', b'v1')
486 486 upgrade_op.removed_actions.remove(upgrade_actions.dirstatev2)
487 487
488 488 if not (upgrade_op.upgrade_actions or upgrade_op.removed_actions):
489 489 return
490 490
491 491 if upgrade_op.requirements_only:
492 492 ui.status(_(b'upgrading repository requirements\n'))
493 493 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
494 494 # if there is only one action and that is persistent nodemap upgrade
495 495 # directly write the nodemap file and update requirements instead of going
496 496 # through the whole cloning process
497 497 elif (
498 498 len(upgrade_op.upgrade_actions) == 1
499 499 and b'persistent-nodemap' in upgrade_op.upgrade_actions_names
500 500 and not upgrade_op.removed_actions
501 501 ):
502 502 ui.status(
503 503 _(b'upgrading repository to use persistent nodemap feature\n')
504 504 )
505 505 with srcrepo.transaction(b'upgrade') as tr:
506 506 unfi = srcrepo.unfiltered()
507 507 cl = unfi.changelog
508 508 nodemap.persist_nodemap(tr, cl, force=True)
509 509 # we want to directly operate on the underlying revlog to force
510 510 # create a nodemap file. This is fine since this is upgrade code
511 511 # and it heavily relies on repository being revlog based
512 512 # hence accessing private attributes can be justified
513 513 nodemap.persist_nodemap(
514 514 tr, unfi.manifestlog._rootstore._revlog, force=True
515 515 )
516 516 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
517 517 elif (
518 518 len(upgrade_op.removed_actions) == 1
519 519 and [
520 520 x
521 521 for x in upgrade_op.removed_actions
522 522 if x.name == b'persistent-nodemap'
523 523 ]
524 524 and not upgrade_op.upgrade_actions
525 525 ):
526 526 ui.status(
527 527 _(b'downgrading repository to not use persistent nodemap feature\n')
528 528 )
529 529 with srcrepo.transaction(b'upgrade') as tr:
530 530 unfi = srcrepo.unfiltered()
531 531 cl = unfi.changelog
532 532 nodemap.delete_nodemap(tr, srcrepo, cl)
533 533 # check comment 20 lines above for accessing private attributes
534 534 nodemap.delete_nodemap(
535 535 tr, srcrepo, unfi.manifestlog._rootstore._revlog
536 536 )
537 537 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
538 538 else:
539 539 with dstrepo.transaction(b'upgrade') as tr:
540 540 _clonerevlogs(
541 541 ui,
542 542 srcrepo,
543 543 dstrepo,
544 544 tr,
545 545 upgrade_op,
546 546 )
547 547
548 548 # Now copy other files in the store directory.
549 549 for p in _files_to_copy_post_revlog_clone(srcrepo):
550 550 srcrepo.ui.status(_(b'copying %s\n') % p)
551 551 src = srcrepo.store.rawvfs.join(p)
552 552 dst = dstrepo.store.rawvfs.join(p)
553 553 util.copyfile(src, dst, copystat=True)
554 554
555 555 finishdatamigration(ui, srcrepo, dstrepo, requirements)
556 556
557 557 ui.status(_(b'data fully upgraded in a temporary repository\n'))
558 558
559 559 if upgrade_op.backup_store:
560 560 backuppath = pycompat.mkdtemp(
561 561 prefix=b'upgradebackup.', dir=srcrepo.path
562 562 )
563 563 backupvfs = vfsmod.vfs(backuppath)
564 564
565 565 # Make a backup of requires file first, as it is the first to be modified.
566 566 util.copyfile(
567 567 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
568 568 )
569 569
570 570 # We install an arbitrary requirement that clients must not support
571 571 # as a mechanism to lock out new clients during the data swap. This is
572 572 # better than allowing a client to continue while the repository is in
573 573 # an inconsistent state.
574 574 ui.status(
575 575 _(
576 576 b'marking source repository as being upgraded; clients will be '
577 577 b'unable to read from repository\n'
578 578 )
579 579 )
580 580 scmutil.writereporequirements(
581 581 srcrepo, srcrepo.requirements | {b'upgradeinprogress'}
582 582 )
583 583
584 584 ui.status(_(b'starting in-place swap of repository data\n'))
585 585 if upgrade_op.backup_store:
586 586 ui.status(
587 587 _(b'replaced files will be backed up at %s\n') % backuppath
588 588 )
589 589
590 590 # Now swap in the new store directory. Doing it as a rename should make
591 591 # the operation nearly instantaneous and atomic (at least in well-behaved
592 592 # environments).
593 593 ui.status(_(b'replacing store...\n'))
594 594 tstart = util.timer()
595 595 _replacestores(srcrepo, dstrepo, backupvfs, upgrade_op)
596 596 elapsed = util.timer() - tstart
597 597 ui.status(
598 598 _(
599 599 b'store replacement complete; repository was inconsistent for '
600 600 b'%0.1fs\n'
601 601 )
602 602 % elapsed
603 603 )
604 604
605 605 # We first write the requirements file. Any new requirements will lock
606 606 # out legacy clients.
607 607 ui.status(
608 608 _(
609 609 b'finalizing requirements file and making repository readable '
610 610 b'again\n'
611 611 )
612 612 )
613 613 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
614 614
615 615 if upgrade_op.backup_store:
616 616 # The lock file from the old store won't be removed because nothing has a
617 617 # reference to its new location. So clean it up manually. Alternatively, we
618 618 # could update srcrepo.svfs and other variables to point to the new
619 619 # location. This is simpler.
620 620 assert backupvfs is not None # help pytype
621 621 backupvfs.unlink(b'store/lock')
622 622
623 623 return backuppath
624 624
625 625
626 626 def upgrade_dirstate(ui, srcrepo, upgrade_op, old, new):
627 627 if upgrade_op.backup_store:
628 628 backuppath = pycompat.mkdtemp(
629 629 prefix=b'upgradebackup.', dir=srcrepo.path
630 630 )
631 631 ui.status(_(b'replaced files will be backed up at %s\n') % backuppath)
632 632 backupvfs = vfsmod.vfs(backuppath)
633 633 util.copyfile(
634 634 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
635 635 )
636 636 util.copyfile(
637 637 srcrepo.vfs.join(b'dirstate'), backupvfs.join(b'dirstate')
638 638 )
639 639
640 640 assert srcrepo.dirstate._use_dirstate_v2 == (old == b'v2')
641 641 srcrepo.dirstate._map._use_dirstate_tree = True
642 642 srcrepo.dirstate._map.preload()
643 643 srcrepo.dirstate._use_dirstate_v2 = new == b'v2'
644 644 srcrepo.dirstate._map._use_dirstate_v2 = srcrepo.dirstate._use_dirstate_v2
645 645 srcrepo.dirstate._dirty = True
646 646 srcrepo.vfs.unlink(b'dirstate')
647 647 srcrepo.dirstate.write(None)
648 648
649 649 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
@@ -1,586 +1,588 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import short
14 14 from .utils import stringutil
15 15
16 16 from . import (
17 17 error,
18 18 pycompat,
19 19 revlog,
20 20 util,
21 21 )
22 22
23 23 VERIFY_DEFAULT = 0
24 24 VERIFY_FULL = 1
25 25
26 26
27 27 def verify(repo, level=None):
28 28 with repo.lock():
29 29 v = verifier(repo, level)
30 30 return v.verify()
31 31
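# Editor's note: a minimal usage sketch of the entry point above, added for
# illustration only; it assumes a repository in the current working directory.
# The return value (0 when clean, 1 when integrity errors were found) matches
# the behaviour documented in verifier.verify() below.
def _example_verify_cwd():
    from mercurial import hg, ui as uimod

    repo = hg.repository(uimod.ui.load(), b'.')
    return verify(repo)  # or verify(repo, level=VERIFY_FULL) for deeper checks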
32 32
33 33 def _normpath(f):
34 34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 35 # converted repo may contain repeated slashes
36 36 while b'//' in f:
37 37 f = f.replace(b'//', b'/')
38 38 return f
39 39
40 40
41 41 HINT_FNCACHE = _(
42 42 b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
43 43 )
44 44
45 45 WARN_PARENT_DIR_UNKNOWN_REV = _(
46 46 b"parent-directory manifest refers to unknown revision %s"
47 47 )
48 48
49 49 WARN_UNKNOWN_COPY_SOURCE = _(
50 50 b"warning: copy source of '%s' not in parents of %s"
51 51 )
52 52
53 53 WARN_NULLID_COPY_SOURCE = _(
54 54 b"warning: %s@%s: copy source revision is nullid %s:%s\n"
55 55 )
56 56
57 57
58 58 class verifier(object):
59 59 def __init__(self, repo, level=None):
60 60 self.repo = repo.unfiltered()
61 61 self.ui = repo.ui
62 62 self.match = repo.narrowmatch()
63 63 if level is None:
64 64 level = VERIFY_DEFAULT
65 65 self._level = level
66 66 self.badrevs = set()
67 67 self.errors = 0
68 68 self.warnings = 0
69 69 self.havecl = len(repo.changelog) > 0
70 70 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
71 71 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
72 72 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
73 73 self.refersmf = False
74 74 self.fncachewarned = False
75 75 # developer config: verify.skipflags
76 76 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
77 77 self.warnorphanstorefiles = True
78 78
79 79 def _warn(self, msg):
80 80 """record a "warning" level issue"""
81 81 self.ui.warn(msg + b"\n")
82 82 self.warnings += 1
83 83
84 84 def _err(self, linkrev, msg, filename=None):
85 85 """record a "error" level issue"""
86 86 if linkrev is not None:
87 87 self.badrevs.add(linkrev)
88 88 linkrev = b"%d" % linkrev
89 89 else:
90 90 linkrev = b'?'
91 91 msg = b"%s: %s" % (linkrev, msg)
92 92 if filename:
93 93 msg = b"%s@%s" % (filename, msg)
94 94 self.ui.warn(b" " + msg + b"\n")
95 95 self.errors += 1
96 96
97 97 def _exc(self, linkrev, msg, inst, filename=None):
98 98 """record exception raised during the verify process"""
99 99 fmsg = stringutil.forcebytestr(inst)
100 100 if not fmsg:
101 101 fmsg = pycompat.byterepr(inst)
102 102 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
103 103
104 104 def _checkrevlog(self, obj, name, linkrev):
105 105 """verify high level property of a revlog
106 106
107 107 - revlog is present,
108 108 - revlog is non-empty,
109 109 - sizes (index and data) are correct,
110 110 - revlog's format version is correct.
111 111 """
112 112 if not len(obj) and (self.havecl or self.havemf):
113 113 self._err(linkrev, _(b"empty or missing %s") % name)
114 114 return
115 115
116 116 d = obj.checksize()
117 117 if d[0]:
118 118 self._err(None, _(b"data length off by %d bytes") % d[0], name)
119 119 if d[1]:
120 120 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
121 121
122 122 if obj._format_version != revlog.REVLOGV0:
123 123 if not self.revlogv1:
124 124 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
125 125 elif self.revlogv1:
126 126 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
127 127
128 128 def _checkentry(self, obj, i, node, seen, linkrevs, f):
129 129 """verify a single revlog entry
130 130
131 131 arguments are:
132 132 - obj: the source revlog
133 133 - i: the revision number
134 134 - node: the revision node id
135 135 - seen: nodes previously seen for this revlog
136 136 - linkrevs: [changelog-revisions] introducing "node"
137 137 - f: string label ("changelog", "manifest", or filename)
138 138
139 139 Performs the following checks:
140 140 - linkrev points to an existing changelog revision,
141 141 - linkrev points to a changelog revision that introduces this revision,
142 142 - linkrev points to the lowest of these changesets,
143 143 - both parents exist in the revlog,
144 144 - the revision is not duplicated.
145 145
146 146 Return the linkrev of the revision (or None for changelog's revisions).
147 147 """
148 148 lr = obj.linkrev(obj.rev(node))
149 149 if lr < 0 or (self.havecl and lr not in linkrevs):
150 150 if lr < 0 or lr >= len(self.repo.changelog):
151 151 msg = _(b"rev %d points to nonexistent changeset %d")
152 152 else:
153 153 msg = _(b"rev %d points to unexpected changeset %d")
154 154 self._err(None, msg % (i, lr), f)
155 155 if linkrevs:
156 156 if f and len(linkrevs) > 1:
157 157 try:
158 158 # attempt to filter down to real linkrevs
159 159 linkrevs = []
160 160 for lr in linkrevs:
161 161 if self.lrugetctx(lr)[f].filenode() == node:
162 162 linkrevs.append(lr)
163 163 except Exception:
164 164 pass
165 165 msg = _(b" (expected %s)")
166 166 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
167 167 self._warn(msg)
168 168 lr = None # can't be trusted
169 169
170 170 try:
171 171 p1, p2 = obj.parents(node)
172 172 if p1 not in seen and p1 != self.repo.nullid:
173 173 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
174 174 self._err(lr, msg, f)
175 175 if p2 not in seen and p2 != self.repo.nullid:
176 176 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
177 177 self._err(lr, msg, f)
178 178 except Exception as inst:
179 179 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
180 180
181 181 if node in seen:
182 182 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
183 183 seen[node] = i
184 184 return lr
185 185
186 186 def verify(self):
187 187 """verify the content of the Mercurial repository
188 188
189 189 This method runs all verifications, displaying issues as they are found.
190 190
191 191 return 1 if any errors have been encountered, 0 otherwise."""
192 192 # initial validation and generic report
193 193 repo = self.repo
194 194 ui = repo.ui
195 195 if not repo.url().startswith(b'file:'):
196 196 raise error.Abort(_(b"cannot verify bundle or remote repos"))
197 197
198 198 if os.path.exists(repo.sjoin(b"journal")):
199 199 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
200 200
201 201 if ui.verbose or not self.revlogv1:
202 202 ui.status(
203 203 _(b"repository uses revlog format %d\n")
204 204 % (self.revlogv1 and 1 or 0)
205 205 )
206 206
207 207 # data verification
208 208 mflinkrevs, filelinkrevs = self._verifychangelog()
209 209 filenodes = self._verifymanifest(mflinkrevs)
210 210 del mflinkrevs
211 211 self._crosscheckfiles(filelinkrevs, filenodes)
212 212 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
213 213
214 214 # final report
215 215 ui.status(
216 216 _(b"checked %d changesets with %d changes to %d files\n")
217 217 % (len(repo.changelog), filerevisions, totalfiles)
218 218 )
219 219 if self.warnings:
220 220 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
221 221 if self.fncachewarned:
222 222 ui.warn(HINT_FNCACHE)
223 223 if self.errors:
224 224 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
225 225 if self.badrevs:
226 226 msg = _(b"(first damaged changeset appears to be %d)\n")
227 227 msg %= min(self.badrevs)
228 228 ui.warn(msg)
229 229 return 1
230 230 return 0
231 231
232 232 def _verifychangelog(self):
233 233 """verify the changelog of a repository
234 234
235 235 The following checks are performed:
236 236 - all of `_checkrevlog` checks,
237 237 - all of `_checkentry` checks (for each revisions),
238 238 - each revision can be read.
239 239
240 240 The function returns some of the data observed in the changesets as a
241 241 (mflinkrevs, filelinkrevs) tuple:
242 242 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
243 243 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
244 244
245 245 If a matcher was specified, filelinkrevs will only contain matched
246 246 files.
247 247 """
248 248 ui = self.ui
249 249 repo = self.repo
250 250 match = self.match
251 251 cl = repo.changelog
252 252
253 253 ui.status(_(b"checking changesets\n"))
254 254 mflinkrevs = {}
255 255 filelinkrevs = {}
256 256 seen = {}
257 257 self._checkrevlog(cl, b"changelog", 0)
258 258 progress = ui.makeprogress(
259 259 _(b'checking'), unit=_(b'changesets'), total=len(repo)
260 260 )
261 261 for i in repo:
262 262 progress.update(i)
263 263 n = cl.node(i)
264 264 self._checkentry(cl, i, n, seen, [i], b"changelog")
265 265
266 266 try:
267 267 changes = cl.read(n)
268 268 if changes[0] != self.repo.nullid:
269 269 mflinkrevs.setdefault(changes[0], []).append(i)
270 270 self.refersmf = True
271 271 for f in changes[3]:
272 272 if match(f):
273 273 filelinkrevs.setdefault(_normpath(f), []).append(i)
274 274 except Exception as inst:
275 275 self.refersmf = True
276 276 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
277 277 progress.complete()
278 278 return mflinkrevs, filelinkrevs
279 279
280 280 def _verifymanifest(
281 281 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
282 282 ):
283 283 """verify the manifestlog content
284 284
285 285 Inputs:
286 286 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
287 287 - dir: a subdirectory to check (for tree manifest repo)
288 288 - storefiles: set of currently "orphan" files.
289 289 - subdirprogress: a progress object
290 290
291 291 This function checks:
292 292 * all of `_checkrevlog` checks (for all manifest related revlogs)
293 293 * all of `_checkentry` checks (for all manifest related revisions)
294 294 * nodes for subdirectory exists in the sub-directory manifest
295 295 * each manifest entry has a file path
296 296 * each manifest node referred to in mflinkrevs exists in the manifest log
297 297
298 298 If tree manifests are in use and a matcher is specified, only the
299 299 sub-directories matching it will be verified.
300 300
301 301 return a two level mapping:
302 302 {"path" -> { filenode -> changelog-revision}}
303 303
304 304 This mapping primarily contains entries for every file in the
305 305 repository. In addition, when tree-manifest is used, it also contains
306 306 sub-directory entries.
307 307
308 308 If a matcher is provided, only matching paths will be included.
309 309 """
310 310 repo = self.repo
311 311 ui = self.ui
312 312 match = self.match
313 313 mfl = self.repo.manifestlog
314 314 mf = mfl.getstorage(dir)
315 315
316 316 if not dir:
317 317 self.ui.status(_(b"checking manifests\n"))
318 318
319 319 filenodes = {}
320 320 subdirnodes = {}
321 321 seen = {}
322 322 label = b"manifest"
323 323 if dir:
324 324 label = dir
325 325 revlogfiles = mf.files()
326 326 storefiles.difference_update(revlogfiles)
327 327 if subdirprogress: # should be true since we're in a subdirectory
328 328 subdirprogress.increment()
329 329 if self.refersmf:
330 330 # Do not check manifest if there are only changelog entries with
331 331 # null manifests.
332 332 self._checkrevlog(mf._revlog, label, 0)
333 333 progress = ui.makeprogress(
334 334 _(b'checking'), unit=_(b'manifests'), total=len(mf)
335 335 )
336 336 for i in mf:
337 337 if not dir:
338 338 progress.update(i)
339 339 n = mf.node(i)
340 340 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
341 341 if n in mflinkrevs:
342 342 del mflinkrevs[n]
343 343 elif dir:
344 344 msg = _(b"%s not in parent-directory manifest") % short(n)
345 345 self._err(lr, msg, label)
346 346 else:
347 347 self._err(lr, _(b"%s not in changesets") % short(n), label)
348 348
349 349 try:
350 350 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
351 351 for f, fn, fl in mfdelta.iterentries():
352 352 if not f:
353 353 self._err(lr, _(b"entry without name in manifest"))
354 354 elif f == b"/dev/null": # ignore this in very old repos
355 355 continue
356 356 fullpath = dir + _normpath(f)
357 357 if fl == b't':
358 358 if not match.visitdir(fullpath):
359 359 continue
360 360 sdn = subdirnodes.setdefault(fullpath + b'/', {})
361 361 sdn.setdefault(fn, []).append(lr)
362 362 else:
363 363 if not match(fullpath):
364 364 continue
365 365 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
366 366 except Exception as inst:
367 367 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
368 368 if self._level >= VERIFY_FULL:
369 369 try:
370 370 # Various issues can affect manifests. So we read each full
371 371 # text from storage. This triggers the checks from the core
372 372 # code (eg: hash verification, filenames are ordered, etc.)
373 373 mfdelta = mfl.get(dir, n).read()
374 374 except Exception as inst:
375 375 msg = _(b"reading full manifest %s") % short(n)
376 376 self._exc(lr, msg, inst, label)
377 377
378 378 if not dir:
379 379 progress.complete()
380 380
381 381 if self.havemf:
382 382 # since we delete entries in `mflinkrevs` during iteration, any
383 383 # remaining entries are "missing". We need to issue errors for them.
384 384 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
385 385 for c, m in sorted(changesetpairs):
386 386 if dir:
387 387 self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
388 388 else:
389 389 msg = _(b"changeset refers to unknown revision %s")
390 390 msg %= short(m)
391 391 self._err(c, msg, label)
392 392
393 393 if not dir and subdirnodes:
394 394 self.ui.status(_(b"checking directory manifests\n"))
395 395 storefiles = set()
396 396 subdirs = set()
397 397 revlogv1 = self.revlogv1
398 for t, f, f2, size in repo.store.datafiles():
399 if not f:
400 self._err(None, _(b"cannot decode filename '%s'") % f2)
401 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
398 undecodable = []
399 for t, f, size in repo.store.datafiles(undecodable=undecodable):
400 if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
402 401 storefiles.add(_normpath(f))
403 402 subdirs.add(os.path.dirname(f))
403 for f in undecodable:
404 self._err(None, _(b"cannot decode filename '%s'") % f)
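# Editor's note (descriptive comment, not part of the original file): as of
# this changeset, store.datafiles() yields (type, filename, size) triples with
# a single filename, and any name it fails to decode is appended to the
# caller-supplied ``undecodable`` list instead of being yielded alongside an
# empty decoded name, as the removed lines above did.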
404 405 subdirprogress = ui.makeprogress(
405 406 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
406 407 )
407 408
408 409 for subdir, linkrevs in pycompat.iteritems(subdirnodes):
409 410 subdirfilenodes = self._verifymanifest(
410 411 linkrevs, subdir, storefiles, subdirprogress
411 412 )
412 413 for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
413 414 filenodes.setdefault(f, {}).update(onefilenodes)
414 415
415 416 if not dir and subdirnodes:
416 417 assert subdirprogress is not None # help pytype
417 418 subdirprogress.complete()
418 419 if self.warnorphanstorefiles:
419 420 for f in sorted(storefiles):
420 421 self._warn(_(b"warning: orphan data file '%s'") % f)
421 422
422 423 return filenodes
423 424
424 425 def _crosscheckfiles(self, filelinkrevs, filenodes):
425 426 repo = self.repo
426 427 ui = self.ui
427 428 ui.status(_(b"crosschecking files in changesets and manifests\n"))
428 429
429 430 total = len(filelinkrevs) + len(filenodes)
430 431 progress = ui.makeprogress(
431 432 _(b'crosschecking'), unit=_(b'files'), total=total
432 433 )
433 434 if self.havemf:
434 435 for f in sorted(filelinkrevs):
435 436 progress.increment()
436 437 if f not in filenodes:
437 438 lr = filelinkrevs[f][0]
438 439 self._err(lr, _(b"in changeset but not in manifest"), f)
439 440
440 441 if self.havecl:
441 442 for f in sorted(filenodes):
442 443 progress.increment()
443 444 if f not in filelinkrevs:
444 445 try:
445 446 fl = repo.file(f)
446 447 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
447 448 except Exception:
448 449 lr = None
449 450 self._err(lr, _(b"in manifest but not in changeset"), f)
450 451
451 452 progress.complete()
452 453
453 454 def _verifyfiles(self, filenodes, filelinkrevs):
454 455 repo = self.repo
455 456 ui = self.ui
456 457 lrugetctx = self.lrugetctx
457 458 revlogv1 = self.revlogv1
458 459 havemf = self.havemf
459 460 ui.status(_(b"checking files\n"))
460 461
461 462 storefiles = set()
462 for rl_type, f, f2, size in repo.store.datafiles():
463 if not f:
464 self._err(None, _(b"cannot decode filename '%s'") % f2)
465 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
463 undecodable = []
464 for t, f, size in repo.store.datafiles(undecodable=undecodable):
465 if (size > 0 or not revlogv1) and f.startswith(b'data/'):
466 466 storefiles.add(_normpath(f))
467 for f in undecodable:
468 self._err(None, _(b"cannot decode filename '%s'") % f)
467 469
468 470 state = {
469 471 # TODO this assumes revlog storage for changelog.
470 472 b'expectedversion': self.repo.changelog._format_version,
471 473 b'skipflags': self.skipflags,
472 474 # experimental config: censor.policy
473 475 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
474 476 }
475 477
476 478 files = sorted(set(filenodes) | set(filelinkrevs))
477 479 revisions = 0
478 480 progress = ui.makeprogress(
479 481 _(b'checking'), unit=_(b'files'), total=len(files)
480 482 )
481 483 for i, f in enumerate(files):
482 484 progress.update(i, item=f)
483 485 try:
484 486 linkrevs = filelinkrevs[f]
485 487 except KeyError:
486 488 # in manifest but not in changelog
487 489 linkrevs = []
488 490
489 491 if linkrevs:
490 492 lr = linkrevs[0]
491 493 else:
492 494 lr = None
493 495
494 496 try:
495 497 fl = repo.file(f)
496 498 except error.StorageError as e:
497 499 self._err(lr, _(b"broken revlog! (%s)") % e, f)
498 500 continue
499 501
500 502 for ff in fl.files():
501 503 try:
502 504 storefiles.remove(ff)
503 505 except KeyError:
504 506 if self.warnorphanstorefiles:
505 507 msg = _(b" warning: revlog '%s' not in fncache!")
506 508 self._warn(msg % ff)
507 509 self.fncachewarned = True
508 510
509 511 if not len(fl) and (self.havecl or self.havemf):
510 512 self._err(lr, _(b"empty or missing %s") % f)
511 513 else:
512 514 # Guard against implementations not setting this.
513 515 state[b'skipread'] = set()
514 516 state[b'safe_renamed'] = set()
515 517
516 518 for problem in fl.verifyintegrity(state):
517 519 if problem.node is not None:
518 520 linkrev = fl.linkrev(fl.rev(problem.node))
519 521 else:
520 522 linkrev = None
521 523
522 524 if problem.warning:
523 525 self._warn(problem.warning)
524 526 elif problem.error:
525 527 linkrev_msg = linkrev if linkrev is not None else lr
526 528 self._err(linkrev_msg, problem.error, f)
527 529 else:
528 530 raise error.ProgrammingError(
529 531 b'problem instance does not set warning or error '
530 532 b'attribute: %s' % problem.msg
531 533 )
532 534
533 535 seen = {}
534 536 for i in fl:
535 537 revisions += 1
536 538 n = fl.node(i)
537 539 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
538 540 if f in filenodes:
539 541 if havemf and n not in filenodes[f]:
540 542 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
541 543 else:
542 544 del filenodes[f][n]
543 545
544 546 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
545 547 continue
546 548
547 549 # check renames
548 550 try:
549 551 # This requires resolving fulltext (at least on revlogs,
550 552 # though not with LFS revisions). We may want
551 553 # ``verifyintegrity()`` to pass a set of nodes with
552 554 # rename metadata as an optimization.
553 555 rp = fl.renamed(n)
554 556 if rp:
555 557 if lr is not None and ui.verbose:
556 558 ctx = lrugetctx(lr)
557 559 if not any(rp[0] in pctx for pctx in ctx.parents()):
558 560 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
559 561 fl2 = repo.file(rp[0])
560 562 if not len(fl2):
561 563 m = _(b"empty or missing copy source revlog %s:%s")
562 564 self._err(lr, m % (rp[0], short(rp[1])), f)
563 565 elif rp[1] == self.repo.nullid:
564 566 msg = WARN_NULLID_COPY_SOURCE
565 567 msg %= (f, lr, rp[0], short(rp[1]))
566 568 ui.note(msg)
567 569 else:
568 570 fl2.rev(rp[1])
569 571 except Exception as inst:
570 572 self._exc(
571 573 lr, _(b"checking rename of %s") % short(n), inst, f
572 574 )
573 575
574 576 # cross-check
575 577 if f in filenodes:
576 578 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
577 579 for lr, node in sorted(fns):
578 580 msg = _(b"manifest refers to unknown revision %s")
579 581 self._err(lr, msg % short(node), f)
580 582 progress.complete()
581 583
582 584 if self.warnorphanstorefiles:
583 585 for f in sorted(storefiles):
584 586 self._warn(_(b"warning: orphan data file '%s'") % f)
585 587
586 588 return len(files), revisions
@@ -1,1613 +1,1613 b''
1 1 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
2 2 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 3 #
4 4 # This software may be used and distributed according to the terms of the
5 5 # GNU General Public License version 2 or any later version.
6 6
7 7 from __future__ import absolute_import
8 8
9 9 import collections
10 10 import contextlib
11 11
12 12 from .i18n import _
13 13 from .node import hex
14 14 from . import (
15 15 discovery,
16 16 encoding,
17 17 error,
18 18 match as matchmod,
19 19 narrowspec,
20 20 pycompat,
21 21 streamclone,
22 22 templatefilters,
23 23 util,
24 24 wireprotoframing,
25 25 wireprototypes,
26 26 )
27 27 from .interfaces import util as interfaceutil
28 28 from .utils import (
29 29 cborutil,
30 30 hashutil,
31 31 stringutil,
32 32 )
33 33
34 34 FRAMINGTYPE = b'application/mercurial-exp-framing-0006'
35 35
36 36 HTTP_WIREPROTO_V2 = wireprototypes.HTTP_WIREPROTO_V2
37 37
38 38 COMMANDS = wireprototypes.commanddict()
39 39
40 40 # Value inserted into cache key computation function. Change the value to
41 41 # force new cache keys for every command request. This should be done when
42 42 # there is a change to how caching works, etc.
43 43 GLOBAL_CACHE_VERSION = 1
44 44
45 45
46 46 def handlehttpv2request(rctx, req, res, checkperm, urlparts):
47 47 from .hgweb import common as hgwebcommon
48 48
49 49 # URL space looks like: <permissions>/<command>, where <permission> can
50 50 # be ``ro`` or ``rw`` to signal read-only or read-write, respectively.
51 51
52 52 # Root URL does nothing meaningful... yet.
53 53 if not urlparts:
54 54 res.status = b'200 OK'
55 55 res.headers[b'Content-Type'] = b'text/plain'
56 56 res.setbodybytes(_(b'HTTP version 2 API handler'))
57 57 return
58 58
59 59 if len(urlparts) == 1:
60 60 res.status = b'404 Not Found'
61 61 res.headers[b'Content-Type'] = b'text/plain'
62 62 res.setbodybytes(
63 63 _(b'do not know how to process %s\n') % req.dispatchpath
64 64 )
65 65 return
66 66
67 67 permission, command = urlparts[0:2]
68 68
69 69 if permission not in (b'ro', b'rw'):
70 70 res.status = b'404 Not Found'
71 71 res.headers[b'Content-Type'] = b'text/plain'
72 72 res.setbodybytes(_(b'unknown permission: %s') % permission)
73 73 return
74 74
75 75 if req.method != b'POST':
76 76 res.status = b'405 Method Not Allowed'
77 77 res.headers[b'Allow'] = b'POST'
78 78 res.setbodybytes(_(b'commands require POST requests'))
79 79 return
80 80
81 81 # At some point we'll want to use our own API instead of recycling the
82 82 # behavior of version 1 of the wire protocol...
83 83 # TODO return reasonable responses - not responses that overload the
84 84 # HTTP status line message for error reporting.
85 85 try:
86 86 checkperm(rctx, req, b'pull' if permission == b'ro' else b'push')
87 87 except hgwebcommon.ErrorResponse as e:
88 88 res.status = hgwebcommon.statusmessage(
89 89 e.code, stringutil.forcebytestr(e)
90 90 )
91 91 for k, v in e.headers:
92 92 res.headers[k] = v
93 93 res.setbodybytes(b'permission denied')
94 94 return
95 95
96 96 # We have a special endpoint to reflect the request back at the client.
97 97 if command == b'debugreflect':
98 98 _processhttpv2reflectrequest(rctx.repo.ui, rctx.repo, req, res)
99 99 return
100 100
101 101 # Extra commands that we handle that aren't really wire protocol
102 102 # commands. Think extra hard before making this hackery available to
103 103 # extension.
104 104 extracommands = {b'multirequest'}
105 105
106 106 if command not in COMMANDS and command not in extracommands:
107 107 res.status = b'404 Not Found'
108 108 res.headers[b'Content-Type'] = b'text/plain'
109 109 res.setbodybytes(_(b'unknown wire protocol command: %s\n') % command)
110 110 return
111 111
112 112 repo = rctx.repo
113 113 ui = repo.ui
114 114
115 115 proto = httpv2protocolhandler(req, ui)
116 116
117 117 if (
118 118 not COMMANDS.commandavailable(command, proto)
119 119 and command not in extracommands
120 120 ):
121 121 res.status = b'404 Not Found'
122 122 res.headers[b'Content-Type'] = b'text/plain'
123 123 res.setbodybytes(_(b'invalid wire protocol command: %s') % command)
124 124 return
125 125
126 126 # TODO consider cases where proxies may add additional Accept headers.
127 127 if req.headers.get(b'Accept') != FRAMINGTYPE:
128 128 res.status = b'406 Not Acceptable'
129 129 res.headers[b'Content-Type'] = b'text/plain'
130 130 res.setbodybytes(
131 131 _(b'client MUST specify Accept header with value: %s\n')
132 132 % FRAMINGTYPE
133 133 )
134 134 return
135 135
136 136 if req.headers.get(b'Content-Type') != FRAMINGTYPE:
137 137 res.status = b'415 Unsupported Media Type'
138 138 # TODO we should send a response with appropriate media type,
139 139 # since the client does Accept it.
140 140 res.headers[b'Content-Type'] = b'text/plain'
141 141 res.setbodybytes(
142 142 _(b'client MUST send Content-Type header with value: %s\n')
143 143 % FRAMINGTYPE
144 144 )
145 145 return
146 146
147 147 _processhttpv2request(ui, repo, req, res, permission, command, proto)
148 148
149 149
150 150 def _processhttpv2reflectrequest(ui, repo, req, res):
151 151 """Reads unified frame protocol request and dumps out state to client.
152 152
153 153 This special endpoint can be used to help debug the wire protocol.
154 154
155 155 Instead of routing the request through the normal dispatch mechanism,
156 156 we instead read all frames, decode them, and feed them into our state
157 157 tracker. We then dump the log of all that activity back out to the
158 158 client.
159 159 """
160 160 # Reflection APIs have a history of being abused, accidentally disclosing
161 161 # sensitive data, etc. So we have a config knob.
162 162 if not ui.configbool(b'experimental', b'web.api.debugreflect'):
163 163 res.status = b'404 Not Found'
164 164 res.headers[b'Content-Type'] = b'text/plain'
165 165 res.setbodybytes(_(b'debugreflect service not available'))
166 166 return
167 167
168 168 # We assume we have a unified framing protocol request body.
169 169
170 170 reactor = wireprotoframing.serverreactor(ui)
171 171 states = []
172 172
173 173 while True:
174 174 frame = wireprotoframing.readframe(req.bodyfh)
175 175
176 176 if not frame:
177 177 states.append(b'received: <no frame>')
178 178 break
179 179
180 180 states.append(
181 181 b'received: %d %d %d %s'
182 182 % (frame.typeid, frame.flags, frame.requestid, frame.payload)
183 183 )
184 184
185 185 action, meta = reactor.onframerecv(frame)
186 186 states.append(templatefilters.json((action, meta)))
187 187
188 188 action, meta = reactor.oninputeof()
189 189 meta[b'action'] = action
190 190 states.append(templatefilters.json(meta))
191 191
192 192 res.status = b'200 OK'
193 193 res.headers[b'Content-Type'] = b'text/plain'
194 194 res.setbodybytes(b'\n'.join(states))
195 195
196 196
197 197 def _processhttpv2request(ui, repo, req, res, authedperm, reqcommand, proto):
198 198 """Post-validation handler for HTTPv2 requests.
199 199
200 200 Called when the HTTP request contains unified frame-based protocol
201 201 frames for evaluation.
202 202 """
203 203 # TODO Some HTTP clients are full duplex and can receive data before
204 204 # the entire request is transmitted. Figure out a way to indicate support
205 205 # for that so we can opt into full duplex mode.
206 206 reactor = wireprotoframing.serverreactor(ui, deferoutput=True)
207 207 seencommand = False
208 208
209 209 outstream = None
210 210
211 211 while True:
212 212 frame = wireprotoframing.readframe(req.bodyfh)
213 213 if not frame:
214 214 break
215 215
216 216 action, meta = reactor.onframerecv(frame)
217 217
218 218 if action == b'wantframe':
219 219 # Need more data before we can do anything.
220 220 continue
221 221 elif action == b'runcommand':
222 222 # Defer creating output stream because we need to wait for
223 223 # protocol settings frames so proper encoding can be applied.
224 224 if not outstream:
225 225 outstream = reactor.makeoutputstream()
226 226
227 227 sentoutput = _httpv2runcommand(
228 228 ui,
229 229 repo,
230 230 req,
231 231 res,
232 232 authedperm,
233 233 reqcommand,
234 234 reactor,
235 235 outstream,
236 236 meta,
237 237 issubsequent=seencommand,
238 238 )
239 239
240 240 if sentoutput:
241 241 return
242 242
243 243 seencommand = True
244 244
245 245 elif action == b'error':
246 246 # TODO define proper error mechanism.
247 247 res.status = b'200 OK'
248 248 res.headers[b'Content-Type'] = b'text/plain'
249 249 res.setbodybytes(meta[b'message'] + b'\n')
250 250 return
251 251 else:
252 252 raise error.ProgrammingError(
253 253 b'unhandled action from frame processor: %s' % action
254 254 )
255 255
256 256 action, meta = reactor.oninputeof()
257 257 if action == b'sendframes':
258 258 # We assume we haven't started sending the response yet. If we're
259 259 # wrong, the response type will raise an exception.
260 260 res.status = b'200 OK'
261 261 res.headers[b'Content-Type'] = FRAMINGTYPE
262 262 res.setbodygen(meta[b'framegen'])
263 263 elif action == b'noop':
264 264 pass
265 265 else:
266 266 raise error.ProgrammingError(
267 267 b'unhandled action from frame processor: %s' % action
268 268 )
269 269
270 270
271 271 def _httpv2runcommand(
272 272 ui,
273 273 repo,
274 274 req,
275 275 res,
276 276 authedperm,
277 277 reqcommand,
278 278 reactor,
279 279 outstream,
280 280 command,
281 281 issubsequent,
282 282 ):
283 283 """Dispatch a wire protocol command made from HTTPv2 requests.
284 284
285 285 The authenticated permission (``authedperm``) along with the original
286 286 command from the URL (``reqcommand``) are passed in.
287 287 """
288 288 # We already validated that the session has permissions to perform the
289 289 # actions in ``authedperm``. In the unified frame protocol, the canonical
290 290 # command to run is expressed in a frame. However, the URL also requested
291 291 # to run a specific command. We need to be careful that the command we
292 292 # run doesn't have permissions requirements greater than what was granted
293 293 # by ``authedperm``.
294 294 #
295 295 # Our rule for this is we only allow one command per HTTP request and
296 296 # that command must match the command in the URL. However, we make
297 297 # an exception for the ``multirequest`` URL. This URL is allowed to
298 298 # execute multiple commands. We double check permissions of each command
299 299 # as it is invoked to ensure there is no privilege escalation.
300 300 # TODO consider allowing multiple commands to regular command URLs
301 301 # iff each command is the same.
302 302
303 303 proto = httpv2protocolhandler(req, ui, args=command[b'args'])
304 304
305 305 if reqcommand == b'multirequest':
306 306 if not COMMANDS.commandavailable(command[b'command'], proto):
307 307 # TODO proper error mechanism
308 308 res.status = b'200 OK'
309 309 res.headers[b'Content-Type'] = b'text/plain'
310 310 res.setbodybytes(
311 311 _(b'wire protocol command not available: %s')
312 312 % command[b'command']
313 313 )
314 314 return True
315 315
316 316 # TODO don't use assert here, since it may be elided by -O.
317 317 assert authedperm in (b'ro', b'rw')
318 318 wirecommand = COMMANDS[command[b'command']]
319 319 assert wirecommand.permission in (b'push', b'pull')
320 320
321 321 if authedperm == b'ro' and wirecommand.permission != b'pull':
322 322 # TODO proper error mechanism
323 323 res.status = b'403 Forbidden'
324 324 res.headers[b'Content-Type'] = b'text/plain'
325 325 res.setbodybytes(
326 326 _(b'insufficient permissions to execute command: %s')
327 327 % command[b'command']
328 328 )
329 329 return True
330 330
331 331 # TODO should we also call checkperm() here? Maybe not if we're going
332 332 # to overhaul that API. The granted scope from the URL check should
333 333 # be good enough.
334 334
335 335 else:
336 336 # Don't allow multiple commands outside of ``multirequest`` URL.
337 337 if issubsequent:
338 338 # TODO proper error mechanism
339 339 res.status = b'200 OK'
340 340 res.headers[b'Content-Type'] = b'text/plain'
341 341 res.setbodybytes(
342 342 _(b'multiple commands cannot be issued to this URL')
343 343 )
344 344 return True
345 345
346 346 if reqcommand != command[b'command']:
347 347 # TODO define proper error mechanism
348 348 res.status = b'200 OK'
349 349 res.headers[b'Content-Type'] = b'text/plain'
350 350 res.setbodybytes(_(b'command in frame must match command in URL'))
351 351 return True
352 352
353 353 res.status = b'200 OK'
354 354 res.headers[b'Content-Type'] = FRAMINGTYPE
355 355
356 356 try:
357 357 objs = dispatch(repo, proto, command[b'command'], command[b'redirect'])
358 358
359 359 action, meta = reactor.oncommandresponsereadyobjects(
360 360 outstream, command[b'requestid'], objs
361 361 )
362 362
363 363 except error.WireprotoCommandError as e:
364 364 action, meta = reactor.oncommanderror(
365 365 outstream, command[b'requestid'], e.message, e.messageargs
366 366 )
367 367
368 368 except Exception as e:
369 369 action, meta = reactor.onservererror(
370 370 outstream,
371 371 command[b'requestid'],
372 372 _(b'exception when invoking command: %s')
373 373 % stringutil.forcebytestr(e),
374 374 )
375 375
376 376 if action == b'sendframes':
377 377 res.setbodygen(meta[b'framegen'])
378 378 return True
379 379 elif action == b'noop':
380 380 return False
381 381 else:
382 382 raise error.ProgrammingError(
383 383 b'unhandled event from reactor: %s' % action
384 384 )
385 385
386 386
387 387 def getdispatchrepo(repo, proto, command):
388 388 viewconfig = repo.ui.config(b'server', b'view')
389 389 return repo.filtered(viewconfig)
390 390
391 391
392 392 def dispatch(repo, proto, command, redirect):
393 393 """Run a wire protocol command.
394 394
395 395 Returns an iterable of objects that will be sent to the client.
396 396 """
397 397 repo = getdispatchrepo(repo, proto, command)
398 398
399 399 entry = COMMANDS[command]
400 400 func = entry.func
401 401 spec = entry.args
402 402
403 403 args = proto.getargs(spec)
404 404
405 405 # There is some duplicate boilerplate code here for calling the command and
406 406 # emitting objects. It is either that or a lot of indented code that looks
407 407 # like a pyramid (since there are a lot of code paths that result in not
408 408 # using the cacher).
409 409 callcommand = lambda: func(repo, proto, **pycompat.strkwargs(args))
410 410
411 411 # Request is not cacheable. Don't bother instantiating a cacher.
412 412 if not entry.cachekeyfn:
413 413 for o in callcommand():
414 414 yield o
415 415 return
416 416
417 417 if redirect:
418 418 redirecttargets = redirect[b'targets']
419 419 redirecthashes = redirect[b'hashes']
420 420 else:
421 421 redirecttargets = []
422 422 redirecthashes = []
423 423
424 424 cacher = makeresponsecacher(
425 425 repo,
426 426 proto,
427 427 command,
428 428 args,
429 429 cborutil.streamencode,
430 430 redirecttargets=redirecttargets,
431 431 redirecthashes=redirecthashes,
432 432 )
433 433
434 434 # But we have no cacher. Do default handling.
435 435 if not cacher:
436 436 for o in callcommand():
437 437 yield o
438 438 return
439 439
440 440 with cacher:
441 441 cachekey = entry.cachekeyfn(
442 442 repo, proto, cacher, **pycompat.strkwargs(args)
443 443 )
444 444
445 445 # No cache key or the cacher doesn't like it. Do default handling.
446 446 if cachekey is None or not cacher.setcachekey(cachekey):
447 447 for o in callcommand():
448 448 yield o
449 449 return
450 450
451 451 # Serve it from the cache, if possible.
452 452 cached = cacher.lookup()
453 453
454 454 if cached:
455 455 for o in cached[b'objs']:
456 456 yield o
457 457 return
458 458
459 459 # Else call the command and feed its output into the cacher, allowing
460 460 # the cacher to buffer/mutate objects as it desires.
461 461 for o in callcommand():
462 462 for o in cacher.onobject(o):
463 463 yield o
464 464
465 465 for o in cacher.onfinished():
466 466 yield o
467 467
468 468
469 469 @interfaceutil.implementer(wireprototypes.baseprotocolhandler)
470 470 class httpv2protocolhandler(object):
471 471 def __init__(self, req, ui, args=None):
472 472 self._req = req
473 473 self._ui = ui
474 474 self._args = args
475 475
476 476 @property
477 477 def name(self):
478 478 return HTTP_WIREPROTO_V2
479 479
480 480 def getargs(self, args):
481 481 # First look for args that were passed but aren't registered on this
482 482 # command.
483 483 extra = set(self._args) - set(args)
484 484 if extra:
485 485 raise error.WireprotoCommandError(
486 486 b'unsupported argument to command: %s'
487 487 % b', '.join(sorted(extra))
488 488 )
489 489
490 490 # And look for required arguments that are missing.
491 491 missing = {a for a in args if args[a][b'required']} - set(self._args)
492 492
493 493 if missing:
494 494 raise error.WireprotoCommandError(
495 495 b'missing required arguments: %s' % b', '.join(sorted(missing))
496 496 )
497 497
498 498 # Now derive the arguments to pass to the command, taking into
499 499 # account the arguments specified by the client.
500 500 data = {}
501 501 for k, meta in sorted(args.items()):
502 502 # This argument wasn't passed by the client.
503 503 if k not in self._args:
504 504 data[k] = meta[b'default']()
505 505 continue
506 506
507 507 v = self._args[k]
508 508
509 509 # Sets may be expressed as lists. Silently normalize.
510 510 if meta[b'type'] == b'set' and isinstance(v, list):
511 511 v = set(v)
512 512
513 513 # TODO consider more/stronger type validation.
514 514
515 515 data[k] = v
516 516
517 517 return data
518 518
519 519 def getprotocaps(self):
520 520 # Protocol capabilities are currently not implemented for HTTP V2.
521 521 return set()
522 522
523 523 def getpayload(self):
524 524 raise NotImplementedError
525 525
526 526 @contextlib.contextmanager
527 527 def mayberedirectstdio(self):
528 528 raise NotImplementedError
529 529
530 530 def client(self):
531 531 raise NotImplementedError
532 532
533 533 def addcapabilities(self, repo, caps):
534 534 return caps
535 535
536 536 def checkperm(self, perm):
537 537 raise NotImplementedError
538 538
539 539
540 540 def httpv2apidescriptor(req, repo):
541 541 proto = httpv2protocolhandler(req, repo.ui)
542 542
543 543 return _capabilitiesv2(repo, proto)
544 544
545 545
546 546 def _capabilitiesv2(repo, proto):
547 547 """Obtain the set of capabilities for version 2 transports.
548 548
549 549 These capabilities are distinct from the capabilities for version 1
550 550 transports.
551 551 """
552 552 caps = {
553 553 b'commands': {},
554 554 b'framingmediatypes': [FRAMINGTYPE],
555 555 b'pathfilterprefixes': set(narrowspec.VALID_PREFIXES),
556 556 }
557 557
558 558 for command, entry in COMMANDS.items():
559 559 args = {}
560 560
561 561 for arg, meta in entry.args.items():
562 562 args[arg] = {
563 563 # TODO should this be a normalized type using CBOR's
564 564 # terminology?
565 565 b'type': meta[b'type'],
566 566 b'required': meta[b'required'],
567 567 }
568 568
569 569 if not meta[b'required']:
570 570 args[arg][b'default'] = meta[b'default']()
571 571
572 572 if meta[b'validvalues']:
573 573 args[arg][b'validvalues'] = meta[b'validvalues']
574 574
575 575 # TODO this type of check should be defined in a per-command callback.
576 576 if (
577 577 command == b'rawstorefiledata'
578 578 and not streamclone.allowservergeneration(repo)
579 579 ):
580 580 continue
581 581
582 582 caps[b'commands'][command] = {
583 583 b'args': args,
584 584 b'permissions': [entry.permission],
585 585 }
586 586
587 587 if entry.extracapabilitiesfn:
588 588 extracaps = entry.extracapabilitiesfn(repo, proto)
589 589 caps[b'commands'][command].update(extracaps)
590 590
591 591 caps[b'rawrepoformats'] = sorted(repo.requirements & repo.supportedformats)
592 592
593 593 targets = getadvertisedredirecttargets(repo, proto)
594 594 if targets:
595 595 caps[b'redirect'] = {
596 596 b'targets': [],
597 597 b'hashes': [b'sha256', b'sha1'],
598 598 }
599 599
600 600 for target in targets:
601 601 entry = {
602 602 b'name': target[b'name'],
603 603 b'protocol': target[b'protocol'],
604 604 b'uris': target[b'uris'],
605 605 }
606 606
607 607 for key in (b'snirequired', b'tlsversions'):
608 608 if key in target:
609 609 entry[key] = target[key]
610 610
611 611 caps[b'redirect'][b'targets'].append(entry)
612 612
613 613 return proto.addcapabilities(repo, caps)
614 614
615 615
616 616 def getadvertisedredirecttargets(repo, proto):
617 617 """Obtain a list of content redirect targets.
618 618
619 619 Returns a list containing potential redirect targets that will be
620 620 advertised in capabilities data. Each dict MUST have the following
621 621 keys:
622 622
623 623 name
624 624 The name of this redirect target. This is the identifier clients use
625 625 to refer to a target. It is transferred as part of every command
626 626 request.
627 627
628 628 protocol
629 629 Network protocol used by this target. Typically this is the string
630 630 in front of the ``://`` in a URL. e.g. ``https``.
631 631
632 632 uris
633 633 List of representative URIs for this target. Clients can use the
634 634 URIs to test parsing for compatibility or for ordering preference
635 635 for which target to use.
636 636
637 637 The following optional keys are recognized:
638 638
639 639 snirequired
640 640 Bool indicating if Server Name Indication (SNI) is required to
641 641 connect to this target.
642 642
643 643 tlsversions
644 644 List of bytes indicating which TLS versions are supported by this
645 645 target.
646 646
647 647 By default, clients reflect the target order advertised by servers
648 648 and servers will use the first client-advertised target when picking
649 649 a redirect target. So targets should be advertised in the order the
650 650 server prefers they be used.
651 651 """
652 652 return []
653 653
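# Editor's note: an illustrative sketch, not part of the original module, of
# the structure an override of getadvertisedredirecttargets() might return.
# The target name and URI are invented for the example.
def _exampleredirecttargets(repo, proto):
    return [
        {
            b'name': b'cdn',  # identifier clients echo back in requests
            b'protocol': b'https',  # the part in front of '://'
            b'uris': [b'https://cdn.example.com/'],
            # optional keys:
            b'snirequired': True,
            b'tlsversions': [b'1.2', b'1.3'],
        },
    ]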
654 654
655 655 def wireprotocommand(
656 656 name,
657 657 args=None,
658 658 permission=b'push',
659 659 cachekeyfn=None,
660 660 extracapabilitiesfn=None,
661 661 ):
662 662 """Decorator to declare a wire protocol command.
663 663
664 664 ``name`` is the name of the wire protocol command being provided.
665 665
666 666 ``args`` is a dict defining arguments accepted by the command. Keys are
667 667 the argument name. Values are dicts with the following keys:
668 668
669 669 ``type``
670 670 The argument data type. Must be one of the following string
671 671 literals: ``bytes``, ``int``, ``list``, ``dict``, ``set``,
672 672 or ``bool``.
673 673
674 674 ``default``
675 675 A callable returning the default value for this argument. If not
676 676 specified, ``None`` will be the default value.
677 677
678 678 ``example``
679 679 An example value for this argument.
680 680
681 681 ``validvalues``
682 682 Set of recognized values for this argument.
683 683
684 684 ``permission`` defines the permission type needed to run this command.
685 685 Can be ``push`` or ``pull``. These roughly map to read-write and read-only,
686 686 respectively. Default is to assume command requires ``push`` permissions
687 687 because otherwise commands not declaring their permissions could modify
688 688 a repository that is supposed to be read-only.
689 689
690 690 ``cachekeyfn`` defines an optional callable that can derive the
691 691 cache key for this request.
692 692
693 693 ``extracapabilitiesfn`` defines an optional callable that defines extra
694 694 command capabilities/parameters that are advertised next to the command
695 695 in the capabilities data structure describing the server. The callable
696 696 receives as arguments the repository and protocol objects. It returns
697 697 a dict of extra fields to add to the command descriptor.
698 698
699 699 Wire protocol commands are generators of objects to be serialized and
700 700 sent to the client.
701 701
702 702 If a command raises an uncaught exception, this will be translated into
703 703 a command error.
704 704
705 705 All commands can opt in to being cacheable by defining a function
706 706 (``cachekeyfn``) that is called to derive a cache key. This function
707 707 receives the same arguments as the command itself plus a ``cacher``
708 708 argument containing the active cacher for the request, and returns bytes
709 709 containing the cache key under which the response to this command may
710 710 be cached.
711 711 """
712 712 transports = {
713 713 k for k, v in wireprototypes.TRANSPORTS.items() if v[b'version'] == 2
714 714 }
715 715
716 716 if permission not in (b'push', b'pull'):
717 717 raise error.ProgrammingError(
718 718 b'invalid wire protocol permission; '
719 719 b'got %s; expected "push" or "pull"' % permission
720 720 )
721 721
722 722 if args is None:
723 723 args = {}
724 724
725 725 if not isinstance(args, dict):
726 726 raise error.ProgrammingError(
727 727 b'arguments for version 2 commands must be declared as dicts'
728 728 )
729 729
730 730 for arg, meta in args.items():
731 731 if arg == b'*':
732 732 raise error.ProgrammingError(
733 733 b'* argument name not allowed on version 2 commands'
734 734 )
735 735
736 736 if not isinstance(meta, dict):
737 737 raise error.ProgrammingError(
738 738 b'arguments for version 2 commands '
739 739 b'must declare metadata as a dict'
740 740 )
741 741
742 742 if b'type' not in meta:
743 743 raise error.ProgrammingError(
744 744 b'%s argument for command %s does not '
745 745 b'declare type field' % (arg, name)
746 746 )
747 747
748 748 if meta[b'type'] not in (
749 749 b'bytes',
750 750 b'int',
751 751 b'list',
752 752 b'dict',
753 753 b'set',
754 754 b'bool',
755 755 ):
756 756 raise error.ProgrammingError(
757 757 b'%s argument for command %s has '
758 758 b'illegal type: %s' % (arg, name, meta[b'type'])
759 759 )
760 760
761 761 if b'example' not in meta:
762 762 raise error.ProgrammingError(
763 763 b'%s argument for command %s does not '
764 764 b'declare example field' % (arg, name)
765 765 )
766 766
767 767 meta[b'required'] = b'default' not in meta
768 768
769 769 meta.setdefault(b'default', lambda: None)
770 770 meta.setdefault(b'validvalues', None)
771 771
772 772 def register(func):
773 773 if name in COMMANDS:
774 774 raise error.ProgrammingError(
775 775 b'%s command already registered for version 2' % name
776 776 )
777 777
778 778 COMMANDS[name] = wireprototypes.commandentry(
779 779 func,
780 780 args=args,
781 781 transports=transports,
782 782 permission=permission,
783 783 cachekeyfn=cachekeyfn,
784 784 extracapabilitiesfn=extracapabilitiesfn,
785 785 )
786 786
787 787 return func
788 788
789 789 return register
790 790
791 791
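To make the decorator contract described in the docstring concrete, a registration could look like the following minimal sketch. The command name ``pingv2`` and its ``message`` argument are hypothetical; the keyword arguments mirror the built-in commands registered later in this file.

    @wireprotocommand(
        b'pingv2',  # hypothetical command name
        args={
            b'message': {
                b'type': b'bytes',
                b'default': lambda: b'',  # optional because a default is declared
                b'example': b'hello',
            },
        },
        permission=b'pull',  # read-only command
    )
    def pingv2(repo, proto, message):
        # Commands are generators of objects to be serialized to the client.
        yield {b'echo': message}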
792 792 def makecommandcachekeyfn(command, localversion=None, allargs=False):
793 793 """Construct a cache key derivation function with common features.
794 794
795 795 By default, the cache key is a hash of:
796 796
797 797 * The command name.
798 798 * A global cache version number.
799 799 * A local cache version number (passed via ``localversion``).
800 800 * All the arguments passed to the command.
801 801 * The media type used.
802 802 * Wire protocol version string.
803 803 * The repository path.
804 804 """
805 805 if not allargs:
806 806 raise error.ProgrammingError(
807 807 b'only allargs=True is currently supported'
808 808 )
809 809
810 810 if localversion is None:
811 811 raise error.ProgrammingError(b'must set localversion argument value')
812 812
813 813 def cachekeyfn(repo, proto, cacher, **args):
814 814 spec = COMMANDS[command]
815 815
816 816 # Commands that mutate the repo cannot be cached.
817 817 if spec.permission == b'push':
818 818 return None
819 819
820 820 # TODO config option to disable caching.
821 821
822 822 # Our key derivation strategy is to construct a data structure
823 823 # holding everything that could influence cacheability and to hash
824 824 # the CBOR representation of that. Using CBOR seems like it might
825 825 # be overkill. However, simpler hashing mechanisms are prone to
826 826 # duplicate input issues. e.g. if you just concatenate two values,
827 827 # "foo"+"bar" is identical to "fo"+"obar". Using CBOR provides
828 828 # "padding" between values and prevents these problems.
829 829
830 830 # Seed the hash with various data.
831 831 state = {
832 832 # To invalidate all cache keys.
833 833 b'globalversion': GLOBAL_CACHE_VERSION,
834 834 # More granular cache key invalidation.
835 835 b'localversion': localversion,
836 836 # Cache keys are segmented by command.
837 837 b'command': command,
838 838 # Throw in the media type and API version strings so changes
839 839 # to exchange semantics invalidate the cache.
840 840 b'mediatype': FRAMINGTYPE,
841 841 b'version': HTTP_WIREPROTO_V2,
842 842 # So same requests for different repos don't share cache keys.
843 843 b'repo': repo.root,
844 844 }
845 845
846 846 # The arguments passed to us will have already been normalized.
847 847 # Default values will be set, etc. This is important because it
848 848 # means that it doesn't matter if clients send an explicit argument
849 849 # or rely on the default value: it will all normalize to the same
850 850 # set of arguments on the server and therefore the same cache key.
851 851 #
852 852 # Arguments by their very nature must support being encoded to CBOR.
853 853 # And the CBOR encoder is deterministic. So we hash the arguments
854 854 # by feeding the CBOR of their representation into the hasher.
855 855 if allargs:
856 856 state[b'args'] = pycompat.byteskwargs(args)
857 857
858 858 cacher.adjustcachekeystate(state)
859 859
860 860 hasher = hashutil.sha1()
861 861 for chunk in cborutil.streamencode(state):
862 862 hasher.update(chunk)
863 863
864 864 return pycompat.sysbytes(hasher.hexdigest())
865 865
866 866 return cachekeyfn
867 867
868 868
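The duplicate-input concern raised in the comments above can be illustrated directly: naive concatenation maps distinct argument sets onto the same byte string, while the CBOR stream encoding fed into the hasher keeps them apart. A small sketch using the same ``cborutil`` helper this module already relies on:

    from mercurial.utils import cborutil

    def enc(*vals):
        # Encode the values as a CBOR array and join the emitted chunks.
        return b''.join(cborutil.streamencode(list(vals)))

    # Plain concatenation is ambiguous: both pairs produce identical input.
    assert b'foo' + b'bar' == b'fo' + b'obar'

    # CBOR length-prefixes each item, so the encoded forms differ.
    assert enc(b'foo', b'bar') != enc(b'fo', b'obar')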
869 869 def makeresponsecacher(
870 870 repo, proto, command, args, objencoderfn, redirecttargets, redirecthashes
871 871 ):
872 872 """Construct a cacher for a cacheable command.
873 873
874 874 Returns an ``iwireprotocolcommandcacher`` instance.
875 875
876 876 Extensions can monkeypatch this function to provide custom caching
877 877 backends.
878 878 """
879 879 return None
880 880
881 881
882 882 def resolvenodes(repo, revisions):
883 883 """Resolve nodes from a revisions specifier data structure."""
884 884 cl = repo.changelog
885 885 clhasnode = cl.hasnode
886 886
887 887 seen = set()
888 888 nodes = []
889 889
890 890 if not isinstance(revisions, list):
891 891 raise error.WireprotoCommandError(
892 892 b'revisions must be defined as an array'
893 893 )
894 894
895 895 for spec in revisions:
896 896 if b'type' not in spec:
897 897 raise error.WireprotoCommandError(
898 898 b'type key not present in revision specifier'
899 899 )
900 900
901 901 typ = spec[b'type']
902 902
903 903 if typ == b'changesetexplicit':
904 904 if b'nodes' not in spec:
905 905 raise error.WireprotoCommandError(
906 906 b'nodes key not present in changesetexplicit revision '
907 907 b'specifier'
908 908 )
909 909
910 910 for node in spec[b'nodes']:
911 911 if node not in seen:
912 912 nodes.append(node)
913 913 seen.add(node)
914 914
915 915 elif typ == b'changesetexplicitdepth':
916 916 for key in (b'nodes', b'depth'):
917 917 if key not in spec:
918 918 raise error.WireprotoCommandError(
919 919 b'%s key not present in changesetexplicitdepth revision '
920 920 b'specifier',
921 921 (key,),
922 922 )
923 923
924 924 for rev in repo.revs(
925 925 b'ancestors(%ln, %s)', spec[b'nodes'], spec[b'depth'] - 1
926 926 ):
927 927 node = cl.node(rev)
928 928
929 929 if node not in seen:
930 930 nodes.append(node)
931 931 seen.add(node)
932 932
933 933 elif typ == b'changesetdagrange':
934 934 for key in (b'roots', b'heads'):
935 935 if key not in spec:
936 936 raise error.WireprotoCommandError(
937 937 b'%s key not present in changesetdagrange revision '
938 938 b'specifier',
939 939 (key,),
940 940 )
941 941
942 942 if not spec[b'heads']:
943 943 raise error.WireprotoCommandError(
944 944 b'heads key in changesetdagrange cannot be empty'
945 945 )
946 946
947 947 if spec[b'roots']:
948 948 common = [n for n in spec[b'roots'] if clhasnode(n)]
949 949 else:
950 950 common = [repo.nullid]
951 951
952 952 for n in discovery.outgoing(repo, common, spec[b'heads']).missing:
953 953 if n not in seen:
954 954 nodes.append(n)
955 955 seen.add(n)
956 956
957 957 else:
958 958 raise error.WireprotoCommandError(
959 959 b'unknown revision specifier type: %s', (typ,)
960 960 )
961 961
962 962 return nodes
963 963
964 964
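For reference, a ``revisions`` value covering the three specifier types handled above could look like this (node values abbreviated; an illustrative sketch rather than captured client input):

    revisions = [
        # Explicitly named changesets.
        {b'type': b'changesetexplicit', b'nodes': [b'abcdef...']},
        # Named changesets plus ancestors up to the requested depth.
        {b'type': b'changesetexplicitdepth', b'nodes': [b'abcdef...'], b'depth': 3},
        # Changesets reachable from heads but not from roots (roots may be empty).
        {b'type': b'changesetdagrange', b'roots': [], b'heads': [b'abcdef...']},
    ]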
965 965 @wireprotocommand(b'branchmap', permission=b'pull')
966 966 def branchmapv2(repo, proto):
967 967 yield {
968 968 encoding.fromlocal(k): v
969 969 for k, v in pycompat.iteritems(repo.branchmap())
970 970 }
971 971
972 972
973 973 @wireprotocommand(b'capabilities', permission=b'pull')
974 974 def capabilitiesv2(repo, proto):
975 975 yield _capabilitiesv2(repo, proto)
976 976
977 977
978 978 @wireprotocommand(
979 979 b'changesetdata',
980 980 args={
981 981 b'revisions': {
982 982 b'type': b'list',
983 983 b'example': [
984 984 {
985 985 b'type': b'changesetexplicit',
986 986 b'nodes': [b'abcdef...'],
987 987 }
988 988 ],
989 989 },
990 990 b'fields': {
991 991 b'type': b'set',
992 992 b'default': set,
993 993 b'example': {b'parents', b'revision'},
994 994 b'validvalues': {b'bookmarks', b'parents', b'phase', b'revision'},
995 995 },
996 996 },
997 997 permission=b'pull',
998 998 )
999 999 def changesetdata(repo, proto, revisions, fields):
1000 1000 # TODO look for unknown fields and abort when they can't be serviced.
1001 1001 # This could probably be validated by dispatcher using validvalues.
1002 1002
1003 1003 cl = repo.changelog
1004 1004 outgoing = resolvenodes(repo, revisions)
1005 1005 publishing = repo.publishing()
1006 1006
1007 1007 if outgoing:
1008 1008 repo.hook(b'preoutgoing', throw=True, source=b'serve')
1009 1009
1010 1010 yield {
1011 1011 b'totalitems': len(outgoing),
1012 1012 }
1013 1013
1014 1014 # The phases of nodes already transferred to the client may have changed
1015 1015 # since the client last requested data. We send phase-only records
1016 1016 # for these revisions, if requested.
1017 1017 # TODO actually do this. We'll probably want to emit phase heads
1018 1018 # in the ancestry set of the outgoing revisions. This will ensure
1019 1019 # that phase updates within that set are seen.
1020 1020 if b'phase' in fields:
1021 1021 pass
1022 1022
1023 1023 nodebookmarks = {}
1024 1024 for mark, node in repo._bookmarks.items():
1025 1025 nodebookmarks.setdefault(node, set()).add(mark)
1026 1026
1027 1027 # It is already topologically sorted by revision number.
1028 1028 for node in outgoing:
1029 1029 d = {
1030 1030 b'node': node,
1031 1031 }
1032 1032
1033 1033 if b'parents' in fields:
1034 1034 d[b'parents'] = cl.parents(node)
1035 1035
1036 1036 if b'phase' in fields:
1037 1037 if publishing:
1038 1038 d[b'phase'] = b'public'
1039 1039 else:
1040 1040 ctx = repo[node]
1041 1041 d[b'phase'] = ctx.phasestr()
1042 1042
1043 1043 if b'bookmarks' in fields and node in nodebookmarks:
1044 1044 d[b'bookmarks'] = sorted(nodebookmarks[node])
1045 1045 del nodebookmarks[node]
1046 1046
1047 1047 followingmeta = []
1048 1048 followingdata = []
1049 1049
1050 1050 if b'revision' in fields:
1051 1051 revisiondata = cl.revision(node)
1052 1052 followingmeta.append((b'revision', len(revisiondata)))
1053 1053 followingdata.append(revisiondata)
1054 1054
1055 1055 # TODO make it possible for extensions to wrap a function or register
1056 1056 # a handler to service custom fields.
1057 1057
1058 1058 if followingmeta:
1059 1059 d[b'fieldsfollowing'] = followingmeta
1060 1060
1061 1061 yield d
1062 1062
1063 1063 for extra in followingdata:
1064 1064 yield extra
1065 1065
1066 1066 # If requested, send bookmarks from nodes that didn't have revision
1067 1067 # data sent so the receiver is aware of any bookmark updates.
1068 1068 if b'bookmarks' in fields:
1069 1069 for node, marks in sorted(pycompat.iteritems(nodebookmarks)):
1070 1070 yield {
1071 1071 b'node': node,
1072 1072 b'bookmarks': sorted(marks),
1073 1073 }
1074 1074
1075 1075
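For orientation, the objects emitted by ``changesetdata`` form a stream shaped roughly as sketched below, assuming two outgoing changesets with ``parents`` and ``revision`` requested plus one bookmark-only trailing record (nodes and sizes are made up for illustration):

    {b'totalitems': 2}
    {b'node': b'abc...', b'parents': [...], b'fieldsfollowing': [(b'revision', 142)]}
    # 142 bytes of raw changelog revision data follow as a separate object
    {b'node': b'def...', b'parents': [...], b'fieldsfollowing': [(b'revision', 98)]}
    # 98 bytes of raw changelog revision data
    {b'node': b'ghi...', b'bookmarks': [b'mybook']}  # bookmark update for a node not sent above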
1076 1076 class FileAccessError(Exception):
1077 1077 """Represents an error accessing a specific file."""
1078 1078
1079 1079 def __init__(self, path, msg, args):
1080 1080 self.path = path
1081 1081 self.msg = msg
1082 1082 self.args = args
1083 1083
1084 1084
1085 1085 def getfilestore(repo, proto, path):
1086 1086 """Obtain a file storage object for use with wire protocol.
1087 1087
1088 1088 Exists as a standalone function so extensions can monkeypatch to add
1089 1089 access control.
1090 1090 """
1091 1091 # This seems to work even if the file doesn't exist. So catch
1092 1092 # "empty" files and return an error.
1093 1093 fl = repo.file(path)
1094 1094
1095 1095 if not len(fl):
1096 1096 raise FileAccessError(path, b'unknown file: %s', (path,))
1097 1097
1098 1098 return fl
1099 1099
1100 1100
1101 1101 def emitfilerevisions(repo, path, revisions, linknodes, fields):
1102 1102 for revision in revisions:
1103 1103 d = {
1104 1104 b'node': revision.node,
1105 1105 }
1106 1106
1107 1107 if b'parents' in fields:
1108 1108 d[b'parents'] = [revision.p1node, revision.p2node]
1109 1109
1110 1110 if b'linknode' in fields:
1111 1111 d[b'linknode'] = linknodes[revision.node]
1112 1112
1113 1113 followingmeta = []
1114 1114 followingdata = []
1115 1115
1116 1116 if b'revision' in fields:
1117 1117 if revision.revision is not None:
1118 1118 followingmeta.append((b'revision', len(revision.revision)))
1119 1119 followingdata.append(revision.revision)
1120 1120 else:
1121 1121 d[b'deltabasenode'] = revision.basenode
1122 1122 followingmeta.append((b'delta', len(revision.delta)))
1123 1123 followingdata.append(revision.delta)
1124 1124
1125 1125 if followingmeta:
1126 1126 d[b'fieldsfollowing'] = followingmeta
1127 1127
1128 1128 yield d
1129 1129
1130 1130 for extra in followingdata:
1131 1131 yield extra
1132 1132
1133 1133
1134 1134 def makefilematcher(repo, pathfilter):
1135 1135 """Construct a matcher from a path filter dict."""
1136 1136
1137 1137 # Validate values.
1138 1138 if pathfilter:
1139 1139 for key in (b'include', b'exclude'):
1140 1140 for pattern in pathfilter.get(key, []):
1141 1141 if not pattern.startswith((b'path:', b'rootfilesin:')):
1142 1142 raise error.WireprotoCommandError(
1143 1143 b'%s pattern must begin with `path:` or `rootfilesin:`; '
1144 1144 b'got %s',
1145 1145 (key, pattern),
1146 1146 )
1147 1147
1148 1148 if pathfilter:
1149 1149 matcher = matchmod.match(
1150 1150 repo.root,
1151 1151 b'',
1152 1152 include=pathfilter.get(b'include', []),
1153 1153 exclude=pathfilter.get(b'exclude', []),
1154 1154 )
1155 1155 else:
1156 1156 matcher = matchmod.match(repo.root, b'')
1157 1157
1158 1158 # Requested patterns could include files not in the local store. So
1159 1159 # filter those out.
1160 1160 return repo.narrowmatch(matcher)
1161 1161
1162 1162
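Given the validation above, an acceptable ``pathfilter`` argument looks like the dict below; any pattern without a ``path:`` or ``rootfilesin:`` prefix is rejected. The directory names here are hypothetical:

    pathfilter = {
        b'include': [b'path:tests', b'rootfilesin:contrib'],
        b'exclude': [b'path:tests/artifacts'],
    }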
1163 1163 @wireprotocommand(
1164 1164 b'filedata',
1165 1165 args={
1166 1166 b'haveparents': {
1167 1167 b'type': b'bool',
1168 1168 b'default': lambda: False,
1169 1169 b'example': True,
1170 1170 },
1171 1171 b'nodes': {
1172 1172 b'type': b'list',
1173 1173 b'example': [b'0123456...'],
1174 1174 },
1175 1175 b'fields': {
1176 1176 b'type': b'set',
1177 1177 b'default': set,
1178 1178 b'example': {b'parents', b'revision'},
1179 1179 b'validvalues': {b'parents', b'revision', b'linknode'},
1180 1180 },
1181 1181 b'path': {
1182 1182 b'type': b'bytes',
1183 1183 b'example': b'foo.txt',
1184 1184 },
1185 1185 },
1186 1186 permission=b'pull',
1187 1187 # TODO censoring a file revision won't invalidate the cache.
1188 1188 # Figure out a way to take censoring into account when deriving
1189 1189 # the cache key.
1190 1190 cachekeyfn=makecommandcachekeyfn(b'filedata', 1, allargs=True),
1191 1191 )
1192 1192 def filedata(repo, proto, haveparents, nodes, fields, path):
1193 1193 # TODO this API allows access to file revisions that are attached to
1194 1194 # secret changesets. filesdata does not have this problem. Maybe this
1195 1195 # API should be deleted?
1196 1196
1197 1197 try:
1198 1198 # Extensions may wish to access the protocol handler.
1199 1199 store = getfilestore(repo, proto, path)
1200 1200 except FileAccessError as e:
1201 1201 raise error.WireprotoCommandError(e.msg, e.args)
1202 1202
1203 1203 clnode = repo.changelog.node
1204 1204 linknodes = {}
1205 1205
1206 1206 # Validate requested nodes.
1207 1207 for node in nodes:
1208 1208 try:
1209 1209 store.rev(node)
1210 1210 except error.LookupError:
1211 1211 raise error.WireprotoCommandError(
1212 1212 b'unknown file node: %s', (hex(node),)
1213 1213 )
1214 1214
1215 1215 # TODO by creating the filectx against a specific file revision
1216 1216 # instead of changeset, linkrev() is always used. This is wrong for
1217 1217 # cases where linkrev() may refer to a hidden changeset. But since this
1218 1218 # API doesn't know anything about changesets, we're not sure how to
1219 1219 # disambiguate the linknode. Perhaps we should delete this API?
1220 1220 fctx = repo.filectx(path, fileid=node)
1221 1221 linknodes[node] = clnode(fctx.introrev())
1222 1222
1223 1223 revisions = store.emitrevisions(
1224 1224 nodes,
1225 1225 revisiondata=b'revision' in fields,
1226 1226 assumehaveparentrevisions=haveparents,
1227 1227 )
1228 1228
1229 1229 yield {
1230 1230 b'totalitems': len(nodes),
1231 1231 }
1232 1232
1233 1233 for o in emitfilerevisions(repo, path, revisions, linknodes, fields):
1234 1234 yield o
1235 1235
1236 1236
1237 1237 def filesdatacapabilities(repo, proto):
1238 1238 batchsize = repo.ui.configint(
1239 1239 b'experimental', b'server.filesdata.recommended-batch-size'
1240 1240 )
1241 1241 return {
1242 1242 b'recommendedbatchsize': batchsize,
1243 1243 }
1244 1244
1245 1245
1246 1246 @wireprotocommand(
1247 1247 b'filesdata',
1248 1248 args={
1249 1249 b'haveparents': {
1250 1250 b'type': b'bool',
1251 1251 b'default': lambda: False,
1252 1252 b'example': True,
1253 1253 },
1254 1254 b'fields': {
1255 1255 b'type': b'set',
1256 1256 b'default': set,
1257 1257 b'example': {b'parents', b'revision'},
1258 1258 b'validvalues': {
1259 1259 b'firstchangeset',
1260 1260 b'linknode',
1261 1261 b'parents',
1262 1262 b'revision',
1263 1263 },
1264 1264 },
1265 1265 b'pathfilter': {
1266 1266 b'type': b'dict',
1267 1267 b'default': lambda: None,
1268 1268 b'example': {b'include': [b'path:tests']},
1269 1269 },
1270 1270 b'revisions': {
1271 1271 b'type': b'list',
1272 1272 b'example': [
1273 1273 {
1274 1274 b'type': b'changesetexplicit',
1275 1275 b'nodes': [b'abcdef...'],
1276 1276 }
1277 1277 ],
1278 1278 },
1279 1279 },
1280 1280 permission=b'pull',
1281 1281 # TODO censoring a file revision won't invalidate the cache.
1282 1282 # Figure out a way to take censoring into account when deriving
1283 1283 # the cache key.
1284 1284 cachekeyfn=makecommandcachekeyfn(b'filesdata', 1, allargs=True),
1285 1285 extracapabilitiesfn=filesdatacapabilities,
1286 1286 )
1287 1287 def filesdata(repo, proto, haveparents, fields, pathfilter, revisions):
1288 1288 # TODO This should operate on a repo that exposes obsolete changesets. There
1289 1289 # is a race between a client making a push that obsoletes a changeset and
1290 1290 # another client fetching files data for that changeset. If a client has a
1291 1291 # changeset, it should probably be allowed to access files data for that
1292 1292 # changeset.
1293 1293
1294 1294 outgoing = resolvenodes(repo, revisions)
1295 1295 filematcher = makefilematcher(repo, pathfilter)
1296 1296
1297 1297 # path -> {fnode: linknode}
1298 1298 fnodes = collections.defaultdict(dict)
1299 1299
1300 1300 # We collect the set of relevant file revisions by iterating the changeset
1301 1301 # revisions and either walking the set of files recorded in the changeset
1302 1302 # or by walking the manifest at that revision. There is probably room for a
1303 1303 # storage-level API to request this data, as it can be expensive to compute
1304 1304 # and would benefit from caching or alternate storage from what revlogs
1305 1305 # provide.
1306 1306 for node in outgoing:
1307 1307 ctx = repo[node]
1308 1308 mctx = ctx.manifestctx()
1309 1309 md = mctx.read()
1310 1310
1311 1311 if haveparents:
1312 1312 checkpaths = ctx.files()
1313 1313 else:
1314 1314 checkpaths = md.keys()
1315 1315
1316 1316 for path in checkpaths:
1317 1317 fnode = md[path]
1318 1318
1319 1319 if path in fnodes and fnode in fnodes[path]:
1320 1320 continue
1321 1321
1322 1322 if not filematcher(path):
1323 1323 continue
1324 1324
1325 1325 fnodes[path].setdefault(fnode, node)
1326 1326
1327 1327 yield {
1328 1328 b'totalpaths': len(fnodes),
1329 1329 b'totalitems': sum(len(v) for v in fnodes.values()),
1330 1330 }
1331 1331
1332 1332 for path, filenodes in sorted(fnodes.items()):
1333 1333 try:
1334 1334 store = getfilestore(repo, proto, path)
1335 1335 except FileAccessError as e:
1336 1336 raise error.WireprotoCommandError(e.msg, e.args)
1337 1337
1338 1338 yield {
1339 1339 b'path': path,
1340 1340 b'totalitems': len(filenodes),
1341 1341 }
1342 1342
1343 1343 revisions = store.emitrevisions(
1344 1344 filenodes.keys(),
1345 1345 revisiondata=b'revision' in fields,
1346 1346 assumehaveparentrevisions=haveparents,
1347 1347 )
1348 1348
1349 1349 for o in emitfilerevisions(repo, path, revisions, filenodes, fields):
1350 1350 yield o
1351 1351
1352 1352
1353 1353 @wireprotocommand(
1354 1354 b'heads',
1355 1355 args={
1356 1356 b'publiconly': {
1357 1357 b'type': b'bool',
1358 1358 b'default': lambda: False,
1359 1359 b'example': False,
1360 1360 },
1361 1361 },
1362 1362 permission=b'pull',
1363 1363 )
1364 1364 def headsv2(repo, proto, publiconly):
1365 1365 if publiconly:
1366 1366 repo = repo.filtered(b'immutable')
1367 1367
1368 1368 yield repo.heads()
1369 1369
1370 1370
1371 1371 @wireprotocommand(
1372 1372 b'known',
1373 1373 args={
1374 1374 b'nodes': {
1375 1375 b'type': b'list',
1376 1376 b'default': list,
1377 1377 b'example': [b'deadbeef'],
1378 1378 },
1379 1379 },
1380 1380 permission=b'pull',
1381 1381 )
1382 1382 def knownv2(repo, proto, nodes):
1383 1383 result = b''.join(b'1' if n else b'0' for n in repo.known(nodes))
1384 1384 yield result
1385 1385
1386 1386
1387 1387 @wireprotocommand(
1388 1388 b'listkeys',
1389 1389 args={
1390 1390 b'namespace': {
1391 1391 b'type': b'bytes',
1392 1392 b'example': b'ns',
1393 1393 },
1394 1394 },
1395 1395 permission=b'pull',
1396 1396 )
1397 1397 def listkeysv2(repo, proto, namespace):
1398 1398 keys = repo.listkeys(encoding.tolocal(namespace))
1399 1399 keys = {
1400 1400 encoding.fromlocal(k): encoding.fromlocal(v)
1401 1401 for k, v in pycompat.iteritems(keys)
1402 1402 }
1403 1403
1404 1404 yield keys
1405 1405
1406 1406
1407 1407 @wireprotocommand(
1408 1408 b'lookup',
1409 1409 args={
1410 1410 b'key': {
1411 1411 b'type': b'bytes',
1412 1412 b'example': b'foo',
1413 1413 },
1414 1414 },
1415 1415 permission=b'pull',
1416 1416 )
1417 1417 def lookupv2(repo, proto, key):
1418 1418 key = encoding.tolocal(key)
1419 1419
1420 1420 # TODO handle exception.
1421 1421 node = repo.lookup(key)
1422 1422
1423 1423 yield node
1424 1424
1425 1425
1426 1426 def manifestdatacapabilities(repo, proto):
1427 1427 batchsize = repo.ui.configint(
1428 1428 b'experimental', b'server.manifestdata.recommended-batch-size'
1429 1429 )
1430 1430
1431 1431 return {
1432 1432 b'recommendedbatchsize': batchsize,
1433 1433 }
1434 1434
1435 1435
1436 1436 @wireprotocommand(
1437 1437 b'manifestdata',
1438 1438 args={
1439 1439 b'nodes': {
1440 1440 b'type': b'list',
1441 1441 b'example': [b'0123456...'],
1442 1442 },
1443 1443 b'haveparents': {
1444 1444 b'type': b'bool',
1445 1445 b'default': lambda: False,
1446 1446 b'example': True,
1447 1447 },
1448 1448 b'fields': {
1449 1449 b'type': b'set',
1450 1450 b'default': set,
1451 1451 b'example': {b'parents', b'revision'},
1452 1452 b'validvalues': {b'parents', b'revision'},
1453 1453 },
1454 1454 b'tree': {
1455 1455 b'type': b'bytes',
1456 1456 b'example': b'',
1457 1457 },
1458 1458 },
1459 1459 permission=b'pull',
1460 1460 cachekeyfn=makecommandcachekeyfn(b'manifestdata', 1, allargs=True),
1461 1461 extracapabilitiesfn=manifestdatacapabilities,
1462 1462 )
1463 1463 def manifestdata(repo, proto, haveparents, nodes, fields, tree):
1464 1464 store = repo.manifestlog.getstorage(tree)
1465 1465
1466 1466 # Validate the node is known and abort on unknown revisions.
1467 1467 for node in nodes:
1468 1468 try:
1469 1469 store.rev(node)
1470 1470 except error.LookupError:
1471 1471 raise error.WireprotoCommandError(b'unknown node: %s', (node,))
1472 1472
1473 1473 revisions = store.emitrevisions(
1474 1474 nodes,
1475 1475 revisiondata=b'revision' in fields,
1476 1476 assumehaveparentrevisions=haveparents,
1477 1477 )
1478 1478
1479 1479 yield {
1480 1480 b'totalitems': len(nodes),
1481 1481 }
1482 1482
1483 1483 for revision in revisions:
1484 1484 d = {
1485 1485 b'node': revision.node,
1486 1486 }
1487 1487
1488 1488 if b'parents' in fields:
1489 1489 d[b'parents'] = [revision.p1node, revision.p2node]
1490 1490
1491 1491 followingmeta = []
1492 1492 followingdata = []
1493 1493
1494 1494 if b'revision' in fields:
1495 1495 if revision.revision is not None:
1496 1496 followingmeta.append((b'revision', len(revision.revision)))
1497 1497 followingdata.append(revision.revision)
1498 1498 else:
1499 1499 d[b'deltabasenode'] = revision.basenode
1500 1500 followingmeta.append((b'delta', len(revision.delta)))
1501 1501 followingdata.append(revision.delta)
1502 1502
1503 1503 if followingmeta:
1504 1504 d[b'fieldsfollowing'] = followingmeta
1505 1505
1506 1506 yield d
1507 1507
1508 1508 for extra in followingdata:
1509 1509 yield extra
1510 1510
1511 1511
1512 1512 @wireprotocommand(
1513 1513 b'pushkey',
1514 1514 args={
1515 1515 b'namespace': {
1516 1516 b'type': b'bytes',
1517 1517 b'example': b'ns',
1518 1518 },
1519 1519 b'key': {
1520 1520 b'type': b'bytes',
1521 1521 b'example': b'key',
1522 1522 },
1523 1523 b'old': {
1524 1524 b'type': b'bytes',
1525 1525 b'example': b'old',
1526 1526 },
1527 1527 b'new': {
1528 1528 b'type': b'bytes',
1529 1529 b'example': b'new',
1530 1530 },
1531 1531 },
1532 1532 permission=b'push',
1533 1533 )
1534 1534 def pushkeyv2(repo, proto, namespace, key, old, new):
1535 1535 # TODO handle ui output redirection
1536 1536 yield repo.pushkey(
1537 1537 encoding.tolocal(namespace),
1538 1538 encoding.tolocal(key),
1539 1539 encoding.tolocal(old),
1540 1540 encoding.tolocal(new),
1541 1541 )
1542 1542
1543 1543
1544 1544 @wireprotocommand(
1545 1545 b'rawstorefiledata',
1546 1546 args={
1547 1547 b'files': {
1548 1548 b'type': b'list',
1549 1549 b'example': [b'changelog', b'manifestlog'],
1550 1550 },
1551 1551 b'pathfilter': {
1552 1552 b'type': b'list',
1553 1553 b'default': lambda: None,
1554 1554 b'example': {b'include': [b'path:tests']},
1555 1555 },
1556 1556 },
1557 1557 permission=b'pull',
1558 1558 )
1559 1559 def rawstorefiledata(repo, proto, files, pathfilter):
1560 1560 if not streamclone.allowservergeneration(repo):
1561 1561 raise error.WireprotoCommandError(b'stream clone is disabled')
1562 1562
1563 1563 # TODO support dynamically advertising what store files "sets" are
1564 1564 # available. For now, we support changelog, manifestlog, and files.
1565 1565 files = set(files)
1566 1566 allowedfiles = {b'changelog', b'manifestlog'}
1567 1567
1568 1568 unsupported = files - allowedfiles
1569 1569 if unsupported:
1570 1570 raise error.WireprotoCommandError(
1571 1571 b'unknown file type: %s', (b', '.join(sorted(unsupported)),)
1572 1572 )
1573 1573
1574 1574 with repo.lock():
1575 1575 topfiles = list(repo.store.topfiles())
1576 1576
1577 1577 sendfiles = []
1578 1578 totalsize = 0
1579 1579
1580 1580 # TODO this bypasses a bunch of storage layer interface abstractions
1581 1581 # because it assumes revlogs.
1582 for rl_type, name, encodedname, size in topfiles:
1582 for rl_type, name, size in topfiles:
1583 1583 # XXX use the `rl_type` for that
1584 1584 if b'changelog' in files and name.startswith(b'00changelog'):
1585 1585 pass
1586 1586 elif b'manifestlog' in files and name.startswith(b'00manifest'):
1587 1587 pass
1588 1588 else:
1589 1589 continue
1590 1590
1591 1591 sendfiles.append((b'store', name, size))
1592 1592 totalsize += size
1593 1593
1594 1594 yield {
1595 1595 b'filecount': len(sendfiles),
1596 1596 b'totalsize': totalsize,
1597 1597 }
1598 1598
1599 1599 for location, name, size in sendfiles:
1600 1600 yield {
1601 1601 b'location': location,
1602 1602 b'path': name,
1603 1603 b'size': size,
1604 1604 }
1605 1605
1606 1606 # We have to use a closure for this to ensure the context manager is
1607 1607 # closed only after sending the final chunk.
1608 1608 def getfiledata():
1609 1609 with repo.svfs(name, b'rb', auditpath=False) as fh:
1610 1610 for chunk in util.filechunkiter(fh, limit=size):
1611 1611 yield chunk
1612 1612
1613 1613 yield wireprototypes.indefinitebytestringresponse(getfiledata())
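The functional change in the hunk above is that ``repo.store.topfiles()`` now yields three-element ``(rl_type, name, size)`` tuples instead of four-element tuples that also carried ``encodedname``. A caller written against the old shape would be adapted roughly as follows (hypothetical caller code):

    # Old unpacking, no longer valid because topfiles() yields 3-tuples:
    #   for rl_type, name, encodedname, size in repo.store.topfiles():
    #       ...

    # New unpacking, matching the loop in rawstorefiledata() above:
    for rl_type, name, size in repo.store.topfiles():
        if name.startswith(b'00changelog'):
            changelog_size = size  # e.g. track the changelog size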
@@ -1,741 +1,745 b''
1 1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # To use this with the test suite:
9 9 #
10 10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12 12
13 13 from __future__ import absolute_import
14 14
15 15 import stat
16 16
17 17 from mercurial.i18n import _
18 18 from mercurial.node import (
19 19 bin,
20 20 hex,
21 21 nullrev,
22 22 )
23 23 from mercurial.thirdparty import attr
24 24 from mercurial import (
25 25 ancestor,
26 26 bundlerepo,
27 27 error,
28 28 extensions,
29 29 localrepo,
30 30 mdiff,
31 31 pycompat,
32 32 revlog,
33 33 store,
34 34 verify,
35 35 )
36 36 from mercurial.interfaces import (
37 37 repository,
38 38 util as interfaceutil,
39 39 )
40 40 from mercurial.utils import (
41 41 cborutil,
42 42 storageutil,
43 43 )
44 44 from mercurial.revlogutils import flagutil
45 45
46 46 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
47 47 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
48 48 # be specifying the version(s) of Mercurial they are tested with, or
49 49 # leave the attribute unspecified.
50 50 testedwith = b'ships-with-hg-core'
51 51
52 52 REQUIREMENT = b'testonly-simplestore'
53 53
54 54
55 55 def validatenode(node):
56 56 if isinstance(node, int):
57 57 raise ValueError('expected node; got int')
58 58
59 59 if len(node) != 20:
60 60 raise ValueError('expected 20 byte node')
61 61
62 62
63 63 def validaterev(rev):
64 64 if not isinstance(rev, int):
65 65 raise ValueError('expected int')
66 66
67 67
68 68 class simplestoreerror(error.StorageError):
69 69 pass
70 70
71 71
72 72 @interfaceutil.implementer(repository.irevisiondelta)
73 73 @attr.s(slots=True)
74 74 class simplestorerevisiondelta(object):
75 75 node = attr.ib()
76 76 p1node = attr.ib()
77 77 p2node = attr.ib()
78 78 basenode = attr.ib()
79 79 flags = attr.ib()
80 80 baserevisionsize = attr.ib()
81 81 revision = attr.ib()
82 82 delta = attr.ib()
83 83 linknode = attr.ib(default=None)
84 84
85 85
86 86 @interfaceutil.implementer(repository.iverifyproblem)
87 87 @attr.s(frozen=True)
88 88 class simplefilestoreproblem(object):
89 89 warning = attr.ib(default=None)
90 90 error = attr.ib(default=None)
91 91 node = attr.ib(default=None)
92 92
93 93
94 94 @interfaceutil.implementer(repository.ifilestorage)
95 95 class filestorage(object):
96 96 """Implements storage for a tracked path.
97 97
98 98 Data is stored in the VFS in a directory corresponding to the tracked
99 99 path.
100 100
101 101 Index data is stored in an ``index`` file using CBOR.
102 102
103 103 Fulltext data is stored in files having names of the node.
104 104 """
105 105
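The layout described in the docstring looks roughly like this under the store vfs, for a hypothetical tracked path ``foo.txt`` (node hashes abbreviated):

    data/foo.txt/index       # CBOR-encoded list of index entries
    data/foo.txt/1f2e3d...   # fulltext of one revision, file named after its hex node
    data/foo.txt/4a5b6c...   # fulltext of another revision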
106 106 _flagserrorclass = simplestoreerror
107 107
108 108 def __init__(self, repo, svfs, path):
109 109 self.nullid = repo.nullid
110 110 self._repo = repo
111 111 self._svfs = svfs
112 112 self._path = path
113 113
114 114 self._storepath = b'/'.join([b'data', path])
115 115 self._indexpath = b'/'.join([self._storepath, b'index'])
116 116
117 117 indexdata = self._svfs.tryread(self._indexpath)
118 118 if indexdata:
119 119 indexdata = cborutil.decodeall(indexdata)
120 120
121 121 self._indexdata = indexdata or []
122 122 self._indexbynode = {}
123 123 self._indexbyrev = {}
124 124 self._index = []
125 125 self._refreshindex()
126 126
127 127 self._flagprocessors = dict(flagutil.flagprocessors)
128 128
129 129 def _refreshindex(self):
130 130 self._indexbynode.clear()
131 131 self._indexbyrev.clear()
132 132 self._index = []
133 133
134 134 for i, entry in enumerate(self._indexdata):
135 135 self._indexbynode[entry[b'node']] = entry
136 136 self._indexbyrev[i] = entry
137 137
138 138 self._indexbynode[self._repo.nullid] = {
139 139 b'node': self._repo.nullid,
140 140 b'p1': self._repo.nullid,
141 141 b'p2': self._repo.nullid,
142 142 b'linkrev': nullrev,
143 143 b'flags': 0,
144 144 }
145 145
146 146 self._indexbyrev[nullrev] = {
147 147 b'node': self._repo.nullid,
148 148 b'p1': self._repo.nullid,
149 149 b'p2': self._repo.nullid,
150 150 b'linkrev': nullrev,
151 151 b'flags': 0,
152 152 }
153 153
154 154 for i, entry in enumerate(self._indexdata):
155 155 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
156 156
157 157 # start, length, rawsize, chainbase, linkrev, p1, p2, node
158 158 self._index.append(
159 159 (0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev, entry[b'node'])
160 160 )
161 161
162 162 self._index.append((0, 0, 0, -1, -1, -1, -1, self._repo.nullid))
163 163
164 164 def __len__(self):
165 165 return len(self._indexdata)
166 166
167 167 def __iter__(self):
168 168 return iter(range(len(self)))
169 169
170 170 def revs(self, start=0, stop=None):
171 171 step = 1
172 172 if stop is not None:
173 173 if start > stop:
174 174 step = -1
175 175
176 176 stop += step
177 177 else:
178 178 stop = len(self)
179 179
180 180 return range(start, stop, step)
181 181
182 182 def parents(self, node):
183 183 validatenode(node)
184 184
185 185 if node not in self._indexbynode:
186 186 raise KeyError('unknown node')
187 187
188 188 entry = self._indexbynode[node]
189 189
190 190 return entry[b'p1'], entry[b'p2']
191 191
192 192 def parentrevs(self, rev):
193 193 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
194 194 return self.rev(p1), self.rev(p2)
195 195
196 196 def rev(self, node):
197 197 validatenode(node)
198 198
199 199 try:
200 200 self._indexbynode[node]
201 201 except KeyError:
202 202 raise error.LookupError(node, self._indexpath, _('no node'))
203 203
204 204 for rev, entry in self._indexbyrev.items():
205 205 if entry[b'node'] == node:
206 206 return rev
207 207
208 208 raise error.ProgrammingError(b'this should not occur')
209 209
210 210 def node(self, rev):
211 211 validaterev(rev)
212 212
213 213 return self._indexbyrev[rev][b'node']
214 214
215 215 def hasnode(self, node):
216 216 validatenode(node)
217 217 return node in self._indexbynode
218 218
219 219 def censorrevision(self, tr, censornode, tombstone=b''):
220 220 raise NotImplementedError('TODO')
221 221
222 222 def lookup(self, node):
223 223 if isinstance(node, int):
224 224 return self.node(node)
225 225
226 226 if len(node) == 20:
227 227 self.rev(node)
228 228 return node
229 229
230 230 try:
231 231 rev = int(node)
232 232 if '%d' % rev != node:
233 233 raise ValueError
234 234
235 235 if rev < 0:
236 236 rev = len(self) + rev
237 237 if rev < 0 or rev >= len(self):
238 238 raise ValueError
239 239
240 240 return self.node(rev)
241 241 except (ValueError, OverflowError):
242 242 pass
243 243
244 244 if len(node) == 40:
245 245 try:
246 246 rawnode = bin(node)
247 247 self.rev(rawnode)
248 248 return rawnode
249 249 except TypeError:
250 250 pass
251 251
252 252 raise error.LookupError(node, self._path, _('invalid lookup input'))
253 253
254 254 def linkrev(self, rev):
255 255 validaterev(rev)
256 256
257 257 return self._indexbyrev[rev][b'linkrev']
258 258
259 259 def _flags(self, rev):
260 260 validaterev(rev)
261 261
262 262 return self._indexbyrev[rev][b'flags']
263 263
264 264 def _candelta(self, baserev, rev):
265 265 validaterev(baserev)
266 266 validaterev(rev)
267 267
268 268 if (self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS) or (
269 269 self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS
270 270 ):
271 271 return False
272 272
273 273 return True
274 274
275 275 def checkhash(self, text, node, p1=None, p2=None, rev=None):
276 276 if p1 is None and p2 is None:
277 277 p1, p2 = self.parents(node)
278 278 if node != storageutil.hashrevisionsha1(text, p1, p2):
279 279 raise simplestoreerror(
280 280 _("integrity check failed on %s") % self._path
281 281 )
282 282
283 283 def revision(self, nodeorrev, raw=False):
284 284 if isinstance(nodeorrev, int):
285 285 node = self.node(nodeorrev)
286 286 else:
287 287 node = nodeorrev
288 288 validatenode(node)
289 289
290 290 if node == self._repo.nullid:
291 291 return b''
292 292
293 293 rev = self.rev(node)
294 294 flags = self._flags(rev)
295 295
296 296 path = b'/'.join([self._storepath, hex(node)])
297 297 rawtext = self._svfs.read(path)
298 298
299 299 if raw:
300 300 validatehash = flagutil.processflagsraw(self, rawtext, flags)
301 301 text = rawtext
302 302 else:
303 303 r = flagutil.processflagsread(self, rawtext, flags)
304 304 text, validatehash = r
305 305 if validatehash:
306 306 self.checkhash(text, node, rev=rev)
307 307
308 308 return text
309 309
310 310 def rawdata(self, nodeorrev):
311 311 return self.revision(nodeorrev, raw=True)
312 312
313 313 def read(self, node):
314 314 validatenode(node)
315 315
316 316 revision = self.revision(node)
317 317
318 318 if not revision.startswith(b'\1\n'):
319 319 return revision
320 320
321 321 start = revision.index(b'\1\n', 2)
322 322 return revision[start + 2 :]
323 323
324 324 def renamed(self, node):
325 325 validatenode(node)
326 326
327 327 if self.parents(node)[0] != self._repo.nullid:
328 328 return False
329 329
330 330 fulltext = self.revision(node)
331 331 m = storageutil.parsemeta(fulltext)[0]
332 332
333 333 if m and 'copy' in m:
334 334 return m['copy'], bin(m['copyrev'])
335 335
336 336 return False
337 337
338 338 def cmp(self, node, text):
339 339 validatenode(node)
340 340
341 341 t = text
342 342
343 343 if text.startswith(b'\1\n'):
344 344 t = b'\1\n\1\n' + text
345 345
346 346 p1, p2 = self.parents(node)
347 347
348 348 if storageutil.hashrevisionsha1(t, p1, p2) == node:
349 349 return False
350 350
351 351 if self.iscensored(self.rev(node)):
352 352 return text != b''
353 353
354 354 if self.renamed(node):
355 355 t2 = self.read(node)
356 356 return t2 != text
357 357
358 358 return True
359 359
360 360 def size(self, rev):
361 361 validaterev(rev)
362 362
363 363 node = self._indexbyrev[rev][b'node']
364 364
365 365 if self.renamed(node):
366 366 return len(self.read(node))
367 367
368 368 if self.iscensored(rev):
369 369 return 0
370 370
371 371 return len(self.revision(node))
372 372
373 373 def iscensored(self, rev):
374 374 validaterev(rev)
375 375
376 376 return self._flags(rev) & repository.REVISION_FLAG_CENSORED
377 377
378 378 def commonancestorsheads(self, a, b):
379 379 validatenode(a)
380 380 validatenode(b)
381 381
382 382 a = self.rev(a)
383 383 b = self.rev(b)
384 384
385 385 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
386 386 return pycompat.maplist(self.node, ancestors)
387 387
388 388 def descendants(self, revs):
389 389 # This is a copy of revlog.descendants()
390 390 first = min(revs)
391 391 if first == nullrev:
392 392 for i in self:
393 393 yield i
394 394 return
395 395
396 396 seen = set(revs)
397 397 for i in self.revs(start=first + 1):
398 398 for x in self.parentrevs(i):
399 399 if x != nullrev and x in seen:
400 400 seen.add(i)
401 401 yield i
402 402 break
403 403
404 404 # Required by verify.
405 405 def files(self):
406 406 entries = self._svfs.listdir(self._storepath)
407 407
408 408 # Strip out undo.backup.* files created as part of transaction
409 409 # recording.
410 410 entries = [f for f in entries if not f.startswith('undo.backup.')]
411 411
412 412 return [b'/'.join((self._storepath, f)) for f in entries]
413 413
414 414 def storageinfo(
415 415 self,
416 416 exclusivefiles=False,
417 417 sharedfiles=False,
418 418 revisionscount=False,
419 419 trackedsize=False,
420 420 storedsize=False,
421 421 ):
422 422 # TODO do a real implementation of this
423 423 return {
424 424 'exclusivefiles': [],
425 425 'sharedfiles': [],
426 426 'revisionscount': len(self),
427 427 'trackedsize': 0,
428 428 'storedsize': None,
429 429 }
430 430
431 431 def verifyintegrity(self, state):
432 432 state['skipread'] = set()
433 433 for rev in self:
434 434 node = self.node(rev)
435 435 try:
436 436 self.revision(node)
437 437 except Exception as e:
438 438 yield simplefilestoreproblem(
439 439 error='unpacking %s: %s' % (node, e), node=node
440 440 )
441 441 state['skipread'].add(node)
442 442
443 443 def emitrevisions(
444 444 self,
445 445 nodes,
446 446 nodesorder=None,
447 447 revisiondata=False,
448 448 assumehaveparentrevisions=False,
449 449 deltamode=repository.CG_DELTAMODE_STD,
450 450 sidedata_helpers=None,
451 451 ):
452 452 # TODO this will probably break on some ordering options.
453 453 nodes = [n for n in nodes if n != self._repo.nullid]
454 454 if not nodes:
455 455 return
456 456 for delta in storageutil.emitrevisions(
457 457 self,
458 458 nodes,
459 459 nodesorder,
460 460 simplestorerevisiondelta,
461 461 revisiondata=revisiondata,
462 462 assumehaveparentrevisions=assumehaveparentrevisions,
463 463 deltamode=deltamode,
464 464 sidedata_helpers=sidedata_helpers,
465 465 ):
466 466 yield delta
467 467
468 468 def add(self, text, meta, transaction, linkrev, p1, p2):
469 469 if meta or text.startswith(b'\1\n'):
470 470 text = storageutil.packmeta(meta, text)
471 471
472 472 return self.addrevision(text, transaction, linkrev, p1, p2)
473 473
474 474 def addrevision(
475 475 self,
476 476 text,
477 477 transaction,
478 478 linkrev,
479 479 p1,
480 480 p2,
481 481 node=None,
482 482 flags=revlog.REVIDX_DEFAULT_FLAGS,
483 483 cachedelta=None,
484 484 ):
485 485 validatenode(p1)
486 486 validatenode(p2)
487 487
488 488 if flags:
489 489 node = node or storageutil.hashrevisionsha1(text, p1, p2)
490 490
491 491 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
492 492
493 493 node = node or storageutil.hashrevisionsha1(text, p1, p2)
494 494
495 495 if node in self._indexbynode:
496 496 return node
497 497
498 498 if validatehash:
499 499 self.checkhash(rawtext, node, p1=p1, p2=p2)
500 500
501 501 return self._addrawrevision(
502 502 node, rawtext, transaction, linkrev, p1, p2, flags
503 503 )
504 504
505 505 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
506 506 transaction.addbackup(self._indexpath)
507 507
508 508 path = b'/'.join([self._storepath, hex(node)])
509 509
510 510 self._svfs.write(path, rawtext)
511 511
512 512 self._indexdata.append(
513 513 {
514 514 b'node': node,
515 515 b'p1': p1,
516 516 b'p2': p2,
517 517 b'linkrev': link,
518 518 b'flags': flags,
519 519 }
520 520 )
521 521
522 522 self._reflectindexupdate()
523 523
524 524 return node
525 525
526 526 def _reflectindexupdate(self):
527 527 self._refreshindex()
528 528 self._svfs.write(
529 529 self._indexpath, ''.join(cborutil.streamencode(self._indexdata))
530 530 )
531 531
532 532 def addgroup(
533 533 self,
534 534 deltas,
535 535 linkmapper,
536 536 transaction,
537 537 addrevisioncb=None,
538 538 duplicaterevisioncb=None,
539 539 maybemissingparents=False,
540 540 ):
541 541 if maybemissingparents:
542 542 raise error.Abort(
543 543 _('simple store does not support missing parents ' 'write mode')
544 544 )
545 545
546 546 empty = True
547 547
548 548 transaction.addbackup(self._indexpath)
549 549
550 550 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
551 551 linkrev = linkmapper(linknode)
552 552 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
553 553
554 554 if node in self._indexbynode:
555 555 if duplicaterevisioncb:
556 556 duplicaterevisioncb(self, self.rev(node))
557 557 empty = False
558 558 continue
559 559
560 560 # Need to resolve the fulltext from the delta base.
561 561 if deltabase == self._repo.nullid:
562 562 text = mdiff.patch(b'', delta)
563 563 else:
564 564 text = mdiff.patch(self.revision(deltabase), delta)
565 565
566 566 rev = self._addrawrevision(
567 567 node, text, transaction, linkrev, p1, p2, flags
568 568 )
569 569
570 570 if addrevisioncb:
571 571 addrevisioncb(self, rev)
572 572 empty = False
573 573 return not empty
574 574
575 575 def _headrevs(self):
576 576 # Assume all revisions are heads by default.
577 577 revishead = {rev: True for rev in self._indexbyrev}
578 578
579 579 for rev, entry in self._indexbyrev.items():
580 580 # Unset head flag for all seen parents.
581 581 revishead[self.rev(entry[b'p1'])] = False
582 582 revishead[self.rev(entry[b'p2'])] = False
583 583
584 584 return [rev for rev, ishead in sorted(revishead.items()) if ishead]
585 585
586 586 def heads(self, start=None, stop=None):
587 587 # This is copied from revlog.py.
588 588 if start is None and stop is None:
589 589 if not len(self):
590 590 return [self._repo.nullid]
591 591 return [self.node(r) for r in self._headrevs()]
592 592
593 593 if start is None:
594 594 start = self._repo.nullid
595 595 if stop is None:
596 596 stop = []
597 597 stoprevs = {self.rev(n) for n in stop}
598 598 startrev = self.rev(start)
599 599 reachable = {startrev}
600 600 heads = {startrev}
601 601
602 602 parentrevs = self.parentrevs
603 603 for r in self.revs(start=startrev + 1):
604 604 for p in parentrevs(r):
605 605 if p in reachable:
606 606 if r not in stoprevs:
607 607 reachable.add(r)
608 608 heads.add(r)
609 609 if p in heads and p not in stoprevs:
610 610 heads.remove(p)
611 611
612 612 return [self.node(r) for r in heads]
613 613
614 614 def children(self, node):
615 615 validatenode(node)
616 616
617 617 # This is a copy of revlog.children().
618 618 c = []
619 619 p = self.rev(node)
620 620 for r in self.revs(start=p + 1):
621 621 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
622 622 if prevs:
623 623 for pr in prevs:
624 624 if pr == p:
625 625 c.append(self.node(r))
626 626 elif p == nullrev:
627 627 c.append(self.node(r))
628 628 return c
629 629
630 630 def getstrippoint(self, minlink):
631 631 return storageutil.resolvestripinfo(
632 632 minlink,
633 633 len(self) - 1,
634 634 self._headrevs(),
635 635 self.linkrev,
636 636 self.parentrevs,
637 637 )
638 638
639 639 def strip(self, minlink, transaction):
640 640 if not len(self):
641 641 return
642 642
643 643 rev, _ignored = self.getstrippoint(minlink)
644 644 if rev == len(self):
645 645 return
646 646
647 647 # Purge index data starting at the requested revision.
648 648 self._indexdata[rev:] = []
649 649 self._reflectindexupdate()
650 650
651 651
652 652 def issimplestorefile(f, kind, st):
653 653 if kind != stat.S_IFREG:
654 654 return False
655 655
656 656 if store.isrevlog(f, kind, st):
657 657 return False
658 658
659 659 # Ignore transaction undo files.
660 660 if f.startswith('undo.'):
661 661 return False
662 662
663 663 # Otherwise assume it belongs to the simple store.
664 664 return True
665 665
666 666
667 667 class simplestore(store.encodedstore):
668 def datafiles(self):
668 def datafiles(self, undecodable=None):
669 669 for x in super(simplestore, self).datafiles():
670 670 yield x
671 671
672 672 # Supplement with non-revlog files.
673 673 extrafiles = self._walk('data', True, filefilter=issimplestorefile)
674 674
675 for unencoded, encoded, size in extrafiles:
675 for f1, size in extrafiles:
676 676 try:
677 unencoded = store.decodefilename(unencoded)
677 f2 = store.decodefilename(f1)
678 678 except KeyError:
679 unencoded = None
679 if undecodable is None:
680 raise error.StorageError(b'undecodable revlog name %s' % f1)
681 else:
682 undecodable.append(f1)
683 continue
680 684
681 yield unencoded, encoded, size
685 yield f2, size
682 686
683 687
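The ``datafiles()`` hunk above changes the method in two ways: entries are now ``(filename, size)`` pairs instead of triples, and undecodable names are either collected into the new ``undecodable`` list or raised as a ``StorageError`` when no list is supplied. A caller could use it roughly like this (``simple_store`` and ``report`` are hypothetical):

    undecodable = []
    for name, size in simple_store.datafiles(undecodable=undecodable):
        pass  # consume the decoded entries

    if undecodable:
        # Undecodable names were collected instead of raising StorageError.
        report(undecodable)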
684 688 def reposetup(ui, repo):
685 689 if not repo.local():
686 690 return
687 691
688 692 if isinstance(repo, bundlerepo.bundlerepository):
689 693 raise error.Abort(_('cannot use simple store with bundlerepo'))
690 694
691 695 class simplestorerepo(repo.__class__):
692 696 def file(self, f):
693 697 return filestorage(repo, self.svfs, f)
694 698
695 699 repo.__class__ = simplestorerepo
696 700
697 701
698 702 def featuresetup(ui, supported):
699 703 supported.add(REQUIREMENT)
700 704
701 705
702 706 def newreporequirements(orig, ui, createopts):
703 707 """Modifies default requirements for new repos to use the simple store."""
704 708 requirements = orig(ui, createopts)
705 709
706 710 # These requirements are only used to affect creation of the store
707 711 # object. We have our own store. So we can remove them.
708 712 # TODO do this once we feel like taking the test hit.
709 713 # if 'fncache' in requirements:
710 714 # requirements.remove('fncache')
711 715 # if 'dotencode' in requirements:
712 716 # requirements.remove('dotencode')
713 717
714 718 requirements.add(REQUIREMENT)
715 719
716 720 return requirements
717 721
718 722
719 723 def makestore(orig, requirements, path, vfstype):
720 724 if REQUIREMENT not in requirements:
721 725 return orig(requirements, path, vfstype)
722 726
723 727 return simplestore(path, vfstype)
724 728
725 729
726 730 def verifierinit(orig, self, *args, **kwargs):
727 731 orig(self, *args, **kwargs)
728 732
729 733 # We don't care that files in the store don't align with what is
730 734 # advertised. So suppress these warnings.
731 735 self.warnorphanstorefiles = False
732 736
733 737
734 738 def extsetup(ui):
735 739 localrepo.featuresetupfuncs.add(featuresetup)
736 740
737 741 extensions.wrapfunction(
738 742 localrepo, 'newreporequirements', newreporequirements
739 743 )
740 744 extensions.wrapfunction(localrepo, 'makestore', makestore)
741 745 extensions.wrapfunction(verify.verifier, '__init__', verifierinit)