revlog: remove legacy usage of `_generaldelta`...
marmoute
r51939:f636103c default
@@ -1,4836 +1,4836 @@
1 1 # debugcommands.py - command processing for debug* commands
2 2 #
3 3 # Copyright 2005-2016 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import binascii
10 10 import codecs
11 11 import collections
12 12 import contextlib
13 13 import difflib
14 14 import errno
15 15 import glob
16 16 import operator
17 17 import os
18 18 import platform
19 19 import random
20 20 import re
21 21 import socket
22 22 import ssl
23 23 import stat
24 24 import subprocess
25 25 import sys
26 26 import time
27 27
28 28 from .i18n import _
29 29 from .node import (
30 30 bin,
31 31 hex,
32 32 nullrev,
33 33 short,
34 34 )
35 35 from .pycompat import (
36 36 open,
37 37 )
38 38 from . import (
39 39 bundle2,
40 40 bundlerepo,
41 41 changegroup,
42 42 cmdutil,
43 43 color,
44 44 context,
45 45 copies,
46 46 dagparser,
47 47 dirstateutils,
48 48 encoding,
49 49 error,
50 50 exchange,
51 51 extensions,
52 52 filelog,
53 53 filemerge,
54 54 filesetlang,
55 55 formatter,
56 56 hg,
57 57 httppeer,
58 58 localrepo,
59 59 lock as lockmod,
60 60 logcmdutil,
61 61 manifest,
62 62 mergestate as mergestatemod,
63 63 metadata,
64 64 obsolete,
65 65 obsutil,
66 66 pathutil,
67 67 phases,
68 68 policy,
69 69 pvec,
70 70 pycompat,
71 71 registrar,
72 72 repair,
73 73 repoview,
74 74 requirements,
75 75 revlog,
76 76 revset,
77 77 revsetlang,
78 78 scmutil,
79 79 setdiscovery,
80 80 simplemerge,
81 81 sshpeer,
82 82 sslutil,
83 83 streamclone,
84 84 strip,
85 85 tags as tagsmod,
86 86 templater,
87 87 treediscovery,
88 88 upgrade,
89 89 url as urlmod,
90 90 util,
91 91 verify,
92 92 vfs as vfsmod,
93 93 wireprotoframing,
94 94 wireprotoserver,
95 95 )
96 96 from .interfaces import repository
97 97 from .stabletailgraph import stabletailsort
98 98 from .utils import (
99 99 cborutil,
100 100 compression,
101 101 dateutil,
102 102 procutil,
103 103 stringutil,
104 104 urlutil,
105 105 )
106 106
107 107 from .revlogutils import (
108 108 constants as revlog_constants,
109 109 debug as revlog_debug,
110 110 deltas as deltautil,
111 111 nodemap,
112 112 rewrite,
113 113 sidedata,
114 114 )
115 115
116 116 release = lockmod.release
117 117
118 118 table = {}
119 119 table.update(strip.command._table)
120 120 command = registrar.command(table)
121 121
122 122
123 123 @command(b'debugancestor', [], _(b'[INDEX] REV1 REV2'), optionalrepo=True)
124 124 def debugancestor(ui, repo, *args):
125 125 """find the ancestor revision of two revisions in a given index"""
126 126 if len(args) == 3:
127 127 index, rev1, rev2 = args
128 128 r = revlog.revlog(vfsmod.vfs(encoding.getcwd(), audit=False), index)
129 129 lookup = r.lookup
130 130 elif len(args) == 2:
131 131 if not repo:
132 132 raise error.Abort(
133 133 _(b'there is no Mercurial repository here (.hg not found)')
134 134 )
135 135 rev1, rev2 = args
136 136 r = repo.changelog
137 137 lookup = repo.lookup
138 138 else:
139 139 raise error.Abort(_(b'either two or three arguments required'))
140 140 a = r.ancestor(lookup(rev1), lookup(rev2))
141 141 ui.write(b'%d:%s\n' % (r.rev(a), hex(a)))
142 142
143 143
144 144 @command(b'debugantivirusrunning', [])
145 145 def debugantivirusrunning(ui, repo):
146 146 """attempt to trigger an antivirus scanner to see if one is active"""
147 147 with repo.cachevfs.open('eicar-test-file.com', b'wb') as f:
148 148 f.write(
149 149 util.b85decode(
150 150 # This is a base85-armored version of the EICAR test file. See
151 151 # https://en.wikipedia.org/wiki/EICAR_test_file for details.
152 152 b'ST#=}P$fV?P+K%yP+C|uG$>GBDK|qyDK~v2MM*<JQY}+dK~6+LQba95P'
153 153 b'E<)&Nm5l)EmTEQR4qnHOhq9iNGnJx'
154 154 )
155 155 )
156 156 # Give an AV engine time to scan the file.
157 157 time.sleep(2)
158 158 util.unlink(repo.cachevfs.join('eicar-test-file.com'))
159 159
160 160
161 161 @command(b'debugapplystreamclonebundle', [], b'FILE')
162 162 def debugapplystreamclonebundle(ui, repo, fname):
163 163 """apply a stream clone bundle file"""
164 164 f = hg.openpath(ui, fname)
165 165 gen = exchange.readbundle(ui, f, fname)
166 166 gen.apply(repo)
167 167
168 168
169 169 @command(
170 170 b'debugbuilddag',
171 171 [
172 172 (
173 173 b'm',
174 174 b'mergeable-file',
175 175 None,
176 176 _(b'add single file mergeable changes'),
177 177 ),
178 178 (
179 179 b'o',
180 180 b'overwritten-file',
181 181 None,
182 182 _(b'add single file all revs overwrite'),
183 183 ),
184 184 (b'n', b'new-file', None, _(b'add new file at each rev')),
185 185 (
186 186 b'',
187 187 b'from-existing',
188 188 None,
189 189 _(b'continue from a non-empty repository'),
190 190 ),
191 191 ],
192 192 _(b'[OPTION]... [TEXT]'),
193 193 )
194 194 def debugbuilddag(
195 195 ui,
196 196 repo,
197 197 text=None,
198 198 mergeable_file=False,
199 199 overwritten_file=False,
200 200 new_file=False,
201 201 from_existing=False,
202 202 ):
203 203 """builds a repo with a given DAG from scratch in the current empty repo
204 204
205 205 The description of the DAG is read from stdin if not given on the
206 206 command line.
207 207
208 208 Elements:
209 209
210 210 - "+n" is a linear run of n nodes based on the current default parent
211 211 - "." is a single node based on the current default parent
212 212 - "$" resets the default parent to null (implied at the start);
213 213 otherwise the default parent is always the last node created
214 214 - "<p" sets the default parent to the backref p
215 215 - "*p" is a fork at parent p, which is a backref
216 216 - "*p1/p2" is a merge of parents p1 and p2, which are backrefs
217 217 - "/p2" is a merge of the preceding node and p2
218 218 - ":tag" defines a local tag for the preceding node
219 219 - "@branch" sets the named branch for subsequent nodes
220 220 - "#...\\n" is a comment up to the end of the line
221 221
222 222 Whitespace between the above elements is ignored.
223 223
224 224 A backref is either
225 225
226 226 - a number n, which references the node curr-n, where curr is the current
227 227 node, or
228 228 - the name of a local tag you placed earlier using ":tag", or
229 229 - empty to denote the default parent.
230 230
231 231 All string-valued elements are either strictly alphanumeric or must
232 232 be enclosed in double quotes ("..."), with "\\" as escape character.
233 233 """
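# Illustrative example (hypothetical DAG text, not part of the original file):
# running `hg debugbuilddag '+3:mainhead $ +2 /mainhead'` in an empty repo
# creates a linear chain r0-r1-r2 and tags r2 as "mainhead", resets the
# default parent to null, builds a second chain r3-r4, and finishes with r5
# merging r4 with the node tagged "mainhead".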
234 234
235 235 if text is None:
236 236 ui.status(_(b"reading DAG from stdin\n"))
237 237 text = ui.fin.read()
238 238
239 239 cl = repo.changelog
240 240 if len(cl) > 0 and not from_existing:
241 241 raise error.Abort(_(b'repository is not empty'))
242 242
243 243 # determine number of revs in DAG
244 244 total = 0
245 245 for type, data in dagparser.parsedag(text):
246 246 if type == b'n':
247 247 total += 1
248 248
249 249 if mergeable_file:
250 250 linesperrev = 2
251 251 # make a file with k lines per rev
252 252 initialmergedlines = [b'%d' % i for i in range(0, total * linesperrev)]
253 253 initialmergedlines.append(b"")
254 254
255 255 tags = []
256 256 progress = ui.makeprogress(
257 257 _(b'building'), unit=_(b'revisions'), total=total
258 258 )
259 259 with progress, repo.wlock(), repo.lock(), repo.transaction(b"builddag"):
260 260 at = -1
261 261 atbranch = b'default'
262 262 nodeids = []
263 263 id = 0
264 264 progress.update(id)
265 265 for type, data in dagparser.parsedag(text):
266 266 if type == b'n':
267 267 ui.note((b'node %s\n' % pycompat.bytestr(data)))
268 268 id, ps = data
269 269
270 270 files = []
271 271 filecontent = {}
272 272
273 273 p2 = None
274 274 if mergeable_file:
275 275 fn = b"mf"
276 276 p1 = repo[ps[0]]
277 277 if len(ps) > 1:
278 278 p2 = repo[ps[1]]
279 279 pa = p1.ancestor(p2)
280 280 base, local, other = [
281 281 x[fn].data() for x in (pa, p1, p2)
282 282 ]
283 283 m3 = simplemerge.Merge3Text(base, local, other)
284 284 ml = [
285 285 l.strip()
286 286 for l in simplemerge.render_minimized(m3)[0]
287 287 ]
288 288 ml.append(b"")
289 289 elif at > 0:
290 290 ml = p1[fn].data().split(b"\n")
291 291 else:
292 292 ml = initialmergedlines
293 293 ml[id * linesperrev] += b" r%i" % id
294 294 mergedtext = b"\n".join(ml)
295 295 files.append(fn)
296 296 filecontent[fn] = mergedtext
297 297
298 298 if overwritten_file:
299 299 fn = b"of"
300 300 files.append(fn)
301 301 filecontent[fn] = b"r%i\n" % id
302 302
303 303 if new_file:
304 304 fn = b"nf%i" % id
305 305 files.append(fn)
306 306 filecontent[fn] = b"r%i\n" % id
307 307 if len(ps) > 1:
308 308 if not p2:
309 309 p2 = repo[ps[1]]
310 310 for fn in p2:
311 311 if fn.startswith(b"nf"):
312 312 files.append(fn)
313 313 filecontent[fn] = p2[fn].data()
314 314
315 315 def fctxfn(repo, cx, path):
316 316 if path in filecontent:
317 317 return context.memfilectx(
318 318 repo, cx, path, filecontent[path]
319 319 )
320 320 return None
321 321
322 322 if len(ps) == 0 or ps[0] < 0:
323 323 pars = [None, None]
324 324 elif len(ps) == 1:
325 325 pars = [nodeids[ps[0]], None]
326 326 else:
327 327 pars = [nodeids[p] for p in ps]
328 328 cx = context.memctx(
329 329 repo,
330 330 pars,
331 331 b"r%i" % id,
332 332 files,
333 333 fctxfn,
334 334 date=(id, 0),
335 335 user=b"debugbuilddag",
336 336 extra={b'branch': atbranch},
337 337 )
338 338 nodeid = repo.commitctx(cx)
339 339 nodeids.append(nodeid)
340 340 at = id
341 341 elif type == b'l':
342 342 id, name = data
343 343 ui.note((b'tag %s\n' % name))
344 344 tags.append(b"%s %s\n" % (hex(repo.changelog.node(id)), name))
345 345 elif type == b'a':
346 346 ui.note((b'branch %s\n' % data))
347 347 atbranch = data
348 348 progress.update(id)
349 349
350 350 if tags:
351 351 repo.vfs.write(b"localtags", b"".join(tags))
352 352
353 353
354 354 def _debugchangegroup(ui, gen, all=None, indent=0, **opts):
355 355 indent_string = b' ' * indent
356 356 if all:
357 357 ui.writenoi18n(
358 358 b"%sformat: id, p1, p2, cset, delta base, len(delta)\n"
359 359 % indent_string
360 360 )
361 361
362 362 def showchunks(named):
363 363 ui.write(b"\n%s%s\n" % (indent_string, named))
364 364 for deltadata in gen.deltaiter():
365 365 node, p1, p2, cs, deltabase, delta, flags, sidedata = deltadata
366 366 ui.write(
367 367 b"%s%s %s %s %s %s %d\n"
368 368 % (
369 369 indent_string,
370 370 hex(node),
371 371 hex(p1),
372 372 hex(p2),
373 373 hex(cs),
374 374 hex(deltabase),
375 375 len(delta),
376 376 )
377 377 )
378 378
379 379 gen.changelogheader()
380 380 showchunks(b"changelog")
381 381 gen.manifestheader()
382 382 showchunks(b"manifest")
383 383 for chunkdata in iter(gen.filelogheader, {}):
384 384 fname = chunkdata[b'filename']
385 385 showchunks(fname)
386 386 else:
387 387 if isinstance(gen, bundle2.unbundle20):
388 388 raise error.Abort(_(b'use debugbundle2 for this file'))
389 389 gen.changelogheader()
390 390 for deltadata in gen.deltaiter():
391 391 node, p1, p2, cs, deltabase, delta, flags, sidedata = deltadata
392 392 ui.write(b"%s%s\n" % (indent_string, hex(node)))
393 393
394 394
395 395 def _debugobsmarkers(ui, part, indent=0, **opts):
396 396 """display version and markers contained in 'data'"""
397 397 data = part.read()
398 398 indent_string = b' ' * indent
399 399 try:
400 400 version, markers = obsolete._readmarkers(data)
401 401 except error.UnknownVersion as exc:
402 402 msg = b"%sunsupported version: %s (%d bytes)\n"
403 403 msg %= indent_string, exc.version, len(data)
404 404 ui.write(msg)
405 405 else:
406 406 msg = b"%sversion: %d (%d bytes)\n"
407 407 msg %= indent_string, version, len(data)
408 408 ui.write(msg)
409 409 fm = ui.formatter(b'debugobsolete', pycompat.byteskwargs(opts))
410 410 for rawmarker in sorted(markers):
411 411 m = obsutil.marker(None, rawmarker)
412 412 fm.startitem()
413 413 fm.plain(indent_string)
414 414 cmdutil.showmarker(fm, m)
415 415 fm.end()
416 416
417 417
418 418 def _debugphaseheads(ui, data, indent=0):
419 419 """display phase heads contained in 'data'"""
420 420 indent_string = b' ' * indent
421 421 headsbyphase = phases.binarydecode(data)
422 422 for phase in phases.allphases:
423 423 for head in headsbyphase[phase]:
424 424 ui.write(indent_string)
425 425 ui.write(b'%s %s\n' % (hex(head), phases.phasenames[phase]))
426 426
427 427
428 428 def _quasirepr(thing):
429 429 if isinstance(thing, (dict, util.sortdict, collections.OrderedDict)):
430 430 return b'{%s}' % (
431 431 b', '.join(b'%s: %s' % (k, thing[k]) for k in sorted(thing))
432 432 )
433 433 return pycompat.bytestr(repr(thing))
434 434
435 435
436 436 def _debugbundle2(ui, gen, all=None, **opts):
437 437 """lists the contents of a bundle2"""
438 438 if not isinstance(gen, bundle2.unbundle20):
439 439 raise error.Abort(_(b'not a bundle2 file'))
440 440 ui.write((b'Stream params: %s\n' % _quasirepr(gen.params)))
441 441 parttypes = opts.get('part_type', [])
442 442 for part in gen.iterparts():
443 443 if parttypes and part.type not in parttypes:
444 444 continue
445 445 msg = b'%s -- %s (mandatory: %r)\n'
446 446 ui.write((msg % (part.type, _quasirepr(part.params), part.mandatory)))
447 447 if part.type == b'changegroup':
448 448 version = part.params.get(b'version', b'01')
449 449 cg = changegroup.getunbundler(version, part, b'UN')
450 450 if not ui.quiet:
451 451 _debugchangegroup(ui, cg, all=all, indent=4, **opts)
452 452 if part.type == b'obsmarkers':
453 453 if not ui.quiet:
454 454 _debugobsmarkers(ui, part, indent=4, **opts)
455 455 if part.type == b'phase-heads':
456 456 if not ui.quiet:
457 457 _debugphaseheads(ui, part, indent=4)
458 458
459 459
460 460 @command(
461 461 b'debugbundle',
462 462 [
463 463 (b'a', b'all', None, _(b'show all details')),
464 464 (b'', b'part-type', [], _(b'show only the named part type')),
465 465 (b'', b'spec', None, _(b'print the bundlespec of the bundle')),
466 466 ],
467 467 _(b'FILE'),
468 468 norepo=True,
469 469 )
470 470 def debugbundle(ui, bundlepath, all=None, spec=None, **opts):
471 471 """lists the contents of a bundle"""
472 472 with hg.openpath(ui, bundlepath) as f:
473 473 if spec:
474 474 spec = exchange.getbundlespec(ui, f)
475 475 ui.write(b'%s\n' % spec)
476 476 return
477 477
478 478 gen = exchange.readbundle(ui, f, bundlepath)
479 479 if isinstance(gen, bundle2.unbundle20):
480 480 return _debugbundle2(ui, gen, all=all, **opts)
481 481 _debugchangegroup(ui, gen, all=all, **opts)
482 482
483 483
484 484 @command(b'debugcapabilities', [], _(b'PATH'), norepo=True)
485 485 def debugcapabilities(ui, path, **opts):
486 486 """lists the capabilities of a remote peer"""
487 487 peer = hg.peer(ui, pycompat.byteskwargs(opts), path)
488 488 try:
489 489 caps = peer.capabilities()
490 490 ui.writenoi18n(b'Main capabilities:\n')
491 491 for c in sorted(caps):
492 492 ui.write(b' %s\n' % c)
493 493 b2caps = bundle2.bundle2caps(peer)
494 494 if b2caps:
495 495 ui.writenoi18n(b'Bundle2 capabilities:\n')
496 496 for key, values in sorted(b2caps.items()):
497 497 ui.write(b' %s\n' % key)
498 498 for v in values:
499 499 ui.write(b' %s\n' % v)
500 500 finally:
501 501 peer.close()
502 502
503 503
504 504 @command(
505 505 b'debugchangedfiles',
506 506 [
507 507 (
508 508 b'',
509 509 b'compute',
510 510 False,
511 511 b"compute information instead of reading it from storage",
512 512 ),
513 513 ],
514 514 b'REV',
515 515 )
516 516 def debugchangedfiles(ui, repo, rev, **opts):
517 517 """list the stored file changes for a revision"""
518 518 ctx = logcmdutil.revsingle(repo, rev, None)
519 519 files = None
520 520
521 521 if opts['compute']:
522 522 files = metadata.compute_all_files_changes(ctx)
523 523 else:
524 524 sd = repo.changelog.sidedata(ctx.rev())
525 525 files_block = sd.get(sidedata.SD_FILES)
526 526 if files_block is not None:
527 527 files = metadata.decode_files_sidedata(sd)
528 528 if files is not None:
529 529 for f in sorted(files.touched):
530 530 if f in files.added:
531 531 action = b"added"
532 532 elif f in files.removed:
533 533 action = b"removed"
534 534 elif f in files.merged:
535 535 action = b"merged"
536 536 elif f in files.salvaged:
537 537 action = b"salvaged"
538 538 else:
539 539 action = b"touched"
540 540
541 541 copy_parent = b""
542 542 copy_source = b""
543 543 if f in files.copied_from_p1:
544 544 copy_parent = b"p1"
545 545 copy_source = files.copied_from_p1[f]
546 546 elif f in files.copied_from_p2:
547 547 copy_parent = b"p2"
548 548 copy_source = files.copied_from_p2[f]
549 549
550 550 data = (action, copy_parent, f, copy_source)
551 551 template = b"%-8s %2s: %s, %s;\n"
552 552 ui.write(template % data)
553 553
554 554
555 555 @command(b'debugcheckstate', [], b'')
556 556 def debugcheckstate(ui, repo):
557 557 """validate the correctness of the current dirstate"""
558 558 errors = verify.verifier(repo)._verify_dirstate()
559 559 if errors:
560 560 errstr = _(b"dirstate inconsistent with current parent's manifest")
561 561 raise error.Abort(errstr)
562 562
563 563
564 564 @command(
565 565 b'debugcolor',
566 566 [(b'', b'style', None, _(b'show all configured styles'))],
567 567 b'hg debugcolor',
568 568 )
569 569 def debugcolor(ui, repo, **opts):
570 570 """show available color, effects or style"""
571 571 ui.writenoi18n(b'color mode: %s\n' % stringutil.pprint(ui._colormode))
572 572 if opts.get('style'):
573 573 return _debugdisplaystyle(ui)
574 574 else:
575 575 return _debugdisplaycolor(ui)
576 576
577 577
578 578 def _debugdisplaycolor(ui):
579 579 ui = ui.copy()
580 580 ui._styles.clear()
581 581 for effect in color._activeeffects(ui).keys():
582 582 ui._styles[effect] = effect
583 583 if ui._terminfoparams:
584 584 for k, v in ui.configitems(b'color'):
585 585 if k.startswith(b'color.'):
586 586 ui._styles[k] = k[6:]
587 587 elif k.startswith(b'terminfo.'):
588 588 ui._styles[k] = k[9:]
589 589 ui.write(_(b'available colors:\n'))
590 590 # sort label with a '_' after the other to group '_background' entry.
591 591 items = sorted(ui._styles.items(), key=lambda i: (b'_' in i[0], i[0], i[1]))
592 592 for colorname, label in items:
593 593 ui.write(b'%s\n' % colorname, label=label)
594 594
595 595
596 596 def _debugdisplaystyle(ui):
597 597 ui.write(_(b'available style:\n'))
598 598 if not ui._styles:
599 599 return
600 600 width = max(len(s) for s in ui._styles)
601 601 for label, effects in sorted(ui._styles.items()):
602 602 ui.write(b'%s' % label, label=label)
603 603 if effects:
604 604 # 50
605 605 ui.write(b': ')
606 606 ui.write(b' ' * (max(0, width - len(label))))
607 607 ui.write(b', '.join(ui.label(e, e) for e in effects.split()))
608 608 ui.write(b'\n')
609 609
610 610
611 611 @command(b'debugcreatestreamclonebundle', [], b'FILE')
612 612 def debugcreatestreamclonebundle(ui, repo, fname):
613 613 """create a stream clone bundle file
614 614
615 615 Stream bundles are special bundles that are essentially archives of
616 616 revlog files. They are commonly used for cloning very quickly.
617 617
618 618 This command creates a "version 1" stream clone, which is deprecated in
619 619 favor of newer versions of the stream protocol. Bundles using such newer
620 620 versions can be generated using the `hg bundle` command.
621 621 """
622 622 # TODO we may want to turn this into an abort when this functionality
623 623 # is moved into `hg bundle`.
624 624 if phases.hassecret(repo):
625 625 ui.warn(
626 626 _(
627 627 b'(warning: stream clone bundle will contain secret '
628 628 b'revisions)\n'
629 629 )
630 630 )
631 631
632 632 requirements, gen = streamclone.generatebundlev1(repo)
633 633 changegroup.writechunks(ui, gen, fname)
634 634
635 635 ui.write(_(b'bundle requirements: %s\n') % b', '.join(sorted(requirements)))
636 636
637 637
638 638 @command(
639 639 b'debugdag',
640 640 [
641 641 (b't', b'tags', None, _(b'use tags as labels')),
642 642 (b'b', b'branches', None, _(b'annotate with branch names')),
643 643 (b'', b'dots', None, _(b'use dots for runs')),
644 644 (b's', b'spaces', None, _(b'separate elements by spaces')),
645 645 ],
646 646 _(b'[OPTION]... [FILE [REV]...]'),
647 647 optionalrepo=True,
648 648 )
649 649 def debugdag(ui, repo, file_=None, *revs, **opts):
650 650 """format the changelog or an index DAG as a concise textual description
651 651
652 652 If you pass a revlog index, the revlog's DAG is emitted. If you list
653 653 revision numbers, they get labeled in the output as rN.
654 654
655 655 Otherwise, the changelog DAG of the current repo is emitted.
656 656 """
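# Illustrative invocations (hypothetical paths and revisions, not part of the
# original file):
#   hg debugdag -t -b                        # changelog DAG with tag/branch labels
#   hg debugdag .hg/store/00manifest.i 0 5   # read a revlog index directly
# the second form labels revisions 0 and 5 as r0 and r5 in the emitted text.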
657 657 spaces = opts.get('spaces')
658 658 dots = opts.get('dots')
659 659 if file_:
660 660 rlog = revlog.revlog(vfsmod.vfs(encoding.getcwd(), audit=False), file_)
661 661 revs = {int(r) for r in revs}
662 662
663 663 def events():
664 664 for r in rlog:
665 665 yield b'n', (r, list(p for p in rlog.parentrevs(r) if p != -1))
666 666 if r in revs:
667 667 yield b'l', (r, b"r%i" % r)
668 668
669 669 elif repo:
670 670 cl = repo.changelog
671 671 tags = opts.get('tags')
672 672 branches = opts.get('branches')
673 673 if tags:
674 674 labels = {}
675 675 for l, n in repo.tags().items():
676 676 labels.setdefault(cl.rev(n), []).append(l)
677 677
678 678 def events():
679 679 b = b"default"
680 680 for r in cl:
681 681 if branches:
682 682 newb = cl.read(cl.node(r))[5][b'branch']
683 683 if newb != b:
684 684 yield b'a', newb
685 685 b = newb
686 686 yield b'n', (r, list(p for p in cl.parentrevs(r) if p != -1))
687 687 if tags:
688 688 ls = labels.get(r)
689 689 if ls:
690 690 for l in ls:
691 691 yield b'l', (r, l)
692 692
693 693 else:
694 694 raise error.Abort(_(b'need repo for changelog dag'))
695 695
696 696 for line in dagparser.dagtextlines(
697 697 events(),
698 698 addspaces=spaces,
699 699 wraplabels=True,
700 700 wrapannotations=True,
701 701 wrapnonlinear=dots,
702 702 usedots=dots,
703 703 maxlinewidth=70,
704 704 ):
705 705 ui.write(line)
706 706 ui.write(b"\n")
707 707
708 708
709 709 @command(b'debugdata', cmdutil.debugrevlogopts, _(b'-c|-m|FILE REV'))
710 710 def debugdata(ui, repo, file_, rev=None, **opts):
711 711 """dump the contents of a data file revision"""
712 712 if opts.get('changelog') or opts.get('manifest') or opts.get('dir'):
713 713 if rev is not None:
714 714 raise error.InputError(
715 715 _(b'cannot specify a revision with other arguments')
716 716 )
717 717 file_, rev = None, file_
718 718 elif rev is None:
719 719 raise error.InputError(_(b'please specify a revision'))
720 720 r = cmdutil.openstorage(
721 721 repo, b'debugdata', file_, pycompat.byteskwargs(opts)
722 722 )
723 723 try:
724 724 ui.write(r.rawdata(r.lookup(rev)))
725 725 except KeyError:
726 726 raise error.Abort(_(b'invalid revision identifier %s') % rev)
727 727
728 728
729 729 @command(
730 730 b'debugdate',
731 731 [(b'e', b'extended', None, _(b'try extended date formats'))],
732 732 _(b'[-e] DATE [RANGE]'),
733 733 norepo=True,
734 734 optionalrepo=True,
735 735 )
736 736 def debugdate(ui, date, range=None, **opts):
737 737 """parse and display a date"""
738 738 if opts["extended"]:
739 739 d = dateutil.parsedate(date, dateutil.extendeddateformats)
740 740 else:
741 741 d = dateutil.parsedate(date)
742 742 ui.writenoi18n(b"internal: %d %d\n" % d)
743 743 ui.writenoi18n(b"standard: %s\n" % dateutil.datestr(d))
744 744 if range:
745 745 m = dateutil.matchdate(range)
746 746 ui.writenoi18n(b"match: %s\n" % m(d[0]))
747 747
748 748
749 749 @command(
750 750 b'debugdeltachain',
751 751 cmdutil.debugrevlogopts + cmdutil.formatteropts,
752 752 _(b'-c|-m|FILE'),
753 753 optionalrepo=True,
754 754 )
755 755 def debugdeltachain(ui, repo, file_=None, **opts):
756 756 """dump information about delta chains in a revlog
757 757
758 758 Output can be templatized. Available template keywords are:
759 759
760 760 :``rev``: revision number
761 761 :``p1``: parent 1 revision number (for reference)
762 762 :``p2``: parent 2 revision number (for reference)
763 763 :``chainid``: delta chain identifier (numbered by unique base)
764 764 :``chainlen``: delta chain length to this revision
765 765 :``prevrev``: previous revision in delta chain
766 766 :``deltatype``: role of delta / how it was computed
767 767 - base: a full snapshot
768 768 - snap: an intermediate snapshot
769 769 - p1: a delta against the first parent
770 770 - p2: a delta against the second parent
771 771 - skip1: a delta against the same base as p1
772 772 (when p1 has an empty delta)
773 773 - skip2: a delta against the same base as p2
774 774 (when p2 has an empty delta)
775 775 - prev: a delta against the previous revision
776 776 - other: a delta against an arbitrary revision
777 777 :``compsize``: compressed size of revision
778 778 :``uncompsize``: uncompressed size of revision
779 779 :``chainsize``: total size of compressed revisions in chain
780 780 :``chainratio``: total chain size divided by uncompressed revision size
781 781 (new delta chains typically start at ratio 2.00)
782 782 :``lindist``: linear distance from base revision in delta chain to end
783 783 of this revision
784 784 :``extradist``: total size of revisions not part of this delta chain from
785 785 base of delta chain to end of this revision; a measurement
786 786 of how much extra data we need to read/seek across to read
787 787 the delta chain for this revision
788 788 :``extraratio``: extradist divided by chainsize; another representation of
789 789 how much unrelated data is needed to load this delta chain
790 790
791 791 If the repository is configured to use the sparse read, additional keywords
792 792 are available:
793 793
794 794 :``readsize``: total size of data read from the disk for a revision
795 795 (sum of the sizes of all the blocks)
796 796 :``largestblock``: size of the largest block of data read from the disk
797 797 :``readdensity``: density of useful bytes in the data read from the disk
798 798 :``srchunks``: in how many data hunks the whole revision would be read
799 799
800 800 The sparse read can be enabled with experimental.sparse-read = True
801 801 """
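# Illustrative invocation (hypothetical template, not part of the original
# file):
#   hg debugdeltachain -m -T '{rev} {chainid} {chainlen} {deltatype}\n'
# would print one line per manifest revision using the keywords documented
# above, e.g. "0 1 1 base" for a revision stored as a full snapshot.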
802 802 r = cmdutil.openrevlog(
803 803 repo, b'debugdeltachain', file_, pycompat.byteskwargs(opts)
804 804 )
805 805 index = r.index
806 806 start = r.start
807 807 length = r.length
808      -    generaldelta = r._generaldelta
     808 +    generaldelta = r.delta_config.general_delta
809 809 withsparseread = getattr(r, '_withsparseread', False)
810 810
811 811 # security to avoid crash on corrupted revlogs
812 812 total_revs = len(index)
813 813
814 814 chain_size_cache = {}
815 815
816 816 def revinfo(rev):
817 817 e = index[rev]
818 818 compsize = e[revlog_constants.ENTRY_DATA_COMPRESSED_LENGTH]
819 819 uncompsize = e[revlog_constants.ENTRY_DATA_UNCOMPRESSED_LENGTH]
820 820
821 821 base = e[revlog_constants.ENTRY_DELTA_BASE]
822 822 p1 = e[revlog_constants.ENTRY_PARENT_1]
823 823 p2 = e[revlog_constants.ENTRY_PARENT_2]
824 824
825 825 # If a parent of a revision has an empty delta, we never try to delta
826 826 # against that parent, but directly against the delta base of that
827 827 # parent (recursively). It avoids adding a useless entry in the chain.
828 828 #
829 829 # However, we need to detect that as a special case so that the
830 830 # delta-type is not simply reported as "other".
831 831 p1_base = p1
832 832 if p1 != nullrev and p1 < total_revs:
833 833 e1 = index[p1]
834 834 while e1[revlog_constants.ENTRY_DATA_COMPRESSED_LENGTH] == 0:
835 835 new_base = e1[revlog_constants.ENTRY_DELTA_BASE]
836 836 if (
837 837 new_base == p1_base
838 838 or new_base == nullrev
839 839 or new_base >= total_revs
840 840 ):
841 841 break
842 842 p1_base = new_base
843 843 e1 = index[p1_base]
844 844 p2_base = p2
845 845 if p2 != nullrev and p2 < total_revs:
846 846 e2 = index[p2]
847 847 while e2[revlog_constants.ENTRY_DATA_COMPRESSED_LENGTH] == 0:
848 848 new_base = e2[revlog_constants.ENTRY_DELTA_BASE]
849 849 if (
850 850 new_base == p2_base
851 851 or new_base == nullrev
852 852 or new_base >= total_revs
853 853 ):
854 854 break
855 855 p2_base = new_base
856 856 e2 = index[p2_base]
857 857
858 858 if generaldelta:
859 859 if base == p1:
860 860 deltatype = b'p1'
861 861 elif base == p2:
862 862 deltatype = b'p2'
863 863 elif base == rev:
864 864 deltatype = b'base'
865 865 elif base == p1_base:
866 866 deltatype = b'skip1'
867 867 elif base == p2_base:
868 868 deltatype = b'skip2'
869 869 elif r.issnapshot(rev):
870 870 deltatype = b'snap'
871 871 elif base == rev - 1:
872 872 deltatype = b'prev'
873 873 else:
874 874 deltatype = b'other'
875 875 else:
876 876 if base == rev:
877 877 deltatype = b'base'
878 878 else:
879 879 deltatype = b'prev'
880 880
881 881 chain = r._deltachain(rev)[0]
882 882 chain_size = 0
883 883 for iter_rev in reversed(chain):
884 884 cached = chain_size_cache.get(iter_rev)
885 885 if cached is not None:
886 886 chain_size += cached
887 887 break
888 888 e = index[iter_rev]
889 889 chain_size += e[revlog_constants.ENTRY_DATA_COMPRESSED_LENGTH]
890 890 chain_size_cache[rev] = chain_size
891 891
892 892 return p1, p2, compsize, uncompsize, deltatype, chain, chain_size
893 893
894 894 fm = ui.formatter(b'debugdeltachain', pycompat.byteskwargs(opts))
895 895
896 896 fm.plain(
897 897 b' rev p1 p2 chain# chainlen prev delta '
898 898 b'size rawsize chainsize ratio lindist extradist '
899 899 b'extraratio'
900 900 )
901 901 if withsparseread:
902 902 fm.plain(b' readsize largestblk rddensity srchunks')
903 903 fm.plain(b'\n')
904 904
905 905 chainbases = {}
906 906 for rev in r:
907 907 p1, p2, comp, uncomp, deltatype, chain, chainsize = revinfo(rev)
908 908 chainbase = chain[0]
909 909 chainid = chainbases.setdefault(chainbase, len(chainbases) + 1)
910 910 basestart = start(chainbase)
911 911 revstart = start(rev)
912 912 lineardist = revstart + comp - basestart
913 913 extradist = lineardist - chainsize
914 914 try:
915 915 prevrev = chain[-2]
916 916 except IndexError:
917 917 prevrev = -1
918 918
919 919 if uncomp != 0:
920 920 chainratio = float(chainsize) / float(uncomp)
921 921 else:
922 922 chainratio = chainsize
923 923
924 924 if chainsize != 0:
925 925 extraratio = float(extradist) / float(chainsize)
926 926 else:
927 927 extraratio = extradist
928 928
929 929 fm.startitem()
930 930 fm.write(
931 931 b'rev p1 p2 chainid chainlen prevrev deltatype compsize '
932 932 b'uncompsize chainsize chainratio lindist extradist '
933 933 b'extraratio',
934 934 b'%7d %7d %7d %7d %8d %8d %7s %10d %10d %10d %9.5f %9d %9d %10.5f',
935 935 rev,
936 936 p1,
937 937 p2,
938 938 chainid,
939 939 len(chain),
940 940 prevrev,
941 941 deltatype,
942 942 comp,
943 943 uncomp,
944 944 chainsize,
945 945 chainratio,
946 946 lineardist,
947 947 extradist,
948 948 extraratio,
949 949 rev=rev,
950 950 chainid=chainid,
951 951 chainlen=len(chain),
952 952 prevrev=prevrev,
953 953 deltatype=deltatype,
954 954 compsize=comp,
955 955 uncompsize=uncomp,
956 956 chainsize=chainsize,
957 957 chainratio=chainratio,
958 958 lindist=lineardist,
959 959 extradist=extradist,
960 960 extraratio=extraratio,
961 961 )
962 962 if withsparseread:
963 963 readsize = 0
964 964 largestblock = 0
965 965 srchunks = 0
966 966
967 967 for revschunk in deltautil.slicechunk(r, chain):
968 968 srchunks += 1
969 969 blkend = start(revschunk[-1]) + length(revschunk[-1])
970 970 blksize = blkend - start(revschunk[0])
971 971
972 972 readsize += blksize
973 973 if largestblock < blksize:
974 974 largestblock = blksize
975 975
976 976 if readsize:
977 977 readdensity = float(chainsize) / float(readsize)
978 978 else:
979 979 readdensity = 1
980 980
981 981 fm.write(
982 982 b'readsize largestblock readdensity srchunks',
983 983 b' %10d %10d %9.5f %8d',
984 984 readsize,
985 985 largestblock,
986 986 readdensity,
987 987 srchunks,
988 988 readsize=readsize,
989 989 largestblock=largestblock,
990 990 readdensity=readdensity,
991 991 srchunks=srchunks,
992 992 )
993 993
994 994 fm.plain(b'\n')
995 995
996 996 fm.end()
997 997
998 998
999 999 @command(
1000 1000 b'debug-delta-find',
1001 1001 cmdutil.debugrevlogopts
1002 1002 + cmdutil.formatteropts
1003 1003 + [
1004 1004 (
1005 1005 b'',
1006 1006 b'source',
1007 1007 b'full',
1008 1008 _(b'input data feed to the process (full, storage, p1, p2, prev)'),
1009 1009 ),
1010 1010 ],
1011 1011 _(b'-c|-m|FILE REV'),
1012 1012 optionalrepo=True,
1013 1013 )
1014 1014 def debugdeltafind(ui, repo, arg_1, arg_2=None, source=b'full', **opts):
1015 1015 """display the computation to get to a valid delta for storing REV
1016 1016
1017 1017 This command will replay the process used to find the "best" delta to store
1018 1018 a revision and display information about all the steps used to get to that
1019 1019 result.
1020 1020
1021 1021 By default, the process is fed the full text of the revision. This
1022 1022 can be controlled with the --source flag.
1023 1023
1024 1024 The revision uses the revision numbering of the target storage (not the
1025 1025 changelog revision number).
1026 1026
1027 1027 note: the process is initiated from a full text of the revision to store.
1028 1028 """
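# Illustrative invocation (hypothetical revision, not part of the original
# file):
#   hg debug-delta-find -m 1 --source p1
# would replay the delta search for manifest revision 1, starting from a
# delta against its first parent instead of the full text.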
1029 1029 if arg_2 is None:
1030 1030 file_ = None
1031 1031 rev = arg_1
1032 1032 else:
1033 1033 file_ = arg_1
1034 1034 rev = arg_2
1035 1035
1036 1036 rev = int(rev)
1037 1037
1038 1038 revlog = cmdutil.openrevlog(
1039 1039 repo, b'debugdeltachain', file_, pycompat.byteskwargs(opts)
1040 1040 )
1041 1041 p1r, p2r = revlog.parentrevs(rev)
1042 1042
1043 1043 if source == b'full':
1044 1044 base_rev = nullrev
1045 1045 elif source == b'storage':
1046 1046 base_rev = revlog.deltaparent(rev)
1047 1047 elif source == b'p1':
1048 1048 base_rev = p1r
1049 1049 elif source == b'p2':
1050 1050 base_rev = p2r
1051 1051 elif source == b'prev':
1052 1052 base_rev = rev - 1
1053 1053 else:
1054 1054 raise error.InputError(b"invalid --source value: %s" % source)
1055 1055
1056 1056 revlog_debug.debug_delta_find(ui, revlog, rev, base_rev=base_rev)
1057 1057
1058 1058
1059 1059 @command(
1060 1060 b'debugdirstate|debugstate',
1061 1061 [
1062 1062 (
1063 1063 b'',
1064 1064 b'nodates',
1065 1065 None,
1066 1066 _(b'do not display the saved mtime (DEPRECATED)'),
1067 1067 ),
1068 1068 (b'', b'dates', True, _(b'display the saved mtime')),
1069 1069 (b'', b'datesort', None, _(b'sort by saved mtime')),
1070 1070 (
1071 1071 b'',
1072 1072 b'docket',
1073 1073 False,
1074 1074 _(b'display the docket (metadata file) instead'),
1075 1075 ),
1076 1076 (
1077 1077 b'',
1078 1078 b'all',
1079 1079 False,
1080 1080 _(b'display dirstate-v2 tree nodes that would not exist in v1'),
1081 1081 ),
1082 1082 ],
1083 1083 _(b'[OPTION]...'),
1084 1084 )
1085 1085 def debugstate(ui, repo, **opts):
1086 1086 """show the contents of the current dirstate"""
1087 1087
1088 1088 if opts.get("docket"):
1089 1089 if not repo.dirstate._use_dirstate_v2:
1090 1090 raise error.Abort(_(b'dirstate v1 does not have a docket'))
1091 1091
1092 1092 docket = repo.dirstate._map.docket
1093 1093 (
1094 1094 start_offset,
1095 1095 root_nodes,
1096 1096 nodes_with_entry,
1097 1097 nodes_with_copy,
1098 1098 unused_bytes,
1099 1099 _unused,
1100 1100 ignore_pattern,
1101 1101 ) = dirstateutils.v2.TREE_METADATA.unpack(docket.tree_metadata)
1102 1102
1103 1103 ui.write(_(b"size of dirstate data: %d\n") % docket.data_size)
1104 1104 ui.write(_(b"data file uuid: %s\n") % docket.uuid)
1105 1105 ui.write(_(b"start offset of root nodes: %d\n") % start_offset)
1106 1106 ui.write(_(b"number of root nodes: %d\n") % root_nodes)
1107 1107 ui.write(_(b"nodes with entries: %d\n") % nodes_with_entry)
1108 1108 ui.write(_(b"nodes with copies: %d\n") % nodes_with_copy)
1109 1109 ui.write(_(b"number of unused bytes: %d\n") % unused_bytes)
1110 1110 ui.write(
1111 1111 _(b"ignore pattern hash: %s\n") % binascii.hexlify(ignore_pattern)
1112 1112 )
1113 1113 return
1114 1114
1115 1115 nodates = not opts['dates']
1116 1116 if opts.get('nodates') is not None:
1117 1117 nodates = True
1118 1118 datesort = opts.get('datesort')
1119 1119
1120 1120 if datesort:
1121 1121
1122 1122 def keyfunc(entry):
1123 1123 filename, _state, _mode, _size, mtime = entry
1124 1124 return (mtime, filename)
1125 1125
1126 1126 else:
1127 1127 keyfunc = None # sort by filename
1128 1128 entries = list(repo.dirstate._map.debug_iter(all=opts['all']))
1129 1129 entries.sort(key=keyfunc)
1130 1130 for entry in entries:
1131 1131 filename, state, mode, size, mtime = entry
1132 1132 if mtime == -1:
1133 1133 timestr = b'unset '
1134 1134 elif nodates:
1135 1135 timestr = b'set '
1136 1136 else:
1137 1137 timestr = time.strftime("%Y-%m-%d %H:%M:%S ", time.localtime(mtime))
1138 1138 timestr = encoding.strtolocal(timestr)
1139 1139 if mode & 0o20000:
1140 1140 mode = b'lnk'
1141 1141 else:
1142 1142 mode = b'%3o' % (mode & 0o777 & ~util.umask)
1143 1143 ui.write(b"%c %s %10d %s%s\n" % (state, mode, size, timestr, filename))
1144 1144 for f in repo.dirstate.copies():
1145 1145 ui.write(_(b"copy: %s -> %s\n") % (repo.dirstate.copied(f), f))
1146 1146
1147 1147
1148 1148 @command(
1149 1149 b'debugdirstateignorepatternshash',
1150 1150 [],
1151 1151 _(b''),
1152 1152 )
1153 1153 def debugdirstateignorepatternshash(ui, repo, **opts):
1154 1154 """show the hash of ignore patterns stored in dirstate if v2,
1155 1155 or nothing for dirstate-v1
1156 1156 """
1157 1157 if repo.dirstate._use_dirstate_v2:
1158 1158 docket = repo.dirstate._map.docket
1159 1159 hash_len = 20 # 160 bits for SHA-1
1160 1160 hash_bytes = docket.tree_metadata[-hash_len:]
1161 1161 ui.write(binascii.hexlify(hash_bytes) + b'\n')
1162 1162
1163 1163
1164 1164 @command(
1165 1165 b'debugdiscovery',
1166 1166 [
1167 1167 (b'', b'old', None, _(b'use old-style discovery')),
1168 1168 (
1169 1169 b'',
1170 1170 b'nonheads',
1171 1171 None,
1172 1172 _(b'use old-style discovery with non-heads included'),
1173 1173 ),
1174 1174 (b'', b'rev', [], b'restrict discovery to this set of revs'),
1175 1175 (b'', b'seed', b'12323', b'specify the random seed used for discovery'),
1176 1176 (
1177 1177 b'',
1178 1178 b'local-as-revs',
1179 1179 b"",
1180 1180 b'treat local as having these revisions only',
1181 1181 ),
1182 1182 (
1183 1183 b'',
1184 1184 b'remote-as-revs',
1185 1185 b"",
1186 1186 b'use local as remote, with only these revisions',
1187 1187 ),
1188 1188 ]
1189 1189 + cmdutil.remoteopts
1190 1190 + cmdutil.formatteropts,
1191 1191 _(b'[--rev REV] [OTHER]'),
1192 1192 )
1193 1193 def debugdiscovery(ui, repo, remoteurl=b"default", **opts):
1194 1194 """runs the changeset discovery protocol in isolation
1195 1195
1196 1196 The local peer can be "replaced" by a subset of the local repository by
1197 1197 using the `--local-as-revs` flag. In the same way, the usual `remote` peer
1198 1198 can be "replaced" by a subset of the local repository using the
1199 1199 `--remote-as-revs` flag. This is useful to efficiently debug pathological
1200 1200 discovery situations.
1201 1201
1202 1202 The following developer-oriented configs are relevant for people playing with this command:
1203 1203
1204 1204 * devel.discovery.exchange-heads=True
1205 1205
1206 1206 If False, the discovery will not start with
1207 1207 remote head fetching and local head querying.
1208 1208
1209 1209 * devel.discovery.grow-sample=True
1210 1210
1211 1211 If False, the sample size used in set discovery will not be increased
1212 1212 through the process
1213 1213
1214 1214 * devel.discovery.grow-sample.dynamic=True
1215 1215
1216 1216 When discovery.grow-sample.dynamic is True (the default), the sample size is
1217 1217 adapted to the shape of the undecided set (it is set to the max of:
1218 1218 <target-size>, len(roots(undecided)), len(heads(undecided))).
1219 1219
1220 1220 * devel.discovery.grow-sample.rate=1.05
1221 1221
1222 1222 the rate at which the sample grows
1223 1223
1224 1224 * devel.discovery.randomize=True
1225 1225
1226 1226 If False, random sampling during discovery is deterministic. It is meant for
1227 1227 integration tests.
1228 1228
1229 1229 * devel.discovery.sample-size=200
1230 1230
1231 1231 Control the initial size of the discovery sample
1232 1232
1233 1233 * devel.discovery.sample-size.initial=100
1234 1234
1235 1235 Control the sample size used for the initial query of the discovery
1236 1236 """
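# Illustrative invocation (hypothetical revsets, not part of the original
# file):
#   hg debugdiscovery --local-as-revs '::myhead' --remote-as-revs '::otherhead'
# would run discovery between two subsets of the same local repository,
# which is handy for reproducing a pathological discovery case offline.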
1237 1237 unfi = repo.unfiltered()
1238 1238
1239 1239 # setup potential extra filtering
1240 1240 local_revs = opts["local_as_revs"]
1241 1241 remote_revs = opts["remote_as_revs"]
1242 1242
1243 1243 # make sure tests are repeatable
1244 1244 random.seed(int(opts['seed']))
1245 1245
1246 1246 if not remote_revs:
1247 1247 path = urlutil.get_unique_pull_path_obj(
1248 1248 b'debugdiscovery', ui, remoteurl
1249 1249 )
1250 1250 branches = (path.branch, [])
1251 1251 remote = hg.peer(repo, pycompat.byteskwargs(opts), path)
1252 1252 ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(path.loc))
1253 1253 else:
1254 1254 branches = (None, [])
1255 1255 remote_filtered_revs = logcmdutil.revrange(
1256 1256 unfi, [b"not (::(%s))" % remote_revs]
1257 1257 )
1258 1258 remote_filtered_revs = frozenset(remote_filtered_revs)
1259 1259
1260 1260 def remote_func(x):
1261 1261 return remote_filtered_revs
1262 1262
1263 1263 repoview.filtertable[b'debug-discovery-remote-filter'] = remote_func
1264 1264
1265 1265 remote = repo.peer()
1266 1266 remote._repo = remote._repo.filtered(b'debug-discovery-remote-filter')
1267 1267
1268 1268 if local_revs:
1269 1269 local_filtered_revs = logcmdutil.revrange(
1270 1270 unfi, [b"not (::(%s))" % local_revs]
1271 1271 )
1272 1272 local_filtered_revs = frozenset(local_filtered_revs)
1273 1273
1274 1274 def local_func(x):
1275 1275 return local_filtered_revs
1276 1276
1277 1277 repoview.filtertable[b'debug-discovery-local-filter'] = local_func
1278 1278 repo = repo.filtered(b'debug-discovery-local-filter')
1279 1279
1280 1280 data = {}
1281 1281 if opts.get('old'):
1282 1282
1283 1283 def doit(pushedrevs, remoteheads, remote=remote):
1284 1284 if not hasattr(remote, 'branches'):
1285 1285 # enable in-client legacy support
1286 1286 remote = localrepo.locallegacypeer(remote.local())
1287 1287 if remote_revs:
1288 1288 r = remote._repo.filtered(b'debug-discovery-remote-filter')
1289 1289 remote._repo = r
1290 1290 common, _in, hds = treediscovery.findcommonincoming(
1291 1291 repo, remote, force=True, audit=data
1292 1292 )
1293 1293 common = set(common)
1294 1294 if not opts.get('nonheads'):
1295 1295 ui.writenoi18n(
1296 1296 b"unpruned common: %s\n"
1297 1297 % b" ".join(sorted(short(n) for n in common))
1298 1298 )
1299 1299
1300 1300 clnode = repo.changelog.node
1301 1301 common = repo.revs(b'heads(::%ln)', common)
1302 1302 common = {clnode(r) for r in common}
1303 1303 return common, hds
1304 1304
1305 1305 else:
1306 1306
1307 1307 def doit(pushedrevs, remoteheads, remote=remote):
1308 1308 nodes = None
1309 1309 if pushedrevs:
1310 1310 revs = logcmdutil.revrange(repo, pushedrevs)
1311 1311 nodes = [repo[r].node() for r in revs]
1312 1312 common, any, hds = setdiscovery.findcommonheads(
1313 1313 ui,
1314 1314 repo,
1315 1315 remote,
1316 1316 ancestorsof=nodes,
1317 1317 audit=data,
1318 1318 abortwhenunrelated=False,
1319 1319 )
1320 1320 return common, hds
1321 1321
1322 1322 remoterevs, _checkout = hg.addbranchrevs(repo, remote, branches, revs=None)
1323 1323 localrevs = opts['rev']
1324 1324
1325 1325 fm = ui.formatter(b'debugdiscovery', pycompat.byteskwargs(opts))
1326 1326 if fm.strict_format:
1327 1327
1328 1328 @contextlib.contextmanager
1329 1329 def may_capture_output():
1330 1330 ui.pushbuffer()
1331 1331 yield
1332 1332 data[b'output'] = ui.popbuffer()
1333 1333
1334 1334 else:
1335 1335 may_capture_output = util.nullcontextmanager
1336 1336 with may_capture_output():
1337 1337 with util.timedcm('debug-discovery') as t:
1338 1338 common, hds = doit(localrevs, remoterevs)
1339 1339
1340 1340 # compute all statistics
1341 1341 if len(common) == 1 and repo.nullid in common:
1342 1342 common = set()
1343 1343 heads_common = set(common)
1344 1344 heads_remote = set(hds)
1345 1345 heads_local = set(repo.heads())
1346 1346 # note: there cannot be a local or remote head that is in common and not
1347 1347 # itself a head of common.
1348 1348 heads_common_local = heads_common & heads_local
1349 1349 heads_common_remote = heads_common & heads_remote
1350 1350 heads_common_both = heads_common & heads_remote & heads_local
1351 1351
1352 1352 all = repo.revs(b'all()')
1353 1353 common = repo.revs(b'::%ln', common)
1354 1354 roots_common = repo.revs(b'roots(::%ld)', common)
1355 1355 missing = repo.revs(b'not ::%ld', common)
1356 1356 heads_missing = repo.revs(b'heads(%ld)', missing)
1357 1357 roots_missing = repo.revs(b'roots(%ld)', missing)
1358 1358 assert len(common) + len(missing) == len(all)
1359 1359
1360 1360 initial_undecided = repo.revs(
1361 1361 b'not (::%ln or %ln::)', heads_common_remote, heads_common_local
1362 1362 )
1363 1363 heads_initial_undecided = repo.revs(b'heads(%ld)', initial_undecided)
1364 1364 roots_initial_undecided = repo.revs(b'roots(%ld)', initial_undecided)
1365 1365 common_initial_undecided = initial_undecided & common
1366 1366 missing_initial_undecided = initial_undecided & missing
1367 1367
1368 1368 data[b'elapsed'] = t.elapsed
1369 1369 data[b'nb-common-heads'] = len(heads_common)
1370 1370 data[b'nb-common-heads-local'] = len(heads_common_local)
1371 1371 data[b'nb-common-heads-remote'] = len(heads_common_remote)
1372 1372 data[b'nb-common-heads-both'] = len(heads_common_both)
1373 1373 data[b'nb-common-roots'] = len(roots_common)
1374 1374 data[b'nb-head-local'] = len(heads_local)
1375 1375 data[b'nb-head-local-missing'] = len(heads_local) - len(heads_common_local)
1376 1376 data[b'nb-head-remote'] = len(heads_remote)
1377 1377 data[b'nb-head-remote-unknown'] = len(heads_remote) - len(
1378 1378 heads_common_remote
1379 1379 )
1380 1380 data[b'nb-revs'] = len(all)
1381 1381 data[b'nb-revs-common'] = len(common)
1382 1382 data[b'nb-revs-missing'] = len(missing)
1383 1383 data[b'nb-missing-heads'] = len(heads_missing)
1384 1384 data[b'nb-missing-roots'] = len(roots_missing)
1385 1385 data[b'nb-ini_und'] = len(initial_undecided)
1386 1386 data[b'nb-ini_und-heads'] = len(heads_initial_undecided)
1387 1387 data[b'nb-ini_und-roots'] = len(roots_initial_undecided)
1388 1388 data[b'nb-ini_und-common'] = len(common_initial_undecided)
1389 1389 data[b'nb-ini_und-missing'] = len(missing_initial_undecided)
1390 1390
1391 1391 fm.startitem()
1392 1392 fm.data(**pycompat.strkwargs(data))
1393 1393 # display discovery summary
1394 1394 fm.plain(b"elapsed time: %(elapsed)f seconds\n" % data)
1395 1395 fm.plain(b"round-trips: %(total-roundtrips)9d\n" % data)
1396 1396 if b'total-round-trips-heads' in data:
1397 1397 fm.plain(
1398 1398 b" round-trips-heads: %(total-round-trips-heads)9d\n" % data
1399 1399 )
1400 1400 if b'total-round-trips-branches' in data:
1401 1401 fm.plain(
1402 1402 b" round-trips-branches: %(total-round-trips-branches)9d\n"
1403 1403 % data
1404 1404 )
1405 1405 if b'total-round-trips-between' in data:
1406 1406 fm.plain(
1407 1407 b" round-trips-between: %(total-round-trips-between)9d\n" % data
1408 1408 )
1409 1409 fm.plain(b"queries: %(total-queries)9d\n" % data)
1410 1410 if b'total-queries-branches' in data:
1411 1411 fm.plain(b" queries-branches: %(total-queries-branches)9d\n" % data)
1412 1412 if b'total-queries-between' in data:
1413 1413 fm.plain(b" queries-between: %(total-queries-between)9d\n" % data)
1414 1414 fm.plain(b"heads summary:\n")
1415 1415 fm.plain(b" total common heads: %(nb-common-heads)9d\n" % data)
1416 1416 fm.plain(b" also local heads: %(nb-common-heads-local)9d\n" % data)
1417 1417 fm.plain(b" also remote heads: %(nb-common-heads-remote)9d\n" % data)
1418 1418 fm.plain(b" both: %(nb-common-heads-both)9d\n" % data)
1419 1419 fm.plain(b" local heads: %(nb-head-local)9d\n" % data)
1420 1420 fm.plain(b" common: %(nb-common-heads-local)9d\n" % data)
1421 1421 fm.plain(b" missing: %(nb-head-local-missing)9d\n" % data)
1422 1422 fm.plain(b" remote heads: %(nb-head-remote)9d\n" % data)
1423 1423 fm.plain(b" common: %(nb-common-heads-remote)9d\n" % data)
1424 1424 fm.plain(b" unknown: %(nb-head-remote-unknown)9d\n" % data)
1425 1425 fm.plain(b"local changesets: %(nb-revs)9d\n" % data)
1426 1426 fm.plain(b" common: %(nb-revs-common)9d\n" % data)
1427 1427 fm.plain(b" heads: %(nb-common-heads)9d\n" % data)
1428 1428 fm.plain(b" roots: %(nb-common-roots)9d\n" % data)
1429 1429 fm.plain(b" missing: %(nb-revs-missing)9d\n" % data)
1430 1430 fm.plain(b" heads: %(nb-missing-heads)9d\n" % data)
1431 1431 fm.plain(b" roots: %(nb-missing-roots)9d\n" % data)
1432 1432 fm.plain(b" first undecided set: %(nb-ini_und)9d\n" % data)
1433 1433 fm.plain(b" heads: %(nb-ini_und-heads)9d\n" % data)
1434 1434 fm.plain(b" roots: %(nb-ini_und-roots)9d\n" % data)
1435 1435 fm.plain(b" common: %(nb-ini_und-common)9d\n" % data)
1436 1436 fm.plain(b" missing: %(nb-ini_und-missing)9d\n" % data)
1437 1437
1438 1438 if ui.verbose:
1439 1439 fm.plain(
1440 1440 b"common heads: %s\n"
1441 1441 % b" ".join(sorted(short(n) for n in heads_common))
1442 1442 )
1443 1443 fm.end()
1444 1444
1445 1445
1446 1446 _chunksize = 4 << 10
1447 1447
1448 1448
1449 1449 @command(
1450 1450 b'debugdownload',
1451 1451 [
1452 1452 (b'o', b'output', b'', _(b'path')),
1453 1453 ],
1454 1454 optionalrepo=True,
1455 1455 )
1456 1456 def debugdownload(ui, repo, url, output=None, **opts):
1457 1457 """download a resource using Mercurial logic and config"""
1458 1458 fh = urlmod.open(ui, url, output)
1459 1459
1460 1460 dest = ui
1461 1461 if output:
1462 1462 dest = open(output, b"wb", _chunksize)
1463 1463 try:
1464 1464 data = fh.read(_chunksize)
1465 1465 while data:
1466 1466 dest.write(data)
1467 1467 data = fh.read(_chunksize)
1468 1468 finally:
1469 1469 if output:
1470 1470 dest.close()
1471 1471
1472 1472
1473 1473 @command(b'debugextensions', cmdutil.formatteropts, [], optionalrepo=True)
1474 1474 def debugextensions(ui, repo, **opts):
1475 1475 '''show information about active extensions'''
1476 1476 exts = extensions.extensions(ui)
1477 1477 hgver = util.version()
1478 1478 fm = ui.formatter(b'debugextensions', pycompat.byteskwargs(opts))
1479 1479 for extname, extmod in sorted(exts, key=operator.itemgetter(0)):
1480 1480 isinternal = extensions.ismoduleinternal(extmod)
1481 1481 extsource = None
1482 1482
1483 1483 if hasattr(extmod, '__file__'):
1484 1484 extsource = pycompat.fsencode(extmod.__file__)
1485 1485 elif getattr(sys, 'oxidized', False):
1486 1486 extsource = pycompat.sysexecutable
1487 1487 if isinternal:
1488 1488 exttestedwith = [] # never expose magic string to users
1489 1489 else:
1490 1490 exttestedwith = getattr(extmod, 'testedwith', b'').split()
1491 1491 extbuglink = getattr(extmod, 'buglink', None)
1492 1492
1493 1493 fm.startitem()
1494 1494
1495 1495 if ui.quiet or ui.verbose:
1496 1496 fm.write(b'name', b'%s\n', extname)
1497 1497 else:
1498 1498 fm.write(b'name', b'%s', extname)
1499 1499 if isinternal or hgver in exttestedwith:
1500 1500 fm.plain(b'\n')
1501 1501 elif not exttestedwith:
1502 1502 fm.plain(_(b' (untested!)\n'))
1503 1503 else:
1504 1504 lasttestedversion = exttestedwith[-1]
1505 1505 fm.plain(b' (%s!)\n' % lasttestedversion)
1506 1506
1507 1507 fm.condwrite(
1508 1508 ui.verbose and extsource,
1509 1509 b'source',
1510 1510 _(b' location: %s\n'),
1511 1511 extsource or b"",
1512 1512 )
1513 1513
1514 1514 if ui.verbose:
1515 1515 fm.plain(_(b' bundled: %s\n') % [b'no', b'yes'][isinternal])
1516 1516 fm.data(bundled=isinternal)
1517 1517
1518 1518 fm.condwrite(
1519 1519 ui.verbose and exttestedwith,
1520 1520 b'testedwith',
1521 1521 _(b' tested with: %s\n'),
1522 1522 fm.formatlist(exttestedwith, name=b'ver'),
1523 1523 )
1524 1524
1525 1525 fm.condwrite(
1526 1526 ui.verbose and extbuglink,
1527 1527 b'buglink',
1528 1528 _(b' bug reporting: %s\n'),
1529 1529 extbuglink or b"",
1530 1530 )
1531 1531
1532 1532 fm.end()
1533 1533
1534 1534
1535 1535 @command(
1536 1536 b'debugfileset',
1537 1537 [
1538 1538 (
1539 1539 b'r',
1540 1540 b'rev',
1541 1541 b'',
1542 1542 _(b'apply the filespec on this revision'),
1543 1543 _(b'REV'),
1544 1544 ),
1545 1545 (
1546 1546 b'',
1547 1547 b'all-files',
1548 1548 False,
1549 1549 _(b'test files from all revisions and working directory'),
1550 1550 ),
1551 1551 (
1552 1552 b's',
1553 1553 b'show-matcher',
1554 1554 None,
1555 1555 _(b'print internal representation of matcher'),
1556 1556 ),
1557 1557 (
1558 1558 b'p',
1559 1559 b'show-stage',
1560 1560 [],
1561 1561 _(b'print parsed tree at the given stage'),
1562 1562 _(b'NAME'),
1563 1563 ),
1564 1564 ],
1565 1565 _(b'[-r REV] [--all-files] [OPTION]... FILESPEC'),
1566 1566 )
1567 1567 def debugfileset(ui, repo, expr, **opts):
1568 1568 '''parse and apply a fileset specification'''
1569 1569 from . import fileset
1570 1570
1571 1571 fileset.symbols # force import of fileset so we have predicates to optimize
1572 1572
1573 1573 ctx = logcmdutil.revsingle(repo, opts.get('rev'), None)
1574 1574
1575 1575 stages = [
1576 1576 (b'parsed', pycompat.identity),
1577 1577 (b'analyzed', filesetlang.analyze),
1578 1578 (b'optimized', filesetlang.optimize),
1579 1579 ]
1580 1580 stagenames = {n for n, f in stages}
1581 1581
1582 1582 showalways = set()
1583 1583 if ui.verbose and not opts['show_stage']:
1584 1584 # show parsed tree by --verbose (deprecated)
1585 1585 showalways.add(b'parsed')
1586 1586 if opts['show_stage'] == [b'all']:
1587 1587 showalways.update(stagenames)
1588 1588 else:
1589 1589 for n in opts['show_stage']:
1590 1590 if n not in stagenames:
1591 1591 raise error.Abort(_(b'invalid stage name: %s') % n)
1592 1592 showalways.update(opts['show_stage'])
1593 1593
1594 1594 tree = filesetlang.parse(expr)
1595 1595 for n, f in stages:
1596 1596 tree = f(tree)
1597 1597 if n in showalways:
1598 1598 if opts['show_stage'] or n != b'parsed':
1599 1599 ui.write(b"* %s:\n" % n)
1600 1600 ui.write(filesetlang.prettyformat(tree), b"\n")
1601 1601
1602 1602 files = set()
1603 1603 if opts['all_files']:
1604 1604 for r in repo:
1605 1605 c = repo[r]
1606 1606 files.update(c.files())
1607 1607 files.update(c.substate)
1608 1608 if opts['all_files'] or ctx.rev() is None:
1609 1609 wctx = repo[None]
1610 1610 files.update(
1611 1611 repo.dirstate.walk(
1612 1612 scmutil.matchall(repo),
1613 1613 subrepos=list(wctx.substate),
1614 1614 unknown=True,
1615 1615 ignored=True,
1616 1616 )
1617 1617 )
1618 1618 files.update(wctx.substate)
1619 1619 else:
1620 1620 files.update(ctx.files())
1621 1621 files.update(ctx.substate)
1622 1622
1623 1623 m = ctx.matchfileset(repo.getcwd(), expr)
1624 1624 if opts['show_matcher'] or (opts['show_matcher'] is None and ui.verbose):
1625 1625 ui.writenoi18n(b'* matcher:\n', stringutil.prettyrepr(m), b'\n')
1626 1626 for f in sorted(files):
1627 1627 if not m(f):
1628 1628 continue
1629 1629 ui.write(b"%s\n" % f)
1630 1630
1631 1631
1632 1632 @command(
1633 1633 b"debug-repair-issue6528",
1634 1634 [
1635 1635 (
1636 1636 b'',
1637 1637 b'to-report',
1638 1638 b'',
1639 1639 _(b'build a report of affected revisions to this file'),
1640 1640 _(b'FILE'),
1641 1641 ),
1642 1642 (
1643 1643 b'',
1644 1644 b'from-report',
1645 1645 b'',
1646 1646 _(b'repair revisions listed in this report file'),
1647 1647 _(b'FILE'),
1648 1648 ),
1649 1649 (
1650 1650 b'',
1651 1651 b'paranoid',
1652 1652 False,
1653 1653 _(b'check that both detection methods do the same thing'),
1654 1654 ),
1655 1655 ]
1656 1656 + cmdutil.dryrunopts,
1657 1657 )
1658 1658 def debug_repair_issue6528(ui, repo, **opts):
1659 1659 """find affected revisions and repair them. See issue6528 for more details.
1660 1660
1661 1661 The `--to-report` and `--from-report` flags allow you to cache and reuse the
1662 1662 computation of affected revisions for a given repository across clones.
1663 1663 The report format is line-based (with empty lines ignored):
1664 1664
1665 1665 ```
1666 1666 <ascii-hex of the affected revision>,... <unencoded filelog index filename>
1667 1667 ```
1668 1668
1669 1669 There can be multiple broken revisions per filelog, they are separated by
1670 1670 a comma with no spaces. The only space is between the revision(s) and the
1671 1671 filename.
1672 1672
1673 1673 Note that this does *not* mean that this repairs future affected revisions;
1674 1674 that needs a separate fix at the exchange level, which was introduced in
1675 1675 Mercurial 5.9.1.
1676 1676
1677 1677 There is a `--paranoid` flag to test that the fast implementation is correct
1678 1678 by checking it against the slow implementation. Since this matter is quite
1679 1679 urgent and testing every edge-case is probably quite costly, we use this
1680 1680 method to test on large repositories as a fuzzing method of sorts.
1681 1681 """
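# Illustrative report line in the format described above (abbreviated,
# made-up hashes, not part of the original file):
#   1a2b3c...,4d5e6f... data/some/file.txt.i
# two affected revisions of the filelog "data/some/file.txt.i", separated by
# a comma with no spaces, then a single space and the filelog index filename.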
1682 1682 cmdutil.check_incompatible_arguments(
1683 1683 opts, 'to_report', ['from_report', 'dry_run']
1684 1684 )
1685 1685 dry_run = opts.get('dry_run')
1686 1686 to_report = opts.get('to_report')
1687 1687 from_report = opts.get('from_report')
1688 1688 paranoid = opts.get('paranoid')
1689 1689 # TODO maybe add filelog pattern and revision pattern parameters to help
1690 1690 # narrow down the search for users that know what they're looking for?
1691 1691
1692 1692 if requirements.REVLOGV1_REQUIREMENT not in repo.requirements:
1693 1693 msg = b"can only repair revlogv1 repositories; v2 is not affected"
1694 1694 raise error.Abort(_(msg))
1695 1695
1696 1696 rewrite.repair_issue6528(
1697 1697 ui,
1698 1698 repo,
1699 1699 dry_run=dry_run,
1700 1700 to_report=to_report,
1701 1701 from_report=from_report,
1702 1702 paranoid=paranoid,
1703 1703 )
1704 1704
1705 1705
1706 1706 @command(b'debugformat', [] + cmdutil.formatteropts)
1707 1707 def debugformat(ui, repo, **opts):
1708 1708 """display format information about the current repository
1709 1709
1710 1710 Use --verbose to get extra information about the current config value and
1711 1711 the Mercurial default."""
1712 1712 maxvariantlength = max(len(fv.name) for fv in upgrade.allformatvariant)
1713 1713 maxvariantlength = max(len(b'format-variant'), maxvariantlength)
1714 1714
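# Pad each variant name with spaces so the values line up in a single column.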
1715 1715 def makeformatname(name):
1716 1716 return b'%s:' + (b' ' * (maxvariantlength - len(name)))
1717 1717
1718 1718 fm = ui.formatter(b'debugformat', pycompat.byteskwargs(opts))
1719 1719 if fm.isplain():
1720 1720
1721 1721 def formatvalue(value):
1722 1722 if hasattr(value, 'startswith'):
1723 1723 return value
1724 1724 if value:
1725 1725 return b'yes'
1726 1726 else:
1727 1727 return b'no'
1728 1728
1729 1729 else:
1730 1730 formatvalue = pycompat.identity
1731 1731
1732 1732 fm.plain(b'format-variant')
1733 1733 fm.plain(b' ' * (maxvariantlength - len(b'format-variant')))
1734 1734 fm.plain(b' repo')
1735 1735 if ui.verbose:
1736 1736 fm.plain(b' config default')
1737 1737 fm.plain(b'\n')
1738 1738 for fv in upgrade.allformatvariant:
1739 1739 fm.startitem()
1740 1740 repovalue = fv.fromrepo(repo)
1741 1741 configvalue = fv.fromconfig(repo)
1742 1742
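# Pick highlight labels: flag variants whose repository value disagrees with
# the current configuration or with the Mercurial default.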
1743 1743 if repovalue != configvalue:
1744 1744 namelabel = b'formatvariant.name.mismatchconfig'
1745 1745 repolabel = b'formatvariant.repo.mismatchconfig'
1746 1746 elif repovalue != fv.default:
1747 1747 namelabel = b'formatvariant.name.mismatchdefault'
1748 1748 repolabel = b'formatvariant.repo.mismatchdefault'
1749 1749 else:
1750 1750 namelabel = b'formatvariant.name.uptodate'
1751 1751 repolabel = b'formatvariant.repo.uptodate'
1752 1752
1753 1753 fm.write(b'name', makeformatname(fv.name), fv.name, label=namelabel)
1754 1754 fm.write(b'repo', b' %3s', formatvalue(repovalue), label=repolabel)
1755 1755 if fv.default != configvalue:
1756 1756 configlabel = b'formatvariant.config.special'
1757 1757 else:
1758 1758 configlabel = b'formatvariant.config.default'
1759 1759 fm.condwrite(
1760 1760 ui.verbose,
1761 1761 b'config',
1762 1762 b' %6s',
1763 1763 formatvalue(configvalue),
1764 1764 label=configlabel,
1765 1765 )
1766 1766 fm.condwrite(
1767 1767 ui.verbose,
1768 1768 b'default',
1769 1769 b' %7s',
1770 1770 formatvalue(fv.default),
1771 1771 label=b'formatvariant.default',
1772 1772 )
1773 1773 fm.plain(b'\n')
1774 1774 fm.end()
1775 1775
1776 1776
1777 1777 @command(b'debugfsinfo', [], _(b'[PATH]'), norepo=True)
1778 1778 def debugfsinfo(ui, path=b"."):
1779 1779 """show information detected about current filesystem"""
1780 1780 ui.writenoi18n(b'path: %s\n' % path)
1781 1781 ui.writenoi18n(
1782 1782 b'mounted on: %s\n' % (util.getfsmountpoint(path) or b'(unknown)')
1783 1783 )
1784 1784 ui.writenoi18n(b'exec: %s\n' % (util.checkexec(path) and b'yes' or b'no'))
1785 1785 ui.writenoi18n(b'fstype: %s\n' % (util.getfstype(path) or b'(unknown)'))
1786 1786 ui.writenoi18n(
1787 1787 b'symlink: %s\n' % (util.checklink(path) and b'yes' or b'no')
1788 1788 )
1789 1789 ui.writenoi18n(
1790 1790 b'hardlink: %s\n' % (util.checknlink(path) and b'yes' or b'no')
1791 1791 )
1792 1792 casesensitive = b'(unknown)'
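# Probe case sensitivity with a throw-away temporary file; if the file cannot
# be created, the answer stays "(unknown)".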
1793 1793 try:
1794 1794 with pycompat.namedtempfile(prefix=b'.debugfsinfo', dir=path) as f:
1795 1795 casesensitive = util.fscasesensitive(f.name) and b'yes' or b'no'
1796 1796 except OSError:
1797 1797 pass
1798 1798 ui.writenoi18n(b'case-sensitive: %s\n' % casesensitive)
1799 1799
1800 1800
1801 1801 @command(
1802 1802 b'debuggetbundle',
1803 1803 [
1804 1804 (b'H', b'head', [], _(b'id of head node'), _(b'ID')),
1805 1805 (b'C', b'common', [], _(b'id of common node'), _(b'ID')),
1806 1806 (
1807 1807 b't',
1808 1808 b'type',
1809 1809 b'bzip2',
1810 1810 _(b'bundle compression type to use'),
1811 1811 _(b'TYPE'),
1812 1812 ),
1813 1813 ],
1814 1814 _(b'REPO FILE [-H|-C ID]...'),
1815 1815 norepo=True,
1816 1816 )
1817 1817 def debuggetbundle(ui, repopath, bundlepath, head=None, common=None, **opts):
1818 1818 """retrieves a bundle from a repo
1819 1819
1820 1820 Every ID must be a full-length hex node id string. Saves the bundle to the
1821 1821 given file.
1822 1822 """
1823 1823 repo = hg.peer(ui, pycompat.byteskwargs(opts), repopath)
1824 1824 if not repo.capable(b'getbundle'):
1825 1825 raise error.Abort(b"getbundle() not supported by target repository")
1826 1826 args = {}
1827 1827 if common:
1828 1828 args['common'] = [bin(s) for s in common]
1829 1829 if head:
1830 1830 args['heads'] = [bin(s) for s in head]
1831 1831 # TODO: get desired bundlecaps from command line.
1832 1832 args['bundlecaps'] = None
1833 1833 bundle = repo.getbundle(b'debug', **args)
1834 1834
1835 1835 bundletype = opts.get('type', b'bzip2').lower()
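# Map the user-facing --type value onto the bundle format identifier expected
# by bundle2.writebundle().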
1836 1836 btypes = {
1837 1837 b'none': b'HG10UN',
1838 1838 b'bzip2': b'HG10BZ',
1839 1839 b'gzip': b'HG10GZ',
1840 1840 b'bundle2': b'HG20',
1841 1841 }
1842 1842 bundletype = btypes.get(bundletype)
1843 1843 if bundletype not in bundle2.bundletypes:
1844 1844 raise error.Abort(_(b'unknown bundle type specified with --type'))
1845 1845 bundle2.writebundle(ui, bundle, bundlepath, bundletype)
1846 1846
1847 1847
1848 1848 @command(b'debugignore', [], b'[FILE]...')
1849 1849 def debugignore(ui, repo, *files, **opts):
1850 1850 """display the combined ignore pattern and information about ignored files
1851 1851
1852 1852 With no argument display the combined ignore pattern.
1853 1853
1854 1854 Given space-separated file names, show whether each given file is ignored
1855 1855 and, if so, the ignore rule (file and line number) that matched it.
1856 1856 """
1857 1857 ignore = repo.dirstate._ignore
1858 1858 if not files:
1859 1859 # Show all the patterns
1860 1860 ui.write(b"%s\n" % pycompat.byterepr(ignore))
1861 1861 else:
1862 1862 m = scmutil.match(repo[None], pats=files)
1863 1863 uipathfn = scmutil.getuipathfn(repo, legacyrelativevalue=True)
1864 1864 for f in m.files():
1865 1865 nf = util.normpath(f)
1866 1866 ignored = None
1867 1867 ignoredata = None
1868 1868 if nf != b'.':
1869 1869 if ignore(nf):
1870 1870 ignored = nf
1871 1871 ignoredata = repo.dirstate._ignorefileandline(nf)
1872 1872 else:
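# The file itself is not ignored; check whether one of its parent
# directories is.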
1873 1873 for p in pathutil.finddirs(nf):
1874 1874 if ignore(p):
1875 1875 ignored = p
1876 1876 ignoredata = repo.dirstate._ignorefileandline(p)
1877 1877 break
1878 1878 if ignored:
1879 1879 if ignored == nf:
1880 1880 ui.write(_(b"%s is ignored\n") % uipathfn(f))
1881 1881 else:
1882 1882 ui.write(
1883 1883 _(
1884 1884 b"%s is ignored because of "
1885 1885 b"containing directory %s\n"
1886 1886 )
1887 1887 % (uipathfn(f), ignored)
1888 1888 )
1889 1889 ignorefile, lineno, line = ignoredata
1890 1890 ui.write(
1891 1891 _(b"(ignore rule in %s, line %d: '%s')\n")
1892 1892 % (ignorefile, lineno, line)
1893 1893 )
1894 1894 else:
1895 1895 ui.write(_(b"%s is not ignored\n") % uipathfn(f))
1896 1896
1897 1897
1898 1898 @command(
1899 1899 b'debug-revlog-index|debugindex',
1900 1900 cmdutil.debugrevlogopts + cmdutil.formatteropts,
1901 1901 _(b'-c|-m|FILE'),
1902 1902 )
1903 1903 def debugindex(ui, repo, file_=None, **opts):
1904 1904 """dump index data for a revlog"""
1905 1905 opts = pycompat.byteskwargs(opts)
1906 1906 store = cmdutil.openstorage(repo, b'debugindex', file_, opts)
1907 1907
1908 1908 fm = ui.formatter(b'debugindex', opts)
1909 1909
1910 1910 revlog = getattr(store, '_revlog', store)
1911 1911
1912 1912 return revlog_debug.debug_index(
1913 1913 ui,
1914 1914 repo,
1915 1915 formatter=fm,
1916 1916 revlog=revlog,
1917 1917 full_node=ui.debugflag,
1918 1918 )
1919 1919
1920 1920
1921 1921 @command(
1922 1922 b'debugindexdot',
1923 1923 cmdutil.debugrevlogopts,
1924 1924 _(b'-c|-m|FILE'),
1925 1925 optionalrepo=True,
1926 1926 )
1927 1927 def debugindexdot(ui, repo, file_=None, **opts):
1928 1928 """dump an index DAG as a graphviz dot file"""
1929 1929 r = cmdutil.openstorage(
1930 1930 repo, b'debugindexdot', file_, pycompat.byteskwargs(opts)
1931 1931 )
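# Emit one Graphviz edge per parent revision; the second parent is written
# only when it is not the null revision (i.e. for merge changesets).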
1932 1932 ui.writenoi18n(b"digraph G {\n")
1933 1933 for i in r:
1934 1934 node = r.node(i)
1935 1935 pp = r.parents(node)
1936 1936 ui.write(b"\t%d -> %d\n" % (r.rev(pp[0]), i))
1937 1937 if pp[1] != repo.nullid:
1938 1938 ui.write(b"\t%d -> %d\n" % (r.rev(pp[1]), i))
1939 1939 ui.write(b"}\n")
1940 1940
1941 1941
1942 1942 @command(b'debugindexstats', [])
1943 1943 def debugindexstats(ui, repo):
1944 1944 """show stats related to the changelog index"""
1945 1945 repo.changelog.shortest(repo.nullid, 1)
1946 1946 index = repo.changelog.index
1947 1947 if not hasattr(index, 'stats'):
1948 1948 raise error.Abort(_(b'debugindexstats only works with native code'))
1949 1949 for k, v in sorted(index.stats().items()):
1950 1950 ui.write(b'%s: %d\n' % (k, v))
1951 1951
1952 1952
1953 1953 @command(b'debuginstall', [] + cmdutil.formatteropts, b'', norepo=True)
1954 1954 def debuginstall(ui, **opts):
1955 1955 """test Mercurial installation
1956 1956
1957 1957 Returns 0 on success.
1958 1958 """
1959 1959 problems = 0
1960 1960
1961 1961 fm = ui.formatter(b'debuginstall', pycompat.byteskwargs(opts))
1962 1962 fm.startitem()
1963 1963
1964 1964 # encoding might be unknown or wrong. don't translate these messages.
1965 1965 fm.write(b'encoding', b"checking encoding (%s)...\n", encoding.encoding)
1966 1966 err = None
1967 1967 try:
1968 1968 codecs.lookup(pycompat.sysstr(encoding.encoding))
1969 1969 except LookupError as inst:
1970 1970 err = stringutil.forcebytestr(inst)
1971 1971 problems += 1
1972 1972 fm.condwrite(
1973 1973 err,
1974 1974 b'encodingerror',
1975 1975 b" %s\n (check that your locale is properly set)\n",
1976 1976 err,
1977 1977 )
1978 1978
1979 1979 # Python
1980 1980 pythonlib = None
1981 1981 if hasattr(os, '__file__'):
1982 1982 pythonlib = os.path.dirname(pycompat.fsencode(os.__file__))
1983 1983 elif getattr(sys, 'oxidized', False):
1984 1984 pythonlib = pycompat.sysexecutable
1985 1985
1986 1986 fm.write(
1987 1987 b'pythonexe',
1988 1988 _(b"checking Python executable (%s)\n"),
1989 1989 pycompat.sysexecutable or _(b"unknown"),
1990 1990 )
1991 1991 fm.write(
1992 1992 b'pythonimplementation',
1993 1993 _(b"checking Python implementation (%s)\n"),
1994 1994 pycompat.sysbytes(platform.python_implementation()),
1995 1995 )
1996 1996 fm.write(
1997 1997 b'pythonver',
1998 1998 _(b"checking Python version (%s)\n"),
1999 1999 (b"%d.%d.%d" % sys.version_info[:3]),
2000 2000 )
2001 2001 fm.write(
2002 2002 b'pythonlib',
2003 2003 _(b"checking Python lib (%s)...\n"),
2004 2004 pythonlib or _(b"unknown"),
2005 2005 )
2006 2006
2007 2007 try:
2008 2008 from . import rustext # pytype: disable=import-error
2009 2009
2010 2010 rustext.__doc__ # trigger lazy import
2011 2011 except ImportError:
2012 2012 rustext = None
2013 2013
2014 2014 security = set(sslutil.supportedprotocols)
2015 2015 if sslutil.hassni:
2016 2016 security.add(b'sni')
2017 2017
2018 2018 fm.write(
2019 2019 b'pythonsecurity',
2020 2020 _(b"checking Python security support (%s)\n"),
2021 2021 fm.formatlist(sorted(security), name=b'protocol', fmt=b'%s', sep=b','),
2022 2022 )
2023 2023
2024 2024 # These are warnings, not errors. So don't increment problem count. This
2025 2025 # may change in the future.
2026 2026 if b'tls1.2' not in security:
2027 2027 fm.plain(
2028 2028 _(
2029 2029 b' TLS 1.2 not supported by Python install; '
2030 2030 b'network connections lack modern security\n'
2031 2031 )
2032 2032 )
2033 2033 if b'sni' not in security:
2034 2034 fm.plain(
2035 2035 _(
2036 2036 b' SNI not supported by Python install; may have '
2037 2037 b'connectivity issues with some servers\n'
2038 2038 )
2039 2039 )
2040 2040
2041 2041 fm.plain(
2042 2042 _(
2043 2043 b"checking Rust extensions (%s)\n"
2044 2044 % (b'missing' if rustext is None else b'installed')
2045 2045 ),
2046 2046 )
2047 2047
2048 2048 # TODO print CA cert info
2049 2049
2050 2050 # hg version
2051 2051 hgver = util.version()
2052 2052 fm.write(
2053 2053 b'hgver', _(b"checking Mercurial version (%s)\n"), hgver.split(b'+')[0]
2054 2054 )
2055 2055 fm.write(
2056 2056 b'hgverextra',
2057 2057 _(b"checking Mercurial custom build (%s)\n"),
2058 2058 b'+'.join(hgver.split(b'+')[1:]),
2059 2059 )
2060 2060
2061 2061 # compiled modules
2062 2062 hgmodules = None
2063 2063 if hasattr(sys.modules[__name__], '__file__'):
2064 2064 hgmodules = os.path.dirname(pycompat.fsencode(__file__))
2065 2065 elif getattr(sys, 'oxidized', False):
2066 2066 hgmodules = pycompat.sysexecutable
2067 2067
2068 2068 fm.write(
2069 2069 b'hgmodulepolicy', _(b"checking module policy (%s)\n"), policy.policy
2070 2070 )
2071 2071 fm.write(
2072 2072 b'hgmodules',
2073 2073 _(b"checking installed modules (%s)...\n"),
2074 2074 hgmodules or _(b"unknown"),
2075 2075 )
2076 2076
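# Determine which compiled extension families (C and/or Rust) the current
# module policy expects, then try importing a representative sample of them.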
2077 2077 rustandc = policy.policy in (b'rust+c', b'rust+c-allow')
2078 2078 rustext = rustandc # for now, that's the only case
2079 2079 cext = policy.policy in (b'c', b'allow') or rustandc
2080 2080 nopure = cext or rustext
2081 2081 if nopure:
2082 2082 err = None
2083 2083 try:
2084 2084 if cext:
2085 2085 from .cext import ( # pytype: disable=import-error
2086 2086 base85,
2087 2087 bdiff,
2088 2088 mpatch,
2089 2089 osutil,
2090 2090 )
2091 2091
2092 2092 # quiet pyflakes
2093 2093 dir(bdiff), dir(mpatch), dir(base85), dir(osutil)
2094 2094 if rustext:
2095 2095 from .rustext import ( # pytype: disable=import-error
2096 2096 ancestor,
2097 2097 dirstate,
2098 2098 )
2099 2099
2100 2100 dir(ancestor), dir(dirstate) # quiet pyflakes
2101 2101 except Exception as inst:
2102 2102 err = stringutil.forcebytestr(inst)
2103 2103 problems += 1
2104 2104 fm.condwrite(err, b'extensionserror', b" %s\n", err)
2105 2105
2106 2106 compengines = util.compengines._engines.values()
2107 2107 fm.write(
2108 2108 b'compengines',
2109 2109 _(b'checking registered compression engines (%s)\n'),
2110 2110 fm.formatlist(
2111 2111 sorted(e.name() for e in compengines),
2112 2112 name=b'compengine',
2113 2113 fmt=b'%s',
2114 2114 sep=b', ',
2115 2115 ),
2116 2116 )
2117 2117 fm.write(
2118 2118 b'compenginesavail',
2119 2119 _(b'checking available compression engines (%s)\n'),
2120 2120 fm.formatlist(
2121 2121 sorted(e.name() for e in compengines if e.available()),
2122 2122 name=b'compengine',
2123 2123 fmt=b'%s',
2124 2124 sep=b', ',
2125 2125 ),
2126 2126 )
2127 2127 wirecompengines = compression.compengines.supportedwireengines(
2128 2128 compression.SERVERROLE
2129 2129 )
2130 2130 fm.write(
2131 2131 b'compenginesserver',
2132 2132 _(
2133 2133 b'checking available compression engines '
2134 2134 b'for wire protocol (%s)\n'
2135 2135 ),
2136 2136 fm.formatlist(
2137 2137 [e.name() for e in wirecompengines if e.wireprotosupport()],
2138 2138 name=b'compengine',
2139 2139 fmt=b'%s',
2140 2140 sep=b', ',
2141 2141 ),
2142 2142 )
2143 2143 re2 = b'missing'
2144 2144 if util.has_re2():
2145 2145 re2 = b'available'
2146 2146 fm.plain(_(b'checking "re2" regexp engine (%s)\n') % re2)
2147 2147 fm.data(re2=bool(util._re2))
2148 2148
2149 2149 # templates
2150 2150 p = templater.templatedir()
2151 2151 fm.write(b'templatedirs', b'checking templates (%s)...\n', p or b'')
2152 2152 fm.condwrite(not p, b'', _(b" no template directories found\n"))
2153 2153 if p:
2154 2154 (m, fp) = templater.try_open_template(b"map-cmdline.default")
2155 2155 if m:
2156 2156 # template found, check if it is working
2157 2157 err = None
2158 2158 try:
2159 2159 templater.templater.frommapfile(m)
2160 2160 except Exception as inst:
2161 2161 err = stringutil.forcebytestr(inst)
2162 2162 p = None
2163 2163 fm.condwrite(err, b'defaulttemplateerror', b" %s\n", err)
2164 2164 else:
2165 2165 p = None
2166 2166 fm.condwrite(
2167 2167 p, b'defaulttemplate', _(b"checking default template (%s)\n"), m
2168 2168 )
2169 2169 fm.condwrite(
2170 2170 not m,
2171 2171 b'defaulttemplatenotfound',
2172 2172 _(b" template '%s' not found\n"),
2173 2173 b"default",
2174 2174 )
2175 2175 if not p:
2176 2176 problems += 1
2177 2177 fm.condwrite(
2178 2178 not p, b'', _(b" (templates seem to have been installed incorrectly)\n")
2179 2179 )
2180 2180
2181 2181 # editor
2182 2182 editor = ui.geteditor()
2183 2183 editor = util.expandpath(editor)
2184 2184 editorbin = procutil.shellsplit(editor)[0]
2185 2185 fm.write(b'editor', _(b"checking commit editor... (%s)\n"), editorbin)
2186 2186 cmdpath = procutil.findexe(editorbin)
2187 2187 fm.condwrite(
2188 2188 not cmdpath and editor == b'vi',
2189 2189 b'vinotfound',
2190 2190 _(
2191 2191 b" No commit editor set and can't find %s in PATH\n"
2192 2192 b" (specify a commit editor in your configuration"
2193 2193 b" file)\n"
2194 2194 ),
2195 2195 not cmdpath and editor == b'vi' and editorbin,
2196 2196 )
2197 2197 fm.condwrite(
2198 2198 not cmdpath and editor != b'vi',
2199 2199 b'editornotfound',
2200 2200 _(
2201 2201 b" Can't find editor '%s' in PATH\n"
2202 2202 b" (specify a commit editor in your configuration"
2203 2203 b" file)\n"
2204 2204 ),
2205 2205 not cmdpath and editorbin,
2206 2206 )
2207 2207 if not cmdpath and editor != b'vi':
2208 2208 problems += 1
2209 2209
2210 2210 # check username
2211 2211 username = None
2212 2212 err = None
2213 2213 try:
2214 2214 username = ui.username()
2215 2215 except error.Abort as e:
2216 2216 err = e.message
2217 2217 problems += 1
2218 2218
2219 2219 fm.condwrite(
2220 2220 username, b'username', _(b"checking username (%s)\n"), username
2221 2221 )
2222 2222 fm.condwrite(
2223 2223 err,
2224 2224 b'usernameerror',
2225 2225 _(
2226 2226 b"checking username...\n %s\n"
2227 2227 b" (specify a username in your configuration file)\n"
2228 2228 ),
2229 2229 err,
2230 2230 )
2231 2231
2232 2232 for name, mod in extensions.extensions():
2233 2233 handler = getattr(mod, 'debuginstall', None)
2234 2234 if handler is not None:
2235 2235 problems += handler(ui, fm)
2236 2236
2237 2237 fm.condwrite(not problems, b'', _(b"no problems detected\n"))
2238 2238 if not problems:
2239 2239 fm.data(problems=problems)
2240 2240 fm.condwrite(
2241 2241 problems,
2242 2242 b'problems',
2243 2243 _(b"%d problems detected, please check your install!\n"),
2244 2244 problems,
2245 2245 )
2246 2246 fm.end()
2247 2247
2248 2248 return problems
2249 2249
2250 2250
2251 2251 @command(b'debugknown', [], _(b'REPO ID...'), norepo=True)
2252 2252 def debugknown(ui, repopath, *ids, **opts):
2253 2253 """test whether node ids are known to a repo
2254 2254
2255 2255 Every ID must be a full-length hex node id string. Returns a list of 0s
2256 2256 and 1s indicating unknown/known.
2257 2257 """
2258 2258 repo = hg.peer(ui, pycompat.byteskwargs(opts), repopath)
2259 2259 if not repo.capable(b'known'):
2260 2260 raise error.Abort(b"known() not supported by target repository")
2261 2261 flags = repo.known([bin(s) for s in ids])
2262 2262 ui.write(b"%s\n" % (b"".join([f and b"1" or b"0" for f in flags])))
2263 2263
2264 2264
2265 2265 @command(b'debuglabelcomplete', [], _(b'LABEL...'))
2266 2266 def debuglabelcomplete(ui, repo, *args):
2267 2267 '''backwards compatibility with old bash completion scripts (DEPRECATED)'''
2268 2268 debugnamecomplete(ui, repo, *args)
2269 2269
2270 2270
2271 2271 @command(
2272 2272 b'debuglocks',
2273 2273 [
2274 2274 (b'L', b'force-free-lock', None, _(b'free the store lock (DANGEROUS)')),
2275 2275 (
2276 2276 b'W',
2277 2277 b'force-free-wlock',
2278 2278 None,
2279 2279 _(b'free the working state lock (DANGEROUS)'),
2280 2280 ),
2281 2281 (b's', b'set-lock', None, _(b'set the store lock until stopped')),
2282 2282 (
2283 2283 b'S',
2284 2284 b'set-wlock',
2285 2285 None,
2286 2286 _(b'set the working state lock until stopped'),
2287 2287 ),
2288 2288 ],
2289 2289 _(b'[OPTION]...'),
2290 2290 )
2291 2291 def debuglocks(ui, repo, **opts):
2292 2292 """show or modify state of locks
2293 2293
2294 2294 By default, this command will show which locks are held. This
2295 2295 includes the user and process holding the lock, the amount of time
2296 2296 the lock has been held, and the machine name where the process is
2297 2297 running if it's not local.
2298 2298
2299 2299 Locks protect the integrity of Mercurial's data, so they should be
2300 2300 treated with care. System crashes or other interruptions may cause
2301 2301 locks to not be properly released, though Mercurial will usually
2302 2302 detect and remove such stale locks automatically.
2303 2303
2304 2304 However, detecting stale locks may not always be possible (for
2305 2305 instance, on a shared filesystem). Removing locks may also be
2306 2306 blocked by filesystem permissions.
2307 2307
2308 2308 Setting a lock will prevent other commands from changing the data.
2309 2309 The command will wait until an interruption (SIGINT, SIGTERM, ...) occurs.
2310 2310 The locks set this way are removed when the command exits.
2311 2311
2312 2312 Returns 0 if no locks are held.
2313 2313
2314 2314 """
2315 2315
2316 2316 if opts.get('force_free_lock'):
2317 2317 repo.svfs.tryunlink(b'lock')
2318 2318 if opts.get('force_free_wlock'):
2319 2319 repo.vfs.tryunlink(b'wlock')
2320 2320 if opts.get('force_free_lock') or opts.get('force_free_wlock'):
2321 2321 return 0
2322 2322
2323 2323 locks = []
2324 2324 try:
2325 2325 if opts.get('set_wlock'):
2326 2326 try:
2327 2327 locks.append(repo.wlock(False))
2328 2328 except error.LockHeld:
2329 2329 raise error.Abort(_(b'wlock is already held'))
2330 2330 if opts.get('set_lock'):
2331 2331 try:
2332 2332 locks.append(repo.lock(False))
2333 2333 except error.LockHeld:
2334 2334 raise error.Abort(_(b'lock is already held'))
2335 2335 if len(locks):
2336 2336 try:
2337 2337 if ui.interactive():
2338 2338 prompt = _(b"ready to release the lock (y)? $$ &Yes")
2339 2339 ui.promptchoice(prompt)
2340 2340 else:
2341 2341 msg = b"%d locks held, waiting for signal\n"
2342 2342 msg %= len(locks)
2343 2343 ui.status(msg)
2344 2344 while True: # XXX wait for a signal
2345 2345 time.sleep(0.1)
2346 2346 except KeyboardInterrupt:
2347 2347 msg = b"signal-received releasing locks\n"
2348 2348 ui.status(msg)
2349 2349 return 0
2350 2350 finally:
2351 2351 release(*locks)
2352 2352
2353 2353 now = time.time()
2354 2354 held = 0
2355 2355
2356 2356 def report(vfs, name, method):
2357 2357 # this causes stale locks to get reaped for more accurate reporting
2358 2358 try:
2359 2359 l = method(False)
2360 2360 except error.LockHeld:
2361 2361 l = None
2362 2362
2363 2363 if l:
2364 2364 l.release()
2365 2365 else:
2366 2366 try:
2367 2367 st = vfs.lstat(name)
2368 2368 age = now - st[stat.ST_MTIME]
2369 2369 user = util.username(st.st_uid)
2370 2370 locker = vfs.readlock(name)
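# A lock file usually records "host:pid"; when it was taken on this host,
# show just the user and pid, otherwise include the host name as well.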
2371 2371 if b":" in locker:
2372 2372 host, pid = locker.split(b':')
2373 2373 if host == socket.gethostname():
2374 2374 locker = b'user %s, process %s' % (user or b'None', pid)
2375 2375 else:
2376 2376 locker = b'user %s, process %s, host %s' % (
2377 2377 user or b'None',
2378 2378 pid,
2379 2379 host,
2380 2380 )
2381 2381 ui.writenoi18n(b"%-6s %s (%ds)\n" % (name + b":", locker, age))
2382 2382 return 1
2383 2383 except FileNotFoundError:
2384 2384 pass
2385 2385
2386 2386 ui.writenoi18n(b"%-6s free\n" % (name + b":"))
2387 2387 return 0
2388 2388
2389 2389 held += report(repo.svfs, b"lock", repo.lock)
2390 2390 held += report(repo.vfs, b"wlock", repo.wlock)
2391 2391
2392 2392 return held
2393 2393
2394 2394
2395 2395 @command(
2396 2396 b'debugmanifestfulltextcache',
2397 2397 [
2398 2398 (b'', b'clear', False, _(b'clear the cache')),
2399 2399 (
2400 2400 b'a',
2401 2401 b'add',
2402 2402 [],
2403 2403 _(b'add the given manifest nodes to the cache'),
2404 2404 _(b'NODE'),
2405 2405 ),
2406 2406 ],
2407 2407 b'',
2408 2408 )
2409 2409 def debugmanifestfulltextcache(ui, repo, add=(), **opts):
2410 2410 """show, clear or amend the contents of the manifest fulltext cache"""
2411 2411
2412 2412 def getcache():
2413 2413 r = repo.manifestlog.getstorage(b'')
2414 2414 try:
2415 2415 return r._fulltextcache
2416 2416 except AttributeError:
2417 2417 msg = _(
2418 2418 b"Current revlog implementation doesn't appear to have a "
2419 2419 b"manifest fulltext cache\n"
2420 2420 )
2421 2421 raise error.Abort(msg)
2422 2422
2423 2423 if opts.get('clear'):
2424 2424 with repo.wlock():
2425 2425 cache = getcache()
2426 2426 cache.clear(clear_persisted_data=True)
2427 2427 return
2428 2428
2429 2429 if add:
2430 2430 with repo.wlock():
2431 2431 m = repo.manifestlog
2432 2432 store = m.getstorage(b'')
2433 2433 for n in add:
2434 2434 try:
2435 2435 manifest = m[store.lookup(n)]
2436 2436 except error.LookupError as e:
2437 2437 raise error.Abort(
2438 2438 bytes(e), hint=b"Check your manifest node id"
2439 2439 )
2440 2440 manifest.read() # stores revision in cache too
2441 2441 return
2442 2442
2443 2443 cache = getcache()
2444 2444 if not len(cache):
2445 2445 ui.write(_(b'cache empty\n'))
2446 2446 else:
2447 2447 ui.write(
2448 2448 _(
2449 2449 b'cache contains %d manifest entries, in order of most to '
2450 2450 b'least recent:\n'
2451 2451 )
2452 2452 % (len(cache),)
2453 2453 )
2454 2454 totalsize = 0
2455 2455 for nodeid in cache:
2456 2456 # Use cache.peek to not update the LRU order
2457 2457 data = cache.peek(nodeid)
2458 2458 size = len(data)
2459 2459 totalsize += size + 24 # 20 bytes nodeid, 4 bytes size
2460 2460 ui.write(
2461 2461 _(b'id: %s, size %s\n') % (hex(nodeid), util.bytecount(size))
2462 2462 )
2463 2463 ondisk = cache._opener.stat(b'manifestfulltextcache').st_size
2464 2464 ui.write(
2465 2465 _(b'total cache data size %s, on-disk %s\n')
2466 2466 % (util.bytecount(totalsize), util.bytecount(ondisk))
2467 2467 )
2468 2468
2469 2469
2470 2470 @command(b'debugmergestate', [] + cmdutil.templateopts, b'')
2471 2471 def debugmergestate(ui, repo, *args, **opts):
2472 2472 """print merge state
2473 2473
2474 2474 Use --verbose to print out information about whether v1 or v2 merge state
2475 2475 was chosen."""
2476 2476
2477 2477 if ui.verbose:
2478 2478 ms = mergestatemod.mergestate(repo)
2479 2479
2480 2480 # sort so that reasonable information is on top
2481 2481 v1records = ms._readrecordsv1()
2482 2482 v2records = ms._readrecordsv2()
2483 2483
2484 2484 if not v1records and not v2records:
2485 2485 pass
2486 2486 elif not v2records:
2487 2487 ui.writenoi18n(b'no version 2 merge state\n')
2488 2488 elif ms._v1v2match(v1records, v2records):
2489 2489 ui.writenoi18n(b'v1 and v2 states match: using v2\n')
2490 2490 else:
2491 2491 ui.writenoi18n(b'v1 and v2 states mismatch: using v1\n')
2492 2492
2493 2493 if not opts['template']:
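# Default template: one entry per commit side (local/other), then one block
# per file in the merge state showing its resolution state, the
# local/ancestor/other versions or rename details, and any extras.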
2494 2494 opts['template'] = (
2495 2495 b'{if(commits, "", "no merge state found\n")}'
2496 2496 b'{commits % "{name}{if(label, " ({label})")}: {node}\n"}'
2497 2497 b'{files % "file: {path} (state \\"{state}\\")\n'
2498 2498 b'{if(local_path, "'
2499 2499 b' local path: {local_path} (hash {local_key}, flags \\"{local_flags}\\")\n'
2500 2500 b' ancestor path: {ancestor_path} (node {ancestor_node})\n'
2501 2501 b' other path: {other_path} (node {other_node})\n'
2502 2502 b'")}'
2503 2503 b'{if(rename_side, "'
2504 2504 b' rename side: {rename_side}\n'
2505 2505 b' renamed path: {renamed_path}\n'
2506 2506 b'")}'
2507 2507 b'{extras % " extra: {key} = {value}\n"}'
2508 2508 b'"}'
2509 2509 b'{extras % "extra: {file} ({key} = {value})\n"}'
2510 2510 )
2511 2511
2512 2512 ms = mergestatemod.mergestate.read(repo)
2513 2513
2514 2514 fm = ui.formatter(b'debugmergestate', pycompat.byteskwargs(opts))
2515 2515 fm.startitem()
2516 2516
2517 2517 fm_commits = fm.nested(b'commits')
2518 2518 if ms.active():
2519 2519 for name, node, label_index in (
2520 2520 (b'local', ms.local, 0),
2521 2521 (b'other', ms.other, 1),
2522 2522 ):
2523 2523 fm_commits.startitem()
2524 2524 fm_commits.data(name=name)
2525 2525 fm_commits.data(node=hex(node))
2526 2526 if ms._labels and len(ms._labels) > label_index:
2527 2527 fm_commits.data(label=ms._labels[label_index])
2528 2528 fm_commits.end()
2529 2529
2530 2530 fm_files = fm.nested(b'files')
2531 2531 if ms.active():
2532 2532 for f in ms:
2533 2533 fm_files.startitem()
2534 2534 fm_files.data(path=f)
2535 2535 state = ms._state[f]
2536 2536 fm_files.data(state=state[0])
2537 2537 if state[0] in (
2538 2538 mergestatemod.MERGE_RECORD_UNRESOLVED,
2539 2539 mergestatemod.MERGE_RECORD_RESOLVED,
2540 2540 ):
2541 2541 fm_files.data(local_key=state[1])
2542 2542 fm_files.data(local_path=state[2])
2543 2543 fm_files.data(ancestor_path=state[3])
2544 2544 fm_files.data(ancestor_node=state[4])
2545 2545 fm_files.data(other_path=state[5])
2546 2546 fm_files.data(other_node=state[6])
2547 2547 fm_files.data(local_flags=state[7])
2548 2548 elif state[0] in (
2549 2549 mergestatemod.MERGE_RECORD_UNRESOLVED_PATH,
2550 2550 mergestatemod.MERGE_RECORD_RESOLVED_PATH,
2551 2551 ):
2552 2552 fm_files.data(renamed_path=state[1])
2553 2553 fm_files.data(rename_side=state[2])
2554 2554 fm_extras = fm_files.nested(b'extras')
2555 2555 for k, v in sorted(ms.extras(f).items()):
2556 2556 fm_extras.startitem()
2557 2557 fm_extras.data(key=k)
2558 2558 fm_extras.data(value=v)
2559 2559 fm_extras.end()
2560 2560
2561 2561 fm_files.end()
2562 2562
2563 2563 fm_extras = fm.nested(b'extras')
2564 2564 for f, d in sorted(ms.allextras().items()):
2565 2565 if f in ms:
2566 2566 # If file is in mergestate, we have already processed its extras
2567 2567 continue
2568 2568 for k, v in d.items():
2569 2569 fm_extras.startitem()
2570 2570 fm_extras.data(file=f)
2571 2571 fm_extras.data(key=k)
2572 2572 fm_extras.data(value=v)
2573 2573 fm_extras.end()
2574 2574
2575 2575 fm.end()
2576 2576
2577 2577
2578 2578 @command(b'debugnamecomplete', [], _(b'NAME...'))
2579 2579 def debugnamecomplete(ui, repo, *args):
2580 2580 '''complete "names" - tags, open branch names, bookmark names'''
2581 2581
2582 2582 names = set()
2583 2583 # since we previously only listed open branches, we will handle that
2584 2584 # specially (after this for loop)
2585 2585 for name, ns in repo.names.items():
2586 2586 if name != b'branches':
2587 2587 names.update(ns.listnames(repo))
2588 2588 names.update(
2589 2589 tag
2590 2590 for (tag, heads, tip, closed) in repo.branchmap().iterbranches()
2591 2591 if not closed
2592 2592 )
2593 2593 completions = set()
2594 2594 if not args:
2595 2595 args = [b'']
2596 2596 for a in args:
2597 2597 completions.update(n for n in names if n.startswith(a))
2598 2598 ui.write(b'\n'.join(sorted(completions)))
2599 2599 ui.write(b'\n')
2600 2600
2601 2601
2602 2602 @command(
2603 2603 b'debugnodemap',
2604 2604 (
2605 2605 cmdutil.debugrevlogopts
2606 2606 + [
2607 2607 (
2608 2608 b'',
2609 2609 b'dump-new',
2610 2610 False,
2611 2611 _(b'write a (new) persistent binary nodemap on stdout'),
2612 2612 ),
2613 2613 (b'', b'dump-disk', False, _(b'dump on-disk data on stdout')),
2614 2614 (
2615 2615 b'',
2616 2616 b'check',
2617 2617 False,
2618 2618 _(b'check that the data on disk are correct.'),
2619 2619 ),
2620 2620 (
2621 2621 b'',
2622 2622 b'metadata',
2623 2623 False,
2624 2624 _(b'display the on-disk metadata for the nodemap'),
2625 2625 ),
2626 2626 ]
2627 2627 ),
2628 2628 _(b'-c|-m|FILE'),
2629 2629 )
2630 2630 def debugnodemap(ui, repo, file_=None, **opts):
2631 2631 """write and inspect on disk nodemap"""
2632 2632 if opts.get('changelog') or opts.get('manifest') or opts.get('dir'):
2633 2633 if file_ is not None:
2634 2634 raise error.InputError(
2635 2635 _(b'cannot specify a file with other arguments')
2636 2636 )
2637 2637 elif file_ is None:
2638 2638 opts['changelog'] = True
2639 2639 r = cmdutil.openstorage(
2640 2640 repo.unfiltered(), b'debugnodemap', file_, pycompat.byteskwargs(opts)
2641 2641 )
2642 2642 if isinstance(r, (manifest.manifestrevlog, filelog.filelog)):
2643 2643 r = r._revlog
2644 2644 if opts['dump_new']:
2645 2645 if hasattr(r.index, "nodemap_data_all"):
2646 2646 data = r.index.nodemap_data_all()
2647 2647 else:
2648 2648 data = nodemap.persistent_data(r.index)
2649 2649 ui.write(data)
2650 2650 elif opts['dump_disk']:
2651 2651 nm_data = nodemap.persisted_data(r)
2652 2652 if nm_data is not None:
2653 2653 docket, data = nm_data
2654 2654 ui.write(data[:])
2655 2655 elif opts['check']:
2656 2656 nm_data = nodemap.persisted_data(r)
2657 2657 if nm_data is not None:
2658 2658 docket, data = nm_data
2659 2659 return nodemap.check_data(ui, r.index, data)
2660 2660 elif opts['metadata']:
2661 2661 nm_data = nodemap.persisted_data(r)
2662 2662 if nm_data is not None:
2663 2663 docket, data = nm_data
2664 2664 ui.write((b"uid: %s\n") % docket.uid)
2665 2665 ui.write((b"tip-rev: %d\n") % docket.tip_rev)
2666 2666 ui.write((b"tip-node: %s\n") % hex(docket.tip_node))
2667 2667 ui.write((b"data-length: %d\n") % docket.data_length)
2668 2668 ui.write((b"data-unused: %d\n") % docket.data_unused)
2669 2669 unused_perc = docket.data_unused * 100.0 / docket.data_length
2670 2670 ui.write((b"data-unused: %2.3f%%\n") % unused_perc)
2671 2671
2672 2672
2673 2673 @command(
2674 2674 b'debugobsolete',
2675 2675 [
2676 2676 (b'', b'flags', 0, _(b'markers flag')),
2677 2677 (
2678 2678 b'',
2679 2679 b'record-parents',
2680 2680 False,
2681 2681 _(b'record parent information for the precursor'),
2682 2682 ),
2683 2683 (b'r', b'rev', [], _(b'display markers relevant to REV')),
2684 2684 (
2685 2685 b'',
2686 2686 b'exclusive',
2687 2687 False,
2688 2688 _(b'restrict display to markers only relevant to REV'),
2689 2689 ),
2690 2690 (b'', b'index', False, _(b'display index of the marker')),
2691 2691 (b'', b'delete', [], _(b'delete markers specified by indices')),
2692 2692 ]
2693 2693 + cmdutil.commitopts2
2694 2694 + cmdutil.formatteropts,
2695 2695 _(b'[OBSOLETED [REPLACEMENT ...]]'),
2696 2696 )
2697 2697 def debugobsolete(ui, repo, precursor=None, *successors, **opts):
2698 2698 """create arbitrary obsolete marker
2699 2699
2700 2700 With no arguments, displays the list of obsolescence markers."""
2701 2701
2702 2702 def parsenodeid(s):
2703 2703 try:
2704 2704 # We do not use revsingle/revrange functions here to accept
2705 2705 # arbitrary node identifiers, possibly not present in the
2706 2706 # local repository.
2707 2707 n = bin(s)
2708 2708 if len(n) != repo.nodeconstants.nodelen:
2709 2709 raise ValueError
2710 2710 return n
2711 2711 except ValueError:
2712 2712 raise error.InputError(
2713 2713 b'changeset references must be full hexadecimal '
2714 2714 b'node identifiers'
2715 2715 )
2716 2716
2717 2717 if opts.get('delete'):
2718 2718 indices = []
2719 2719 for v in opts.get('delete'):
2720 2720 try:
2721 2721 indices.append(int(v))
2722 2722 except ValueError:
2723 2723 raise error.InputError(
2724 2724 _(b'invalid index value: %r') % v,
2725 2725 hint=_(b'use integers for indices'),
2726 2726 )
2727 2727
2728 2728 if repo.currenttransaction():
2729 2729 raise error.Abort(
2730 2730 _(b'cannot delete obsmarkers in the middle of a transaction.')
2731 2731 )
2732 2732
2733 2733 with repo.lock():
2734 2734 n = repair.deleteobsmarkers(repo.obsstore, indices)
2735 2735 ui.write(_(b'deleted %i obsolescence markers\n') % n)
2736 2736
2737 2737 return
2738 2738
2739 2739 if precursor is not None:
2740 2740 if opts['rev']:
2741 2741 raise error.InputError(
2742 2742 b'cannot select revision when creating marker'
2743 2743 )
2744 2744 metadata = {}
2745 2745 metadata[b'user'] = encoding.fromlocal(opts['user'] or ui.username())
2746 2746 succs = tuple(parsenodeid(succ) for succ in successors)
2747 2747 l = repo.lock()
2748 2748 try:
2749 2749 tr = repo.transaction(b'debugobsolete')
2750 2750 try:
2751 2751 date = opts.get('date')
2752 2752 if date:
2753 2753 date = dateutil.parsedate(date)
2754 2754 else:
2755 2755 date = None
2756 2756 prec = parsenodeid(precursor)
2757 2757 parents = None
2758 2758 if opts['record_parents']:
2759 2759 if prec not in repo.unfiltered():
2760 2760 raise error.Abort(
2761 2761 b'cannot use --record-parents on '
2762 2762 b'unknown changesets'
2763 2763 )
2764 2764 parents = repo.unfiltered()[prec].parents()
2765 2765 parents = tuple(p.node() for p in parents)
2766 2766 repo.obsstore.create(
2767 2767 tr,
2768 2768 prec,
2769 2769 succs,
2770 2770 opts['flags'],
2771 2771 parents=parents,
2772 2772 date=date,
2773 2773 metadata=metadata,
2774 2774 ui=ui,
2775 2775 )
2776 2776 tr.close()
2777 2777 except ValueError as exc:
2778 2778 raise error.Abort(
2779 2779 _(b'bad obsmarker input: %s') % stringutil.forcebytestr(exc)
2780 2780 )
2781 2781 finally:
2782 2782 tr.release()
2783 2783 finally:
2784 2784 l.release()
2785 2785 else:
2786 2786 if opts['rev']:
2787 2787 revs = logcmdutil.revrange(repo, opts['rev'])
2788 2788 nodes = [repo[r].node() for r in revs]
2789 2789 markers = list(
2790 2790 obsutil.getmarkers(
2791 2791 repo, nodes=nodes, exclusive=opts['exclusive']
2792 2792 )
2793 2793 )
2794 2794 markers.sort(key=lambda x: x._data)
2795 2795 else:
2796 2796 markers = obsutil.getmarkers(repo)
2797 2797
2798 2798 markerstoiter = markers
2799 2799 isrelevant = lambda m: True
2800 2800 if opts.get('rev') and opts.get('index'):
2801 2801 markerstoiter = obsutil.getmarkers(repo)
2802 2802 markerset = set(markers)
2803 2803 isrelevant = lambda m: m in markerset
2804 2804
2805 2805 fm = ui.formatter(b'debugobsolete', pycompat.byteskwargs(opts))
2806 2806 for i, m in enumerate(markerstoiter):
2807 2807 if not isrelevant(m):
2808 2808 # marker can be irrelevant when we're iterating over a set
2809 2809 # of markers (markerstoiter) which is bigger than the set
2810 2810 # of markers we want to display (markers)
2811 2811 # this can happen if both --index and --rev options are
2812 2812 # provided and thus we need to iterate over all of the markers
2813 2813 # to get the correct indices, but only display the ones that
2814 2814 # are relevant to --rev value
2815 2815 continue
2816 2816 fm.startitem()
2817 2817 ind = i if opts.get('index') else None
2818 2818 cmdutil.showmarker(fm, m, index=ind)
2819 2819 fm.end()
2820 2820
2821 2821
2822 2822 @command(
2823 2823 b'debugp1copies',
2824 2824 [(b'r', b'rev', b'', _(b'revision to debug'), _(b'REV'))],
2825 2825 _(b'[-r REV]'),
2826 2826 )
2827 2827 def debugp1copies(ui, repo, **opts):
2828 2828 """dump copy information compared to p1"""
2829 2829
2830 2830 ctx = scmutil.revsingle(repo, opts.get('rev'), default=None)
2831 2831 for dst, src in ctx.p1copies().items():
2832 2832 ui.write(b'%s -> %s\n' % (src, dst))
2833 2833
2834 2834
2835 2835 @command(
2836 2836 b'debugp2copies',
2837 2837 [(b'r', b'rev', b'', _(b'revision to debug'), _(b'REV'))],
2838 2838 _(b'[-r REV]'),
2839 2839 )
2840 2840 def debugp2copies(ui, repo, **opts):
2841 2841 """dump copy information compared to p2"""
2842 2842
2843 2843 ctx = scmutil.revsingle(repo, opts.get('rev'), default=None)
2844 2844 for dst, src in ctx.p2copies().items():
2845 2845 ui.write(b'%s -> %s\n' % (src, dst))
2846 2846
2847 2847
2848 2848 @command(
2849 2849 b'debugpathcomplete',
2850 2850 [
2851 2851 (b'f', b'full', None, _(b'complete an entire path')),
2852 2852 (b'n', b'normal', None, _(b'show only normal files')),
2853 2853 (b'a', b'added', None, _(b'show only added files')),
2854 2854 (b'r', b'removed', None, _(b'show only removed files')),
2855 2855 ],
2856 2856 _(b'FILESPEC...'),
2857 2857 )
2858 2858 def debugpathcomplete(ui, repo, *specs, **opts):
2859 2859 """complete part or all of a tracked path
2860 2860
2861 2861 This command supports shells that offer path name completion. It
2862 2862 currently completes only files already known to the dirstate.
2863 2863
2864 2864 Completion extends only to the next path segment unless
2865 2865 --full is specified, in which case entire paths are used."""
2866 2866
2867 2867 def complete(path, acceptable):
2868 2868 dirstate = repo.dirstate
2869 2869 spec = os.path.normpath(os.path.join(encoding.getcwd(), path))
2870 2870 rootdir = repo.root + pycompat.ossep
2871 2871 if spec != repo.root and not spec.startswith(rootdir):
2872 2872 return [], []
2873 2873 if os.path.isdir(spec):
2874 2874 spec += b'/'
2875 2875 spec = spec[len(rootdir) :]
2876 2876 fixpaths = pycompat.ossep != b'/'
2877 2877 if fixpaths:
2878 2878 spec = spec.replace(pycompat.ossep, b'/')
2879 2879 speclen = len(spec)
2880 2880 fullpaths = opts['full']
2881 2881 files, dirs = set(), set()
2882 2882 adddir, addfile = dirs.add, files.add
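# Single pass over the dirstate: with --full, matching files are returned
# as-is; otherwise deeper paths are truncated to the next path segment and
# reported as directories.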
2883 2883 for f, st in dirstate.items():
2884 2884 if f.startswith(spec) and st.state in acceptable:
2885 2885 if fixpaths:
2886 2886 f = f.replace(b'/', pycompat.ossep)
2887 2887 if fullpaths:
2888 2888 addfile(f)
2889 2889 continue
2890 2890 s = f.find(pycompat.ossep, speclen)
2891 2891 if s >= 0:
2892 2892 adddir(f[:s])
2893 2893 else:
2894 2894 addfile(f)
2895 2895 return files, dirs
2896 2896
2897 2897 acceptable = b''
2898 2898 if opts['normal']:
2899 2899 acceptable += b'nm'
2900 2900 if opts['added']:
2901 2901 acceptable += b'a'
2902 2902 if opts['removed']:
2903 2903 acceptable += b'r'
2904 2904 cwd = repo.getcwd()
2905 2905 if not specs:
2906 2906 specs = [b'.']
2907 2907
2908 2908 files, dirs = set(), set()
2909 2909 for spec in specs:
2910 2910 f, d = complete(spec, acceptable or b'nmar')
2911 2911 files.update(f)
2912 2912 dirs.update(d)
2913 2913 files.update(dirs)
2914 2914 ui.write(b'\n'.join(repo.pathto(p, cwd) for p in sorted(files)))
2915 2915 ui.write(b'\n')
2916 2916
2917 2917
2918 2918 @command(
2919 2919 b'debugpathcopies',
2920 2920 cmdutil.walkopts,
2921 2921 b'hg debugpathcopies REV1 REV2 [FILE]',
2922 2922 inferrepo=True,
2923 2923 )
2924 2924 def debugpathcopies(ui, repo, rev1, rev2, *pats, **opts):
2925 2925 """show copies between two revisions"""
2926 2926 ctx1 = scmutil.revsingle(repo, rev1)
2927 2927 ctx2 = scmutil.revsingle(repo, rev2)
2928 2928 m = scmutil.match(ctx1, pats, opts)
2929 2929 for dst, src in sorted(copies.pathcopies(ctx1, ctx2, m).items()):
2930 2930 ui.write(b'%s -> %s\n' % (src, dst))
2931 2931
2932 2932
2933 2933 @command(b'debugpeer', [], _(b'PATH'), norepo=True)
2934 2934 def debugpeer(ui, path):
2935 2935 """establish a connection to a peer repository"""
2936 2936 # Always enable peer request logging. Requires --debug to display
2937 2937 # though.
2938 2938 overrides = {
2939 2939 (b'devel', b'debug.peer-request'): True,
2940 2940 }
2941 2941
2942 2942 with ui.configoverride(overrides):
2943 2943 peer = hg.peer(ui, {}, path)
2944 2944
2945 2945 try:
2946 2946 local = peer.local() is not None
2947 2947 canpush = peer.canpush()
2948 2948
2949 2949 ui.write(_(b'url: %s\n') % peer.url())
2950 2950 ui.write(_(b'local: %s\n') % (_(b'yes') if local else _(b'no')))
2951 2951 ui.write(
2952 2952 _(b'pushable: %s\n') % (_(b'yes') if canpush else _(b'no'))
2953 2953 )
2954 2954 finally:
2955 2955 peer.close()
2956 2956
2957 2957
2958 2958 @command(
2959 2959 b'debugpickmergetool',
2960 2960 [
2961 2961 (b'r', b'rev', b'', _(b'check for files in this revision'), _(b'REV')),
2962 2962 (b'', b'changedelete', None, _(b'emulate merging change and delete')),
2963 2963 ]
2964 2964 + cmdutil.walkopts
2965 2965 + cmdutil.mergetoolopts,
2966 2966 _(b'[PATTERN]...'),
2967 2967 inferrepo=True,
2968 2968 )
2969 2969 def debugpickmergetool(ui, repo, *pats, **opts):
2970 2970 """examine which merge tool is chosen for specified file
2971 2971
2972 2972 As described in :hg:`help merge-tools`, Mercurial examines
2973 2973 configurations below in this order to decide which merge tool is
2974 2974 chosen for specified file.
2975 2975
2976 2976 1. ``--tool`` option
2977 2977 2. ``HGMERGE`` environment variable
2978 2978 3. configurations in ``merge-patterns`` section
2979 2979 4. configuration of ``ui.merge``
2980 2980 5. configurations in ``merge-tools`` section
2981 2981 6. ``hgmerge`` tool (for historical reasons only)
2982 2982 7. default tool for fallback (``:merge`` or ``:prompt``)
2983 2983
2984 2984 This command writes out the examination result in the style below::
2985 2985
2986 2986 FILE = MERGETOOL
2987 2987
2988 2988 By default, all files known in the first parent context of the
2989 2989 working directory are examined. Use file patterns and/or -I/-X
2990 2990 options to limit target files. -r/--rev is also useful to examine
2991 2991 files in another context without actually updating to it.
2992 2992
2993 2993 With --debug, this command shows warning messages while matching
2994 2994 against ``merge-patterns`` and so on, too. It is recommended to
2995 2995 use this option with explicit file patterns and/or -I/-X options,
2996 2996 because this option increases the amount of output per file according
2997 2997 to configurations in hgrc.
2998 2998
2999 2999 With -v/--verbose, this command shows configurations below at
3000 3000 first (only if specified).
3001 3001
3002 3002 - ``--tool`` option
3003 3003 - ``HGMERGE`` environment variable
3004 3004 - configuration of ``ui.merge``
3005 3005
3006 3006 If a merge tool is chosen before matching against
3007 3007 ``merge-patterns``, this command can't show any helpful
3008 3008 information, even with --debug. In such a case, the information above
3009 3009 is useful for understanding why a merge tool was chosen.
3010 3010 """
3011 3011 overrides = {}
3012 3012 if opts['tool']:
3013 3013 overrides[(b'ui', b'forcemerge')] = opts['tool']
3014 3014 ui.notenoi18n(b'with --tool %r\n' % (pycompat.bytestr(opts['tool'])))
3015 3015
3016 3016 with ui.configoverride(overrides, b'debugmergepatterns'):
3017 3017 hgmerge = encoding.environ.get(b"HGMERGE")
3018 3018 if hgmerge is not None:
3019 3019 ui.notenoi18n(b'with HGMERGE=%r\n' % (pycompat.bytestr(hgmerge)))
3020 3020 uimerge = ui.config(b"ui", b"merge")
3021 3021 if uimerge:
3022 3022 ui.notenoi18n(b'with ui.merge=%r\n' % (pycompat.bytestr(uimerge)))
3023 3023
3024 3024 ctx = scmutil.revsingle(repo, opts.get('rev'))
3025 3025 m = scmutil.match(ctx, pats, pycompat.byteskwargs(opts))
3026 3026 changedelete = opts['changedelete']
3027 3027 for path in ctx.walk(m):
3028 3028 fctx = ctx[path]
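# filemerge._picktool() may emit warnings while matching merge-patterns;
# keep it quiet unless --debug was requested.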
3029 3029 with ui.silent(
3030 3030 error=True
3031 3031 ) if not ui.debugflag else util.nullcontextmanager():
3032 3032 tool, toolpath = filemerge._picktool(
3033 3033 repo,
3034 3034 ui,
3035 3035 path,
3036 3036 fctx.isbinary(),
3037 3037 b'l' in fctx.flags(),
3038 3038 changedelete,
3039 3039 )
3040 3040 ui.write(b'%s = %s\n' % (path, tool))
3041 3041
3042 3042
3043 3043 @command(b'debugpushkey', [], _(b'REPO NAMESPACE [KEY OLD NEW]'), norepo=True)
3044 3044 def debugpushkey(ui, repopath, namespace, *keyinfo, **opts):
3045 3045 """access the pushkey key/value protocol
3046 3046
3047 3047 With two args, list the keys in the given namespace.
3048 3048
3049 3049 With five args, set a key to new if it currently is set to old.
3050 3050 Reports success or failure.
3051 3051 """
3052 3052
3053 3053 target = hg.peer(ui, {}, repopath)
3054 3054 try:
3055 3055 if keyinfo:
3056 3056 key, old, new = keyinfo
3057 3057 with target.commandexecutor() as e:
3058 3058 r = e.callcommand(
3059 3059 b'pushkey',
3060 3060 {
3061 3061 b'namespace': namespace,
3062 3062 b'key': key,
3063 3063 b'old': old,
3064 3064 b'new': new,
3065 3065 },
3066 3066 ).result()
3067 3067
3068 3068 ui.status(pycompat.bytestr(r) + b'\n')
3069 3069 return not r
3070 3070 else:
3071 3071 for k, v in sorted(target.listkeys(namespace).items()):
3072 3072 ui.write(
3073 3073 b"%s\t%s\n"
3074 3074 % (stringutil.escapestr(k), stringutil.escapestr(v))
3075 3075 )
3076 3076 finally:
3077 3077 target.close()
3078 3078
3079 3079
3080 3080 @command(b'debugpvec', [], _(b'A B'))
3081 3081 def debugpvec(ui, repo, a, b=None):
3082 3082 ca = scmutil.revsingle(repo, a)
3083 3083 cb = scmutil.revsingle(repo, b)
3084 3084 pa = pvec.ctxpvec(ca)
3085 3085 pb = pvec.ctxpvec(cb)
3086 3086 if pa == pb:
3087 3087 rel = b"="
3088 3088 elif pa > pb:
3089 3089 rel = b">"
3090 3090 elif pa < pb:
3091 3091 rel = b"<"
3092 3092 elif pa | pb:
3093 3093 rel = b"|"
3094 3094 ui.write(_(b"a: %s\n") % pa)
3095 3095 ui.write(_(b"b: %s\n") % pb)
3096 3096 ui.write(_(b"depth(a): %d depth(b): %d\n") % (pa._depth, pb._depth))
3097 3097 ui.write(
3098 3098 _(b"delta: %d hdist: %d distance: %d relation: %s\n")
3099 3099 % (
3100 3100 abs(pa._depth - pb._depth),
3101 3101 pvec._hamming(pa._vec, pb._vec),
3102 3102 pa.distance(pb),
3103 3103 rel,
3104 3104 )
3105 3105 )
3106 3106
3107 3107
3108 3108 @command(
3109 3109 b'debugrebuilddirstate|debugrebuildstate',
3110 3110 [
3111 3111 (b'r', b'rev', b'', _(b'revision to rebuild to'), _(b'REV')),
3112 3112 (
3113 3113 b'',
3114 3114 b'minimal',
3115 3115 None,
3116 3116 _(
3117 3117 b'only rebuild files that are inconsistent with '
3118 3118 b'the working copy parent'
3119 3119 ),
3120 3120 ),
3121 3121 ],
3122 3122 _(b'[-r REV]'),
3123 3123 )
3124 3124 def debugrebuilddirstate(ui, repo, rev, **opts):
3125 3125 """rebuild the dirstate as it would look like for the given revision
3126 3126
3127 3127 If no revision is specified, the first parent of the working directory will be used.
3128 3128
3129 3129 The dirstate will be set to the files of the given revision.
3130 3130 The actual working directory content or existing dirstate
3131 3131 information such as adds or removes is not considered.
3132 3132
3133 3133 ``minimal`` will only rebuild the dirstate status for files that claim to be
3134 3134 tracked but are not in the parent manifest, or that exist in the parent
3135 3135 manifest but are not in the dirstate. It will not change adds, removes, or
3136 3136 modified files that are in the working copy parent.
3137 3137
3138 3138 One use of this command is to make the next :hg:`status` invocation
3139 3139 check the actual file content.
3140 3140 """
3141 3141 ctx = scmutil.revsingle(repo, rev)
3142 3142 with repo.wlock():
3143 3143 if repo.currenttransaction() is not None:
3144 3144 msg = b'rebuild the dirstate outside of a transaction'
3145 3145 raise error.ProgrammingError(msg)
3146 3146 dirstate = repo.dirstate
3147 3147 changedfiles = None
3148 3148 # See command doc for what minimal does.
3149 3149 if opts.get('minimal'):
3150 3150 manifestfiles = set(ctx.manifest().keys())
3151 3151 dirstatefiles = set(dirstate)
3152 3152 manifestonly = manifestfiles - dirstatefiles
3153 3153 dsonly = dirstatefiles - manifestfiles
3154 3154 dsnotadded = {f for f in dsonly if not dirstate.get_entry(f).added}
3155 3155 changedfiles = manifestonly | dsnotadded
3156 3156
3157 3157 with dirstate.changing_parents(repo):
3158 3158 dirstate.rebuild(ctx.node(), ctx.manifest(), changedfiles)
3159 3159
3160 3160
3161 3161 @command(
3162 3162 b'debugrebuildfncache',
3163 3163 [
3164 3164 (
3165 3165 b'',
3166 3166 b'only-data',
3167 3167 False,
3168 3168 _(b'only look for wrong .d files (much faster)'),
3169 3169 )
3170 3170 ],
3171 3171 b'',
3172 3172 )
3173 3173 def debugrebuildfncache(ui, repo, **opts):
3174 3174 """rebuild the fncache file"""
3175 3175 repair.rebuildfncache(ui, repo, opts.get("only_data"))
3176 3176
3177 3177
3178 3178 @command(
3179 3179 b'debugrename',
3180 3180 [(b'r', b'rev', b'', _(b'revision to debug'), _(b'REV'))],
3181 3181 _(b'[-r REV] [FILE]...'),
3182 3182 )
3183 3183 def debugrename(ui, repo, *pats, **opts):
3184 3184 """dump rename information"""
3185 3185
3186 3186 ctx = scmutil.revsingle(repo, opts.get('rev'))
3187 3187 m = scmutil.match(ctx, pats, pycompat.byteskwargs(opts))
3188 3188 for abs in ctx.walk(m):
3189 3189 fctx = ctx[abs]
3190 3190 o = fctx.filelog().renamed(fctx.filenode())
3191 3191 rel = repo.pathto(abs)
3192 3192 if o:
3193 3193 ui.write(_(b"%s renamed from %s:%s\n") % (rel, o[0], hex(o[1])))
3194 3194 else:
3195 3195 ui.write(_(b"%s not renamed\n") % rel)
3196 3196
3197 3197
3198 3198 @command(b'debugrequires|debugrequirements', [], b'')
3199 3199 def debugrequirements(ui, repo):
3200 3200 """print the current repo requirements"""
3201 3201 for r in sorted(repo.requirements):
3202 3202 ui.write(b"%s\n" % r)
3203 3203
3204 3204
3205 3205 @command(
3206 3206 b'debugrevlog',
3207 3207 cmdutil.debugrevlogopts + [(b'd', b'dump', False, _(b'dump index data'))],
3208 3208 _(b'-c|-m|FILE'),
3209 3209 optionalrepo=True,
3210 3210 )
3211 3211 def debugrevlog(ui, repo, file_=None, **opts):
3212 3212 """show data and statistics about a revlog"""
3213 3213 r = cmdutil.openrevlog(
3214 3214 repo, b'debugrevlog', file_, pycompat.byteskwargs(opts)
3215 3215 )
3216 3216
3217 3217 if opts.get("dump"):
3218 3218 revlog_debug.dump(ui, r)
3219 3219 else:
3220 3220 revlog_debug.debug_revlog(ui, r)
3221 3221 return 0
3222 3222
3223 3223
3224 3224 @command(
3225 3225 b'debugrevlogindex',
3226 3226 cmdutil.debugrevlogopts
3227 3227 + [(b'f', b'format', 0, _(b'revlog format'), _(b'FORMAT'))],
3228 3228 _(b'[-f FORMAT] -c|-m|FILE'),
3229 3229 optionalrepo=True,
3230 3230 )
3231 3231 def debugrevlogindex(ui, repo, file_=None, **opts):
3232 3232 """dump the contents of a revlog index"""
3233 3233 r = cmdutil.openrevlog(
3234 3234 repo, b'debugrevlogindex', file_, pycompat.byteskwargs(opts)
3235 3235 )
3236 3236 format = opts.get('format', 0)
3237 3237 if format not in (0, 1):
3238 3238 raise error.Abort(_(b"unknown format %d") % format)
3239 3239
3240 3240 if ui.debugflag:
3241 3241 shortfn = hex
3242 3242 else:
3243 3243 shortfn = short
3244 3244
3245 3245 # There might not be anything in r, so have a sane default
3246 3246 idlen = 12
3247 3247 for i in r:
3248 3248 idlen = len(shortfn(r.node(i)))
3249 3249 break
3250 3250
3251 3251 if format == 0:
3252 3252 if ui.verbose:
3253 3253 ui.writenoi18n(
3254 3254 b" rev offset length linkrev %s %s p2\n"
3255 3255 % (b"nodeid".ljust(idlen), b"p1".ljust(idlen))
3256 3256 )
3257 3257 else:
3258 3258 ui.writenoi18n(
3259 3259 b" rev linkrev %s %s p2\n"
3260 3260 % (b"nodeid".ljust(idlen), b"p1".ljust(idlen))
3261 3261 )
3262 3262 elif format == 1:
3263 3263 if ui.verbose:
3264 3264 ui.writenoi18n(
3265 3265 (
3266 3266 b" rev flag offset length size link p1"
3267 3267 b" p2 %s\n"
3268 3268 )
3269 3269 % b"nodeid".rjust(idlen)
3270 3270 )
3271 3271 else:
3272 3272 ui.writenoi18n(
3273 3273 b" rev flag size link p1 p2 %s\n"
3274 3274 % b"nodeid".rjust(idlen)
3275 3275 )
3276 3276
3277 3277 for i in r:
3278 3278 node = r.node(i)
3279 3279 if format == 0:
3280 3280 try:
3281 3281 pp = r.parents(node)
3282 3282 except Exception:
3283 3283 pp = [repo.nullid, repo.nullid]
3284 3284 if ui.verbose:
3285 3285 ui.write(
3286 3286 b"% 6d % 9d % 7d % 7d %s %s %s\n"
3287 3287 % (
3288 3288 i,
3289 3289 r.start(i),
3290 3290 r.length(i),
3291 3291 r.linkrev(i),
3292 3292 shortfn(node),
3293 3293 shortfn(pp[0]),
3294 3294 shortfn(pp[1]),
3295 3295 )
3296 3296 )
3297 3297 else:
3298 3298 ui.write(
3299 3299 b"% 6d % 7d %s %s %s\n"
3300 3300 % (
3301 3301 i,
3302 3302 r.linkrev(i),
3303 3303 shortfn(node),
3304 3304 shortfn(pp[0]),
3305 3305 shortfn(pp[1]),
3306 3306 )
3307 3307 )
3308 3308 elif format == 1:
3309 3309 pr = r.parentrevs(i)
3310 3310 if ui.verbose:
3311 3311 ui.write(
3312 3312 b"% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d %s\n"
3313 3313 % (
3314 3314 i,
3315 3315 r.flags(i),
3316 3316 r.start(i),
3317 3317 r.length(i),
3318 3318 r.rawsize(i),
3319 3319 r.linkrev(i),
3320 3320 pr[0],
3321 3321 pr[1],
3322 3322 shortfn(node),
3323 3323 )
3324 3324 )
3325 3325 else:
3326 3326 ui.write(
3327 3327 b"% 6d %04x % 8d % 6d % 6d % 6d %s\n"
3328 3328 % (
3329 3329 i,
3330 3330 r.flags(i),
3331 3331 r.rawsize(i),
3332 3332 r.linkrev(i),
3333 3333 pr[0],
3334 3334 pr[1],
3335 3335 shortfn(node),
3336 3336 )
3337 3337 )
3338 3338
3339 3339
3340 3340 @command(
3341 3341 b'debugrevspec',
3342 3342 [
3343 3343 (
3344 3344 b'',
3345 3345 b'optimize',
3346 3346 None,
3347 3347 _(b'print parsed tree after optimizing (DEPRECATED)'),
3348 3348 ),
3349 3349 (
3350 3350 b'',
3351 3351 b'show-revs',
3352 3352 True,
3353 3353 _(b'print list of result revisions (default)'),
3354 3354 ),
3355 3355 (
3356 3356 b's',
3357 3357 b'show-set',
3358 3358 None,
3359 3359 _(b'print internal representation of result set'),
3360 3360 ),
3361 3361 (
3362 3362 b'p',
3363 3363 b'show-stage',
3364 3364 [],
3365 3365 _(b'print parsed tree at the given stage'),
3366 3366 _(b'NAME'),
3367 3367 ),
3368 3368 (b'', b'no-optimized', False, _(b'evaluate tree without optimization')),
3369 3369 (b'', b'verify-optimized', False, _(b'verify optimized result')),
3370 3370 ],
3371 3371 b'REVSPEC',
3372 3372 )
3373 3373 def debugrevspec(ui, repo, expr, **opts):
3374 3374 """parse and apply a revision specification
3375 3375
3376 3376 Use the -p/--show-stage option to print the parsed tree at the given stages.
3377 3377 Use -p all to print the tree at every stage.
3378 3378
3379 3379 Use the --no-show-revs option with -s or -p to print only the set
3380 3380 representation or the parsed tree, respectively.
3381 3381
3382 3382 Use --verify-optimized to compare the optimized result with the unoptimized
3383 3383 one. Returns 1 if the optimized result differs.
3384 3384 """
3385 3385 aliases = ui.configitems(b'revsetalias')
3386 3386 stages = [
3387 3387 (b'parsed', lambda tree: tree),
3388 3388 (
3389 3389 b'expanded',
3390 3390 lambda tree: revsetlang.expandaliases(tree, aliases, ui.warn),
3391 3391 ),
3392 3392 (b'concatenated', revsetlang.foldconcat),
3393 3393 (b'analyzed', revsetlang.analyze),
3394 3394 (b'optimized', revsetlang.optimize),
3395 3395 ]
3396 3396 if opts['no_optimized']:
3397 3397 stages = stages[:-1]
3398 3398 if opts['verify_optimized'] and opts['no_optimized']:
3399 3399 raise error.Abort(
3400 3400 _(b'cannot use --verify-optimized with --no-optimized')
3401 3401 )
3402 3402 stagenames = {n for n, f in stages}
3403 3403
3404 3404 showalways = set()
3405 3405 showchanged = set()
3406 3406 if ui.verbose and not opts['show_stage']:
3407 3407 # show parsed tree by --verbose (deprecated)
3408 3408 showalways.add(b'parsed')
3409 3409 showchanged.update([b'expanded', b'concatenated'])
3410 3410 if opts['optimize']:
3411 3411 showalways.add(b'optimized')
3412 3412 if opts['show_stage'] and opts['optimize']:
3413 3413 raise error.Abort(_(b'cannot use --optimize with --show-stage'))
3414 3414 if opts['show_stage'] == [b'all']:
3415 3415 showalways.update(stagenames)
3416 3416 else:
3417 3417 for n in opts['show_stage']:
3418 3418 if n not in stagenames:
3419 3419 raise error.Abort(_(b'invalid stage name: %s') % n)
3420 3420 showalways.update(opts['show_stage'])
3421 3421
3422 3422 treebystage = {}
3423 3423 printedtree = None
3424 3424 tree = revsetlang.parse(expr, lookup=revset.lookupfn(repo))
3425 3425 for n, f in stages:
3426 3426 treebystage[n] = tree = f(tree)
3427 3427 if n in showalways or (n in showchanged and tree != printedtree):
3428 3428 if opts['show_stage'] or n != b'parsed':
3429 3429 ui.write(b"* %s:\n" % n)
3430 3430 ui.write(revsetlang.prettyformat(tree), b"\n")
3431 3431 printedtree = tree
3432 3432
3433 3433 if opts['verify_optimized']:
3434 3434 arevs = revset.makematcher(treebystage[b'analyzed'])(repo)
3435 3435 brevs = revset.makematcher(treebystage[b'optimized'])(repo)
3436 3436 if opts['show_set'] or (opts['show_set'] is None and ui.verbose):
3437 3437 ui.writenoi18n(
3438 3438 b"* analyzed set:\n", stringutil.prettyrepr(arevs), b"\n"
3439 3439 )
3440 3440 ui.writenoi18n(
3441 3441 b"* optimized set:\n", stringutil.prettyrepr(brevs), b"\n"
3442 3442 )
3443 3443 arevs = list(arevs)
3444 3444 brevs = list(brevs)
3445 3445 if arevs == brevs:
3446 3446 return 0
3447 3447 ui.writenoi18n(b'--- analyzed\n', label=b'diff.file_a')
3448 3448 ui.writenoi18n(b'+++ optimized\n', label=b'diff.file_b')
3449 3449 sm = difflib.SequenceMatcher(None, arevs, brevs)
3450 3450 for tag, alo, ahi, blo, bhi in sm.get_opcodes():
3451 3451 if tag in ('delete', 'replace'):
3452 3452 for c in arevs[alo:ahi]:
3453 3453 ui.write(b'-%d\n' % c, label=b'diff.deleted')
3454 3454 if tag in ('insert', 'replace'):
3455 3455 for c in brevs[blo:bhi]:
3456 3456 ui.write(b'+%d\n' % c, label=b'diff.inserted')
3457 3457 if tag == 'equal':
3458 3458 for c in arevs[alo:ahi]:
3459 3459 ui.write(b' %d\n' % c)
3460 3460 return 1
3461 3461
3462 3462 func = revset.makematcher(tree)
3463 3463 revs = func(repo)
3464 3464 if opts['show_set'] or (opts['show_set'] is None and ui.verbose):
3465 3465 ui.writenoi18n(b"* set:\n", stringutil.prettyrepr(revs), b"\n")
3466 3466 if not opts['show_revs']:
3467 3467 return
3468 3468 for c in revs:
3469 3469 ui.write(b"%d\n" % c)
3470 3470
3471 3471
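# Editor's illustrative sketch (not part of debugcommands.py): the
# difflib-based "+/-" listing that --verify-optimized produces above,
# reduced to two plain lists of revision numbers.
import difflib


def _diff_rev_lists(arevs, brevs):
    out = []
    sm = difflib.SequenceMatcher(None, arevs, brevs)
    for tag, alo, ahi, blo, bhi in sm.get_opcodes():
        if tag in ('delete', 'replace'):
            out.extend('-%d' % c for c in arevs[alo:ahi])
        if tag in ('insert', 'replace'):
            out.extend('+%d' % c for c in brevs[blo:bhi])
        if tag == 'equal':
            out.extend(' %d' % c for c in arevs[alo:ahi])
    return out


# _diff_rev_lists([0, 1, 2, 4], [0, 2, 3, 4])
# -> [' 0', '-1', ' 2', '+3', ' 4']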
3472 3472 @command(
3473 3473 b'debugserve',
3474 3474 [
3475 3475 (
3476 3476 b'',
3477 3477 b'sshstdio',
3478 3478 False,
3479 3479 _(b'run an SSH server bound to process handles'),
3480 3480 ),
3481 3481 (b'', b'logiofd', b'', _(b'file descriptor to log server I/O to')),
3482 3482 (b'', b'logiofile', b'', _(b'file to log server I/O to')),
3483 3483 ],
3484 3484 b'',
3485 3485 )
3486 3486 def debugserve(ui, repo, **opts):
3487 3487 """run a server with advanced settings
3488 3488
3489 3489 This command is similar to :hg:`serve`. It exists partially as a
3490 3490 workaround for the fact that ``hg serve --stdio`` must have specific
3491 3491 arguments for security reasons.
3492 3492 """
3493 3493 if not opts['sshstdio']:
3494 3494 raise error.Abort(_(b'only --sshstdio is currently supported'))
3495 3495
3496 3496 logfh = None
3497 3497
3498 3498 if opts['logiofd'] and opts['logiofile']:
3499 3499 raise error.Abort(_(b'cannot use both --logiofd and --logiofile'))
3500 3500
3501 3501 if opts['logiofd']:
3502 3502 # Ideally we would be line buffered. But line buffering in binary
3503 3503 # mode isn't supported and emits a warning in Python 3.8+. Disabling
3504 3504 # buffering could have performance impacts. But since this isn't
3505 3505 # performance critical code, it should be fine.
3506 3506 try:
3507 3507 logfh = os.fdopen(int(opts['logiofd']), 'ab', 0)
3508 3508 except OSError as e:
3509 3509 if e.errno != errno.ESPIPE:
3510 3510 raise
3511 3511 # can't seek a pipe, so `ab` mode fails on py3
3512 3512 logfh = os.fdopen(int(opts['logiofd']), 'wb', 0)
3513 3513 elif opts['logiofile']:
3514 3514 logfh = open(opts['logiofile'], b'ab', 0)
3515 3515
3516 3516 s = wireprotoserver.sshserver(ui, repo, logfh=logfh)
3517 3517 s.serve_forever()
3518 3518
3519 3519
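# Editor's illustrative sketch (not part of debugcommands.py): the append-
# vs-write fallback used for --logiofd above.  Append mode needs a seekable
# file, so fdopen(..., 'ab') fails with ESPIPE when the descriptor is a
# pipe; in that case we fall back to plain unbuffered writing.
import errno
import os


def _open_log_fd(fd):
    try:
        return os.fdopen(fd, 'ab', 0)
    except OSError as e:
        if e.errno != errno.ESPIPE:
            raise
        return os.fdopen(fd, 'wb', 0)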
3520 3520 @command(b'debugsetparents', [], _(b'REV1 [REV2]'))
3521 3521 def debugsetparents(ui, repo, rev1, rev2=None):
3522 3522 """manually set the parents of the current working directory (DANGEROUS)
3523 3523
3524 3524 This command is not what you are looking for and should not be used. Using
3525 3525 this command will most certainly result in slight corruption of the file
3526 3526 level histories within your repository. DO NOT USE THIS COMMAND.
3527 3527
3528 3528 The command updates the p1 and p2 fields in the dirstate without touching
3529 3529 anything else. This is useful for writing repository conversion tools, but
3530 3530 should be used with extreme care. For example, neither the working
3531 3531 directory contents nor the file states in the dirstate are updated, so file
3532 3532 status may be incorrect after running this command. Only use it if you are
3533 3533 one of the few people who deeply understand both conversion tools and
3534 3534 file-level histories. If you are reading this help, you are not one of
3535 3535 those people (most of them sailed west from Mithlond anyway).
3536 3536
3537 3537 So one last time DO NOT USE THIS COMMAND.
3538 3538
3539 3539 Returns 0 on success.
3540 3540 """
3541 3541
3542 3542 node1 = scmutil.revsingle(repo, rev1).node()
3543 3543 node2 = scmutil.revsingle(repo, rev2, b'null').node()
3544 3544
3545 3545 with repo.wlock():
3546 3546 repo.setparents(node1, node2)
3547 3547
3548 3548
3549 3549 @command(b'debugsidedata', cmdutil.debugrevlogopts, _(b'-c|-m|FILE REV'))
3550 3550 def debugsidedata(ui, repo, file_, rev=None, **opts):
3551 3551 """dump the side data for a cl/manifest/file revision
3552 3552
3553 3553 Use --verbose to dump the sidedata content."""
3554 3554 if opts.get('changelog') or opts.get('manifest') or opts.get('dir'):
3555 3555 if rev is not None:
3556 3556 raise error.InputError(
3557 3557 _(b'cannot specify a revision with other arguments')
3558 3558 )
3559 3559 file_, rev = None, file_
3560 3560 elif rev is None:
3561 3561 raise error.InputError(_(b'please specify a revision'))
3562 3562 r = cmdutil.openstorage(
3563 3563 repo, b'debugdata', file_, pycompat.byteskwargs(opts)
3564 3564 )
3565 3565 r = getattr(r, '_revlog', r)
3566 3566 try:
3567 3567 sidedata = r.sidedata(r.lookup(rev))
3568 3568 except KeyError:
3569 3569 raise error.Abort(_(b'invalid revision identifier %s') % rev)
3570 3570 if sidedata:
3571 3571 sidedata = list(sidedata.items())
3572 3572 sidedata.sort()
3573 3573 ui.writenoi18n(b'%d sidedata entries\n' % len(sidedata))
3574 3574 for key, value in sidedata:
3575 3575 ui.writenoi18n(b' entry-%04o size %d\n' % (key, len(value)))
3576 3576 if ui.verbose:
3577 3577 ui.writenoi18n(b' %s\n' % stringutil.pprint(value))
3578 3578
3579 3579
3580 3580 @command(b'debugssl', [], b'[SOURCE]', optionalrepo=True)
3581 3581 def debugssl(ui, repo, source=None, **opts):
3582 3582 """test a secure connection to a server
3583 3583
3584 3584 This builds the certificate chain for the server on Windows, installing the
3585 3585 missing intermediates and trusted root via Windows Update if necessary. It
3586 3586 does nothing on other platforms.
3587 3587
3588 3588 If SOURCE is omitted, the 'default' path will be used. If a URL is given,
3589 3589 that server is used. See :hg:`help urls` for more information.
3590 3590
3591 3591 If the update succeeds, retry the original operation. Otherwise, the cause
3592 3592 of the SSL error is likely another issue.
3593 3593 """
3594 3594 if not pycompat.iswindows:
3595 3595 raise error.Abort(
3596 3596 _(b'certificate chain building is only possible on Windows')
3597 3597 )
3598 3598
3599 3599 if not source:
3600 3600 if not repo:
3601 3601 raise error.Abort(
3602 3602 _(
3603 3603 b"there is no Mercurial repository here, and no "
3604 3604 b"server specified"
3605 3605 )
3606 3606 )
3607 3607 source = b"default"
3608 3608
3609 3609 path = urlutil.get_unique_pull_path_obj(b'debugssl', ui, source)
3610 3610 url = path.url
3611 3611
3612 3612 defaultport = {b'https': 443, b'ssh': 22}
3613 3613 if url.scheme in defaultport:
3614 3614 try:
3615 3615 addr = (url.host, int(url.port or defaultport[url.scheme]))
3616 3616 except ValueError:
3617 3617 raise error.Abort(_(b"malformed port number in URL"))
3618 3618 else:
3619 3619 raise error.Abort(_(b"only https and ssh connections are supported"))
3620 3620
3621 3621 from . import win32
3622 3622
3623 3623 s = ssl.wrap_socket(
3624 3624 socket.socket(),
3625 3625 ssl_version=ssl.PROTOCOL_TLS,
3626 3626 cert_reqs=ssl.CERT_NONE,
3627 3627 ca_certs=None,
3628 3628 )
3629 3629
3630 3630 try:
3631 3631 s.connect(addr)
3632 3632 cert = s.getpeercert(True)
3633 3633
3634 3634 ui.status(_(b'checking the certificate chain for %s\n') % url.host)
3635 3635
3636 3636 complete = win32.checkcertificatechain(cert, build=False)
3637 3637
3638 3638 if not complete:
3639 3639 ui.status(_(b'certificate chain is incomplete, updating... '))
3640 3640
3641 3641 if not win32.checkcertificatechain(cert):
3642 3642 ui.status(_(b'failed.\n'))
3643 3643 else:
3644 3644 ui.status(_(b'done.\n'))
3645 3645 else:
3646 3646 ui.status(_(b'full certificate chain is available\n'))
3647 3647 finally:
3648 3648 s.close()
3649 3649
3650 3650
3651 3651 @command(
3652 3652 b'debug::stable-tail-sort',
3653 3653 [
3654 3654 (
3655 3655 b'T',
3656 3656 b'template',
3657 3657 b'{rev}\n',
3658 3658 _(b'display with template'),
3659 3659 _(b'TEMPLATE'),
3660 3660 ),
3661 3661 ],
3662 3662 b'REV',
3663 3663 )
3664 3664 def debug_stable_tail_sort(ui, repo, revspec, template, **opts):
3665 3665 """display the stable-tail sort of the ancestors of a given node"""
3666 3666 rev = logcmdutil.revsingle(repo, revspec).rev()
3667 3667 cl = repo.changelog
3668 3668
3669 3669 displayer = logcmdutil.maketemplater(ui, repo, template)
3670 3670 sorted_revs = stabletailsort._stable_tail_sort_naive(cl, rev)
3671 3671 for ancestor_rev in sorted_revs:
3672 3672 displayer.show(repo[ancestor_rev])
3673 3673
3674 3674
3675 3675 @command(
3676 3676 b'debug::stable-tail-sort-leaps',
3677 3677 [
3678 3678 (
3679 3679 b'T',
3680 3680 b'template',
3681 3681 b'{rev}',
3682 3682 _(b'display with template'),
3683 3683 _(b'TEMPLATE'),
3684 3684 ),
3685 3685 (b's', b'specific', False, _(b'restrict to specific leaps')),
3686 3686 ],
3687 3687 b'REV',
3688 3688 )
3689 3689 def debug_stable_tail_sort_leaps(ui, repo, rspec, template, specific, **opts):
3690 3690 """display the leaps in the stable-tail sort of a node, one per line"""
3691 3691 rev = logcmdutil.revsingle(repo, rspec).rev()
3692 3692
3693 3693 if specific:
3694 3694 get_leaps = stabletailsort._find_specific_leaps_naive
3695 3695 else:
3696 3696 get_leaps = stabletailsort._find_all_leaps_naive
3697 3697
3698 3698 displayer = logcmdutil.maketemplater(ui, repo, template)
3699 3699 for source, target in get_leaps(repo.changelog, rev):
3700 3700 displayer.show(repo[source])
3701 3701 displayer.show(repo[target])
3702 3702 ui.write(b'\n')
3703 3703
3704 3704
3705 3705 @command(
3706 3706 b"debugbackupbundle",
3707 3707 [
3708 3708 (
3709 3709 b"",
3710 3710 b"recover",
3711 3711 b"",
3712 3712 b"brings the specified changeset back into the repository",
3713 3713 )
3714 3714 ]
3715 3715 + cmdutil.logopts,
3716 3716 _(b"hg debugbackupbundle [--recover HASH]"),
3717 3717 )
3718 3718 def debugbackupbundle(ui, repo, *pats, **opts):
3719 3719 """lists the changesets available in backup bundles
3720 3720
3721 3721 Without any arguments, this command prints a list of the changesets in each
3722 3722 backup bundle.
3723 3723
3724 3724 --recover takes a changeset hash and unbundles the first bundle that
3725 3725 contains that hash, which puts that changeset back in your repository.
3726 3726
3727 3727 --verbose will print the entire commit message and the bundle path for that
3728 3728 backup.
3729 3729 """
3730 3730 backups = list(
3731 3731 filter(
3732 3732 os.path.isfile, glob.glob(repo.vfs.join(b"strip-backup") + b"/*.hg")
3733 3733 )
3734 3734 )
3735 3735 backups.sort(key=lambda x: os.path.getmtime(x), reverse=True)
3736 3736
3737 3737 opts["bundle"] = b""
3738 3738 opts["force"] = None
3739 3739 limit = logcmdutil.getlimit(pycompat.byteskwargs(opts))
3740 3740
3741 3741 def display(other, chlist, displayer):
3742 3742 if opts.get("newest_first"):
3743 3743 chlist.reverse()
3744 3744 count = 0
3745 3745 for n in chlist:
3746 3746 if limit is not None and count >= limit:
3747 3747 break
3748 3748 parents = [
3749 3749 True for p in other.changelog.parents(n) if p != repo.nullid
3750 3750 ]
3751 3751 if opts.get("no_merges") and len(parents) == 2:
3752 3752 continue
3753 3753 count += 1
3754 3754 displayer.show(other[n])
3755 3755
3756 3756 recovernode = opts.get("recover")
3757 3757 if recovernode:
3758 3758 if scmutil.isrevsymbol(repo, recovernode):
3759 3759 ui.warn(_(b"%s already exists in the repo\n") % recovernode)
3760 3760 return
3761 3761 elif backups:
3762 3762 msg = _(
3763 3763 b"Recover changesets using: hg debugbackupbundle --recover "
3764 3764 b"<changeset hash>\n\nAvailable backup changesets:"
3765 3765 )
3766 3766 ui.status(msg, label=b"status.removed")
3767 3767 else:
3768 3768 ui.status(_(b"no backup changesets found\n"))
3769 3769 return
3770 3770
3771 3771 for backup in backups:
3772 3772 # Much of this is copied from the hg incoming logic
3773 3773 source = os.path.relpath(backup, encoding.getcwd())
3774 3774 path = urlutil.get_unique_pull_path_obj(
3775 3775 b'debugbackupbundle',
3776 3776 ui,
3777 3777 source,
3778 3778 )
3779 3779 try:
3780 3780 other = hg.peer(repo, pycompat.byteskwargs(opts), path)
3781 3781 except error.LookupError as ex:
3782 3782 msg = _(b"\nwarning: unable to open bundle %s") % path.loc
3783 3783 hint = _(b"\n(missing parent rev %s)\n") % short(ex.name)
3784 3784 ui.warn(msg, hint=hint)
3785 3785 continue
3786 3786 branches = (path.branch, opts.get('branch', []))
3787 3787 revs, checkout = hg.addbranchrevs(
3788 3788 repo, other, branches, opts.get("rev")
3789 3789 )
3790 3790
3791 3791 if revs:
3792 3792 revs = [other.lookup(rev) for rev in revs]
3793 3793
3794 3794 with ui.silent():
3795 3795 try:
3796 3796 other, chlist, cleanupfn = bundlerepo.getremotechanges(
3797 3797 ui, repo, other, revs, opts["bundle"], opts["force"]
3798 3798 )
3799 3799 except error.LookupError:
3800 3800 continue
3801 3801
3802 3802 try:
3803 3803 if not chlist:
3804 3804 continue
3805 3805 if recovernode:
3806 3806 with repo.lock(), repo.transaction(b"unbundle") as tr:
3807 3807 if scmutil.isrevsymbol(other, recovernode):
3808 3808 ui.status(_(b"Unbundling %s\n") % (recovernode))
3809 3809 f = hg.openpath(ui, path.loc)
3810 3810 gen = exchange.readbundle(ui, f, path.loc)
3811 3811 if isinstance(gen, bundle2.unbundle20):
3812 3812 bundle2.applybundle(
3813 3813 repo,
3814 3814 gen,
3815 3815 tr,
3816 3816 source=b"unbundle",
3817 3817 url=b"bundle:" + path.loc,
3818 3818 )
3819 3819 else:
3820 3820 gen.apply(repo, b"unbundle", b"bundle:" + path.loc)
3821 3821 break
3822 3822 else:
3823 3823 backupdate = encoding.strtolocal(
3824 3824 time.strftime(
3825 3825 "%a %H:%M, %Y-%m-%d",
3826 3826 time.localtime(os.path.getmtime(path.loc)),
3827 3827 )
3828 3828 )
3829 3829 ui.status(b"\n%s\n" % (backupdate.ljust(50)))
3830 3830 if ui.verbose:
3831 3831 ui.status(b"%s%s\n" % (b"bundle:".ljust(13), path.loc))
3832 3832 else:
3833 3833 opts[
3834 3834 "template"
3835 3835 ] = b"{label('status.modified', node|short)} {desc|firstline}\n"
3836 3836 displayer = logcmdutil.changesetdisplayer(
3837 3837 ui, other, pycompat.byteskwargs(opts), False
3838 3838 )
3839 3839 display(other, chlist, displayer)
3840 3840 displayer.close()
3841 3841 finally:
3842 3842 cleanupfn()
3843 3843
3844 3844
3845 3845 @command(
3846 3846 b'debugsub',
3847 3847 [(b'r', b'rev', b'', _(b'revision to check'), _(b'REV'))],
3848 3848 _(b'[-r REV] [REV]'),
3849 3849 )
3850 3850 def debugsub(ui, repo, rev=None):
3851 3851 ctx = scmutil.revsingle(repo, rev, None)
3852 3852 for k, v in sorted(ctx.substate.items()):
3853 3853 ui.writenoi18n(b'path %s\n' % k)
3854 3854 ui.writenoi18n(b' source %s\n' % v[0])
3855 3855 ui.writenoi18n(b' revision %s\n' % v[1])
3856 3856
3857 3857
3858 3858 @command(
3859 3859 b'debugshell',
3860 3860 [
3861 3861 (
3862 3862 b'c',
3863 3863 b'command',
3864 3864 b'',
3865 3865 _(b'program passed in as a string'),
3866 3866 _(b'COMMAND'),
3867 3867 )
3868 3868 ],
3869 3869 _(b'[-c COMMAND]'),
3870 3870 optionalrepo=True,
3871 3871 )
3872 3872 def debugshell(ui, repo, **opts):
3873 3873 """run an interactive Python interpreter
3874 3874
3875 3875 The local namespace is provided with a reference to the ui and
3876 3876 the repo instance (if available).
3877 3877 """
3878 3878 import code
3879 3879
3880 3880 imported_objects = {
3881 3881 'ui': ui,
3882 3882 'repo': repo,
3883 3883 }
3884 3884
3885 3885 # py2exe disables initialization of the site module, which is responsible
3886 3886 # for arranging for ``quit()`` to exit the interpreter. Manually initialize
3887 3887 # the stuff that site normally does here, so that the interpreter can be
3888 3888 # quit in a consistent manner, whether run with pyoxidizer, exewrapper.c,
3889 3889 # py.exe, or py2exe.
3890 3890 if getattr(sys, "frozen", None) == 'console_exe':
3891 3891 try:
3892 3892 import site
3893 3893
3894 3894 site.setcopyright()
3895 3895 site.sethelper()
3896 3896 site.setquit()
3897 3897 except ImportError:
3898 3898 site = None # Keep PyCharm happy
3899 3899
3900 3900 command = opts.get('command')
3901 3901 if command:
3902 3902 compiled = code.compile_command(encoding.strfromlocal(command))
3903 3903 code.InteractiveInterpreter(locals=imported_objects).runcode(compiled)
3904 3904 return
3905 3905
3906 3906 code.interact(local=imported_objects)
3907 3907
3908 3908
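# Editor's illustrative sketch (not part of debugcommands.py): the same
# mechanism the --command path of debugshell uses, outside of Mercurial.
# code.compile_command() returns None for incomplete input, so a real REPL
# would keep buffering more lines in that case.
import code

interp = code.InteractiveInterpreter(locals={'answer': 42})
compiled = code.compile_command("print(answer * 2)")
if compiled is not None:
    interp.runcode(compiled)  # prints 84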
3909 3909 @command(
3910 3910 b'debug-revlog-stats',
3911 3911 [
3912 3912 (b'c', b'changelog', None, _(b'Display changelog statistics')),
3913 3913 (b'm', b'manifest', None, _(b'Display manifest statistics')),
3914 3914 (b'f', b'filelogs', None, _(b'Display filelogs statistics')),
3915 3915 ]
3916 3916 + cmdutil.formatteropts,
3917 3917 )
3918 3918 def debug_revlog_stats(ui, repo, **opts):
3919 3919 """display statistics about revlogs in the store"""
3920 3920 changelog = opts["changelog"]
3921 3921 manifest = opts["manifest"]
3922 3922 filelogs = opts["filelogs"]
3923 3923
3924 3924 if changelog is None and manifest is None and filelogs is None:
3925 3925 changelog = True
3926 3926 manifest = True
3927 3927 filelogs = True
3928 3928
3929 3929 repo = repo.unfiltered()
3930 3930 fm = ui.formatter(b'debug-revlog-stats', pycompat.byteskwargs(opts))
3931 3931 revlog_debug.debug_revlog_stats(repo, fm, changelog, manifest, filelogs)
3932 3932 fm.end()
3933 3933
3934 3934
3935 3935 @command(
3936 3936 b'debugsuccessorssets',
3937 3937 [(b'', b'closest', False, _(b'return closest successors sets only'))],
3938 3938 _(b'[REV]'),
3939 3939 )
3940 3940 def debugsuccessorssets(ui, repo, *revs, **opts):
3941 3941 """show set of successors for revision
3942 3942
3943 3943 A successors set of changeset A is a consistent group of revisions that
3944 3944 succeed A. It contains non-obsolete changesets only unless the closest
3945 3945 successors sets are requested (via --closest).
3946 3946
3947 3947 In most cases a changeset A has a single successors set containing a single
3948 3948 successor (changeset A replaced by A').
3949 3949
3950 3950 A changeset that is made obsolete with no successors is called "pruned".
3951 3951 Such changesets have no successors sets at all.
3952 3952
3953 3953 A changeset that has been "split" will have a successors set containing
3954 3954 more than one successor.
3955 3955
3956 3956 A changeset that has been rewritten in multiple different ways is called
3957 3957 "divergent". Such changesets have multiple successor sets (each of which
3958 3958 may also be split, i.e. have multiple successors).
3959 3959
3960 3960 Results are displayed as follows::
3961 3961
3962 3962 <rev1>
3963 3963 <successors-1A>
3964 3964 <rev2>
3965 3965 <successors-2A>
3966 3966 <successors-2B1> <successors-2B2> <successors-2B3>
3967 3967
3968 3968 Here rev2 has two possible (i.e. divergent) successors sets. The first
3969 3969 holds one element, whereas the second holds three (i.e. the changeset has
3970 3970 been split).
3971 3971 """
3972 3972 # passed to successorssets caching computation from one call to another
3973 3973 cache = {}
3974 3974 ctx2str = bytes
3975 3975 node2str = short
3976 3976 for rev in logcmdutil.revrange(repo, revs):
3977 3977 ctx = repo[rev]
3978 3978 ui.write(b'%s\n' % ctx2str(ctx))
3979 3979 for succsset in obsutil.successorssets(
3980 3980 repo, ctx.node(), closest=opts['closest'], cache=cache
3981 3981 ):
3982 3982 if succsset:
3983 3983 ui.write(b' ')
3984 3984 ui.write(node2str(succsset[0]))
3985 3985 for node in succsset[1:]:
3986 3986 ui.write(b' ')
3987 3987 ui.write(node2str(node))
3988 3988 ui.write(b'\n')
3989 3989
3990 3990
3991 3991 @command(b'debugtagscache', [])
3992 3992 def debugtagscache(ui, repo):
3993 3993 """display the contents of .hg/cache/hgtagsfnodes1"""
3994 3994 cache = tagsmod.hgtagsfnodescache(repo.unfiltered())
3995 3995 flog = repo.file(b'.hgtags')
3996 3996 for r in repo:
3997 3997 node = repo[r].node()
3998 3998 tagsnode = cache.getfnode(node, computemissing=False)
3999 3999 if tagsnode:
4000 4000 tagsnodedisplay = hex(tagsnode)
4001 4001 if not flog.hasnode(tagsnode):
4002 4002 tagsnodedisplay += b' (unknown node)'
4003 4003 elif tagsnode is None:
4004 4004 tagsnodedisplay = b'missing'
4005 4005 else:
4006 4006 tagsnodedisplay = b'invalid'
4007 4007
4008 4008 ui.write(b'%d %s %s\n' % (r, hex(node), tagsnodedisplay))
4009 4009
4010 4010
4011 4011 @command(
4012 4012 b'debugtemplate',
4013 4013 [
4014 4014 (b'r', b'rev', [], _(b'apply template on changesets'), _(b'REV')),
4015 4015 (b'D', b'define', [], _(b'define template keyword'), _(b'KEY=VALUE')),
4016 4016 ],
4017 4017 _(b'[-r REV]... [-D KEY=VALUE]... TEMPLATE'),
4018 4018 optionalrepo=True,
4019 4019 )
4020 4020 def debugtemplate(ui, repo, tmpl, **opts):
4021 4021 """parse and apply a template
4022 4022
4023 4023 If -r/--rev is given, the template is processed as a log template and
4024 4024 applied to the given changesets. Otherwise, it is processed as a generic
4025 4025 template.
4026 4026
4027 4027 Use --verbose to print the parsed tree.
4028 4028 """
4029 4029 revs = None
4030 4030 if opts['rev']:
4031 4031 if repo is None:
4032 4032 raise error.RepoError(
4033 4033 _(b'there is no Mercurial repository here (.hg not found)')
4034 4034 )
4035 4035 revs = logcmdutil.revrange(repo, opts['rev'])
4036 4036
4037 4037 props = {}
4038 4038 for d in opts['define']:
4039 4039 try:
4040 4040 k, v = (e.strip() for e in d.split(b'=', 1))
4041 4041 if not k or k == b'ui':
4042 4042 raise ValueError
4043 4043 props[k] = v
4044 4044 except ValueError:
4045 4045 raise error.Abort(_(b'malformed keyword definition: %s') % d)
4046 4046
4047 4047 if ui.verbose:
4048 4048 aliases = ui.configitems(b'templatealias')
4049 4049 tree = templater.parse(tmpl)
4050 4050 ui.note(templater.prettyformat(tree), b'\n')
4051 4051 newtree = templater.expandaliases(tree, aliases)
4052 4052 if newtree != tree:
4053 4053 ui.notenoi18n(
4054 4054 b"* expanded:\n", templater.prettyformat(newtree), b'\n'
4055 4055 )
4056 4056
4057 4057 if revs is None:
4058 4058 tres = formatter.templateresources(ui, repo)
4059 4059 t = formatter.maketemplater(ui, tmpl, resources=tres)
4060 4060 if ui.verbose:
4061 4061 kwds, funcs = t.symbolsuseddefault()
4062 4062 ui.writenoi18n(b"* keywords: %s\n" % b', '.join(sorted(kwds)))
4063 4063 ui.writenoi18n(b"* functions: %s\n" % b', '.join(sorted(funcs)))
4064 4064 ui.write(t.renderdefault(props))
4065 4065 else:
4066 4066 displayer = logcmdutil.maketemplater(ui, repo, tmpl)
4067 4067 if ui.verbose:
4068 4068 kwds, funcs = displayer.t.symbolsuseddefault()
4069 4069 ui.writenoi18n(b"* keywords: %s\n" % b', '.join(sorted(kwds)))
4070 4070 ui.writenoi18n(b"* functions: %s\n" % b', '.join(sorted(funcs)))
4071 4071 for r in revs:
4072 4072 displayer.show(repo[r], **pycompat.strkwargs(props))
4073 4073 displayer.close()
4074 4074
4075 4075
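# Editor's illustrative sketch (not part of debugcommands.py): the
# parse/expand steps that --verbose walks through above, shown on a
# standalone template string.  Assumes mercurial.templater is importable;
# the alias pair below is hypothetical and stands in for a [templatealias]
# config entry.
from mercurial import templater

tree = templater.parse(b'{rev}:{shortid}\n')
aliases = [(b'shortid', b'node|short')]  # hypothetical alias definition
expanded = templater.expandaliases(tree, aliases)
print(templater.prettyformat(expanded))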
4076 4076 @command(
4077 4077 b'debuguigetpass',
4078 4078 [
4079 4079 (b'p', b'prompt', b'', _(b'prompt text'), _(b'TEXT')),
4080 4080 ],
4081 4081 _(b'[-p TEXT]'),
4082 4082 norepo=True,
4083 4083 )
4084 4084 def debuguigetpass(ui, prompt=b''):
4085 4085 """show prompt to type password"""
4086 4086 r = ui.getpass(prompt)
4087 4087 if r is None:
4088 4088 r = b"<default response>"
4089 4089 ui.writenoi18n(b'response: %s\n' % r)
4090 4090
4091 4091
4092 4092 @command(
4093 4093 b'debuguiprompt',
4094 4094 [
4095 4095 (b'p', b'prompt', b'', _(b'prompt text'), _(b'TEXT')),
4096 4096 ],
4097 4097 _(b'[-p TEXT]'),
4098 4098 norepo=True,
4099 4099 )
4100 4100 def debuguiprompt(ui, prompt=b''):
4101 4101 """show plain prompt"""
4102 4102 r = ui.prompt(prompt)
4103 4103 ui.writenoi18n(b'response: %s\n' % r)
4104 4104
4105 4105
4106 4106 @command(b'debugupdatecaches', [])
4107 4107 def debugupdatecaches(ui, repo, *pats, **opts):
4108 4108 """warm all known caches in the repository"""
4109 4109 with repo.wlock(), repo.lock():
4110 4110 repo.updatecaches(caches=repository.CACHES_ALL)
4111 4111
4112 4112
4113 4113 @command(
4114 4114 b'debugupgraderepo',
4115 4115 [
4116 4116 (
4117 4117 b'o',
4118 4118 b'optimize',
4119 4119 [],
4120 4120 _(b'extra optimization to perform'),
4121 4121 _(b'NAME'),
4122 4122 ),
4123 4123 (b'', b'run', False, _(b'performs an upgrade')),
4124 4124 (b'', b'backup', True, _(b'keep the old repository content around')),
4125 4125 (b'', b'changelog', None, _(b'select the changelog for upgrade')),
4126 4126 (b'', b'manifest', None, _(b'select the manifest for upgrade')),
4127 4127 (b'', b'filelogs', None, _(b'select all filelogs for upgrade')),
4128 4128 ],
4129 4129 )
4130 4130 def debugupgraderepo(ui, repo, run=False, optimize=None, backup=True, **opts):
4131 4131 """upgrade a repository to use different features
4132 4132
4133 4133 If no arguments are specified, the repository is evaluated for upgrade
4134 4134 and a list of problems and potential optimizations is printed.
4135 4135
4136 4136 With ``--run``, a repository upgrade is performed. Behavior of the upgrade
4137 4137 can be influenced via additional arguments. More details will be provided
4138 4138 by the command output when run without ``--run``.
4139 4139
4140 4140 During the upgrade, the repository will be locked and no writes will be
4141 4141 allowed.
4142 4142
4143 4143 At the end of the upgrade, the repository may not be readable while new
4144 4144 repository data is swapped in. This window will be as long as it takes to
4145 4145 rename some directories inside the ``.hg`` directory. On most machines, this
4146 4146 should complete almost instantaneously and the chances of a consumer being
4147 4147 unable to access the repository should be low.
4148 4148
4149 4149 By default, all revlogs will be upgraded. You can restrict this using flags
4150 4150 such as `--manifest`:
4151 4151
4152 4152 * `--manifest`: only optimize the manifest
4153 4153 * `--no-manifest`: optimize all revlogs but the manifest
4154 4154 * `--changelog`: optimize the changelog only
4155 4155 * `--no-changelog --no-manifest`: optimize filelogs only
4156 4156 * `--filelogs`: optimize the filelogs only
4157 4157 * `--no-changelog --no-manifest --no-filelogs`: skip all revlog optimizations
4158 4158 """
4159 4159 return upgrade.upgraderepo(
4160 4160 ui, repo, run=run, optimize=set(optimize), backup=backup, **opts
4161 4161 )
4162 4162
4163 4163
4164 4164 @command(
4165 4165 b'debugwalk', cmdutil.walkopts, _(b'[OPTION]... [FILE]...'), inferrepo=True
4166 4166 )
4167 4167 def debugwalk(ui, repo, *pats, **opts):
4168 4168 """show how files match on given patterns"""
4169 4169 m = scmutil.match(repo[None], pats, pycompat.byteskwargs(opts))
4170 4170 if ui.verbose:
4171 4171 ui.writenoi18n(b'* matcher:\n', stringutil.prettyrepr(m), b'\n')
4172 4172 items = list(repo[None].walk(m))
4173 4173 if not items:
4174 4174 return
4175 4175 f = lambda fn: fn
4176 4176 if ui.configbool(b'ui', b'slash') and pycompat.ossep != b'/':
4177 4177 f = lambda fn: util.normpath(fn)
4178 4178 fmt = b'f %%-%ds %%-%ds %%s' % (
4179 4179 max([len(abs) for abs in items]),
4180 4180 max([len(repo.pathto(abs)) for abs in items]),
4181 4181 )
4182 4182 for abs in items:
4183 4183 line = fmt % (
4184 4184 abs,
4185 4185 f(repo.pathto(abs)),
4186 4186 m.exact(abs) and b'exact' or b'',
4187 4187 )
4188 4188 ui.write(b"%s\n" % line.rstrip())
4189 4189
4190 4190
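# Editor's illustrative sketch (not part of debugcommands.py): the two-step
# %-formatting used to build `fmt` above.  The first substitution bakes the
# computed column widths into the format string, the second fills in the
# actual values.
fmt = b'f %%-%ds %%-%ds %%s' % (10, 12)  # -> b'f %-10s %-12s %s'
line = fmt % (b'a/b.txt', b'b.txt', b'exact')
# `line` now holds the three columns left-justified to widths 10 and 12.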
4191 4191 @command(b'debugwhyunstable', [], _(b'REV'))
4192 4192 def debugwhyunstable(ui, repo, rev):
4193 4193 """explain instabilities of a changeset"""
4194 4194 for entry in obsutil.whyunstable(repo, scmutil.revsingle(repo, rev)):
4195 4195 dnodes = b''
4196 4196 if entry.get(b'divergentnodes'):
4197 4197 dnodes = (
4198 4198 b' '.join(
4199 4199 b'%s (%s)' % (ctx.hex(), ctx.phasestr())
4200 4200 for ctx in entry[b'divergentnodes']
4201 4201 )
4202 4202 + b' '
4203 4203 )
4204 4204 ui.write(
4205 4205 b'%s: %s%s %s\n'
4206 4206 % (entry[b'instability'], dnodes, entry[b'reason'], entry[b'node'])
4207 4207 )
4208 4208
4209 4209
4210 4210 @command(
4211 4211 b'debugwireargs',
4212 4212 [
4213 4213 (b'', b'three', b'', b'three'),
4214 4214 (b'', b'four', b'', b'four'),
4215 4215 (b'', b'five', b'', b'five'),
4216 4216 ]
4217 4217 + cmdutil.remoteopts,
4218 4218 _(b'REPO [OPTIONS]... [ONE [TWO]]'),
4219 4219 norepo=True,
4220 4220 )
4221 4221 def debugwireargs(ui, repopath, *vals, **opts):
4222 4222 repo = hg.peer(ui, pycompat.byteskwargs(opts), repopath)
4223 4223 try:
4224 4224 for opt in cmdutil.remoteopts:
4225 4225 del opts[pycompat.sysstr(opt[1])]
4226 4226 args = {}
4227 4227 for k, v in opts.items():
4228 4228 if v:
4229 4229 args[k] = v
4230 4230
4231 4231 # run twice to check that we don't mess up the stream for the next command
4232 4232 res1 = repo.debugwireargs(*vals, **args)
4233 4233 res2 = repo.debugwireargs(*vals, **args)
4234 4234 ui.write(b"%s\n" % res1)
4235 4235 if res1 != res2:
4236 4236 ui.warn(b"%s\n" % res2)
4237 4237 finally:
4238 4238 repo.close()
4239 4239
4240 4240
4241 4241 def _parsewirelangblocks(fh):
4242 4242 activeaction = None
4243 4243 blocklines = []
4244 4244 lastindent = 0
4245 4245
4246 4246 for line in fh:
4247 4247 line = line.rstrip()
4248 4248 if not line:
4249 4249 continue
4250 4250
4251 4251 if line.startswith(b'#'):
4252 4252 continue
4253 4253
4254 4254 if not line.startswith(b' '):
4255 4255 # New block. Flush previous one.
4256 4256 if activeaction:
4257 4257 yield activeaction, blocklines
4258 4258
4259 4259 activeaction = line
4260 4260 blocklines = []
4261 4261 lastindent = 0
4262 4262 continue
4263 4263
4264 4264 # Else we start with an indent.
4265 4265
4266 4266 if not activeaction:
4267 4267 raise error.Abort(_(b'indented line outside of block'))
4268 4268
4269 4269 indent = len(line) - len(line.lstrip())
4270 4270
4271 4271 # If this line is indented more than the last line, concatenate it.
4272 4272 if indent > lastindent and blocklines:
4273 4273 blocklines[-1] += line.lstrip()
4274 4274 else:
4275 4275 blocklines.append(line)
4276 4276 lastindent = indent
4277 4277
4278 4278 # Flush last block.
4279 4279 if activeaction:
4280 4280 yield activeaction, blocklines
4281 4281
4282 4282
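# Editor's illustrative sketch (not part of debugcommands.py): how
# _parsewirelangblocks() above groups a small debugwireproto script into
# (action, lines) blocks.  The script below is a hypothetical example fed
# in through io.BytesIO instead of stdin.
import io

_script = io.BytesIO(
    b'# comment lines are skipped\n'
    b'command listkeys\n'
    b'    namespace bookmarks\n'
    b'raw\n'
    b'    hello\\n\n'
)
for _action, _lines in _parsewirelangblocks(_script):
    print(_action, _lines)
# Expected grouping:
#   b'command listkeys' [b'    namespace bookmarks']
#   b'raw' [b'    hello\\n']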
4283 4283 @command(
4284 4284 b'debugwireproto',
4285 4285 [
4286 4286 (b'', b'localssh', False, _(b'start an SSH server for this repo')),
4287 4287 (b'', b'peer', b'', _(b'construct a specific version of the peer')),
4288 4288 (
4289 4289 b'',
4290 4290 b'noreadstderr',
4291 4291 False,
4292 4292 _(b'do not read from stderr of the remote'),
4293 4293 ),
4294 4294 (
4295 4295 b'',
4296 4296 b'nologhandshake',
4297 4297 False,
4298 4298 _(b'do not log I/O related to the peer handshake'),
4299 4299 ),
4300 4300 ]
4301 4301 + cmdutil.remoteopts,
4302 4302 _(b'[PATH]'),
4303 4303 optionalrepo=True,
4304 4304 )
4305 4305 def debugwireproto(ui, repo, path=None, **opts):
4306 4306 """send wire protocol commands to a server
4307 4307
4308 4308 This command can be used to issue wire protocol commands to remote
4309 4309 peers and to debug the raw data being exchanged.
4310 4310
4311 4311 ``--localssh`` will start an SSH server against the current repository
4312 4312 and connect to that. By default, the connection will perform a handshake
4313 4313 and establish an appropriate peer instance.
4314 4314
4315 4315 ``--peer`` can be used to bypass the handshake protocol and construct a
4316 4316 peer instance using the specified class type. Valid values are ``raw``,
4317 4317 ``ssh1``. ``raw`` instances only allow sending raw data payloads and
4318 4318 don't support higher-level command actions.
4319 4319
4320 4320 ``--noreadstderr`` can be used to disable automatic reading from stderr
4321 4321 of the peer (for SSH connections only). Disabling automatic reading of
4322 4322 stderr is useful for making output more deterministic.
4323 4323
4324 4324 Commands are issued via a mini language which is specified via stdin.
4325 4325 The language consists of individual actions to perform. An action is
4326 4326 defined by a block. A block is defined as a line with no leading
4327 4327 space followed by 0 or more lines with leading space. Blocks are
4328 4328 effectively a high-level command with additional metadata.
4329 4329
4330 4330 Lines beginning with ``#`` are ignored.
4331 4331
4332 4332 The following sections denote available actions.
4333 4333
4334 4334 raw
4335 4335 ---
4336 4336
4337 4337 Send raw data to the server.
4338 4338
4339 4339 The block payload contains the raw data to send as one atomic send
4340 4340 operation. The data may not actually be delivered in a single system
4341 4341 call: it depends on the abilities of the transport being used.
4342 4342
4343 4343 Each line in the block is de-indented and concatenated. Then, that
4344 4344 value is evaluated as a Python b'' literal. This allows the use of
4345 4345 backslash escaping, etc.
4346 4346
4347 4347 raw+
4348 4348 ----
4349 4349
4350 4350 Behaves like ``raw`` except flushes output afterwards.
4351 4351
4352 4352 command <X>
4353 4353 -----------
4354 4354
4355 4355 Send a request to run a named command, whose name follows the ``command``
4356 4356 string.
4357 4357
4358 4358 Arguments to the command are defined as lines in this block. The format of
4359 4359 each line is ``<key> <value>``. e.g.::
4360 4360
4361 4361 command listkeys
4362 4362 namespace bookmarks
4363 4363
4364 4364 If the value begins with ``eval:``, it will be interpreted as a Python
4365 4365 literal expression. Otherwise values are interpreted as Python b'' literals.
4366 4366 This allows sending complex types and encoding special byte sequences via
4367 4367 backslash escaping.
4368 4368
4369 4369 The following arguments have special meaning:
4370 4370
4371 4371 ``PUSHFILE``
4372 4372 When defined, the *push* mechanism of the peer will be used instead
4373 4373 of the static request-response mechanism and the content of the
4374 4374 file specified in the value of this argument will be sent as the
4375 4375 command payload.
4376 4376
4377 4377 This can be used to submit a local bundle file to the remote.
4378 4378
4379 4379 batchbegin
4380 4380 ----------
4381 4381
4382 4382 Instruct the peer to begin a batched send.
4383 4383
4384 4384 All ``command`` blocks are queued for execution until the next
4385 4385 ``batchsubmit`` block.
4386 4386
4387 4387 batchsubmit
4388 4388 -----------
4389 4389
4390 4390 Submit previously queued ``command`` blocks as a batch request.
4391 4391
4392 4392 This action MUST be paired with a ``batchbegin`` action.
4393 4393
4394 4394 httprequest <method> <path>
4395 4395 ---------------------------
4396 4396
4397 4397 (HTTP peer only)
4398 4398
4399 4399 Send an HTTP request to the peer.
4400 4400
4401 4401 The HTTP request line follows the ``httprequest`` action. e.g. ``GET /foo``.
4402 4402
4403 4403 Arguments of the form ``<key>: <value>`` are interpreted as HTTP request
4404 4404 headers to add to the request. e.g. ``Accept: foo``.
4405 4405
4406 4406 The following arguments are special:
4407 4407
4408 4408 ``BODYFILE``
4409 4409 The content of the file defined as the value to this argument will be
4410 4410 transferred verbatim as the HTTP request body.
4411 4411
4412 4412 ``frame <type> <flags> <payload>``
4413 4413 Send a unified protocol frame as part of the request body.
4414 4414
4415 4415 All frames will be collected and sent as the body to the HTTP
4416 4416 request.
4417 4417
4418 4418 close
4419 4419 -----
4420 4420
4421 4421 Close the connection to the server.
4422 4422
4423 4423 flush
4424 4424 -----
4425 4425
4426 4426 Flush data written to the server.
4427 4427
4428 4428 readavailable
4429 4429 -------------
4430 4430
4431 4431 Close the write end of the connection and read all available data from
4432 4432 the server.
4433 4433
4434 4434 If the connection to the server encompasses multiple pipes, we poll both
4435 4435 pipes and read available data.
4436 4436
4437 4437 readline
4438 4438 --------
4439 4439
4440 4440 Read a line of output from the server. If there are multiple output
4441 4441 pipes, reads only the main pipe.
4442 4442
4443 4443 ereadline
4444 4444 ---------
4445 4445
4446 4446 Like ``readline``, but read from the stderr pipe, if available.
4447 4447
4448 4448 read <X>
4449 4449 --------
4450 4450
4451 4451 ``read()`` N bytes from the server's main output pipe.
4452 4452
4453 4453 eread <X>
4454 4454 ---------
4455 4455
4456 4456 ``read()`` N bytes from the server's stderr pipe, if available.
4457 4457
4458 4458 Specifying Unified Frame-Based Protocol Frames
4459 4459 ----------------------------------------------
4460 4460
4461 4461 It is possible to emit a *Unified Frame-Based Protocol* by using special
4462 4462 syntax.
4463 4463
4464 4464 A frame is composed as a type, flags, and payload. These can be parsed
4465 4465 from a string of the form:
4466 4466
4467 4467 <request-id> <stream-id> <stream-flags> <type> <flags> <payload>
4468 4468
4469 4469 ``request-id`` and ``stream-id`` are integers defining the request and
4470 4470 stream identifiers.
4471 4471
4472 4472 ``type`` can be an integer value for the frame type or the string name
4473 4473 of the type. The strings are defined in ``wireprotoframing.py``. e.g.
4474 4474 ``command-name``.
4475 4475
4476 4476 ``stream-flags`` and ``flags`` are a ``|`` delimited list of flag
4477 4477 components. Each component (and there can be just one) can be an integer
4478 4478 or a flag name for stream flags or frame flags, respectively. Values are
4479 4479 resolved to integers and then bitwise OR'd together.
4480 4480
4481 4481 ``payload`` represents the raw frame payload. If it begins with
4482 4482 ``cbor:``, the following string is evaluated as Python code and the
4483 4483 resulting object is fed into a CBOR encoder. Otherwise it is interpreted
4484 4484 as a Python byte string literal.
4485 4485 """
4486 4486 if opts['localssh'] and not repo:
4487 4487 raise error.Abort(_(b'--localssh requires a repository'))
4488 4488
4489 4489 if opts['peer'] and opts['peer'] not in (
4490 4490 b'raw',
4491 4491 b'ssh1',
4492 4492 ):
4493 4493 raise error.Abort(
4494 4494 _(b'invalid value for --peer'),
4495 4495 hint=_(b'valid values are "raw" and "ssh1"'),
4496 4496 )
4497 4497
4498 4498 if path and opts['localssh']:
4499 4499 raise error.Abort(_(b'cannot specify --localssh with an explicit path'))
4500 4500
4501 4501 if ui.interactive():
4502 4502 ui.write(_(b'(waiting for commands on stdin)\n'))
4503 4503
4504 4504 blocks = list(_parsewirelangblocks(ui.fin))
4505 4505
4506 4506 proc = None
4507 4507 stdin = None
4508 4508 stdout = None
4509 4509 stderr = None
4510 4510 opener = None
4511 4511
4512 4512 if opts['localssh']:
4513 4513 # We start the SSH server in its own process so there is process
4514 4514 # separation. This prevents a whole class of potential bugs around
4515 4515 # shared state from interfering with server operation.
4516 4516 args = procutil.hgcmd() + [
4517 4517 b'-R',
4518 4518 repo.root,
4519 4519 b'debugserve',
4520 4520 b'--sshstdio',
4521 4521 ]
4522 4522 proc = subprocess.Popen(
4523 4523 pycompat.rapply(procutil.tonativestr, args),
4524 4524 stdin=subprocess.PIPE,
4525 4525 stdout=subprocess.PIPE,
4526 4526 stderr=subprocess.PIPE,
4527 4527 bufsize=0,
4528 4528 )
4529 4529
4530 4530 stdin = proc.stdin
4531 4531 stdout = proc.stdout
4532 4532 stderr = proc.stderr
4533 4533
4534 4534 # We turn the pipes into observers so we can log I/O.
4535 4535 if ui.verbose or opts['peer'] == b'raw':
4536 4536 stdin = util.makeloggingfileobject(
4537 4537 ui, proc.stdin, b'i', logdata=True
4538 4538 )
4539 4539 stdout = util.makeloggingfileobject(
4540 4540 ui, proc.stdout, b'o', logdata=True
4541 4541 )
4542 4542 stderr = util.makeloggingfileobject(
4543 4543 ui, proc.stderr, b'e', logdata=True
4544 4544 )
4545 4545
4546 4546 # --localssh also implies the peer connection settings.
4547 4547
4548 4548 url = b'ssh://localserver'
4549 4549 autoreadstderr = not opts['noreadstderr']
4550 4550
4551 4551 if opts['peer'] == b'ssh1':
4552 4552 ui.write(_(b'creating ssh peer for wire protocol version 1\n'))
4553 4553 peer = sshpeer.sshv1peer(
4554 4554 ui,
4555 4555 url,
4556 4556 proc,
4557 4557 stdin,
4558 4558 stdout,
4559 4559 stderr,
4560 4560 None,
4561 4561 autoreadstderr=autoreadstderr,
4562 4562 )
4563 4563 elif opts['peer'] == b'raw':
4564 4564 ui.write(_(b'using raw connection to peer\n'))
4565 4565 peer = None
4566 4566 else:
4567 4567 ui.write(_(b'creating ssh peer from handshake results\n'))
4568 4568 peer = sshpeer._make_peer(
4569 4569 ui,
4570 4570 url,
4571 4571 proc,
4572 4572 stdin,
4573 4573 stdout,
4574 4574 stderr,
4575 4575 autoreadstderr=autoreadstderr,
4576 4576 )
4577 4577
4578 4578 elif path:
4579 4579 # We bypass hg.peer() so we can proxy the sockets.
4580 4580 # TODO consider not doing this because we skip
4581 4581 # ``hg.wirepeersetupfuncs`` and potentially other useful functionality.
4582 4582 u = urlutil.url(path)
4583 4583 if u.scheme != b'http':
4584 4584 raise error.Abort(_(b'only http:// paths are currently supported'))
4585 4585
4586 4586 url, authinfo = u.authinfo()
4587 4587 openerargs = {
4588 4588 'useragent': b'Mercurial debugwireproto',
4589 4589 }
4590 4590
4591 4591 # Turn pipes/sockets into observers so we can log I/O.
4592 4592 if ui.verbose:
4593 4593 openerargs.update(
4594 4594 {
4595 4595 'loggingfh': ui,
4596 4596 'loggingname': b's',
4597 4597 'loggingopts': {
4598 4598 'logdata': True,
4599 4599 'logdataapis': False,
4600 4600 },
4601 4601 }
4602 4602 )
4603 4603
4604 4604 if ui.debugflag:
4605 4605 openerargs['loggingopts']['logdataapis'] = True
4606 4606
4607 4607 # Don't send default headers when in raw mode. This allows us to
4608 4608 # bypass most of the behavior of our URL handling code so we can
4609 4609 # have near complete control over what's sent on the wire.
4610 4610 if opts['peer'] == b'raw':
4611 4611 openerargs['sendaccept'] = False
4612 4612
4613 4613 opener = urlmod.opener(ui, authinfo, **openerargs)
4614 4614
4615 4615 if opts['peer'] == b'raw':
4616 4616 ui.write(_(b'using raw connection to peer\n'))
4617 4617 peer = None
4618 4618 elif opts['peer']:
4619 4619 raise error.Abort(
4620 4620 _(b'--peer %s not supported with HTTP peers') % opts['peer']
4621 4621 )
4622 4622 else:
4623 4623 peer_path = urlutil.try_path(ui, path)
4624 4624 peer = httppeer._make_peer(ui, peer_path, opener=opener)
4625 4625
4626 4626 # We /could/ populate stdin/stdout with sock.makefile()...
4627 4627 else:
4628 4628 raise error.Abort(_(b'unsupported connection configuration'))
4629 4629
4630 4630 batchedcommands = None
4631 4631
4632 4632 # Now perform actions based on the parsed wire language instructions.
4633 4633 for action, lines in blocks:
4634 4634 if action in (b'raw', b'raw+'):
4635 4635 if not stdin:
4636 4636 raise error.Abort(_(b'cannot call raw/raw+ on this peer'))
4637 4637
4638 4638 # Concatenate the data together.
4639 4639 data = b''.join(l.lstrip() for l in lines)
4640 4640 data = stringutil.unescapestr(data)
4641 4641 stdin.write(data)
4642 4642
4643 4643 if action == b'raw+':
4644 4644 stdin.flush()
4645 4645 elif action == b'flush':
4646 4646 if not stdin:
4647 4647 raise error.Abort(_(b'cannot call flush on this peer'))
4648 4648 stdin.flush()
4649 4649 elif action.startswith(b'command'):
4650 4650 if not peer:
4651 4651 raise error.Abort(
4652 4652 _(
4653 4653 b'cannot send commands unless peer instance '
4654 4654 b'is available'
4655 4655 )
4656 4656 )
4657 4657
4658 4658 command = action.split(b' ', 1)[1]
4659 4659
4660 4660 args = {}
4661 4661 for line in lines:
4662 4662 # We need to allow empty values.
4663 4663 fields = line.lstrip().split(b' ', 1)
4664 4664 if len(fields) == 1:
4665 4665 key = fields[0]
4666 4666 value = b''
4667 4667 else:
4668 4668 key, value = fields
4669 4669
4670 4670 if value.startswith(b'eval:'):
4671 4671 value = stringutil.evalpythonliteral(value[5:])
4672 4672 else:
4673 4673 value = stringutil.unescapestr(value)
4674 4674
4675 4675 args[key] = value
4676 4676
4677 4677 if batchedcommands is not None:
4678 4678 batchedcommands.append((command, args))
4679 4679 continue
4680 4680
4681 4681 ui.status(_(b'sending %s command\n') % command)
4682 4682
4683 4683 if b'PUSHFILE' in args:
4684 4684 with open(args[b'PUSHFILE'], 'rb') as fh:
4685 4685 del args[b'PUSHFILE']
4686 4686 res, output = peer._callpush(
4687 4687 command, fh, **pycompat.strkwargs(args)
4688 4688 )
4689 4689 ui.status(_(b'result: %s\n') % stringutil.escapestr(res))
4690 4690 ui.status(
4691 4691 _(b'remote output: %s\n') % stringutil.escapestr(output)
4692 4692 )
4693 4693 else:
4694 4694 with peer.commandexecutor() as e:
4695 4695 res = e.callcommand(command, args).result()
4696 4696
4697 4697 ui.status(
4698 4698 _(b'response: %s\n')
4699 4699 % stringutil.pprint(res, bprefix=True, indent=2)
4700 4700 )
4701 4701
4702 4702 elif action == b'batchbegin':
4703 4703 if batchedcommands is not None:
4704 4704 raise error.Abort(_(b'nested batchbegin not allowed'))
4705 4705
4706 4706 batchedcommands = []
4707 4707 elif action == b'batchsubmit':
4708 4708 # There is a batching API we could go through. But it would be
4709 4709 # difficult to normalize requests into function calls. It is easier
4710 4710 # to bypass this layer and normalize to commands + args.
4711 4711 ui.status(
4712 4712 _(b'sending batch with %d sub-commands\n')
4713 4713 % len(batchedcommands)
4714 4714 )
4715 4715 assert peer is not None
4716 4716 for i, chunk in enumerate(peer._submitbatch(batchedcommands)):
4717 4717 ui.status(
4718 4718 _(b'response #%d: %s\n') % (i, stringutil.escapestr(chunk))
4719 4719 )
4720 4720
4721 4721 batchedcommands = None
4722 4722
4723 4723 elif action.startswith(b'httprequest '):
4724 4724 if not opener:
4725 4725 raise error.Abort(
4726 4726 _(b'cannot use httprequest without an HTTP peer')
4727 4727 )
4728 4728
4729 4729 request = action.split(b' ', 2)
4730 4730 if len(request) != 3:
4731 4731 raise error.Abort(
4732 4732 _(
4733 4733 b'invalid httprequest: expected format is '
4734 4734 b'"httprequest <method> <path>'
4735 4735 )
4736 4736 )
4737 4737
4738 4738 method, httppath = request[1:]
4739 4739 headers = {}
4740 4740 body = None
4741 4741 frames = []
4742 4742 for line in lines:
4743 4743 line = line.lstrip()
4744 4744 m = re.match(b'^([a-zA-Z0-9_-]+): (.*)$', line)
4745 4745 if m:
4746 4746 # Headers need to use native strings.
4747 4747 key = pycompat.strurl(m.group(1))
4748 4748 value = pycompat.strurl(m.group(2))
4749 4749 headers[key] = value
4750 4750 continue
4751 4751
4752 4752 if line.startswith(b'BODYFILE '):
4753 4753 with open(line.split(b' ', 1)[1], b'rb') as fh:
4754 4754 body = fh.read()
4755 4755 elif line.startswith(b'frame '):
4756 4756 frame = wireprotoframing.makeframefromhumanstring(
4757 4757 line[len(b'frame ') :]
4758 4758 )
4759 4759
4760 4760 frames.append(frame)
4761 4761 else:
4762 4762 raise error.Abort(
4763 4763 _(b'unknown argument to httprequest: %s') % line
4764 4764 )
4765 4765
4766 4766 url = path + httppath
4767 4767
4768 4768 if frames:
4769 4769 body = b''.join(bytes(f) for f in frames)
4770 4770
4771 4771 req = urlmod.urlreq.request(pycompat.strurl(url), body, headers)
4772 4772
4773 4773 # urllib.Request insists on using has_data() as a proxy for
4774 4774 # determining the request method. Override that to use our
4775 4775 # explicitly requested method.
4776 4776 req.get_method = lambda: pycompat.sysstr(method)
4777 4777
4778 4778 try:
4779 4779 res = opener.open(req)
4780 4780 body = res.read()
4781 4781 except util.urlerr.urlerror as e:
4782 4782 # read() method must be called, but only exists in Python 2
4783 4783 getattr(e, 'read', lambda: None)()
4784 4784 continue
4785 4785
4786 4786 ct = res.headers.get('Content-Type')
4787 4787 if ct == 'application/mercurial-cbor':
4788 4788 ui.write(
4789 4789 _(b'cbor> %s\n')
4790 4790 % stringutil.pprint(
4791 4791 cborutil.decodeall(body), bprefix=True, indent=2
4792 4792 )
4793 4793 )
4794 4794
4795 4795 elif action == b'close':
4796 4796 assert peer is not None
4797 4797 peer.close()
4798 4798 elif action == b'readavailable':
4799 4799 if not stdout or not stderr:
4800 4800 raise error.Abort(
4801 4801 _(b'readavailable not available on this peer')
4802 4802 )
4803 4803
4804 4804 stdin.close()
4805 4805 stdout.read()
4806 4806 stderr.read()
4807 4807
4808 4808 elif action == b'readline':
4809 4809 if not stdout:
4810 4810 raise error.Abort(_(b'readline not available on this peer'))
4811 4811 stdout.readline()
4812 4812 elif action == b'ereadline':
4813 4813 if not stderr:
4814 4814 raise error.Abort(_(b'ereadline not available on this peer'))
4815 4815 stderr.readline()
4816 4816 elif action.startswith(b'read '):
4817 4817 count = int(action.split(b' ', 1)[1])
4818 4818 if not stdout:
4819 4819 raise error.Abort(_(b'read not available on this peer'))
4820 4820 stdout.read(count)
4821 4821 elif action.startswith(b'eread '):
4822 4822 count = int(action.split(b' ', 1)[1])
4823 4823 if not stderr:
4824 4824 raise error.Abort(_(b'eread not available on this peer'))
4825 4825 stderr.read(count)
4826 4826 else:
4827 4827 raise error.Abort(_(b'unknown action: %s') % action)
4828 4828
4829 4829 if batchedcommands is not None:
4830 4830 raise error.Abort(_(b'unclosed "batchbegin" request'))
4831 4831
4832 4832 if peer:
4833 4833 peer.close()
4834 4834
4835 4835 if proc:
4836 4836 proc.kill()
@@ -1,3708 +1,3714 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # bare references to all the names below, to silence pyflakes "unused" warnings
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the "pure" python implementation to be "fast", because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration values about how the revlog data is read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are revisions delta-encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration values about how new deltas are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help keep each object
314 314 self-contained.
315 315 """
316 316
317 317 # can delta be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate group by chunk of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
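# A minimal standalone sketch of how the three config objects above fit
# together: each revlog gets one FeatureConfig, one DataConfig and one
# DeltaConfig, either copied from opener options or freshly built with
# defaults (see revlog.__init__ below). The values passed here are
# illustrative assumptions only.
def _example_build_configs():
    feature = FeatureConfig(censorable=True)
    data = DataConfig(mmap_large_index=True, mmap_index_threshold=1_000_000)
    delta = DeltaConfig(general_delta=True, sparse_revlog=True)
    # copy() returns an independent instance, so mutating one revlog's
    # configuration does not leak into another revlog's.
    return feature.copy(), data.copy(), delta.copy()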
340 340 class revlog:
341 341 """
342 342 the underlying revision storage object
343 343
344 344 A revlog consists of two parts, an index and the revision data.
345 345
346 346 The index is a file with a fixed record size containing
347 347 information on each revision, including its nodeid (hash), the
348 348 nodeids of its parents, the position and offset of its data within
349 349 the data file, and the revision it's based on. Finally, each entry
350 350 contains a linkrev entry that can serve as a pointer to external
351 351 data.
352 352
353 353 The revision data itself is a linear collection of data chunks.
354 354 Each chunk represents a revision and is usually represented as a
355 355 delta against the previous chunk. To bound lookup time, runs of
356 356 deltas are limited to about 2 times the length of the original
357 357 version data. This makes retrieval of a version proportional to
358 358 its size, or O(1) relative to the number of revisions.
359 359
360 360 Both pieces of the revlog are written to in an append-only
361 361 fashion, which means we never need to rewrite a file to insert or
362 362 remove data, and can use some simple techniques to avoid the need
363 363 for locking while reading.
364 364
365 365 If checkambig, indexfile is opened with checkambig=True at
366 366 writing, to avoid file stat ambiguity.
367 367
368 368 If mmaplargeindex is True, and an mmapindexthreshold is set, the
369 369 index will be mmapped rather than read if it is larger than the
370 370 configured threshold.
371 371
372 372 If censorable is True, the revlog can have censored revisions.
373 373
374 374 If `upperboundcomp` is not None, this is the expected maximal gain from
375 375 compression for the data content.
376 376
377 377 `concurrencychecker` is an optional function that receives 3 arguments: a
378 378 file handle, a filename, and an expected position. It should check whether
379 379 the current position in the file handle is valid, and log/warn/fail (by
380 380 raising).
381 381
382 382 See mercurial/revlogutils/constants.py for details about the content of an
383 383 index entry.
384 384 """
385 385
386 386 _flagserrorclass = error.RevlogError
387 387
388 388 @staticmethod
389 389 def is_inline_index(header_bytes):
390 390 """Determine if a revlog is inline from the initial bytes of the index"""
391 391 header = INDEX_HEADER.unpack(header_bytes)[0]
392 392
393 393 _format_flags = header & ~0xFFFF
394 394 _format_version = header & 0xFFFF
395 395
396 396 features = FEATURES_BY_VERSION[_format_version]
397 397 return features[b'inline'](_format_flags)
398 398
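# A standalone sketch of the index-header layout relied on above: the low
# 16 bits hold the format version, the high 16 bits hold feature flags such
# as FLAG_INLINE_DATA. The constant values below are illustrative
# assumptions; the real ones live in revlogutils/constants.py.
def _example_header_roundtrip():
    import struct

    header_struct = struct.Struct(">I")  # assumed 4-byte big-endian header
    version_guess = 1  # stand-in for REVLOGV1
    inline_flag_guess = 1 << 16  # stand-in for FLAG_INLINE_DATA
    raw = header_struct.pack(version_guess | inline_flag_guess)
    header = header_struct.unpack(raw)[0]
    assert header & 0xFFFF == version_guess  # format version
    assert header & ~0xFFFF == inline_flag_guess  # format flags
    return header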
399 399 def __init__(
400 400 self,
401 401 opener,
402 402 target,
403 403 radix,
404 404 postfix=None, # only exist for `tmpcensored` now
405 405 checkambig=False,
406 406 mmaplargeindex=False,
407 407 censorable=False,
408 408 upperboundcomp=None,
409 409 persistentnodemap=False,
410 410 concurrencychecker=None,
411 411 trypending=False,
412 412 try_split=False,
413 413 canonical_parent_order=True,
414 414 ):
415 415 """
416 416 create a revlog object
417 417
418 418 opener is a function that abstracts the file opening operation
419 419 and can be used to implement COW semantics or the like.
420 420
421 421 `target`: a (KIND, ID) tuple that identifies the content stored in
422 422 this revlog. It helps the rest of the code understand what the revlog
423 423 is about without having to resort to heuristics and index filename
424 424 analysis. Note that this must be reliably set by normal code, but
425 425 test, debug, or performance measurement code might not set this to an
426 426 accurate value.
427 427 """
428 428 self.upperboundcomp = upperboundcomp
429 429
430 430 self.radix = radix
431 431
432 432 self._docket_file = None
433 433 self._indexfile = None
434 434 self._datafile = None
435 435 self._sidedatafile = None
436 436 self._nodemap_file = None
437 437 self.postfix = postfix
438 438 self._trypending = trypending
439 439 self._try_split = try_split
440 440 self.opener = opener
441 441 if persistentnodemap:
442 442 self._nodemap_file = nodemaputil.get_nodemap_file(self)
443 443
444 444 assert target[0] in ALL_KINDS
445 445 assert len(target) == 2
446 446 self.target = target
447 447 if b'feature-config' in self.opener.options:
448 448 self.feature_config = self.opener.options[b'feature-config'].copy()
449 449 else:
450 450 self.feature_config = FeatureConfig()
451 451 self.feature_config.censorable = censorable
452 452 self.feature_config.canonical_parent_order = canonical_parent_order
453 453 if b'data-config' in self.opener.options:
454 454 self.data_config = self.opener.options[b'data-config'].copy()
455 455 else:
456 456 self.data_config = DataConfig()
457 457 self.data_config.check_ambig = checkambig
458 458 self.data_config.mmap_large_index = mmaplargeindex
459 459 if b'delta-config' in self.opener.options:
460 460 self.delta_config = self.opener.options[b'delta-config'].copy()
461 461 else:
462 462 self.delta_config = DeltaConfig()
463 463
464 464 # 3-tuple of (node, rev, text) for a raw revision.
465 465 self._revisioncache = None
466 466 # Maps rev to chain base rev.
467 467 self._chainbasecache = util.lrucachedict(100)
468 468 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
469 469 self._chunkcache = (0, b'')
470 470
471 471 self.index = None
472 472 self._docket = None
473 473 self._nodemap_docket = None
474 474 # Mapping of partial identifiers to full nodes.
475 475 self._pcache = {}
476 476
477 477 # other optional features
478 478
479 479 # Make copy of flag processors so each revlog instance can support
480 480 # custom flags.
481 481 self._flagprocessors = dict(flagutil.flagprocessors)
482 482
483 483 # 3-tuple of file handles being used for active writing.
484 484 self._writinghandles = None
485 485 # prevent nesting of addgroup
486 486 self._adding_group = None
487 487
488 488 self._loadindex()
489 489
490 490 self._concurrencychecker = concurrencychecker
491 491
492 492 @property
493 493 def _generaldelta(self):
494 494 """temporary compatibility proxy"""
495 495 return self.delta_config.general_delta
496 496
497 497 @property
498 498 def _checkambig(self):
499 499 """temporary compatibility proxy"""
500 500 return self.data_config.check_ambig
501 501
502 502 @property
503 503 def _mmaplargeindex(self):
504 504 """temporary compatibility proxy"""
505 505 return self.data_config.mmap_large_index
506 506
507 507 @property
508 508 def _censorable(self):
509 509 """temporary compatibility proxy"""
510 510 return self.feature_config.censorable
511 511
512 512 @property
513 513 def _chunkcachesize(self):
514 514 """temporary compatibility proxy"""
515 515 return self.data_config.chunk_cache_size
516 516
517 517 @property
518 518 def _maxchainlen(self):
519 519 """temporary compatibility proxy"""
520 520 return self.delta_config.max_chain_len
521 521
522 522 @property
523 523 def _deltabothparents(self):
524 524 """temporary compatibility proxy"""
525 525 return self.delta_config.delta_both_parents
526 526
527 527 @property
528 528 def _candidate_group_chunk_size(self):
529 529 """temporary compatibility proxy"""
530 530 return self.delta_config.candidate_group_chunk_size
531 531
532 532 @property
533 533 def _debug_delta(self):
534 534 """temporary compatibility proxy"""
535 535 return self.delta_config.debug_delta
536 536
537 537 @property
538 538 def _compengine(self):
539 539 """temporary compatibility proxy"""
540 540 return self.feature_config.compression_engine
541 541
542 542 @property
543 543 def _compengineopts(self):
544 544 """temporary compatibility proxy"""
545 545 return self.feature_config.compression_engine_options
546 546
547 547 @property
548 548 def _maxdeltachainspan(self):
549 549 """temporary compatibility proxy"""
550 550 return self.delta_config.max_deltachain_span
551 551
552 552 @property
553 553 def _withsparseread(self):
554 554 """temporary compatibility proxy"""
555 555 return self.data_config.with_sparse_read
556 556
557 557 @property
558 558 def _sparserevlog(self):
559 559 """temporary compatibility proxy"""
560 560 return self.delta_config.sparse_revlog
561 561
562 562 @property
563 563 def hassidedata(self):
564 564 """temporary compatibility proxy"""
565 565 return self.feature_config.has_side_data
566 566
567 567 @property
568 568 def _srdensitythreshold(self):
569 569 """temporary compatibility proxy"""
570 570 return self.data_config.sr_density_threshold
571 571
572 572 @property
573 573 def _srmingapsize(self):
574 574 """temporary compatibility proxy"""
575 575 return self.data_config.sr_min_gap_size
576 576
577 577 @property
578 578 def _compute_rank(self):
579 579 """temporary compatibility proxy"""
580 580 return self.feature_config.compute_rank
581 581
582 582 @property
583 583 def canonical_parent_order(self):
584 584 """temporary compatibility proxy"""
585 585 return self.feature_config.canonical_parent_order
586 586
587 587 @property
588 588 def _lazydelta(self):
589 589 """temporary compatibility proxy"""
590 590 return self.delta_config.lazy_delta
591 591
592 592 @property
593 593 def _lazydeltabase(self):
594 594 """temporary compatibility proxy"""
595 595 return self.delta_config.lazy_delta_base
596 596
597 597 def _init_opts(self):
598 598 """process options (from above/config) to set up the associated default revlog mode
599 599
600 600 These values might be affected when actually reading on disk information.
601 601
602 602 The relevant values are returned for use in _loadindex().
603 603
604 604 * newversionflags:
605 605 version header to use if we need to create a new revlog
606 606
607 607 * mmapindexthreshold:
608 608 minimal index size at which to start using mmap
609 609
610 610 * force_nodemap:
611 611 force the usage of a "development" version of the nodemap code
612 612 """
613 613 opts = self.opener.options
614 614
615 615 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
616 616 new_header = CHANGELOGV2
617 617 compute_rank = opts.get(b'changelogv2.compute-rank', True)
618 618 self.feature_config.compute_rank = compute_rank
619 619 elif b'revlogv2' in opts:
620 620 new_header = REVLOGV2
621 621 elif b'revlogv1' in opts:
622 622 new_header = REVLOGV1 | FLAG_INLINE_DATA
623 623 if b'generaldelta' in opts:
624 624 new_header |= FLAG_GENERALDELTA
625 625 elif b'revlogv0' in self.opener.options:
626 626 new_header = REVLOGV0
627 627 else:
628 628 new_header = REVLOG_DEFAULT_VERSION
629 629
630 630 mmapindexthreshold = None
631 631 if self._mmaplargeindex:
632 632 mmapindexthreshold = self.data_config.mmap_index_threshold
633 633 if self.feature_config.enable_ellipsis:
634 634 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
635 635
636 636 # revlog v0 doesn't have flag processors
637 637 for flag, processor in opts.get(b'flagprocessors', {}).items():
638 638 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
639 639
640 640 chunk_cache_size = self.data_config.chunk_cache_size
641 641 if chunk_cache_size <= 0:
642 642 raise error.RevlogError(
643 643 _(b'revlog chunk cache size %r is not greater than 0')
644 644 % chunk_cache_size
645 645 )
646 646 elif chunk_cache_size & (chunk_cache_size - 1):
647 647 raise error.RevlogError(
648 648 _(b'revlog chunk cache size %r is not a power of 2')
649 649 % chunk_cache_size
650 650 )
651 651 force_nodemap = opts.get(b'devel-force-nodemap', False)
652 652 return new_header, mmapindexthreshold, force_nodemap
653 653
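# A standalone sketch of the power-of-two check used for the chunk cache
# size above: for a positive n, "n & (n - 1)" clears the lowest set bit, so
# the result is zero exactly when n has a single bit set.
def _example_is_power_of_two(n):
    return n > 0 and (n & (n - 1)) == 0

# e.g. the default chunk cache size passes: _example_is_power_of_two(65536)
# is True, while _example_is_power_of_two(65535) is False.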
654 654 def _get_data(self, filepath, mmap_threshold, size=None):
655 655 """return a file content with or without mmap
656 656
657 657 If the file is missing, return the empty string"""
658 658 try:
659 659 with self.opener(filepath) as fp:
660 660 if mmap_threshold is not None:
661 661 file_size = self.opener.fstat(fp).st_size
662 662 if file_size >= mmap_threshold:
663 663 if size is not None:
664 664 # avoid potential mmap crash
665 665 size = min(file_size, size)
666 666 # TODO: should .close() to release resources without
667 667 # relying on Python GC
668 668 if size is None:
669 669 return util.buffer(util.mmapread(fp))
670 670 else:
671 671 return util.buffer(util.mmapread(fp, size))
672 672 if size is None:
673 673 return fp.read()
674 674 else:
675 675 return fp.read(size)
676 676 except FileNotFoundError:
677 677 return b''
678 678
679 679 def get_streams(self, max_linkrev, force_inline=False):
680 680 """return a list of streams that represent this revlog
681 681
682 682 This is used by stream-clone to do bytes to bytes copies of a repository.
683 683
684 684 This streams data for all revisions that refer to a changelog revision up
685 685 to `max_linkrev`.
686 686
687 687 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
688 688
689 689 It returns a list of three-tuples:
690 690
691 691 [
692 692 (filename, bytes_stream, stream_size),
693 693
694 694 ]
695 695 """
696 696 n = len(self)
697 697 index = self.index
698 698 while n > 0:
699 699 linkrev = index[n - 1][4]
700 700 if linkrev < max_linkrev:
701 701 break
702 702 # note: this loop will rarely go through multiple iterations, since
703 703 # it only traverses commits created during the current streaming
704 704 # pull operation.
705 705 #
706 706 # If this becomes a problem, using a binary search should cap the
707 707 # runtime of this.
708 708 n = n - 1
709 709 if n == 0:
710 710 # no data to send
711 711 return []
712 712 index_size = n * index.entry_size
713 713 data_size = self.end(n - 1)
714 714
715 715 # XXX we might have been split (or stripped) since the object
716 716 # initialization. We need to close this race too, e.g. by having a way to
717 717 # pre-open the files we feed to the revlog and never closing them before
718 718 # we are done streaming.
719 719
720 720 if self._inline:
721 721
722 722 def get_stream():
723 723 with self._indexfp() as fp:
724 724 yield None
725 725 size = index_size + data_size
726 726 if size <= 65536:
727 727 yield fp.read(size)
728 728 else:
729 729 yield from util.filechunkiter(fp, limit=size)
730 730
731 731 inline_stream = get_stream()
732 732 next(inline_stream)
733 733 return [
734 734 (self._indexfile, inline_stream, index_size + data_size),
735 735 ]
736 736 elif force_inline:
737 737
738 738 def get_stream():
739 739 with self.reading():
740 740 yield None
741 741
742 742 for rev in range(n):
743 743 idx = self.index.entry_binary(rev)
744 744 if rev == 0 and self._docket is None:
745 745 # re-inject the inline flag
746 746 header = self._format_flags
747 747 header |= self._format_version
748 748 header |= FLAG_INLINE_DATA
749 749 header = self.index.pack_header(header)
750 750 idx = header + idx
751 751 yield idx
752 752 yield self._getsegmentforrevs(rev, rev)[1]
753 753
754 754 inline_stream = get_stream()
755 755 next(inline_stream)
756 756 return [
757 757 (self._indexfile, inline_stream, index_size + data_size),
758 758 ]
759 759 else:
760 760
761 761 def get_index_stream():
762 762 with self._indexfp() as fp:
763 763 yield None
764 764 if index_size <= 65536:
765 765 yield fp.read(index_size)
766 766 else:
767 767 yield from util.filechunkiter(fp, limit=index_size)
768 768
769 769 def get_data_stream():
770 770 with self._datafp() as fp:
771 771 yield None
772 772 if data_size <= 65536:
773 773 yield fp.read(data_size)
774 774 else:
775 775 yield from util.filechunkiter(fp, limit=data_size)
776 776
777 777 index_stream = get_index_stream()
778 778 next(index_stream)
779 779 data_stream = get_data_stream()
780 780 next(data_stream)
781 781 return [
782 782 (self._datafile, data_stream, data_size),
783 783 (self._indexfile, index_stream, index_size),
784 784 ]
785 785
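# A hedged usage sketch for get_streams() as documented above. `revlog_obj`
# and `dest_opener` are placeholder names (an already-open revlog and a
# writable vfs/opener); they are not defined in this module.
def _example_copy_streams(revlog_obj, dest_opener, max_linkrev):
    for name, stream, size in revlog_obj.get_streams(max_linkrev):
        with dest_opener(name, b'wb') as out:
            for chunk in stream:
                out.write(chunk)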
786 786 def _loadindex(self, docket=None):
787 787
788 788 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
789 789
790 790 if self.postfix is not None:
791 791 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
792 792 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
793 793 entry_point = b'%s.i.a' % self.radix
794 794 elif self._try_split and self.opener.exists(self._split_index_file):
795 795 entry_point = self._split_index_file
796 796 else:
797 797 entry_point = b'%s.i' % self.radix
798 798
799 799 if docket is not None:
800 800 self._docket = docket
801 801 self._docket_file = entry_point
802 802 else:
803 803 self._initempty = True
804 804 entry_data = self._get_data(entry_point, mmapindexthreshold)
805 805 if len(entry_data) > 0:
806 806 header = INDEX_HEADER.unpack(entry_data[:4])[0]
807 807 self._initempty = False
808 808 else:
809 809 header = new_header
810 810
811 811 self._format_flags = header & ~0xFFFF
812 812 self._format_version = header & 0xFFFF
813 813
814 814 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
815 815 if supported_flags is None:
816 816 msg = _(b'unknown version (%d) in revlog %s')
817 817 msg %= (self._format_version, self.display_id)
818 818 raise error.RevlogError(msg)
819 819 elif self._format_flags & ~supported_flags:
820 820 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
821 821 display_flag = self._format_flags >> 16
822 822 msg %= (display_flag, self._format_version, self.display_id)
823 823 raise error.RevlogError(msg)
824 824
825 825 features = FEATURES_BY_VERSION[self._format_version]
826 826 self._inline = features[b'inline'](self._format_flags)
827 827 self.delta_config.general_delta = features[b'generaldelta'](
828 828 self._format_flags
829 829 )
830 830 self.feature_config.has_side_data = features[b'sidedata']
831 831
832 832 if not features[b'docket']:
833 833 self._indexfile = entry_point
834 834 index_data = entry_data
835 835 else:
836 836 self._docket_file = entry_point
837 837 if self._initempty:
838 838 self._docket = docketutil.default_docket(self, header)
839 839 else:
840 840 self._docket = docketutil.parse_docket(
841 841 self, entry_data, use_pending=self._trypending
842 842 )
843 843
844 844 if self._docket is not None:
845 845 self._indexfile = self._docket.index_filepath()
846 846 index_data = b''
847 847 index_size = self._docket.index_end
848 848 if index_size > 0:
849 849 index_data = self._get_data(
850 850 self._indexfile, mmapindexthreshold, size=index_size
851 851 )
852 852 if len(index_data) < index_size:
853 853 msg = _(b'too few index data for %s: got %d, expected %d')
854 854 msg %= (self.display_id, len(index_data), index_size)
855 855 raise error.RevlogError(msg)
856 856
857 857 self._inline = False
858 858 # generaldelta implied by version 2 revlogs.
859 859 self.delta_config.general_delta = True
860 860 # the logic for persistent nodemap will be dealt with within the
861 861 # main docket, so disable it for now.
862 862 self._nodemap_file = None
863 863
864 864 if self._docket is not None:
865 865 self._datafile = self._docket.data_filepath()
866 866 self._sidedatafile = self._docket.sidedata_filepath()
867 867 elif self.postfix is None:
868 868 self._datafile = b'%s.d' % self.radix
869 869 else:
870 870 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
871 871
872 872 self.nodeconstants = sha1nodeconstants
873 873 self.nullid = self.nodeconstants.nullid
874 874
875 875 # sparse-revlog can't be on without general-delta (issue6056)
876 if not self._generaldelta:
876 if not self.delta_config.general_delta:
877 877 self.delta_config.sparse_revlog = False
878 878
879 879 self._storedeltachains = True
880 880
881 881 devel_nodemap = (
882 882 self._nodemap_file
883 883 and force_nodemap
884 884 and parse_index_v1_nodemap is not None
885 885 )
886 886
887 887 use_rust_index = False
888 888 if rustrevlog is not None:
889 889 if self._nodemap_file is not None:
890 890 use_rust_index = True
891 891 else:
892 892 use_rust_index = self.opener.options.get(b'rust.index')
893 893
894 894 self._parse_index = parse_index_v1
895 895 if self._format_version == REVLOGV0:
896 896 self._parse_index = revlogv0.parse_index_v0
897 897 elif self._format_version == REVLOGV2:
898 898 self._parse_index = parse_index_v2
899 899 elif self._format_version == CHANGELOGV2:
900 900 self._parse_index = parse_index_cl_v2
901 901 elif devel_nodemap:
902 902 self._parse_index = parse_index_v1_nodemap
903 903 elif use_rust_index:
904 904 self._parse_index = parse_index_v1_mixed
905 905 try:
906 906 d = self._parse_index(index_data, self._inline)
907 907 index, chunkcache = d
908 908 use_nodemap = (
909 909 not self._inline
910 910 and self._nodemap_file is not None
911 911 and hasattr(index, 'update_nodemap_data')
912 912 )
913 913 if use_nodemap:
914 914 nodemap_data = nodemaputil.persisted_data(self)
915 915 if nodemap_data is not None:
916 916 docket = nodemap_data[0]
917 917 if (
918 918 len(d[0]) > docket.tip_rev
919 919 and d[0][docket.tip_rev][7] == docket.tip_node
920 920 ):
921 921 # no changelog tampering
922 922 self._nodemap_docket = docket
923 923 index.update_nodemap_data(*nodemap_data)
924 924 except (ValueError, IndexError):
925 925 raise error.RevlogError(
926 926 _(b"index %s is corrupted") % self.display_id
927 927 )
928 928 self.index = index
929 929 self._segmentfile = randomaccessfile.randomaccessfile(
930 930 self.opener,
931 931 (self._indexfile if self._inline else self._datafile),
932 932 self._chunkcachesize,
933 933 chunkcache,
934 934 )
935 935 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
936 936 self.opener,
937 937 self._sidedatafile,
938 938 self._chunkcachesize,
939 939 )
940 940 # revnum -> (chain-length, sum-delta-length)
941 941 self._chaininfocache = util.lrucachedict(500)
942 942 # revlog header -> revlog compressor
943 943 self._decompressors = {}
944 944
945 945 def get_revlog(self):
946 946 """simple function to mirror API of other not-really-revlog API"""
947 947 return self
948 948
949 949 @util.propertycache
950 950 def revlog_kind(self):
951 951 return self.target[0]
952 952
953 953 @util.propertycache
954 954 def display_id(self):
955 955 """The public facing "ID" of the revlog that we use in message"""
956 956 if self.revlog_kind == KIND_FILELOG:
957 957 # Reference the file without the "data/" prefix, so it is familiar
958 958 # to the user.
959 959 return self.target[1]
960 960 else:
961 961 return self.radix
962 962
963 963 def _get_decompressor(self, t):
964 964 try:
965 965 compressor = self._decompressors[t]
966 966 except KeyError:
967 967 try:
968 968 engine = util.compengines.forrevlogheader(t)
969 969 compressor = engine.revlogcompressor(self._compengineopts)
970 970 self._decompressors[t] = compressor
971 971 except KeyError:
972 972 raise error.RevlogError(
973 973 _(b'unknown compression type %s') % binascii.hexlify(t)
974 974 )
975 975 return compressor
976 976
977 977 @util.propertycache
978 978 def _compressor(self):
979 979 engine = util.compengines[self._compengine]
980 980 return engine.revlogcompressor(self._compengineopts)
981 981
982 982 @util.propertycache
983 983 def _decompressor(self):
984 984 """the default decompressor"""
985 985 if self._docket is None:
986 986 return None
987 987 t = self._docket.default_compression_header
988 988 c = self._get_decompressor(t)
989 989 return c.decompress
990 990
991 991 def _indexfp(self):
992 992 """file object for the revlog's index file"""
993 993 return self.opener(self._indexfile, mode=b"r")
994 994
995 995 def __index_write_fp(self):
996 996 # You should not use this directly and use `_writing` instead
997 997 try:
998 998 f = self.opener(
999 999 self._indexfile, mode=b"r+", checkambig=self._checkambig
1000 1000 )
1001 1001 if self._docket is None:
1002 1002 f.seek(0, os.SEEK_END)
1003 1003 else:
1004 1004 f.seek(self._docket.index_end, os.SEEK_SET)
1005 1005 return f
1006 1006 except FileNotFoundError:
1007 1007 return self.opener(
1008 1008 self._indexfile, mode=b"w+", checkambig=self._checkambig
1009 1009 )
1010 1010
1011 1011 def __index_new_fp(self):
1012 1012 # You should not use this unless you are upgrading from inline revlog
1013 1013 return self.opener(
1014 1014 self._indexfile,
1015 1015 mode=b"w",
1016 1016 checkambig=self._checkambig,
1017 1017 atomictemp=True,
1018 1018 )
1019 1019
1020 1020 def _datafp(self, mode=b'r'):
1021 1021 """file object for the revlog's data file"""
1022 1022 return self.opener(self._datafile, mode=mode)
1023 1023
1024 1024 @contextlib.contextmanager
1025 1025 def _sidedatareadfp(self):
1026 1026 """file object suitable to read sidedata"""
1027 1027 if self._writinghandles:
1028 1028 yield self._writinghandles[2]
1029 1029 else:
1030 1030 with self.opener(self._sidedatafile) as fp:
1031 1031 yield fp
1032 1032
1033 1033 def tiprev(self):
1034 1034 return len(self.index) - 1
1035 1035
1036 1036 def tip(self):
1037 1037 return self.node(self.tiprev())
1038 1038
1039 1039 def __contains__(self, rev):
1040 1040 return 0 <= rev < len(self)
1041 1041
1042 1042 def __len__(self):
1043 1043 return len(self.index)
1044 1044
1045 1045 def __iter__(self):
1046 1046 return iter(range(len(self)))
1047 1047
1048 1048 def revs(self, start=0, stop=None):
1049 1049 """iterate over all rev in this revlog (from start to stop)"""
1050 1050 return storageutil.iterrevs(len(self), start=start, stop=stop)
1051 1051
1052 1052 def hasnode(self, node):
1053 1053 try:
1054 1054 self.rev(node)
1055 1055 return True
1056 1056 except KeyError:
1057 1057 return False
1058 1058
1059 1059 def _candelta(self, baserev, rev):
1060 1060 """whether two revisions (baserev, rev) can be delta-ed or not"""
1061 1061 # Disable delta if either rev requires a content-changing flag
1062 1062 # processor (ex. LFS). This is because such flag processor can alter
1063 1063 # the rawtext content that the delta will be based on, and two clients
1064 1064 # could have a same revlog node with different flags (i.e. different
1065 1065 # rawtext contents) and the delta could be incompatible.
1066 1066 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1067 1067 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1068 1068 ):
1069 1069 return False
1070 1070 return True
1071 1071
1072 1072 def update_caches(self, transaction):
1073 1073 """update on disk cache
1074 1074
1075 1075 If a transaction is passed, the update may be delayed to transaction
1076 1076 commit."""
1077 1077 if self._nodemap_file is not None:
1078 1078 if transaction is None:
1079 1079 nodemaputil.update_persistent_nodemap(self)
1080 1080 else:
1081 1081 nodemaputil.setup_persistent_nodemap(transaction, self)
1082 1082
1083 1083 def clearcaches(self):
1084 1084 """Clear in-memory caches"""
1085 1085 self._revisioncache = None
1086 1086 self._chainbasecache.clear()
1087 1087 self._segmentfile.clear_cache()
1088 1088 self._segmentfile_sidedata.clear_cache()
1089 1089 self._pcache = {}
1090 1090 self._nodemap_docket = None
1091 1091 self.index.clearcaches()
1092 1092 # The python code is the one responsible for validating the docket, so we
1093 1093 # end up having to refresh it here.
1094 1094 use_nodemap = (
1095 1095 not self._inline
1096 1096 and self._nodemap_file is not None
1097 1097 and hasattr(self.index, 'update_nodemap_data')
1098 1098 )
1099 1099 if use_nodemap:
1100 1100 nodemap_data = nodemaputil.persisted_data(self)
1101 1101 if nodemap_data is not None:
1102 1102 self._nodemap_docket = nodemap_data[0]
1103 1103 self.index.update_nodemap_data(*nodemap_data)
1104 1104
1105 1105 def rev(self, node):
1106 1106 """return the revision number associated with a <nodeid>"""
1107 1107 try:
1108 1108 return self.index.rev(node)
1109 1109 except TypeError:
1110 1110 raise
1111 1111 except error.RevlogError:
1112 1112 # parsers.c radix tree lookup failed
1113 1113 if (
1114 1114 node == self.nodeconstants.wdirid
1115 1115 or node in self.nodeconstants.wdirfilenodeids
1116 1116 ):
1117 1117 raise error.WdirUnsupported
1118 1118 raise error.LookupError(node, self.display_id, _(b'no node'))
1119 1119
1120 1120 # Accessors for index entries.
1121 1121
1122 1122 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1123 1123 # are flags.
1124 1124 def start(self, rev):
1125 1125 return int(self.index[rev][0] >> 16)
1126 1126
1127 1127 def sidedata_cut_off(self, rev):
1128 1128 sd_cut_off = self.index[rev][8]
1129 1129 if sd_cut_off != 0:
1130 1130 return sd_cut_off
1131 1131 # This is some annoying dance, because entries without sidedata
1132 1132 # currently use 0 as their offset. (instead of previous-offset +
1133 1133 # previous-size)
1134 1134 #
1135 1135 # We should reconsider this sidedata → 0 sidedata_offset policy.
1136 1136 # In the meantime, we need this.
1137 1137 while 0 <= rev:
1138 1138 e = self.index[rev]
1139 1139 if e[9] != 0:
1140 1140 return e[8] + e[9]
1141 1141 rev -= 1
1142 1142 return 0
1143 1143
1144 1144 def flags(self, rev):
1145 1145 return self.index[rev][0] & 0xFFFF
1146 1146
1147 1147 def length(self, rev):
1148 1148 return self.index[rev][1]
1149 1149
1150 1150 def sidedata_length(self, rev):
1151 1151 if not self.hassidedata:
1152 1152 return 0
1153 1153 return self.index[rev][9]
1154 1154
1155 1155 def rawsize(self, rev):
1156 1156 """return the length of the uncompressed text for a given revision"""
1157 1157 l = self.index[rev][2]
1158 1158 if l >= 0:
1159 1159 return l
1160 1160
1161 1161 t = self.rawdata(rev)
1162 1162 return len(t)
1163 1163
1164 1164 def size(self, rev):
1165 1165 """length of non-raw text (processed by a "read" flag processor)"""
1166 1166 # fast path: if no "read" flag processor could change the content,
1167 1167 # size is rawsize. note: ELLIPSIS is known to not change the content.
1168 1168 flags = self.flags(rev)
1169 1169 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1170 1170 return self.rawsize(rev)
1171 1171
1172 1172 return len(self.revision(rev))
1173 1173
1174 1174 def fast_rank(self, rev):
1175 1175 """Return the rank of a revision if already known, or None otherwise.
1176 1176
1177 1177 The rank of a revision is the size of the sub-graph it defines as a
1178 1178 head. Equivalently, the rank of a revision `r` is the size of the set
1179 1179 `ancestors(r)`, `r` included.
1180 1180
1181 1181 This method returns the rank retrieved from the revlog in constant
1182 1182 time. It makes no attempt at computing unknown values for versions of
1183 1183 the revlog which do not persist the rank.
1184 1184 """
1185 1185 rank = self.index[rev][ENTRY_RANK]
1186 1186 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1187 1187 return None
1188 1188 if rev == nullrev:
1189 1189 return 0 # convention
1190 1190 return rank
1191 1191
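# A standalone toy illustration of the "rank" notion documented above: the
# rank of a revision r is the number of revisions in ancestors(r), r
# included. Real changelogv2 revlogs persist this value in the index entry;
# this helper only demonstrates the definition on a plain
# {rev: [parent revs]} mapping.
def _example_rank(parents, rev):
    seen = set()
    stack = [rev]
    while stack:
        r = stack.pop()
        if r >= 0 and r not in seen:
            seen.add(r)
            stack.extend(parents[r])
    return len(seen)

# linear history 0 <- 1 <- 2 gives _example_rank({0: [-1], 1: [0], 2: [1]}, 2) == 3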
1192 1192 def chainbase(self, rev):
1193 1193 base = self._chainbasecache.get(rev)
1194 1194 if base is not None:
1195 1195 return base
1196 1196
1197 1197 index = self.index
1198 1198 iterrev = rev
1199 1199 base = index[iterrev][3]
1200 1200 while base != iterrev:
1201 1201 iterrev = base
1202 1202 base = index[iterrev][3]
1203 1203
1204 1204 self._chainbasecache[rev] = base
1205 1205 return base
1206 1206
1207 1207 def linkrev(self, rev):
1208 1208 return self.index[rev][4]
1209 1209
1210 1210 def parentrevs(self, rev):
1211 1211 try:
1212 1212 entry = self.index[rev]
1213 1213 except IndexError:
1214 1214 if rev == wdirrev:
1215 1215 raise error.WdirUnsupported
1216 1216 raise
1217 1217
1218 1218 if self.canonical_parent_order and entry[5] == nullrev:
1219 1219 return entry[6], entry[5]
1220 1220 else:
1221 1221 return entry[5], entry[6]
1222 1222
1223 1223 # fast parentrevs(rev) where rev isn't filtered
1224 1224 _uncheckedparentrevs = parentrevs
1225 1225
1226 1226 def node(self, rev):
1227 1227 try:
1228 1228 return self.index[rev][7]
1229 1229 except IndexError:
1230 1230 if rev == wdirrev:
1231 1231 raise error.WdirUnsupported
1232 1232 raise
1233 1233
1234 1234 # Derived from index values.
1235 1235
1236 1236 def end(self, rev):
1237 1237 return self.start(rev) + self.length(rev)
1238 1238
1239 1239 def parents(self, node):
1240 1240 i = self.index
1241 1241 d = i[self.rev(node)]
1242 1242 # inline node() to avoid function call overhead
1243 1243 if self.canonical_parent_order and d[5] == self.nullid:
1244 1244 return i[d[6]][7], i[d[5]][7]
1245 1245 else:
1246 1246 return i[d[5]][7], i[d[6]][7]
1247 1247
1248 1248 def chainlen(self, rev):
1249 1249 return self._chaininfo(rev)[0]
1250 1250
1251 1251 def _chaininfo(self, rev):
1252 1252 chaininfocache = self._chaininfocache
1253 1253 if rev in chaininfocache:
1254 1254 return chaininfocache[rev]
1255 1255 index = self.index
1256 generaldelta = self._generaldelta
1256 generaldelta = self.delta_config.general_delta
1257 1257 iterrev = rev
1258 1258 e = index[iterrev]
1259 1259 clen = 0
1260 1260 compresseddeltalen = 0
1261 1261 while iterrev != e[3]:
1262 1262 clen += 1
1263 1263 compresseddeltalen += e[1]
1264 1264 if generaldelta:
1265 1265 iterrev = e[3]
1266 1266 else:
1267 1267 iterrev -= 1
1268 1268 if iterrev in chaininfocache:
1269 1269 t = chaininfocache[iterrev]
1270 1270 clen += t[0]
1271 1271 compresseddeltalen += t[1]
1272 1272 break
1273 1273 e = index[iterrev]
1274 1274 else:
1275 1275 # Add text length of base since decompressing that also takes
1276 1276 # work. For cache hits the length is already included.
1277 1277 compresseddeltalen += e[1]
1278 1278 r = (clen, compresseddeltalen)
1279 1279 chaininfocache[rev] = r
1280 1280 return r
1281 1281
1282 1282 def _deltachain(self, rev, stoprev=None):
1283 1283 """Obtain the delta chain for a revision.
1284 1284
1285 1285 ``stoprev`` specifies a revision to stop at. If not specified, we
1286 1286 stop at the base of the chain.
1287 1287
1288 1288 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1289 1289 revs in ascending order and ``stopped`` is a bool indicating whether
1290 1290 ``stoprev`` was hit.
1291 1291 """
1292 generaldelta = self.delta_config.general_delta
1292 1293 # Try C implementation.
1293 1294 try:
1294 return self.index.deltachain(rev, stoprev, self._generaldelta)
1295 return self.index.deltachain(rev, stoprev, generaldelta)
1295 1296 except AttributeError:
1296 1297 pass
1297 1298
1298 1299 chain = []
1299 1300
1300 1301 # Alias to prevent attribute lookup in tight loop.
1301 1302 index = self.index
1302 generaldelta = self._generaldelta
1303 1303
1304 1304 iterrev = rev
1305 1305 e = index[iterrev]
1306 1306 while iterrev != e[3] and iterrev != stoprev:
1307 1307 chain.append(iterrev)
1308 1308 if generaldelta:
1309 1309 iterrev = e[3]
1310 1310 else:
1311 1311 iterrev -= 1
1312 1312 e = index[iterrev]
1313 1313
1314 1314 if iterrev == stoprev:
1315 1315 stopped = True
1316 1316 else:
1317 1317 chain.append(iterrev)
1318 1318 stopped = False
1319 1319
1320 1320 chain.reverse()
1321 1321 return chain, stopped
1322 1322
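# A standalone toy version of the chain walk above: with general-delta each
# entry names an arbitrary delta base, without it the base is always the
# previous revision. `bases` stands in for index entry field e[3]; a
# revision that is its own base is a full snapshot.
def _example_deltachain(bases, rev, generaldelta=True):
    chain = []
    while bases[rev] != rev:
        chain.append(rev)
        rev = bases[rev] if generaldelta else rev - 1
    chain.append(rev)
    chain.reverse()
    return chain

# rev 3 deltas against 1, 1 against 0, and 0 is a snapshot:
# _example_deltachain({0: 0, 1: 0, 2: 1, 3: 1}, 3) == [0, 1, 3]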
1323 1323 def ancestors(self, revs, stoprev=0, inclusive=False):
1324 1324 """Generate the ancestors of 'revs' in reverse revision order.
1325 1325 Does not generate revs lower than stoprev.
1326 1326
1327 1327 See the documentation for ancestor.lazyancestors for more details."""
1328 1328
1329 1329 # first, make sure start revisions aren't filtered
1330 1330 revs = list(revs)
1331 1331 checkrev = self.node
1332 1332 for r in revs:
1333 1333 checkrev(r)
1334 1334 # and we're sure ancestors aren't filtered as well
1335 1335
1336 1336 if rustancestor is not None and self.index.rust_ext_compat:
1337 1337 lazyancestors = rustancestor.LazyAncestors
1338 1338 arg = self.index
1339 1339 else:
1340 1340 lazyancestors = ancestor.lazyancestors
1341 1341 arg = self._uncheckedparentrevs
1342 1342 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1343 1343
1344 1344 def descendants(self, revs):
1345 1345 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1346 1346
1347 1347 def findcommonmissing(self, common=None, heads=None):
1348 1348 """Return a tuple of the ancestors of common and the ancestors of heads
1349 1349 that are not ancestors of common. In revset terminology, we return the
1350 1350 tuple:
1351 1351
1352 1352 ::common, (::heads) - (::common)
1353 1353
1354 1354 The list is sorted by revision number, meaning it is
1355 1355 topologically sorted.
1356 1356
1357 1357 'heads' and 'common' are both lists of node IDs. If heads is
1358 1358 not supplied, uses all of the revlog's heads. If common is not
1359 1359 supplied, uses nullid."""
1360 1360 if common is None:
1361 1361 common = [self.nullid]
1362 1362 if heads is None:
1363 1363 heads = self.heads()
1364 1364
1365 1365 common = [self.rev(n) for n in common]
1366 1366 heads = [self.rev(n) for n in heads]
1367 1367
1368 1368 # we want the ancestors, but inclusive
1369 1369 class lazyset:
1370 1370 def __init__(self, lazyvalues):
1371 1371 self.addedvalues = set()
1372 1372 self.lazyvalues = lazyvalues
1373 1373
1374 1374 def __contains__(self, value):
1375 1375 return value in self.addedvalues or value in self.lazyvalues
1376 1376
1377 1377 def __iter__(self):
1378 1378 added = self.addedvalues
1379 1379 for r in added:
1380 1380 yield r
1381 1381 for r in self.lazyvalues:
1382 1382 if r not in added:
1383 1383 yield r
1384 1384
1385 1385 def add(self, value):
1386 1386 self.addedvalues.add(value)
1387 1387
1388 1388 def update(self, values):
1389 1389 self.addedvalues.update(values)
1390 1390
1391 1391 has = lazyset(self.ancestors(common))
1392 1392 has.add(nullrev)
1393 1393 has.update(common)
1394 1394
1395 1395 # take all ancestors from heads that aren't in has
1396 1396 missing = set()
1397 1397 visit = collections.deque(r for r in heads if r not in has)
1398 1398 while visit:
1399 1399 r = visit.popleft()
1400 1400 if r in missing:
1401 1401 continue
1402 1402 else:
1403 1403 missing.add(r)
1404 1404 for p in self.parentrevs(r):
1405 1405 if p not in has:
1406 1406 visit.append(p)
1407 1407 missing = list(missing)
1408 1408 missing.sort()
1409 1409 return has, [self.node(miss) for miss in missing]
1410 1410
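# A worked example of the contract documented above, on the small DAG
# 0 <- 1 <- 2 plus 0 <- 3: with common = [node(1)] and
# heads = [node(2), node(3)], "has" lazily covers {null, 0, 1} and the
# returned missing nodes are [node(2), node(3)], sorted by revision number.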
1411 1411 def incrementalmissingrevs(self, common=None):
1412 1412 """Return an object that can be used to incrementally compute the
1413 1413 revision numbers of the ancestors of arbitrary sets that are not
1414 1414 ancestors of common. This is an ancestor.incrementalmissingancestors
1415 1415 object.
1416 1416
1417 1417 'common' is a list of revision numbers. If common is not supplied, uses
1418 1418 nullrev.
1419 1419 """
1420 1420 if common is None:
1421 1421 common = [nullrev]
1422 1422
1423 1423 if rustancestor is not None and self.index.rust_ext_compat:
1424 1424 return rustancestor.MissingAncestors(self.index, common)
1425 1425 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1426 1426
1427 1427 def findmissingrevs(self, common=None, heads=None):
1428 1428 """Return the revision numbers of the ancestors of heads that
1429 1429 are not ancestors of common.
1430 1430
1431 1431 More specifically, return a list of revision numbers corresponding to
1432 1432 nodes N such that every N satisfies the following constraints:
1433 1433
1434 1434 1. N is an ancestor of some node in 'heads'
1435 1435 2. N is not an ancestor of any node in 'common'
1436 1436
1437 1437 The list is sorted by revision number, meaning it is
1438 1438 topologically sorted.
1439 1439
1440 1440 'heads' and 'common' are both lists of revision numbers. If heads is
1441 1441 not supplied, uses all of the revlog's heads. If common is not
1442 1442 supplied, uses nullrev."""
1443 1443 if common is None:
1444 1444 common = [nullrev]
1445 1445 if heads is None:
1446 1446 heads = self.headrevs()
1447 1447
1448 1448 inc = self.incrementalmissingrevs(common=common)
1449 1449 return inc.missingancestors(heads)
1450 1450
1451 1451 def findmissing(self, common=None, heads=None):
1452 1452 """Return the ancestors of heads that are not ancestors of common.
1453 1453
1454 1454 More specifically, return a list of nodes N such that every N
1455 1455 satisfies the following constraints:
1456 1456
1457 1457 1. N is an ancestor of some node in 'heads'
1458 1458 2. N is not an ancestor of any node in 'common'
1459 1459
1460 1460 The list is sorted by revision number, meaning it is
1461 1461 topologically sorted.
1462 1462
1463 1463 'heads' and 'common' are both lists of node IDs. If heads is
1464 1464 not supplied, uses all of the revlog's heads. If common is not
1465 1465 supplied, uses nullid."""
1466 1466 if common is None:
1467 1467 common = [self.nullid]
1468 1468 if heads is None:
1469 1469 heads = self.heads()
1470 1470
1471 1471 common = [self.rev(n) for n in common]
1472 1472 heads = [self.rev(n) for n in heads]
1473 1473
1474 1474 inc = self.incrementalmissingrevs(common=common)
1475 1475 return [self.node(r) for r in inc.missingancestors(heads)]
1476 1476
1477 1477 def nodesbetween(self, roots=None, heads=None):
1478 1478 """Return a topological path from 'roots' to 'heads'.
1479 1479
1480 1480 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1481 1481 topologically sorted list of all nodes N that satisfy both of
1482 1482 these constraints:
1483 1483
1484 1484 1. N is a descendant of some node in 'roots'
1485 1485 2. N is an ancestor of some node in 'heads'
1486 1486
1487 1487 Every node is considered to be both a descendant and an ancestor
1488 1488 of itself, so every reachable node in 'roots' and 'heads' will be
1489 1489 included in 'nodes'.
1490 1490
1491 1491 'outroots' is the list of reachable nodes in 'roots', i.e., the
1492 1492 subset of 'roots' that is returned in 'nodes'. Likewise,
1493 1493 'outheads' is the subset of 'heads' that is also in 'nodes'.
1494 1494
1495 1495 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1496 1496 unspecified, uses nullid as the only root. If 'heads' is
1497 1497 unspecified, uses the list of all of the revlog's heads."""
1498 1498 nonodes = ([], [], [])
1499 1499 if roots is not None:
1500 1500 roots = list(roots)
1501 1501 if not roots:
1502 1502 return nonodes
1503 1503 lowestrev = min([self.rev(n) for n in roots])
1504 1504 else:
1505 1505 roots = [self.nullid] # Everybody's a descendant of nullid
1506 1506 lowestrev = nullrev
1507 1507 if (lowestrev == nullrev) and (heads is None):
1508 1508 # We want _all_ the nodes!
1509 1509 return (
1510 1510 [self.node(r) for r in self],
1511 1511 [self.nullid],
1512 1512 list(self.heads()),
1513 1513 )
1514 1514 if heads is None:
1515 1515 # All nodes are ancestors, so the latest ancestor is the last
1516 1516 # node.
1517 1517 highestrev = len(self) - 1
1518 1518 # Set ancestors to None to signal that every node is an ancestor.
1519 1519 ancestors = None
1520 1520 # Set heads to an empty dictionary for later discovery of heads
1521 1521 heads = {}
1522 1522 else:
1523 1523 heads = list(heads)
1524 1524 if not heads:
1525 1525 return nonodes
1526 1526 ancestors = set()
1527 1527 # Turn heads into a dictionary so we can remove 'fake' heads.
1528 1528 # Also, later we will be using it to filter out the heads we can't
1529 1529 # find from roots.
1530 1530 heads = dict.fromkeys(heads, False)
1531 1531 # Start at the top and keep marking parents until we're done.
1532 1532 nodestotag = set(heads)
1533 1533 # Remember where the top was so we can use it as a limit later.
1534 1534 highestrev = max([self.rev(n) for n in nodestotag])
1535 1535 while nodestotag:
1536 1536 # grab a node to tag
1537 1537 n = nodestotag.pop()
1538 1538 # Never tag nullid
1539 1539 if n == self.nullid:
1540 1540 continue
1541 1541 # A node's revision number represents its place in a
1542 1542 # topologically sorted list of nodes.
1543 1543 r = self.rev(n)
1544 1544 if r >= lowestrev:
1545 1545 if n not in ancestors:
1546 1546 # If we are possibly a descendant of one of the roots
1547 1547 # and we haven't already been marked as an ancestor
1548 1548 ancestors.add(n) # Mark as ancestor
1549 1549 # Add non-nullid parents to list of nodes to tag.
1550 1550 nodestotag.update(
1551 1551 [p for p in self.parents(n) if p != self.nullid]
1552 1552 )
1553 1553 elif n in heads: # We've seen it before, is it a fake head?
1554 1554 # So it is, real heads should not be the ancestors of
1555 1555 # any other heads.
1556 1556 heads.pop(n)
1557 1557 if not ancestors:
1558 1558 return nonodes
1559 1559 # Now that we have our set of ancestors, we want to remove any
1560 1560 # roots that are not ancestors.
1561 1561
1562 1562 # If one of the roots was nullid, everything is included anyway.
1563 1563 if lowestrev > nullrev:
1564 1564 # But, since we weren't, let's recompute the lowest rev to not
1565 1565 # include roots that aren't ancestors.
1566 1566
1567 1567 # Filter out roots that aren't ancestors of heads
1568 1568 roots = [root for root in roots if root in ancestors]
1569 1569 # Recompute the lowest revision
1570 1570 if roots:
1571 1571 lowestrev = min([self.rev(root) for root in roots])
1572 1572 else:
1573 1573 # No more roots? Return empty list
1574 1574 return nonodes
1575 1575 else:
1576 1576 # We are descending from nullid, and don't need to care about
1577 1577 # any other roots.
1578 1578 lowestrev = nullrev
1579 1579 roots = [self.nullid]
1580 1580 # Transform our roots list into a set.
1581 1581 descendants = set(roots)
1582 1582 # Also, keep the original roots so we can filter out roots that aren't
1583 1583 # 'real' roots (i.e. are descended from other roots).
1584 1584 roots = descendants.copy()
1585 1585 # Our topologically sorted list of output nodes.
1586 1586 orderedout = []
1587 1587 # Don't start at nullid since we don't want nullid in our output list,
1588 1588 # and if nullid shows up in descendants, empty parents will look like
1589 1589 # they're descendants.
1590 1590 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1591 1591 n = self.node(r)
1592 1592 isdescendant = False
1593 1593 if lowestrev == nullrev: # Everybody is a descendant of nullid
1594 1594 isdescendant = True
1595 1595 elif n in descendants:
1596 1596 # n is already a descendant
1597 1597 isdescendant = True
1598 1598 # This check only needs to be done here because all the roots
1599 1599 # will start being marked as descendants before the loop.
1600 1600 if n in roots:
1601 1601 # If n was a root, check if it's a 'real' root.
1602 1602 p = tuple(self.parents(n))
1603 1603 # If any of its parents are descendants, it's not a root.
1604 1604 if (p[0] in descendants) or (p[1] in descendants):
1605 1605 roots.remove(n)
1606 1606 else:
1607 1607 p = tuple(self.parents(n))
1608 1608 # A node is a descendant if either of its parents are
1609 1609 # descendants. (We seeded the descendants set with the roots
1610 1610 # up there, remember?)
1611 1611 if (p[0] in descendants) or (p[1] in descendants):
1612 1612 descendants.add(n)
1613 1613 isdescendant = True
1614 1614 if isdescendant and ((ancestors is None) or (n in ancestors)):
1615 1615 # Only include nodes that are both descendants and ancestors.
1616 1616 orderedout.append(n)
1617 1617 if (ancestors is not None) and (n in heads):
1618 1618 # We're trying to figure out which heads are reachable
1619 1619 # from roots.
1620 1620 # Mark this head as having been reached
1621 1621 heads[n] = True
1622 1622 elif ancestors is None:
1623 1623 # Otherwise, we're trying to discover the heads.
1624 1624 # Assume this is a head because if it isn't, the next step
1625 1625 # will eventually remove it.
1626 1626 heads[n] = True
1627 1627 # But, obviously its parents aren't.
1628 1628 for p in self.parents(n):
1629 1629 heads.pop(p, None)
1630 1630 heads = [head for head, flag in heads.items() if flag]
1631 1631 roots = list(roots)
1632 1632 assert orderedout
1633 1633 assert roots
1634 1634 assert heads
1635 1635 return (orderedout, roots, heads)
1636 1636
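# A worked example for nodesbetween() on the DAG 0 <- 1 <- 2 plus 0 <- 3:
# roots = [node(1)], heads = [node(2)] yields
# (nodes=[node(1), node(2)], outroots=[node(1)], outheads=[node(2)]);
# node(3) is excluded because it does not descend from any root.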
1637 1637 def headrevs(self, revs=None):
1638 1638 if revs is None:
1639 1639 try:
1640 1640 return self.index.headrevs()
1641 1641 except AttributeError:
1642 1642 return self._headrevs()
1643 1643 if rustdagop is not None and self.index.rust_ext_compat:
1644 1644 return rustdagop.headrevs(self.index, revs)
1645 1645 return dagop.headrevs(revs, self._uncheckedparentrevs)
1646 1646
1647 1647 def computephases(self, roots):
1648 1648 return self.index.computephasesmapsets(roots)
1649 1649
1650 1650 def _headrevs(self):
1651 1651 count = len(self)
1652 1652 if not count:
1653 1653 return [nullrev]
1654 1654 # we won't iter over filtered rev so nobody is a head at start
1655 1655 ishead = [0] * (count + 1)
1656 1656 index = self.index
1657 1657 for r in self:
1658 1658 ishead[r] = 1 # I may be a head
1659 1659 e = index[r]
1660 1660 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1661 1661 return [r for r, val in enumerate(ishead) if val]
1662 1662
1663 1663 def heads(self, start=None, stop=None):
1664 1664 """return the list of all nodes that have no children
1665 1665
1666 1666 if start is specified, only heads that are descendants of
1667 1667 start will be returned
1668 1668 if stop is specified, it will consider all the revs from stop
1669 1669 as if they had no children
1670 1670 """
1671 1671 if start is None and stop is None:
1672 1672 if not len(self):
1673 1673 return [self.nullid]
1674 1674 return [self.node(r) for r in self.headrevs()]
1675 1675
1676 1676 if start is None:
1677 1677 start = nullrev
1678 1678 else:
1679 1679 start = self.rev(start)
1680 1680
1681 1681 stoprevs = {self.rev(n) for n in stop or []}
1682 1682
1683 1683 revs = dagop.headrevssubset(
1684 1684 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1685 1685 )
1686 1686
1687 1687 return [self.node(rev) for rev in revs]
1688 1688
1689 1689 def children(self, node):
1690 1690 """find the children of a given node"""
1691 1691 c = []
1692 1692 p = self.rev(node)
1693 1693 for r in self.revs(start=p + 1):
1694 1694 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1695 1695 if prevs:
1696 1696 for pr in prevs:
1697 1697 if pr == p:
1698 1698 c.append(self.node(r))
1699 1699 elif p == nullrev:
1700 1700 c.append(self.node(r))
1701 1701 return c
1702 1702
1703 1703 def commonancestorsheads(self, a, b):
1704 1704 """calculate all the heads of the common ancestors of nodes a and b"""
1705 1705 a, b = self.rev(a), self.rev(b)
1706 1706 ancs = self._commonancestorsheads(a, b)
1707 1707 return pycompat.maplist(self.node, ancs)
1708 1708
1709 1709 def _commonancestorsheads(self, *revs):
1710 1710 """calculate all the heads of the common ancestors of revs"""
1711 1711 try:
1712 1712 ancs = self.index.commonancestorsheads(*revs)
1713 1713 except (AttributeError, OverflowError): # C implementation failed
1714 1714 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1715 1715 return ancs
1716 1716
1717 1717 def isancestor(self, a, b):
1718 1718 """return True if node a is an ancestor of node b
1719 1719
1720 1720 A revision is considered an ancestor of itself."""
1721 1721 a, b = self.rev(a), self.rev(b)
1722 1722 return self.isancestorrev(a, b)
1723 1723
1724 1724 def isancestorrev(self, a, b):
1725 1725 """return True if revision a is an ancestor of revision b
1726 1726
1727 1727 A revision is considered an ancestor of itself.
1728 1728
1729 1729 The implementation of this is trivial but the use of
1730 1730 reachableroots is not."""
1731 1731 if a == nullrev:
1732 1732 return True
1733 1733 elif a == b:
1734 1734 return True
1735 1735 elif a > b:
1736 1736 return False
1737 1737 return bool(self.reachableroots(a, [b], [a], includepath=False))
1738 1738
1739 1739 def reachableroots(self, minroot, heads, roots, includepath=False):
1740 1740 """return (heads(::(<roots> and <roots>::<heads>)))
1741 1741
1742 1742 If includepath is True, return (<roots>::<heads>)."""
1743 1743 try:
1744 1744 return self.index.reachableroots2(
1745 1745 minroot, heads, roots, includepath
1746 1746 )
1747 1747 except AttributeError:
1748 1748 return dagop._reachablerootspure(
1749 1749 self.parentrevs, minroot, roots, heads, includepath
1750 1750 )
1751 1751
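# A rough sketch of how the ancestry helpers above fit together; `rl`, `a`
# and `b` are illustrative names, not defined in this file:
#
#   rl.isancestorrev(a, b)
#
# is, modulo the nullrev / a == b / a > b fast paths, the same question as
# whether rl.reachableroots(a, heads=[b], roots=[a], includepath=False)
# returns a non-empty result. With includepath=True, reachableroots instead
# yields every revision on the paths between the roots and the heads, i.e.
# the revset `<roots>::<heads>`.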
1752 1752 def ancestor(self, a, b):
1753 1753 """calculate the "best" common ancestor of nodes a and b"""
1754 1754
1755 1755 a, b = self.rev(a), self.rev(b)
1756 1756 try:
1757 1757 ancs = self.index.ancestors(a, b)
1758 1758 except (AttributeError, OverflowError):
1759 1759 ancs = ancestor.ancestors(self.parentrevs, a, b)
1760 1760 if ancs:
1761 1761 # choose a consistent winner when there's a tie
1762 1762 return min(map(self.node, ancs))
1763 1763 return self.nullid
1764 1764
1765 1765 def _match(self, id):
1766 1766 if isinstance(id, int):
1767 1767 # rev
1768 1768 return self.node(id)
1769 1769 if len(id) == self.nodeconstants.nodelen:
1770 1770 # possibly a binary node
1771 1771 # odds of a binary node being all hex in ASCII are 1 in 10**25
1772 1772 try:
1773 1773 node = id
1774 1774 self.rev(node) # quick search the index
1775 1775 return node
1776 1776 except error.LookupError:
1777 1777 pass # may be partial hex id
1778 1778 try:
1779 1779 # str(rev)
1780 1780 rev = int(id)
1781 1781 if b"%d" % rev != id:
1782 1782 raise ValueError
1783 1783 if rev < 0:
1784 1784 rev = len(self) + rev
1785 1785 if rev < 0 or rev >= len(self):
1786 1786 raise ValueError
1787 1787 return self.node(rev)
1788 1788 except (ValueError, OverflowError):
1789 1789 pass
1790 1790 if len(id) == 2 * self.nodeconstants.nodelen:
1791 1791 try:
1792 1792 # a full hex nodeid?
1793 1793 node = bin(id)
1794 1794 self.rev(node)
1795 1795 return node
1796 1796 except (binascii.Error, error.LookupError):
1797 1797 pass
1798 1798
1799 1799 def _partialmatch(self, id):
1800 1800 # we don't care about wdirfilenodeids as they should always be full hashes
1801 1801 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1802 1802 ambiguous = False
1803 1803 try:
1804 1804 partial = self.index.partialmatch(id)
1805 1805 if partial and self.hasnode(partial):
1806 1806 if maybewdir:
1807 1807 # single 'ff...' match in radix tree, ambiguous with wdir
1808 1808 ambiguous = True
1809 1809 else:
1810 1810 return partial
1811 1811 elif maybewdir:
1812 1812 # no 'ff...' match in radix tree, wdir identified
1813 1813 raise error.WdirUnsupported
1814 1814 else:
1815 1815 return None
1816 1816 except error.RevlogError:
1817 1817 # parsers.c radix tree lookup gave multiple matches
1818 1818 # fast path: for unfiltered changelog, radix tree is accurate
1819 1819 if not getattr(self, 'filteredrevs', None):
1820 1820 ambiguous = True
1821 1821 # fall through to slow path that filters hidden revisions
1822 1822 except (AttributeError, ValueError):
1823 1823 # we are pure python, or key is not hex
1824 1824 pass
1825 1825 if ambiguous:
1826 1826 raise error.AmbiguousPrefixLookupError(
1827 1827 id, self.display_id, _(b'ambiguous identifier')
1828 1828 )
1829 1829
1830 1830 if id in self._pcache:
1831 1831 return self._pcache[id]
1832 1832
1833 1833 if len(id) <= 40:
1834 1834 # hex(node)[:...]
1835 1835 l = len(id) // 2 * 2 # grab an even number of digits
1836 1836 try:
1837 1837 # we're dropping the last digit, so let's check that it's hex,
1838 1838 # to avoid the expensive computation below if it's not
1839 1839 if len(id) % 2 > 0:
1840 1840 if not (id[-1] in hexdigits):
1841 1841 return None
1842 1842 prefix = bin(id[:l])
1843 1843 except binascii.Error:
1844 1844 pass
1845 1845 else:
1846 1846 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1847 1847 nl = [
1848 1848 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1849 1849 ]
1850 1850 if self.nodeconstants.nullhex.startswith(id):
1851 1851 nl.append(self.nullid)
1852 1852 if len(nl) > 0:
1853 1853 if len(nl) == 1 and not maybewdir:
1854 1854 self._pcache[id] = nl[0]
1855 1855 return nl[0]
1856 1856 raise error.AmbiguousPrefixLookupError(
1857 1857 id, self.display_id, _(b'ambiguous identifier')
1858 1858 )
1859 1859 if maybewdir:
1860 1860 raise error.WdirUnsupported
1861 1861 return None
1862 1862
1863 1863 def lookup(self, id):
1864 1864 """locate a node based on:
1865 1865 - revision number or str(revision number)
1866 1866 - nodeid or subset of hex nodeid
1867 1867 """
1868 1868 n = self._match(id)
1869 1869 if n is not None:
1870 1870 return n
1871 1871 n = self._partialmatch(id)
1872 1872 if n:
1873 1873 return n
1874 1874
1875 1875 raise error.LookupError(id, self.display_id, _(b'no match found'))
1876 1876
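# An illustrative sketch of the identifier forms lookup() resolves; `rl` is
# an assumed, already-populated revlog instance:
#
#   rl.lookup(b'0')          # str(revision number), as bytes
#   rl.lookup(b'-1')         # negative numbers count back from the tip
#   rl.lookup(rl.node(0))    # full binary nodeid (fast path in _match)
#   rl.lookup(b'1234abcd')   # unambiguous hex prefix (via _partialmatch)
#
# An ambiguous prefix raises error.AmbiguousPrefixLookupError; anything that
# matches nothing raises error.LookupError.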
1877 1877 def shortest(self, node, minlength=1):
1878 1878 """Find the shortest unambiguous prefix that matches node."""
1879 1879
1880 1880 def isvalid(prefix):
1881 1881 try:
1882 1882 matchednode = self._partialmatch(prefix)
1883 1883 except error.AmbiguousPrefixLookupError:
1884 1884 return False
1885 1885 except error.WdirUnsupported:
1886 1886 # single 'ff...' match
1887 1887 return True
1888 1888 if matchednode is None:
1889 1889 raise error.LookupError(node, self.display_id, _(b'no node'))
1890 1890 return True
1891 1891
1892 1892 def maybewdir(prefix):
1893 1893 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1894 1894
1895 1895 hexnode = hex(node)
1896 1896
1897 1897 def disambiguate(hexnode, minlength):
1898 1898 """Disambiguate against wdirid."""
1899 1899 for length in range(minlength, len(hexnode) + 1):
1900 1900 prefix = hexnode[:length]
1901 1901 if not maybewdir(prefix):
1902 1902 return prefix
1903 1903
1904 1904 if not getattr(self, 'filteredrevs', None):
1905 1905 try:
1906 1906 length = max(self.index.shortest(node), minlength)
1907 1907 return disambiguate(hexnode, length)
1908 1908 except error.RevlogError:
1909 1909 if node != self.nodeconstants.wdirid:
1910 1910 raise error.LookupError(
1911 1911 node, self.display_id, _(b'no node')
1912 1912 )
1913 1913 except AttributeError:
1914 1914 # Fall through to pure code
1915 1915 pass
1916 1916
1917 1917 if node == self.nodeconstants.wdirid:
1918 1918 for length in range(minlength, len(hexnode) + 1):
1919 1919 prefix = hexnode[:length]
1920 1920 if isvalid(prefix):
1921 1921 return prefix
1922 1922
1923 1923 for length in range(minlength, len(hexnode) + 1):
1924 1924 prefix = hexnode[:length]
1925 1925 if isvalid(prefix):
1926 1926 return disambiguate(hexnode, length)
1927 1927
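# An illustrative example of shortest() above; the hex values are made up:
#
#   rl.shortest(node)               # e.g. b'1a2b' once four digits are unique
#   rl.shortest(node, minlength=8)  # never shorter than the requested floor
#
# The result is additionally lengthened until it cannot be read as a prefix
# of the all-'f' working-directory pseudo identifier.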
1928 1928 def cmp(self, node, text):
1929 1929 """compare text with a given file revision
1930 1930
1931 1931 returns True if text is different than what is stored.
1932 1932 """
1933 1933 p1, p2 = self.parents(node)
1934 1934 return storageutil.hashrevisionsha1(text, p1, p2) != node
1935 1935
1936 1936 def _getsegmentforrevs(self, startrev, endrev):
1937 1937 """Obtain a segment of raw data corresponding to a range of revisions.
1938 1938
1939 1939 Accepts the start and end revisions. Data is read through the revlog's
1940 1940 internal segment file, so callers do not need to provide a file handle.
1942 1942
1943 1943 Requests for data may be satisfied by a cache.
1944 1944
1945 1945 Returns a 2-tuple of (offset, data) for the requested range of
1946 1946 revisions. Offset is the integer offset from the beginning of the
1947 1947 revlog and data is a str or buffer of the raw byte data.
1948 1948
1949 1949 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1950 1950 to determine where each revision's data begins and ends.
1951 1951 """
1952 1952 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1953 1953 # (functions are expensive).
1954 1954 index = self.index
1955 1955 istart = index[startrev]
1956 1956 start = int(istart[0] >> 16)
1957 1957 if startrev == endrev:
1958 1958 end = start + istart[1]
1959 1959 else:
1960 1960 iend = index[endrev]
1961 1961 end = int(iend[0] >> 16) + iend[1]
1962 1962
1963 1963 if self._inline:
1964 1964 start += (startrev + 1) * self.index.entry_size
1965 1965 end += (endrev + 1) * self.index.entry_size
1966 1966 length = end - start
1967 1967
1968 1968 return start, self._segmentfile.read_chunk(start, length)
1969 1969
1970 1970 def _chunk(self, rev):
1971 1971 """Obtain a single decompressed chunk for a revision.
1972 1972
1973 1973 Accepts an integer revision; the data is read through the revlog's
1974 1974 internal segment file, so no file handle needs to be passed in.
1976 1976
1977 1977 Returns a str holding uncompressed data for the requested revision.
1978 1978 """
1979 1979 compression_mode = self.index[rev][10]
1980 1980 data = self._getsegmentforrevs(rev, rev)[1]
1981 1981 if compression_mode == COMP_MODE_PLAIN:
1982 1982 return data
1983 1983 elif compression_mode == COMP_MODE_DEFAULT:
1984 1984 return self._decompressor(data)
1985 1985 elif compression_mode == COMP_MODE_INLINE:
1986 1986 return self.decompress(data)
1987 1987 else:
1988 1988 msg = b'unknown compression mode %d'
1989 1989 msg %= compression_mode
1990 1990 raise error.RevlogError(msg)
1991 1991
1992 1992 def _chunks(self, revs, targetsize=None):
1993 1993 """Obtain decompressed chunks for the specified revisions.
1994 1994
1995 1995 Accepts an iterable of numeric revisions that are assumed to be in
1996 1996 ascending order. Data is read through the revlog's internal segment
1997 1997 file, so no file handle needs to be passed in.
1999 1999
2000 2000 This function is similar to calling ``self._chunk()`` multiple times,
2001 2001 but is faster.
2002 2002
2003 2003 Returns a list with decompressed data for each requested revision.
2004 2004 """
2005 2005 if not revs:
2006 2006 return []
2007 2007 start = self.start
2008 2008 length = self.length
2009 2009 inline = self._inline
2010 2010 iosize = self.index.entry_size
2011 2011 buffer = util.buffer
2012 2012
2013 2013 l = []
2014 2014 ladd = l.append
2015 2015
2016 2016 if not self._withsparseread:
2017 2017 slicedchunks = (revs,)
2018 2018 else:
2019 2019 slicedchunks = deltautil.slicechunk(
2020 2020 self, revs, targetsize=targetsize
2021 2021 )
2022 2022
2023 2023 for revschunk in slicedchunks:
2024 2024 firstrev = revschunk[0]
2025 2025 # Skip trailing revisions with empty diff
2026 2026 for lastrev in revschunk[::-1]:
2027 2027 if length(lastrev) != 0:
2028 2028 break
2029 2029
2030 2030 try:
2031 2031 offset, data = self._getsegmentforrevs(firstrev, lastrev)
2032 2032 except OverflowError:
2033 2033 # issue4215 - we can't cache a run of chunks greater than
2034 2034 # 2G on Windows
2035 2035 return [self._chunk(rev) for rev in revschunk]
2036 2036
2037 2037 decomp = self.decompress
2038 2038 # self._decompressor might be None, but will not be used in that case
2039 2039 def_decomp = self._decompressor
2040 2040 for rev in revschunk:
2041 2041 chunkstart = start(rev)
2042 2042 if inline:
2043 2043 chunkstart += (rev + 1) * iosize
2044 2044 chunklength = length(rev)
2045 2045 comp_mode = self.index[rev][10]
2046 2046 c = buffer(data, chunkstart - offset, chunklength)
2047 2047 if comp_mode == COMP_MODE_PLAIN:
2048 2048 ladd(c)
2049 2049 elif comp_mode == COMP_MODE_INLINE:
2050 2050 ladd(decomp(c))
2051 2051 elif comp_mode == COMP_MODE_DEFAULT:
2052 2052 ladd(def_decomp(c))
2053 2053 else:
2054 2054 msg = b'unknown compression mode %d'
2055 2055 msg %= comp_mode
2056 2056 raise error.RevlogError(msg)
2057 2057
2058 2058 return l
2059 2059
2060 2060 def deltaparent(self, rev):
2061 2061 """return deltaparent of the given revision"""
2062 2062 base = self.index[rev][3]
2063 2063 if base == rev:
2064 2064 return nullrev
2065 elif self._generaldelta:
2065 elif self.delta_config.general_delta:
2066 2066 return base
2067 2067 else:
2068 2068 return rev - 1
2069 2069
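# A descriptive sketch of the rule above, with `rl` an illustrative revlog
# instance (not defined here):
#
#   base = rl.index[rev][3]
#   if base == rev:                        # stored as a full text
#       rl.deltaparent(rev) == nullrev
#   elif rl.delta_config.general_delta:    # delta base recorded per entry
#       rl.deltaparent(rev) == base
#   else:                                  # legacy format: chain on the
#       rl.deltaparent(rev) == rev - 1     # previous revision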
2070 2070 def issnapshot(self, rev):
2071 2071 """tells whether rev is a snapshot"""
2072 2072 if not self._sparserevlog:
2073 2073 return self.deltaparent(rev) == nullrev
2074 2074 elif hasattr(self.index, 'issnapshot'):
2075 2075 # directly assign the method to cache the testing and access
2076 2076 self.issnapshot = self.index.issnapshot
2077 2077 return self.issnapshot(rev)
2078 2078 if rev == nullrev:
2079 2079 return True
2080 2080 entry = self.index[rev]
2081 2081 base = entry[3]
2082 2082 if base == rev:
2083 2083 return True
2084 2084 if base == nullrev:
2085 2085 return True
2086 2086 p1 = entry[5]
2087 2087 while self.length(p1) == 0:
2088 2088 b = self.deltaparent(p1)
2089 2089 if b == p1:
2090 2090 break
2091 2091 p1 = b
2092 2092 p2 = entry[6]
2093 2093 while self.length(p2) == 0:
2094 2094 b = self.deltaparent(p2)
2095 2095 if b == p2:
2096 2096 break
2097 2097 p2 = b
2098 2098 if base == p1 or base == p2:
2099 2099 return False
2100 2100 return self.issnapshot(base)
2101 2101
2102 2102 def snapshotdepth(self, rev):
2103 2103 """number of snapshot in the chain before this one"""
2104 2104 if not self.issnapshot(rev):
2105 2105 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2106 2106 return len(self._deltachain(rev)[0]) - 1
2107 2107
2108 2108 def revdiff(self, rev1, rev2):
2109 2109 """return or calculate a delta between two revisions
2110 2110
2111 2111 The delta calculated is in binary form and is intended to be written to
2112 2112 revlog data directly. So this function needs raw revision data.
2113 2113 """
2114 2114 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2115 2115 return bytes(self._chunk(rev2))
2116 2116
2117 2117 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2118 2118
2119 2119 def revision(self, nodeorrev):
2120 2120 """return an uncompressed revision of a given node or revision
2121 2121 number.
2122 2122 """
2123 2123 return self._revisiondata(nodeorrev)
2124 2124
2125 2125 def sidedata(self, nodeorrev):
2126 2126 """a map of extra data related to the changeset but not part of the hash
2127 2127
2128 2128 This function currently returns a dictionary. However, a more advanced
2129 2129 mapping object will likely be used in the future for more
2130 2130 efficient/lazy code.
2131 2131 """
2132 2132 # deal with <nodeorrev> argument type
2133 2133 if isinstance(nodeorrev, int):
2134 2134 rev = nodeorrev
2135 2135 else:
2136 2136 rev = self.rev(nodeorrev)
2137 2137 return self._sidedata(rev)
2138 2138
2139 2139 def _revisiondata(self, nodeorrev, raw=False):
2140 2140 # deal with <nodeorrev> argument type
2141 2141 if isinstance(nodeorrev, int):
2142 2142 rev = nodeorrev
2143 2143 node = self.node(rev)
2144 2144 else:
2145 2145 node = nodeorrev
2146 2146 rev = None
2147 2147
2148 2148 # fast path the special `nullid` rev
2149 2149 if node == self.nullid:
2150 2150 return b""
2151 2151
2152 2152 # ``rawtext`` is the text as stored inside the revlog. Might be the
2153 2153 # revision or might need to be processed to retrieve the revision.
2154 2154 rev, rawtext, validated = self._rawtext(node, rev)
2155 2155
2156 2156 if raw and validated:
2157 2157 # if we don't want to process the raw text and that raw
2158 2158 # text is cached, we can exit early.
2159 2159 return rawtext
2160 2160 if rev is None:
2161 2161 rev = self.rev(node)
2162 2162 # the revlog's flag for this revision
2163 2163 # (usually alter its state or content)
2164 2164 flags = self.flags(rev)
2165 2165
2166 2166 if validated and flags == REVIDX_DEFAULT_FLAGS:
2167 2167 # no extra flags set, no flag processor runs, text = rawtext
2168 2168 return rawtext
2169 2169
2170 2170 if raw:
2171 2171 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2172 2172 text = rawtext
2173 2173 else:
2174 2174 r = flagutil.processflagsread(self, rawtext, flags)
2175 2175 text, validatehash = r
2176 2176 if validatehash:
2177 2177 self.checkhash(text, node, rev=rev)
2178 2178 if not validated:
2179 2179 self._revisioncache = (node, rev, rawtext)
2180 2180
2181 2181 return text
2182 2182
2183 2183 def _rawtext(self, node, rev):
2184 2184 """return the possibly unvalidated rawtext for a revision
2185 2185
2186 2186 returns (rev, rawtext, validated)
2187 2187 """
2188 2188
2189 2189 # revision in the cache (could be useful to apply delta)
2190 2190 cachedrev = None
2191 2191 # An intermediate text to apply deltas to
2192 2192 basetext = None
2193 2193
2194 2194 # Check if we have the entry in cache
2195 2195 # The cache entry looks like (node, rev, rawtext)
2196 2196 if self._revisioncache:
2197 2197 if self._revisioncache[0] == node:
2198 2198 return (rev, self._revisioncache[2], True)
2199 2199 cachedrev = self._revisioncache[1]
2200 2200
2201 2201 if rev is None:
2202 2202 rev = self.rev(node)
2203 2203
2204 2204 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2205 2205 if stopped:
2206 2206 basetext = self._revisioncache[2]
2207 2207
2208 2208 # drop cache to save memory, the caller is expected to
2209 2209 # update self._revisioncache after validating the text
2210 2210 self._revisioncache = None
2211 2211
2212 2212 targetsize = None
2213 2213 rawsize = self.index[rev][2]
2214 2214 if 0 <= rawsize:
2215 2215 targetsize = 4 * rawsize
2216 2216
2217 2217 bins = self._chunks(chain, targetsize=targetsize)
2218 2218 if basetext is None:
2219 2219 basetext = bytes(bins[0])
2220 2220 bins = bins[1:]
2221 2221
2222 2222 rawtext = mdiff.patches(basetext, bins)
2223 2223 del basetext # let us have a chance to free memory early
2224 2224 return (rev, rawtext, False)
2225 2225
2226 2226 def _sidedata(self, rev):
2227 2227 """Return the sidedata for a given revision number."""
2228 2228 index_entry = self.index[rev]
2229 2229 sidedata_offset = index_entry[8]
2230 2230 sidedata_size = index_entry[9]
2231 2231
2232 2232 if self._inline:
2233 2233 sidedata_offset += self.index.entry_size * (1 + rev)
2234 2234 if sidedata_size == 0:
2235 2235 return {}
2236 2236
2237 2237 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2238 2238 filename = self._sidedatafile
2239 2239 end = self._docket.sidedata_end
2240 2240 offset = sidedata_offset
2241 2241 length = sidedata_size
2242 2242 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2243 2243 raise error.RevlogError(m)
2244 2244
2245 2245 comp_segment = self._segmentfile_sidedata.read_chunk(
2246 2246 sidedata_offset, sidedata_size
2247 2247 )
2248 2248
2249 2249 comp = self.index[rev][11]
2250 2250 if comp == COMP_MODE_PLAIN:
2251 2251 segment = comp_segment
2252 2252 elif comp == COMP_MODE_DEFAULT:
2253 2253 segment = self._decompressor(comp_segment)
2254 2254 elif comp == COMP_MODE_INLINE:
2255 2255 segment = self.decompress(comp_segment)
2256 2256 else:
2257 2257 msg = b'unknown compression mode %d'
2258 2258 msg %= comp
2259 2259 raise error.RevlogError(msg)
2260 2260
2261 2261 sidedata = sidedatautil.deserialize_sidedata(segment)
2262 2262 return sidedata
2263 2263
2264 2264 def rawdata(self, nodeorrev):
2265 2265 """return an uncompressed raw data of a given node or revision number."""
2266 2266 return self._revisiondata(nodeorrev, raw=True)
2267 2267
2268 2268 def hash(self, text, p1, p2):
2269 2269 """Compute a node hash.
2270 2270
2271 2271 Available as a function so that subclasses can replace the hash
2272 2272 as needed.
2273 2273 """
2274 2274 return storageutil.hashrevisionsha1(text, p1, p2)
2275 2275
2276 2276 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2277 2277 """Check node hash integrity.
2278 2278
2279 2279 Available as a function so that subclasses can extend hash mismatch
2280 2280 behaviors as needed.
2281 2281 """
2282 2282 try:
2283 2283 if p1 is None and p2 is None:
2284 2284 p1, p2 = self.parents(node)
2285 2285 if node != self.hash(text, p1, p2):
2286 2286 # Clear the revision cache on hash failure. The revision cache
2287 2287 # only stores the raw revision and clearing the cache does have
2288 2288 # the side-effect that we won't have a cache hit when the raw
2289 2289 # revision data is accessed. But this case should be rare and
2290 2290 # it is extra work to teach the cache about the hash
2291 2291 # verification state.
2292 2292 if self._revisioncache and self._revisioncache[0] == node:
2293 2293 self._revisioncache = None
2294 2294
2295 2295 revornode = rev
2296 2296 if revornode is None:
2297 2297 revornode = templatefilters.short(hex(node))
2298 2298 raise error.RevlogError(
2299 2299 _(b"integrity check failed on %s:%s")
2300 2300 % (self.display_id, pycompat.bytestr(revornode))
2301 2301 )
2302 2302 except error.RevlogError:
2303 2303 if self._censorable and storageutil.iscensoredtext(text):
2304 2304 raise error.CensoredNodeError(self.display_id, node, text)
2305 2305 raise
2306 2306
2307 2307 @property
2308 2308 def _split_index_file(self):
2309 2309 """the path where to expect the index of an ongoing splitting operation
2310 2310
2311 2311 The file will only exist if a splitting operation is in progress, but
2312 2312 it is always expected at the same location."""
2313 2313 parts = self.radix.split(b'/')
2314 2314 if len(parts) > 1:
2315 2315 # appends a '-s' suffix to the ``data/`` or ``meta/`` base
2316 2316 head = parts[0] + b'-s'
2317 2317 mids = parts[1:-1]
2318 2318 tail = parts[-1] + b'.i'
2319 2319 pieces = [head] + mids + [tail]
2320 2320 return b'/'.join(pieces)
2321 2321 else:
2322 2322 # the revlog is stored at the root of the store (changelog or
2323 2323 # manifest), no risk of collision.
2324 2324 return self.radix + b'.i.s'
2325 2325
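# An illustrative example of the naming rule above; the paths are made up:
#
#   radix = b'data/foo/bar'  ->  split index written as b'data-s/foo/bar.i'
#   radix = b'00manifest'    ->  split index written as b'00manifest.i.s'
#
# so the index of an in-progress split never collides with the live index
# file it will eventually replace.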
2326 2326 def _enforceinlinesize(self, tr, side_write=True):
2327 2327 """Check if the revlog is too big for inline and convert if so.
2328 2328
2329 2329 This should be called after revisions are added to the revlog. If the
2330 2330 revlog has grown too large to be an inline revlog, it will convert it
2331 2331 to use multiple index and data files.
2332 2332 """
2333 2333 tiprev = len(self) - 1
2334 2334 total_size = self.start(tiprev) + self.length(tiprev)
2335 2335 if not self._inline or total_size < _maxinline:
2336 2336 return
2337 2337
2338 2338 troffset = tr.findoffset(self._indexfile)
2339 2339 if troffset is None:
2340 2340 raise error.RevlogError(
2341 2341 _(b"%s not found in the transaction") % self._indexfile
2342 2342 )
2343 2343 if troffset:
2344 2344 tr.addbackup(self._indexfile, for_offset=True)
2345 2345 tr.add(self._datafile, 0)
2346 2346
2347 2347 existing_handles = False
2348 2348 if self._writinghandles is not None:
2349 2349 existing_handles = True
2350 2350 fp = self._writinghandles[0]
2351 2351 fp.flush()
2352 2352 fp.close()
2353 2353 # We can't use the cached file handle after close(). So prevent
2354 2354 # its usage.
2355 2355 self._writinghandles = None
2356 2356 self._segmentfile.writing_handle = None
2357 2357 # No need to deal with sidedata writing handle as it is only
2358 2358 # relevant with revlog-v2 which is never inline, not reaching
2359 2359 # this code
2360 2360 if side_write:
2361 2361 old_index_file_path = self._indexfile
2362 2362 new_index_file_path = self._split_index_file
2363 2363 opener = self.opener
2364 2364 weak_self = weakref.ref(self)
2365 2365
2366 2366 # the "split" index replace the real index when the transaction is finalized
2367 2367 def finalize_callback(tr):
2368 2368 opener.rename(
2369 2369 new_index_file_path,
2370 2370 old_index_file_path,
2371 2371 checkambig=True,
2372 2372 )
2373 2373 maybe_self = weak_self()
2374 2374 if maybe_self is not None:
2375 2375 maybe_self._indexfile = old_index_file_path
2376 2376
2377 2377 def abort_callback(tr):
2378 2378 maybe_self = weak_self()
2379 2379 if maybe_self is not None:
2380 2380 maybe_self._indexfile = old_index_file_path
2381 2381
2382 2382 tr.registertmp(new_index_file_path)
2383 2383 if self.target[1] is not None:
2384 2384 callback_id = b'000-revlog-split-%d-%s' % self.target
2385 2385 else:
2386 2386 callback_id = b'000-revlog-split-%d' % self.target[0]
2387 2387 tr.addfinalize(callback_id, finalize_callback)
2388 2388 tr.addabort(callback_id, abort_callback)
2389 2389
2390 2390 new_dfh = self._datafp(b'w+')
2391 2391 new_dfh.truncate(0) # drop any potentially existing data
2392 2392 try:
2393 2393 with self.reading():
2394 2394 for r in self:
2395 2395 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2396 2396 new_dfh.flush()
2397 2397
2398 2398 if side_write:
2399 2399 self._indexfile = new_index_file_path
2400 2400 with self.__index_new_fp() as fp:
2401 2401 self._format_flags &= ~FLAG_INLINE_DATA
2402 2402 self._inline = False
2403 2403 for i in self:
2404 2404 e = self.index.entry_binary(i)
2405 2405 if i == 0 and self._docket is None:
2406 2406 header = self._format_flags | self._format_version
2407 2407 header = self.index.pack_header(header)
2408 2408 e = header + e
2409 2409 fp.write(e)
2410 2410 if self._docket is not None:
2411 2411 self._docket.index_end = fp.tell()
2412 2412
2413 2413 # If we don't use side-write, the temp file replaces the real
2414 2414 # index when we exit the context manager
2415 2415
2416 2416 nodemaputil.setup_persistent_nodemap(tr, self)
2417 2417 self._segmentfile = randomaccessfile.randomaccessfile(
2418 2418 self.opener,
2419 2419 self._datafile,
2420 2420 self._chunkcachesize,
2421 2421 )
2422 2422
2423 2423 if existing_handles:
2424 2424 # switched from inline to conventional reopen the index
2425 2425 ifh = self.__index_write_fp()
2426 2426 self._writinghandles = (ifh, new_dfh, None)
2427 2427 self._segmentfile.writing_handle = new_dfh
2428 2428 new_dfh = None
2429 2429 # No need to deal with sidedata writing handle as it is only
2430 2430 # relevant with revlog-v2 which is never inline, not reaching
2431 2431 # this code
2432 2432 finally:
2433 2433 if new_dfh is not None:
2434 2434 new_dfh.close()
2435 2435
2436 2436 def _nodeduplicatecallback(self, transaction, node):
2437 2437 """called when trying to add a node already stored."""
2438 2438
2439 2439 @contextlib.contextmanager
2440 2440 def reading(self):
2441 2441 """Context manager that keeps data and sidedata files open for reading"""
2442 2442 if len(self.index) == 0:
2443 2443 yield # nothing to be read
2444 2444 else:
2445 2445 with self._segmentfile.reading():
2446 2446 with self._segmentfile_sidedata.reading():
2447 2447 yield
2448 2448
2449 2449 @contextlib.contextmanager
2450 2450 def _writing(self, transaction):
2451 2451 if self._trypending:
2452 2452 msg = b'try to write in a `trypending` revlog: %s'
2453 2453 msg %= self.display_id
2454 2454 raise error.ProgrammingError(msg)
2455 2455 if self._writinghandles is not None:
2456 2456 yield
2457 2457 else:
2458 2458 ifh = dfh = sdfh = None
2459 2459 try:
2460 2460 r = len(self)
2461 2461 # opening the data file.
2462 2462 dsize = 0
2463 2463 if r:
2464 2464 dsize = self.end(r - 1)
2465 2465 dfh = None
2466 2466 if not self._inline:
2467 2467 try:
2468 2468 dfh = self._datafp(b"r+")
2469 2469 if self._docket is None:
2470 2470 dfh.seek(0, os.SEEK_END)
2471 2471 else:
2472 2472 dfh.seek(self._docket.data_end, os.SEEK_SET)
2473 2473 except FileNotFoundError:
2474 2474 dfh = self._datafp(b"w+")
2475 2475 transaction.add(self._datafile, dsize)
2476 2476 if self._sidedatafile is not None:
2477 2477 # revlog-v2 does not inline, help Pytype
2478 2478 assert dfh is not None
2479 2479 try:
2480 2480 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2481 2481 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2482 2482 except FileNotFoundError:
2483 2483 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2484 2484 transaction.add(
2485 2485 self._sidedatafile, self._docket.sidedata_end
2486 2486 )
2487 2487
2488 2488 # opening the index file.
2489 2489 isize = r * self.index.entry_size
2490 2490 ifh = self.__index_write_fp()
2491 2491 if self._inline:
2492 2492 transaction.add(self._indexfile, dsize + isize)
2493 2493 else:
2494 2494 transaction.add(self._indexfile, isize)
2495 2495 # exposing all file handle for writing.
2496 2496 self._writinghandles = (ifh, dfh, sdfh)
2497 2497 self._segmentfile.writing_handle = ifh if self._inline else dfh
2498 2498 self._segmentfile_sidedata.writing_handle = sdfh
2499 2499 yield
2500 2500 if self._docket is not None:
2501 2501 self._write_docket(transaction)
2502 2502 finally:
2503 2503 self._writinghandles = None
2504 2504 self._segmentfile.writing_handle = None
2505 2505 self._segmentfile_sidedata.writing_handle = None
2506 2506 if dfh is not None:
2507 2507 dfh.close()
2508 2508 if sdfh is not None:
2509 2509 sdfh.close()
2510 2510 # closing the index file last to avoid exposing references to
2511 2511 # potentially unflushed data content.
2512 2512 if ifh is not None:
2513 2513 ifh.close()
2514 2514
2515 2515 def _write_docket(self, transaction):
2516 2516 """write the current docket on disk
2517 2517
2518 2518 Exists as a method to help the changelog implement transaction logic
2519 2519
2520 2520 We could also imagine using the same transaction logic for all revlogs
2521 2521 since dockets are cheap."""
2522 2522 self._docket.write(transaction)
2523 2523
2524 2524 def addrevision(
2525 2525 self,
2526 2526 text,
2527 2527 transaction,
2528 2528 link,
2529 2529 p1,
2530 2530 p2,
2531 2531 cachedelta=None,
2532 2532 node=None,
2533 2533 flags=REVIDX_DEFAULT_FLAGS,
2534 2534 deltacomputer=None,
2535 2535 sidedata=None,
2536 2536 ):
2537 2537 """add a revision to the log
2538 2538
2539 2539 text - the revision data to add
2540 2540 transaction - the transaction object used for rollback
2541 2541 link - the linkrev data to add
2542 2542 p1, p2 - the parent nodeids of the revision
2543 2543 cachedelta - an optional precomputed delta
2544 2544 node - nodeid of revision; typically node is not specified, and it is
2545 2545 computed by default as hash(text, p1, p2), however subclasses might
2546 2546 use different hashing method (and override checkhash() in such case)
2547 2547 flags - the known flags to set on the revision
2548 2548 deltacomputer - an optional deltacomputer instance shared between
2549 2549 multiple calls
2550 2550 """
2551 2551 if link == nullrev:
2552 2552 raise error.RevlogError(
2553 2553 _(b"attempted to add linkrev -1 to %s") % self.display_id
2554 2554 )
2555 2555
2556 2556 if sidedata is None:
2557 2557 sidedata = {}
2558 2558 elif sidedata and not self.hassidedata:
2559 2559 raise error.ProgrammingError(
2560 2560 _(b"trying to add sidedata to a revlog who don't support them")
2561 2561 )
2562 2562
2563 2563 if flags:
2564 2564 node = node or self.hash(text, p1, p2)
2565 2565
2566 2566 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2567 2567
2568 2568 # If the flag processor modifies the revision data, ignore any provided
2569 2569 # cachedelta.
2570 2570 if rawtext != text:
2571 2571 cachedelta = None
2572 2572
2573 2573 if len(rawtext) > _maxentrysize:
2574 2574 raise error.RevlogError(
2575 2575 _(
2576 2576 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2577 2577 )
2578 2578 % (self.display_id, len(rawtext))
2579 2579 )
2580 2580
2581 2581 node = node or self.hash(rawtext, p1, p2)
2582 2582 rev = self.index.get_rev(node)
2583 2583 if rev is not None:
2584 2584 return rev
2585 2585
2586 2586 if validatehash:
2587 2587 self.checkhash(rawtext, node, p1=p1, p2=p2)
2588 2588
2589 2589 return self.addrawrevision(
2590 2590 rawtext,
2591 2591 transaction,
2592 2592 link,
2593 2593 p1,
2594 2594 p2,
2595 2595 node,
2596 2596 flags,
2597 2597 cachedelta=cachedelta,
2598 2598 deltacomputer=deltacomputer,
2599 2599 sidedata=sidedata,
2600 2600 )
2601 2601
2602 2602 def addrawrevision(
2603 2603 self,
2604 2604 rawtext,
2605 2605 transaction,
2606 2606 link,
2607 2607 p1,
2608 2608 p2,
2609 2609 node,
2610 2610 flags,
2611 2611 cachedelta=None,
2612 2612 deltacomputer=None,
2613 2613 sidedata=None,
2614 2614 ):
2615 2615 """add a raw revision with known flags, node and parents
2616 2616 useful when reusing a revision not stored in this revlog (ex: received
2617 2617 over wire, or read from an external bundle).
2618 2618 """
2619 2619 with self._writing(transaction):
2620 2620 return self._addrevision(
2621 2621 node,
2622 2622 rawtext,
2623 2623 transaction,
2624 2624 link,
2625 2625 p1,
2626 2626 p2,
2627 2627 flags,
2628 2628 cachedelta,
2629 2629 deltacomputer=deltacomputer,
2630 2630 sidedata=sidedata,
2631 2631 )
2632 2632
2633 2633 def compress(self, data):
2634 2634 """Generate a possibly-compressed representation of data."""
2635 2635 if not data:
2636 2636 return b'', data
2637 2637
2638 2638 compressed = self._compressor.compress(data)
2639 2639
2640 2640 if compressed:
2641 2641 # The revlog compressor added the header in the returned data.
2642 2642 return b'', compressed
2643 2643
2644 2644 if data[0:1] == b'\0':
2645 2645 return b'', data
2646 2646 return b'u', data
2647 2647
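# A short sketch of the chunk header convention shared by compress() above
# and decompress() below; the byte values are illustrative only:
#
#   (b'', b'x\x9c...')  # the compressor emitted its own header (zlib 'x')
#   (b'', b'\0raw...')  # raw data already starting with '\0', stored as-is
#   (b'u', b'raw...')   # uncompressed data, flagged with a 'u' marker
#
# decompress() routes on that first byte: 'x' goes to zlib, '\0' is returned
# untouched, 'u' is stripped, and any other byte selects a registered
# compression engine via _get_decompressor().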
2648 2648 def decompress(self, data):
2649 2649 """Decompress a revlog chunk.
2650 2650
2651 2651 The chunk is expected to begin with a header identifying the
2652 2652 format type so it can be routed to an appropriate decompressor.
2653 2653 """
2654 2654 if not data:
2655 2655 return data
2656 2656
2657 2657 # Revlogs are read much more frequently than they are written and many
2658 2658 # chunks only take microseconds to decompress, so performance is
2659 2659 # important here.
2660 2660 #
2661 2661 # We can make a few assumptions about revlogs:
2662 2662 #
2663 2663 # 1) the majority of chunks will be compressed (as opposed to inline
2664 2664 # raw data).
2665 2665 # 2) decompressing *any* data will likely be at least 10x slower than
2666 2666 # returning raw inline data.
2667 2667 # 3) we want to prioritize common and officially supported compression
2668 2668 # engines
2669 2669 #
2670 2670 # It follows that we want to optimize for "decompress compressed data
2671 2671 # when encoded with common and officially supported compression engines"
2672 2672 # case over "raw data" and "data encoded by less common or non-official
2673 2673 # compression engines." That is why we have the inline lookup first
2674 2674 # followed by the compengines lookup.
2675 2675 #
2676 2676 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2677 2677 # compressed chunks. And this matters for changelog and manifest reads.
2678 2678 t = data[0:1]
2679 2679
2680 2680 if t == b'x':
2681 2681 try:
2682 2682 return _zlibdecompress(data)
2683 2683 except zlib.error as e:
2684 2684 raise error.RevlogError(
2685 2685 _(b'revlog decompress error: %s')
2686 2686 % stringutil.forcebytestr(e)
2687 2687 )
2688 2688 # '\0' is more common than 'u' so it goes first.
2689 2689 elif t == b'\0':
2690 2690 return data
2691 2691 elif t == b'u':
2692 2692 return util.buffer(data, 1)
2693 2693
2694 2694 compressor = self._get_decompressor(t)
2695 2695
2696 2696 return compressor.decompress(data)
2697 2697
2698 2698 def _addrevision(
2699 2699 self,
2700 2700 node,
2701 2701 rawtext,
2702 2702 transaction,
2703 2703 link,
2704 2704 p1,
2705 2705 p2,
2706 2706 flags,
2707 2707 cachedelta,
2708 2708 alwayscache=False,
2709 2709 deltacomputer=None,
2710 2710 sidedata=None,
2711 2711 ):
2712 2712 """internal function to add revisions to the log
2713 2713
2714 2714 see addrevision for argument descriptions.
2715 2715
2716 2716 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2717 2717
2718 2718 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2719 2719 be used.
2720 2720
2721 2721 invariants:
2722 2722 - rawtext is optional (can be None); if not set, cachedelta must be set.
2723 2723 if both are set, they must correspond to each other.
2724 2724 """
2725 2725 if node == self.nullid:
2726 2726 raise error.RevlogError(
2727 2727 _(b"%s: attempt to add null revision") % self.display_id
2728 2728 )
2729 2729 if (
2730 2730 node == self.nodeconstants.wdirid
2731 2731 or node in self.nodeconstants.wdirfilenodeids
2732 2732 ):
2733 2733 raise error.RevlogError(
2734 2734 _(b"%s: attempt to add wdir revision") % self.display_id
2735 2735 )
2736 2736 if self._writinghandles is None:
2737 2737 msg = b'adding revision outside `revlog._writing` context'
2738 2738 raise error.ProgrammingError(msg)
2739 2739
2740 2740 btext = [rawtext]
2741 2741
2742 2742 curr = len(self)
2743 2743 prev = curr - 1
2744 2744
2745 2745 offset = self._get_data_offset(prev)
2746 2746
2747 2747 if self._concurrencychecker:
2748 2748 ifh, dfh, sdfh = self._writinghandles
2749 2749 # XXX no checking for the sidedata file
2750 2750 if self._inline:
2751 2751 # offset is "as if" it were in the .d file, so we need to add on
2752 2752 # the size of the entry metadata.
2753 2753 self._concurrencychecker(
2754 2754 ifh, self._indexfile, offset + curr * self.index.entry_size
2755 2755 )
2756 2756 else:
2757 2757 # Entries in the .i are a consistent size.
2758 2758 self._concurrencychecker(
2759 2759 ifh, self._indexfile, curr * self.index.entry_size
2760 2760 )
2761 2761 self._concurrencychecker(dfh, self._datafile, offset)
2762 2762
2763 2763 p1r, p2r = self.rev(p1), self.rev(p2)
2764 2764
2765 2765 # full versions are inserted when the needed deltas
2766 2766 # become comparable to the uncompressed text
2767 2767 if rawtext is None:
2768 2768 # need rawtext size, before changed by flag processors, which is
2769 2769 # the non-raw size. use revlog explicitly to avoid filelog's extra
2770 2770 # logic that might remove metadata size.
2771 2771 textlen = mdiff.patchedsize(
2772 2772 revlog.size(self, cachedelta[0]), cachedelta[1]
2773 2773 )
2774 2774 else:
2775 2775 textlen = len(rawtext)
2776 2776
2777 2777 if deltacomputer is None:
2778 2778 write_debug = None
2779 2779 if self._debug_delta:
2780 2780 write_debug = transaction._report
2781 2781 deltacomputer = deltautil.deltacomputer(
2782 2782 self, write_debug=write_debug
2783 2783 )
2784 2784
2785 2785 if cachedelta is not None and len(cachedelta) == 2:
2786 2786 # If the cached delta has no information about how it should be
2787 2787 # reused, add the default reuse instruction according to the
2788 2788 # revlog's configuration.
2789 if self._generaldelta and self._lazydeltabase:
2789 if (
2790 self.delta_config.general_delta
2791 and self.delta_config.lazy_delta_base
2792 ):
2790 2793 delta_base_reuse = DELTA_BASE_REUSE_TRY
2791 2794 else:
2792 2795 delta_base_reuse = DELTA_BASE_REUSE_NO
2793 2796 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2794 2797
2795 2798 revinfo = revlogutils.revisioninfo(
2796 2799 node,
2797 2800 p1,
2798 2801 p2,
2799 2802 btext,
2800 2803 textlen,
2801 2804 cachedelta,
2802 2805 flags,
2803 2806 )
2804 2807
2805 2808 deltainfo = deltacomputer.finddeltainfo(revinfo)
2806 2809
2807 2810 compression_mode = COMP_MODE_INLINE
2808 2811 if self._docket is not None:
2809 2812 default_comp = self._docket.default_compression_header
2810 2813 r = deltautil.delta_compression(default_comp, deltainfo)
2811 2814 compression_mode, deltainfo = r
2812 2815
2813 2816 sidedata_compression_mode = COMP_MODE_INLINE
2814 2817 if sidedata and self.hassidedata:
2815 2818 sidedata_compression_mode = COMP_MODE_PLAIN
2816 2819 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2817 2820 sidedata_offset = self._docket.sidedata_end
2818 2821 h, comp_sidedata = self.compress(serialized_sidedata)
2819 2822 if (
2820 2823 h != b'u'
2821 2824 and comp_sidedata[0:1] != b'\0'
2822 2825 and len(comp_sidedata) < len(serialized_sidedata)
2823 2826 ):
2824 2827 assert not h
2825 2828 if (
2826 2829 comp_sidedata[0:1]
2827 2830 == self._docket.default_compression_header
2828 2831 ):
2829 2832 sidedata_compression_mode = COMP_MODE_DEFAULT
2830 2833 serialized_sidedata = comp_sidedata
2831 2834 else:
2832 2835 sidedata_compression_mode = COMP_MODE_INLINE
2833 2836 serialized_sidedata = comp_sidedata
2834 2837 else:
2835 2838 serialized_sidedata = b""
2836 2839 # Don't store the offset if the sidedata is empty; that way
2837 2840 # empty sidedata is easy to detect and is no different from
2838 2841 # sidedata we add manually.
2839 2842 sidedata_offset = 0
2840 2843
2841 2844 rank = RANK_UNKNOWN
2842 2845 if self._compute_rank:
2843 2846 if (p1r, p2r) == (nullrev, nullrev):
2844 2847 rank = 1
2845 2848 elif p1r != nullrev and p2r == nullrev:
2846 2849 rank = 1 + self.fast_rank(p1r)
2847 2850 elif p1r == nullrev and p2r != nullrev:
2848 2851 rank = 1 + self.fast_rank(p2r)
2849 2852 else: # merge node
2850 2853 if rustdagop is not None and self.index.rust_ext_compat:
2851 2854 rank = rustdagop.rank(self.index, p1r, p2r)
2852 2855 else:
2853 2856 pmin, pmax = sorted((p1r, p2r))
2854 2857 rank = 1 + self.fast_rank(pmax)
2855 2858 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2856 2859
2857 2860 e = revlogutils.entry(
2858 2861 flags=flags,
2859 2862 data_offset=offset,
2860 2863 data_compressed_length=deltainfo.deltalen,
2861 2864 data_uncompressed_length=textlen,
2862 2865 data_compression_mode=compression_mode,
2863 2866 data_delta_base=deltainfo.base,
2864 2867 link_rev=link,
2865 2868 parent_rev_1=p1r,
2866 2869 parent_rev_2=p2r,
2867 2870 node_id=node,
2868 2871 sidedata_offset=sidedata_offset,
2869 2872 sidedata_compressed_length=len(serialized_sidedata),
2870 2873 sidedata_compression_mode=sidedata_compression_mode,
2871 2874 rank=rank,
2872 2875 )
2873 2876
2874 2877 self.index.append(e)
2875 2878 entry = self.index.entry_binary(curr)
2876 2879 if curr == 0 and self._docket is None:
2877 2880 header = self._format_flags | self._format_version
2878 2881 header = self.index.pack_header(header)
2879 2882 entry = header + entry
2880 2883 self._writeentry(
2881 2884 transaction,
2882 2885 entry,
2883 2886 deltainfo.data,
2884 2887 link,
2885 2888 offset,
2886 2889 serialized_sidedata,
2887 2890 sidedata_offset,
2888 2891 )
2889 2892
2890 2893 rawtext = btext[0]
2891 2894
2892 2895 if alwayscache and rawtext is None:
2893 2896 rawtext = deltacomputer.buildtext(revinfo)
2894 2897
2895 2898 if type(rawtext) == bytes: # only accept immutable objects
2896 2899 self._revisioncache = (node, curr, rawtext)
2897 2900 self._chainbasecache[curr] = deltainfo.chainbase
2898 2901 return curr
2899 2902
2900 2903 def _get_data_offset(self, prev):
2901 2904 """Returns the current offset in the (in-transaction) data file.
2902 2905 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2903 2906 file to store that information: since sidedata can be rewritten to the
2904 2907 end of the data file within a transaction, you can have cases where, for
2905 2908 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2906 2909 to `n - 1`'s sidedata being written after `n`'s data.
2907 2910
2908 2911 TODO cache this in a docket file before getting out of experimental."""
2909 2912 if self._docket is None:
2910 2913 return self.end(prev)
2911 2914 else:
2912 2915 return self._docket.data_end
2913 2916
2914 2917 def _writeentry(
2915 2918 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2916 2919 ):
2917 2920 # Files opened in a+ mode have inconsistent behavior on various
2918 2921 # platforms. Windows requires that a file positioning call be made
2919 2922 # when the file handle transitions between reads and writes. See
2920 2923 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2921 2924 # platforms, Python or the platform itself can be buggy. Some versions
2922 2925 # of Solaris have been observed to not append at the end of the file
2923 2926 # if the file was seeked to before the end. See issue4943 for more.
2924 2927 #
2925 2928 # We work around this issue by inserting a seek() before writing.
2926 2929 # Note: This is likely not necessary on Python 3. However, because
2927 2930 # the file handle is reused for reads and may be seeked there, we need
2928 2931 # to be careful before changing this.
2929 2932 if self._writinghandles is None:
2930 2933 msg = b'adding revision outside `revlog._writing` context'
2931 2934 raise error.ProgrammingError(msg)
2932 2935 ifh, dfh, sdfh = self._writinghandles
2933 2936 if self._docket is None:
2934 2937 ifh.seek(0, os.SEEK_END)
2935 2938 else:
2936 2939 ifh.seek(self._docket.index_end, os.SEEK_SET)
2937 2940 if dfh:
2938 2941 if self._docket is None:
2939 2942 dfh.seek(0, os.SEEK_END)
2940 2943 else:
2941 2944 dfh.seek(self._docket.data_end, os.SEEK_SET)
2942 2945 if sdfh:
2943 2946 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2944 2947
2945 2948 curr = len(self) - 1
2946 2949 if not self._inline:
2947 2950 transaction.add(self._datafile, offset)
2948 2951 if self._sidedatafile:
2949 2952 transaction.add(self._sidedatafile, sidedata_offset)
2950 2953 transaction.add(self._indexfile, curr * len(entry))
2951 2954 if data[0]:
2952 2955 dfh.write(data[0])
2953 2956 dfh.write(data[1])
2954 2957 if sidedata:
2955 2958 sdfh.write(sidedata)
2956 2959 ifh.write(entry)
2957 2960 else:
2958 2961 offset += curr * self.index.entry_size
2959 2962 transaction.add(self._indexfile, offset)
2960 2963 ifh.write(entry)
2961 2964 ifh.write(data[0])
2962 2965 ifh.write(data[1])
2963 2966 assert not sidedata
2964 2967 self._enforceinlinesize(transaction)
2965 2968 if self._docket is not None:
2966 2969 # revlog-v2 always has 3 writing handles, help Pytype
2967 2970 wh1 = self._writinghandles[0]
2968 2971 wh2 = self._writinghandles[1]
2969 2972 wh3 = self._writinghandles[2]
2970 2973 assert wh1 is not None
2971 2974 assert wh2 is not None
2972 2975 assert wh3 is not None
2973 2976 self._docket.index_end = wh1.tell()
2974 2977 self._docket.data_end = wh2.tell()
2975 2978 self._docket.sidedata_end = wh3.tell()
2976 2979
2977 2980 nodemaputil.setup_persistent_nodemap(transaction, self)
2978 2981
2979 2982 def addgroup(
2980 2983 self,
2981 2984 deltas,
2982 2985 linkmapper,
2983 2986 transaction,
2984 2987 alwayscache=False,
2985 2988 addrevisioncb=None,
2986 2989 duplicaterevisioncb=None,
2987 2990 debug_info=None,
2988 2991 delta_base_reuse_policy=None,
2989 2992 ):
2990 2993 """
2991 2994 add a delta group
2992 2995
2993 2996 given a set of deltas, add them to the revision log. the
2994 2997 first delta is against its parent, which should be in our
2995 2998 log, the rest are against the previous delta.
2996 2999
2997 3000 If ``addrevisioncb`` is defined, it will be called with arguments of
2998 3001 this revlog and the node that was added.
2999 3002 """
3000 3003
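# A descriptive note on the expected shape of each element of `deltas`,
# matching the unpacking performed in the loop below:
#
#   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
#
# where `deltabase` names the revision the delta applies against and must
# already be known to this revlog (or appear earlier in the group).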
3001 3004 if self._adding_group:
3002 3005 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3003 3006
3004 3007 # read the default delta-base reuse policy from revlog config if the
3005 3008 # group did not specify one.
3006 3009 if delta_base_reuse_policy is None:
3007 if self._generaldelta and self._lazydeltabase:
3010 if (
3011 self.delta_config.general_delta
3012 and self.delta_config.lazy_delta_base
3013 ):
3008 3014 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3009 3015 else:
3010 3016 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3011 3017
3012 3018 self._adding_group = True
3013 3019 empty = True
3014 3020 try:
3015 3021 with self._writing(transaction):
3016 3022 write_debug = None
3017 3023 if self._debug_delta:
3018 3024 write_debug = transaction._report
3019 3025 deltacomputer = deltautil.deltacomputer(
3020 3026 self,
3021 3027 write_debug=write_debug,
3022 3028 debug_info=debug_info,
3023 3029 )
3024 3030 # loop through our set of deltas
3025 3031 for data in deltas:
3026 3032 (
3027 3033 node,
3028 3034 p1,
3029 3035 p2,
3030 3036 linknode,
3031 3037 deltabase,
3032 3038 delta,
3033 3039 flags,
3034 3040 sidedata,
3035 3041 ) = data
3036 3042 link = linkmapper(linknode)
3037 3043 flags = flags or REVIDX_DEFAULT_FLAGS
3038 3044
3039 3045 rev = self.index.get_rev(node)
3040 3046 if rev is not None:
3041 3047 # this can happen if two branches make the same change
3042 3048 self._nodeduplicatecallback(transaction, rev)
3043 3049 if duplicaterevisioncb:
3044 3050 duplicaterevisioncb(self, rev)
3045 3051 empty = False
3046 3052 continue
3047 3053
3048 3054 for p in (p1, p2):
3049 3055 if not self.index.has_node(p):
3050 3056 raise error.LookupError(
3051 3057 p, self.radix, _(b'unknown parent')
3052 3058 )
3053 3059
3054 3060 if not self.index.has_node(deltabase):
3055 3061 raise error.LookupError(
3056 3062 deltabase, self.display_id, _(b'unknown delta base')
3057 3063 )
3058 3064
3059 3065 baserev = self.rev(deltabase)
3060 3066
3061 3067 if baserev != nullrev and self.iscensored(baserev):
3062 3068 # if base is censored, delta must be full replacement in a
3063 3069 # single patch operation
3064 3070 hlen = struct.calcsize(b">lll")
3065 3071 oldlen = self.rawsize(baserev)
3066 3072 newlen = len(delta) - hlen
3067 3073 if delta[:hlen] != mdiff.replacediffheader(
3068 3074 oldlen, newlen
3069 3075 ):
3070 3076 raise error.CensoredBaseError(
3071 3077 self.display_id, self.node(baserev)
3072 3078 )
3073 3079
3074 3080 if not flags and self._peek_iscensored(baserev, delta):
3075 3081 flags |= REVIDX_ISCENSORED
3076 3082
3077 3083 # We assume consumers of addrevisioncb will want to retrieve
3078 3084 # the added revision, which will require a call to
3079 3085 # revision(). revision() will fast path if there is a cache
3080 3086 # hit. So, we tell _addrevision() to always cache in this case.
3081 3087 # We're only using addgroup() in the context of changegroup
3082 3088 # generation so the revision data can always be handled as raw
3083 3089 # by the flagprocessor.
3084 3090 rev = self._addrevision(
3085 3091 node,
3086 3092 None,
3087 3093 transaction,
3088 3094 link,
3089 3095 p1,
3090 3096 p2,
3091 3097 flags,
3092 3098 (baserev, delta, delta_base_reuse_policy),
3093 3099 alwayscache=alwayscache,
3094 3100 deltacomputer=deltacomputer,
3095 3101 sidedata=sidedata,
3096 3102 )
3097 3103
3098 3104 if addrevisioncb:
3099 3105 addrevisioncb(self, rev)
3100 3106 empty = False
3101 3107 finally:
3102 3108 self._adding_group = False
3103 3109 return not empty
3104 3110
3105 3111 def iscensored(self, rev):
3106 3112 """Check if a file revision is censored."""
3107 3113 if not self._censorable:
3108 3114 return False
3109 3115
3110 3116 return self.flags(rev) & REVIDX_ISCENSORED
3111 3117
3112 3118 def _peek_iscensored(self, baserev, delta):
3113 3119 """Quickly check if a delta produces a censored revision."""
3114 3120 if not self._censorable:
3115 3121 return False
3116 3122
3117 3123 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3118 3124
3119 3125 def getstrippoint(self, minlink):
3120 3126 """find the minimum rev that must be stripped to strip the linkrev
3121 3127
3122 3128 Returns a tuple containing the minimum rev and a set of all revs that
3123 3129 have linkrevs that will be broken by this strip.
3124 3130 """
3125 3131 return storageutil.resolvestripinfo(
3126 3132 minlink,
3127 3133 len(self) - 1,
3128 3134 self.headrevs(),
3129 3135 self.linkrev,
3130 3136 self.parentrevs,
3131 3137 )
3132 3138
3133 3139 def strip(self, minlink, transaction):
3134 3140 """truncate the revlog on the first revision with a linkrev >= minlink
3135 3141
3136 3142 This function is called when we're stripping revision minlink and
3137 3143 its descendants from the repository.
3138 3144
3139 3145 We have to remove all revisions with linkrev >= minlink, because
3140 3146 the equivalent changelog revisions will be renumbered after the
3141 3147 strip.
3142 3148
3143 3149 So we truncate the revlog on the first of these revisions, and
3144 3150 trust that the caller has saved the revisions that shouldn't be
3145 3151 removed and that it'll re-add them after this truncation.
3146 3152 """
3147 3153 if len(self) == 0:
3148 3154 return
3149 3155
3150 3156 rev, _ = self.getstrippoint(minlink)
3151 3157 if rev == len(self):
3152 3158 return
3153 3159
3154 3160 # first truncate the files on disk
3155 3161 data_end = self.start(rev)
3156 3162 if not self._inline:
3157 3163 transaction.add(self._datafile, data_end)
3158 3164 end = rev * self.index.entry_size
3159 3165 else:
3160 3166 end = data_end + (rev * self.index.entry_size)
3161 3167
3162 3168 if self._sidedatafile:
3163 3169 sidedata_end = self.sidedata_cut_off(rev)
3164 3170 transaction.add(self._sidedatafile, sidedata_end)
3165 3171
3166 3172 transaction.add(self._indexfile, end)
3167 3173 if self._docket is not None:
3168 3174 # XXX we could leverage the docket while stripping. However, it is
3169 3175 # not powerful enough at the time of this comment
3170 3176 self._docket.index_end = end
3171 3177 self._docket.data_end = data_end
3172 3178 self._docket.sidedata_end = sidedata_end
3173 3179 self._docket.write(transaction, stripping=True)
3174 3180
3175 3181 # then reset internal state in memory to forget those revisions
3176 3182 self._revisioncache = None
3177 3183 self._chaininfocache = util.lrucachedict(500)
3178 3184 self._segmentfile.clear_cache()
3179 3185 self._segmentfile_sidedata.clear_cache()
3180 3186
3181 3187 del self.index[rev:-1]
3182 3188
3183 3189 def checksize(self):
3184 3190 """Check size of index and data files
3185 3191
3186 3192 return a (dd, di) tuple.
3187 3193 - dd: extra bytes for the "data" file
3188 3194 - di: extra bytes for the "index" file
3189 3195
3190 3196 A healthy revlog will return (0, 0).
3191 3197 """
3192 3198 expected = 0
3193 3199 if len(self):
3194 3200 expected = max(0, self.end(len(self) - 1))
3195 3201
3196 3202 try:
3197 3203 with self._datafp() as f:
3198 3204 f.seek(0, io.SEEK_END)
3199 3205 actual = f.tell()
3200 3206 dd = actual - expected
3201 3207 except FileNotFoundError:
3202 3208 dd = 0
3203 3209
3204 3210 try:
3205 3211 f = self.opener(self._indexfile)
3206 3212 f.seek(0, io.SEEK_END)
3207 3213 actual = f.tell()
3208 3214 f.close()
3209 3215 s = self.index.entry_size
3210 3216 i = max(0, actual // s)
3211 3217 di = actual - (i * s)
3212 3218 if self._inline:
3213 3219 databytes = 0
3214 3220 for r in self:
3215 3221 databytes += max(0, self.length(r))
3216 3222 dd = 0
3217 3223 di = actual - len(self) * s - databytes
3218 3224 except FileNotFoundError:
3219 3225 di = 0
3220 3226
3221 3227 return (dd, di)
3222 3228
3223 3229 def files(self):
3224 3230 res = [self._indexfile]
3225 3231 if self._docket_file is None:
3226 3232 if not self._inline:
3227 3233 res.append(self._datafile)
3228 3234 else:
3229 3235 res.append(self._docket_file)
3230 3236 res.extend(self._docket.old_index_filepaths(include_empty=False))
3231 3237 if self._docket.data_end:
3232 3238 res.append(self._datafile)
3233 3239 res.extend(self._docket.old_data_filepaths(include_empty=False))
3234 3240 if self._docket.sidedata_end:
3235 3241 res.append(self._sidedatafile)
3236 3242 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3237 3243 return res
3238 3244
3239 3245 def emitrevisions(
3240 3246 self,
3241 3247 nodes,
3242 3248 nodesorder=None,
3243 3249 revisiondata=False,
3244 3250 assumehaveparentrevisions=False,
3245 3251 deltamode=repository.CG_DELTAMODE_STD,
3246 3252 sidedata_helpers=None,
3247 3253 debug_info=None,
3248 3254 ):
3249 3255 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3250 3256 raise error.ProgrammingError(
3251 3257 b'unhandled value for nodesorder: %s' % nodesorder
3252 3258 )
3253 3259
3254 if nodesorder is None and not self._generaldelta:
3260 if nodesorder is None and not self.delta_config.general_delta:
3255 3261 nodesorder = b'storage'
3256 3262
3257 3263 if (
3258 3264 not self._storedeltachains
3259 3265 and deltamode != repository.CG_DELTAMODE_PREV
3260 3266 ):
3261 3267 deltamode = repository.CG_DELTAMODE_FULL
3262 3268
3263 3269 return storageutil.emitrevisions(
3264 3270 self,
3265 3271 nodes,
3266 3272 nodesorder,
3267 3273 revlogrevisiondelta,
3268 3274 deltaparentfn=self.deltaparent,
3269 3275 candeltafn=self._candelta,
3270 3276 rawsizefn=self.rawsize,
3271 3277 revdifffn=self.revdiff,
3272 3278 flagsfn=self.flags,
3273 3279 deltamode=deltamode,
3274 3280 revisiondata=revisiondata,
3275 3281 assumehaveparentrevisions=assumehaveparentrevisions,
3276 3282 sidedata_helpers=sidedata_helpers,
3277 3283 debug_info=debug_info,
3278 3284 )
3279 3285
3280 3286 DELTAREUSEALWAYS = b'always'
3281 3287 DELTAREUSESAMEREVS = b'samerevs'
3282 3288 DELTAREUSENEVER = b'never'
3283 3289
3284 3290 DELTAREUSEFULLADD = b'fulladd'
3285 3291
3286 3292 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3287 3293
3288 3294 def clone(
3289 3295 self,
3290 3296 tr,
3291 3297 destrevlog,
3292 3298 addrevisioncb=None,
3293 3299 deltareuse=DELTAREUSESAMEREVS,
3294 3300 forcedeltabothparents=None,
3295 3301 sidedata_helpers=None,
3296 3302 ):
3297 3303 """Copy this revlog to another, possibly with format changes.
3298 3304
3299 3305 The destination revlog will contain the same revisions and nodes.
3300 3306 However, it may not be bit-for-bit identical due to e.g. delta encoding
3301 3307 differences.
3302 3308
3303 3309 The ``deltareuse`` argument controls how deltas from the existing revlog
3304 3310 are preserved in the destination revlog. The argument can have the
3305 3311 following values:
3306 3312
3307 3313 DELTAREUSEALWAYS
3308 3314 Deltas will always be reused (if possible), even if the destination
3309 3315 revlog would not select the same revisions for the delta. This is the
3310 3316 fastest mode of operation.
3311 3317 DELTAREUSESAMEREVS
3312 3318 Deltas will be reused if the destination revlog would pick the same
3313 3319 revisions for the delta. This mode strikes a balance between speed
3314 3320 and optimization.
3315 3321 DELTAREUSENEVER
3316 3322 Deltas will never be reused. This is the slowest mode of execution.
3317 3323 This mode can be used to recompute deltas (e.g. if the diff/delta
3318 3324 algorithm changes).
3319 3325 DELTAREUSEFULLADD
3320 3326 Revisions will be re-added as if they were new content. This is
3321 3327 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3322 3328 e.g. large file detection and handling.
3323 3329
3324 3330 Delta computation can be slow, so the choice of delta reuse policy can
3325 3331 significantly affect run time.
3326 3332
3327 3333 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3328 3334 two extremes. Deltas will be reused if they are appropriate. But if the
3329 3335 delta could choose a better revision, it will do so. This means if you
3330 3336 are converting a non-generaldelta revlog to a generaldelta revlog,
3331 3337 deltas will be recomputed if the delta's parent isn't a parent of the
3332 3338 revision.
3333 3339
3334 3340 In addition to the delta policy, the ``forcedeltabothparents``
3335 3341 argument controls whether to force compute deltas against both parents
3336 3342 for merges. When unset, the destination revlog's current setting is used.
3337 3343
3338 3344 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3339 3345 `sidedata_helpers`.
3340 3346 """
3341 3347 if deltareuse not in self.DELTAREUSEALL:
3342 3348 raise ValueError(
3343 3349 _(b'value for deltareuse invalid: %s') % deltareuse
3344 3350 )
3345 3351
3346 3352 if len(destrevlog):
3347 3353 raise ValueError(_(b'destination revlog is not empty'))
3348 3354
3349 3355 if getattr(self, 'filteredrevs', None):
3350 3356 raise ValueError(_(b'source revlog has filtered revisions'))
3351 3357 if getattr(destrevlog, 'filteredrevs', None):
3352 3358 raise ValueError(_(b'destination revlog has filtered revisions'))
3353 3359
3354 3360 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3355 3361 # if possible.
3356 3362 old_delta_config = destrevlog.delta_config
3357 3363 destrevlog.delta_config = destrevlog.delta_config.copy()
3358 3364
3359 3365 try:
3360 3366 if deltareuse == self.DELTAREUSEALWAYS:
3361 3367 destrevlog.delta_config.lazy_delta_base = True
3362 3368 destrevlog.delta_config.lazy_delta = True
3363 3369 elif deltareuse == self.DELTAREUSESAMEREVS:
3364 3370 destrevlog.delta_config.lazy_delta_base = False
3365 3371 destrevlog.delta_config.lazy_delta = True
3366 3372 elif deltareuse == self.DELTAREUSENEVER:
3367 3373 destrevlog.delta_config.lazy_delta_base = False
3368 3374 destrevlog.delta_config.lazy_delta = False
3369 3375
3370 3376 delta_both_parents = (
3371 3377 forcedeltabothparents or old_delta_config.delta_both_parents
3372 3378 )
3373 3379 destrevlog.delta_config.delta_both_parents = delta_both_parents
3374 3380
3375 3381 with self.reading():
3376 3382 self._clone(
3377 3383 tr,
3378 3384 destrevlog,
3379 3385 addrevisioncb,
3380 3386 deltareuse,
3381 3387 forcedeltabothparents,
3382 3388 sidedata_helpers,
3383 3389 )
3384 3390
3385 3391 finally:
3386 3392 destrevlog.delta_config = old_delta_config
3387 3393
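To illustrate the delta-reuse policies documented above, here is a hypothetical sketch of copying one revlog into an empty destination while forcing every delta to be recomputed; `src`, `dest` and `tr` are assumed to already exist (a source revlog, an empty destination revlog and an open transaction):

# Hedged sketch: only the deltareuse choice matters here.
def copy_with_fresh_deltas(src, dest, tr):
    # DELTAREUSENEVER recomputes every delta (e.g. after changing the delta
    # algorithm); DELTAREUSEALWAYS would be the fastest option instead.
    src.clone(
        tr,
        dest,
        deltareuse=src.DELTAREUSENEVER,
        forcedeltabothparents=None,  # keep the destination's current setting
    )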
3388 3394 def _clone(
3389 3395 self,
3390 3396 tr,
3391 3397 destrevlog,
3392 3398 addrevisioncb,
3393 3399 deltareuse,
3394 3400 forcedeltabothparents,
3395 3401 sidedata_helpers,
3396 3402 ):
3397 3403 """perform the core duty of `revlog.clone` after parameter processing"""
3398 3404 write_debug = None
3399 3405 if self._debug_delta:
3400 3406 write_debug = tr._report
3401 3407 deltacomputer = deltautil.deltacomputer(
3402 3408 destrevlog,
3403 3409 write_debug=write_debug,
3404 3410 )
3405 3411 index = self.index
3406 3412 for rev in self:
3407 3413 entry = index[rev]
3408 3414
3409 3415 # Some classes override linkrev to take filtered revs into
3410 3416 # account. Use raw entry from index.
3411 3417 flags = entry[0] & 0xFFFF
3412 3418 linkrev = entry[4]
3413 3419 p1 = index[entry[5]][7]
3414 3420 p2 = index[entry[6]][7]
3415 3421 node = entry[7]
3416 3422
3417 3423 # (Possibly) reuse the delta from the revlog if allowed and
3418 3424 # the revlog chunk is a delta.
3419 3425 cachedelta = None
3420 3426 rawtext = None
3421 3427 if deltareuse == self.DELTAREUSEFULLADD:
3422 3428 text = self._revisiondata(rev)
3423 3429 sidedata = self.sidedata(rev)
3424 3430
3425 3431 if sidedata_helpers is not None:
3426 3432 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3427 3433 self, sidedata_helpers, sidedata, rev
3428 3434 )
3429 3435 flags = flags | new_flags[0] & ~new_flags[1]
3430 3436
3431 3437 destrevlog.addrevision(
3432 3438 text,
3433 3439 tr,
3434 3440 linkrev,
3435 3441 p1,
3436 3442 p2,
3437 3443 cachedelta=cachedelta,
3438 3444 node=node,
3439 3445 flags=flags,
3440 3446 deltacomputer=deltacomputer,
3441 3447 sidedata=sidedata,
3442 3448 )
3443 3449 else:
3444 3450 if destrevlog._lazydelta:
3445 3451 dp = self.deltaparent(rev)
3446 3452 if dp != nullrev:
3447 3453 cachedelta = (dp, bytes(self._chunk(rev)))
3448 3454
3449 3455 sidedata = None
3450 3456 if not cachedelta:
3451 3457 rawtext = self._revisiondata(rev)
3452 3458 sidedata = self.sidedata(rev)
3453 3459 if sidedata is None:
3454 3460 sidedata = self.sidedata(rev)
3455 3461
3456 3462 if sidedata_helpers is not None:
3457 3463 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3458 3464 self, sidedata_helpers, sidedata, rev
3459 3465 )
3460 3466 flags = flags | new_flags[0] & ~new_flags[1]
3461 3467
3462 3468 with destrevlog._writing(tr):
3463 3469 destrevlog._addrevision(
3464 3470 node,
3465 3471 rawtext,
3466 3472 tr,
3467 3473 linkrev,
3468 3474 p1,
3469 3475 p2,
3470 3476 flags,
3471 3477 cachedelta,
3472 3478 deltacomputer=deltacomputer,
3473 3479 sidedata=sidedata,
3474 3480 )
3475 3481
3476 3482 if addrevisioncb:
3477 3483 addrevisioncb(self, rev, node)
3478 3484
3479 3485 def censorrevision(self, tr, censornode, tombstone=b''):
3480 3486 if self._format_version == REVLOGV0:
3481 3487 raise error.RevlogError(
3482 3488 _(b'cannot censor with version %d revlogs')
3483 3489 % self._format_version
3484 3490 )
3485 3491 elif self._format_version == REVLOGV1:
3486 3492 rewrite.v1_censor(self, tr, censornode, tombstone)
3487 3493 else:
3488 3494 rewrite.v2_censor(self, tr, censornode, tombstone)
3489 3495
3490 3496 def verifyintegrity(self, state):
3491 3497 """Verifies the integrity of the revlog.
3492 3498
3493 3499 Yields ``revlogproblem`` instances describing problems that are
3494 3500 found.
3495 3501 """
3496 3502 dd, di = self.checksize()
3497 3503 if dd:
3498 3504 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3499 3505 if di:
3500 3506 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3501 3507
3502 3508 version = self._format_version
3503 3509
3504 3510 # The verifier tells us what version revlog we should be.
3505 3511 if version != state[b'expectedversion']:
3506 3512 yield revlogproblem(
3507 3513 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3508 3514 % (self.display_id, version, state[b'expectedversion'])
3509 3515 )
3510 3516
3511 3517 state[b'skipread'] = set()
3512 3518 state[b'safe_renamed'] = set()
3513 3519
3514 3520 for rev in self:
3515 3521 node = self.node(rev)
3516 3522
3517 3523 # Verify contents. 4 cases to care about:
3518 3524 #
3519 3525 # common: the most common case
3520 3526 # rename: with a rename
3521 3527 # meta: file content starts with b'\1\n', the metadata
3522 3528 # header defined in filelog.py, but without a rename
3523 3529 # ext: content stored externally
3524 3530 #
3525 3531 # More formally, their differences are shown below:
3526 3532 #
3527 3533 # | common | rename | meta | ext
3528 3534 # -------------------------------------------------------
3529 3535 # flags() | 0 | 0 | 0 | not 0
3530 3536 # renamed() | False | True | False | ?
3531 3537 # rawtext[0:2]=='\1\n'| False | True | True | ?
3532 3538 #
3533 3539 # "rawtext" means the raw text stored in revlog data, which
3534 3540 # could be retrieved by "rawdata(rev)". "text"
3535 3541 # mentioned below is "revision(rev)".
3536 3542 #
3537 3543 # There are 3 different lengths stored physically:
3538 3544 # 1. L1: rawsize, stored in revlog index
3539 3545 # 2. L2: len(rawtext), stored in revlog data
3540 3546 # 3. L3: len(text), stored in revlog data if flags==0, or
3541 3547 # possibly somewhere else if flags!=0
3542 3548 #
3543 3549 # L1 should be equal to L2. L3 could be different from them.
3544 3550 # "text" may or may not affect commit hash depending on flag
3545 3551 # processors (see flagutil.addflagprocessor).
3546 3552 #
3547 3553 # | common | rename | meta | ext
3548 3554 # -------------------------------------------------
3549 3555 # rawsize() | L1 | L1 | L1 | L1
3550 3556 # size() | L1 | L2-LM | L1(*) | L1 (?)
3551 3557 # len(rawtext) | L2 | L2 | L2 | L2
3552 3558 # len(text) | L2 | L2 | L2 | L3
3553 3559 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3554 3560 #
3555 3561 # LM: length of metadata, depending on rawtext
3556 3562 # (*): not ideal, see comment in filelog.size
3557 3563 # (?): could be "- len(meta)" if the resolved content has
3558 3564 # rename metadata
3559 3565 #
3560 3566 # Checks needed to be done:
3561 3567 # 1. length check: L1 == L2, in all cases.
3562 3568 # 2. hash check: depending on flag processor, we may need to
3563 3569 # use either "text" (external), or "rawtext" (in revlog).
3564 3570
3565 3571 try:
3566 3572 skipflags = state.get(b'skipflags', 0)
3567 3573 if skipflags:
3568 3574 skipflags &= self.flags(rev)
3569 3575
3570 3576 _verify_revision(self, skipflags, state, node)
3571 3577
3572 3578 l1 = self.rawsize(rev)
3573 3579 l2 = len(self.rawdata(node))
3574 3580
3575 3581 if l1 != l2:
3576 3582 yield revlogproblem(
3577 3583 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3578 3584 node=node,
3579 3585 )
3580 3586
3581 3587 except error.CensoredNodeError:
3582 3588 if state[b'erroroncensored']:
3583 3589 yield revlogproblem(
3584 3590 error=_(b'censored file data'), node=node
3585 3591 )
3586 3592 state[b'skipread'].add(node)
3587 3593 except Exception as e:
3588 3594 yield revlogproblem(
3589 3595 error=_(b'unpacking %s: %s')
3590 3596 % (short(node), stringutil.forcebytestr(e)),
3591 3597 node=node,
3592 3598 )
3593 3599 state[b'skipread'].add(node)
3594 3600
3595 3601 def storageinfo(
3596 3602 self,
3597 3603 exclusivefiles=False,
3598 3604 sharedfiles=False,
3599 3605 revisionscount=False,
3600 3606 trackedsize=False,
3601 3607 storedsize=False,
3602 3608 ):
3603 3609 d = {}
3604 3610
3605 3611 if exclusivefiles:
3606 3612 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3607 3613 if not self._inline:
3608 3614 d[b'exclusivefiles'].append((self.opener, self._datafile))
3609 3615
3610 3616 if sharedfiles:
3611 3617 d[b'sharedfiles'] = []
3612 3618
3613 3619 if revisionscount:
3614 3620 d[b'revisionscount'] = len(self)
3615 3621
3616 3622 if trackedsize:
3617 3623 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3618 3624
3619 3625 if storedsize:
3620 3626 d[b'storedsize'] = sum(
3621 3627 self.opener.stat(path).st_size for path in self.files()
3622 3628 )
3623 3629
3624 3630 return d
3625 3631
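A short usage sketch for storageinfo(); `rl` is a hypothetical revlog instance, and only the flags requested as True show up as keys in the returned dict:

# Hedged sketch: query a few storage statistics from an existing revlog `rl`.
info = rl.storageinfo(revisionscount=True, trackedsize=True, storedsize=True)
print(info[b'revisionscount'])  # number of revisions stored
print(info[b'trackedsize'])     # sum of raw (uncompressed) revision sizes
print(info[b'storedsize'])      # on-disk size of the revlog's files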
3626 3632 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3627 3633 if not self.hassidedata:
3628 3634 return
3629 3635 # revlog formats with sidedata support do not support inline storage
3630 3636 assert not self._inline
3631 3637 if not helpers[1] and not helpers[2]:
3632 3638 # Nothing to generate or remove
3633 3639 return
3634 3640
3635 3641 new_entries = []
3636 3642 # append the new sidedata
3637 3643 with self._writing(transaction):
3638 3644 ifh, dfh, sdfh = self._writinghandles
3639 3645 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3640 3646
3641 3647 current_offset = sdfh.tell()
3642 3648 for rev in range(startrev, endrev + 1):
3643 3649 entry = self.index[rev]
3644 3650 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3645 3651 store=self,
3646 3652 sidedata_helpers=helpers,
3647 3653 sidedata={},
3648 3654 rev=rev,
3649 3655 )
3650 3656
3651 3657 serialized_sidedata = sidedatautil.serialize_sidedata(
3652 3658 new_sidedata
3653 3659 )
3654 3660
3655 3661 sidedata_compression_mode = COMP_MODE_INLINE
3656 3662 if serialized_sidedata and self.hassidedata:
3657 3663 sidedata_compression_mode = COMP_MODE_PLAIN
3658 3664 h, comp_sidedata = self.compress(serialized_sidedata)
3659 3665 if (
3660 3666 h != b'u'
3661 3667 and comp_sidedata[0] != b'\0'
3662 3668 and len(comp_sidedata) < len(serialized_sidedata)
3663 3669 ):
3664 3670 assert not h
3665 3671 if (
3666 3672 comp_sidedata[0]
3667 3673 == self._docket.default_compression_header
3668 3674 ):
3669 3675 sidedata_compression_mode = COMP_MODE_DEFAULT
3670 3676 serialized_sidedata = comp_sidedata
3671 3677 else:
3672 3678 sidedata_compression_mode = COMP_MODE_INLINE
3673 3679 serialized_sidedata = comp_sidedata
3674 3680 if entry[8] != 0 or entry[9] != 0:
3675 3681 # rewriting entries that already have sidedata is not
3676 3682 # supported yet, because it introduces garbage data in the
3677 3683 # revlog.
3678 3684 msg = b"rewriting existing sidedata is not supported yet"
3679 3685 raise error.Abort(msg)
3680 3686
3681 3687 # Apply (potential) flags to add and to remove after running
3682 3688 # the sidedata helpers
3683 3689 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3684 3690 entry_update = (
3685 3691 current_offset,
3686 3692 len(serialized_sidedata),
3687 3693 new_offset_flags,
3688 3694 sidedata_compression_mode,
3689 3695 )
3690 3696
3691 3697 # the sidedata computation might have moved the file cursors around
3692 3698 sdfh.seek(current_offset, os.SEEK_SET)
3693 3699 sdfh.write(serialized_sidedata)
3694 3700 new_entries.append(entry_update)
3695 3701 current_offset += len(serialized_sidedata)
3696 3702 self._docket.sidedata_end = sdfh.tell()
3697 3703
3698 3704 # rewrite the new index entries
3699 3705 ifh.seek(startrev * self.index.entry_size)
3700 3706 for i, e in enumerate(new_entries):
3701 3707 rev = startrev + i
3702 3708 self.index.replace_sidedata_info(rev, *e)
3703 3709 packed = self.index.entry_binary(rev)
3704 3710 if rev == 0 and self._docket is None:
3705 3711 header = self._format_flags | self._format_version
3706 3712 header = self.index.pack_header(header)
3707 3713 packed = header + packed
3708 3714 ifh.write(packed)
@@ -1,1624 +1,1624 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import collections
12 12 import struct
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from ..node import nullrev
16 16 from ..i18n import _
17 17
18 18 from .constants import (
19 19 COMP_MODE_DEFAULT,
20 20 COMP_MODE_INLINE,
21 21 COMP_MODE_PLAIN,
22 22 DELTA_BASE_REUSE_FORCE,
23 23 DELTA_BASE_REUSE_NO,
24 24 KIND_CHANGELOG,
25 25 KIND_FILELOG,
26 26 KIND_MANIFESTLOG,
27 27 REVIDX_ISCENSORED,
28 28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 29 )
30 30
31 31 from ..thirdparty import attr
32 32
33 33 from .. import (
34 34 error,
35 35 mdiff,
36 36 util,
37 37 )
38 38
39 39 from . import flagutil
40 40
41 41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 42 LIMIT_DELTA2TEXT = 2
43 43
44 44
45 45 class _testrevlog:
46 46 """minimalist fake revlog to use in doctests"""
47 47
48 48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 49 """data is an list of revision payload boundaries"""
50 50 from .. import revlog
51 51
52 52 self._data = data
53 53 self._srdensitythreshold = density
54 54 self._srmingapsize = mingap
55 55 self.data_config = revlog.DataConfig()
56 56 self.data_config.sr_density_threshold = density
57 57 self.data_config.sr_min_gap_size = mingap
58 58 self.delta_config = revlog.DeltaConfig()
59 59 self.feature_config = revlog.FeatureConfig()
60 60 self._snapshot = set(snapshot)
61 61 self.index = None
62 62
63 63 def start(self, rev):
64 64 if rev == nullrev:
65 65 return 0
66 66 if rev == 0:
67 67 return 0
68 68 return self._data[rev - 1]
69 69
70 70 def end(self, rev):
71 71 if rev == nullrev:
72 72 return 0
73 73 return self._data[rev]
74 74
75 75 def length(self, rev):
76 76 return self.end(rev) - self.start(rev)
77 77
78 78 def __len__(self):
79 79 return len(self._data)
80 80
81 81 def issnapshot(self, rev):
82 82 if rev == nullrev:
83 83 return True
84 84 return rev in self._snapshot
85 85
86 86
87 87 def slicechunk(revlog, revs, targetsize=None):
88 88 """slice revs to reduce the amount of unrelated data to be read from disk.
89 89
90 90 ``revs`` is sliced into groups that should be read in one time.
91 91 Assume that revs are sorted.
92 92
93 93 The initial chunk is sliced until the overall density (payload/chunks-span
94 94 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
95 95 `revlog._srmingapsize` is skipped.
96 96
97 97 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
98 98 For consistency with other slicing choices, this limit won't go lower than
99 99 `revlog._srmingapsize`.
100 100
101 101 If individual revision chunks are larger than this limit, they will still
102 102 be yielded individually.
103 103
104 104 >>> data = [
105 105 ... 5, #00 (5)
106 106 ... 10, #01 (5)
107 107 ... 12, #02 (2)
108 108 ... 12, #03 (empty)
109 109 ... 27, #04 (15)
110 110 ... 31, #05 (4)
111 111 ... 31, #06 (empty)
112 112 ... 42, #07 (11)
113 113 ... 47, #08 (5)
114 114 ... 47, #09 (empty)
115 115 ... 48, #10 (1)
116 116 ... 51, #11 (3)
117 117 ... 74, #12 (23)
118 118 ... 85, #13 (11)
119 119 ... 86, #14 (1)
120 120 ... 91, #15 (5)
121 121 ... ]
122 122 >>> revlog = _testrevlog(data, snapshot=range(16))
123 123
124 124 >>> list(slicechunk(revlog, list(range(16))))
125 125 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
126 126 >>> list(slicechunk(revlog, [0, 15]))
127 127 [[0], [15]]
128 128 >>> list(slicechunk(revlog, [0, 11, 15]))
129 129 [[0], [11], [15]]
130 130 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
131 131 [[0], [11, 13, 15]]
132 132 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
133 133 [[1, 2], [5, 8, 10, 11], [14]]
134 134
135 135 Slicing with a maximum chunk size
136 136 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
137 137 [[0], [11], [13], [15]]
138 138 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
139 139 [[0], [11], [13, 15]]
140 140
141 141 Slicing involving nullrev
142 142 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
143 143 [[-1, 0], [11], [13, 15]]
144 144 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
145 145 [[-1], [13], [15]]
146 146 """
147 147 if targetsize is not None:
148 148 targetsize = max(targetsize, revlog._srmingapsize)
149 149 # targetsize should not be specified when evaluating delta candidates:
150 150 # * targetsize is used to ensure we stay within specification when reading,
151 151 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
152 152 if densityslicing is None:
153 153 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
154 154 for chunk in densityslicing(
155 155 revs, revlog._srdensitythreshold, revlog._srmingapsize
156 156 ):
157 157 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
158 158 yield subchunk
159 159
160 160
161 161 def _slicechunktosize(revlog, revs, targetsize=None):
162 162 """slice revs to match the target size
163 163
164 164 This is intended to be used on chunks that density slicing selected but that
165 165 are still too large compared to the read guarantee of revlog. This might
166 166 happen when the "minimal gap size" interrupted the slicing or when chains are
167 167 built in a way that creates large blocks next to each other.
168 168
169 169 >>> data = [
170 170 ... 3, #0 (3)
171 171 ... 5, #1 (2)
172 172 ... 6, #2 (1)
173 173 ... 8, #3 (2)
174 174 ... 8, #4 (empty)
175 175 ... 11, #5 (3)
176 176 ... 12, #6 (1)
177 177 ... 13, #7 (1)
178 178 ... 14, #8 (1)
179 179 ... ]
180 180
181 181 == All snapshots cases ==
182 182 >>> revlog = _testrevlog(data, snapshot=range(9))
183 183
184 184 Cases where chunk is already small enough
185 185 >>> list(_slicechunktosize(revlog, [0], 3))
186 186 [[0]]
187 187 >>> list(_slicechunktosize(revlog, [6, 7], 3))
188 188 [[6, 7]]
189 189 >>> list(_slicechunktosize(revlog, [0], None))
190 190 [[0]]
191 191 >>> list(_slicechunktosize(revlog, [6, 7], None))
192 192 [[6, 7]]
193 193
194 194 cases where we need actual slicing
195 195 >>> list(_slicechunktosize(revlog, [0, 1], 3))
196 196 [[0], [1]]
197 197 >>> list(_slicechunktosize(revlog, [1, 3], 3))
198 198 [[1], [3]]
199 199 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
200 200 [[1, 2], [3]]
201 201 >>> list(_slicechunktosize(revlog, [3, 5], 3))
202 202 [[3], [5]]
203 203 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
204 204 [[3], [5]]
205 205 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
206 206 [[5], [6, 7, 8]]
207 207 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
208 208 [[0], [1, 2], [3], [5], [6, 7, 8]]
209 209
210 210 Case with too large individual chunk (must return valid chunk)
211 211 >>> list(_slicechunktosize(revlog, [0, 1], 2))
212 212 [[0], [1]]
213 213 >>> list(_slicechunktosize(revlog, [1, 3], 1))
214 214 [[1], [3]]
215 215 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
216 216 [[3], [5]]
217 217
218 218 == No Snapshot cases ==
219 219 >>> revlog = _testrevlog(data)
220 220
221 221 Cases where chunk is already small enough
222 222 >>> list(_slicechunktosize(revlog, [0], 3))
223 223 [[0]]
224 224 >>> list(_slicechunktosize(revlog, [6, 7], 3))
225 225 [[6, 7]]
226 226 >>> list(_slicechunktosize(revlog, [0], None))
227 227 [[0]]
228 228 >>> list(_slicechunktosize(revlog, [6, 7], None))
229 229 [[6, 7]]
230 230
231 231 cases where we need actual slicing
232 232 >>> list(_slicechunktosize(revlog, [0, 1], 3))
233 233 [[0], [1]]
234 234 >>> list(_slicechunktosize(revlog, [1, 3], 3))
235 235 [[1], [3]]
236 236 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
237 237 [[1], [2, 3]]
238 238 >>> list(_slicechunktosize(revlog, [3, 5], 3))
239 239 [[3], [5]]
240 240 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
241 241 [[3], [4, 5]]
242 242 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
243 243 [[5], [6, 7, 8]]
244 244 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
245 245 [[0], [1, 2], [3], [5], [6, 7, 8]]
246 246
247 247 Case with too large individual chunk (must return valid chunk)
248 248 >>> list(_slicechunktosize(revlog, [0, 1], 2))
249 249 [[0], [1]]
250 250 >>> list(_slicechunktosize(revlog, [1, 3], 1))
251 251 [[1], [3]]
252 252 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
253 253 [[3], [5]]
254 254
255 255 == mixed case ==
256 256 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
257 257 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
258 258 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
259 259 """
260 260 assert targetsize is None or 0 <= targetsize
261 261 startdata = revlog.start(revs[0])
262 262 enddata = revlog.end(revs[-1])
263 263 fullspan = enddata - startdata
264 264 if targetsize is None or fullspan <= targetsize:
265 265 yield revs
266 266 return
267 267
268 268 startrevidx = 0
269 269 endrevidx = 1
270 270 iterrevs = enumerate(revs)
271 271 next(iterrevs) # skip first rev.
272 272 # first step: get snapshots out of the way
273 273 for idx, r in iterrevs:
274 274 span = revlog.end(r) - startdata
275 275 snapshot = revlog.issnapshot(r)
276 276 if span <= targetsize and snapshot:
277 277 endrevidx = idx + 1
278 278 else:
279 279 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
280 280 if chunk:
281 281 yield chunk
282 282 startrevidx = idx
283 283 startdata = revlog.start(r)
284 284 endrevidx = idx + 1
285 285 if not snapshot:
286 286 break
287 287
288 288 # for the others, we use binary slicing to quickly converge toward valid
289 289 # chunks (otherwise, we might end up looking for start/end of many
290 290 # revisions). This logic is not looking for the perfect slicing point, it
291 291 # focuses on quickly converging toward valid chunks.
292 292 nbitem = len(revs)
293 293 while (enddata - startdata) > targetsize:
294 294 endrevidx = nbitem
295 295 if nbitem - startrevidx <= 1:
296 296 break # protect against individual chunk larger than limit
297 297 localenddata = revlog.end(revs[endrevidx - 1])
298 298 span = localenddata - startdata
299 299 while span > targetsize:
300 300 if endrevidx - startrevidx <= 1:
301 301 break # protect against individual chunk larger than limit
302 302 endrevidx -= (endrevidx - startrevidx) // 2
303 303 localenddata = revlog.end(revs[endrevidx - 1])
304 304 span = localenddata - startdata
305 305 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
306 306 if chunk:
307 307 yield chunk
308 308 startrevidx = endrevidx
309 309 startdata = revlog.start(revs[startrevidx])
310 310
311 311 chunk = _trimchunk(revlog, revs, startrevidx)
312 312 if chunk:
313 313 yield chunk
314 314
315 315
316 316 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
317 317 """slice revs to reduce the amount of unrelated data to be read from disk.
318 318
319 319 ``revs`` is sliced into groups that should be read in one time.
320 320 Assume that revs are sorted.
321 321
322 322 The initial chunk is sliced until the overall density (payload/chunks-span
323 323 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
324 324 skipped.
325 325
326 326 >>> revlog = _testrevlog([
327 327 ... 5, #00 (5)
328 328 ... 10, #01 (5)
329 329 ... 12, #02 (2)
330 330 ... 12, #03 (empty)
331 331 ... 27, #04 (15)
332 332 ... 31, #05 (4)
333 333 ... 31, #06 (empty)
334 334 ... 42, #07 (11)
335 335 ... 47, #08 (5)
336 336 ... 47, #09 (empty)
337 337 ... 48, #10 (1)
338 338 ... 51, #11 (3)
339 339 ... 74, #12 (23)
340 340 ... 85, #13 (11)
341 341 ... 86, #14 (1)
342 342 ... 91, #15 (5)
343 343 ... ])
344 344
345 345 >>> list(_slicechunktodensity(revlog, list(range(16))))
346 346 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
347 347 >>> list(_slicechunktodensity(revlog, [0, 15]))
348 348 [[0], [15]]
349 349 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
350 350 [[0], [11], [15]]
351 351 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
352 352 [[0], [11, 13, 15]]
353 353 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
354 354 [[1, 2], [5, 8, 10, 11], [14]]
355 355 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
356 356 ... mingapsize=20))
357 357 [[1, 2, 3, 5, 8, 10, 11], [14]]
358 358 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
359 359 ... targetdensity=0.95))
360 360 [[1, 2], [5], [8, 10, 11], [14]]
361 361 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
362 362 ... targetdensity=0.95, mingapsize=12))
363 363 [[1, 2], [5, 8, 10, 11], [14]]
364 364 """
365 365 start = revlog.start
366 366 length = revlog.length
367 367
368 368 if len(revs) <= 1:
369 369 yield revs
370 370 return
371 371
372 372 deltachainspan = segmentspan(revlog, revs)
373 373
374 374 if deltachainspan < mingapsize:
375 375 yield revs
376 376 return
377 377
378 378 readdata = deltachainspan
379 379 chainpayload = sum(length(r) for r in revs)
380 380
381 381 if deltachainspan:
382 382 density = chainpayload / float(deltachainspan)
383 383 else:
384 384 density = 1.0
385 385
386 386 if density >= targetdensity:
387 387 yield revs
388 388 return
389 389
390 390 # Collect the gaps so they can be popped from largest to smallest
391 391 gaps = []
392 392 prevend = None
393 393 for i, rev in enumerate(revs):
394 394 revstart = start(rev)
395 395 revlen = length(rev)
396 396
397 397 # Skip empty revisions to form larger holes
398 398 if revlen == 0:
399 399 continue
400 400
401 401 if prevend is not None:
402 402 gapsize = revstart - prevend
403 403 # only consider holes that are large enough
404 404 if gapsize > mingapsize:
405 405 gaps.append((gapsize, i))
406 406
407 407 prevend = revstart + revlen
408 408 # sort the gaps to pop them from largest to small
409 409 gaps.sort()
410 410
411 411 # Collect the indices of the largest holes until the density is acceptable
412 412 selected = []
413 413 while gaps and density < targetdensity:
414 414 gapsize, gapidx = gaps.pop()
415 415
416 416 selected.append(gapidx)
417 417
418 418 # skipping this gap means we no longer have to read the bytes it
419 419 # spans, so the amount of data to read shrinks accordingly
420 420 readdata -= gapsize
421 421 if readdata > 0:
422 422 density = chainpayload / float(readdata)
423 423 else:
424 424 density = 1.0
425 425 selected.sort()
426 426
427 427 # Cut the revs at collected indices
428 428 previdx = 0
429 429 for idx in selected:
430 430
431 431 chunk = _trimchunk(revlog, revs, previdx, idx)
432 432 if chunk:
433 433 yield chunk
434 434
435 435 previdx = idx
436 436
437 437 chunk = _trimchunk(revlog, revs, previdx)
438 438 if chunk:
439 439 yield chunk
440 440
441 441
442 442 def _trimchunk(revlog, revs, startidx, endidx=None):
443 443 """returns revs[startidx:endidx] without empty trailing revs
444 444
445 445 Doctest Setup
446 446 >>> revlog = _testrevlog([
447 447 ... 5, #0
448 448 ... 10, #1
449 449 ... 12, #2
450 450 ... 12, #3 (empty)
451 451 ... 17, #4
452 452 ... 21, #5
453 453 ... 21, #6 (empty)
454 454 ... ])
455 455
456 456 Contiguous cases:
457 457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 458 [0, 1, 2, 3, 4, 5]
459 459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 460 [0, 1, 2, 3, 4]
461 461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 462 [0, 1, 2]
463 463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 464 [2]
465 465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 466 [3, 4, 5]
467 467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 468 [3, 4]
469 469
470 470 Discontiguous cases:
471 471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 472 [1, 3, 5]
473 473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 474 [1]
475 475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 476 [3, 5]
477 477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 478 [3, 5]
479 479 """
480 480 length = revlog.length
481 481
482 482 if endidx is None:
483 483 endidx = len(revs)
484 484
485 485 # If we have a non-empty delta candidate, there is nothing to trim
486 486 if revs[endidx - 1] < len(revlog):
487 487 # Trim empty revs at the end, except the very first revision of a chain
488 488 while (
489 489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 490 ):
491 491 endidx -= 1
492 492
493 493 return revs[startidx:endidx]
494 494
495 495
496 496 def segmentspan(revlog, revs):
497 497 """Get the byte span of a segment of revisions
498 498
499 499 revs is a sorted array of revision numbers
500 500
501 501 >>> revlog = _testrevlog([
502 502 ... 5, #0
503 503 ... 10, #1
504 504 ... 12, #2
505 505 ... 12, #3 (empty)
506 506 ... 17, #4
507 507 ... ])
508 508
509 509 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
510 510 17
511 511 >>> segmentspan(revlog, [0, 4])
512 512 17
513 513 >>> segmentspan(revlog, [3, 4])
514 514 5
515 515 >>> segmentspan(revlog, [1, 2, 3,])
516 516 7
517 517 >>> segmentspan(revlog, [1, 3])
518 518 7
519 519 """
520 520 if not revs:
521 521 return 0
522 522 end = revlog.end(revs[-1])
523 523 return end - revlog.start(revs[0])
524 524
525 525
526 526 def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
527 527 """build full text from a (base, delta) pair and other metadata"""
528 528 # special case deltas which replace entire base; no need to decode
529 529 # base revision. this neatly avoids censored bases, which throw when
530 530 # they're decoded.
531 531 hlen = struct.calcsize(b">lll")
532 532 if delta[:hlen] == mdiff.replacediffheader(
533 533 revlog.rawsize(baserev), len(delta) - hlen
534 534 ):
535 535 fulltext = delta[hlen:]
536 536 else:
537 537 # deltabase is rawtext before changed by flag processors, which is
538 538 # equivalent to non-raw text
539 539 basetext = revlog.revision(baserev)
540 540 fulltext = mdiff.patch(basetext, delta)
541 541
542 542 try:
543 543 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
544 544 if validatehash:
545 545 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
546 546 if flags & REVIDX_ISCENSORED:
547 547 raise error.StorageError(
548 548 _(b'node %s is not censored') % expectednode
549 549 )
550 550 except error.CensoredNodeError:
551 551 # must pass the censored index flag to add censored revisions
552 552 if not flags & REVIDX_ISCENSORED:
553 553 raise
554 554 return fulltext
555 555
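The (base, delta) pairing handled above relies on mdiff; a minimal, self-contained sketch of the round trip, using the same mdiff.textdiff/mdiff.patch calls as the code above (the byte strings are made up):

# Hedged sketch of the delta round trip behind _textfromdelta:
# textdiff() produces a binary delta, patch() applies it back onto the base.
from mercurial import mdiff

base = b'line one\nline two\nline three\n'
new = b'line one\nline 2\nline three\nline four\n'
delta = mdiff.textdiff(base, new)       # binary delta, as stored in a revlog chunk
assert mdiff.patch(base, delta) == new  # applying it to the base restores the text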
556 556
557 557 @attr.s(slots=True, frozen=True)
558 558 class _deltainfo:
559 559 distance = attr.ib()
560 560 deltalen = attr.ib()
561 561 data = attr.ib()
562 562 base = attr.ib()
563 563 chainbase = attr.ib()
564 564 chainlen = attr.ib()
565 565 compresseddeltalen = attr.ib()
566 566 snapshotdepth = attr.ib()
567 567
568 568
569 569 def drop_u_compression(delta):
570 570 """turn into a "u" (no-compression) into no-compression without header
571 571
572 572 This is useful for revlog format that has better compression method.
573 573 """
574 574 assert delta.data[0] == b'u', delta.data[0]
575 575 return _deltainfo(
576 576 delta.distance,
577 577 delta.deltalen - 1,
578 578 (b'', delta.data[1]),
579 579 delta.base,
580 580 delta.chainbase,
581 581 delta.chainlen,
582 582 delta.compresseddeltalen,
583 583 delta.snapshotdepth,
584 584 )
585 585
586 586
587 587 def is_good_delta_info(revlog, deltainfo, revinfo):
588 588 """Returns True if the given delta is good. Good means that it is within
589 589 the disk span, disk size, and chain length bounds that we know to be
590 590 performant."""
591 591 if deltainfo is None:
592 592 return False
593 593
594 594 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
595 595 # we should never end up asking such a question. Adding the assert as a
596 596 # safe-guard to detect anything that would be fishy in this regard.
597 597 assert (
598 598 revinfo.cachedelta is None
599 599 or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
600 or not revlog._generaldelta
600 or not revlog.delta_config.general_delta
601 601 )
602 602
603 603 # - 'deltainfo.distance' is the distance from the base revision --
604 604 # bounding it limits the amount of I/O we need to do.
605 605 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
606 606 # deltas we need to apply -- bounding it limits the amount of CPU
607 607 # we consume.
608 608
609 609 textlen = revinfo.textlen
610 610 defaultmax = textlen * 4
611 611 maxdist = revlog._maxdeltachainspan
612 612 if not maxdist:
613 613 maxdist = deltainfo.distance # ensure the conditional pass
614 614 maxdist = max(maxdist, defaultmax)
615 615
616 616 # Bad delta from read span:
617 617 #
618 618 # If the span of data read is larger than the maximum allowed.
619 619 #
620 620 # In the sparse-revlog case, we rely on the associated "sparse reading"
621 621 # to avoid issue related to the span of data. In theory, it would be
622 622 # possible to build pathological revlog where delta pattern would lead
623 623 # to too many reads. However, they do not happen in practice at all. So
624 624 # we skip the span check entirely.
625 625 if not revlog._sparserevlog and maxdist < deltainfo.distance:
626 626 return False
627 627
628 628 # Bad delta from new delta size:
629 629 #
630 630 # If the delta size is larger than the target text, storing the
631 631 # delta will be inefficient.
632 632 if textlen < deltainfo.deltalen:
633 633 return False
634 634
635 635 # Bad delta from cumulated payload size:
636 636 #
637 637 # If the sum of delta get larger than K * target text length.
638 638 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
639 639 return False
640 640
641 641 # Bad delta from chain length:
642 642 #
643 643 # If the number of delta in the chain gets too high.
644 644 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
645 645 return False
646 646
647 647 # bad delta from intermediate snapshot size limit
648 648 #
649 649 # If an intermediate snapshot size is higher than the limit. The
650 650 # limit exist to prevent endless chain of intermediate delta to be
651 651 # created.
652 652 if (
653 653 deltainfo.snapshotdepth is not None
654 654 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
655 655 ):
656 656 return False
657 657
658 658 # bad delta if new intermediate snapshot is larger than the previous
659 659 # snapshot
660 660 if (
661 661 deltainfo.snapshotdepth
662 662 and revlog.length(deltainfo.base) < deltainfo.deltalen
663 663 ):
664 664 return False
665 665
666 666 return True
667 667
668 668
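One of the checks above, the intermediate-snapshot limit, halves the allowed delta size with each extra snapshot level; a small worked example with made-up numbers:

# Hedged arithmetic sketch (illustrative numbers only): textlen >> snapshotdepth
# is the cap an intermediate snapshot's delta must stay under.
textlen = 1_000_000
for snapshotdepth in (1, 2, 3, 4):
    print(snapshotdepth, textlen >> snapshotdepth)  # 500000, 250000, 125000, 62500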
669 669 # If a revision's full text is that much bigger than a base candidate full
670 670 # text's, it is very unlikely that it will produce a valid delta. We no longer
671 671 # consider these candidates.
672 672 LIMIT_BASE2TEXT = 500
673 673
674 674
675 675 def _candidategroups(
676 676 revlog,
677 677 textlen,
678 678 p1,
679 679 p2,
680 680 cachedelta,
681 681 excluded_bases=None,
682 682 target_rev=None,
683 683 snapshot_cache=None,
684 684 ):
685 685 """Provides group of revision to be tested as delta base
686 686
687 687 This top level function focus on emitting groups with unique and worthwhile
688 688 content. See _raw_candidate_groups for details about the group order.
689 689 """
690 690 # should we try to build a delta?
691 691 if not (len(revlog) and revlog._storedeltachains):
692 692 yield None
693 693 return
694 694
695 695 if target_rev is None:
696 696 target_rev = len(revlog)
697 697
698 if not revlog._generaldelta:
698 if not revlog.delta_config.general_delta:
699 699 # before general delta, there is only one possible delta base
700 700 yield (target_rev - 1,)
701 701 yield None
702 702 return
703 703
704 704 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
705 705 # we should never end up asking such a question. Adding the assert as a
706 706 # safe-guard to detect anything that would be fishy in this regard.
707 707 assert (
708 708 cachedelta is None
709 709 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
710 or not revlog._generaldelta
710 or not revlog.delta_config.general_delta
711 711 )
712 712
713 713 deltalength = revlog.length
714 714 deltaparent = revlog.deltaparent
715 715 sparse = revlog._sparserevlog
716 716 good = None
717 717
718 718 deltas_limit = textlen * LIMIT_DELTA2TEXT
719 719 group_chunk_size = revlog._candidate_group_chunk_size
720 720
721 721 tested = {nullrev}
722 722 candidates = _refinedgroups(
723 723 revlog,
724 724 p1,
725 725 p2,
726 726 cachedelta,
727 727 snapshot_cache=snapshot_cache,
728 728 )
729 729 while True:
730 730 temptative = candidates.send(good)
731 731 if temptative is None:
732 732 break
733 733 group = []
734 734 for rev in temptative:
735 735 # skip over empty deltas (no need to include them in a chain)
736 736 while not (rev == nullrev or rev in tested or deltalength(rev)):
737 737 tested.add(rev)
738 738 rev = deltaparent(rev)
739 739 # no need to try a delta against nullrev, this will be done as a
740 740 # last resort.
741 741 if rev == nullrev:
742 742 continue
743 743 # filter out revisions we tested already
744 744 if rev in tested:
745 745 continue
746 746
747 747 # a higher authority deemed the base unworthy (e.g. censored)
748 748 if excluded_bases is not None and rev in excluded_bases:
749 749 tested.add(rev)
750 750 continue
751 751 # We are in some recomputation cases and that rev is too high in
752 752 # the revlog
753 753 if target_rev is not None and rev >= target_rev:
754 754 tested.add(rev)
755 755 continue
756 756 # filter out delta bases that will never produce a good delta
757 757 if deltas_limit < revlog.length(rev):
758 758 tested.add(rev)
759 759 continue
760 760 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
761 761 tested.add(rev)
762 762 continue
763 763 # no delta for rawtext-changing revs (see "candelta" for why)
764 764 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
765 765 tested.add(rev)
766 766 continue
767 767
768 768 # If we reach here, we are about to build and test a delta.
769 769 # The delta building process will compute the chaininfo in all
770 770 # cases; since that computation is cached, it is fine to access it
771 771 # here too.
772 772 chainlen, chainsize = revlog._chaininfo(rev)
773 773 # if chain will be too long, skip base
774 774 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
775 775 tested.add(rev)
776 776 continue
777 777 # if the chain already has too much data, skip base
778 778 if deltas_limit < chainsize:
779 779 tested.add(rev)
780 780 continue
781 781 if sparse and revlog.upperboundcomp is not None:
782 782 maxcomp = revlog.upperboundcomp
783 783 basenotsnap = (p1, p2, nullrev)
784 784 if rev not in basenotsnap and revlog.issnapshot(rev):
785 785 snapshotdepth = revlog.snapshotdepth(rev)
786 786 # If text is significantly larger than the base, we can
787 787 # expect the resulting delta to be proportional to the size
788 788 # difference
789 789 revsize = revlog.rawsize(rev)
790 790 rawsizedistance = max(textlen - revsize, 0)
791 791 # use an estimate of the compression upper bound.
792 792 lowestrealisticdeltalen = rawsizedistance // maxcomp
793 793
794 794 # check the absolute constraint on the delta size
795 795 snapshotlimit = textlen >> snapshotdepth
796 796 if snapshotlimit < lowestrealisticdeltalen:
797 797 # delta lower bound is larger than accepted upper bound
798 798 tested.add(rev)
799 799 continue
800 800
801 801 # check the relative constraint on the delta size
802 802 revlength = revlog.length(rev)
803 803 if revlength < lowestrealisticdeltalen:
804 804 # delta probable lower bound is larger than target base
805 805 tested.add(rev)
806 806 continue
807 807
808 808 group.append(rev)
809 809 if group:
810 810 # When the size of the candidate group is big, it can result in a
811 811 # quite significant performance impact. To reduce this, we can send
812 812 # them in smaller batches until the new batch does not provide any
813 813 # improvements.
814 814 #
815 815 # This might reduce the overall efficiency of the compression in
816 816 # some corner cases, but that should also prevent very pathological
817 817 # cases from being an issue. (eg. 20 000 candidates).
818 818 #
819 819 # XXX note that the ordering of the group becomes important as it
820 820 # now impacts the final result. The current order is unprocessed
821 821 # and can be improved.
822 822 if group_chunk_size == 0:
823 823 tested.update(group)
824 824 good = yield tuple(group)
825 825 else:
826 826 prev_good = good
827 827 for start in range(0, len(group), group_chunk_size):
828 828 sub_group = group[start : start + group_chunk_size]
829 829 tested.update(sub_group)
830 830 good = yield tuple(sub_group)
831 831 if prev_good == good:
832 832 break
833 833
834 834 yield None
835 835
836 836
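The loop above drives its candidate generator through send(); the following toy, standalone sketch mirrors that protocol (the revision numbers are invented and nothing here touches a real revlog):

# Hedged toy sketch of the send()-driven search: the consumer reports which
# candidate (if any) produced an acceptable delta, and the generator uses that
# feedback to decide whether to keep refining or to stop.
def candidate_groups():
    good = yield (10, 12)  # first, the most promising candidates
    if good is None:
        good = yield (7,)  # fall back to another group
    yield None             # sentinel: nothing left to try

gen = candidate_groups()
group = next(gen)
while group is not None:
    # pretend revision 12 is the only base that yields a usable delta
    good = 12 if 12 in group else None
    group = gen.send(good)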
837 837 def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
838 838 good = None
839 839 # First we try to reuse the delta contained in the bundle.
840 840 # (or from the source revlog)
841 841 #
842 842 # This logic only applies to general delta repositories and can be disabled
843 843 # through configuration. Disabling source-delta reuse is useful when
844 844 # we want to make sure we recompute "optimal" deltas.
845 845 debug_info = None
846 846 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
847 847 # Assume what we received from the server is a good choice
848 848 # build delta will reuse the cache
849 849 if debug_info is not None:
850 850 debug_info['cached-delta.tested'] += 1
851 851 good = yield (cachedelta[0],)
852 852 if good is not None:
853 853 if debug_info is not None:
854 854 debug_info['cached-delta.accepted'] += 1
855 855 yield None
856 856 return
857 857 if snapshot_cache is None:
858 858 snapshot_cache = SnapshotCache()
859 859 groups = _rawgroups(
860 860 revlog,
861 861 p1,
862 862 p2,
863 863 cachedelta,
864 864 snapshot_cache,
865 865 )
866 866 for candidates in groups:
867 867 good = yield candidates
868 868 if good is not None:
869 869 break
870 870
871 871 # If sparse revlog is enabled, we can try to refine the available deltas
872 872 if not revlog._sparserevlog:
873 873 yield None
874 874 return
875 875
876 876 # if we have a refinable value, try to refine it
877 877 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
878 878 # refine snapshot down
879 879 previous = None
880 880 while previous != good:
881 881 previous = good
882 882 base = revlog.deltaparent(good)
883 883 if base == nullrev:
884 884 break
885 885 good = yield (base,)
886 886 # refine snapshot up
887 887 if not snapshot_cache.snapshots:
888 888 snapshot_cache.update(revlog, good + 1)
889 889 previous = None
890 890 while good != previous:
891 891 previous = good
892 892 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
893 893 good = yield children
894 894
895 895 if debug_info is not None:
896 896 if good is None:
897 897 debug_info['no-solution'] += 1
898 898
899 899 yield None
900 900
901 901
902 902 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
903 903 """Provides group of revision to be tested as delta base
904 904
905 905 This lower level function focus on emitting delta theorically interresting
906 906 without looking it any practical details.
907 907
908 908 The group order aims at providing fast or small candidates first.
909 909 """
910 910 # Why search for a delta base if we cannot use a delta base?
911 assert revlog._generaldelta
911 assert revlog.delta_config.general_delta
912 912 # also see issue6056
913 913 sparse = revlog._sparserevlog
914 914 curr = len(revlog)
915 915 prev = curr - 1
916 916 deltachain = lambda rev: revlog._deltachain(rev)[0]
917 917
918 918 # exclude already lazy tested base if any
919 919 parents = [p for p in (p1, p2) if p != nullrev]
920 920
921 921 if not revlog._deltabothparents and len(parents) == 2:
922 922 parents.sort()
923 923 # To minimize the chance of having to build a fulltext,
924 924 # pick first whichever parent is closest to us (max rev)
925 925 yield (parents[1],)
926 926 # then the other one (min rev) if the first did not fit
927 927 yield (parents[0],)
928 928 elif len(parents) > 0:
929 929 # Test all parents (1 or 2), and keep the best candidate
930 930 yield parents
931 931
932 932 if sparse and parents:
933 933 if snapshot_cache is None:
934 934 # map: base-rev: [snapshot-revs]
935 935 snapshot_cache = SnapshotCache()
936 936 # See if we can use an existing snapshot in the parent chains to use as
937 937 # a base for a new intermediate-snapshot
938 938 #
939 939 # search for snapshot in parents delta chain
940 940 # map: snapshot-level: snapshot-rev
941 941 parents_snaps = collections.defaultdict(set)
942 942 candidate_chains = [deltachain(p) for p in parents]
943 943 for chain in candidate_chains:
944 944 for idx, s in enumerate(chain):
945 945 if not revlog.issnapshot(s):
946 946 break
947 947 parents_snaps[idx].add(s)
948 948 snapfloor = min(parents_snaps[0]) + 1
949 949 snapshot_cache.update(revlog, snapfloor)
950 950 # search for the highest "unrelated" revision
951 951 #
952 952 # Adding snapshots used by "unrelated" revisions increases the odds that we
953 953 # reuse an independent, yet better, snapshot chain.
954 954 #
955 955 # XXX instead of building a set of revisions, we could lazily enumerate
956 956 # over the chains. That would be more efficient, however we stick to
957 957 # simple code for now.
958 958 all_revs = set()
959 959 for chain in candidate_chains:
960 960 all_revs.update(chain)
961 961 other = None
962 962 for r in revlog.revs(prev, snapfloor):
963 963 if r not in all_revs:
964 964 other = r
965 965 break
966 966 if other is not None:
967 967 # To avoid unfair competition, we won't use unrelated intermediate
968 968 # snapshots that are deeper than the ones from the parent delta
969 969 # chain.
970 970 max_depth = max(parents_snaps.keys())
971 971 chain = deltachain(other)
972 972 for depth, s in enumerate(chain):
973 973 if s < snapfloor:
974 974 continue
975 975 if max_depth < depth:
976 976 break
977 977 if not revlog.issnapshot(s):
978 978 break
979 979 parents_snaps[depth].add(s)
980 980 # Test them as possible intermediate snapshot base
981 981 # We test them from highest to lowest level. High-level ones are more
982 982 # likely to result in a small delta.
983 983 floor = None
984 984 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
985 985 siblings = set()
986 986 for s in snaps:
987 987 siblings.update(snapshot_cache.snapshots[s])
988 988 # Before considering making a new intermediate snapshot, we check
989 989 # if an existing snapshot, a child of the base we consider, would be
990 990 # suitable.
991 991 #
992 992 # It gives a chance to reuse a delta chain "unrelated" to the
993 993 # current revision instead of starting our own. Without such
994 994 # re-use, topological branches would keep reopening new chains,
995 995 # creating more and more snapshots as the repository grows.
996 996
997 997 if floor is not None:
998 998 # We only do this for siblings created after the one in our
999 999 # parent's delta chain. Those created before has less chances
1000 1000 # to be valid base since our ancestors had to create a new
1001 1001 # snapshot.
1002 1002 siblings = [r for r in siblings if floor < r]
1003 1003 yield tuple(sorted(siblings))
1004 1004 # then test the base from our parent's delta chain.
1005 1005 yield tuple(sorted(snaps))
1006 1006 floor = min(snaps)
1007 1007 # No suitable base was found in the parent chain; search whether any full
1008 1008 # snapshots emitted since the parent's base would be a suitable base for an
1009 1009 # intermediate snapshot.
1010 1010 #
1011 1011 # It gives a chance to reuse a delta chain unrelated to the current
1012 1012 # revisions instead of starting our own. Without such re-use,
1013 1013 # topological branches would keep reopening new full chains, creating
1014 1014 # more and more snapshots as the repository grows.
1015 1015 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1016 1016 yield tuple(sorted(full))
1017 1017
1018 1018 if not sparse:
1019 1019 # other approach failed try against prev to hopefully save us a
1020 1020 # fulltext.
1021 1021 yield (prev,)
1022 1022
1023 1023
1024 1024 class SnapshotCache:
1025 1025 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1026 1026
1027 1027 def __init__(self):
1028 1028 self.snapshots = collections.defaultdict(set)
1029 1029 self._start_rev = None
1030 1030 self._end_rev = None
1031 1031
1032 1032 def update(self, revlog, start_rev=0):
1033 1033 """find snapshots from start_rev to tip"""
1034 1034 nb_revs = len(revlog)
1035 1035 end_rev = nb_revs - 1
1036 1036 if start_rev > end_rev:
1037 1037 return # range is empty
1038 1038
1039 1039 if self._start_rev is None:
1040 1040 assert self._end_rev is None
1041 1041 self._update(revlog, start_rev, end_rev)
1042 1042 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1043 1043 if start_rev < self._start_rev:
1044 1044 self._update(revlog, start_rev, self._start_rev - 1)
1045 1045 if self._end_rev < end_rev:
1046 1046 self._update(revlog, self._end_rev + 1, end_rev)
1047 1047
1048 1048 if self._start_rev is None:
1049 1049 assert self._end_rev is None
1050 1050 self._end_rev = end_rev
1051 1051 self._start_rev = start_rev
1052 1052 else:
1053 1053 self._start_rev = min(self._start_rev, start_rev)
1054 1054 self._end_rev = max(self._end_rev, end_rev)
1055 1055 assert self._start_rev <= self._end_rev, (
1056 1056 self._start_rev,
1057 1057 self._end_rev,
1058 1058 )
1059 1059
1060 1060 def _update(self, revlog, start_rev, end_rev):
1061 1061 """internal method that actually do update content"""
1062 1062 assert self._start_rev is None or (
1063 1063 start_rev < self._start_rev or start_rev > self._end_rev
1064 1064 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1065 1065 assert self._start_rev is None or (
1066 1066 end_rev < self._start_rev or end_rev > self._end_rev
1067 1067 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1068 1068 cache = self.snapshots
1069 1069 if hasattr(revlog.index, 'findsnapshots'):
1070 1070 revlog.index.findsnapshots(cache, start_rev, end_rev)
1071 1071 else:
1072 1072 deltaparent = revlog.deltaparent
1073 1073 issnapshot = revlog.issnapshot
1074 1074 for rev in revlog.revs(start_rev, end_rev):
1075 1075 if issnapshot(rev):
1076 1076 cache[deltaparent(rev)].add(rev)
1077 1077
1078 1078
1079 1079 class deltacomputer:
1080 1080 def __init__(
1081 1081 self,
1082 1082 revlog,
1083 1083 write_debug=None,
1084 1084 debug_search=False,
1085 1085 debug_info=None,
1086 1086 ):
1087 1087 self.revlog = revlog
1088 1088 self._write_debug = write_debug
1089 1089 if write_debug is None:
1090 1090 self._debug_search = False
1091 1091 else:
1092 1092 self._debug_search = debug_search
1093 1093 self._debug_info = debug_info
1094 1094 self._snapshot_cache = SnapshotCache()
1095 1095
1096 1096 @property
1097 1097 def _gather_debug(self):
1098 1098 return self._write_debug is not None or self._debug_info is not None
1099 1099
1100 1100 def buildtext(self, revinfo):
1101 1101 """Builds a fulltext version of a revision
1102 1102
1103 1103 revinfo: revisioninfo instance that contains all needed info
1104 1104 """
1105 1105 btext = revinfo.btext
1106 1106 if btext[0] is not None:
1107 1107 return btext[0]
1108 1108
1109 1109 revlog = self.revlog
1110 1110 cachedelta = revinfo.cachedelta
1111 1111 baserev = cachedelta[0]
1112 1112 delta = cachedelta[1]
1113 1113
1114 1114 fulltext = btext[0] = _textfromdelta(
1115 1115 revlog,
1116 1116 baserev,
1117 1117 delta,
1118 1118 revinfo.p1,
1119 1119 revinfo.p2,
1120 1120 revinfo.flags,
1121 1121 revinfo.node,
1122 1122 )
1123 1123 return fulltext
1124 1124
1125 1125 def _builddeltadiff(self, base, revinfo):
1126 1126 revlog = self.revlog
1127 1127 t = self.buildtext(revinfo)
1128 1128 if revlog.iscensored(base):
1129 1129 # deltas based on a censored revision must replace the
1130 1130 # full content in one patch, so delta works everywhere
1131 1131 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1132 1132 delta = header + t
1133 1133 else:
1134 1134 ptext = revlog.rawdata(base)
1135 1135 delta = mdiff.textdiff(ptext, t)
1136 1136
1137 1137 return delta
1138 1138
1139 1139 def _builddeltainfo(self, revinfo, base, target_rev=None):
1140 1140 # can we use the cached delta?
1141 1141 revlog = self.revlog
1142 1142 chainbase = revlog.chainbase(base)
1143 if revlog._generaldelta:
1143 if revlog.delta_config.general_delta:
1144 1144 deltabase = base
1145 1145 else:
1146 1146 if target_rev is not None and base != target_rev - 1:
1147 1147 msg = (
1148 1148 b'general delta cannot use delta for something else '
1149 1149 b'than `prev`: %d<-%d'
1150 1150 )
1151 1151 msg %= (base, target_rev)
1152 1152 raise error.ProgrammingError(msg)
1153 1153 deltabase = chainbase
1154 1154 snapshotdepth = None
1155 1155 if revlog._sparserevlog and deltabase == nullrev:
1156 1156 snapshotdepth = 0
1157 1157 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1158 1158 # A delta chain should always be one full snapshot,
1159 1159 # zero or more semi-snapshots, and zero or more deltas
1160 1160 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1161 1161 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1162 1162 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1163 1163 delta = None
1164 1164 if revinfo.cachedelta:
1165 1165 cachebase = revinfo.cachedelta[0]
1166 1166 # check if the diff still applies
1167 1167 currentbase = cachebase
1168 1168 while (
1169 1169 currentbase != nullrev
1170 1170 and currentbase != base
1171 1171 and self.revlog.length(currentbase) == 0
1172 1172 ):
1173 1173 currentbase = self.revlog.deltaparent(currentbase)
1174 1174 if self.revlog._lazydelta and currentbase == base:
1175 1175 delta = revinfo.cachedelta[1]
1176 1176 if delta is None:
1177 1177 delta = self._builddeltadiff(base, revinfo)
1178 1178 if self._debug_search:
1179 1179 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1180 1180 msg %= len(delta)
1181 1181 self._write_debug(msg)
1182 1182 # snapshotdepth needs to be neither None nor 0 (a 0-level snapshot)
1183 1183 if revlog.upperboundcomp is not None and snapshotdepth:
1184 1184 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1185 1185 snapshotlimit = revinfo.textlen >> snapshotdepth
1186 1186 if self._debug_search:
1187 1187 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1188 1188 msg %= lowestrealisticdeltalen
1189 1189 self._write_debug(msg)
1190 1190 if snapshotlimit < lowestrealisticdeltalen:
1191 1191 if self._debug_search:
1192 1192 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1193 1193 self._write_debug(msg)
1194 1194 return None
1195 1195 if revlog.length(base) < lowestrealisticdeltalen:
1196 1196 if self._debug_search:
1197 1197 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1198 1198 self._write_debug(msg)
1199 1199 return None
1200 1200 header, data = revlog.compress(delta)
1201 1201 deltalen = len(header) + len(data)
1202 1202 offset = revlog.end(len(revlog) - 1)
1203 1203 dist = deltalen + offset - revlog.start(chainbase)
1204 1204 chainlen, compresseddeltalen = revlog._chaininfo(base)
1205 1205 chainlen += 1
1206 1206 compresseddeltalen += deltalen
1207 1207
1208 1208 return _deltainfo(
1209 1209 dist,
1210 1210 deltalen,
1211 1211 (header, data),
1212 1212 deltabase,
1213 1213 chainbase,
1214 1214 chainlen,
1215 1215 compresseddeltalen,
1216 1216 snapshotdepth,
1217 1217 )
1218 1218
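To make the early-discard heuristic at the end of `_builddeltainfo` concrete, here is a small worked example; the compression ratio, delta size and text length are made-up numbers used only for illustration:

    # Made-up numbers illustrating the discard rule in _builddeltainfo above.
    upperboundcomp = 10        # assumed best achievable compression ratio
    delta_len = 50_000         # uncompressed delta size (hypothetical)
    textlen = 64_000           # full text size of the revision (hypothetical)
    snapshotdepth = 4          # depth of the snapshot being considered

    lowest_realistic = delta_len // upperboundcomp   # 5_000 bytes at best
    snapshot_limit = textlen >> snapshotdepth        # 64_000 / 16 = 4_000
    # snapshot_limit < lowest_realistic, so this candidate would be discarded
    # before any time is spent on actual compression.
    assert snapshot_limit < lowest_realistic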
1219 1219 def _fullsnapshotinfo(self, revinfo, curr):
1220 1220 rawtext = self.buildtext(revinfo)
1221 1221 data = self.revlog.compress(rawtext)
1222 1222 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1223 1223 deltabase = chainbase = curr
1224 1224 snapshotdepth = 0
1225 1225 chainlen = 1
1226 1226
1227 1227 return _deltainfo(
1228 1228 dist,
1229 1229 deltalen,
1230 1230 data,
1231 1231 deltabase,
1232 1232 chainbase,
1233 1233 chainlen,
1234 1234 compresseddeltalen,
1235 1235 snapshotdepth,
1236 1236 )
1237 1237
1238 1238 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1239 1239 """Find an acceptable delta against a candidate revision
1240 1240
1241 1241 revinfo: information about the revision (instance of _revisioninfo)
1242 1242
1243 1243 Returns the first acceptable candidate revision, as ordered by
1244 1244 _candidategroups
1245 1245
1246 1246 If no suitable deltabase is found, we return delta info for a full
1247 1247 snapshot.
1248 1248
1249 1249 `excluded_bases` is an optional set of revisions that cannot be used as
1250 1250 a delta base. Use this to recompute deltas suitable in a censor or strip
1251 1251 context.
1252 1252 """
1253 1253 if target_rev is None:
1254 1254 target_rev = len(self.revlog)
1255 1255
1256 1256 gather_debug = self._gather_debug
1257 1257 cachedelta = revinfo.cachedelta
1258 1258 revlog = self.revlog
1259 1259 p1r = p2r = None
1260 1260
1261 1261 if excluded_bases is None:
1262 1262 excluded_bases = set()
1263 1263
1264 1264 if gather_debug:
1265 1265 start = util.timer()
1266 1266 dbg = self._one_dbg_data()
1267 1267 dbg['revision'] = target_rev
1268 1268 target_revlog = b"UNKNOWN"
1269 1269 target_type = self.revlog.target[0]
1270 1270 target_key = self.revlog.target[1]
1271 1271 if target_type == KIND_CHANGELOG:
1272 1272 target_revlog = b'CHANGELOG:'
1273 1273 elif target_type == KIND_MANIFESTLOG:
1274 1274 target_revlog = b'MANIFESTLOG:'
1275 1275 if target_key:
1276 1276 target_revlog += b'%s:' % target_key
1277 1277 elif target_type == KIND_FILELOG:
1278 1278 target_revlog = b'FILELOG:'
1279 1279 if target_key:
1280 1280 target_revlog += b'%s:' % target_key
1281 1281 dbg['target-revlog'] = target_revlog
1282 1282 p1r = revlog.rev(revinfo.p1)
1283 1283 p2r = revlog.rev(revinfo.p2)
1284 1284 if p1r != nullrev:
1285 1285 p1_chain_len = revlog._chaininfo(p1r)[0]
1286 1286 else:
1287 1287 p1_chain_len = -1
1288 1288 if p2r != nullrev:
1289 1289 p2_chain_len = revlog._chaininfo(p2r)[0]
1290 1290 else:
1291 1291 p2_chain_len = -1
1292 1292 dbg['p1-chain-len'] = p1_chain_len
1293 1293 dbg['p2-chain-len'] = p2_chain_len
1294 1294
1295 1295 # 1) if the revision is empty, no amount of delta can beat it
1296 1296 #
1297 1297 # 2) no delta for flag processor revision (see "candelta" for why)
1298 1298 # not calling candelta since only one revision needs testing, also to
1299 1299 # avoid overhead fetching flags again.
1300 1300 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1301 1301 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1302 1302 if gather_debug:
1303 1303 end = util.timer()
1304 1304 dbg['duration'] = end - start
1305 1305 dbg[
1306 1306 'delta-base'
1307 1307 ] = deltainfo.base # pytype: disable=attribute-error
1308 1308 dbg['search_round_count'] = 0
1309 1309 dbg['using-cached-base'] = False
1310 1310 dbg['delta_try_count'] = 0
1311 1311 dbg['type'] = b"full"
1312 1312 dbg['snapshot-depth'] = 0
1313 1313 self._dbg_process_data(dbg)
1314 1314 return deltainfo
1315 1315
1316 1316 deltainfo = None
1317 1317
1318 1318 # If this source delta is to be forcibly reused, let us comply early.
1319 1319 if (
1320 revlog._generaldelta
1320 revlog.delta_config.general_delta
1321 1321 and revinfo.cachedelta is not None
1322 1322 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1323 1323 ):
1324 1324 base = revinfo.cachedelta[0]
1325 1325 if base == nullrev:
1326 1326 dbg_type = b"full"
1327 1327 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1328 1328 if gather_debug:
1329 1329 snapshotdepth = 0
1330 1330 elif base not in excluded_bases:
1331 1331 delta = revinfo.cachedelta[1]
1332 1332 header, data = revlog.compress(delta)
1333 1333 deltalen = len(header) + len(data)
1334 1334 if gather_debug:
1335 1335 offset = revlog.end(len(revlog) - 1)
1336 1336 chainbase = revlog.chainbase(base)
1337 1337 distance = deltalen + offset - revlog.start(chainbase)
1338 1338 chainlen, compresseddeltalen = revlog._chaininfo(base)
1339 1339 chainlen += 1
1340 1340 compresseddeltalen += deltalen
1341 1341 if base == p1r or base == p2r:
1342 1342 dbg_type = b"delta"
1343 1343 snapshotdepth = None
1344 1344 elif not revlog.issnapshot(base):
1345 1345 snapshotdepth = None
1346 1346 else:
1347 1347 dbg_type = b"snapshot"
1348 1348 snapshotdepth = revlog.snapshotdepth(base) + 1
1349 1349 else:
1350 1350 distance = None
1351 1351 chainbase = None
1352 1352 chainlen = None
1353 1353 compresseddeltalen = None
1354 1354 snapshotdepth = None
1355 1355 deltainfo = _deltainfo(
1356 1356 distance=distance,
1357 1357 deltalen=deltalen,
1358 1358 data=(header, data),
1359 1359 base=base,
1360 1360 chainbase=chainbase,
1361 1361 chainlen=chainlen,
1362 1362 compresseddeltalen=compresseddeltalen,
1363 1363 snapshotdepth=snapshotdepth,
1364 1364 )
1365 1365
1366 1366 if deltainfo is not None:
1367 1367 if gather_debug:
1368 1368 end = util.timer()
1369 1369 dbg['duration'] = end - start
1370 1370 dbg[
1371 1371 'delta-base'
1372 1372 ] = deltainfo.base # pytype: disable=attribute-error
1373 1373 dbg['search_round_count'] = 0
1374 1374 dbg['using-cached-base'] = True
1375 1375 dbg['delta_try_count'] = 0
1376 1376 dbg['type'] = b"full"
1377 1377 if snapshotdepth is None:
1378 1378 dbg['snapshot-depth'] = 0
1379 1379 else:
1380 1380 dbg['snapshot-depth'] = snapshotdepth
1381 1381 self._dbg_process_data(dbg)
1382 1382 return deltainfo
1383 1383
1384 1384 # count the number of different deltas we tried (for debug purposes)
1385 1385 dbg_try_count = 0
1386 1386 # count the number of "search round" we did. (for debug purpose)
1387 1387 dbg_try_rounds = 0
1388 1388 dbg_type = b'unknown'
1389 1389
1390 1390 if p1r is None:
1391 1391 p1r = revlog.rev(revinfo.p1)
1392 1392 p2r = revlog.rev(revinfo.p2)
1393 1393
1394 1394 if self._debug_search:
1395 1395 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1396 1396 msg %= target_rev
1397 1397 self._write_debug(msg)
1398 1398
1399 1399 groups = _candidategroups(
1400 1400 self.revlog,
1401 1401 revinfo.textlen,
1402 1402 p1r,
1403 1403 p2r,
1404 1404 cachedelta,
1405 1405 excluded_bases,
1406 1406 target_rev,
1407 1407 snapshot_cache=self._snapshot_cache,
1408 1408 )
1409 1409 candidaterevs = next(groups)
1410 1410 while candidaterevs is not None:
1411 1411 dbg_try_rounds += 1
1412 1412 if self._debug_search:
1413 1413 prev = None
1414 1414 if deltainfo is not None:
1415 1415 prev = deltainfo.base
1416 1416
1417 1417 if (
1418 1418 cachedelta is not None
1419 1419 and len(candidaterevs) == 1
1420 1420 and cachedelta[0] in candidaterevs
1421 1421 ):
1422 1422 round_type = b"cached-delta"
1423 1423 elif p1r in candidaterevs or p2r in candidaterevs:
1424 1424 round_type = b"parents"
1425 1425 elif prev is not None and all(c < prev for c in candidaterevs):
1426 1426 round_type = b"refine-down"
1427 1427 elif prev is not None and all(c > prev for c in candidaterevs):
1428 1428 round_type = b"refine-up"
1429 1429 else:
1430 1430 round_type = b"search-down"
1431 1431 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1432 1432 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1433 1433 self._write_debug(msg)
1434 1434 nominateddeltas = []
1435 1435 if deltainfo is not None:
1436 1436 if self._debug_search:
1437 1437 msg = (
1438 1438 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1439 1439 )
1440 1440 msg %= (deltainfo.base, deltainfo.deltalen)
1441 1441 self._write_debug(msg)
1442 1442 # if we already found a good delta,
1443 1443 # challenge it against refined candidates
1444 1444 nominateddeltas.append(deltainfo)
1445 1445 for candidaterev in candidaterevs:
1446 1446 if self._debug_search:
1447 1447 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1448 1448 msg %= candidaterev
1449 1449 self._write_debug(msg)
1450 1450 candidate_type = None
1451 1451 if candidaterev == p1r:
1452 1452 candidate_type = b"p1"
1453 1453 elif candidaterev == p2r:
1454 1454 candidate_type = b"p2"
1455 1455 elif self.revlog.issnapshot(candidaterev):
1456 1456 candidate_type = b"snapshot-%d"
1457 1457 candidate_type %= self.revlog.snapshotdepth(
1458 1458 candidaterev
1459 1459 )
1460 1460
1461 1461 if candidate_type is not None:
1462 1462 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1463 1463 msg %= candidate_type
1464 1464 self._write_debug(msg)
1465 1465 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1466 1466 msg %= self.revlog.length(candidaterev)
1467 1467 self._write_debug(msg)
1468 1468 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1469 1469 msg %= self.revlog.deltaparent(candidaterev)
1470 1470 self._write_debug(msg)
1471 1471
1472 1472 dbg_try_count += 1
1473 1473
1474 1474 if self._debug_search:
1475 1475 delta_start = util.timer()
1476 1476 candidatedelta = self._builddeltainfo(
1477 1477 revinfo,
1478 1478 candidaterev,
1479 1479 target_rev=target_rev,
1480 1480 )
1481 1481 if self._debug_search:
1482 1482 delta_end = util.timer()
1483 1483 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1484 1484 msg %= delta_end - delta_start
1485 1485 self._write_debug(msg)
1486 1486 if candidatedelta is not None:
1487 1487 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1488 1488 if self._debug_search:
1489 1489 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1490 1490 msg %= candidatedelta.deltalen
1491 1491 self._write_debug(msg)
1492 1492 nominateddeltas.append(candidatedelta)
1493 1493 elif self._debug_search:
1494 1494 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1495 1495 msg %= candidatedelta.deltalen
1496 1496 self._write_debug(msg)
1497 1497 elif self._debug_search:
1498 1498 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1499 1499 self._write_debug(msg)
1500 1500 if nominateddeltas:
1501 1501 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1502 1502 if deltainfo is not None:
1503 1503 candidaterevs = groups.send(deltainfo.base)
1504 1504 else:
1505 1505 candidaterevs = next(groups)
1506 1506
1507 1507 if deltainfo is None:
1508 1508 dbg_type = b"full"
1509 1509 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1510 1510 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1511 1511 dbg_type = b"snapshot"
1512 1512 else:
1513 1513 dbg_type = b"delta"
1514 1514
1515 1515 if gather_debug:
1516 1516 end = util.timer()
1517 1517 if dbg_type == b'full':
1518 1518 used_cached = (
1519 1519 cachedelta is not None
1520 1520 and dbg_try_rounds == 0
1521 1521 and dbg_try_count == 0
1522 1522 and cachedelta[0] == nullrev
1523 1523 )
1524 1524 else:
1525 1525 used_cached = (
1526 1526 cachedelta is not None
1527 1527 and dbg_try_rounds == 1
1528 1528 and dbg_try_count == 1
1529 1529 and deltainfo.base == cachedelta[0]
1530 1530 )
1531 1531 dbg['duration'] = end - start
1532 1532 dbg[
1533 1533 'delta-base'
1534 1534 ] = deltainfo.base # pytype: disable=attribute-error
1535 1535 dbg['search_round_count'] = dbg_try_rounds
1536 1536 dbg['using-cached-base'] = used_cached
1537 1537 dbg['delta_try_count'] = dbg_try_count
1538 1538 dbg['type'] = dbg_type
1539 1539 if (
1540 1540 deltainfo.snapshotdepth # pytype: disable=attribute-error
1541 1541 is not None
1542 1542 ):
1543 1543 dbg[
1544 1544 'snapshot-depth'
1545 1545 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1546 1546 else:
1547 1547 dbg['snapshot-depth'] = 0
1548 1548 self._dbg_process_data(dbg)
1549 1549 return deltainfo
1550 1550
1551 1551 def _one_dbg_data(self):
1552 1552 return {
1553 1553 'duration': None,
1554 1554 'revision': None,
1555 1555 'delta-base': None,
1556 1556 'search_round_count': None,
1557 1557 'using-cached-base': None,
1558 1558 'delta_try_count': None,
1559 1559 'type': None,
1560 1560 'p1-chain-len': None,
1561 1561 'p2-chain-len': None,
1562 1562 'snapshot-depth': None,
1563 1563 'target-revlog': None,
1564 1564 }
1565 1565
1566 1566 def _dbg_process_data(self, dbg):
1567 1567 if self._debug_info is not None:
1568 1568 self._debug_info.append(dbg)
1569 1569
1570 1570 if self._write_debug is not None:
1571 1571 msg = (
1572 1572 b"DBG-DELTAS:"
1573 1573 b" %-12s"
1574 1574 b" rev=%d:"
1575 1575 b" delta-base=%d"
1576 1576 b" is-cached=%d"
1577 1577 b" - search-rounds=%d"
1578 1578 b" try-count=%d"
1579 1579 b" - delta-type=%-6s"
1580 1580 b" snap-depth=%d"
1581 1581 b" - p1-chain-length=%d"
1582 1582 b" p2-chain-length=%d"
1583 1583 b" - duration=%f"
1584 1584 b"\n"
1585 1585 )
1586 1586 msg %= (
1587 1587 dbg["target-revlog"],
1588 1588 dbg["revision"],
1589 1589 dbg["delta-base"],
1590 1590 dbg["using-cached-base"],
1591 1591 dbg["search_round_count"],
1592 1592 dbg["delta_try_count"],
1593 1593 dbg["type"],
1594 1594 dbg["snapshot-depth"],
1595 1595 dbg["p1-chain-len"],
1596 1596 dbg["p2-chain-len"],
1597 1597 dbg["duration"],
1598 1598 )
1599 1599 self._write_debug(msg)
1600 1600
1601 1601
1602 1602 def delta_compression(default_compression_header, deltainfo):
1603 1603 """return (COMPRESSION_MODE, deltainfo)
1604 1604
1605 1605 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1606 1606 compression.
1607 1607 """
1608 1608 h, d = deltainfo.data
1609 1609 compression_mode = COMP_MODE_INLINE
1610 1610 if not h and not d:
1611 1611 # no data to store at all... declare it uncompressed
1612 1612 compression_mode = COMP_MODE_PLAIN
1613 1613 elif not h:
1614 1614 t = d[0:1]
1615 1615 if t == b'\0':
1616 1616 compression_mode = COMP_MODE_PLAIN
1617 1617 elif t == default_compression_header:
1618 1618 compression_mode = COMP_MODE_DEFAULT
1619 1619 elif h == b'u':
1620 1620 # we have a more efficient way to declare uncompressed
1621 1621 h = b''
1622 1622 compression_mode = COMP_MODE_PLAIN
1623 1623 deltainfo = drop_u_compression(deltainfo)
1624 1624 return compression_mode, deltainfo
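For intuition, the dispatch in `delta_compression` above boils down to a small decision table over the `(header, data)` pair. The sketch below is illustrative only; the string results stand in for `COMP_MODE_PLAIN`/`COMP_MODE_DEFAULT`/`COMP_MODE_INLINE`, and the `b'x'` default header is an assumed placeholder value:

    # Illustrative restatement of the dispatch rule, not the real helper.
    def compression_mode(header, data, default_header=b'x'):
        if not header and not data:
            return 'plain'            # nothing to store at all
        if not header:
            first = data[0:1]
            if first == b'\0':
                return 'plain'        # stored with the uncompressed marker
            if first == default_header:
                return 'default'      # matches the revlog's default engine
            return 'inline'
        if header == b'u':
            return 'plain'            # 'u' header also means uncompressed
        return 'inline'

    assert compression_mode(b'', b'\0raw text') == 'plain'
    assert compression_mode(b'', b'xcompressed') == 'default'
    assert compression_mode(b'u', b'raw text') == 'plain'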
@@ -1,875 +1,875 b''
1 1 # censor code related to censoring revision
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import binascii
11 11 import contextlib
12 12 import os
13 13 import struct
14 14
15 15 from ..node import (
16 16 nullrev,
17 17 )
18 18 from .constants import (
19 19 COMP_MODE_PLAIN,
20 20 ENTRY_DATA_COMPRESSED_LENGTH,
21 21 ENTRY_DATA_COMPRESSION_MODE,
22 22 ENTRY_DATA_OFFSET,
23 23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
24 24 ENTRY_DELTA_BASE,
25 25 ENTRY_LINK_REV,
26 26 ENTRY_NODE_ID,
27 27 ENTRY_PARENT_1,
28 28 ENTRY_PARENT_2,
29 29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
30 30 ENTRY_SIDEDATA_COMPRESSION_MODE,
31 31 ENTRY_SIDEDATA_OFFSET,
32 32 REVIDX_ISCENSORED,
33 33 REVLOGV0,
34 34 REVLOGV1,
35 35 )
36 36 from ..i18n import _
37 37
38 38 from .. import (
39 39 error,
40 40 mdiff,
41 41 pycompat,
42 42 revlogutils,
43 43 util,
44 44 )
45 45 from ..utils import (
46 46 storageutil,
47 47 )
48 48 from . import (
49 49 constants,
50 50 deltas,
51 51 )
52 52
53 53
54 54 def v1_censor(rl, tr, censornode, tombstone=b''):
55 55 """censors a revision in a "version 1" revlog"""
56 56 assert rl._format_version == constants.REVLOGV1, rl._format_version
57 57
58 58 # avoid cycle
59 59 from .. import revlog
60 60
61 61 censorrev = rl.rev(censornode)
62 62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
63 63
64 64 # Rewriting the revlog in place is hard. Our strategy for censoring is
65 65 # to create a new revlog, copy all revisions to it, then replace the
66 66 # revlogs on transaction close.
67 67 #
68 68 # This is a bit dangerous. We could easily have a mismatch of state.
69 69 newrl = revlog.revlog(
70 70 rl.opener,
71 71 target=rl.target,
72 72 radix=rl.radix,
73 73 postfix=b'tmpcensored',
74 74 censorable=True,
75 75 )
76 76 newrl._format_version = rl._format_version
77 77 newrl._format_flags = rl._format_flags
78 newrl.delta_config.general_delta = rl._generaldelta
78 newrl.delta_config.general_delta = rl.delta_config.general_delta
79 79 newrl._parse_index = rl._parse_index
80 80
81 81 for rev in rl.revs():
82 82 node = rl.node(rev)
83 83 p1, p2 = rl.parents(node)
84 84
85 85 if rev == censorrev:
86 86 newrl.addrawrevision(
87 87 tombstone,
88 88 tr,
89 89 rl.linkrev(censorrev),
90 90 p1,
91 91 p2,
92 92 censornode,
93 93 constants.REVIDX_ISCENSORED,
94 94 )
95 95
96 96 if newrl.deltaparent(rev) != nullrev:
97 97 m = _(b'censored revision stored as delta; cannot censor')
98 98 h = _(
99 99 b'censoring of revlogs is not fully implemented;'
100 100 b' please report this bug'
101 101 )
102 102 raise error.Abort(m, hint=h)
103 103 continue
104 104
105 105 if rl.iscensored(rev):
106 106 if rl.deltaparent(rev) != nullrev:
107 107 m = _(
108 108 b'cannot censor due to censored '
109 109 b'revision having delta stored'
110 110 )
111 111 raise error.Abort(m)
112 112 rawtext = rl._chunk(rev)
113 113 else:
114 114 rawtext = rl.rawdata(rev)
115 115
116 116 newrl.addrawrevision(
117 117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
118 118 )
119 119
120 120 tr.addbackup(rl._indexfile, location=b'store')
121 121 if not rl._inline:
122 122 tr.addbackup(rl._datafile, location=b'store')
123 123
124 124 rl.opener.rename(newrl._indexfile, rl._indexfile)
125 125 if not rl._inline:
126 126 rl.opener.rename(newrl._datafile, rl._datafile)
127 127
128 128 rl.clearcaches()
129 129 rl._loadindex()
130 130
131 131
132 132 def v2_censor(revlog, tr, censornode, tombstone=b''):
133 133 """censors a revision in a "version 2" revlog"""
134 134 assert revlog._format_version != REVLOGV0, revlog._format_version
135 135 assert revlog._format_version != REVLOGV1, revlog._format_version
136 136
137 137 censor_revs = {revlog.rev(censornode)}
138 138 _rewrite_v2(revlog, tr, censor_revs, tombstone)
139 139
140 140
141 141 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
142 142 """rewrite a revlog to censor some of its content
143 143
144 144 General principle
145 145
146 146 We create new revlog files (index/data/sidedata) to copy the content of
147 147 the existing data without the censored data.
148 148
149 149 We need to recompute a new delta for any revision that used the censored
150 150 revision as its delta base. As the cumulative size of the new deltas may be
151 151 large, we store them in a temporary file until they are written to their
152 152 final destination.
153 153
154 154 All data before the censored data can be blindly copied. The rest needs
155 155 to be copied as we go, and the associated index entry needs adjustment.
156 156 """
157 157 assert revlog._format_version != REVLOGV0, revlog._format_version
158 158 assert revlog._format_version != REVLOGV1, revlog._format_version
159 159
160 160 old_index = revlog.index
161 161 docket = revlog._docket
162 162
163 163 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
164 164
165 165 first_excl_rev = min(censor_revs)
166 166
167 167 first_excl_entry = revlog.index[first_excl_rev]
168 168 index_cutoff = revlog.index.entry_size * first_excl_rev
169 169 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
170 170 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
171 171
172 172 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
173 173 # rev → (new_base, data_start, data_end, compression_mode)
174 174 rewritten_entries = _precompute_rewritten_delta(
175 175 revlog,
176 176 old_index,
177 177 censor_revs,
178 178 tmp_storage,
179 179 )
180 180
181 181 all_files = _setup_new_files(
182 182 revlog,
183 183 index_cutoff,
184 184 data_cutoff,
185 185 sidedata_cutoff,
186 186 )
187 187
188 188 # we don't need to open the old index file since its content already
189 189 # exists in a usable form in `old_index`.
190 190 with all_files() as open_files:
191 191 (
192 192 old_data_file,
193 193 old_sidedata_file,
194 194 new_index_file,
195 195 new_data_file,
196 196 new_sidedata_file,
197 197 ) = open_files
198 198
199 199 # writing the censored revision
200 200
201 201 # Writing all subsequent revisions
202 202 for rev in range(first_excl_rev, len(old_index)):
203 203 if rev in censor_revs:
204 204 _rewrite_censor(
205 205 revlog,
206 206 old_index,
207 207 open_files,
208 208 rev,
209 209 tombstone,
210 210 )
211 211 else:
212 212 _rewrite_simple(
213 213 revlog,
214 214 old_index,
215 215 open_files,
216 216 rev,
217 217 rewritten_entries,
218 218 tmp_storage,
219 219 )
220 220 docket.write(transaction=None, stripping=True)
221 221
222 222
223 223 def _precompute_rewritten_delta(
224 224 revlog,
225 225 old_index,
226 226 excluded_revs,
227 227 tmp_storage,
228 228 ):
229 229 """Compute new delta for revisions whose delta is based on revision that
230 230 will not survive as is.
231 231
232 232 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
233 233 """
234 234 dc = deltas.deltacomputer(revlog)
235 235 rewritten_entries = {}
236 236 first_excl_rev = min(excluded_revs)
237 237 with revlog.reading():
238 238 for rev in range(first_excl_rev, len(old_index)):
239 239 if rev in excluded_revs:
240 240 # this revision will be preserved as is, so we don't need to
241 241 # consider recomputing a delta.
242 242 continue
243 243 entry = old_index[rev]
244 244 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
245 245 continue
246 246 # This is a revision that uses the censored revision as the base
247 247 # for its delta. We need to compute a new delta for it.
248 248 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
249 249 # this revision is empty, we can delta against nullrev
250 250 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
251 251 else:
252 252
253 253 text = revlog.rawdata(rev)
254 254 info = revlogutils.revisioninfo(
255 255 node=entry[ENTRY_NODE_ID],
256 256 p1=revlog.node(entry[ENTRY_PARENT_1]),
257 257 p2=revlog.node(entry[ENTRY_PARENT_2]),
258 258 btext=[text],
259 259 textlen=len(text),
260 260 cachedelta=None,
261 261 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
262 262 )
263 263 d = dc.finddeltainfo(
264 264 info, excluded_bases=excluded_revs, target_rev=rev
265 265 )
266 266 default_comp = revlog._docket.default_compression_header
267 267 comp_mode, d = deltas.delta_compression(default_comp, d)
268 268 # using `tell` is a bit lazy, but we are not here for speed
269 269 start = tmp_storage.tell()
270 270 tmp_storage.write(d.data[1])
271 271 end = tmp_storage.tell()
272 272 rewritten_entries[rev] = (d.base, start, end, comp_mode)
273 273 return rewritten_entries
274 274
275 275
276 276 def _setup_new_files(
277 277 revlog,
278 278 index_cutoff,
279 279 data_cutoff,
280 280 sidedata_cutoff,
281 281 ):
282 282 """
283 283
284 284 return a context manager to open all the relevant files:
285 285 - old_data_file,
286 286 - old_sidedata_file,
287 287 - new_index_file,
288 288 - new_data_file,
289 289 - new_sidedata_file,
290 290
291 291 The old_index_file is not here because it is accessed through the
292 292 `old_index` object of the caller function.
293 293 """
294 294 docket = revlog._docket
295 295 old_index_filepath = revlog.opener.join(docket.index_filepath())
296 296 old_data_filepath = revlog.opener.join(docket.data_filepath())
297 297 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
298 298
299 299 new_index_filepath = revlog.opener.join(docket.new_index_file())
300 300 new_data_filepath = revlog.opener.join(docket.new_data_file())
301 301 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
302 302
303 303 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
304 304 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
305 305 util.copyfile(
306 306 old_sidedata_filepath,
307 307 new_sidedata_filepath,
308 308 nb_bytes=sidedata_cutoff,
309 309 )
310 310 revlog.opener.register_file(docket.index_filepath())
311 311 revlog.opener.register_file(docket.data_filepath())
312 312 revlog.opener.register_file(docket.sidedata_filepath())
313 313
314 314 docket.index_end = index_cutoff
315 315 docket.data_end = data_cutoff
316 316 docket.sidedata_end = sidedata_cutoff
317 317
318 318 # reload the revlog internal information
319 319 revlog.clearcaches()
320 320 revlog._loadindex(docket=docket)
321 321
322 322 @contextlib.contextmanager
323 323 def all_files_opener():
324 324 # hide opening in a helper function to please check-code, black
325 325 # and various python version at the same time
326 326 with open(old_data_filepath, 'rb') as old_data_file:
327 327 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
328 328 with open(new_index_filepath, 'r+b') as new_index_file:
329 329 with open(new_data_filepath, 'r+b') as new_data_file:
330 330 with open(
331 331 new_sidedata_filepath, 'r+b'
332 332 ) as new_sidedata_file:
333 333 new_index_file.seek(0, os.SEEK_END)
334 334 assert new_index_file.tell() == index_cutoff
335 335 new_data_file.seek(0, os.SEEK_END)
336 336 assert new_data_file.tell() == data_cutoff
337 337 new_sidedata_file.seek(0, os.SEEK_END)
338 338 assert new_sidedata_file.tell() == sidedata_cutoff
339 339 yield (
340 340 old_data_file,
341 341 old_sidedata_file,
342 342 new_index_file,
343 343 new_data_file,
344 344 new_sidedata_file,
345 345 )
346 346
347 347 return all_files_opener
348 348
349 349
350 350 def _rewrite_simple(
351 351 revlog,
352 352 old_index,
353 353 all_files,
354 354 rev,
355 355 rewritten_entries,
356 356 tmp_storage,
357 357 ):
358 358 """append a normal revision to the index after the rewritten one(s)"""
359 359 (
360 360 old_data_file,
361 361 old_sidedata_file,
362 362 new_index_file,
363 363 new_data_file,
364 364 new_sidedata_file,
365 365 ) = all_files
366 366 entry = old_index[rev]
367 367 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
368 368 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
369 369
370 370 if rev not in rewritten_entries:
371 371 old_data_file.seek(old_data_offset)
372 372 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
373 373 new_data = old_data_file.read(new_data_size)
374 374 data_delta_base = entry[ENTRY_DELTA_BASE]
375 375 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
376 376 else:
377 377 (
378 378 data_delta_base,
379 379 start,
380 380 end,
381 381 d_comp_mode,
382 382 ) = rewritten_entries[rev]
383 383 new_data_size = end - start
384 384 tmp_storage.seek(start)
385 385 new_data = tmp_storage.read(new_data_size)
386 386
387 387 # It might be faster to group continuous read/write operations;
388 388 # however, this is censor, an operation that is not focused
389 389 # on stellar performance. So I have not written this
390 390 # optimisation yet.
391 391 new_data_offset = new_data_file.tell()
392 392 new_data_file.write(new_data)
393 393
394 394 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
395 395 new_sidedata_offset = new_sidedata_file.tell()
396 396 if 0 < sidedata_size:
397 397 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
398 398 old_sidedata_file.seek(old_sidedata_offset)
399 399 new_sidedata = old_sidedata_file.read(sidedata_size)
400 400 new_sidedata_file.write(new_sidedata)
401 401
402 402 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
403 403 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
404 404 assert data_delta_base <= rev, (data_delta_base, rev)
405 405
406 406 new_entry = revlogutils.entry(
407 407 flags=flags,
408 408 data_offset=new_data_offset,
409 409 data_compressed_length=new_data_size,
410 410 data_uncompressed_length=data_uncompressed_length,
411 411 data_delta_base=data_delta_base,
412 412 link_rev=entry[ENTRY_LINK_REV],
413 413 parent_rev_1=entry[ENTRY_PARENT_1],
414 414 parent_rev_2=entry[ENTRY_PARENT_2],
415 415 node_id=entry[ENTRY_NODE_ID],
416 416 sidedata_offset=new_sidedata_offset,
417 417 sidedata_compressed_length=sidedata_size,
418 418 data_compression_mode=d_comp_mode,
419 419 sidedata_compression_mode=sd_com_mode,
420 420 )
421 421 revlog.index.append(new_entry)
422 422 entry_bin = revlog.index.entry_binary(rev)
423 423 new_index_file.write(entry_bin)
424 424
425 425 revlog._docket.index_end = new_index_file.tell()
426 426 revlog._docket.data_end = new_data_file.tell()
427 427 revlog._docket.sidedata_end = new_sidedata_file.tell()
428 428
429 429
430 430 def _rewrite_censor(
431 431 revlog,
432 432 old_index,
433 433 all_files,
434 434 rev,
435 435 tombstone,
436 436 ):
437 437 """rewrite and append a censored revision"""
438 438 (
439 439 old_data_file,
440 440 old_sidedata_file,
441 441 new_index_file,
442 442 new_data_file,
443 443 new_sidedata_file,
444 444 ) = all_files
445 445 entry = old_index[rev]
446 446
447 447 # XXX consider trying the default compression too
448 448 new_data_size = len(tombstone)
449 449 new_data_offset = new_data_file.tell()
450 450 new_data_file.write(tombstone)
451 451
452 452 # we are not adding any sidedata as they might leak info about the censored version
453 453
454 454 link_rev = entry[ENTRY_LINK_REV]
455 455
456 456 p1 = entry[ENTRY_PARENT_1]
457 457 p2 = entry[ENTRY_PARENT_2]
458 458
459 459 new_entry = revlogutils.entry(
460 460 flags=constants.REVIDX_ISCENSORED,
461 461 data_offset=new_data_offset,
462 462 data_compressed_length=new_data_size,
463 463 data_uncompressed_length=new_data_size,
464 464 data_delta_base=rev,
465 465 link_rev=link_rev,
466 466 parent_rev_1=p1,
467 467 parent_rev_2=p2,
468 468 node_id=entry[ENTRY_NODE_ID],
469 469 sidedata_offset=0,
470 470 sidedata_compressed_length=0,
471 471 data_compression_mode=COMP_MODE_PLAIN,
472 472 sidedata_compression_mode=COMP_MODE_PLAIN,
473 473 )
474 474 revlog.index.append(new_entry)
475 475 entry_bin = revlog.index.entry_binary(rev)
476 476 new_index_file.write(entry_bin)
477 477 revlog._docket.index_end = new_index_file.tell()
478 478 revlog._docket.data_end = new_data_file.tell()
479 479
480 480
481 481 def _get_filename_from_filelog_index(path):
482 482 # Drop the extension and the `data/` prefix
483 483 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
484 484 if len(path_part) < 2:
485 485 msg = _(b"cannot recognize filelog from filename: '%s'")
486 486 msg %= path
487 487 raise error.Abort(msg)
488 488
489 489 return path_part[1]
490 490
491 491
492 492 def _filelog_from_filename(repo, path):
493 493 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
494 494
495 495 from .. import filelog # avoid cycle
496 496
497 497 fl = filelog.filelog(repo.svfs, path)
498 498 return fl
499 499
500 500
501 501 def _write_swapped_parents(repo, rl, rev, offset, fp):
502 502 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
503 503 from ..pure import parsers # avoid cycle
504 504
505 505 if repo._currentlock(repo._lockref) is None:
506 506 # Let's be paranoid about it
507 507 msg = "repo needs to be locked to rewrite parents"
508 508 raise error.ProgrammingError(msg)
509 509
510 510 index_format = parsers.IndexObject.index_format
511 511 entry = rl.index[rev]
512 512 new_entry = list(entry)
513 513 new_entry[5], new_entry[6] = entry[6], entry[5]
514 514 packed = index_format.pack(*new_entry[:8])
515 515 fp.seek(offset)
516 516 fp.write(packed)
517 517
518 518
519 519 def _reorder_filelog_parents(repo, fl, to_fix):
520 520 """
521 521 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
522 522 new version to disk, overwriting the old one with a rename.
523 523 """
524 524 from ..pure import parsers # avoid cycle
525 525
526 526 ui = repo.ui
527 527 assert len(to_fix) > 0
528 528 rl = fl._revlog
529 529 if rl._format_version != constants.REVLOGV1:
530 530 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
531 531 raise error.ProgrammingError(msg)
532 532
533 533 index_file = rl._indexfile
534 534 new_file_path = index_file + b'.tmp-parents-fix'
535 535 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
536 536
537 537 with ui.uninterruptible():
538 538 try:
539 539 util.copyfile(
540 540 rl.opener.join(index_file),
541 541 rl.opener.join(new_file_path),
542 542 checkambig=rl._checkambig,
543 543 )
544 544
545 545 with rl.opener(new_file_path, mode=b"r+") as fp:
546 546 if rl._inline:
547 547 index = parsers.InlinedIndexObject(fp.read())
548 548 for rev in fl.revs():
549 549 if rev in to_fix:
550 550 offset = index._calculate_index(rev)
551 551 _write_swapped_parents(repo, rl, rev, offset, fp)
552 552 ui.write(repaired_msg % (rev, index_file))
553 553 else:
554 554 index_format = parsers.IndexObject.index_format
555 555 for rev in to_fix:
556 556 offset = rev * index_format.size
557 557 _write_swapped_parents(repo, rl, rev, offset, fp)
558 558 ui.write(repaired_msg % (rev, index_file))
559 559
560 560 rl.opener.rename(new_file_path, index_file)
561 561 rl.clearcaches()
562 562 rl._loadindex()
563 563 finally:
564 564 util.tryunlink(new_file_path)
565 565
566 566
567 567 def _is_revision_affected(fl, filerev, metadata_cache=None):
568 568 full_text = lambda: fl._revlog.rawdata(filerev)
569 569 parent_revs = lambda: fl._revlog.parentrevs(filerev)
570 570 return _is_revision_affected_inner(
571 571 full_text, parent_revs, filerev, metadata_cache
572 572 )
573 573
574 574
575 575 def _is_revision_affected_inner(
576 576 full_text,
577 577 parents_revs,
578 578 filerev,
579 579 metadata_cache=None,
580 580 ):
581 581 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
582 582 special meaning compared to the reverse in the context of filelog-based
583 583 copytracing. issue6528 exists because new code assumed that parent ordering
584 584 didn't matter, so this detects if the revision contains metadata (since
585 585 it's only used for filelog-based copytracing) and its parents are in the
586 586 "wrong" order."""
587 587 try:
588 588 raw_text = full_text()
589 589 except error.CensoredNodeError:
590 590 # We don't care about censored nodes as they never carry metadata
591 591 return False
592 592
593 593 # raw text can be a `memoryview`, which doesn't implement `startswith`
594 594 has_meta = bytes(raw_text[:2]) == b'\x01\n'
595 595 if metadata_cache is not None:
596 596 metadata_cache[filerev] = has_meta
597 597 if has_meta:
598 598 (p1, p2) = parents_revs()
599 599 if p1 != nullrev and p2 == nullrev:
600 600 return True
601 601 return False
602 602
603 603
604 604 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
605 605 rl = fl._revlog
606 606 is_censored = lambda: rl.iscensored(filerev)
607 607 delta_base = lambda: rl.deltaparent(filerev)
608 608 delta = lambda: rl._chunk(filerev)
609 609 full_text = lambda: rl.rawdata(filerev)
610 610 parent_revs = lambda: rl.parentrevs(filerev)
611 611 return _is_revision_affected_fast_inner(
612 612 is_censored,
613 613 delta_base,
614 614 delta,
615 615 full_text,
616 616 parent_revs,
617 617 filerev,
618 618 metadata_cache,
619 619 )
620 620
621 621
622 622 def _is_revision_affected_fast_inner(
623 623 is_censored,
624 624 delta_base,
625 625 delta,
626 626 full_text,
627 627 parent_revs,
628 628 filerev,
629 629 metadata_cache,
630 630 ):
631 631 """Optimization fast-path for `_is_revision_affected`.
632 632
633 633 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
634 634 revision to check if its base has metadata, saving computation of the full
635 635 text by looking at the current delta instead.
636 636
637 637 This optimization only works if the revisions are looked at in order."""
638 638
639 639 if is_censored():
640 640 # Censored revisions don't contain metadata, so they cannot be affected
641 641 metadata_cache[filerev] = False
642 642 return False
643 643
644 644 p1, p2 = parent_revs()
645 645 if p1 == nullrev or p2 != nullrev:
646 646 return False
647 647
648 648 delta_parent = delta_base()
649 649 parent_has_metadata = metadata_cache.get(delta_parent)
650 650 if parent_has_metadata is None:
651 651 return _is_revision_affected_inner(
652 652 full_text,
653 653 parent_revs,
654 654 filerev,
655 655 metadata_cache,
656 656 )
657 657
658 658 chunk = delta()
659 659 if not len(chunk):
660 660 # No diff for this revision
661 661 return parent_has_metadata
662 662
663 663 header_length = 12
664 664 if len(chunk) < header_length:
665 665 raise error.Abort(_(b"patch cannot be decoded"))
666 666
667 667 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
668 668
669 669 if start < 2: # len(b'\x01\n') == 2
670 670 # This delta does *something* to the metadata marker (if any).
671 671 # Check it the slow way
672 672 is_affected = _is_revision_affected_inner(
673 673 full_text,
674 674 parent_revs,
675 675 filerev,
676 676 metadata_cache,
677 677 )
678 678 return is_affected
679 679
680 680 # The diff did not remove or add the metadata header, it's then in the same
681 681 # situation as its parent
682 682 metadata_cache[filerev] = parent_has_metadata
683 683 return parent_has_metadata
684 684
685 685
686 686 def _from_report(ui, repo, context, from_report, dry_run):
687 687 """
688 688 Fixes the revisions given in the `from_report` file, but still checks if the
689 689 revisions are indeed affected to prevent an unfortunate cyclic situation
690 690 where we'd swap well-ordered parents again.
691 691
692 692 See the doc for `debug_fix_issue6528` for the format documentation.
693 693 """
694 694 ui.write(_(b"loading report file '%s'\n") % from_report)
695 695
696 696 with context(), open(from_report, mode='rb') as f:
697 697 for line in f.read().split(b'\n'):
698 698 if not line:
699 699 continue
700 700 filenodes, filename = line.split(b' ', 1)
701 701 fl = _filelog_from_filename(repo, filename)
702 702 to_fix = set(
703 703 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
704 704 )
705 705 excluded = set()
706 706
707 707 for filerev in to_fix:
708 708 if _is_revision_affected(fl, filerev):
709 709 msg = b"found affected revision %d for filelog '%s'\n"
710 710 ui.warn(msg % (filerev, filename))
711 711 else:
712 712 msg = _(b"revision %s of file '%s' is not affected\n")
713 713 msg %= (binascii.hexlify(fl.node(filerev)), filename)
714 714 ui.warn(msg)
715 715 excluded.add(filerev)
716 716
717 717 to_fix = to_fix - excluded
718 718 if not to_fix:
719 719 msg = _(b"no affected revisions were found for '%s'\n")
720 720 ui.write(msg % filename)
721 721 continue
722 722 if not dry_run:
723 723 _reorder_filelog_parents(repo, fl, sorted(to_fix))
724 724
725 725
726 726 def filter_delta_issue6528(revlog, deltas_iter):
727 727 """filter incomind deltas to repaire issue 6528 on the fly"""
728 728 metadata_cache = {}
729 729
730 730 deltacomputer = deltas.deltacomputer(revlog)
731 731
732 732 for rev, d in enumerate(deltas_iter, len(revlog)):
733 733 (
734 734 node,
735 735 p1_node,
736 736 p2_node,
737 737 linknode,
738 738 deltabase,
739 739 delta,
740 740 flags,
741 741 sidedata,
742 742 ) = d
743 743
744 744 if not revlog.index.has_node(deltabase):
745 745 raise error.LookupError(
746 746 deltabase, revlog.radix, _(b'unknown parent')
747 747 )
748 748 base_rev = revlog.rev(deltabase)
749 749 if not revlog.index.has_node(p1_node):
750 750 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
751 751 p1_rev = revlog.rev(p1_node)
752 752 if not revlog.index.has_node(p2_node):
753 753 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
754 754 p2_rev = revlog.rev(p2_node)
755 755
756 756 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
757 757 delta_base = lambda: revlog.rev(delta_base)
758 758 delta_base = lambda: base_rev
759 759 parent_revs = lambda: (p1_rev, p2_rev)
760 760
761 761 def full_text():
762 762 # note: being able to reuse the full text computation in the
763 763 # underlying addrevision would be useful however this is a bit too
764 764 # intrusive for the "quick" issue6528 fix we are writing before the
765 765 # 5.8 release
766 766 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
767 767
768 768 revinfo = revlogutils.revisioninfo(
769 769 node,
770 770 p1_node,
771 771 p2_node,
772 772 [None],
773 773 textlen,
774 774 (base_rev, delta),
775 775 flags,
776 776 )
777 777 return deltacomputer.buildtext(revinfo)
778 778
779 779 is_affected = _is_revision_affected_fast_inner(
780 780 is_censored,
781 781 delta_base,
782 782 lambda: delta,
783 783 full_text,
784 784 parent_revs,
785 785 rev,
786 786 metadata_cache,
787 787 )
788 788 if is_affected:
789 789 d = (
790 790 node,
791 791 p2_node,
792 792 p1_node,
793 793 linknode,
794 794 deltabase,
795 795 delta,
796 796 flags,
797 797 sidedata,
798 798 )
799 799 yield d
800 800
801 801
802 802 def repair_issue6528(
803 803 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
804 804 ):
805 805 @contextlib.contextmanager
806 806 def context():
807 807 if dry_run or to_report: # No need for locking
808 808 yield
809 809 else:
810 810 with repo.wlock(), repo.lock():
811 811 yield
812 812
813 813 if from_report:
814 814 return _from_report(ui, repo, context, from_report, dry_run)
815 815
816 816 report_entries = []
817 817
818 818 with context():
819 819 files = list(
820 820 entry
821 821 for entry in repo.store.data_entries()
822 822 if entry.is_revlog and entry.is_filelog
823 823 )
824 824
825 825 progress = ui.makeprogress(
826 826 _(b"looking for affected revisions"),
827 827 unit=_(b"filelogs"),
828 828 total=len(files),
829 829 )
830 830 found_nothing = True
831 831
832 832 for entry in files:
833 833 progress.increment()
834 834 filename = entry.target_id
835 835 fl = _filelog_from_filename(repo, entry.target_id)
836 836
837 837 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
838 838 to_fix = set()
839 839 metadata_cache = {}
840 840 for filerev in fl.revs():
841 841 affected = _is_revision_affected_fast(
842 842 repo, fl, filerev, metadata_cache
843 843 )
844 844 if paranoid:
845 845 slow = _is_revision_affected(fl, filerev)
846 846 if slow != affected:
847 847 msg = _(b"paranoid check failed for '%s' at node %s")
848 848 node = binascii.hexlify(fl.node(filerev))
849 849 raise error.Abort(msg % (filename, node))
850 850 if affected:
851 851 msg = b"found affected revision %d for file '%s'\n"
852 852 ui.warn(msg % (filerev, filename))
853 853 found_nothing = False
854 854 if not dry_run:
855 855 if to_report:
856 856 to_fix.add(binascii.hexlify(fl.node(filerev)))
857 857 else:
858 858 to_fix.add(filerev)
859 859
860 860 if to_fix:
861 861 to_fix = sorted(to_fix)
862 862 if to_report:
863 863 report_entries.append((filename, to_fix))
864 864 else:
865 865 _reorder_filelog_parents(repo, fl, to_fix)
866 866
867 867 if found_nothing:
868 868 ui.write(_(b"no affected revisions were found\n"))
869 869
870 870 if to_report and report_entries:
871 871 with open(to_report, mode="wb") as f:
872 872 for path, to_fix in report_entries:
873 873 f.write(b"%s %s\n" % (b",".join(to_fix), path))
874 874
875 875 progress.complete()
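The core of the detection logic in this module can be summarised in a few lines: a filelog revision is suspect when its raw text carries the copy-metadata marker (`\x01\n` prefix) and its parents are stored as `p1 != null, p2 == null`. A minimal sketch, with `NULLREV` and the helper name being illustrative assumptions:

    NULLREV = -1  # assumed stand-in for Mercurial's nullrev constant

    def looks_affected(raw_text, p1, p2):
        """rough restatement of the core test in _is_revision_affected_inner"""
        has_meta = bytes(raw_text[:2]) == b'\x01\n'
        return has_meta and p1 != NULLREV and p2 == NULLREV

    assert looks_affected(b'\x01\ncopy: a\n\x01\nhello', p1=3, p2=NULLREV)
    assert not looks_affected(b'hello', p1=3, p2=NULLREV)
    assert not looks_affected(b'\x01\ncopy: a\n\x01\nhello', p1=NULLREV, p2=3)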
@@ -1,41 +1,41 b''
1 1 #!/bin/bash
2 2 #
3 3 # Make sure to patch mercurial to create the delta against nullrev
4 4 #
5 5 # # Parent cdb85d0512b81031d4a7b30d6a5ddbe69ef1a876
6 6 #
7 7 # diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
8 8 # --- a/mercurial/revlogutils/deltas.py
9 9 # +++ b/mercurial/revlogutils/deltas.py
10 10 # @@ -1117,7 +1117,10 @@ class deltacomputer:
11 11 # candidaterevs = next(groups)
12 12 #
13 13 # if deltainfo is None:
14 14 # - deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
15 # + if revlog._generaldelta:
15 # + if revlog.delta_config.general_delta:
16 16 # + deltainfo = self._builddeltainfo(revinfo, nullrev, fh)
17 17 # + else:
18 18 # + deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
19 19
20 20
21 21 cd "`dirname \"$0\"`"
22 22 export HGRCPATH=
23 23 export HGMODULEPOLICY=py
24 24
25 25 rm -rf nullrev-diff
26 26 ../../hg init nullrev-diff --config format.revlog-compression=zlib
27 27 cd nullrev-diff
28 28 echo hi > a
29 29 ../../../hg commit -Am root-B
30 30 echo ho > a
31 31 ../../../hg commit -Am child-A
32 32 hg up null
33 33 echo ha > a
34 34 ../../../hg commit -Am root-A
35 35 ../../../hg debugdeltachain a
36 36 rm -rf .hg/cache/ .hg/wcache/
37 37 cd ..
38 38
39 39 tar cf test-revlog-diff-relative-to-nullrev.tar nullrev-diff
40 40
41 41 rm -rf nullrev-diff