revlog: split functionality related to deltas computation in a new module...
Boris Feld
r39366:655b5b46 default
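The visible effect of this changeset on mercurial/debugcommands.py is small: it imports the new revlogutils.deltas module under the alias deltautil and switches the sparse-read sizing loop in debugdeltachain from the private revlog._slicechunk() helper to deltautil.slicechunk(). A minimal before/after sketch of that call site, using only names that appear in the hunk below:

    # before: chunk slicing lived on the revlog module itself
    for revschunk in revlog._slicechunk(r, chain):
        ...

    # after: delta-related helpers live in the new revlogutils.deltas module
    from .revlogutils import deltas as deltautil

    for revschunk in deltautil.slicechunk(r, chain):
        ...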
@@ -1,3361 +1,3365 b''
1 1 # debugcommands.py - command processing for debug* commands
2 2 #
3 3 # Copyright 2005-2016 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import codecs
11 11 import collections
12 12 import difflib
13 13 import errno
14 14 import operator
15 15 import os
16 16 import random
17 17 import re
18 18 import socket
19 19 import ssl
20 20 import stat
21 21 import string
22 22 import subprocess
23 23 import sys
24 24 import time
25 25
26 26 from .i18n import _
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullhex,
31 31 nullid,
32 32 nullrev,
33 33 short,
34 34 )
35 35 from .thirdparty import (
36 36 cbor,
37 37 )
38 38 from . import (
39 39 bundle2,
40 40 changegroup,
41 41 cmdutil,
42 42 color,
43 43 context,
44 44 dagparser,
45 45 encoding,
46 46 error,
47 47 exchange,
48 48 extensions,
49 49 filemerge,
50 50 filesetlang,
51 51 formatter,
52 52 hg,
53 53 httppeer,
54 54 localrepo,
55 55 lock as lockmod,
56 56 logcmdutil,
57 57 merge as mergemod,
58 58 obsolete,
59 59 obsutil,
60 60 phases,
61 61 policy,
62 62 pvec,
63 63 pycompat,
64 64 registrar,
65 65 repair,
66 66 revlog,
67 67 revset,
68 68 revsetlang,
69 69 scmutil,
70 70 setdiscovery,
71 71 simplemerge,
72 72 sshpeer,
73 73 sslutil,
74 74 streamclone,
75 75 templater,
76 76 treediscovery,
77 77 upgrade,
78 78 url as urlmod,
79 79 util,
80 80 vfs as vfsmod,
81 81 wireprotoframing,
82 82 wireprotoserver,
83 83 wireprotov2peer,
84 84 )
85 85 from .utils import (
86 86 dateutil,
87 87 procutil,
88 88 stringutil,
89 89 )
90 90
91 from .revlogutils import (
92 deltas as deltautil
93 )
94
91 95 release = lockmod.release
92 96
93 97 command = registrar.command()
94 98
95 99 @command('debugancestor', [], _('[INDEX] REV1 REV2'), optionalrepo=True)
96 100 def debugancestor(ui, repo, *args):
97 101 """find the ancestor revision of two revisions in a given index"""
98 102 if len(args) == 3:
99 103 index, rev1, rev2 = args
100 104 r = revlog.revlog(vfsmod.vfs(pycompat.getcwd(), audit=False), index)
101 105 lookup = r.lookup
102 106 elif len(args) == 2:
103 107 if not repo:
104 108 raise error.Abort(_('there is no Mercurial repository here '
105 109 '(.hg not found)'))
106 110 rev1, rev2 = args
107 111 r = repo.changelog
108 112 lookup = repo.lookup
109 113 else:
110 114 raise error.Abort(_('either two or three arguments required'))
111 115 a = r.ancestor(lookup(rev1), lookup(rev2))
112 116 ui.write('%d:%s\n' % (r.rev(a), hex(a)))
113 117
114 118 @command('debugapplystreamclonebundle', [], 'FILE')
115 119 def debugapplystreamclonebundle(ui, repo, fname):
116 120 """apply a stream clone bundle file"""
117 121 f = hg.openpath(ui, fname)
118 122 gen = exchange.readbundle(ui, f, fname)
119 123 gen.apply(repo)
120 124
121 125 @command('debugbuilddag',
122 126 [('m', 'mergeable-file', None, _('add single file mergeable changes')),
123 127 ('o', 'overwritten-file', None, _('add single file all revs overwrite')),
124 128 ('n', 'new-file', None, _('add new file at each rev'))],
125 129 _('[OPTION]... [TEXT]'))
126 130 def debugbuilddag(ui, repo, text=None,
127 131 mergeable_file=False,
128 132 overwritten_file=False,
129 133 new_file=False):
130 134 """builds a repo with a given DAG from scratch in the current empty repo
131 135
132 136 The description of the DAG is read from stdin if not given on the
133 137 command line.
134 138
135 139 Elements:
136 140
137 141 - "+n" is a linear run of n nodes based on the current default parent
138 142 - "." is a single node based on the current default parent
139 143 - "$" resets the default parent to null (implied at the start);
140 144 otherwise the default parent is always the last node created
141 145 - "<p" sets the default parent to the backref p
142 146 - "*p" is a fork at parent p, which is a backref
143 147 - "*p1/p2" is a merge of parents p1 and p2, which are backrefs
144 148 - "/p2" is a merge of the preceding node and p2
145 149 - ":tag" defines a local tag for the preceding node
146 150 - "@branch" sets the named branch for subsequent nodes
147 151 - "#...\\n" is a comment up to the end of the line
148 152
149 153 Whitespace between the above elements is ignored.
150 154
151 155 A backref is either
152 156
153 157 - a number n, which references the node curr-n, where curr is the current
154 158 node, or
155 159 - the name of a local tag you placed earlier using ":tag", or
156 160 - empty to denote the default parent.
157 161
158 162 All string valued-elements are either strictly alphanumeric, or must
159 163 be enclosed in double quotes ("..."), with "\\" as escape character.
160 164 """
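    # Example (illustrative, using only the grammar described above): the text
    #     +3 :mytag *mytag +2
    # builds three linear nodes, tags the last one "mytag", forks one new node
    # off that tagged revision, and then adds two more linear nodes on top of
    # the fork.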
161 165
162 166 if text is None:
163 167 ui.status(_("reading DAG from stdin\n"))
164 168 text = ui.fin.read()
165 169
166 170 cl = repo.changelog
167 171 if len(cl) > 0:
168 172 raise error.Abort(_('repository is not empty'))
169 173
170 174 # determine number of revs in DAG
171 175 total = 0
172 176 for type, data in dagparser.parsedag(text):
173 177 if type == 'n':
174 178 total += 1
175 179
176 180 if mergeable_file:
177 181 linesperrev = 2
178 182 # make a file with k lines per rev
179 183 initialmergedlines = ['%d' % i
180 184 for i in pycompat.xrange(0, total * linesperrev)]
181 185 initialmergedlines.append("")
182 186
183 187 tags = []
184 188 progress = ui.makeprogress(_('building'), unit=_('revisions'),
185 189 total=total)
186 190 with progress, repo.wlock(), repo.lock(), repo.transaction("builddag"):
187 191 at = -1
188 192 atbranch = 'default'
189 193 nodeids = []
190 194 id = 0
191 195 progress.update(id)
192 196 for type, data in dagparser.parsedag(text):
193 197 if type == 'n':
194 198 ui.note(('node %s\n' % pycompat.bytestr(data)))
195 199 id, ps = data
196 200
197 201 files = []
198 202 filecontent = {}
199 203
200 204 p2 = None
201 205 if mergeable_file:
202 206 fn = "mf"
203 207 p1 = repo[ps[0]]
204 208 if len(ps) > 1:
205 209 p2 = repo[ps[1]]
206 210 pa = p1.ancestor(p2)
207 211 base, local, other = [x[fn].data() for x in (pa, p1,
208 212 p2)]
209 213 m3 = simplemerge.Merge3Text(base, local, other)
210 214 ml = [l.strip() for l in m3.merge_lines()]
211 215 ml.append("")
212 216 elif at > 0:
213 217 ml = p1[fn].data().split("\n")
214 218 else:
215 219 ml = initialmergedlines
216 220 ml[id * linesperrev] += " r%i" % id
217 221 mergedtext = "\n".join(ml)
218 222 files.append(fn)
219 223 filecontent[fn] = mergedtext
220 224
221 225 if overwritten_file:
222 226 fn = "of"
223 227 files.append(fn)
224 228 filecontent[fn] = "r%i\n" % id
225 229
226 230 if new_file:
227 231 fn = "nf%i" % id
228 232 files.append(fn)
229 233 filecontent[fn] = "r%i\n" % id
230 234 if len(ps) > 1:
231 235 if not p2:
232 236 p2 = repo[ps[1]]
233 237 for fn in p2:
234 238 if fn.startswith("nf"):
235 239 files.append(fn)
236 240 filecontent[fn] = p2[fn].data()
237 241
238 242 def fctxfn(repo, cx, path):
239 243 if path in filecontent:
240 244 return context.memfilectx(repo, cx, path,
241 245 filecontent[path])
242 246 return None
243 247
244 248 if len(ps) == 0 or ps[0] < 0:
245 249 pars = [None, None]
246 250 elif len(ps) == 1:
247 251 pars = [nodeids[ps[0]], None]
248 252 else:
249 253 pars = [nodeids[p] for p in ps]
250 254 cx = context.memctx(repo, pars, "r%i" % id, files, fctxfn,
251 255 date=(id, 0),
252 256 user="debugbuilddag",
253 257 extra={'branch': atbranch})
254 258 nodeid = repo.commitctx(cx)
255 259 nodeids.append(nodeid)
256 260 at = id
257 261 elif type == 'l':
258 262 id, name = data
259 263 ui.note(('tag %s\n' % name))
260 264 tags.append("%s %s\n" % (hex(repo.changelog.node(id)), name))
261 265 elif type == 'a':
262 266 ui.note(('branch %s\n' % data))
263 267 atbranch = data
264 268 progress.update(id)
265 269
266 270 if tags:
267 271 repo.vfs.write("localtags", "".join(tags))
268 272
269 273 def _debugchangegroup(ui, gen, all=None, indent=0, **opts):
270 274 indent_string = ' ' * indent
271 275 if all:
272 276 ui.write(("%sformat: id, p1, p2, cset, delta base, len(delta)\n")
273 277 % indent_string)
274 278
275 279 def showchunks(named):
276 280 ui.write("\n%s%s\n" % (indent_string, named))
277 281 for deltadata in gen.deltaiter():
278 282 node, p1, p2, cs, deltabase, delta, flags = deltadata
279 283 ui.write("%s%s %s %s %s %s %d\n" %
280 284 (indent_string, hex(node), hex(p1), hex(p2),
281 285 hex(cs), hex(deltabase), len(delta)))
282 286
283 287 chunkdata = gen.changelogheader()
284 288 showchunks("changelog")
285 289 chunkdata = gen.manifestheader()
286 290 showchunks("manifest")
287 291 for chunkdata in iter(gen.filelogheader, {}):
288 292 fname = chunkdata['filename']
289 293 showchunks(fname)
290 294 else:
291 295 if isinstance(gen, bundle2.unbundle20):
292 296 raise error.Abort(_('use debugbundle2 for this file'))
293 297 chunkdata = gen.changelogheader()
294 298 for deltadata in gen.deltaiter():
295 299 node, p1, p2, cs, deltabase, delta, flags = deltadata
296 300 ui.write("%s%s\n" % (indent_string, hex(node)))
297 301
298 302 def _debugobsmarkers(ui, part, indent=0, **opts):
299 303 """display version and markers contained in 'data'"""
300 304 opts = pycompat.byteskwargs(opts)
301 305 data = part.read()
302 306 indent_string = ' ' * indent
303 307 try:
304 308 version, markers = obsolete._readmarkers(data)
305 309 except error.UnknownVersion as exc:
306 310 msg = "%sunsupported version: %s (%d bytes)\n"
307 311 msg %= indent_string, exc.version, len(data)
308 312 ui.write(msg)
309 313 else:
310 314 msg = "%sversion: %d (%d bytes)\n"
311 315 msg %= indent_string, version, len(data)
312 316 ui.write(msg)
313 317 fm = ui.formatter('debugobsolete', opts)
314 318 for rawmarker in sorted(markers):
315 319 m = obsutil.marker(None, rawmarker)
316 320 fm.startitem()
317 321 fm.plain(indent_string)
318 322 cmdutil.showmarker(fm, m)
319 323 fm.end()
320 324
321 325 def _debugphaseheads(ui, data, indent=0):
322 326 """display version and markers contained in 'data'"""
323 327 indent_string = ' ' * indent
324 328 headsbyphase = phases.binarydecode(data)
325 329 for phase in phases.allphases:
326 330 for head in headsbyphase[phase]:
327 331 ui.write(indent_string)
328 332 ui.write('%s %s\n' % (hex(head), phases.phasenames[phase]))
329 333
330 334 def _quasirepr(thing):
331 335 if isinstance(thing, (dict, util.sortdict, collections.OrderedDict)):
332 336 return '{%s}' % (
333 337 b', '.join(b'%s: %s' % (k, thing[k]) for k in sorted(thing)))
334 338 return pycompat.bytestr(repr(thing))
335 339
336 340 def _debugbundle2(ui, gen, all=None, **opts):
337 341 """lists the contents of a bundle2"""
338 342 if not isinstance(gen, bundle2.unbundle20):
339 343 raise error.Abort(_('not a bundle2 file'))
340 344 ui.write(('Stream params: %s\n' % _quasirepr(gen.params)))
341 345 parttypes = opts.get(r'part_type', [])
342 346 for part in gen.iterparts():
343 347 if parttypes and part.type not in parttypes:
344 348 continue
345 349 msg = '%s -- %s (mandatory: %r)\n'
346 350 ui.write((msg % (part.type, _quasirepr(part.params), part.mandatory)))
347 351 if part.type == 'changegroup':
348 352 version = part.params.get('version', '01')
349 353 cg = changegroup.getunbundler(version, part, 'UN')
350 354 if not ui.quiet:
351 355 _debugchangegroup(ui, cg, all=all, indent=4, **opts)
352 356 if part.type == 'obsmarkers':
353 357 if not ui.quiet:
354 358 _debugobsmarkers(ui, part, indent=4, **opts)
355 359 if part.type == 'phase-heads':
356 360 if not ui.quiet:
357 361 _debugphaseheads(ui, part, indent=4)
358 362
359 363 @command('debugbundle',
360 364 [('a', 'all', None, _('show all details')),
361 365 ('', 'part-type', [], _('show only the named part type')),
362 366 ('', 'spec', None, _('print the bundlespec of the bundle'))],
363 367 _('FILE'),
364 368 norepo=True)
365 369 def debugbundle(ui, bundlepath, all=None, spec=None, **opts):
366 370 """lists the contents of a bundle"""
367 371 with hg.openpath(ui, bundlepath) as f:
368 372 if spec:
369 373 spec = exchange.getbundlespec(ui, f)
370 374 ui.write('%s\n' % spec)
371 375 return
372 376
373 377 gen = exchange.readbundle(ui, f, bundlepath)
374 378 if isinstance(gen, bundle2.unbundle20):
375 379 return _debugbundle2(ui, gen, all=all, **opts)
376 380 _debugchangegroup(ui, gen, all=all, **opts)
377 381
378 382 @command('debugcapabilities',
379 383 [], _('PATH'),
380 384 norepo=True)
381 385 def debugcapabilities(ui, path, **opts):
382 386 """lists the capabilities of a remote peer"""
383 387 opts = pycompat.byteskwargs(opts)
384 388 peer = hg.peer(ui, opts, path)
385 389 caps = peer.capabilities()
386 390 ui.write(('Main capabilities:\n'))
387 391 for c in sorted(caps):
388 392 ui.write((' %s\n') % c)
389 393 b2caps = bundle2.bundle2caps(peer)
390 394 if b2caps:
391 395 ui.write(('Bundle2 capabilities:\n'))
392 396 for key, values in sorted(b2caps.iteritems()):
393 397 ui.write((' %s\n') % key)
394 398 for v in values:
395 399 ui.write((' %s\n') % v)
396 400
397 401 @command('debugcheckstate', [], '')
398 402 def debugcheckstate(ui, repo):
399 403 """validate the correctness of the current dirstate"""
400 404 parent1, parent2 = repo.dirstate.parents()
401 405 m1 = repo[parent1].manifest()
402 406 m2 = repo[parent2].manifest()
403 407 errors = 0
404 408 for f in repo.dirstate:
405 409 state = repo.dirstate[f]
406 410 if state in "nr" and f not in m1:
407 411 ui.warn(_("%s in state %s, but not in manifest1\n") % (f, state))
408 412 errors += 1
409 413 if state in "a" and f in m1:
410 414 ui.warn(_("%s in state %s, but also in manifest1\n") % (f, state))
411 415 errors += 1
412 416 if state in "m" and f not in m1 and f not in m2:
413 417 ui.warn(_("%s in state %s, but not in either manifest\n") %
414 418 (f, state))
415 419 errors += 1
416 420 for f in m1:
417 421 state = repo.dirstate[f]
418 422 if state not in "nrm":
419 423 ui.warn(_("%s in manifest1, but listed as state %s") % (f, state))
420 424 errors += 1
421 425 if errors:
422 426 error = _(".hg/dirstate inconsistent with current parent's manifest")
423 427 raise error.Abort(error)
424 428
425 429 @command('debugcolor',
426 430 [('', 'style', None, _('show all configured styles'))],
427 431 'hg debugcolor')
428 432 def debugcolor(ui, repo, **opts):
429 433 """show available color, effects or style"""
430 434 ui.write(('color mode: %s\n') % stringutil.pprint(ui._colormode))
431 435 if opts.get(r'style'):
432 436 return _debugdisplaystyle(ui)
433 437 else:
434 438 return _debugdisplaycolor(ui)
435 439
436 440 def _debugdisplaycolor(ui):
437 441 ui = ui.copy()
438 442 ui._styles.clear()
439 443 for effect in color._activeeffects(ui).keys():
440 444 ui._styles[effect] = effect
441 445 if ui._terminfoparams:
442 446 for k, v in ui.configitems('color'):
443 447 if k.startswith('color.'):
444 448 ui._styles[k] = k[6:]
445 449 elif k.startswith('terminfo.'):
446 450 ui._styles[k] = k[9:]
447 451 ui.write(_('available colors:\n'))
448 452 # sort labels with a '_' after the others to group the '_background' entries.
449 453 items = sorted(ui._styles.items(),
450 454 key=lambda i: ('_' in i[0], i[0], i[1]))
451 455 for colorname, label in items:
452 456 ui.write(('%s\n') % colorname, label=label)
453 457
454 458 def _debugdisplaystyle(ui):
455 459 ui.write(_('available style:\n'))
456 460 if not ui._styles:
457 461 return
458 462 width = max(len(s) for s in ui._styles)
459 463 for label, effects in sorted(ui._styles.items()):
460 464 ui.write('%s' % label, label=label)
461 465 if effects:
462 466 # 50
463 467 ui.write(': ')
464 468 ui.write(' ' * (max(0, width - len(label))))
465 469 ui.write(', '.join(ui.label(e, e) for e in effects.split()))
466 470 ui.write('\n')
467 471
468 472 @command('debugcreatestreamclonebundle', [], 'FILE')
469 473 def debugcreatestreamclonebundle(ui, repo, fname):
470 474 """create a stream clone bundle file
471 475
472 476 Stream bundles are special bundles that are essentially archives of
473 477 revlog files. They are commonly used for cloning very quickly.
474 478 """
475 479 # TODO we may want to turn this into an abort when this functionality
476 480 # is moved into `hg bundle`.
477 481 if phases.hassecret(repo):
478 482 ui.warn(_('(warning: stream clone bundle will contain secret '
479 483 'revisions)\n'))
480 484
481 485 requirements, gen = streamclone.generatebundlev1(repo)
482 486 changegroup.writechunks(ui, gen, fname)
483 487
484 488 ui.write(_('bundle requirements: %s\n') % ', '.join(sorted(requirements)))
485 489
486 490 @command('debugdag',
487 491 [('t', 'tags', None, _('use tags as labels')),
488 492 ('b', 'branches', None, _('annotate with branch names')),
489 493 ('', 'dots', None, _('use dots for runs')),
490 494 ('s', 'spaces', None, _('separate elements by spaces'))],
491 495 _('[OPTION]... [FILE [REV]...]'),
492 496 optionalrepo=True)
493 497 def debugdag(ui, repo, file_=None, *revs, **opts):
494 498 """format the changelog or an index DAG as a concise textual description
495 499
496 500 If you pass a revlog index, the revlog's DAG is emitted. If you list
497 501 revision numbers, they get labeled in the output as rN.
498 502
499 503 Otherwise, the changelog DAG of the current repo is emitted.
500 504 """
501 505 spaces = opts.get(r'spaces')
502 506 dots = opts.get(r'dots')
503 507 if file_:
504 508 rlog = revlog.revlog(vfsmod.vfs(pycompat.getcwd(), audit=False),
505 509 file_)
506 510 revs = set((int(r) for r in revs))
507 511 def events():
508 512 for r in rlog:
509 513 yield 'n', (r, list(p for p in rlog.parentrevs(r)
510 514 if p != -1))
511 515 if r in revs:
512 516 yield 'l', (r, "r%i" % r)
513 517 elif repo:
514 518 cl = repo.changelog
515 519 tags = opts.get(r'tags')
516 520 branches = opts.get(r'branches')
517 521 if tags:
518 522 labels = {}
519 523 for l, n in repo.tags().items():
520 524 labels.setdefault(cl.rev(n), []).append(l)
521 525 def events():
522 526 b = "default"
523 527 for r in cl:
524 528 if branches:
525 529 newb = cl.read(cl.node(r))[5]['branch']
526 530 if newb != b:
527 531 yield 'a', newb
528 532 b = newb
529 533 yield 'n', (r, list(p for p in cl.parentrevs(r)
530 534 if p != -1))
531 535 if tags:
532 536 ls = labels.get(r)
533 537 if ls:
534 538 for l in ls:
535 539 yield 'l', (r, l)
536 540 else:
537 541 raise error.Abort(_('need repo for changelog dag'))
538 542
539 543 for line in dagparser.dagtextlines(events(),
540 544 addspaces=spaces,
541 545 wraplabels=True,
542 546 wrapannotations=True,
543 547 wrapnonlinear=dots,
544 548 usedots=dots,
545 549 maxlinewidth=70):
546 550 ui.write(line)
547 551 ui.write("\n")
548 552
549 553 @command('debugdata', cmdutil.debugrevlogopts, _('-c|-m|FILE REV'))
550 554 def debugdata(ui, repo, file_, rev=None, **opts):
551 555 """dump the contents of a data file revision"""
552 556 opts = pycompat.byteskwargs(opts)
553 557 if opts.get('changelog') or opts.get('manifest') or opts.get('dir'):
554 558 if rev is not None:
555 559 raise error.CommandError('debugdata', _('invalid arguments'))
556 560 file_, rev = None, file_
557 561 elif rev is None:
558 562 raise error.CommandError('debugdata', _('invalid arguments'))
559 563 r = cmdutil.openstorage(repo, 'debugdata', file_, opts)
560 564 try:
561 565 ui.write(r.revision(r.lookup(rev), raw=True))
562 566 except KeyError:
563 567 raise error.Abort(_('invalid revision identifier %s') % rev)
564 568
565 569 @command('debugdate',
566 570 [('e', 'extended', None, _('try extended date formats'))],
567 571 _('[-e] DATE [RANGE]'),
568 572 norepo=True, optionalrepo=True)
569 573 def debugdate(ui, date, range=None, **opts):
570 574 """parse and display a date"""
571 575 if opts[r"extended"]:
572 576 d = dateutil.parsedate(date, util.extendeddateformats)
573 577 else:
574 578 d = dateutil.parsedate(date)
575 579 ui.write(("internal: %d %d\n") % d)
576 580 ui.write(("standard: %s\n") % dateutil.datestr(d))
577 581 if range:
578 582 m = dateutil.matchdate(range)
579 583 ui.write(("match: %s\n") % m(d[0]))
580 584
581 585 @command('debugdeltachain',
582 586 cmdutil.debugrevlogopts + cmdutil.formatteropts,
583 587 _('-c|-m|FILE'),
584 588 optionalrepo=True)
585 589 def debugdeltachain(ui, repo, file_=None, **opts):
586 590 """dump information about delta chains in a revlog
587 591
588 592 Output can be templatized. Available template keywords are:
589 593
590 594 :``rev``: revision number
591 595 :``chainid``: delta chain identifier (numbered by unique base)
592 596 :``chainlen``: delta chain length to this revision
593 597 :``prevrev``: previous revision in delta chain
594 598 :``deltatype``: role of delta / how it was computed
595 599 :``compsize``: compressed size of revision
596 600 :``uncompsize``: uncompressed size of revision
597 601 :``chainsize``: total size of compressed revisions in chain
598 602 :``chainratio``: total chain size divided by uncompressed revision size
599 603 (new delta chains typically start at ratio 2.00)
600 604 :``lindist``: linear distance from base revision in delta chain to end
601 605 of this revision
602 606 :``extradist``: total size of revisions not part of this delta chain from
603 607 base of delta chain to end of this revision; a measurement
604 608 of how much extra data we need to read/seek across to read
605 609 the delta chain for this revision
606 610 :``extraratio``: extradist divided by chainsize; another representation of
607 611 how much unrelated data is needed to load this delta chain
608 612
609 613 If the repository is configured to use the sparse read, additional keywords
610 614 are available:
611 615
612 616 :``readsize``: total size of data read from the disk for a revision
613 617 (sum of the sizes of all the blocks)
614 618 :``largestblock``: size of the largest block of data read from the disk
615 619 :``readdensity``: density of useful bytes in the data read from the disk
616 620 :``srchunks``: in how many data hunks the whole revision would be read
617 621
618 622 The sparse read can be enabled with experimental.sparse-read = True
619 623 """
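    # Worked example with made-up numbers, following the formulas applied
    # further down: for a revision whose delta chain compresses to
    # chainsize = 900 bytes against an uncompressed size of 600 bytes,
    # chainratio = 900 / 600 = 1.50; if reading that chain spans
    # lindist = 1200 bytes on disk, then extradist = 1200 - 900 = 300 and
    # extraratio = 300 / 900 ~= 0.33.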
620 624 opts = pycompat.byteskwargs(opts)
621 625 r = cmdutil.openrevlog(repo, 'debugdeltachain', file_, opts)
622 626 index = r.index
623 627 start = r.start
624 628 length = r.length
625 629 generaldelta = r.version & revlog.FLAG_GENERALDELTA
626 630 withsparseread = getattr(r, '_withsparseread', False)
627 631
628 632 def revinfo(rev):
629 633 e = index[rev]
630 634 compsize = e[1]
631 635 uncompsize = e[2]
632 636 chainsize = 0
633 637
634 638 if generaldelta:
635 639 if e[3] == e[5]:
636 640 deltatype = 'p1'
637 641 elif e[3] == e[6]:
638 642 deltatype = 'p2'
639 643 elif e[3] == rev - 1:
640 644 deltatype = 'prev'
641 645 elif e[3] == rev:
642 646 deltatype = 'base'
643 647 else:
644 648 deltatype = 'other'
645 649 else:
646 650 if e[3] == rev:
647 651 deltatype = 'base'
648 652 else:
649 653 deltatype = 'prev'
650 654
651 655 chain = r._deltachain(rev)[0]
652 656 for iterrev in chain:
653 657 e = index[iterrev]
654 658 chainsize += e[1]
655 659
656 660 return compsize, uncompsize, deltatype, chain, chainsize
657 661
658 662 fm = ui.formatter('debugdeltachain', opts)
659 663
660 664 fm.plain(' rev chain# chainlen prev delta '
661 665 'size rawsize chainsize ratio lindist extradist '
662 666 'extraratio')
663 667 if withsparseread:
664 668 fm.plain(' readsize largestblk rddensity srchunks')
665 669 fm.plain('\n')
666 670
667 671 chainbases = {}
668 672 for rev in r:
669 673 comp, uncomp, deltatype, chain, chainsize = revinfo(rev)
670 674 chainbase = chain[0]
671 675 chainid = chainbases.setdefault(chainbase, len(chainbases) + 1)
672 676 basestart = start(chainbase)
673 677 revstart = start(rev)
674 678 lineardist = revstart + comp - basestart
675 679 extradist = lineardist - chainsize
676 680 try:
677 681 prevrev = chain[-2]
678 682 except IndexError:
679 683 prevrev = -1
680 684
681 685 if uncomp != 0:
682 686 chainratio = float(chainsize) / float(uncomp)
683 687 else:
684 688 chainratio = chainsize
685 689
686 690 if chainsize != 0:
687 691 extraratio = float(extradist) / float(chainsize)
688 692 else:
689 693 extraratio = extradist
690 694
691 695 fm.startitem()
692 696 fm.write('rev chainid chainlen prevrev deltatype compsize '
693 697 'uncompsize chainsize chainratio lindist extradist '
694 698 'extraratio',
695 699 '%7d %7d %8d %8d %7s %10d %10d %10d %9.5f %9d %9d %10.5f',
696 700 rev, chainid, len(chain), prevrev, deltatype, comp,
697 701 uncomp, chainsize, chainratio, lineardist, extradist,
698 702 extraratio,
699 703 rev=rev, chainid=chainid, chainlen=len(chain),
700 704 prevrev=prevrev, deltatype=deltatype, compsize=comp,
701 705 uncompsize=uncomp, chainsize=chainsize,
702 706 chainratio=chainratio, lindist=lineardist,
703 707 extradist=extradist, extraratio=extraratio)
704 708 if withsparseread:
705 709 readsize = 0
706 710 largestblock = 0
707 711 srchunks = 0
708 712
709 for revschunk in revlog._slicechunk(r, chain):
713 for revschunk in deltautil.slicechunk(r, chain):
710 714 srchunks += 1
711 715 blkend = start(revschunk[-1]) + length(revschunk[-1])
712 716 blksize = blkend - start(revschunk[0])
713 717
714 718 readsize += blksize
715 719 if largestblock < blksize:
716 720 largestblock = blksize
717 721
718 722 if readsize:
719 723 readdensity = float(chainsize) / float(readsize)
720 724 else:
721 725 readdensity = 1
722 726
723 727 fm.write('readsize largestblock readdensity srchunks',
724 728 ' %10d %10d %9.5f %8d',
725 729 readsize, largestblock, readdensity, srchunks,
726 730 readsize=readsize, largestblock=largestblock,
727 731 readdensity=readdensity, srchunks=srchunks)
728 732
729 733 fm.plain('\n')
730 734
731 735 fm.end()
732 736
733 737 @command('debugdirstate|debugstate',
734 738 [('', 'nodates', None, _('do not display the saved mtime')),
735 739 ('', 'datesort', None, _('sort by saved mtime'))],
736 740 _('[OPTION]...'))
737 741 def debugstate(ui, repo, **opts):
738 742 """show the contents of the current dirstate"""
739 743
740 744 nodates = opts.get(r'nodates')
741 745 datesort = opts.get(r'datesort')
742 746
743 747 timestr = ""
744 748 if datesort:
745 749 keyfunc = lambda x: (x[1][3], x[0]) # sort by mtime, then by filename
746 750 else:
747 751 keyfunc = None # sort by filename
748 752 for file_, ent in sorted(repo.dirstate._map.iteritems(), key=keyfunc):
749 753 if ent[3] == -1:
750 754 timestr = 'unset '
751 755 elif nodates:
752 756 timestr = 'set '
753 757 else:
754 758 timestr = time.strftime(r"%Y-%m-%d %H:%M:%S ",
755 759 time.localtime(ent[3]))
756 760 timestr = encoding.strtolocal(timestr)
757 761 if ent[1] & 0o20000:
758 762 mode = 'lnk'
759 763 else:
760 764 mode = '%3o' % (ent[1] & 0o777 & ~util.umask)
761 765 ui.write("%c %s %10d %s%s\n" % (ent[0], mode, ent[2], timestr, file_))
762 766 for f in repo.dirstate.copies():
763 767 ui.write(_("copy: %s -> %s\n") % (repo.dirstate.copied(f), f))
764 768
765 769 @command('debugdiscovery',
766 770 [('', 'old', None, _('use old-style discovery')),
767 771 ('', 'nonheads', None,
768 772 _('use old-style discovery with non-heads included')),
769 773 ('', 'rev', [], 'restrict discovery to this set of revs'),
770 774 ] + cmdutil.remoteopts,
771 775 _('[--rev REV] [OTHER]'))
772 776 def debugdiscovery(ui, repo, remoteurl="default", **opts):
773 777 """runs the changeset discovery protocol in isolation"""
774 778 opts = pycompat.byteskwargs(opts)
775 779 remoteurl, branches = hg.parseurl(ui.expandpath(remoteurl))
776 780 remote = hg.peer(repo, opts, remoteurl)
777 781 ui.status(_('comparing with %s\n') % util.hidepassword(remoteurl))
778 782
779 783 # make sure tests are repeatable
780 784 random.seed(12323)
781 785
782 786 def doit(pushedrevs, remoteheads, remote=remote):
783 787 if opts.get('old'):
784 788 if not util.safehasattr(remote, 'branches'):
785 789 # enable in-client legacy support
786 790 remote = localrepo.locallegacypeer(remote.local())
787 791 common, _in, hds = treediscovery.findcommonincoming(repo, remote,
788 792 force=True)
789 793 common = set(common)
790 794 if not opts.get('nonheads'):
791 795 ui.write(("unpruned common: %s\n") %
792 796 " ".join(sorted(short(n) for n in common)))
793 797
794 798 clnode = repo.changelog.node
795 799 common = repo.revs('heads(::%ln)', common)
796 800 common = {clnode(r) for r in common}
797 801 else:
798 802 nodes = None
799 803 if pushedrevs:
800 804 revs = scmutil.revrange(repo, pushedrevs)
801 805 nodes = [repo[r].node() for r in revs]
802 806 common, any, hds = setdiscovery.findcommonheads(ui, repo, remote,
803 807 ancestorsof=nodes)
804 808 common = set(common)
805 809 rheads = set(hds)
806 810 lheads = set(repo.heads())
807 811 ui.write(("common heads: %s\n") %
808 812 " ".join(sorted(short(n) for n in common)))
809 813 if lheads <= common:
810 814 ui.write(("local is subset\n"))
811 815 elif rheads <= common:
812 816 ui.write(("remote is subset\n"))
813 817
814 818 remoterevs, _checkout = hg.addbranchrevs(repo, remote, branches, revs=None)
815 819 localrevs = opts['rev']
816 820 doit(localrevs, remoterevs)
817 821
818 822 _chunksize = 4 << 10
819 823
820 824 @command('debugdownload',
821 825 [
822 826 ('o', 'output', '', _('path')),
823 827 ],
824 828 optionalrepo=True)
825 829 def debugdownload(ui, repo, url, output=None, **opts):
826 830 """download a resource using Mercurial logic and config
827 831 """
828 832 fh = urlmod.open(ui, url, output)
829 833
830 834 dest = ui
831 835 if output:
832 836 dest = open(output, "wb", _chunksize)
833 837 try:
834 838 data = fh.read(_chunksize)
835 839 while data:
836 840 dest.write(data)
837 841 data = fh.read(_chunksize)
838 842 finally:
839 843 if output:
840 844 dest.close()
841 845
842 846 @command('debugextensions', cmdutil.formatteropts, [], optionalrepo=True)
843 847 def debugextensions(ui, repo, **opts):
844 848 '''show information about active extensions'''
845 849 opts = pycompat.byteskwargs(opts)
846 850 exts = extensions.extensions(ui)
847 851 hgver = util.version()
848 852 fm = ui.formatter('debugextensions', opts)
849 853 for extname, extmod in sorted(exts, key=operator.itemgetter(0)):
850 854 isinternal = extensions.ismoduleinternal(extmod)
851 855 extsource = pycompat.fsencode(extmod.__file__)
852 856 if isinternal:
853 857 exttestedwith = [] # never expose magic string to users
854 858 else:
855 859 exttestedwith = getattr(extmod, 'testedwith', '').split()
856 860 extbuglink = getattr(extmod, 'buglink', None)
857 861
858 862 fm.startitem()
859 863
860 864 if ui.quiet or ui.verbose:
861 865 fm.write('name', '%s\n', extname)
862 866 else:
863 867 fm.write('name', '%s', extname)
864 868 if isinternal or hgver in exttestedwith:
865 869 fm.plain('\n')
866 870 elif not exttestedwith:
867 871 fm.plain(_(' (untested!)\n'))
868 872 else:
869 873 lasttestedversion = exttestedwith[-1]
870 874 fm.plain(' (%s!)\n' % lasttestedversion)
871 875
872 876 fm.condwrite(ui.verbose and extsource, 'source',
873 877 _(' location: %s\n'), extsource or "")
874 878
875 879 if ui.verbose:
876 880 fm.plain(_(' bundled: %s\n') % ['no', 'yes'][isinternal])
877 881 fm.data(bundled=isinternal)
878 882
879 883 fm.condwrite(ui.verbose and exttestedwith, 'testedwith',
880 884 _(' tested with: %s\n'),
881 885 fm.formatlist(exttestedwith, name='ver'))
882 886
883 887 fm.condwrite(ui.verbose and extbuglink, 'buglink',
884 888 _(' bug reporting: %s\n'), extbuglink or "")
885 889
886 890 fm.end()
887 891
888 892 @command('debugfileset',
889 893 [('r', 'rev', '', _('apply the filespec on this revision'), _('REV')),
890 894 ('', 'all-files', False,
891 895 _('test files from all revisions and working directory')),
892 896 ('s', 'show-matcher', None,
893 897 _('print internal representation of matcher')),
894 898 ('p', 'show-stage', [],
895 899 _('print parsed tree at the given stage'), _('NAME'))],
896 900 _('[-r REV] [--all-files] [OPTION]... FILESPEC'))
897 901 def debugfileset(ui, repo, expr, **opts):
898 902 '''parse and apply a fileset specification'''
899 903 from . import fileset
900 904 fileset.symbols # force import of fileset so we have predicates to optimize
901 905 opts = pycompat.byteskwargs(opts)
902 906 ctx = scmutil.revsingle(repo, opts.get('rev'), None)
903 907
904 908 stages = [
905 909 ('parsed', pycompat.identity),
906 910 ('analyzed', filesetlang.analyze),
907 911 ('optimized', filesetlang.optimize),
908 912 ]
909 913 stagenames = set(n for n, f in stages)
910 914
911 915 showalways = set()
912 916 if ui.verbose and not opts['show_stage']:
913 917 # show parsed tree by --verbose (deprecated)
914 918 showalways.add('parsed')
915 919 if opts['show_stage'] == ['all']:
916 920 showalways.update(stagenames)
917 921 else:
918 922 for n in opts['show_stage']:
919 923 if n not in stagenames:
920 924 raise error.Abort(_('invalid stage name: %s') % n)
921 925 showalways.update(opts['show_stage'])
922 926
923 927 tree = filesetlang.parse(expr)
924 928 for n, f in stages:
925 929 tree = f(tree)
926 930 if n in showalways:
927 931 if opts['show_stage'] or n != 'parsed':
928 932 ui.write(("* %s:\n") % n)
929 933 ui.write(filesetlang.prettyformat(tree), "\n")
930 934
931 935 files = set()
932 936 if opts['all_files']:
933 937 for r in repo:
934 938 c = repo[r]
935 939 files.update(c.files())
936 940 files.update(c.substate)
937 941 if opts['all_files'] or ctx.rev() is None:
938 942 wctx = repo[None]
939 943 files.update(repo.dirstate.walk(scmutil.matchall(repo),
940 944 subrepos=list(wctx.substate),
941 945 unknown=True, ignored=True))
942 946 files.update(wctx.substate)
943 947 else:
944 948 files.update(ctx.files())
945 949 files.update(ctx.substate)
946 950
947 951 m = ctx.matchfileset(expr)
948 952 if opts['show_matcher'] or (opts['show_matcher'] is None and ui.verbose):
949 953 ui.write(('* matcher:\n'), stringutil.prettyrepr(m), '\n')
950 954 for f in sorted(files):
951 955 if not m(f):
952 956 continue
953 957 ui.write("%s\n" % f)
954 958
955 959 @command('debugformat',
956 960 [] + cmdutil.formatteropts)
957 961 def debugformat(ui, repo, **opts):
958 962 """display format information about the current repository
959 963
960 964 Use --verbose to get extra information about current config value and
961 965 Mercurial default."""
962 966 opts = pycompat.byteskwargs(opts)
963 967 maxvariantlength = max(len(fv.name) for fv in upgrade.allformatvariant)
964 968 maxvariantlength = max(len('format-variant'), maxvariantlength)
965 969
966 970 def makeformatname(name):
967 971 return '%s:' + (' ' * (maxvariantlength - len(name)))
968 972
969 973 fm = ui.formatter('debugformat', opts)
970 974 if fm.isplain():
971 975 def formatvalue(value):
972 976 if util.safehasattr(value, 'startswith'):
973 977 return value
974 978 if value:
975 979 return 'yes'
976 980 else:
977 981 return 'no'
978 982 else:
979 983 formatvalue = pycompat.identity
980 984
981 985 fm.plain('format-variant')
982 986 fm.plain(' ' * (maxvariantlength - len('format-variant')))
983 987 fm.plain(' repo')
984 988 if ui.verbose:
985 989 fm.plain(' config default')
986 990 fm.plain('\n')
987 991 for fv in upgrade.allformatvariant:
988 992 fm.startitem()
989 993 repovalue = fv.fromrepo(repo)
990 994 configvalue = fv.fromconfig(repo)
991 995
992 996 if repovalue != configvalue:
993 997 namelabel = 'formatvariant.name.mismatchconfig'
994 998 repolabel = 'formatvariant.repo.mismatchconfig'
995 999 elif repovalue != fv.default:
996 1000 namelabel = 'formatvariant.name.mismatchdefault'
997 1001 repolabel = 'formatvariant.repo.mismatchdefault'
998 1002 else:
999 1003 namelabel = 'formatvariant.name.uptodate'
1000 1004 repolabel = 'formatvariant.repo.uptodate'
1001 1005
1002 1006 fm.write('name', makeformatname(fv.name), fv.name,
1003 1007 label=namelabel)
1004 1008 fm.write('repo', ' %3s', formatvalue(repovalue),
1005 1009 label=repolabel)
1006 1010 if fv.default != configvalue:
1007 1011 configlabel = 'formatvariant.config.special'
1008 1012 else:
1009 1013 configlabel = 'formatvariant.config.default'
1010 1014 fm.condwrite(ui.verbose, 'config', ' %6s', formatvalue(configvalue),
1011 1015 label=configlabel)
1012 1016 fm.condwrite(ui.verbose, 'default', ' %7s', formatvalue(fv.default),
1013 1017 label='formatvariant.default')
1014 1018 fm.plain('\n')
1015 1019 fm.end()
1016 1020
1017 1021 @command('debugfsinfo', [], _('[PATH]'), norepo=True)
1018 1022 def debugfsinfo(ui, path="."):
1019 1023 """show information detected about current filesystem"""
1020 1024 ui.write(('path: %s\n') % path)
1021 1025 ui.write(('mounted on: %s\n') % (util.getfsmountpoint(path) or '(unknown)'))
1022 1026 ui.write(('exec: %s\n') % (util.checkexec(path) and 'yes' or 'no'))
1023 1027 ui.write(('fstype: %s\n') % (util.getfstype(path) or '(unknown)'))
1024 1028 ui.write(('symlink: %s\n') % (util.checklink(path) and 'yes' or 'no'))
1025 1029 ui.write(('hardlink: %s\n') % (util.checknlink(path) and 'yes' or 'no'))
1026 1030 casesensitive = '(unknown)'
1027 1031 try:
1028 1032 with pycompat.namedtempfile(prefix='.debugfsinfo', dir=path) as f:
1029 1033 casesensitive = util.fscasesensitive(f.name) and 'yes' or 'no'
1030 1034 except OSError:
1031 1035 pass
1032 1036 ui.write(('case-sensitive: %s\n') % casesensitive)
1033 1037
1034 1038 @command('debuggetbundle',
1035 1039 [('H', 'head', [], _('id of head node'), _('ID')),
1036 1040 ('C', 'common', [], _('id of common node'), _('ID')),
1037 1041 ('t', 'type', 'bzip2', _('bundle compression type to use'), _('TYPE'))],
1038 1042 _('REPO FILE [-H|-C ID]...'),
1039 1043 norepo=True)
1040 1044 def debuggetbundle(ui, repopath, bundlepath, head=None, common=None, **opts):
1041 1045 """retrieves a bundle from a repo
1042 1046
1043 1047 Every ID must be a full-length hex node id string. Saves the bundle to the
1044 1048 given file.
1045 1049 """
1046 1050 opts = pycompat.byteskwargs(opts)
1047 1051 repo = hg.peer(ui, opts, repopath)
1048 1052 if not repo.capable('getbundle'):
1049 1053 raise error.Abort("getbundle() not supported by target repository")
1050 1054 args = {}
1051 1055 if common:
1052 1056 args[r'common'] = [bin(s) for s in common]
1053 1057 if head:
1054 1058 args[r'heads'] = [bin(s) for s in head]
1055 1059 # TODO: get desired bundlecaps from command line.
1056 1060 args[r'bundlecaps'] = None
1057 1061 bundle = repo.getbundle('debug', **args)
1058 1062
1059 1063 bundletype = opts.get('type', 'bzip2').lower()
1060 1064 btypes = {'none': 'HG10UN',
1061 1065 'bzip2': 'HG10BZ',
1062 1066 'gzip': 'HG10GZ',
1063 1067 'bundle2': 'HG20'}
1064 1068 bundletype = btypes.get(bundletype)
1065 1069 if bundletype not in bundle2.bundletypes:
1066 1070 raise error.Abort(_('unknown bundle type specified with --type'))
1067 1071 bundle2.writebundle(ui, bundle, bundlepath, bundletype)
1068 1072
1069 1073 @command('debugignore', [], '[FILE]')
1070 1074 def debugignore(ui, repo, *files, **opts):
1071 1075 """display the combined ignore pattern and information about ignored files
1072 1076
1073 1077 With no argument display the combined ignore pattern.
1074 1078
1075 1079 Given space separated file names, shows if the given file is ignored and
1076 1080 if so, show the ignore rule (file and line number) that matched it.
1077 1081 """
1078 1082 ignore = repo.dirstate._ignore
1079 1083 if not files:
1080 1084 # Show all the patterns
1081 1085 ui.write("%s\n" % pycompat.byterepr(ignore))
1082 1086 else:
1083 1087 m = scmutil.match(repo[None], pats=files)
1084 1088 for f in m.files():
1085 1089 nf = util.normpath(f)
1086 1090 ignored = None
1087 1091 ignoredata = None
1088 1092 if nf != '.':
1089 1093 if ignore(nf):
1090 1094 ignored = nf
1091 1095 ignoredata = repo.dirstate._ignorefileandline(nf)
1092 1096 else:
1093 1097 for p in util.finddirs(nf):
1094 1098 if ignore(p):
1095 1099 ignored = p
1096 1100 ignoredata = repo.dirstate._ignorefileandline(p)
1097 1101 break
1098 1102 if ignored:
1099 1103 if ignored == nf:
1100 1104 ui.write(_("%s is ignored\n") % m.uipath(f))
1101 1105 else:
1102 1106 ui.write(_("%s is ignored because of "
1103 1107 "containing folder %s\n")
1104 1108 % (m.uipath(f), ignored))
1105 1109 ignorefile, lineno, line = ignoredata
1106 1110 ui.write(_("(ignore rule in %s, line %d: '%s')\n")
1107 1111 % (ignorefile, lineno, line))
1108 1112 else:
1109 1113 ui.write(_("%s is not ignored\n") % m.uipath(f))
1110 1114
1111 1115 @command('debugindex', cmdutil.debugrevlogopts + cmdutil.formatteropts,
1112 1116 _('-c|-m|FILE'))
1113 1117 def debugindex(ui, repo, file_=None, **opts):
1114 1118 """dump index data for a storage primitive"""
1115 1119 opts = pycompat.byteskwargs(opts)
1116 1120 store = cmdutil.openstorage(repo, 'debugindex', file_, opts)
1117 1121
1118 1122 if ui.debugflag:
1119 1123 shortfn = hex
1120 1124 else:
1121 1125 shortfn = short
1122 1126
1123 1127 idlen = 12
1124 1128 for i in store:
1125 1129 idlen = len(shortfn(store.node(i)))
1126 1130 break
1127 1131
1128 1132 fm = ui.formatter('debugindex', opts)
1129 1133 fm.plain(b' rev linkrev %s %s p2\n' % (
1130 1134 b'nodeid'.ljust(idlen),
1131 1135 b'p1'.ljust(idlen)))
1132 1136
1133 1137 for rev in store:
1134 1138 node = store.node(rev)
1135 1139 parents = store.parents(node)
1136 1140
1137 1141 fm.startitem()
1138 1142 fm.write(b'rev', b'%6d ', rev)
1139 1143 fm.write(b'linkrev', '%7d ', store.linkrev(rev))
1140 1144 fm.write(b'node', '%s ', shortfn(node))
1141 1145 fm.write(b'p1', '%s ', shortfn(parents[0]))
1142 1146 fm.write(b'p2', '%s', shortfn(parents[1]))
1143 1147 fm.plain(b'\n')
1144 1148
1145 1149 fm.end()
1146 1150
1147 1151 @command('debugindexdot', cmdutil.debugrevlogopts,
1148 1152 _('-c|-m|FILE'), optionalrepo=True)
1149 1153 def debugindexdot(ui, repo, file_=None, **opts):
1150 1154 """dump an index DAG as a graphviz dot file"""
1151 1155 opts = pycompat.byteskwargs(opts)
1152 1156 r = cmdutil.openstorage(repo, 'debugindexdot', file_, opts)
1153 1157 ui.write(("digraph G {\n"))
1154 1158 for i in r:
1155 1159 node = r.node(i)
1156 1160 pp = r.parents(node)
1157 1161 ui.write("\t%d -> %d\n" % (r.rev(pp[0]), i))
1158 1162 if pp[1] != nullid:
1159 1163 ui.write("\t%d -> %d\n" % (r.rev(pp[1]), i))
1160 1164 ui.write("}\n")
1161 1165
1162 1166 @command('debuginstall', [] + cmdutil.formatteropts, '', norepo=True)
1163 1167 def debuginstall(ui, **opts):
1164 1168 '''test Mercurial installation
1165 1169
1166 1170 Returns 0 on success.
1167 1171 '''
1168 1172 opts = pycompat.byteskwargs(opts)
1169 1173
1170 1174 def writetemp(contents):
1171 1175 (fd, name) = pycompat.mkstemp(prefix="hg-debuginstall-")
1172 1176 f = os.fdopen(fd, r"wb")
1173 1177 f.write(contents)
1174 1178 f.close()
1175 1179 return name
1176 1180
1177 1181 problems = 0
1178 1182
1179 1183 fm = ui.formatter('debuginstall', opts)
1180 1184 fm.startitem()
1181 1185
1182 1186 # encoding
1183 1187 fm.write('encoding', _("checking encoding (%s)...\n"), encoding.encoding)
1184 1188 err = None
1185 1189 try:
1186 1190 codecs.lookup(pycompat.sysstr(encoding.encoding))
1187 1191 except LookupError as inst:
1188 1192 err = stringutil.forcebytestr(inst)
1189 1193 problems += 1
1190 1194 fm.condwrite(err, 'encodingerror', _(" %s\n"
1191 1195 " (check that your locale is properly set)\n"), err)
1192 1196
1193 1197 # Python
1194 1198 fm.write('pythonexe', _("checking Python executable (%s)\n"),
1195 1199 pycompat.sysexecutable)
1196 1200 fm.write('pythonver', _("checking Python version (%s)\n"),
1197 1201 ("%d.%d.%d" % sys.version_info[:3]))
1198 1202 fm.write('pythonlib', _("checking Python lib (%s)...\n"),
1199 1203 os.path.dirname(pycompat.fsencode(os.__file__)))
1200 1204
1201 1205 security = set(sslutil.supportedprotocols)
1202 1206 if sslutil.hassni:
1203 1207 security.add('sni')
1204 1208
1205 1209 fm.write('pythonsecurity', _("checking Python security support (%s)\n"),
1206 1210 fm.formatlist(sorted(security), name='protocol',
1207 1211 fmt='%s', sep=','))
1208 1212
1209 1213 # These are warnings, not errors. So don't increment problem count. This
1210 1214 # may change in the future.
1211 1215 if 'tls1.2' not in security:
1212 1216 fm.plain(_(' TLS 1.2 not supported by Python install; '
1213 1217 'network connections lack modern security\n'))
1214 1218 if 'sni' not in security:
1215 1219 fm.plain(_(' SNI not supported by Python install; may have '
1216 1220 'connectivity issues with some servers\n'))
1217 1221
1218 1222 # TODO print CA cert info
1219 1223
1220 1224 # hg version
1221 1225 hgver = util.version()
1222 1226 fm.write('hgver', _("checking Mercurial version (%s)\n"),
1223 1227 hgver.split('+')[0])
1224 1228 fm.write('hgverextra', _("checking Mercurial custom build (%s)\n"),
1225 1229 '+'.join(hgver.split('+')[1:]))
1226 1230
1227 1231 # compiled modules
1228 1232 fm.write('hgmodulepolicy', _("checking module policy (%s)\n"),
1229 1233 policy.policy)
1230 1234 fm.write('hgmodules', _("checking installed modules (%s)...\n"),
1231 1235 os.path.dirname(pycompat.fsencode(__file__)))
1232 1236
1233 1237 if policy.policy in ('c', 'allow'):
1234 1238 err = None
1235 1239 try:
1236 1240 from .cext import (
1237 1241 base85,
1238 1242 bdiff,
1239 1243 mpatch,
1240 1244 osutil,
1241 1245 )
1242 1246 dir(bdiff), dir(mpatch), dir(base85), dir(osutil) # quiet pyflakes
1243 1247 except Exception as inst:
1244 1248 err = stringutil.forcebytestr(inst)
1245 1249 problems += 1
1246 1250 fm.condwrite(err, 'extensionserror', " %s\n", err)
1247 1251
1248 1252 compengines = util.compengines._engines.values()
1249 1253 fm.write('compengines', _('checking registered compression engines (%s)\n'),
1250 1254 fm.formatlist(sorted(e.name() for e in compengines),
1251 1255 name='compengine', fmt='%s', sep=', '))
1252 1256 fm.write('compenginesavail', _('checking available compression engines '
1253 1257 '(%s)\n'),
1254 1258 fm.formatlist(sorted(e.name() for e in compengines
1255 1259 if e.available()),
1256 1260 name='compengine', fmt='%s', sep=', '))
1257 1261 wirecompengines = util.compengines.supportedwireengines(util.SERVERROLE)
1258 1262 fm.write('compenginesserver', _('checking available compression engines '
1259 1263 'for wire protocol (%s)\n'),
1260 1264 fm.formatlist([e.name() for e in wirecompengines
1261 1265 if e.wireprotosupport()],
1262 1266 name='compengine', fmt='%s', sep=', '))
1263 1267 re2 = 'missing'
1264 1268 if util._re2:
1265 1269 re2 = 'available'
1266 1270 fm.plain(_('checking "re2" regexp engine (%s)\n') % re2)
1267 1271 fm.data(re2=bool(util._re2))
1268 1272
1269 1273 # templates
1270 1274 p = templater.templatepaths()
1271 1275 fm.write('templatedirs', 'checking templates (%s)...\n', ' '.join(p))
1272 1276 fm.condwrite(not p, '', _(" no template directories found\n"))
1273 1277 if p:
1274 1278 m = templater.templatepath("map-cmdline.default")
1275 1279 if m:
1276 1280 # template found, check if it is working
1277 1281 err = None
1278 1282 try:
1279 1283 templater.templater.frommapfile(m)
1280 1284 except Exception as inst:
1281 1285 err = stringutil.forcebytestr(inst)
1282 1286 p = None
1283 1287 fm.condwrite(err, 'defaulttemplateerror', " %s\n", err)
1284 1288 else:
1285 1289 p = None
1286 1290 fm.condwrite(p, 'defaulttemplate',
1287 1291 _("checking default template (%s)\n"), m)
1288 1292 fm.condwrite(not m, 'defaulttemplatenotfound',
1289 1293 _(" template '%s' not found\n"), "default")
1290 1294 if not p:
1291 1295 problems += 1
1292 1296 fm.condwrite(not p, '',
1293 1297 _(" (templates seem to have been installed incorrectly)\n"))
1294 1298
1295 1299 # editor
1296 1300 editor = ui.geteditor()
1297 1301 editor = util.expandpath(editor)
1298 1302 editorbin = procutil.shellsplit(editor)[0]
1299 1303 fm.write('editor', _("checking commit editor... (%s)\n"), editorbin)
1300 1304 cmdpath = procutil.findexe(editorbin)
1301 1305 fm.condwrite(not cmdpath and editor == 'vi', 'vinotfound',
1302 1306 _(" No commit editor set and can't find %s in PATH\n"
1303 1307 " (specify a commit editor in your configuration"
1304 1308 " file)\n"), not cmdpath and editor == 'vi' and editorbin)
1305 1309 fm.condwrite(not cmdpath and editor != 'vi', 'editornotfound',
1306 1310 _(" Can't find editor '%s' in PATH\n"
1307 1311 " (specify a commit editor in your configuration"
1308 1312 " file)\n"), not cmdpath and editorbin)
1309 1313 if not cmdpath and editor != 'vi':
1310 1314 problems += 1
1311 1315
1312 1316 # check username
1313 1317 username = None
1314 1318 err = None
1315 1319 try:
1316 1320 username = ui.username()
1317 1321 except error.Abort as e:
1318 1322 err = stringutil.forcebytestr(e)
1319 1323 problems += 1
1320 1324
1321 1325 fm.condwrite(username, 'username', _("checking username (%s)\n"), username)
1322 1326 fm.condwrite(err, 'usernameerror', _("checking username...\n %s\n"
1323 1327 " (specify a username in your configuration file)\n"), err)
1324 1328
1325 1329 fm.condwrite(not problems, '',
1326 1330 _("no problems detected\n"))
1327 1331 if not problems:
1328 1332 fm.data(problems=problems)
1329 1333 fm.condwrite(problems, 'problems',
1330 1334 _("%d problems detected,"
1331 1335 " please check your install!\n"), problems)
1332 1336 fm.end()
1333 1337
1334 1338 return problems
1335 1339
1336 1340 @command('debugknown', [], _('REPO ID...'), norepo=True)
1337 1341 def debugknown(ui, repopath, *ids, **opts):
1338 1342 """test whether node ids are known to a repo
1339 1343
1340 1344 Every ID must be a full-length hex node id string. Returns a list of 0s
1341 1345 and 1s indicating unknown/known.
1342 1346 """
1343 1347 opts = pycompat.byteskwargs(opts)
1344 1348 repo = hg.peer(ui, opts, repopath)
1345 1349 if not repo.capable('known'):
1346 1350 raise error.Abort("known() not supported by target repository")
1347 1351 flags = repo.known([bin(s) for s in ids])
1348 1352 ui.write("%s\n" % ("".join([f and "1" or "0" for f in flags])))
1349 1353
1350 1354 @command('debuglabelcomplete', [], _('LABEL...'))
1351 1355 def debuglabelcomplete(ui, repo, *args):
1352 1356 '''backwards compatibility with old bash completion scripts (DEPRECATED)'''
1353 1357 debugnamecomplete(ui, repo, *args)
1354 1358
1355 1359 @command('debuglocks',
1356 1360 [('L', 'force-lock', None, _('free the store lock (DANGEROUS)')),
1357 1361 ('W', 'force-wlock', None,
1358 1362 _('free the working state lock (DANGEROUS)')),
1359 1363 ('s', 'set-lock', None, _('set the store lock until stopped')),
1360 1364 ('S', 'set-wlock', None,
1361 1365 _('set the working state lock until stopped'))],
1362 1366 _('[OPTION]...'))
1363 1367 def debuglocks(ui, repo, **opts):
1364 1368 """show or modify state of locks
1365 1369
1366 1370 By default, this command will show which locks are held. This
1367 1371 includes the user and process holding the lock, the amount of time
1368 1372 the lock has been held, and the machine name where the process is
1369 1373 running if it's not local.
1370 1374
1371 1375 Locks protect the integrity of Mercurial's data, so should be
1372 1376 treated with care. System crashes or other interruptions may cause
1373 1377 locks to not be properly released, though Mercurial will usually
1374 1378 detect and remove such stale locks automatically.
1375 1379
1376 1380 However, detecting stale locks may not always be possible (for
1377 1381 instance, on a shared filesystem). Removing locks may also be
1378 1382 blocked by filesystem permissions.
1379 1383
1380 1384 Setting a lock will prevent other commands from changing the data.
1381 1385 The command will wait until an interruption (SIGINT, SIGTERM, ...) occurs.
1382 1386 The set locks are removed when the command exits.
1383 1387
1384 1388 Returns 0 if no locks are held.
1385 1389
1386 1390 """
1387 1391
1388 1392 if opts.get(r'force_lock'):
1389 1393 repo.svfs.unlink('lock')
1390 1394 if opts.get(r'force_wlock'):
1391 1395 repo.vfs.unlink('wlock')
1392 1396 if opts.get(r'force_lock') or opts.get(r'force_wlock'):
1393 1397 return 0
1394 1398
1395 1399 locks = []
1396 1400 try:
1397 1401 if opts.get(r'set_wlock'):
1398 1402 try:
1399 1403 locks.append(repo.wlock(False))
1400 1404 except error.LockHeld:
1401 1405 raise error.Abort(_('wlock is already held'))
1402 1406 if opts.get(r'set_lock'):
1403 1407 try:
1404 1408 locks.append(repo.lock(False))
1405 1409 except error.LockHeld:
1406 1410 raise error.Abort(_('lock is already held'))
1407 1411 if len(locks):
1408 1412 ui.promptchoice(_("ready to release the lock (y)? $$ &Yes"))
1409 1413 return 0
1410 1414 finally:
1411 1415 release(*locks)
1412 1416
1413 1417 now = time.time()
1414 1418 held = 0
1415 1419
1416 1420 def report(vfs, name, method):
1417 1421 # this causes stale locks to get reaped for more accurate reporting
1418 1422 try:
1419 1423 l = method(False)
1420 1424 except error.LockHeld:
1421 1425 l = None
1422 1426
1423 1427 if l:
1424 1428 l.release()
1425 1429 else:
1426 1430 try:
1427 1431 st = vfs.lstat(name)
1428 1432 age = now - st[stat.ST_MTIME]
1429 1433 user = util.username(st.st_uid)
1430 1434 locker = vfs.readlock(name)
1431 1435 if ":" in locker:
1432 1436 host, pid = locker.split(':')
1433 1437 if host == socket.gethostname():
1434 1438 locker = 'user %s, process %s' % (user, pid)
1435 1439 else:
1436 1440 locker = 'user %s, process %s, host %s' \
1437 1441 % (user, pid, host)
1438 1442 ui.write(("%-6s %s (%ds)\n") % (name + ":", locker, age))
1439 1443 return 1
1440 1444 except OSError as e:
1441 1445 if e.errno != errno.ENOENT:
1442 1446 raise
1443 1447
1444 1448 ui.write(("%-6s free\n") % (name + ":"))
1445 1449 return 0
1446 1450
1447 1451 held += report(repo.svfs, "lock", repo.lock)
1448 1452 held += report(repo.vfs, "wlock", repo.wlock)
1449 1453
1450 1454 return held
1451 1455
1452 1456 @command('debugmanifestfulltextcache', [
1453 1457 ('', 'clear', False, _('clear the cache')),
1454 1458 ('a', 'add', '', _('add the given manifest node to the cache'),
1455 1459 _('NODE'))
1456 1460 ], '')
1457 1461 def debugmanifestfulltextcache(ui, repo, add=None, **opts):
1458 1462 """show, clear or amend the contents of the manifest fulltext cache"""
1459 1463 with repo.lock():
1460 1464 r = repo.manifestlog.getstorage(b'')
1461 1465 try:
1462 1466 cache = r._fulltextcache
1463 1467 except AttributeError:
1464 1468 ui.warn(_(
1465 1469 "Current revlog implementation doesn't appear to have a "
1466 1470 'manifest fulltext cache\n'))
1467 1471 return
1468 1472
1469 1473 if opts.get(r'clear'):
1470 1474 cache.clear()
1471 1475
1472 1476 if add:
1473 1477 try:
1474 1478 manifest = repo.manifestlog[r.lookup(add)]
1475 1479 except error.LookupError as e:
1476 1480 raise error.Abort(e, hint="Check your manifest node id")
1477 1481 manifest.read() # stores revision in cache too
1478 1482
1479 1483 if not len(cache):
1480 1484 ui.write(_('Cache empty'))
1481 1485 else:
1482 1486 ui.write(
1483 1487 _('Cache contains %d manifest entries, in order of most to '
1484 1488 'least recent:\n') % (len(cache),))
1485 1489 totalsize = 0
1486 1490 for nodeid in cache:
1487 1491 # Use cache.get to not update the LRU order
1488 1492 data = cache.get(nodeid)
1489 1493 size = len(data)
1490 1494 totalsize += size + 24 # 20 bytes nodeid, 4 bytes size
1491 1495 ui.write(_('id: %s, size %s\n') % (
1492 1496 hex(nodeid), util.bytecount(size)))
1493 1497 ondisk = cache._opener.stat('manifestfulltextcache').st_size
1494 1498 ui.write(
1495 1499 _('Total cache data size %s, on-disk %s\n') % (
1496 1500 util.bytecount(totalsize), util.bytecount(ondisk))
1497 1501 )
1498 1502
1499 1503 @command('debugmergestate', [], '')
1500 1504 def debugmergestate(ui, repo, *args):
1501 1505 """print merge state
1502 1506
1503 1507 Use --verbose to print out information about whether v1 or v2 merge state
1504 1508 was chosen."""
1505 1509 def _hashornull(h):
1506 1510 if h == nullhex:
1507 1511 return 'null'
1508 1512 else:
1509 1513 return h
1510 1514
1511 1515 def printrecords(version):
1512 1516 ui.write(('* version %d records\n') % version)
1513 1517 if version == 1:
1514 1518 records = v1records
1515 1519 else:
1516 1520 records = v2records
1517 1521
1518 1522 for rtype, record in records:
1519 1523 # pretty print some record types
1520 1524 if rtype == 'L':
1521 1525 ui.write(('local: %s\n') % record)
1522 1526 elif rtype == 'O':
1523 1527 ui.write(('other: %s\n') % record)
1524 1528 elif rtype == 'm':
1525 1529 driver, mdstate = record.split('\0', 1)
1526 1530 ui.write(('merge driver: %s (state "%s")\n')
1527 1531 % (driver, mdstate))
1528 1532 elif rtype in 'FDC':
1529 1533 r = record.split('\0')
1530 1534 f, state, hash, lfile, afile, anode, ofile = r[0:7]
1531 1535 if version == 1:
1532 1536 onode = 'not stored in v1 format'
1533 1537 flags = r[7]
1534 1538 else:
1535 1539 onode, flags = r[7:9]
1536 1540 ui.write(('file: %s (record type "%s", state "%s", hash %s)\n')
1537 1541 % (f, rtype, state, _hashornull(hash)))
1538 1542 ui.write((' local path: %s (flags "%s")\n') % (lfile, flags))
1539 1543 ui.write((' ancestor path: %s (node %s)\n')
1540 1544 % (afile, _hashornull(anode)))
1541 1545 ui.write((' other path: %s (node %s)\n')
1542 1546 % (ofile, _hashornull(onode)))
1543 1547 elif rtype == 'f':
1544 1548 filename, rawextras = record.split('\0', 1)
1545 1549 extras = rawextras.split('\0')
1546 1550 i = 0
1547 1551 extrastrings = []
1548 1552 while i < len(extras):
1549 1553 extrastrings.append('%s = %s' % (extras[i], extras[i + 1]))
1550 1554 i += 2
1551 1555
1552 1556 ui.write(('file extras: %s (%s)\n')
1553 1557 % (filename, ', '.join(extrastrings)))
1554 1558 elif rtype == 'l':
1555 1559 labels = record.split('\0', 2)
1556 1560 labels = [l for l in labels if len(l) > 0]
1557 1561 ui.write(('labels:\n'))
1558 1562 ui.write((' local: %s\n' % labels[0]))
1559 1563 ui.write((' other: %s\n' % labels[1]))
1560 1564 if len(labels) > 2:
1561 1565 ui.write((' base: %s\n' % labels[2]))
1562 1566 else:
1563 1567 ui.write(('unrecognized entry: %s\t%s\n')
1564 1568 % (rtype, record.replace('\0', '\t')))
1565 1569
1566 1570 # Avoid mergestate.read() since it may raise an exception for unsupported
1567 1571 # merge state records. We shouldn't be doing this, but this is OK since this
1568 1572 # command is pretty low-level.
1569 1573 ms = mergemod.mergestate(repo)
1570 1574
1571 1575 # sort so that reasonable information is on top
1572 1576 v1records = ms._readrecordsv1()
1573 1577 v2records = ms._readrecordsv2()
1574 1578 order = 'LOml'
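# sort records so the types listed in 'order' come first, in that order;
# any other record types follow, ordered by their payload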
1575 1579 def key(r):
1576 1580 idx = order.find(r[0])
1577 1581 if idx == -1:
1578 1582 return (1, r[1])
1579 1583 else:
1580 1584 return (0, idx)
1581 1585 v1records.sort(key=key)
1582 1586 v2records.sort(key=key)
1583 1587
1584 1588 if not v1records and not v2records:
1585 1589 ui.write(('no merge state found\n'))
1586 1590 elif not v2records:
1587 1591 ui.note(('no version 2 merge state\n'))
1588 1592 printrecords(1)
1589 1593 elif ms._v1v2match(v1records, v2records):
1590 1594 ui.note(('v1 and v2 states match: using v2\n'))
1591 1595 printrecords(2)
1592 1596 else:
1593 1597 ui.note(('v1 and v2 states mismatch: using v1\n'))
1594 1598 printrecords(1)
1595 1599 if ui.verbose:
1596 1600 printrecords(2)
1597 1601
1598 1602 @command('debugnamecomplete', [], _('NAME...'))
1599 1603 def debugnamecomplete(ui, repo, *args):
1600 1604 '''complete "names" - tags, open branch names, bookmark names'''
1601 1605
1602 1606 names = set()
1603 1607 # since we previously only listed open branches, we will handle that
1604 1608 # specially (after this for loop)
1605 1609 for name, ns in repo.names.iteritems():
1606 1610 if name != 'branches':
1607 1611 names.update(ns.listnames(repo))
1608 1612 names.update(tag for (tag, heads, tip, closed)
1609 1613 in repo.branchmap().iterbranches() if not closed)
1610 1614 completions = set()
1611 1615 if not args:
1612 1616 args = ['']
1613 1617 for a in args:
1614 1618 completions.update(n for n in names if n.startswith(a))
1615 1619 ui.write('\n'.join(sorted(completions)))
1616 1620 ui.write('\n')
1617 1621
1618 1622 @command('debugobsolete',
1619 1623 [('', 'flags', 0, _('markers flag')),
1620 1624 ('', 'record-parents', False,
1621 1625 _('record parent information for the precursor')),
1622 1626 ('r', 'rev', [], _('display markers relevant to REV')),
1623 1627 ('', 'exclusive', False, _('restrict display to markers only '
1624 1628 'relevant to REV')),
1625 1629 ('', 'index', False, _('display index of the marker')),
1626 1630 ('', 'delete', [], _('delete markers specified by indices')),
1627 1631 ] + cmdutil.commitopts2 + cmdutil.formatteropts,
1628 1632 _('[OBSOLETED [REPLACEMENT ...]]'))
1629 1633 def debugobsolete(ui, repo, precursor=None, *successors, **opts):
1630 1634 """create arbitrary obsolete marker
1631 1635
1632 1636 With no arguments, displays the list of obsolescence markers."""
1633 1637
1634 1638 opts = pycompat.byteskwargs(opts)
1635 1639
1636 1640 def parsenodeid(s):
1637 1641 try:
1638 1642 # We do not use revsingle/revrange functions here to accept
1639 1643 # arbitrary node identifiers, possibly not present in the
1640 1644 # local repository.
1641 1645 n = bin(s)
1642 1646 if len(n) != len(nullid):
1643 1647 raise TypeError()
1644 1648 return n
1645 1649 except TypeError:
1646 1650 raise error.Abort('changeset references must be full hexadecimal '
1647 1651 'node identifiers')
1648 1652
1649 1653 if opts.get('delete'):
1650 1654 indices = []
1651 1655 for v in opts.get('delete'):
1652 1656 try:
1653 1657 indices.append(int(v))
1654 1658 except ValueError:
1655 1659 raise error.Abort(_('invalid index value: %r') % v,
1656 1660 hint=_('use integers for indices'))
1657 1661
1658 1662 if repo.currenttransaction():
1659 1663 raise error.Abort(_('cannot delete obsmarkers in the middle '
1660 1664 'of transaction.'))
1661 1665
1662 1666 with repo.lock():
1663 1667 n = repair.deleteobsmarkers(repo.obsstore, indices)
1664 1668 ui.write(_('deleted %i obsolescence markers\n') % n)
1665 1669
1666 1670 return
1667 1671
1668 1672 if precursor is not None:
1669 1673 if opts['rev']:
1670 1674 raise error.Abort('cannot select revision when creating marker')
1671 1675 metadata = {}
1672 1676 metadata['user'] = encoding.fromlocal(opts['user'] or ui.username())
1673 1677 succs = tuple(parsenodeid(succ) for succ in successors)
1674 1678 l = repo.lock()
1675 1679 try:
1676 1680 tr = repo.transaction('debugobsolete')
1677 1681 try:
1678 1682 date = opts.get('date')
1679 1683 if date:
1680 1684 date = dateutil.parsedate(date)
1681 1685 else:
1682 1686 date = None
1683 1687 prec = parsenodeid(precursor)
1684 1688 parents = None
1685 1689 if opts['record_parents']:
1686 1690 if prec not in repo.unfiltered():
1687 1691 raise error.Abort('cannot use --record-parents on '
1688 1692 'unknown changesets')
1689 1693 parents = repo.unfiltered()[prec].parents()
1690 1694 parents = tuple(p.node() for p in parents)
1691 1695 repo.obsstore.create(tr, prec, succs, opts['flags'],
1692 1696 parents=parents, date=date,
1693 1697 metadata=metadata, ui=ui)
1694 1698 tr.close()
1695 1699 except ValueError as exc:
1696 1700 raise error.Abort(_('bad obsmarker input: %s') %
1697 1701 pycompat.bytestr(exc))
1698 1702 finally:
1699 1703 tr.release()
1700 1704 finally:
1701 1705 l.release()
1702 1706 else:
1703 1707 if opts['rev']:
1704 1708 revs = scmutil.revrange(repo, opts['rev'])
1705 1709 nodes = [repo[r].node() for r in revs]
1706 1710 markers = list(obsutil.getmarkers(repo, nodes=nodes,
1707 1711 exclusive=opts['exclusive']))
1708 1712 markers.sort(key=lambda x: x._data)
1709 1713 else:
1710 1714 markers = obsutil.getmarkers(repo)
1711 1715
1712 1716 markerstoiter = markers
1713 1717 isrelevant = lambda m: True
1714 1718 if opts.get('rev') and opts.get('index'):
1715 1719 markerstoiter = obsutil.getmarkers(repo)
1716 1720 markerset = set(markers)
1717 1721 isrelevant = lambda m: m in markerset
1718 1722
1719 1723 fm = ui.formatter('debugobsolete', opts)
1720 1724 for i, m in enumerate(markerstoiter):
1721 1725 if not isrelevant(m):
1722 1726 # marker can be irrelevant when we're iterating over a set
1723 1727 # of markers (markerstoiter) which is bigger than the set
1724 1728 # of markers we want to display (markers)
1725 1729 # this can happen if both --index and --rev options are
1726 1730 # provided and thus we need to iterate over all of the markers
1727 1731 # to get the correct indices, but only display the ones that
1728 1732 # are relevant to --rev value
1729 1733 continue
1730 1734 fm.startitem()
1731 1735 ind = i if opts.get('index') else None
1732 1736 cmdutil.showmarker(fm, m, index=ind)
1733 1737 fm.end()
1734 1738
1735 1739 @command('debugpathcomplete',
1736 1740 [('f', 'full', None, _('complete an entire path')),
1737 1741 ('n', 'normal', None, _('show only normal files')),
1738 1742 ('a', 'added', None, _('show only added files')),
1739 1743 ('r', 'removed', None, _('show only removed files'))],
1740 1744 _('FILESPEC...'))
1741 1745 def debugpathcomplete(ui, repo, *specs, **opts):
1742 1746 '''complete part or all of a tracked path
1743 1747
1744 1748 This command supports shells that offer path name completion. It
1745 1749 currently completes only files already known to the dirstate.
1746 1750
1747 1751 Completion extends only to the next path segment unless
1748 1752 --full is specified, in which case entire paths are used.'''
1749 1753
1750 1754 def complete(path, acceptable):
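# 'acceptable' is a string of dirstate status characters (e.g. 'nmar');
# only files whose current dirstate status is in it are completed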
1751 1755 dirstate = repo.dirstate
1752 1756 spec = os.path.normpath(os.path.join(pycompat.getcwd(), path))
1753 1757 rootdir = repo.root + pycompat.ossep
1754 1758 if spec != repo.root and not spec.startswith(rootdir):
1755 1759 return [], []
1756 1760 if os.path.isdir(spec):
1757 1761 spec += '/'
1758 1762 spec = spec[len(rootdir):]
1759 1763 fixpaths = pycompat.ossep != '/'
1760 1764 if fixpaths:
1761 1765 spec = spec.replace(pycompat.ossep, '/')
1762 1766 speclen = len(spec)
1763 1767 fullpaths = opts[r'full']
1764 1768 files, dirs = set(), set()
1765 1769 adddir, addfile = dirs.add, files.add
1766 1770 for f, st in dirstate.iteritems():
1767 1771 if f.startswith(spec) and st[0] in acceptable:
1768 1772 if fixpaths:
1769 1773 f = f.replace('/', pycompat.ossep)
1770 1774 if fullpaths:
1771 1775 addfile(f)
1772 1776 continue
1773 1777 s = f.find(pycompat.ossep, speclen)
1774 1778 if s >= 0:
1775 1779 adddir(f[:s])
1776 1780 else:
1777 1781 addfile(f)
1778 1782 return files, dirs
1779 1783
1780 1784 acceptable = ''
1781 1785 if opts[r'normal']:
1782 1786 acceptable += 'nm'
1783 1787 if opts[r'added']:
1784 1788 acceptable += 'a'
1785 1789 if opts[r'removed']:
1786 1790 acceptable += 'r'
1787 1791 cwd = repo.getcwd()
1788 1792 if not specs:
1789 1793 specs = ['.']
1790 1794
1791 1795 files, dirs = set(), set()
1792 1796 for spec in specs:
1793 1797 f, d = complete(spec, acceptable or 'nmar')
1794 1798 files.update(f)
1795 1799 dirs.update(d)
1796 1800 files.update(dirs)
1797 1801 ui.write('\n'.join(repo.pathto(p, cwd) for p in sorted(files)))
1798 1802 ui.write('\n')
1799 1803
1800 1804 @command('debugpeer', [], _('PATH'), norepo=True)
1801 1805 def debugpeer(ui, path):
1802 1806 """establish a connection to a peer repository"""
1803 1807 # Always enable peer request logging. Requires --debug to display
1804 1808 # though.
1805 1809 overrides = {
1806 1810 ('devel', 'debug.peer-request'): True,
1807 1811 }
1808 1812
1809 1813 with ui.configoverride(overrides):
1810 1814 peer = hg.peer(ui, {}, path)
1811 1815
1812 1816 local = peer.local() is not None
1813 1817 canpush = peer.canpush()
1814 1818
1815 1819 ui.write(_('url: %s\n') % peer.url())
1816 1820 ui.write(_('local: %s\n') % (_('yes') if local else _('no')))
1817 1821 ui.write(_('pushable: %s\n') % (_('yes') if canpush else _('no')))
1818 1822
1819 1823 @command('debugpickmergetool',
1820 1824 [('r', 'rev', '', _('check for files in this revision'), _('REV')),
1821 1825 ('', 'changedelete', None, _('emulate merging change and delete')),
1822 1826 ] + cmdutil.walkopts + cmdutil.mergetoolopts,
1823 1827 _('[PATTERN]...'),
1824 1828 inferrepo=True)
1825 1829 def debugpickmergetool(ui, repo, *pats, **opts):
1826 1830 """examine which merge tool is chosen for specified file
1827 1831
1828 1832 As described in :hg:`help merge-tools`, Mercurial examines
1829 1833 the configurations below in this order to decide which merge tool is
1830 1834 chosen for the specified file.
1831 1835
1832 1836 1. ``--tool`` option
1833 1837 2. ``HGMERGE`` environment variable
1834 1838 3. configurations in ``merge-patterns`` section
1835 1839 4. configuration of ``ui.merge``
1836 1840 5. configurations in ``merge-tools`` section
1837 1841 6. ``hgmerge`` tool (for historical reasons only)
1838 1842 7. default tool for fallback (``:merge`` or ``:prompt``)
1839 1843
1840 1844 This command writes out the examination result in the style below::
1841 1845
1842 1846 FILE = MERGETOOL
1843 1847
1844 1848 By default, all files known in the first parent context of the
1845 1849 working directory are examined. Use file patterns and/or -I/-X
1846 1850 options to limit target files. -r/--rev is also useful to examine
1847 1851 files in another context without actually updating to it.
1848 1852
1849 1853 With --debug, this command also shows warning messages while matching
1850 1854 against ``merge-patterns`` and so on. It is recommended to
1851 1855 use this option with explicit file patterns and/or -I/-X options,
1852 1856 because this option increases the amount of output per file according
1853 1857 to configurations in hgrc.
1854 1858
1855 1859 With -v/--verbose, this command first shows the configurations below
1856 1860 (only those that are specified).
1857 1861
1858 1862 - ``--tool`` option
1859 1863 - ``HGMERGE`` environment variable
1860 1864 - configuration of ``ui.merge``
1861 1865
1862 1866 If a merge tool is chosen before matching against
1863 1867 ``merge-patterns``, this command can't show any helpful
1864 1868 information, even with --debug. In such a case, the information above
1865 1869 is useful for understanding why a merge tool was chosen.
1866 1870 """
1867 1871 opts = pycompat.byteskwargs(opts)
1868 1872 overrides = {}
1869 1873 if opts['tool']:
1870 1874 overrides[('ui', 'forcemerge')] = opts['tool']
1871 1875 ui.note(('with --tool %r\n') % (pycompat.bytestr(opts['tool'])))
1872 1876
1873 1877 with ui.configoverride(overrides, 'debugmergepatterns'):
1874 1878 hgmerge = encoding.environ.get("HGMERGE")
1875 1879 if hgmerge is not None:
1876 1880 ui.note(('with HGMERGE=%r\n') % (pycompat.bytestr(hgmerge)))
1877 1881 uimerge = ui.config("ui", "merge")
1878 1882 if uimerge:
1879 1883 ui.note(('with ui.merge=%r\n') % (pycompat.bytestr(uimerge)))
1880 1884
1881 1885 ctx = scmutil.revsingle(repo, opts.get('rev'))
1882 1886 m = scmutil.match(ctx, pats, opts)
1883 1887 changedelete = opts['changedelete']
1884 1888 for path in ctx.walk(m):
1885 1889 fctx = ctx[path]
1886 1890 try:
1887 1891 if not ui.debugflag:
1888 1892 ui.pushbuffer(error=True)
1889 1893 tool, toolpath = filemerge._picktool(repo, ui, path,
1890 1894 fctx.isbinary(),
1891 1895 'l' in fctx.flags(),
1892 1896 changedelete)
1893 1897 finally:
1894 1898 if not ui.debugflag:
1895 1899 ui.popbuffer()
1896 1900 ui.write(('%s = %s\n') % (path, tool))
1897 1901
1898 1902 @command('debugpushkey', [], _('REPO NAMESPACE [KEY OLD NEW]'), norepo=True)
1899 1903 def debugpushkey(ui, repopath, namespace, *keyinfo, **opts):
1900 1904 '''access the pushkey key/value protocol
1901 1905
1902 1906 With two args, list the keys in the given namespace.
1903 1907
1904 1908 With five args, set a key to new if it currently is set to old.
1905 1909 Reports success or failure.
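
    For example, ``hg debugpushkey PATH bookmarks`` lists the bookmarks
    advertised by the peer at PATH.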
1906 1910 '''
1907 1911
1908 1912 target = hg.peer(ui, {}, repopath)
1909 1913 if keyinfo:
1910 1914 key, old, new = keyinfo
1911 1915 with target.commandexecutor() as e:
1912 1916 r = e.callcommand('pushkey', {
1913 1917 'namespace': namespace,
1914 1918 'key': key,
1915 1919 'old': old,
1916 1920 'new': new,
1917 1921 }).result()
1918 1922
1919 1923 ui.status(pycompat.bytestr(r) + '\n')
1920 1924 return not r
1921 1925 else:
1922 1926 for k, v in sorted(target.listkeys(namespace).iteritems()):
1923 1927 ui.write("%s\t%s\n" % (stringutil.escapestr(k),
1924 1928 stringutil.escapestr(v)))
1925 1929
1926 1930 @command('debugpvec', [], _('A B'))
1927 1931 def debugpvec(ui, repo, a, b=None):
1928 1932 ca = scmutil.revsingle(repo, a)
1929 1933 cb = scmutil.revsingle(repo, b)
1930 1934 pa = pvec.ctxpvec(ca)
1931 1935 pb = pvec.ctxpvec(cb)
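# summarize how the two position vectors compare: '=' identical,
# '<'/'>' one appears to be an ancestor of the other, '|' unrelated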
1932 1936 if pa == pb:
1933 1937 rel = "="
1934 1938 elif pa > pb:
1935 1939 rel = ">"
1936 1940 elif pa < pb:
1937 1941 rel = "<"
1938 1942 elif pa | pb:
1939 1943 rel = "|"
1940 1944 ui.write(_("a: %s\n") % pa)
1941 1945 ui.write(_("b: %s\n") % pb)
1942 1946 ui.write(_("depth(a): %d depth(b): %d\n") % (pa._depth, pb._depth))
1943 1947 ui.write(_("delta: %d hdist: %d distance: %d relation: %s\n") %
1944 1948 (abs(pa._depth - pb._depth), pvec._hamming(pa._vec, pb._vec),
1945 1949 pa.distance(pb), rel))
1946 1950
1947 1951 @command('debugrebuilddirstate|debugrebuildstate',
1948 1952 [('r', 'rev', '', _('revision to rebuild to'), _('REV')),
1949 1953 ('', 'minimal', None, _('only rebuild files that are inconsistent with '
1950 1954 'the working copy parent')),
1951 1955 ],
1952 1956 _('[-r REV]'))
1953 1957 def debugrebuilddirstate(ui, repo, rev, **opts):
1954 1958 """rebuild the dirstate as it would look like for the given revision
1955 1959
1956 1960 If no revision is specified, the working directory's first parent will be used.
1957 1961
1958 1962 The dirstate will be set to the files of the given revision.
1959 1963 The actual working directory content or existing dirstate
1960 1964 information such as adds or removes is not considered.
1961 1965
1962 1966 ``minimal`` will only rebuild the dirstate status for files that claim to be
1963 1967 tracked but are not in the parent manifest, or that exist in the parent
1964 1968 manifest but are not in the dirstate. It will not change adds, removes, or
1965 1969 modified files that are in the working copy parent.
1966 1970
1967 1971 One use of this command is to make the next :hg:`status` invocation
1968 1972 check the actual file content.
1969 1973 """
1970 1974 ctx = scmutil.revsingle(repo, rev)
1971 1975 with repo.wlock():
1972 1976 dirstate = repo.dirstate
1973 1977 changedfiles = None
1974 1978 # See command doc for what minimal does.
1975 1979 if opts.get(r'minimal'):
1976 1980 manifestfiles = set(ctx.manifest().keys())
1977 1981 dirstatefiles = set(dirstate)
1978 1982 manifestonly = manifestfiles - dirstatefiles
1979 1983 dsonly = dirstatefiles - manifestfiles
1980 1984 dsnotadded = set(f for f in dsonly if dirstate[f] != 'a')
1981 1985 changedfiles = manifestonly | dsnotadded
1982 1986
1983 1987 dirstate.rebuild(ctx.node(), ctx.manifest(), changedfiles)
1984 1988
1985 1989 @command('debugrebuildfncache', [], '')
1986 1990 def debugrebuildfncache(ui, repo):
1987 1991 """rebuild the fncache file"""
1988 1992 repair.rebuildfncache(ui, repo)
1989 1993
1990 1994 @command('debugrename',
1991 1995 [('r', 'rev', '', _('revision to debug'), _('REV'))],
1992 1996 _('[-r REV] FILE'))
1993 1997 def debugrename(ui, repo, file1, *pats, **opts):
1994 1998 """dump rename information"""
1995 1999
1996 2000 opts = pycompat.byteskwargs(opts)
1997 2001 ctx = scmutil.revsingle(repo, opts.get('rev'))
1998 2002 m = scmutil.match(ctx, (file1,) + pats, opts)
1999 2003 for abs in ctx.walk(m):
2000 2004 fctx = ctx[abs]
2001 2005 o = fctx.filelog().renamed(fctx.filenode())
2002 2006 rel = m.rel(abs)
2003 2007 if o:
2004 2008 ui.write(_("%s renamed from %s:%s\n") % (rel, o[0], hex(o[1])))
2005 2009 else:
2006 2010 ui.write(_("%s not renamed\n") % rel)
2007 2011
2008 2012 @command('debugrevlog', cmdutil.debugrevlogopts +
2009 2013 [('d', 'dump', False, _('dump index data'))],
2010 2014 _('-c|-m|FILE'),
2011 2015 optionalrepo=True)
2012 2016 def debugrevlog(ui, repo, file_=None, **opts):
2013 2017 """show data and statistics about a revlog"""
2014 2018 opts = pycompat.byteskwargs(opts)
2015 2019 r = cmdutil.openrevlog(repo, 'debugrevlog', file_, opts)
2016 2020
2017 2021 if opts.get("dump"):
2018 2022 numrevs = len(r)
2019 2023 ui.write(("# rev p1rev p2rev start end deltastart base p1 p2"
2020 2024 " rawsize totalsize compression heads chainlen\n"))
2021 2025 ts = 0
2022 2026 heads = set()
2023 2027
2024 2028 for rev in pycompat.xrange(numrevs):
2025 2029 dbase = r.deltaparent(rev)
2026 2030 if dbase == -1:
2027 2031 dbase = rev
2028 2032 cbase = r.chainbase(rev)
2029 2033 clen = r.chainlen(rev)
2030 2034 p1, p2 = r.parentrevs(rev)
2031 2035 rs = r.rawsize(rev)
2032 2036 ts = ts + rs
2033 2037 heads -= set(r.parentrevs(rev))
2034 2038 heads.add(rev)
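# running compression estimate: total uncompressed size seen so far
# versus the offset at which this revision's stored data ends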
2035 2039 try:
2036 2040 compression = ts / r.end(rev)
2037 2041 except ZeroDivisionError:
2038 2042 compression = 0
2039 2043 ui.write("%5d %5d %5d %5d %5d %10d %4d %4d %4d %7d %9d "
2040 2044 "%11d %5d %8d\n" %
2041 2045 (rev, p1, p2, r.start(rev), r.end(rev),
2042 2046 r.start(dbase), r.start(cbase),
2043 2047 r.start(p1), r.start(p2),
2044 2048 rs, ts, compression, len(heads), clen))
2045 2049 return 0
2046 2050
2047 2051 v = r.version
2048 2052 format = v & 0xFFFF
2049 2053 flags = []
2050 2054 gdelta = False
2051 2055 if v & revlog.FLAG_INLINE_DATA:
2052 2056 flags.append('inline')
2053 2057 if v & revlog.FLAG_GENERALDELTA:
2054 2058 gdelta = True
2055 2059 flags.append('generaldelta')
2056 2060 if not flags:
2057 2061 flags = ['(none)']
2058 2062
2059 2063 ### tracks merge vs single parent
2060 2064 nummerges = 0
2061 2065
2062 2066 ### tracks the ways the deltas are built
2063 2067 # nodelta
2064 2068 numempty = 0
2065 2069 numemptytext = 0
2066 2070 numemptydelta = 0
2067 2071 # full file content
2068 2072 numfull = 0
2069 2073 # intermediate snapshot against a prior snapshot
2070 2074 numsemi = 0
2071 2075 # snapshot count per depth
2072 2076 numsnapdepth = collections.defaultdict(lambda: 0)
2073 2077 # delta against previous revision
2074 2078 numprev = 0
2075 2079 # delta against first or second parent (not prev)
2076 2080 nump1 = 0
2077 2081 nump2 = 0
2078 2082 # delta against neither prev nor parents
2079 2083 numother = 0
2080 2084 # delta against prev that are also first or second parent
2081 2085 # (details of `numprev`)
2082 2086 nump1prev = 0
2083 2087 nump2prev = 0
2084 2088
2085 2089 # data about delta chain of each revs
2086 2090 chainlengths = []
2087 2091 chainbases = []
2088 2092 chainspans = []
2089 2093
2090 2094 # data about each revision
2091 2095 datasize = [None, 0, 0]
2092 2096 fullsize = [None, 0, 0]
2093 2097 semisize = [None, 0, 0]
2094 2098 # snapshot count per depth
2095 2099 snapsizedepth = collections.defaultdict(lambda: [None, 0, 0])
2096 2100 deltasize = [None, 0, 0]
2097 2101 chunktypecounts = {}
2098 2102 chunktypesizes = {}
2099 2103
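# addsize() maintains a [min, max, total] triple for a size distribution;
# the totals are divided by the matching counts below to report averages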
2100 2104 def addsize(size, l):
2101 2105 if l[0] is None or size < l[0]:
2102 2106 l[0] = size
2103 2107 if size > l[1]:
2104 2108 l[1] = size
2105 2109 l[2] += size
2106 2110
2107 2111 numrevs = len(r)
2108 2112 for rev in pycompat.xrange(numrevs):
2109 2113 p1, p2 = r.parentrevs(rev)
2110 2114 delta = r.deltaparent(rev)
2111 2115 if format > 0:
2112 2116 addsize(r.rawsize(rev), datasize)
2113 2117 if p2 != nullrev:
2114 2118 nummerges += 1
2115 2119 size = r.length(rev)
2116 2120 if delta == nullrev:
2117 2121 chainlengths.append(0)
2118 2122 chainbases.append(r.start(rev))
2119 2123 chainspans.append(size)
2120 2124 if size == 0:
2121 2125 numempty += 1
2122 2126 numemptytext += 1
2123 2127 else:
2124 2128 numfull += 1
2125 2129 numsnapdepth[0] += 1
2126 2130 addsize(size, fullsize)
2127 2131 addsize(size, snapsizedepth[0])
2128 2132 else:
2129 2133 chainlengths.append(chainlengths[delta] + 1)
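# chain span: number of bytes of revlog data between the start of the
# chain base and the end of this revision (how far a reader must read)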
2130 2134 baseaddr = chainbases[delta]
2131 2135 revaddr = r.start(rev)
2132 2136 chainbases.append(baseaddr)
2133 2137 chainspans.append((revaddr - baseaddr) + size)
2134 2138 if size == 0:
2135 2139 numempty += 1
2136 2140 numemptydelta += 1
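# intermediate snapshot: stored as a delta whose base is itself a
# snapshot; its depth is the number of snapshot levels between it and
# the full text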
2137 2141 elif r.issnapshot(rev):
2138 2142 addsize(size, semisize)
2139 2143 numsemi += 1
2140 2144 depth = r.snapshotdepth(rev)
2141 2145 numsnapdepth[depth] += 1
2142 2146 addsize(size, snapsizedepth[depth])
2143 2147 else:
2144 2148 addsize(size, deltasize)
2145 2149 if delta == rev - 1:
2146 2150 numprev += 1
2147 2151 if delta == p1:
2148 2152 nump1prev += 1
2149 2153 elif delta == p2:
2150 2154 nump2prev += 1
2151 2155 elif delta == p1:
2152 2156 nump1 += 1
2153 2157 elif delta == p2:
2154 2158 nump2 += 1
2155 2159 elif delta != nullrev:
2156 2160 numother += 1
2157 2161
2158 2162 # Obtain data on the raw chunks in the revlog.
2159 2163 if util.safehasattr(r, '_getsegmentforrevs'):
2160 2164 segment = r._getsegmentforrevs(rev, rev)[1]
2161 2165 else:
2162 2166 segment = r._revlog._getsegmentforrevs(rev, rev)[1]
2163 2167 if segment:
2164 2168 chunktype = bytes(segment[0:1])
2165 2169 else:
2166 2170 chunktype = 'empty'
2167 2171
2168 2172 if chunktype not in chunktypecounts:
2169 2173 chunktypecounts[chunktype] = 0
2170 2174 chunktypesizes[chunktype] = 0
2171 2175
2172 2176 chunktypecounts[chunktype] += 1
2173 2177 chunktypesizes[chunktype] += size
2174 2178
2175 2179 # Adjust size min value for empty cases
2176 2180 for size in (datasize, fullsize, semisize, deltasize):
2177 2181 if size[0] is None:
2178 2182 size[0] = 0
2179 2183
2180 2184 numdeltas = numrevs - numfull - numempty - numsemi
2181 2185 numoprev = numprev - nump1prev - nump2prev
2182 2186 totalrawsize = datasize[2]
2183 2187 datasize[2] /= numrevs
2184 2188 fulltotal = fullsize[2]
2185 2189 fullsize[2] /= numfull
2186 2190 semitotal = semisize[2]
2187 2191 snaptotal = {}
2188 2192 if 0 < numsemi:
2189 2193 semisize[2] /= numsemi
2190 2194 for depth in snapsizedepth:
2191 2195 snaptotal[depth] = snapsizedepth[depth][2]
2192 2196 snapsizedepth[depth][2] /= numsnapdepth[depth]
2193 2197
2194 2198 deltatotal = deltasize[2]
2195 2199 if numdeltas > 0:
2196 2200 deltasize[2] /= numdeltas
2197 2201 totalsize = fulltotal + semitotal + deltatotal
2198 2202 avgchainlen = sum(chainlengths) / numrevs
2199 2203 maxchainlen = max(chainlengths)
2200 2204 maxchainspan = max(chainspans)
2201 2205 compratio = 1
2202 2206 if totalsize:
2203 2207 compratio = totalrawsize / totalsize
2204 2208
2205 2209 basedfmtstr = '%%%dd\n'
2206 2210 basepcfmtstr = '%%%dd %s(%%5.2f%%%%)\n'
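# these templates right-align integers in a field sized to the widest
# value; the 'pc' variant also appends a percentage in parentheses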
2207 2211
2208 2212 def dfmtstr(max):
2209 2213 return basedfmtstr % len(str(max))
2210 2214 def pcfmtstr(max, padding=0):
2211 2215 return basepcfmtstr % (len(str(max)), ' ' * padding)
2212 2216
2213 2217 def pcfmt(value, total):
2214 2218 if total:
2215 2219 return (value, 100 * float(value) / total)
2216 2220 else:
2217 2221 return value, 100.0
2218 2222
2219 2223 ui.write(('format : %d\n') % format)
2220 2224 ui.write(('flags : %s\n') % ', '.join(flags))
2221 2225
2222 2226 ui.write('\n')
2223 2227 fmt = pcfmtstr(totalsize)
2224 2228 fmt2 = dfmtstr(totalsize)
2225 2229 ui.write(('revisions : ') + fmt2 % numrevs)
2226 2230 ui.write((' merges : ') + fmt % pcfmt(nummerges, numrevs))
2227 2231 ui.write((' normal : ') + fmt % pcfmt(numrevs - nummerges, numrevs))
2228 2232 ui.write(('revisions : ') + fmt2 % numrevs)
2229 2233 ui.write((' empty : ') + fmt % pcfmt(numempty, numrevs))
2230 2234 ui.write((' text : ')
2231 2235 + fmt % pcfmt(numemptytext, numemptytext + numemptydelta))
2232 2236 ui.write((' delta : ')
2233 2237 + fmt % pcfmt(numemptydelta, numemptytext + numemptydelta))
2234 2238 ui.write((' snapshot : ') + fmt % pcfmt(numfull + numsemi, numrevs))
2235 2239 for depth in sorted(numsnapdepth):
2236 2240 ui.write((' lvl-%-3d : ' % depth)
2237 2241 + fmt % pcfmt(numsnapdepth[depth], numrevs))
2238 2242 ui.write((' deltas : ') + fmt % pcfmt(numdeltas, numrevs))
2239 2243 ui.write(('revision size : ') + fmt2 % totalsize)
2240 2244 ui.write((' snapshot : ')
2241 2245 + fmt % pcfmt(fulltotal + semitotal, totalsize))
2242 2246 for depth in sorted(numsnapdepth):
2243 2247 ui.write((' lvl-%-3d : ' % depth)
2244 2248 + fmt % pcfmt(snaptotal[depth], totalsize))
2245 2249 ui.write((' deltas : ') + fmt % pcfmt(deltatotal, totalsize))
2246 2250
2247 2251 def fmtchunktype(chunktype):
2248 2252 if chunktype == 'empty':
2249 2253 return ' %s : ' % chunktype
2250 2254 elif chunktype in pycompat.bytestr(string.ascii_letters):
2251 2255 return ' 0x%s (%s) : ' % (hex(chunktype), chunktype)
2252 2256 else:
2253 2257 return ' 0x%s : ' % hex(chunktype)
2254 2258
2255 2259 ui.write('\n')
2256 2260 ui.write(('chunks : ') + fmt2 % numrevs)
2257 2261 for chunktype in sorted(chunktypecounts):
2258 2262 ui.write(fmtchunktype(chunktype))
2259 2263 ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
2260 2264 ui.write(('chunks size : ') + fmt2 % totalsize)
2261 2265 for chunktype in sorted(chunktypecounts):
2262 2266 ui.write(fmtchunktype(chunktype))
2263 2267 ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))
2264 2268
2265 2269 ui.write('\n')
2266 2270 fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio))
2267 2271 ui.write(('avg chain length : ') + fmt % avgchainlen)
2268 2272 ui.write(('max chain length : ') + fmt % maxchainlen)
2269 2273 ui.write(('max chain reach : ') + fmt % maxchainspan)
2270 2274 ui.write(('compression ratio : ') + fmt % compratio)
2271 2275
2272 2276 if format > 0:
2273 2277 ui.write('\n')
2274 2278 ui.write(('uncompressed data size (min/max/avg) : %d / %d / %d\n')
2275 2279 % tuple(datasize))
2276 2280 ui.write(('full revision size (min/max/avg) : %d / %d / %d\n')
2277 2281 % tuple(fullsize))
2278 2282 ui.write(('inter-snapshot size (min/max/avg) : %d / %d / %d\n')
2279 2283 % tuple(semisize))
2280 2284 for depth in sorted(snapsizedepth):
2281 2285 if depth == 0:
2282 2286 continue
2283 2287 ui.write((' level-%-3d (min/max/avg) : %d / %d / %d\n')
2284 2288 % ((depth,) + tuple(snapsizedepth[depth])))
2285 2289 ui.write(('delta size (min/max/avg) : %d / %d / %d\n')
2286 2290 % tuple(deltasize))
2287 2291
2288 2292 if numdeltas > 0:
2289 2293 ui.write('\n')
2290 2294 fmt = pcfmtstr(numdeltas)
2291 2295 fmt2 = pcfmtstr(numdeltas, 4)
2292 2296 ui.write(('deltas against prev : ') + fmt % pcfmt(numprev, numdeltas))
2293 2297 if numprev > 0:
2294 2298 ui.write((' where prev = p1 : ') + fmt2 % pcfmt(nump1prev,
2295 2299 numprev))
2296 2300 ui.write((' where prev = p2 : ') + fmt2 % pcfmt(nump2prev,
2297 2301 numprev))
2298 2302 ui.write((' other : ') + fmt2 % pcfmt(numoprev,
2299 2303 numprev))
2300 2304 if gdelta:
2301 2305 ui.write(('deltas against p1 : ')
2302 2306 + fmt % pcfmt(nump1, numdeltas))
2303 2307 ui.write(('deltas against p2 : ')
2304 2308 + fmt % pcfmt(nump2, numdeltas))
2305 2309 ui.write(('deltas against other : ') + fmt % pcfmt(numother,
2306 2310 numdeltas))
2307 2311
2308 2312 @command('debugrevlogindex', cmdutil.debugrevlogopts +
2309 2313 [('f', 'format', 0, _('revlog format'), _('FORMAT'))],
2310 2314 _('[-f FORMAT] -c|-m|FILE'),
2311 2315 optionalrepo=True)
2312 2316 def debugrevlogindex(ui, repo, file_=None, **opts):
2313 2317 """dump the contents of a revlog index"""
2314 2318 opts = pycompat.byteskwargs(opts)
2315 2319 r = cmdutil.openrevlog(repo, 'debugrevlogindex', file_, opts)
2316 2320 format = opts.get('format', 0)
2317 2321 if format not in (0, 1):
2318 2322 raise error.Abort(_("unknown format %d") % format)
2319 2323
2320 2324 if ui.debugflag:
2321 2325 shortfn = hex
2322 2326 else:
2323 2327 shortfn = short
2324 2328
2325 2329 # There might not be anything in r, so have a sane default
2326 2330 idlen = 12
2327 2331 for i in r:
2328 2332 idlen = len(shortfn(r.node(i)))
2329 2333 break
2330 2334
2331 2335 if format == 0:
2332 2336 if ui.verbose:
2333 2337 ui.write((" rev offset length linkrev"
2334 2338 " %s %s p2\n") % ("nodeid".ljust(idlen),
2335 2339 "p1".ljust(idlen)))
2336 2340 else:
2337 2341 ui.write((" rev linkrev %s %s p2\n") % (
2338 2342 "nodeid".ljust(idlen), "p1".ljust(idlen)))
2339 2343 elif format == 1:
2340 2344 if ui.verbose:
2341 2345 ui.write((" rev flag offset length size link p1"
2342 2346 " p2 %s\n") % "nodeid".rjust(idlen))
2343 2347 else:
2344 2348 ui.write((" rev flag size link p1 p2 %s\n") %
2345 2349 "nodeid".rjust(idlen))
2346 2350
2347 2351 for i in r:
2348 2352 node = r.node(i)
2349 2353 if format == 0:
2350 2354 try:
2351 2355 pp = r.parents(node)
2352 2356 except Exception:
2353 2357 pp = [nullid, nullid]
2354 2358 if ui.verbose:
2355 2359 ui.write("% 6d % 9d % 7d % 7d %s %s %s\n" % (
2356 2360 i, r.start(i), r.length(i), r.linkrev(i),
2357 2361 shortfn(node), shortfn(pp[0]), shortfn(pp[1])))
2358 2362 else:
2359 2363 ui.write("% 6d % 7d %s %s %s\n" % (
2360 2364 i, r.linkrev(i), shortfn(node), shortfn(pp[0]),
2361 2365 shortfn(pp[1])))
2362 2366 elif format == 1:
2363 2367 pr = r.parentrevs(i)
2364 2368 if ui.verbose:
2365 2369 ui.write("% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d %s\n" % (
2366 2370 i, r.flags(i), r.start(i), r.length(i), r.rawsize(i),
2367 2371 r.linkrev(i), pr[0], pr[1], shortfn(node)))
2368 2372 else:
2369 2373 ui.write("% 6d %04x % 8d % 6d % 6d % 6d %s\n" % (
2370 2374 i, r.flags(i), r.rawsize(i), r.linkrev(i), pr[0], pr[1],
2371 2375 shortfn(node)))
2372 2376
2373 2377 @command('debugrevspec',
2374 2378 [('', 'optimize', None,
2375 2379 _('print parsed tree after optimizing (DEPRECATED)')),
2376 2380 ('', 'show-revs', True, _('print list of result revisions (default)')),
2377 2381 ('s', 'show-set', None, _('print internal representation of result set')),
2378 2382 ('p', 'show-stage', [],
2379 2383 _('print parsed tree at the given stage'), _('NAME')),
2380 2384 ('', 'no-optimized', False, _('evaluate tree without optimization')),
2381 2385 ('', 'verify-optimized', False, _('verify optimized result')),
2382 2386 ],
2383 2387 ('REVSPEC'))
2384 2388 def debugrevspec(ui, repo, expr, **opts):
2385 2389 """parse and apply a revision specification
2386 2390
2387 2391 Use the -p/--show-stage option to print the parsed tree at the given stages.
2388 2392 Use -p all to print the tree at every stage.
2389 2393
2390 2394 Use the --no-show-revs option with -s or -p to print only the set
2391 2395 representation or the parsed tree respectively.
2392 2396
2393 2397 Use --verify-optimized to compare the optimized result with the unoptimized
2394 2398 one. Returns 1 if the optimized result differs.
2395 2399 """
2396 2400 opts = pycompat.byteskwargs(opts)
2397 2401 aliases = ui.configitems('revsetalias')
2398 2402 stages = [
2399 2403 ('parsed', lambda tree: tree),
2400 2404 ('expanded', lambda tree: revsetlang.expandaliases(tree, aliases,
2401 2405 ui.warn)),
2402 2406 ('concatenated', revsetlang.foldconcat),
2403 2407 ('analyzed', revsetlang.analyze),
2404 2408 ('optimized', revsetlang.optimize),
2405 2409 ]
2406 2410 if opts['no_optimized']:
2407 2411 stages = stages[:-1]
2408 2412 if opts['verify_optimized'] and opts['no_optimized']:
2409 2413 raise error.Abort(_('cannot use --verify-optimized with '
2410 2414 '--no-optimized'))
2411 2415 stagenames = set(n for n, f in stages)
2412 2416
2413 2417 showalways = set()
2414 2418 showchanged = set()
2415 2419 if ui.verbose and not opts['show_stage']:
2416 2420 # show parsed tree by --verbose (deprecated)
2417 2421 showalways.add('parsed')
2418 2422 showchanged.update(['expanded', 'concatenated'])
2419 2423 if opts['optimize']:
2420 2424 showalways.add('optimized')
2421 2425 if opts['show_stage'] and opts['optimize']:
2422 2426 raise error.Abort(_('cannot use --optimize with --show-stage'))
2423 2427 if opts['show_stage'] == ['all']:
2424 2428 showalways.update(stagenames)
2425 2429 else:
2426 2430 for n in opts['show_stage']:
2427 2431 if n not in stagenames:
2428 2432 raise error.Abort(_('invalid stage name: %s') % n)
2429 2433 showalways.update(opts['show_stage'])
2430 2434
2431 2435 treebystage = {}
2432 2436 printedtree = None
2433 2437 tree = revsetlang.parse(expr, lookup=revset.lookupfn(repo))
2434 2438 for n, f in stages:
2435 2439 treebystage[n] = tree = f(tree)
2436 2440 if n in showalways or (n in showchanged and tree != printedtree):
2437 2441 if opts['show_stage'] or n != 'parsed':
2438 2442 ui.write(("* %s:\n") % n)
2439 2443 ui.write(revsetlang.prettyformat(tree), "\n")
2440 2444 printedtree = tree
2441 2445
2442 2446 if opts['verify_optimized']:
2443 2447 arevs = revset.makematcher(treebystage['analyzed'])(repo)
2444 2448 brevs = revset.makematcher(treebystage['optimized'])(repo)
2445 2449 if opts['show_set'] or (opts['show_set'] is None and ui.verbose):
2446 2450 ui.write(("* analyzed set:\n"), stringutil.prettyrepr(arevs), "\n")
2447 2451 ui.write(("* optimized set:\n"), stringutil.prettyrepr(brevs), "\n")
2448 2452 arevs = list(arevs)
2449 2453 brevs = list(brevs)
2450 2454 if arevs == brevs:
2451 2455 return 0
2452 2456 ui.write(('--- analyzed\n'), label='diff.file_a')
2453 2457 ui.write(('+++ optimized\n'), label='diff.file_b')
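# emit a diff-style, line-by-line comparison of the two revision lists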
2454 2458 sm = difflib.SequenceMatcher(None, arevs, brevs)
2455 2459 for tag, alo, ahi, blo, bhi in sm.get_opcodes():
2456 2460 if tag in ('delete', 'replace'):
2457 2461 for c in arevs[alo:ahi]:
2458 2462 ui.write('-%s\n' % c, label='diff.deleted')
2459 2463 if tag in ('insert', 'replace'):
2460 2464 for c in brevs[blo:bhi]:
2461 2465 ui.write('+%s\n' % c, label='diff.inserted')
2462 2466 if tag == 'equal':
2463 2467 for c in arevs[alo:ahi]:
2464 2468 ui.write(' %s\n' % c)
2465 2469 return 1
2466 2470
2467 2471 func = revset.makematcher(tree)
2468 2472 revs = func(repo)
2469 2473 if opts['show_set'] or (opts['show_set'] is None and ui.verbose):
2470 2474 ui.write(("* set:\n"), stringutil.prettyrepr(revs), "\n")
2471 2475 if not opts['show_revs']:
2472 2476 return
2473 2477 for c in revs:
2474 2478 ui.write("%d\n" % c)
2475 2479
2476 2480 @command('debugserve', [
2477 2481 ('', 'sshstdio', False, _('run an SSH server bound to process handles')),
2478 2482 ('', 'logiofd', '', _('file descriptor to log server I/O to')),
2479 2483 ('', 'logiofile', '', _('file to log server I/O to')),
2480 2484 ], '')
2481 2485 def debugserve(ui, repo, **opts):
2482 2486 """run a server with advanced settings
2483 2487
2484 2488 This command is similar to :hg:`serve`. It exists partially as a
2485 2489 workaround to the fact that ``hg serve --stdio`` must have specific
2486 2490 arguments for security reasons.
2487 2491 """
2488 2492 opts = pycompat.byteskwargs(opts)
2489 2493
2490 2494 if not opts['sshstdio']:
2491 2495 raise error.Abort(_('only --sshstdio is currently supported'))
2492 2496
2493 2497 logfh = None
2494 2498
2495 2499 if opts['logiofd'] and opts['logiofile']:
2496 2500 raise error.Abort(_('cannot use both --logiofd and --logiofile'))
2497 2501
2498 2502 if opts['logiofd']:
2499 2503 # Line buffered because output is line based.
2500 2504 try:
2501 2505 logfh = os.fdopen(int(opts['logiofd']), r'ab', 1)
2502 2506 except OSError as e:
2503 2507 if e.errno != errno.ESPIPE:
2504 2508 raise
2505 2509 # can't seek a pipe, so `ab` mode fails on py3
2506 2510 logfh = os.fdopen(int(opts['logiofd']), r'wb', 1)
2507 2511 elif opts['logiofile']:
2508 2512 logfh = open(opts['logiofile'], 'ab', 1)
2509 2513
2510 2514 s = wireprotoserver.sshserver(ui, repo, logfh=logfh)
2511 2515 s.serve_forever()
2512 2516
2513 2517 @command('debugsetparents', [], _('REV1 [REV2]'))
2514 2518 def debugsetparents(ui, repo, rev1, rev2=None):
2515 2519 """manually set the parents of the current working directory
2516 2520
2517 2521 This is useful for writing repository conversion tools, but should
2518 2522 be used with care. For example, neither the working directory nor the
2519 2523 dirstate is updated, so file status may be incorrect after running this
2520 2524 command.
2521 2525
2522 2526 Returns 0 on success.
2523 2527 """
2524 2528
2525 2529 node1 = scmutil.revsingle(repo, rev1).node()
2526 2530 node2 = scmutil.revsingle(repo, rev2, 'null').node()
2527 2531
2528 2532 with repo.wlock():
2529 2533 repo.setparents(node1, node2)
2530 2534
2531 2535 @command('debugssl', [], '[SOURCE]', optionalrepo=True)
2532 2536 def debugssl(ui, repo, source=None, **opts):
2533 2537 '''test a secure connection to a server
2534 2538
2535 2539 This builds the certificate chain for the server on Windows, installing the
2536 2540 missing intermediates and trusted root via Windows Update if necessary. It
2537 2541 does nothing on other platforms.
2538 2542
2539 2543 If SOURCE is omitted, the 'default' path will be used. If a URL is given,
2540 2544 that server is used. See :hg:`help urls` for more information.
2541 2545
2542 2546 If the update succeeds, retry the original operation. Otherwise, the cause
2543 2547 of the SSL error is likely another issue.
2544 2548 '''
2545 2549 if not pycompat.iswindows:
2546 2550 raise error.Abort(_('certificate chain building is only possible on '
2547 2551 'Windows'))
2548 2552
2549 2553 if not source:
2550 2554 if not repo:
2551 2555 raise error.Abort(_("there is no Mercurial repository here, and no "
2552 2556 "server specified"))
2553 2557 source = "default"
2554 2558
2555 2559 source, branches = hg.parseurl(ui.expandpath(source))
2556 2560 url = util.url(source)
2557 2561 addr = None
2558 2562
2559 2563 defaultport = {'https': 443, 'ssh': 22}
2560 2564 if url.scheme in defaultport:
2561 2565 try:
2562 2566 addr = (url.host, int(url.port or defaultport[url.scheme]))
2563 2567 except ValueError:
2564 2568 raise error.Abort(_("malformed port number in URL"))
2565 2569 else:
2566 2570 raise error.Abort(_("only https and ssh connections are supported"))
2567 2571
2568 2572 from . import win32
2569 2573
2570 2574 s = ssl.wrap_socket(socket.socket(), ssl_version=ssl.PROTOCOL_TLS,
2571 2575 cert_reqs=ssl.CERT_NONE, ca_certs=None)
2572 2576
2573 2577 try:
2574 2578 s.connect(addr)
2575 2579 cert = s.getpeercert(True)
2576 2580
2577 2581 ui.status(_('checking the certificate chain for %s\n') % url.host)
2578 2582
2579 2583 complete = win32.checkcertificatechain(cert, build=False)
2580 2584
2581 2585 if not complete:
2582 2586 ui.status(_('certificate chain is incomplete, updating... '))
2583 2587
2584 2588 if not win32.checkcertificatechain(cert):
2585 2589 ui.status(_('failed.\n'))
2586 2590 else:
2587 2591 ui.status(_('done.\n'))
2588 2592 else:
2589 2593 ui.status(_('full certificate chain is available\n'))
2590 2594 finally:
2591 2595 s.close()
2592 2596
2593 2597 @command('debugsub',
2594 2598 [('r', 'rev', '',
2595 2599 _('revision to check'), _('REV'))],
2596 2600 _('[-r REV] [REV]'))
2597 2601 def debugsub(ui, repo, rev=None):
2598 2602 ctx = scmutil.revsingle(repo, rev, None)
2599 2603 for k, v in sorted(ctx.substate.items()):
2600 2604 ui.write(('path %s\n') % k)
2601 2605 ui.write((' source %s\n') % v[0])
2602 2606 ui.write((' revision %s\n') % v[1])
2603 2607
2604 2608 @command('debugsuccessorssets',
2605 2609 [('', 'closest', False, _('return closest successors sets only'))],
2606 2610 _('[REV]'))
2607 2611 def debugsuccessorssets(ui, repo, *revs, **opts):
2608 2612 """show set of successors for revision
2609 2613
2610 2614 A successors set of changeset A is a consistent group of revisions that
2611 2615 succeed A. It contains non-obsolete changesets only unless the closest
2612 2616 successors sets option (--closest) is set.
2613 2617
2614 2618 In most cases a changeset A has a single successors set containing a single
2615 2619 successor (changeset A replaced by A').
2616 2620
2617 2621 A changeset that is made obsolete with no successors is called "pruned".
2618 2622 Such changesets have no successors sets at all.
2619 2623
2620 2624 A changeset that has been "split" will have a successors set containing
2621 2625 more than one successor.
2622 2626
2623 2627 A changeset that has been rewritten in multiple different ways is called
2624 2628 "divergent". Such changesets have multiple successor sets (each of which
2625 2629 may also be split, i.e. have multiple successors).
2626 2630
2627 2631 Results are displayed as follows::
2628 2632
2629 2633 <rev1>
2630 2634 <successors-1A>
2631 2635 <rev2>
2632 2636 <successors-2A>
2633 2637 <successors-2B1> <successors-2B2> <successors-2B3>
2634 2638
2635 2639 Here rev2 has two possible (i.e. divergent) successors sets. The first
2636 2640 holds one element, whereas the second holds three (i.e. the changeset has
2637 2641 been split).
2638 2642 """
2639 2643 # passed to successorssets caching computation from one call to another
2640 2644 cache = {}
2641 2645 ctx2str = bytes
2642 2646 node2str = short
2643 2647 for rev in scmutil.revrange(repo, revs):
2644 2648 ctx = repo[rev]
2645 2649 ui.write('%s\n'% ctx2str(ctx))
2646 2650 for succsset in obsutil.successorssets(repo, ctx.node(),
2647 2651 closest=opts[r'closest'],
2648 2652 cache=cache):
2649 2653 if succsset:
2650 2654 ui.write(' ')
2651 2655 ui.write(node2str(succsset[0]))
2652 2656 for node in succsset[1:]:
2653 2657 ui.write(' ')
2654 2658 ui.write(node2str(node))
2655 2659 ui.write('\n')
2656 2660
2657 2661 @command('debugtemplate',
2658 2662 [('r', 'rev', [], _('apply template on changesets'), _('REV')),
2659 2663 ('D', 'define', [], _('define template keyword'), _('KEY=VALUE'))],
2660 2664 _('[-r REV]... [-D KEY=VALUE]... TEMPLATE'),
2661 2665 optionalrepo=True)
2662 2666 def debugtemplate(ui, repo, tmpl, **opts):
2663 2667 """parse and apply a template
2664 2668
2665 2669 If -r/--rev is given, the template is processed as a log template and
2666 2670 applied to the given changesets. Otherwise, it is processed as a generic
2667 2671 template.
2668 2672
2669 2673 Use --verbose to print the parsed tree.
2670 2674 """
2671 2675 revs = None
2672 2676 if opts[r'rev']:
2673 2677 if repo is None:
2674 2678 raise error.RepoError(_('there is no Mercurial repository here '
2675 2679 '(.hg not found)'))
2676 2680 revs = scmutil.revrange(repo, opts[r'rev'])
2677 2681
2678 2682 props = {}
2679 2683 for d in opts[r'define']:
2680 2684 try:
2681 2685 k, v = (e.strip() for e in d.split('=', 1))
2682 2686 if not k or k == 'ui':
2683 2687 raise ValueError
2684 2688 props[k] = v
2685 2689 except ValueError:
2686 2690 raise error.Abort(_('malformed keyword definition: %s') % d)
2687 2691
2688 2692 if ui.verbose:
2689 2693 aliases = ui.configitems('templatealias')
2690 2694 tree = templater.parse(tmpl)
2691 2695 ui.note(templater.prettyformat(tree), '\n')
2692 2696 newtree = templater.expandaliases(tree, aliases)
2693 2697 if newtree != tree:
2694 2698 ui.note(("* expanded:\n"), templater.prettyformat(newtree), '\n')
2695 2699
2696 2700 if revs is None:
2697 2701 tres = formatter.templateresources(ui, repo)
2698 2702 t = formatter.maketemplater(ui, tmpl, resources=tres)
2699 2703 if ui.verbose:
2700 2704 kwds, funcs = t.symbolsuseddefault()
2701 2705 ui.write(("* keywords: %s\n") % ', '.join(sorted(kwds)))
2702 2706 ui.write(("* functions: %s\n") % ', '.join(sorted(funcs)))
2703 2707 ui.write(t.renderdefault(props))
2704 2708 else:
2705 2709 displayer = logcmdutil.maketemplater(ui, repo, tmpl)
2706 2710 if ui.verbose:
2707 2711 kwds, funcs = displayer.t.symbolsuseddefault()
2708 2712 ui.write(("* keywords: %s\n") % ', '.join(sorted(kwds)))
2709 2713 ui.write(("* functions: %s\n") % ', '.join(sorted(funcs)))
2710 2714 for r in revs:
2711 2715 displayer.show(repo[r], **pycompat.strkwargs(props))
2712 2716 displayer.close()
2713 2717
2714 2718 @command('debuguigetpass', [
2715 2719 ('p', 'prompt', '', _('prompt text'), _('TEXT')),
2716 2720 ], _('[-p TEXT]'), norepo=True)
2717 2721 def debuguigetpass(ui, prompt=''):
2718 2722 """show prompt to type password"""
2719 2723 r = ui.getpass(prompt)
2720 2724 ui.write(('response: %s\n') % r)
2721 2725
2722 2726 @command('debuguiprompt', [
2723 2727 ('p', 'prompt', '', _('prompt text'), _('TEXT')),
2724 2728 ], _('[-p TEXT]'), norepo=True)
2725 2729 def debuguiprompt(ui, prompt=''):
2726 2730 """show plain prompt"""
2727 2731 r = ui.prompt(prompt)
2728 2732 ui.write(('response: %s\n') % r)
2729 2733
2730 2734 @command('debugupdatecaches', [])
2731 2735 def debugupdatecaches(ui, repo, *pats, **opts):
2732 2736 """warm all known caches in the repository"""
2733 2737 with repo.wlock(), repo.lock():
2734 2738 repo.updatecaches(full=True)
2735 2739
2736 2740 @command('debugupgraderepo', [
2737 2741 ('o', 'optimize', [], _('extra optimization to perform'), _('NAME')),
2738 2742 ('', 'run', False, _('performs an upgrade')),
2739 2743 ])
2740 2744 def debugupgraderepo(ui, repo, run=False, optimize=None):
2741 2745 """upgrade a repository to use different features
2742 2746
2743 2747 If no arguments are specified, the repository is evaluated for upgrade
2744 2748 and a list of problems and potential optimizations is printed.
2745 2749
2746 2750 With ``--run``, a repository upgrade is performed. Behavior of the upgrade
2747 2751 can be influenced via additional arguments. More details will be provided
2748 2752 by the command output when run without ``--run``.
2749 2753
2750 2754 During the upgrade, the repository will be locked and no writes will be
2751 2755 allowed.
2752 2756
2753 2757 At the end of the upgrade, the repository may not be readable while new
2754 2758 repository data is swapped in. This window will be as long as it takes to
2755 2759 rename some directories inside the ``.hg`` directory. On most machines, this
2756 2760 should complete almost instantaneously and the chances of a consumer being
2757 2761 unable to access the repository should be low.
2758 2762 """
2759 2763 return upgrade.upgraderepo(ui, repo, run=run, optimize=optimize)
2760 2764
2761 2765 @command('debugwalk', cmdutil.walkopts, _('[OPTION]... [FILE]...'),
2762 2766 inferrepo=True)
2763 2767 def debugwalk(ui, repo, *pats, **opts):
2764 2768 """show how files match on given patterns"""
2765 2769 opts = pycompat.byteskwargs(opts)
2766 2770 m = scmutil.match(repo[None], pats, opts)
2767 2771 if ui.verbose:
2768 2772 ui.write(('* matcher:\n'), stringutil.prettyrepr(m), '\n')
2769 2773 items = list(repo[None].walk(m))
2770 2774 if not items:
2771 2775 return
2772 2776 f = lambda fn: fn
2773 2777 if ui.configbool('ui', 'slash') and pycompat.ossep != '/':
2774 2778 f = lambda fn: util.normpath(fn)
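# build a row format whose column widths match the longest absolute and
# relative paths; the doubled '%%' leaves literal placeholders for each row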
2775 2779 fmt = 'f %%-%ds %%-%ds %%s' % (
2776 2780 max([len(abs) for abs in items]),
2777 2781 max([len(m.rel(abs)) for abs in items]))
2778 2782 for abs in items:
2779 2783 line = fmt % (abs, f(m.rel(abs)), m.exact(abs) and 'exact' or '')
2780 2784 ui.write("%s\n" % line.rstrip())
2781 2785
2782 2786 @command('debugwhyunstable', [], _('REV'))
2783 2787 def debugwhyunstable(ui, repo, rev):
2784 2788 """explain instabilities of a changeset"""
2785 2789 for entry in obsutil.whyunstable(repo, scmutil.revsingle(repo, rev)):
2786 2790 dnodes = ''
2787 2791 if entry.get('divergentnodes'):
2788 2792 dnodes = ' '.join('%s (%s)' % (ctx.hex(), ctx.phasestr())
2789 2793 for ctx in entry['divergentnodes']) + ' '
2790 2794 ui.write('%s: %s%s %s\n' % (entry['instability'], dnodes,
2791 2795 entry['reason'], entry['node']))
2792 2796
2793 2797 @command('debugwireargs',
2794 2798 [('', 'three', '', 'three'),
2795 2799 ('', 'four', '', 'four'),
2796 2800 ('', 'five', '', 'five'),
2797 2801 ] + cmdutil.remoteopts,
2798 2802 _('REPO [OPTIONS]... [ONE [TWO]]'),
2799 2803 norepo=True)
2800 2804 def debugwireargs(ui, repopath, *vals, **opts):
2801 2805 opts = pycompat.byteskwargs(opts)
2802 2806 repo = hg.peer(ui, opts, repopath)
2803 2807 for opt in cmdutil.remoteopts:
2804 2808 del opts[opt[1]]
2805 2809 args = {}
2806 2810 for k, v in opts.iteritems():
2807 2811 if v:
2808 2812 args[k] = v
2809 2813 args = pycompat.strkwargs(args)
2810 2814 # run twice to check that we don't mess up the stream for the next command
2811 2815 res1 = repo.debugwireargs(*vals, **args)
2812 2816 res2 = repo.debugwireargs(*vals, **args)
2813 2817 ui.write("%s\n" % res1)
2814 2818 if res1 != res2:
2815 2819 ui.warn("%s\n" % res2)
2816 2820
2817 2821 def _parsewirelangblocks(fh):
2818 2822 activeaction = None
2819 2823 blocklines = []
2820 2824
2821 2825 for line in fh:
2822 2826 line = line.rstrip()
2823 2827 if not line:
2824 2828 continue
2825 2829
2826 2830 if line.startswith(b'#'):
2827 2831 continue
2828 2832
2829 2833 if not line.startswith(b' '):
2830 2834 # New block. Flush previous one.
2831 2835 if activeaction:
2832 2836 yield activeaction, blocklines
2833 2837
2834 2838 activeaction = line
2835 2839 blocklines = []
2836 2840 continue
2837 2841
2838 2842 # Else we start with an indent.
2839 2843
2840 2844 if not activeaction:
2841 2845 raise error.Abort(_('indented line outside of block'))
2842 2846
2843 2847 blocklines.append(line)
2844 2848
2845 2849 # Flush last block.
2846 2850 if activeaction:
2847 2851 yield activeaction, blocklines
2848 2852
2849 2853 @command('debugwireproto',
2850 2854 [
2851 2855 ('', 'localssh', False, _('start an SSH server for this repo')),
2852 2856 ('', 'peer', '', _('construct a specific version of the peer')),
2853 2857 ('', 'noreadstderr', False, _('do not read from stderr of the remote')),
2854 2858 ('', 'nologhandshake', False,
2855 2859 _('do not log I/O related to the peer handshake')),
2856 2860 ] + cmdutil.remoteopts,
2857 2861 _('[PATH]'),
2858 2862 optionalrepo=True)
2859 2863 def debugwireproto(ui, repo, path=None, **opts):
2860 2864 """send wire protocol commands to a server
2861 2865
2862 2866 This command can be used to issue wire protocol commands to remote
2863 2867 peers and to debug the raw data being exchanged.
2864 2868
2865 2869 ``--localssh`` will start an SSH server against the current repository
2866 2870 and connect to that. By default, the connection will perform a handshake
2867 2871 and establish an appropriate peer instance.
2868 2872
2869 2873 ``--peer`` can be used to bypass the handshake protocol and construct a
2870 2874 peer instance using the specified class type. Valid values are ``raw``,
2871 2875 ``http2``, ``ssh1``, and ``ssh2``. ``raw`` instances only allow sending
2872 2876 raw data payloads and don't support higher-level command actions.
2873 2877
2874 2878 ``--noreadstderr`` can be used to disable automatic reading from stderr
2875 2879 of the peer (for SSH connections only). Disabling automatic reading of
2876 2880 stderr is useful for making output more deterministic.
2877 2881
2878 2882 Commands are issued via a mini language which is specified via stdin.
2879 2883 The language consists of individual actions to perform. An action is
2880 2884 defined by a block. A block is defined as a line with no leading
2881 2885 space followed by 0 or more lines with leading space. Blocks are
2882 2886 effectively a high-level command with additional metadata.
2883 2887
2884 2888 Lines beginning with ``#`` are ignored.
2885 2889
2886 2890 The following sections denote available actions.
2887 2891
2888 2892 raw
2889 2893 ---
2890 2894
2891 2895 Send raw data to the server.
2892 2896
2893 2897 The block payload contains the raw data to send as one atomic send
2894 2898 operation. The data may not actually be delivered in a single system
2895 2899 call: it depends on the abilities of the transport being used.
2896 2900
2897 2901 Each line in the block is de-indented and concatenated. Then, that
2898 2902 value is evaluated as a Python b'' literal. This allows the use of
2899 2903 backslash escaping, etc.
2900 2904
2901 2905 raw+
2902 2906 ----
2903 2907
2904 2908 Behaves like ``raw`` except flushes output afterwards.
2905 2909
2906 2910 command <X>
2907 2911 -----------
2908 2912
2909 2913 Send a request to run a named command, whose name follows the ``command``
2910 2914 string.
2911 2915
2912 2916 Arguments to the command are defined as lines in this block. The format of
2913 2917 each line is ``<key> <value>``. e.g.::
2914 2918
2915 2919 command listkeys
2916 2920 namespace bookmarks
2917 2921
2918 2922 If the value begins with ``eval:``, it will be interpreted as a Python
2919 2923 literal expression. Otherwise values are interpreted as Python b'' literals.
2920 2924 This allows sending complex types and encoding special byte sequences via
2921 2925 backslash escaping.
2922 2926
2923 2927 The following arguments have special meaning:
2924 2928
2925 2929 ``PUSHFILE``
2926 2930 When defined, the *push* mechanism of the peer will be used instead
2927 2931 of the static request-response mechanism and the content of the
2928 2932 file specified in the value of this argument will be sent as the
2929 2933 command payload.
2930 2934
2931 2935 This can be used to submit a local bundle file to the remote.
2932 2936
2933 2937 batchbegin
2934 2938 ----------
2935 2939
2936 2940 Instruct the peer to begin a batched send.
2937 2941
2938 2942 All ``command`` blocks are queued for execution until the next
2939 2943 ``batchsubmit`` block.
2940 2944
2941 2945 batchsubmit
2942 2946 -----------
2943 2947
2944 2948 Submit previously queued ``command`` blocks as a batch request.
2945 2949
2946 2950 This action MUST be paired with a ``batchbegin`` action.
2947 2951
2948 2952 httprequest <method> <path>
2949 2953 ---------------------------
2950 2954
2951 2955 (HTTP peer only)
2952 2956
2953 2957 Send an HTTP request to the peer.
2954 2958
2955 2959 The HTTP request line follows the ``httprequest`` action. e.g. ``GET /foo``.
2956 2960
2957 2961 Arguments of the form ``<key>: <value>`` are interpreted as HTTP request
2958 2962 headers to add to the request. e.g. ``Accept: foo``.
2959 2963
2960 2964 The following arguments are special:
2961 2965
2962 2966 ``BODYFILE``
2963 2967 The content of the file defined as the value to this argument will be
2964 2968 transferred verbatim as the HTTP request body.
2965 2969
2966 2970 ``frame <type> <flags> <payload>``
2967 2971 Send a unified protocol frame as part of the request body.
2968 2972
2969 2973 All frames will be collected and sent as the body to the HTTP
2970 2974 request.
2971 2975
2972 2976 close
2973 2977 -----
2974 2978
2975 2979 Close the connection to the server.
2976 2980
2977 2981 flush
2978 2982 -----
2979 2983
2980 2984 Flush data written to the server.
2981 2985
2982 2986 readavailable
2983 2987 -------------
2984 2988
2985 2989 Close the write end of the connection and read all available data from
2986 2990 the server.
2987 2991
2988 2992 If the connection to the server encompasses multiple pipes, we poll both
2989 2993 pipes and read available data.
2990 2994
2991 2995 readline
2992 2996 --------
2993 2997
2994 2998 Read a line of output from the server. If there are multiple output
2995 2999 pipes, reads only the main pipe.
2996 3000
2997 3001 ereadline
2998 3002 ---------
2999 3003
3000 3004 Like ``readline``, but read from the stderr pipe, if available.
3001 3005
3002 3006 read <X>
3003 3007 --------
3004 3008
3005 3009 ``read()`` N bytes from the server's main output pipe.
3006 3010
3007 3011 eread <X>
3008 3012 ---------
3009 3013
3010 3014 ``read()`` N bytes from the server's stderr pipe, if available.
3011 3015
3012 3016 Specifying Unified Frame-Based Protocol Frames
3013 3017 ----------------------------------------------
3014 3018
3015 3019 It is possible to emit a *Unified Frame-Based Protocol* by using special
3016 3020 syntax.
3017 3021
3018 3022 A frame is composed of a type, flags, and a payload. These can be parsed
3019 3023 from a string of the form:
3020 3024
3021 3025 <request-id> <stream-id> <stream-flags> <type> <flags> <payload>
3022 3026
3023 3027 ``request-id`` and ``stream-id`` are integers defining the request and
3024 3028 stream identifiers.
3025 3029
3026 3030 ``type`` can be an integer value for the frame type or the string name
3027 3031 of the type. The strings are defined in ``wireprotoframing.py``. e.g.
3028 3032 ``command-name``.
3029 3033
3030 3034 ``stream-flags`` and ``flags`` are a ``|`` delimited list of flag
3031 3035 components. Each component (and there can be just one) can be an integer
3032 3036 or a flag name for stream flags or frame flags, respectively. Values are
3033 3037 resolved to integers and then bitwise OR'd together.
3034 3038
3035 3039 ``payload`` represents the raw frame payload. If it begins with
3036 3040 ``cbor:``, the following string is evaluated as Python code and the
3037 3041 resulting object is fed into a CBOR encoder. Otherwise it is interpreted
3038 3042 as a Python byte string literal.
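
For example, a sketch of a frame carrying a CBOR-encoded command request
(the command name is illustrative)::

    1 1 stream-begin command-request new cbor:{b'name': b'heartbeat'}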
3039 3043 """
3040 3044 opts = pycompat.byteskwargs(opts)
3041 3045
3042 3046 if opts['localssh'] and not repo:
3043 3047 raise error.Abort(_('--localssh requires a repository'))
3044 3048
3045 3049 if opts['peer'] and opts['peer'] not in ('raw', 'http2', 'ssh1', 'ssh2'):
3046 3050 raise error.Abort(_('invalid value for --peer'),
3047 3051 hint=_('valid values are "raw", "http2", "ssh1", and "ssh2"'))
3048 3052
3049 3053 if path and opts['localssh']:
3050 3054 raise error.Abort(_('cannot specify --localssh with an explicit '
3051 3055 'path'))
3052 3056
3053 3057 if ui.interactive():
3054 3058 ui.write(_('(waiting for commands on stdin)\n'))
3055 3059
3056 3060 blocks = list(_parsewirelangblocks(ui.fin))
3057 3061
3058 3062 proc = None
3059 3063 stdin = None
3060 3064 stdout = None
3061 3065 stderr = None
3062 3066 opener = None
3063 3067
3064 3068 if opts['localssh']:
3065 3069 # We start the SSH server in its own process so there is process
3066 3070 # separation. This prevents a whole class of potential bugs around
3067 3071 # shared state from interfering with server operation.
3068 3072 args = procutil.hgcmd() + [
3069 3073 '-R', repo.root,
3070 3074 'debugserve', '--sshstdio',
3071 3075 ]
3072 3076 proc = subprocess.Popen(args, stdin=subprocess.PIPE,
3073 3077 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3074 3078 bufsize=0)
3075 3079
3076 3080 stdin = proc.stdin
3077 3081 stdout = proc.stdout
3078 3082 stderr = proc.stderr
3079 3083
3080 3084 # We turn the pipes into observers so we can log I/O.
3081 3085 if ui.verbose or opts['peer'] == 'raw':
3082 3086 stdin = util.makeloggingfileobject(ui, proc.stdin, b'i',
3083 3087 logdata=True)
3084 3088 stdout = util.makeloggingfileobject(ui, proc.stdout, b'o',
3085 3089 logdata=True)
3086 3090 stderr = util.makeloggingfileobject(ui, proc.stderr, b'e',
3087 3091 logdata=True)
3088 3092
3089 3093 # --localssh also implies the peer connection settings.
3090 3094
3091 3095 url = 'ssh://localserver'
3092 3096 autoreadstderr = not opts['noreadstderr']
3093 3097
3094 3098 if opts['peer'] == 'ssh1':
3095 3099 ui.write(_('creating ssh peer for wire protocol version 1\n'))
3096 3100 peer = sshpeer.sshv1peer(ui, url, proc, stdin, stdout, stderr,
3097 3101 None, autoreadstderr=autoreadstderr)
3098 3102 elif opts['peer'] == 'ssh2':
3099 3103 ui.write(_('creating ssh peer for wire protocol version 2\n'))
3100 3104 peer = sshpeer.sshv2peer(ui, url, proc, stdin, stdout, stderr,
3101 3105 None, autoreadstderr=autoreadstderr)
3102 3106 elif opts['peer'] == 'raw':
3103 3107 ui.write(_('using raw connection to peer\n'))
3104 3108 peer = None
3105 3109 else:
3106 3110 ui.write(_('creating ssh peer from handshake results\n'))
3107 3111 peer = sshpeer.makepeer(ui, url, proc, stdin, stdout, stderr,
3108 3112 autoreadstderr=autoreadstderr)
3109 3113
3110 3114 elif path:
3111 3115 # We bypass hg.peer() so we can proxy the sockets.
3112 3116 # TODO consider not doing this because we skip
3113 3117 # ``hg.wirepeersetupfuncs`` and potentially other useful functionality.
3114 3118 u = util.url(path)
3115 3119 if u.scheme != 'http':
3116 3120 raise error.Abort(_('only http:// paths are currently supported'))
3117 3121
3118 3122 url, authinfo = u.authinfo()
3119 3123 openerargs = {
3120 3124 r'useragent': b'Mercurial debugwireproto',
3121 3125 }
3122 3126
3123 3127 # Turn pipes/sockets into observers so we can log I/O.
3124 3128 if ui.verbose:
3125 3129 openerargs.update({
3126 3130 r'loggingfh': ui,
3127 3131 r'loggingname': b's',
3128 3132 r'loggingopts': {
3129 3133 r'logdata': True,
3130 3134 r'logdataapis': False,
3131 3135 },
3132 3136 })
3133 3137
3134 3138 if ui.debugflag:
3135 3139 openerargs[r'loggingopts'][r'logdataapis'] = True
3136 3140
3137 3141 # Don't send default headers when in raw mode. This allows us to
3138 3142 # bypass most of the behavior of our URL handling code so we can
3139 3143 # have near complete control over what's sent on the wire.
3140 3144 if opts['peer'] == 'raw':
3141 3145 openerargs[r'sendaccept'] = False
3142 3146
3143 3147 opener = urlmod.opener(ui, authinfo, **openerargs)
3144 3148
3145 3149 if opts['peer'] == 'http2':
3146 3150 ui.write(_('creating http peer for wire protocol version 2\n'))
3147 3151 # We go through makepeer() because we need an API descriptor for
3148 3152 # the peer instance to be useful.
3149 3153 with ui.configoverride({
3150 3154 ('experimental', 'httppeer.advertise-v2'): True}):
3151 3155 if opts['nologhandshake']:
3152 3156 ui.pushbuffer()
3153 3157
3154 3158 peer = httppeer.makepeer(ui, path, opener=opener)
3155 3159
3156 3160 if opts['nologhandshake']:
3157 3161 ui.popbuffer()
3158 3162
3159 3163 if not isinstance(peer, httppeer.httpv2peer):
3160 3164 raise error.Abort(_('could not instantiate HTTP peer for '
3161 3165 'wire protocol version 2'),
3162 3166 hint=_('the server may not have the feature '
3163 3167 'enabled or is not allowing this '
3164 3168 'client version'))
3165 3169
3166 3170 elif opts['peer'] == 'raw':
3167 3171 ui.write(_('using raw connection to peer\n'))
3168 3172 peer = None
3169 3173 elif opts['peer']:
3170 3174 raise error.Abort(_('--peer %s not supported with HTTP peers') %
3171 3175 opts['peer'])
3172 3176 else:
3173 3177 peer = httppeer.makepeer(ui, path, opener=opener)
3174 3178
3175 3179 # We /could/ populate stdin/stdout with sock.makefile()...
3176 3180 else:
3177 3181 raise error.Abort(_('unsupported connection configuration'))
3178 3182
3179 3183 batchedcommands = None
3180 3184
3181 3185 # Now perform actions based on the parsed wire language instructions.
3182 3186 for action, lines in blocks:
3183 3187 if action in ('raw', 'raw+'):
3184 3188 if not stdin:
3185 3189 raise error.Abort(_('cannot call raw/raw+ on this peer'))
3186 3190
3187 3191 # Concatenate the data together.
3188 3192 data = ''.join(l.lstrip() for l in lines)
3189 3193 data = stringutil.unescapestr(data)
3190 3194 stdin.write(data)
3191 3195
3192 3196 if action == 'raw+':
3193 3197 stdin.flush()
3194 3198 elif action == 'flush':
3195 3199 if not stdin:
3196 3200 raise error.Abort(_('cannot call flush on this peer'))
3197 3201 stdin.flush()
3198 3202 elif action.startswith('command'):
3199 3203 if not peer:
3200 3204 raise error.Abort(_('cannot send commands unless peer instance '
3201 3205 'is available'))
3202 3206
3203 3207 command = action.split(' ', 1)[1]
3204 3208
3205 3209 args = {}
3206 3210 for line in lines:
3207 3211 # We need to allow empty values.
3208 3212 fields = line.lstrip().split(' ', 1)
3209 3213 if len(fields) == 1:
3210 3214 key = fields[0]
3211 3215 value = ''
3212 3216 else:
3213 3217 key, value = fields
3214 3218
3215 3219 if value.startswith('eval:'):
3216 3220 value = stringutil.evalpythonliteral(value[5:])
3217 3221 else:
3218 3222 value = stringutil.unescapestr(value)
3219 3223
3220 3224 args[key] = value
3221 3225
3222 3226 if batchedcommands is not None:
3223 3227 batchedcommands.append((command, args))
3224 3228 continue
3225 3229
3226 3230 ui.status(_('sending %s command\n') % command)
3227 3231
3228 3232 if 'PUSHFILE' in args:
3229 3233 with open(args['PUSHFILE'], r'rb') as fh:
3230 3234 del args['PUSHFILE']
3231 3235 res, output = peer._callpush(command, fh,
3232 3236 **pycompat.strkwargs(args))
3233 3237 ui.status(_('result: %s\n') % stringutil.escapestr(res))
3234 3238 ui.status(_('remote output: %s\n') %
3235 3239 stringutil.escapestr(output))
3236 3240 else:
3237 3241 with peer.commandexecutor() as e:
3238 3242 res = e.callcommand(command, args).result()
3239 3243
3240 3244 if isinstance(res, wireprotov2peer.commandresponse):
3241 3245 val = list(res.cborobjects())
3242 3246 ui.status(_('response: %s\n') %
3243 3247 stringutil.pprint(val, bprefix=True))
3244 3248
3245 3249 else:
3246 3250 ui.status(_('response: %s\n') %
3247 3251 stringutil.pprint(res, bprefix=True))
3248 3252
3249 3253 elif action == 'batchbegin':
3250 3254 if batchedcommands is not None:
3251 3255 raise error.Abort(_('nested batchbegin not allowed'))
3252 3256
3253 3257 batchedcommands = []
3254 3258 elif action == 'batchsubmit':
3255 3259 # There is a batching API we could go through. But it would be
3256 3260 # difficult to normalize requests into function calls. It is easier
3257 3261 # to bypass this layer and normalize to commands + args.
3258 3262 ui.status(_('sending batch with %d sub-commands\n') %
3259 3263 len(batchedcommands))
3260 3264 for i, chunk in enumerate(peer._submitbatch(batchedcommands)):
3261 3265 ui.status(_('response #%d: %s\n') %
3262 3266 (i, stringutil.escapestr(chunk)))
3263 3267
3264 3268 batchedcommands = None
3265 3269
3266 3270 elif action.startswith('httprequest '):
3267 3271 if not opener:
3268 3272 raise error.Abort(_('cannot use httprequest without an HTTP '
3269 3273 'peer'))
3270 3274
3271 3275 request = action.split(' ', 2)
3272 3276 if len(request) != 3:
3273 3277 raise error.Abort(_('invalid httprequest: expected format is '
3274 3278 '"httprequest <method> <path>'))
3275 3279
3276 3280 method, httppath = request[1:]
3277 3281 headers = {}
3278 3282 body = None
3279 3283 frames = []
3280 3284 for line in lines:
3281 3285 line = line.lstrip()
3282 3286 m = re.match(b'^([a-zA-Z0-9_-]+): (.*)$', line)
3283 3287 if m:
3284 3288 headers[m.group(1)] = m.group(2)
3285 3289 continue
3286 3290
3287 3291 if line.startswith(b'BODYFILE '):
3288 3292 with open(line.split(b' ', 1)[1], 'rb') as fh:
3289 3293 body = fh.read()
3290 3294 elif line.startswith(b'frame '):
3291 3295 frame = wireprotoframing.makeframefromhumanstring(
3292 3296 line[len(b'frame '):])
3293 3297
3294 3298 frames.append(frame)
3295 3299 else:
3296 3300 raise error.Abort(_('unknown argument to httprequest: %s') %
3297 3301 line)
3298 3302
3299 3303 url = path + httppath
3300 3304
3301 3305 if frames:
3302 3306 body = b''.join(bytes(f) for f in frames)
3303 3307
3304 3308 req = urlmod.urlreq.request(pycompat.strurl(url), body, headers)
3305 3309
3306 3310 # urllib.Request insists on using has_data() as a proxy for
3307 3311 # determining the request method. Override that to use our
3308 3312 # explicitly requested method.
3309 3313 req.get_method = lambda: pycompat.sysstr(method)
3310 3314
3311 3315 try:
3312 3316 res = opener.open(req)
3313 3317 body = res.read()
3314 3318 except util.urlerr.urlerror as e:
3315 3319 # read() method must be called, but only exists in Python 2
3316 3320 getattr(e, 'read', lambda: None)()
3317 3321 continue
3318 3322
3319 3323 if res.headers.get('Content-Type') == 'application/mercurial-cbor':
3320 3324 ui.write(_('cbor> %s\n') %
3321 3325 stringutil.pprint(cbor.loads(body), bprefix=True))
3322 3326
3323 3327 elif action == 'close':
3324 3328 peer.close()
3325 3329 elif action == 'readavailable':
3326 3330 if not stdout or not stderr:
3327 3331 raise error.Abort(_('readavailable not available on this peer'))
3328 3332
3329 3333 stdin.close()
3330 3334 stdout.read()
3331 3335 stderr.read()
3332 3336
3333 3337 elif action == 'readline':
3334 3338 if not stdout:
3335 3339 raise error.Abort(_('readline not available on this peer'))
3336 3340 stdout.readline()
3337 3341 elif action == 'ereadline':
3338 3342 if not stderr:
3339 3343 raise error.Abort(_('ereadline not available on this peer'))
3340 3344 stderr.readline()
3341 3345 elif action.startswith('read '):
3342 3346 count = int(action.split(' ', 1)[1])
3343 3347 if not stdout:
3344 3348 raise error.Abort(_('read not available on this peer'))
3345 3349 stdout.read(count)
3346 3350 elif action.startswith('eread '):
3347 3351 count = int(action.split(' ', 1)[1])
3348 3352 if not stderr:
3349 3353 raise error.Abort(_('eread not available on this peer'))
3350 3354 stderr.read(count)
3351 3355 else:
3352 3356 raise error.Abort(_('unknown action: %s') % action)
3353 3357
3354 3358 if batchedcommands is not None:
3355 3359 raise error.Abort(_('unclosed "batchbegin" request'))
3356 3360
3357 3361 if peer:
3358 3362 peer.close()
3359 3363
3360 3364 if proc:
3361 3365 proc.kill()
This diff has been collapsed as it changes many lines (709 lines changed).
@@ -1,3180 +1,2489 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 import heapq
21 20 import os
22 21 import re
23 22 import struct
24 23 import zlib
25 24
26 25 # import stuff from node for others to import from revlog
27 26 from .node import (
28 27 bin,
29 28 hex,
30 29 nullhex,
31 30 nullid,
32 31 nullrev,
33 32 wdirfilenodeids,
34 33 wdirhex,
35 34 wdirid,
36 35 wdirrev,
37 36 )
38 37 from .i18n import _
39 38 from .revlogutils.constants import (
40 39 FLAG_GENERALDELTA,
41 40 FLAG_INLINE_DATA,
42 LIMIT_DELTA2TEXT,
43 41 REVIDX_DEFAULT_FLAGS,
44 42 REVIDX_ELLIPSIS,
45 43 REVIDX_EXTSTORED,
46 44 REVIDX_FLAGS_ORDER,
47 45 REVIDX_ISCENSORED,
48 46 REVIDX_KNOWN_FLAGS,
49 47 REVIDX_RAWTEXT_CHANGING_FLAGS,
50 48 REVLOGV0,
51 49 REVLOGV1,
52 50 REVLOGV1_FLAGS,
53 51 REVLOGV2,
54 52 REVLOGV2_FLAGS,
55 53 REVLOG_DEFAULT_FLAGS,
56 54 REVLOG_DEFAULT_FORMAT,
57 55 REVLOG_DEFAULT_VERSION,
58 56 )
59 57 from .thirdparty import (
60 58 attr,
61 59 )
62 60 from . import (
63 61 ancestor,
64 62 error,
65 63 mdiff,
66 64 policy,
67 65 pycompat,
68 66 repository,
69 67 templatefilters,
70 68 util,
71 69 )
70 from .revlogutils import (
71 deltas as deltautil,
72 )
72 73 from .utils import (
73 74 interfaceutil,
74 75 stringutil,
75 76 )
76 77
77 78 # blanked usage of all the names to prevent pyflakes constraints
78 79 # We need these names available in the module for extensions.
79 80 REVLOGV0
80 81 REVLOGV1
81 82 REVLOGV2
82 83 FLAG_INLINE_DATA
83 84 FLAG_GENERALDELTA
84 85 REVLOG_DEFAULT_FLAGS
85 86 REVLOG_DEFAULT_FORMAT
86 87 REVLOG_DEFAULT_VERSION
87 88 REVLOGV1_FLAGS
88 89 REVLOGV2_FLAGS
89 90 REVIDX_ISCENSORED
90 91 REVIDX_ELLIPSIS
91 92 REVIDX_EXTSTORED
92 93 REVIDX_DEFAULT_FLAGS
93 94 REVIDX_FLAGS_ORDER
94 95 REVIDX_KNOWN_FLAGS
95 96 REVIDX_RAWTEXT_CHANGING_FLAGS
96 97
97 98 parsers = policy.importmod(r'parsers')
98 99
99 100 # Aliased for performance.
100 101 _zlibdecompress = zlib.decompress
101 102
102 103 # max size of revlog with inline data
103 104 _maxinline = 131072
104 105 _chunksize = 1048576
105 106
106 107 RevlogError = error.RevlogError
107 108 LookupError = error.LookupError
108 109 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
109 110 CensoredNodeError = error.CensoredNodeError
110 111 ProgrammingError = error.ProgrammingError
111 112
112 113 # Store flag processors (cf. 'addflagprocessor()' to register)
113 114 _flagprocessors = {
114 115 REVIDX_ISCENSORED: None,
115 116 }
116 117
117 118 _mdre = re.compile('\1\n')
118 119 def parsemeta(text):
119 120 """return (metadatadict, metadatasize)"""
120 121 # text can be buffer, so we can't use .startswith or .index
121 122 if text[:2] != '\1\n':
122 123 return None, None
123 124 s = _mdre.search(text, 2).start()
124 125 mtext = text[2:s]
125 126 meta = {}
126 127 for l in mtext.splitlines():
127 128 k, v = l.split(": ", 1)
128 129 meta[k] = v
129 130 return meta, (s + 2)
130 131
131 132 def packmeta(meta, text):
132 133 keys = sorted(meta)
133 134 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
134 135 return "\1\n%s\1\n%s" % (metatext, text)
135 136
136 137 def _censoredtext(text):
137 138 m, offs = parsemeta(text)
138 139 return m and "censored" in m
139 140
140 141 def addflagprocessor(flag, processor):
141 142 """Register a flag processor on a revision data flag.
142 143
143 144 Invariant:
144 145 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
145 146 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
146 147 - Only one flag processor can be registered on a specific flag.
147 148 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
148 149 following signatures:
149 150 - (read) f(self, rawtext) -> text, bool
150 151 - (write) f(self, text) -> rawtext, bool
151 152 - (raw) f(self, rawtext) -> bool
152 153 "text" is presented to the user. "rawtext" is stored in revlog data, not
153 154 directly visible to the user.
154 155 The boolean returned by these transforms is used to determine whether
155 156 the returned text can be used for hash integrity checking. For example,
156 157 if "write" returns False, then "text" is used to generate hash. If
157 158 "write" returns True, that basically means "rawtext" returned by "write"
158 159 should be used to generate hash. Usually, "write" and "read" return
159 160 different booleans. And "raw" returns a same boolean as "write".
160 161
161 162 Note: The 'raw' transform is used for changegroup generation and in some
162 163 debug commands. In this case the transform only indicates whether the
163 164 contents can be used for hash integrity checks.
164 165 """
165 166 if not flag & REVIDX_KNOWN_FLAGS:
166 167 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
167 168 raise ProgrammingError(msg)
168 169 if flag not in REVIDX_FLAGS_ORDER:
169 170 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
170 171 raise ProgrammingError(msg)
171 172 if flag in _flagprocessors:
172 173 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
173 174 raise error.Abort(msg)
174 175 _flagprocessors[flag] = processor
175 176
176 177 def getoffset(q):
177 178 return int(q >> 16)
178 179
179 180 def gettype(q):
180 181 return int(q & 0xFFFF)
181 182
182 183 def offset_type(offset, type):
183 184 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
184 185 raise ValueError('unknown revlog index flags')
185 186 return int(int(offset) << 16 | type)
186 187
187 188 _nullhash = hashlib.sha1(nullid)
188 189
189 190 def hash(text, p1, p2):
190 191 """generate a hash from the given text and its parent hashes
191 192
192 193 This hash combines both the current file contents and its history
193 194 in a manner that makes it easy to distinguish nodes with the same
194 195 content in the revision graph.
195 196 """
196 197 # As of now, if one of the parent nodes is null, p2 is null
197 198 if p2 == nullid:
198 199 # deep copy of a hash is faster than creating one
199 200 s = _nullhash.copy()
200 201 s.update(p1)
201 202 else:
202 203 # none of the parent nodes are nullid
203 204 if p1 < p2:
204 205 a = p1
205 206 b = p2
206 207 else:
207 208 a = p2
208 209 b = p1
209 210 s = hashlib.sha1(a)
210 211 s.update(b)
211 212 s.update(text)
212 213 return s.digest()
213 214
214 class _testrevlog(object):
215 """minimalist fake revlog to use in doctests"""
216
217 def __init__(self, data, density=0.5, mingap=0):
218 """data is an list of revision payload boundaries"""
219 self._data = data
220 self._srdensitythreshold = density
221 self._srmingapsize = mingap
222
223 def start(self, rev):
224 if rev == 0:
225 return 0
226 return self._data[rev - 1]
227
228 def end(self, rev):
229 return self._data[rev]
230
231 def length(self, rev):
232 return self.end(rev) - self.start(rev)
233
234 def __len__(self):
235 return len(self._data)
236
237 def _trimchunk(revlog, revs, startidx, endidx=None):
238 """returns revs[startidx:endidx] without empty trailing revs
239
240 Doctest Setup
241 >>> revlog = _testrevlog([
242 ... 5, #0
243 ... 10, #1
244 ... 12, #2
245 ... 12, #3 (empty)
246 ... 17, #4
247 ... 21, #5
248 ... 21, #6 (empty)
249 ... ])
250
251 Contiguous cases:
252 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
253 [0, 1, 2, 3, 4, 5]
254 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
255 [0, 1, 2, 3, 4]
256 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
257 [0, 1, 2]
258 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
259 [2]
260 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
261 [3, 4, 5]
262 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
263 [3, 4]
264
265 Discontiguous cases:
266 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
267 [1, 3, 5]
268 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
269 [1]
270 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
271 [3, 5]
272 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
273 [3, 5]
274 """
275 length = revlog.length
276
277 if endidx is None:
278 endidx = len(revs)
279
280 # If we have a non-empty delta candidate, there is nothing to trim
281 if revs[endidx - 1] < len(revlog):
282 # Trim empty revs at the end, except the very first revision of a chain
283 while (endidx > 1
284 and endidx > startidx
285 and length(revs[endidx - 1]) == 0):
286 endidx -= 1
287
288 return revs[startidx:endidx]
289
290 def _segmentspan(revlog, revs, deltainfo=None):
291 """Get the byte span of a segment of revisions
292
293 revs is a sorted array of revision numbers
294
295 >>> revlog = _testrevlog([
296 ... 5, #0
297 ... 10, #1
298 ... 12, #2
299 ... 12, #3 (empty)
300 ... 17, #4
301 ... ])
302
303 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
304 17
305 >>> _segmentspan(revlog, [0, 4])
306 17
307 >>> _segmentspan(revlog, [3, 4])
308 5
309 >>> _segmentspan(revlog, [1, 2, 3,])
310 7
311 >>> _segmentspan(revlog, [1, 3])
312 7
313 """
314 if not revs:
315 return 0
316 if deltainfo is not None and len(revlog) <= revs[-1]:
317 if len(revs) == 1:
318 return deltainfo.deltalen
319 offset = revlog.end(len(revlog) - 1)
320 end = deltainfo.deltalen + offset
321 else:
322 end = revlog.end(revs[-1])
323 return end - revlog.start(revs[0])
324
325 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
326 """slice revs to reduce the amount of unrelated data to be read from disk.
327
328 ``revs`` is sliced into groups that should be read in one go.
329 Assume that revs are sorted.
330
331 The initial chunk is sliced until the overall density (payload/chunks-span
332 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
333 `revlog._srmingapsize` is skipped.
334
335 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
336 For consistency with other slicing choices, this limit won't go lower than
337 `revlog._srmingapsize`.
338 
339 If individual revision chunks are larger than this limit, they will still
340 be yielded individually.
341
342 >>> revlog = _testrevlog([
343 ... 5, #00 (5)
344 ... 10, #01 (5)
345 ... 12, #02 (2)
346 ... 12, #03 (empty)
347 ... 27, #04 (15)
348 ... 31, #05 (4)
349 ... 31, #06 (empty)
350 ... 42, #07 (11)
351 ... 47, #08 (5)
352 ... 47, #09 (empty)
353 ... 48, #10 (1)
354 ... 51, #11 (3)
355 ... 74, #12 (23)
356 ... 85, #13 (11)
357 ... 86, #14 (1)
358 ... 91, #15 (5)
359 ... ])
360
361 >>> list(_slicechunk(revlog, list(range(16))))
362 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
363 >>> list(_slicechunk(revlog, [0, 15]))
364 [[0], [15]]
365 >>> list(_slicechunk(revlog, [0, 11, 15]))
366 [[0], [11], [15]]
367 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
368 [[0], [11, 13, 15]]
369 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
370 [[1, 2], [5, 8, 10, 11], [14]]
371
372 Slicing with a maximum chunk size
373 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
374 [[0], [11], [13], [15]]
375 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
376 [[0], [11], [13, 15]]
377 """
378 if targetsize is not None:
379 targetsize = max(targetsize, revlog._srmingapsize)
380 # targetsize should not be specified when evaluating delta candidates:
381 # * targetsize is used to ensure we stay within specification when reading,
382 # * deltainfo is used to pick a good delta chain when writing.
383 if not (deltainfo is None or targetsize is None):
384 msg = 'cannot use `targetsize` with a `deltainfo`'
385 raise error.ProgrammingError(msg)
386 for chunk in _slicechunktodensity(revlog, revs,
387 deltainfo,
388 revlog._srdensitythreshold,
389 revlog._srmingapsize):
390 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
391 yield subchunk
392
393 def _slicechunktosize(revlog, revs, targetsize=None):
394 """slice revs to match the target size
395
396 This is intended to be used on chunks that density slicing selected but that
397 are still too large compared to the read guarantee of the revlog. This might
398 happen when the "minimal gap size" interrupted the slicing or when chains are
399 built in a way that creates large blocks next to each other.
400
401 >>> revlog = _testrevlog([
402 ... 3, #0 (3)
403 ... 5, #1 (2)
404 ... 6, #2 (1)
405 ... 8, #3 (2)
406 ... 8, #4 (empty)
407 ... 11, #5 (3)
408 ... 12, #6 (1)
409 ... 13, #7 (1)
410 ... 14, #8 (1)
411 ... ])
412
413 Cases where chunk is already small enough
414 >>> list(_slicechunktosize(revlog, [0], 3))
415 [[0]]
416 >>> list(_slicechunktosize(revlog, [6, 7], 3))
417 [[6, 7]]
418 >>> list(_slicechunktosize(revlog, [0], None))
419 [[0]]
420 >>> list(_slicechunktosize(revlog, [6, 7], None))
421 [[6, 7]]
422
423 Cases where we need actual slicing
424 >>> list(_slicechunktosize(revlog, [0, 1], 3))
425 [[0], [1]]
426 >>> list(_slicechunktosize(revlog, [1, 3], 3))
427 [[1], [3]]
428 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
429 [[1, 2], [3]]
430 >>> list(_slicechunktosize(revlog, [3, 5], 3))
431 [[3], [5]]
432 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
433 [[3], [5]]
434 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
435 [[5], [6, 7, 8]]
436 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
437 [[0], [1, 2], [3], [5], [6, 7, 8]]
438
439 Case with too large individual chunk (must return valid chunk)
440 >>> list(_slicechunktosize(revlog, [0, 1], 2))
441 [[0], [1]]
442 >>> list(_slicechunktosize(revlog, [1, 3], 1))
443 [[1], [3]]
444 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
445 [[3], [5]]
446 """
447 assert targetsize is None or 0 <= targetsize
448 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
449 yield revs
450 return
451
452 startrevidx = 0
453 startdata = revlog.start(revs[0])
454 endrevidx = 0
455 iterrevs = enumerate(revs)
456 next(iterrevs) # skip first rev.
457 for idx, r in iterrevs:
458 span = revlog.end(r) - startdata
459 if span <= targetsize:
460 endrevidx = idx
461 else:
462 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
463 if chunk:
464 yield chunk
465 startrevidx = idx
466 startdata = revlog.start(r)
467 endrevidx = idx
468 yield _trimchunk(revlog, revs, startrevidx)
469
470 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
471 mingapsize=0):
472 """slice revs to reduce the amount of unrelated data to be read from disk.
473
474 ``revs`` is sliced into groups that should be read in one go.
475 Assume that revs are sorted.
476
477 ``deltainfo`` is a _deltainfo instance of a revision that we would append
478 to the top of the revlog.
479
480 The initial chunk is sliced until the overall density (payload/chunks-span
481 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
482 skipped.
483
484 >>> revlog = _testrevlog([
485 ... 5, #00 (5)
486 ... 10, #01 (5)
487 ... 12, #02 (2)
488 ... 12, #03 (empty)
489 ... 27, #04 (15)
490 ... 31, #05 (4)
491 ... 31, #06 (empty)
492 ... 42, #07 (11)
493 ... 47, #08 (5)
494 ... 47, #09 (empty)
495 ... 48, #10 (1)
496 ... 51, #11 (3)
497 ... 74, #12 (23)
498 ... 85, #13 (11)
499 ... 86, #14 (1)
500 ... 91, #15 (5)
501 ... ])
502
503 >>> list(_slicechunktodensity(revlog, list(range(16))))
504 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
505 >>> list(_slicechunktodensity(revlog, [0, 15]))
506 [[0], [15]]
507 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
508 [[0], [11], [15]]
509 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
510 [[0], [11, 13, 15]]
511 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
512 [[1, 2], [5, 8, 10, 11], [14]]
513 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
514 ... mingapsize=20))
515 [[1, 2, 3, 5, 8, 10, 11], [14]]
516 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
517 ... targetdensity=0.95))
518 [[1, 2], [5], [8, 10, 11], [14]]
519 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
520 ... targetdensity=0.95, mingapsize=12))
521 [[1, 2], [5, 8, 10, 11], [14]]
522 """
523 start = revlog.start
524 length = revlog.length
525
526 if len(revs) <= 1:
527 yield revs
528 return
529
530 nextrev = len(revlog)
531 nextoffset = revlog.end(nextrev - 1)
532
533 if deltainfo is None:
534 deltachainspan = _segmentspan(revlog, revs)
535 chainpayload = sum(length(r) for r in revs)
536 else:
537 deltachainspan = deltainfo.distance
538 chainpayload = deltainfo.compresseddeltalen
539
540 if deltachainspan < mingapsize:
541 yield revs
542 return
543
544 readdata = deltachainspan
545
546 if deltachainspan:
547 density = chainpayload / float(deltachainspan)
548 else:
549 density = 1.0
550
551 if density >= targetdensity:
552 yield revs
553 return
554
555 if deltainfo is not None and deltainfo.deltalen:
556 revs = list(revs)
557 revs.append(nextrev)
558
559 # Store the gaps in a heap to have them sorted by decreasing size
560 gapsheap = []
561 heapq.heapify(gapsheap)
562 prevend = None
563 for i, rev in enumerate(revs):
564 if rev < nextrev:
565 revstart = start(rev)
566 revlen = length(rev)
567 else:
568 revstart = nextoffset
569 revlen = deltainfo.deltalen
570
571 # Skip empty revisions to form larger holes
572 if revlen == 0:
573 continue
574
575 if prevend is not None:
576 gapsize = revstart - prevend
577 # only consider holes that are large enough
578 if gapsize > mingapsize:
579 heapq.heappush(gapsheap, (-gapsize, i))
580
581 prevend = revstart + revlen
582
583 # Collect the indices of the largest holes until the density is acceptable
584 indicesheap = []
585 heapq.heapify(indicesheap)
586 while gapsheap and density < targetdensity:
587 oppgapsize, gapidx = heapq.heappop(gapsheap)
588
589 heapq.heappush(indicesheap, gapidx)
590
591 # the gap sizes are stored as negatives to be sorted decreasingly
592 # by the heap
593 readdata -= (-oppgapsize)
594 if readdata > 0:
595 density = chainpayload / float(readdata)
596 else:
597 density = 1.0
598
599 # Cut the revs at collected indices
600 previdx = 0
601 while indicesheap:
602 idx = heapq.heappop(indicesheap)
603
604 chunk = _trimchunk(revlog, revs, previdx, idx)
605 if chunk:
606 yield chunk
607
608 previdx = idx
609
610 chunk = _trimchunk(revlog, revs, previdx)
611 if chunk:
612 yield chunk
613
614 @attr.s(slots=True, frozen=True)
615 class _deltainfo(object):
616 distance = attr.ib()
617 deltalen = attr.ib()
618 data = attr.ib()
619 base = attr.ib()
620 chainbase = attr.ib()
621 chainlen = attr.ib()
622 compresseddeltalen = attr.ib()
623 snapshotdepth = attr.ib()
624
625 class _deltacomputer(object):
626 def __init__(self, revlog):
627 self.revlog = revlog
628
629 def _getcandidaterevs(self, p1, p2, cachedelta):
630 """
631 Provides revisions that present an interest to be diffed against,
632 grouped by level of easiness.
633 """
634 revlog = self.revlog
635 gdelta = revlog._generaldelta
636 curr = len(revlog)
637 prev = curr - 1
638 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
639
640 # should we try to build a delta?
641 if prev != nullrev and revlog._storedeltachains:
642 tested = set()
643 # This condition is true most of the time when processing
644 # changegroup data into a generaldelta repo. The only time it
645 # isn't true is if this is the first revision in a delta chain
646 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
647 if cachedelta and gdelta and revlog._lazydeltabase:
648 # Assume what we received from the server is a good choice;
649 # building the delta will reuse the cache
650 yield (cachedelta[0],)
651 tested.add(cachedelta[0])
652
653 if gdelta:
654 # exclude already lazy tested base if any
655 parents = [p for p in (p1r, p2r)
656 if p != nullrev and p not in tested]
657
658 if not revlog._deltabothparents and len(parents) == 2:
659 parents.sort()
660 # To minimize the chance of having to build a fulltext,
661 # pick first whichever parent is closest to us (max rev)
662 yield (parents[1],)
663 # then the other one (min rev) if the first did not fit
664 yield (parents[0],)
665 tested.update(parents)
666 elif len(parents) > 0:
667 # Test all parents (1 or 2), and keep the best candidate
668 yield parents
669 tested.update(parents)
670
671 if prev not in tested:
672 # other approaches failed; try against prev to hopefully save us a
673 # fulltext.
674 yield (prev,)
675 tested.add(prev)
676
677 def buildtext(self, revinfo, fh):
678 """Builds a fulltext version of a revision
679
680 revinfo: _revisioninfo instance that contains all needed info
681 fh: file handle to either the .i or the .d revlog file,
682 depending on whether it is inlined or not
683 """
684 btext = revinfo.btext
685 if btext[0] is not None:
686 return btext[0]
687
688 revlog = self.revlog
689 cachedelta = revinfo.cachedelta
690 flags = revinfo.flags
691 node = revinfo.node
692
693 baserev = cachedelta[0]
694 delta = cachedelta[1]
695 # special case deltas which replace entire base; no need to decode
696 # base revision. this neatly avoids censored bases, which throw when
697 # they're decoded.
698 hlen = struct.calcsize(">lll")
699 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
700 len(delta) - hlen):
701 btext[0] = delta[hlen:]
702 else:
703 # deltabase is rawtext before changed by flag processors, which is
704 # equivalent to non-raw text
705 basetext = revlog.revision(baserev, _df=fh, raw=False)
706 btext[0] = mdiff.patch(basetext, delta)
707
708 try:
709 res = revlog._processflags(btext[0], flags, 'read', raw=True)
710 btext[0], validatehash = res
711 if validatehash:
712 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
713 if flags & REVIDX_ISCENSORED:
714 raise RevlogError(_('node %s is not censored') % node)
715 except CensoredNodeError:
716 # must pass the censored index flag to add censored revisions
717 if not flags & REVIDX_ISCENSORED:
718 raise
719 return btext[0]
720
721 def _builddeltadiff(self, base, revinfo, fh):
722 revlog = self.revlog
723 t = self.buildtext(revinfo, fh)
724 if revlog.iscensored(base):
725 # deltas based on a censored revision must replace the
726 # full content in one patch, so delta works everywhere
727 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
728 delta = header + t
729 else:
730 ptext = revlog.revision(base, _df=fh, raw=True)
731 delta = mdiff.textdiff(ptext, t)
732
733 return delta
734
735 def _builddeltainfo(self, revinfo, base, fh):
736 # can we use the cached delta?
737 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
738 delta = revinfo.cachedelta[1]
739 else:
740 delta = self._builddeltadiff(base, revinfo, fh)
741 revlog = self.revlog
742 header, data = revlog.compress(delta)
743 deltalen = len(header) + len(data)
744 chainbase = revlog.chainbase(base)
745 offset = revlog.end(len(revlog) - 1)
746 dist = deltalen + offset - revlog.start(chainbase)
747 if revlog._generaldelta:
748 deltabase = base
749 else:
750 deltabase = chainbase
751 chainlen, compresseddeltalen = revlog._chaininfo(base)
752 chainlen += 1
753 compresseddeltalen += deltalen
754
755 revlog = self.revlog
756 snapshotdepth = None
757 if deltabase == nullrev:
758 snapshotdepth = 0
759 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
760 # A delta chain should always be one full snapshot,
761 # zero or more semi-snapshots, and zero or more deltas
762 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
763 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
764 snapshotdepth = len(revlog._deltachain(deltabase)[0])
765
766 return _deltainfo(dist, deltalen, (header, data), deltabase,
767 chainbase, chainlen, compresseddeltalen,
768 snapshotdepth)
769
770 def finddeltainfo(self, revinfo, fh):
771 """Find an acceptable delta against a candidate revision
772
773 revinfo: information about the revision (instance of _revisioninfo)
774 fh: file handle to either the .i or the .d revlog file,
775 depending on whether it is inlined or not
776
777 Returns the first acceptable candidate revision, as ordered by
778 _getcandidaterevs
779 """
780 if not revinfo.textlen:
781 return None # empty files do not need a delta
782
783 cachedelta = revinfo.cachedelta
784 p1 = revinfo.p1
785 p2 = revinfo.p2
786 revlog = self.revlog
787
788 deltalength = self.revlog.length
789 deltaparent = self.revlog.deltaparent
790
791 deltainfo = None
792 deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
793 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
794 # filter out delta bases that will never produce a good delta
795 candidaterevs = [r for r in candidaterevs
796 if self.revlog.length(r) <= deltas_limit]
797 nominateddeltas = []
798 for candidaterev in candidaterevs:
799 # skip over empty delta (no need to include them in a chain)
800 while candidaterev != nullrev and not deltalength(candidaterev):
801 candidaterev = deltaparent(candidaterev)
802 # no need to try a delta against nullid, this will be handled
803 # by fulltext later.
804 if candidaterev == nullrev:
805 continue
806 # no delta for rawtext-changing revs (see "candelta" for why)
807 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
808 continue
809 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
810 if revlog._isgooddeltainfo(candidatedelta, revinfo):
811 nominateddeltas.append(candidatedelta)
812 if nominateddeltas:
813 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
814 break
815
816 return deltainfo
817
818 215 @attr.s(slots=True, frozen=True)
819 216 class _revisioninfo(object):
820 217 """Information about a revision that allows building its fulltext
821 218 node: expected hash of the revision
822 219 p1, p2: parent revs of the revision
823 220 btext: built text cache consisting of a one-element list
824 221 cachedelta: (baserev, uncompressed_delta) or None
825 222 flags: flags associated to the revision storage
826 223
827 224 One of btext[0] or cachedelta must be set.
828 225 """
829 226 node = attr.ib()
830 227 p1 = attr.ib()
831 228 p2 = attr.ib()
832 229 btext = attr.ib()
833 230 textlen = attr.ib()
834 231 cachedelta = attr.ib()
835 232 flags = attr.ib()
836 233
837 234 @interfaceutil.implementer(repository.irevisiondelta)
838 235 @attr.s(slots=True, frozen=True)
839 236 class revlogrevisiondelta(object):
840 237 node = attr.ib()
841 238 p1node = attr.ib()
842 239 p2node = attr.ib()
843 240 basenode = attr.ib()
844 241 linknode = attr.ib()
845 242 flags = attr.ib()
846 243 baserevisionsize = attr.ib()
847 244 revision = attr.ib()
848 245 delta = attr.ib()
849 246
850 247 # index v0:
851 248 # 4 bytes: offset
852 249 # 4 bytes: compressed length
853 250 # 4 bytes: base rev
854 251 # 4 bytes: link rev
855 252 # 20 bytes: parent 1 nodeid
856 253 # 20 bytes: parent 2 nodeid
857 254 # 20 bytes: nodeid
858 255 indexformatv0 = struct.Struct(">4l20s20s20s")
859 256 indexformatv0_pack = indexformatv0.pack
860 257 indexformatv0_unpack = indexformatv0.unpack
861 258
862 259 class revlogoldindex(list):
863 260 def __getitem__(self, i):
864 261 if i == -1:
865 262 return (0, 0, 0, -1, -1, -1, -1, nullid)
866 263 return list.__getitem__(self, i)
867 264
868 265 class revlogoldio(object):
869 266 def __init__(self):
870 267 self.size = indexformatv0.size
871 268
872 269 def parseindex(self, data, inline):
873 270 s = self.size
874 271 index = []
875 272 nodemap = {nullid: nullrev}
876 273 n = off = 0
877 274 l = len(data)
878 275 while off + s <= l:
879 276 cur = data[off:off + s]
880 277 off += s
881 278 e = indexformatv0_unpack(cur)
882 279 # transform to revlogv1 format
883 280 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
884 281 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
885 282 index.append(e2)
886 283 nodemap[e[6]] = n
887 284 n += 1
888 285
889 286 return revlogoldindex(index), nodemap, None
890 287
891 288 def packentry(self, entry, node, version, rev):
892 289 if gettype(entry[0]):
893 290 raise RevlogError(_('index entry flags need revlog version 1'))
894 291 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
895 292 node(entry[5]), node(entry[6]), entry[7])
896 293 return indexformatv0_pack(*e2)
897 294
898 295 # index ng:
899 296 # 6 bytes: offset
900 297 # 2 bytes: flags
901 298 # 4 bytes: compressed length
902 299 # 4 bytes: uncompressed length
903 300 # 4 bytes: base rev
904 301 # 4 bytes: link rev
905 302 # 4 bytes: parent 1 rev
906 303 # 4 bytes: parent 2 rev
907 304 # 32 bytes: nodeid
908 305 indexformatng = struct.Struct(">Qiiiiii20s12x")
909 306 indexformatng_pack = indexformatng.pack
910 307 versionformat = struct.Struct(">I")
911 308 versionformat_pack = versionformat.pack
912 309 versionformat_unpack = versionformat.unpack
913 310
914 311 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
915 312 # signed integer)
916 313 _maxentrysize = 0x7fffffff
917 314
918 315 class revlogio(object):
919 316 def __init__(self):
920 317 self.size = indexformatng.size
921 318
922 319 def parseindex(self, data, inline):
923 320 # call the C implementation to parse the index data
924 321 index, cache = parsers.parse_index2(data, inline)
925 322 return index, getattr(index, 'nodemap', None), cache
926 323
927 324 def packentry(self, entry, node, version, rev):
928 325 p = indexformatng_pack(*entry)
929 326 if rev == 0:
930 327 p = versionformat_pack(version) + p[4:]
931 328 return p
932 329
933 330 class revlog(object):
934 331 """
935 332 the underlying revision storage object
936 333
937 334 A revlog consists of two parts, an index and the revision data.
938 335
939 336 The index is a file with a fixed record size containing
940 337 information on each revision, including its nodeid (hash), the
941 338 nodeids of its parents, the position and offset of its data within
942 339 the data file, and the revision it's based on. Finally, each entry
943 340 contains a linkrev entry that can serve as a pointer to external
944 341 data.
945 342
946 343 The revision data itself is a linear collection of data chunks.
947 344 Each chunk represents a revision and is usually represented as a
948 345 delta against the previous chunk. To bound lookup time, runs of
949 346 deltas are limited to about 2 times the length of the original
950 347 version data. This makes retrieval of a version proportional to
951 348 its size, or O(1) relative to the number of revisions.
952 349
953 350 Both pieces of the revlog are written to in an append-only
954 351 fashion, which means we never need to rewrite a file to insert or
955 352 remove data, and can use some simple techniques to avoid the need
956 353 for locking while reading.
957 354
958 355 If checkambig, indexfile is opened with checkambig=True at
959 356 writing, to avoid file stat ambiguity.
960 357
961 358 If mmaplargeindex is True, and an mmapindexthreshold is set, the
962 359 index will be mmapped rather than read if it is larger than the
963 360 configured threshold.
964 361
965 362 If censorable is True, the revlog can have censored revisions.
966 363 """
967 364 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
968 365 mmaplargeindex=False, censorable=False):
969 366 """
970 367 create a revlog object
971 368
972 369 opener is a function that abstracts the file opening operation
973 370 and can be used to implement COW semantics or the like.
974 371 """
975 372 self.indexfile = indexfile
976 373 self.datafile = datafile or (indexfile[:-2] + ".d")
977 374 self.opener = opener
978 375 # When True, indexfile is opened with checkambig=True at writing, to
979 376 # avoid file stat ambiguity.
980 377 self._checkambig = checkambig
981 378 self._censorable = censorable
982 379 # 3-tuple of (node, rev, text) for a raw revision.
983 380 self._cache = None
984 381 # Maps rev to chain base rev.
985 382 self._chainbasecache = util.lrucachedict(100)
986 383 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
987 384 self._chunkcache = (0, '')
988 385 # How much data to read and cache into the raw revlog data cache.
989 386 self._chunkcachesize = 65536
990 387 self._maxchainlen = None
991 388 self._deltabothparents = True
992 389 self.index = []
993 390 # Mapping of partial identifiers to full nodes.
994 391 self._pcache = {}
995 392 # Mapping of revision integer to full node.
996 393 self._nodecache = {nullid: nullrev}
997 394 self._nodepos = None
998 395 self._compengine = 'zlib'
999 396 self._maxdeltachainspan = -1
1000 397 self._withsparseread = False
1001 398 self._sparserevlog = False
1002 399 self._srdensitythreshold = 0.50
1003 400 self._srmingapsize = 262144
1004 401
1005 402 mmapindexthreshold = None
1006 403 v = REVLOG_DEFAULT_VERSION
1007 404 opts = getattr(opener, 'options', None)
1008 405 if opts is not None:
1009 406 if 'revlogv2' in opts:
1010 407 # version 2 revlogs always use generaldelta.
1011 408 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
1012 409 elif 'revlogv1' in opts:
1013 410 if 'generaldelta' in opts:
1014 411 v |= FLAG_GENERALDELTA
1015 412 else:
1016 413 v = 0
1017 414 if 'chunkcachesize' in opts:
1018 415 self._chunkcachesize = opts['chunkcachesize']
1019 416 if 'maxchainlen' in opts:
1020 417 self._maxchainlen = opts['maxchainlen']
1021 418 if 'deltabothparents' in opts:
1022 419 self._deltabothparents = opts['deltabothparents']
1023 420 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
1024 421 if 'compengine' in opts:
1025 422 self._compengine = opts['compengine']
1026 423 if 'maxdeltachainspan' in opts:
1027 424 self._maxdeltachainspan = opts['maxdeltachainspan']
1028 425 if mmaplargeindex and 'mmapindexthreshold' in opts:
1029 426 mmapindexthreshold = opts['mmapindexthreshold']
1030 427 self._sparserevlog = bool(opts.get('sparse-revlog', False))
1031 428 withsparseread = bool(opts.get('with-sparse-read', False))
1032 429 # sparse-revlog forces sparse-read
1033 430 self._withsparseread = self._sparserevlog or withsparseread
1034 431 if 'sparse-read-density-threshold' in opts:
1035 432 self._srdensitythreshold = opts['sparse-read-density-threshold']
1036 433 if 'sparse-read-min-gap-size' in opts:
1037 434 self._srmingapsize = opts['sparse-read-min-gap-size']
1038 435
1039 436 if self._chunkcachesize <= 0:
1040 437 raise RevlogError(_('revlog chunk cache size %r is not greater '
1041 438 'than 0') % self._chunkcachesize)
1042 439 elif self._chunkcachesize & (self._chunkcachesize - 1):
1043 440 raise RevlogError(_('revlog chunk cache size %r is not a power '
1044 441 'of 2') % self._chunkcachesize)
1045 442
1046 443 indexdata = ''
1047 444 self._initempty = True
1048 445 try:
1049 446 with self._indexfp() as f:
1050 447 if (mmapindexthreshold is not None and
1051 448 self.opener.fstat(f).st_size >= mmapindexthreshold):
1052 449 indexdata = util.buffer(util.mmapread(f))
1053 450 else:
1054 451 indexdata = f.read()
1055 452 if len(indexdata) > 0:
1056 453 v = versionformat_unpack(indexdata[:4])[0]
1057 454 self._initempty = False
1058 455 except IOError as inst:
1059 456 if inst.errno != errno.ENOENT:
1060 457 raise
1061 458
1062 459 self.version = v
1063 460 self._inline = v & FLAG_INLINE_DATA
1064 461 self._generaldelta = v & FLAG_GENERALDELTA
1065 462 flags = v & ~0xFFFF
1066 463 fmt = v & 0xFFFF
1067 464 if fmt == REVLOGV0:
1068 465 if flags:
1069 466 raise RevlogError(_('unknown flags (%#04x) in version %d '
1070 467 'revlog %s') %
1071 468 (flags >> 16, fmt, self.indexfile))
1072 469 elif fmt == REVLOGV1:
1073 470 if flags & ~REVLOGV1_FLAGS:
1074 471 raise RevlogError(_('unknown flags (%#04x) in version %d '
1075 472 'revlog %s') %
1076 473 (flags >> 16, fmt, self.indexfile))
1077 474 elif fmt == REVLOGV2:
1078 475 if flags & ~REVLOGV2_FLAGS:
1079 476 raise RevlogError(_('unknown flags (%#04x) in version %d '
1080 477 'revlog %s') %
1081 478 (flags >> 16, fmt, self.indexfile))
1082 479 else:
1083 480 raise RevlogError(_('unknown version (%d) in revlog %s') %
1084 481 (fmt, self.indexfile))
1085 482
1086 483 self._storedeltachains = True
1087 484
1088 485 self._io = revlogio()
1089 486 if self.version == REVLOGV0:
1090 487 self._io = revlogoldio()
1091 488 try:
1092 489 d = self._io.parseindex(indexdata, self._inline)
1093 490 except (ValueError, IndexError):
1094 491 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1095 492 self.index, nodemap, self._chunkcache = d
1096 493 if nodemap is not None:
1097 494 self.nodemap = self._nodecache = nodemap
1098 495 if not self._chunkcache:
1099 496 self._chunkclear()
1100 497 # revnum -> (chain-length, sum-delta-length)
1101 498 self._chaininfocache = {}
1102 499 # revlog header -> revlog compressor
1103 500 self._decompressors = {}
1104 501
1105 502 @util.propertycache
1106 503 def _compressor(self):
1107 504 return util.compengines[self._compengine].revlogcompressor()
1108 505
1109 506 def _indexfp(self, mode='r'):
1110 507 """file object for the revlog's index file"""
1111 508 args = {r'mode': mode}
1112 509 if mode != 'r':
1113 510 args[r'checkambig'] = self._checkambig
1114 511 if mode == 'w':
1115 512 args[r'atomictemp'] = True
1116 513 return self.opener(self.indexfile, **args)
1117 514
1118 515 def _datafp(self, mode='r'):
1119 516 """file object for the revlog's data file"""
1120 517 return self.opener(self.datafile, mode=mode)
1121 518
1122 519 @contextlib.contextmanager
1123 520 def _datareadfp(self, existingfp=None):
1124 521 """file object suitable to read data"""
1125 522 if existingfp is not None:
1126 523 yield existingfp
1127 524 else:
1128 525 if self._inline:
1129 526 func = self._indexfp
1130 527 else:
1131 528 func = self._datafp
1132 529 with func() as fp:
1133 530 yield fp
1134 531
1135 532 def tip(self):
1136 533 return self.node(len(self.index) - 1)
1137 534 def __contains__(self, rev):
1138 535 return 0 <= rev < len(self)
1139 536 def __len__(self):
1140 537 return len(self.index)
1141 538 def __iter__(self):
1142 539 return iter(pycompat.xrange(len(self)))
1143 540 def revs(self, start=0, stop=None):
1144 541 """iterate over all rev in this revlog (from start to stop)"""
1145 542 step = 1
1146 543 length = len(self)
1147 544 if stop is not None:
1148 545 if start > stop:
1149 546 step = -1
1150 547 stop += step
1151 548 if stop > length:
1152 549 stop = length
1153 550 else:
1154 551 stop = length
1155 552 return pycompat.xrange(start, stop, step)
1156 553
1157 554 @util.propertycache
1158 555 def nodemap(self):
1159 556 if self.index:
1160 557 # populate mapping down to the initial node
1161 558 node0 = self.index[0][7] # get around changelog filtering
1162 559 self.rev(node0)
1163 560 return self._nodecache
1164 561
1165 562 def hasnode(self, node):
1166 563 try:
1167 564 self.rev(node)
1168 565 return True
1169 566 except KeyError:
1170 567 return False
1171 568
1172 569 def candelta(self, baserev, rev):
1173 570 """whether two revisions (baserev, rev) can be delta-ed or not"""
1174 571 # Disable delta if either rev requires a content-changing flag
1175 572 # processor (ex. LFS). This is because such flag processor can alter
1176 573 # the rawtext content that the delta will be based on, and two clients
1177 574 # could have the same revlog node with different flags (i.e. different
1178 575 # rawtext contents) and the delta could be incompatible.
1179 576 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1180 577 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1181 578 return False
1182 579 return True
1183 580
1184 581 def clearcaches(self):
1185 582 self._cache = None
1186 583 self._chainbasecache.clear()
1187 584 self._chunkcache = (0, '')
1188 585 self._pcache = {}
1189 586
1190 587 try:
1191 588 self._nodecache.clearcaches()
1192 589 except AttributeError:
1193 590 self._nodecache = {nullid: nullrev}
1194 591 self._nodepos = None
1195 592
1196 593 def rev(self, node):
1197 594 try:
1198 595 return self._nodecache[node]
1199 596 except TypeError:
1200 597 raise
1201 598 except RevlogError:
1202 599 # parsers.c radix tree lookup failed
1203 600 if node == wdirid or node in wdirfilenodeids:
1204 601 raise error.WdirUnsupported
1205 602 raise LookupError(node, self.indexfile, _('no node'))
1206 603 except KeyError:
1207 604 # pure python cache lookup failed
1208 605 n = self._nodecache
1209 606 i = self.index
1210 607 p = self._nodepos
1211 608 if p is None:
1212 609 p = len(i) - 1
1213 610 else:
1214 611 assert p < len(i)
1215 612 for r in pycompat.xrange(p, -1, -1):
1216 613 v = i[r][7]
1217 614 n[v] = r
1218 615 if v == node:
1219 616 self._nodepos = r - 1
1220 617 return r
1221 618 if node == wdirid or node in wdirfilenodeids:
1222 619 raise error.WdirUnsupported
1223 620 raise LookupError(node, self.indexfile, _('no node'))
1224 621
1225 622 # Accessors for index entries.
1226 623
1227 624 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1228 625 # are flags.
1229 626 def start(self, rev):
1230 627 return int(self.index[rev][0] >> 16)
1231 628
1232 629 def flags(self, rev):
1233 630 return self.index[rev][0] & 0xFFFF
1234 631
1235 632 def length(self, rev):
1236 633 return self.index[rev][1]
1237 634
1238 635 def rawsize(self, rev):
1239 636 """return the length of the uncompressed text for a given revision"""
1240 637 l = self.index[rev][2]
1241 638 if l >= 0:
1242 639 return l
1243 640
1244 641 t = self.revision(rev, raw=True)
1245 642 return len(t)
1246 643
1247 644 def size(self, rev):
1248 645 """length of non-raw text (processed by a "read" flag processor)"""
1249 646 # fast path: if no "read" flag processor could change the content,
1250 647 # size is rawsize. note: ELLIPSIS is known to not change the content.
1251 648 flags = self.flags(rev)
1252 649 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1253 650 return self.rawsize(rev)
1254 651
1255 652 return len(self.revision(rev, raw=False))
1256 653
1257 654 def chainbase(self, rev):
1258 655 base = self._chainbasecache.get(rev)
1259 656 if base is not None:
1260 657 return base
1261 658
1262 659 index = self.index
1263 660 iterrev = rev
1264 661 base = index[iterrev][3]
1265 662 while base != iterrev:
1266 663 iterrev = base
1267 664 base = index[iterrev][3]
1268 665
1269 666 self._chainbasecache[rev] = base
1270 667 return base
1271 668
1272 669 def linkrev(self, rev):
1273 670 return self.index[rev][4]
1274 671
1275 672 def parentrevs(self, rev):
1276 673 try:
1277 674 entry = self.index[rev]
1278 675 except IndexError:
1279 676 if rev == wdirrev:
1280 677 raise error.WdirUnsupported
1281 678 raise
1282 679
1283 680 return entry[5], entry[6]
1284 681
1285 682 def node(self, rev):
1286 683 try:
1287 684 return self.index[rev][7]
1288 685 except IndexError:
1289 686 if rev == wdirrev:
1290 687 raise error.WdirUnsupported
1291 688 raise
1292 689
1293 690 # Derived from index values.
1294 691
1295 692 def end(self, rev):
1296 693 return self.start(rev) + self.length(rev)
1297 694
1298 695 def parents(self, node):
1299 696 i = self.index
1300 697 d = i[self.rev(node)]
1301 698 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1302 699
1303 700 def chainlen(self, rev):
1304 701 return self._chaininfo(rev)[0]
1305 702
1306 703 def _chaininfo(self, rev):
1307 704 chaininfocache = self._chaininfocache
1308 705 if rev in chaininfocache:
1309 706 return chaininfocache[rev]
1310 707 index = self.index
1311 708 generaldelta = self._generaldelta
1312 709 iterrev = rev
1313 710 e = index[iterrev]
1314 711 clen = 0
1315 712 compresseddeltalen = 0
1316 713 while iterrev != e[3]:
1317 714 clen += 1
1318 715 compresseddeltalen += e[1]
1319 716 if generaldelta:
1320 717 iterrev = e[3]
1321 718 else:
1322 719 iterrev -= 1
1323 720 if iterrev in chaininfocache:
1324 721 t = chaininfocache[iterrev]
1325 722 clen += t[0]
1326 723 compresseddeltalen += t[1]
1327 724 break
1328 725 e = index[iterrev]
1329 726 else:
1330 727 # Add text length of base since decompressing that also takes
1331 728 # work. For cache hits the length is already included.
1332 729 compresseddeltalen += e[1]
1333 730 r = (clen, compresseddeltalen)
1334 731 chaininfocache[rev] = r
1335 732 return r
1336 733
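A hedged sketch relating the two chain helpers; `rl` and `rev` are assumed:

# chainlen() is the public wrapper; _chaininfo() additionally reports how
# many compressed delta bytes a reconstruction would have to decompress
clen, compresseddeltalen = rl._chaininfo(rev)
assert clen == rl.chainlen(rev)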
1337 734 def _deltachain(self, rev, stoprev=None):
1338 735 """Obtain the delta chain for a revision.
1339 736
1340 737 ``stoprev`` specifies a revision to stop at. If not specified, we
1341 738 stop at the base of the chain.
1342 739
1343 740 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1344 741 revs in ascending order and ``stopped`` is a bool indicating whether
1345 742 ``stoprev`` was hit.
1346 743 """
1347 744 # Try C implementation.
1348 745 try:
1349 746 return self.index.deltachain(rev, stoprev, self._generaldelta)
1350 747 except AttributeError:
1351 748 pass
1352 749
1353 750 chain = []
1354 751
1355 752 # Alias to prevent attribute lookup in tight loop.
1356 753 index = self.index
1357 754 generaldelta = self._generaldelta
1358 755
1359 756 iterrev = rev
1360 757 e = index[iterrev]
1361 758 while iterrev != e[3] and iterrev != stoprev:
1362 759 chain.append(iterrev)
1363 760 if generaldelta:
1364 761 iterrev = e[3]
1365 762 else:
1366 763 iterrev -= 1
1367 764 e = index[iterrev]
1368 765
1369 766 if iterrev == stoprev:
1370 767 stopped = True
1371 768 else:
1372 769 chain.append(iterrev)
1373 770 stopped = False
1374 771
1375 772 chain.reverse()
1376 773 return chain, stopped
1377 774
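A usage sketch of the chain walk above, with `rl` and `rev` assumed:

chain, stopped = rl._deltachain(rev)
# the chain is returned in ascending order and ends at `rev`; without a
# stoprev the walk always reaches the chain base, so `stopped` is False
assert chain[-1] == rev and not stopped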
1378 775 def ancestors(self, revs, stoprev=0, inclusive=False):
1379 776 """Generate the ancestors of 'revs' in reverse topological order.
1380 777 Does not generate revs lower than stoprev.
1381 778
1382 779 See the documentation for ancestor.lazyancestors for more details."""
1383 780
1384 781 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1385 782 inclusive=inclusive)
1386 783
1387 784 def descendants(self, revs):
1388 785 """Generate the descendants of 'revs' in revision order.
1389 786
1390 787 Yield a sequence of revision numbers starting with a child of
1391 788 some rev in revs, i.e., each revision is *not* considered a
1392 789 descendant of itself. Results are ordered by revision number (a
1393 790 topological sort)."""
1394 791 first = min(revs)
1395 792 if first == nullrev:
1396 793 for i in self:
1397 794 yield i
1398 795 return
1399 796
1400 797 seen = set(revs)
1401 798 for i in self.revs(start=first + 1):
1402 799 for x in self.parentrevs(i):
1403 800 if x != nullrev and x in seen:
1404 801 seen.add(i)
1405 802 yield i
1406 803 break
1407 804
1408 805 def findcommonmissing(self, common=None, heads=None):
1409 806 """Return a tuple of the ancestors of common and the ancestors of heads
1410 807 that are not ancestors of common. In revset terminology, we return the
1411 808 tuple:
1412 809
1413 810 ::common, (::heads) - (::common)
1414 811
1415 812 The list is sorted by revision number, meaning it is
1416 813 topologically sorted.
1417 814
1418 815 'heads' and 'common' are both lists of node IDs. If heads is
1419 816 not supplied, uses all of the revlog's heads. If common is not
1420 817 supplied, uses nullid."""
1421 818 if common is None:
1422 819 common = [nullid]
1423 820 if heads is None:
1424 821 heads = self.heads()
1425 822
1426 823 common = [self.rev(n) for n in common]
1427 824 heads = [self.rev(n) for n in heads]
1428 825
1429 826 # we want the ancestors, but inclusive
1430 827 class lazyset(object):
1431 828 def __init__(self, lazyvalues):
1432 829 self.addedvalues = set()
1433 830 self.lazyvalues = lazyvalues
1434 831
1435 832 def __contains__(self, value):
1436 833 return value in self.addedvalues or value in self.lazyvalues
1437 834
1438 835 def __iter__(self):
1439 836 added = self.addedvalues
1440 837 for r in added:
1441 838 yield r
1442 839 for r in self.lazyvalues:
1443 840 if r not in added:
1444 841 yield r
1445 842
1446 843 def add(self, value):
1447 844 self.addedvalues.add(value)
1448 845
1449 846 def update(self, values):
1450 847 self.addedvalues.update(values)
1451 848
1452 849 has = lazyset(self.ancestors(common))
1453 850 has.add(nullrev)
1454 851 has.update(common)
1455 852
1456 853 # take all ancestors from heads that aren't in has
1457 854 missing = set()
1458 855 visit = collections.deque(r for r in heads if r not in has)
1459 856 while visit:
1460 857 r = visit.popleft()
1461 858 if r in missing:
1462 859 continue
1463 860 else:
1464 861 missing.add(r)
1465 862 for p in self.parentrevs(r):
1466 863 if p not in has:
1467 864 visit.append(p)
1468 865 missing = list(missing)
1469 866 missing.sort()
1470 867 return has, [self.node(miss) for miss in missing]
1471 868
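A hedged call sketch; `rl` is an assumed revlog and `commonnode`/`headnode` are hypothetical node ids:

has, missing = rl.findcommonmissing(common=[commonnode], heads=[headnode])
# `has` answers ancestors-of-common membership lazily; `missing` is the
# topologically sorted node list for (::heads) - (::common)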
1472 869 def incrementalmissingrevs(self, common=None):
1473 870 """Return an object that can be used to incrementally compute the
1474 871 revision numbers of the ancestors of arbitrary sets that are not
1475 872 ancestors of common. This is an ancestor.incrementalmissingancestors
1476 873 object.
1477 874
1478 875 'common' is a list of revision numbers. If common is not supplied, uses
1479 876 nullrev.
1480 877 """
1481 878 if common is None:
1482 879 common = [nullrev]
1483 880
1484 881 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1485 882
1486 883 def findmissingrevs(self, common=None, heads=None):
1487 884 """Return the revision numbers of the ancestors of heads that
1488 885 are not ancestors of common.
1489 886
1490 887 More specifically, return a list of revision numbers corresponding to
1491 888 nodes N such that every N satisfies the following constraints:
1492 889
1493 890 1. N is an ancestor of some node in 'heads'
1494 891 2. N is not an ancestor of any node in 'common'
1495 892
1496 893 The list is sorted by revision number, meaning it is
1497 894 topologically sorted.
1498 895
1499 896 'heads' and 'common' are both lists of revision numbers. If heads is
1500 897 not supplied, uses all of the revlog's heads. If common is not
1501 898 supplied, uses nullid."""
1502 899 if common is None:
1503 900 common = [nullrev]
1504 901 if heads is None:
1505 902 heads = self.headrevs()
1506 903
1507 904 inc = self.incrementalmissingrevs(common=common)
1508 905 return inc.missingancestors(heads)
1509 906
1510 907 def findmissing(self, common=None, heads=None):
1511 908 """Return the ancestors of heads that are not ancestors of common.
1512 909
1513 910 More specifically, return a list of nodes N such that every N
1514 911 satisfies the following constraints:
1515 912
1516 913 1. N is an ancestor of some node in 'heads'
1517 914 2. N is not an ancestor of any node in 'common'
1518 915
1519 916 The list is sorted by revision number, meaning it is
1520 917 topologically sorted.
1521 918
1522 919 'heads' and 'common' are both lists of node IDs. If heads is
1523 920 not supplied, uses all of the revlog's heads. If common is not
1524 921 supplied, uses nullid."""
1525 922 if common is None:
1526 923 common = [nullid]
1527 924 if heads is None:
1528 925 heads = self.heads()
1529 926
1530 927 common = [self.rev(n) for n in common]
1531 928 heads = [self.rev(n) for n in heads]
1532 929
1533 930 inc = self.incrementalmissingrevs(common=common)
1534 931 return [self.node(r) for r in inc.missingancestors(heads)]
1535 932
1536 933 def nodesbetween(self, roots=None, heads=None):
1537 934 """Return a topological path from 'roots' to 'heads'.
1538 935
1539 936 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1540 937 topologically sorted list of all nodes N that satisfy both of
1541 938 these constraints:
1542 939
1543 940 1. N is a descendant of some node in 'roots'
1544 941 2. N is an ancestor of some node in 'heads'
1545 942
1546 943 Every node is considered to be both a descendant and an ancestor
1547 944 of itself, so every reachable node in 'roots' and 'heads' will be
1548 945 included in 'nodes'.
1549 946
1550 947 'outroots' is the list of reachable nodes in 'roots', i.e., the
1551 948 subset of 'roots' that is returned in 'nodes'. Likewise,
1552 949 'outheads' is the subset of 'heads' that is also in 'nodes'.
1553 950
1554 951 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1555 952 unspecified, uses nullid as the only root. If 'heads' is
1556 953 unspecified, uses list of all of the revlog's heads."""
1557 954 nonodes = ([], [], [])
1558 955 if roots is not None:
1559 956 roots = list(roots)
1560 957 if not roots:
1561 958 return nonodes
1562 959 lowestrev = min([self.rev(n) for n in roots])
1563 960 else:
1564 961 roots = [nullid] # Everybody's a descendant of nullid
1565 962 lowestrev = nullrev
1566 963 if (lowestrev == nullrev) and (heads is None):
1567 964 # We want _all_ the nodes!
1568 965 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1569 966 if heads is None:
1570 967 # All nodes are ancestors, so the latest ancestor is the last
1571 968 # node.
1572 969 highestrev = len(self) - 1
1573 970 # Set ancestors to None to signal that every node is an ancestor.
1574 971 ancestors = None
1575 972 # Set heads to an empty dictionary for later discovery of heads
1576 973 heads = {}
1577 974 else:
1578 975 heads = list(heads)
1579 976 if not heads:
1580 977 return nonodes
1581 978 ancestors = set()
1582 979 # Turn heads into a dictionary so we can remove 'fake' heads.
1583 980 # Also, later we will be using it to filter out the heads we can't
1584 981 # find from roots.
1585 982 heads = dict.fromkeys(heads, False)
1586 983 # Start at the top and keep marking parents until we're done.
1587 984 nodestotag = set(heads)
1588 985 # Remember where the top was so we can use it as a limit later.
1589 986 highestrev = max([self.rev(n) for n in nodestotag])
1590 987 while nodestotag:
1591 988 # grab a node to tag
1592 989 n = nodestotag.pop()
1593 990 # Never tag nullid
1594 991 if n == nullid:
1595 992 continue
1596 993 # A node's revision number represents its place in a
1597 994 # topologically sorted list of nodes.
1598 995 r = self.rev(n)
1599 996 if r >= lowestrev:
1600 997 if n not in ancestors:
1601 998 # If we are possibly a descendant of one of the roots
1602 999 # and we haven't already been marked as an ancestor
1603 1000 ancestors.add(n) # Mark as ancestor
1604 1001 # Add non-nullid parents to list of nodes to tag.
1605 1002 nodestotag.update([p for p in self.parents(n) if
1606 1003 p != nullid])
1607 1004 elif n in heads: # We've seen it before, is it a fake head?
1608 1005 # So it is, real heads should not be the ancestors of
1609 1006 # any other heads.
1610 1007 heads.pop(n)
1611 1008 if not ancestors:
1612 1009 return nonodes
1613 1010 # Now that we have our set of ancestors, we want to remove any
1614 1011 # roots that are not ancestors.
1615 1012
1616 1013 # If one of the roots was nullid, everything is included anyway.
1617 1014 if lowestrev > nullrev:
1618 1015 # But, since we weren't, let's recompute the lowest rev to not
1619 1016 # include roots that aren't ancestors.
1620 1017
1621 1018 # Filter out roots that aren't ancestors of heads
1622 1019 roots = [root for root in roots if root in ancestors]
1623 1020 # Recompute the lowest revision
1624 1021 if roots:
1625 1022 lowestrev = min([self.rev(root) for root in roots])
1626 1023 else:
1627 1024 # No more roots? Return empty list
1628 1025 return nonodes
1629 1026 else:
1630 1027 # We are descending from nullid, and don't need to care about
1631 1028 # any other roots.
1632 1029 lowestrev = nullrev
1633 1030 roots = [nullid]
1634 1031 # Transform our roots list into a set.
1635 1032 descendants = set(roots)
1636 1033 # Also, keep the original roots so we can filter out roots that aren't
1637 1034 # 'real' roots (i.e. are descended from other roots).
1638 1035 roots = descendants.copy()
1639 1036 # Our topologically sorted list of output nodes.
1640 1037 orderedout = []
1641 1038 # Don't start at nullid since we don't want nullid in our output list,
1642 1039 # and if nullid shows up in descendants, empty parents will look like
1643 1040 # they're descendants.
1644 1041 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1645 1042 n = self.node(r)
1646 1043 isdescendant = False
1647 1044 if lowestrev == nullrev: # Everybody is a descendant of nullid
1648 1045 isdescendant = True
1649 1046 elif n in descendants:
1650 1047 # n is already a descendant
1651 1048 isdescendant = True
1652 1049 # This check only needs to be done here because all the roots
1653 1050 # will start being marked as descendants before the loop.
1654 1051 if n in roots:
1655 1052 # If n was a root, check if it's a 'real' root.
1656 1053 p = tuple(self.parents(n))
1657 1054 # If any of its parents are descendants, it's not a root.
1658 1055 if (p[0] in descendants) or (p[1] in descendants):
1659 1056 roots.remove(n)
1660 1057 else:
1661 1058 p = tuple(self.parents(n))
1662 1059 # A node is a descendant if either of its parents are
1663 1060 # descendants. (We seeded the descendants set with the roots
1664 1061 # up there, remember?)
1665 1062 if (p[0] in descendants) or (p[1] in descendants):
1666 1063 descendants.add(n)
1667 1064 isdescendant = True
1668 1065 if isdescendant and ((ancestors is None) or (n in ancestors)):
1669 1066 # Only include nodes that are both descendants and ancestors.
1670 1067 orderedout.append(n)
1671 1068 if (ancestors is not None) and (n in heads):
1672 1069 # We're trying to figure out which heads are reachable
1673 1070 # from roots.
1674 1071 # Mark this head as having been reached
1675 1072 heads[n] = True
1676 1073 elif ancestors is None:
1677 1074 # Otherwise, we're trying to discover the heads.
1678 1075 # Assume this is a head because if it isn't, the next step
1679 1076 # will eventually remove it.
1680 1077 heads[n] = True
1681 1078 # But, obviously its parents aren't.
1682 1079 for p in self.parents(n):
1683 1080 heads.pop(p, None)
1684 1081 heads = [head for head, flag in heads.iteritems() if flag]
1685 1082 roots = list(roots)
1686 1083 assert orderedout
1687 1084 assert roots
1688 1085 assert heads
1689 1086 return (orderedout, roots, heads)
1690 1087
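A call sketch for the traversal above; `rootnode` and `headnode` are hypothetical node ids:

nodes, outroots, outheads = rl.nodesbetween(roots=[rootnode], heads=[headnode])
# every node in `nodes` descends from a root and is an ancestor of a head;
# `outroots`/`outheads` are the subsets of the inputs actually reachable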
1691 1088 def headrevs(self):
1692 1089 try:
1693 1090 return self.index.headrevs()
1694 1091 except AttributeError:
1695 1092 return self._headrevs()
1696 1093
1697 1094 def computephases(self, roots):
1698 1095 return self.index.computephasesmapsets(roots)
1699 1096
1700 1097 def _headrevs(self):
1701 1098 count = len(self)
1702 1099 if not count:
1703 1100 return [nullrev]
1704 1101 # we won't iterate over filtered revs, so nobody is a head at start
1705 1102 ishead = [0] * (count + 1)
1706 1103 index = self.index
1707 1104 for r in self:
1708 1105 ishead[r] = 1 # I may be a head
1709 1106 e = index[r]
1710 1107 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1711 1108 return [r for r, val in enumerate(ishead) if val]
1712 1109
1713 1110 def heads(self, start=None, stop=None):
1714 1111 """return the list of all nodes that have no children
1715 1112
1716 1113 if start is specified, only heads that are descendants of
1717 1114 start will be returned
1718 1115 if stop is specified, it will consider all the revs from stop
1719 1116 as if they had no children
1720 1117 """
1721 1118 if start is None and stop is None:
1722 1119 if not len(self):
1723 1120 return [nullid]
1724 1121 return [self.node(r) for r in self.headrevs()]
1725 1122
1726 1123 if start is None:
1727 1124 start = nullid
1728 1125 if stop is None:
1729 1126 stop = []
1730 1127 stoprevs = set([self.rev(n) for n in stop])
1731 1128 startrev = self.rev(start)
1732 1129 reachable = {startrev}
1733 1130 heads = {startrev}
1734 1131
1735 1132 parentrevs = self.parentrevs
1736 1133 for r in self.revs(start=startrev + 1):
1737 1134 for p in parentrevs(r):
1738 1135 if p in reachable:
1739 1136 if r not in stoprevs:
1740 1137 reachable.add(r)
1741 1138 heads.add(r)
1742 1139 if p in heads and p not in stoprevs:
1743 1140 heads.remove(p)
1744 1141
1745 1142 return [self.node(r) for r in heads]
1746 1143
1747 1144 def children(self, node):
1748 1145 """find the children of a given node"""
1749 1146 c = []
1750 1147 p = self.rev(node)
1751 1148 for r in self.revs(start=p + 1):
1752 1149 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1753 1150 if prevs:
1754 1151 for pr in prevs:
1755 1152 if pr == p:
1756 1153 c.append(self.node(r))
1757 1154 elif p == nullrev:
1758 1155 c.append(self.node(r))
1759 1156 return c
1760 1157
1761 1158 def commonancestorsheads(self, a, b):
1762 1159 """calculate all the heads of the common ancestors of nodes a and b"""
1763 1160 a, b = self.rev(a), self.rev(b)
1764 1161 ancs = self._commonancestorsheads(a, b)
1765 1162 return pycompat.maplist(self.node, ancs)
1766 1163
1767 1164 def _commonancestorsheads(self, *revs):
1768 1165 """calculate all the heads of the common ancestors of revs"""
1769 1166 try:
1770 1167 ancs = self.index.commonancestorsheads(*revs)
1771 1168 except (AttributeError, OverflowError): # C implementation failed
1772 1169 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1773 1170 return ancs
1774 1171
1775 1172 def isancestor(self, a, b):
1776 1173 """return True if node a is an ancestor of node b
1777 1174
1778 1175 A revision is considered an ancestor of itself."""
1779 1176 a, b = self.rev(a), self.rev(b)
1780 1177 return self.isancestorrev(a, b)
1781 1178
1782 1179 def isancestorrev(self, a, b):
1783 1180 """return True if revision a is an ancestor of revision b
1784 1181
1785 1182 A revision is considered an ancestor of itself.
1786 1183
1787 1184 The implementation of this is trivial but the use of
1788 1185 commonancestorsheads is not."""
1789 1186 if a == nullrev:
1790 1187 return True
1791 1188 elif a == b:
1792 1189 return True
1793 1190 elif a > b:
1794 1191 return False
1795 1192 return a in self._commonancestorsheads(a, b)
1796 1193
1797 1194 def ancestor(self, a, b):
1798 1195 """calculate the "best" common ancestor of nodes a and b"""
1799 1196
1800 1197 a, b = self.rev(a), self.rev(b)
1801 1198 try:
1802 1199 ancs = self.index.ancestors(a, b)
1803 1200 except (AttributeError, OverflowError):
1804 1201 ancs = ancestor.ancestors(self.parentrevs, a, b)
1805 1202 if ancs:
1806 1203 # choose a consistent winner when there's a tie
1807 1204 return min(map(self.node, ancs))
1808 1205 return nullid
1809 1206
1810 1207 def _match(self, id):
1811 1208 if isinstance(id, int):
1812 1209 # rev
1813 1210 return self.node(id)
1814 1211 if len(id) == 20:
1815 1212 # possibly a binary node
1816 1213 # odds of a binary node being all hex in ASCII are 1 in 10**25
1817 1214 try:
1818 1215 node = id
1819 1216 self.rev(node) # quick search the index
1820 1217 return node
1821 1218 except LookupError:
1822 1219 pass # may be partial hex id
1823 1220 try:
1824 1221 # str(rev)
1825 1222 rev = int(id)
1826 1223 if "%d" % rev != id:
1827 1224 raise ValueError
1828 1225 if rev < 0:
1829 1226 rev = len(self) + rev
1830 1227 if rev < 0 or rev >= len(self):
1831 1228 raise ValueError
1832 1229 return self.node(rev)
1833 1230 except (ValueError, OverflowError):
1834 1231 pass
1835 1232 if len(id) == 40:
1836 1233 try:
1837 1234 # a full hex nodeid?
1838 1235 node = bin(id)
1839 1236 self.rev(node)
1840 1237 return node
1841 1238 except (TypeError, LookupError):
1842 1239 pass
1843 1240
1844 1241 def _partialmatch(self, id):
1845 1242 # we don't care about wdirfilenodeids as they should always be full hashes
1846 1243 maybewdir = wdirhex.startswith(id)
1847 1244 try:
1848 1245 partial = self.index.partialmatch(id)
1849 1246 if partial and self.hasnode(partial):
1850 1247 if maybewdir:
1851 1248 # single 'ff...' match in radix tree, ambiguous with wdir
1852 1249 raise RevlogError
1853 1250 return partial
1854 1251 if maybewdir:
1855 1252 # no 'ff...' match in radix tree, wdir identified
1856 1253 raise error.WdirUnsupported
1857 1254 return None
1858 1255 except RevlogError:
1859 1256 # parsers.c radix tree lookup gave multiple matches
1860 1257 # fast path: for unfiltered changelog, radix tree is accurate
1861 1258 if not getattr(self, 'filteredrevs', None):
1862 1259 raise AmbiguousPrefixLookupError(id, self.indexfile,
1863 1260 _('ambiguous identifier'))
1864 1261 # fall through to slow path that filters hidden revisions
1865 1262 except (AttributeError, ValueError):
1866 1263 # we are pure python, or key was too short to search radix tree
1867 1264 pass
1868 1265
1869 1266 if id in self._pcache:
1870 1267 return self._pcache[id]
1871 1268
1872 1269 if len(id) <= 40:
1873 1270 try:
1874 1271 # hex(node)[:...]
1875 1272 l = len(id) // 2 # grab an even number of digits
1876 1273 prefix = bin(id[:l * 2])
1877 1274 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1878 1275 nl = [n for n in nl if hex(n).startswith(id) and
1879 1276 self.hasnode(n)]
1880 1277 if nullhex.startswith(id):
1881 1278 nl.append(nullid)
1882 1279 if len(nl) > 0:
1883 1280 if len(nl) == 1 and not maybewdir:
1884 1281 self._pcache[id] = nl[0]
1885 1282 return nl[0]
1886 1283 raise AmbiguousPrefixLookupError(id, self.indexfile,
1887 1284 _('ambiguous identifier'))
1888 1285 if maybewdir:
1889 1286 raise error.WdirUnsupported
1890 1287 return None
1891 1288 except TypeError:
1892 1289 pass
1893 1290
1894 1291 def lookup(self, id):
1895 1292 """locate a node based on:
1896 1293 - revision number or str(revision number)
1897 1294 - nodeid or subset of hex nodeid
1898 1295 """
1899 1296 n = self._match(id)
1900 1297 if n is not None:
1901 1298 return n
1902 1299 n = self._partialmatch(id)
1903 1300 if n:
1904 1301 return n
1905 1302
1906 1303 raise LookupError(id, self.indexfile, _('no match found'))
1907 1304
1908 1305 def shortest(self, node, minlength=1):
1909 1306 """Find the shortest unambiguous prefix that matches node."""
1910 1307 def isvalid(prefix):
1911 1308 try:
1912 1309 node = self._partialmatch(prefix)
1913 1310 except error.RevlogError:
1914 1311 return False
1915 1312 except error.WdirUnsupported:
1916 1313 # single 'ff...' match
1917 1314 return True
1918 1315 if node is None:
1919 1316 raise LookupError(node, self.indexfile, _('no node'))
1920 1317 return True
1921 1318
1922 1319 def maybewdir(prefix):
1923 1320 return all(c == 'f' for c in prefix)
1924 1321
1925 1322 hexnode = hex(node)
1926 1323
1927 1324 def disambiguate(hexnode, minlength):
1928 1325 """Disambiguate against wdirid."""
1929 1326 for length in range(minlength, 41):
1930 1327 prefix = hexnode[:length]
1931 1328 if not maybewdir(prefix):
1932 1329 return prefix
1933 1330
1934 1331 if not getattr(self, 'filteredrevs', None):
1935 1332 try:
1936 1333 length = max(self.index.shortest(node), minlength)
1937 1334 return disambiguate(hexnode, length)
1938 1335 except RevlogError:
1939 1336 if node != wdirid:
1940 1337 raise LookupError(node, self.indexfile, _('no node'))
1941 1338 except AttributeError:
1942 1339 # Fall through to pure code
1943 1340 pass
1944 1341
1945 1342 if node == wdirid:
1946 1343 for length in range(minlength, 41):
1947 1344 prefix = hexnode[:length]
1948 1345 if isvalid(prefix):
1949 1346 return prefix
1950 1347
1951 1348 for length in range(minlength, 41):
1952 1349 prefix = hexnode[:length]
1953 1350 if isvalid(prefix):
1954 1351 return disambiguate(hexnode, length)
1955 1352
1956 1353 def cmp(self, node, text):
1957 1354 """compare text with a given file revision
1958 1355
1959 1356 returns True if text is different from what is stored.
1960 1357 """
1961 1358 p1, p2 = self.parents(node)
1962 1359 return hash(text, p1, p2) != node
1963 1360
1964 1361 def _cachesegment(self, offset, data):
1965 1362 """Add a segment to the revlog cache.
1966 1363
1967 1364 Accepts an absolute offset and the data that is at that location.
1968 1365 """
1969 1366 o, d = self._chunkcache
1970 1367 # try to add to existing cache
1971 1368 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1972 1369 self._chunkcache = o, d + data
1973 1370 else:
1974 1371 self._chunkcache = offset, data
1975 1372
1976 1373 def _readsegment(self, offset, length, df=None):
1977 1374 """Load a segment of raw data from the revlog.
1978 1375
1979 1376 Accepts an absolute offset, length to read, and an optional existing
1980 1377 file handle to read from.
1981 1378
1982 1379 If an existing file handle is passed, it will be seeked and the
1983 1380 original seek position will NOT be restored.
1984 1381
1985 1382 Returns a str or buffer of raw byte data.
1986 1383 """
1987 1384 # Cache data both forward and backward around the requested
1988 1385 # data, in a fixed size window. This helps speed up operations
1989 1386 # involving reading the revlog backwards.
1990 1387 cachesize = self._chunkcachesize
1991 1388 realoffset = offset & ~(cachesize - 1)
1992 1389 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1993 1390 - realoffset)
1994 1391 with self._datareadfp(df) as df:
1995 1392 df.seek(realoffset)
1996 1393 d = df.read(reallength)
1997 1394 self._cachesegment(realoffset, d)
1998 1395 if offset != realoffset or reallength != length:
1999 1396 return util.buffer(d, offset - realoffset, length)
2000 1397 return d
2001 1398
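The window arithmetic used above, written out once with concrete names; the 64KiB size is only an assumption for the sketch (self._chunkcachesize is configurable), and `offset`/`length` are hypothetical:

cachesize = 65536
realoffset = offset & ~(cachesize - 1)            # round start down to a window
reallength = (((offset + length + cachesize)
               & ~(cachesize - 1)) - realoffset)   # round the end up past it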
2002 1399 def _getsegment(self, offset, length, df=None):
2003 1400 """Obtain a segment of raw data from the revlog.
2004 1401
2005 1402 Accepts an absolute offset, length of bytes to obtain, and an
2006 1403 optional file handle to the already-opened revlog. If the file
2007 1404 handle is used, its original seek position will not be preserved.
2008 1405
2009 1406 Requests for data may be returned from a cache.
2010 1407
2011 1408 Returns a str or a buffer instance of raw byte data.
2012 1409 """
2013 1410 o, d = self._chunkcache
2014 1411 l = len(d)
2015 1412
2016 1413 # is it in the cache?
2017 1414 cachestart = offset - o
2018 1415 cacheend = cachestart + length
2019 1416 if cachestart >= 0 and cacheend <= l:
2020 1417 if cachestart == 0 and cacheend == l:
2021 1418 return d # avoid a copy
2022 1419 return util.buffer(d, cachestart, cacheend - cachestart)
2023 1420
2024 1421 return self._readsegment(offset, length, df=df)
2025 1422
2026 1423 def _getsegmentforrevs(self, startrev, endrev, df=None):
2027 1424 """Obtain a segment of raw data corresponding to a range of revisions.
2028 1425
2029 1426 Accepts the start and end revisions and an optional already-open
2030 1427 file handle to be used for reading. If the file handle is used, its
2031 1428 seek position will not be preserved.
2032 1429
2033 1430 Requests for data may be satisfied by a cache.
2034 1431
2035 1432 Returns a 2-tuple of (offset, data) for the requested range of
2036 1433 revisions. Offset is the integer offset from the beginning of the
2037 1434 revlog and data is a str or buffer of the raw byte data.
2038 1435
2039 1436 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2040 1437 to determine where each revision's data begins and ends.
2041 1438 """
2042 1439 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2043 1440 # (functions are expensive).
2044 1441 index = self.index
2045 1442 istart = index[startrev]
2046 1443 start = int(istart[0] >> 16)
2047 1444 if startrev == endrev:
2048 1445 end = start + istart[1]
2049 1446 else:
2050 1447 iend = index[endrev]
2051 1448 end = int(iend[0] >> 16) + iend[1]
2052 1449
2053 1450 if self._inline:
2054 1451 start += (startrev + 1) * self._io.size
2055 1452 end += (endrev + 1) * self._io.size
2056 1453 length = end - start
2057 1454
2058 1455 return start, self._getsegment(start, length, df=df)
2059 1456
2060 1457 def _chunk(self, rev, df=None):
2061 1458 """Obtain a single decompressed chunk for a revision.
2062 1459
2063 1460 Accepts an integer revision and an optional already-open file handle
2064 1461 to be used for reading. If used, the seek position of the file will not
2065 1462 be preserved.
2066 1463
2067 1464 Returns a str holding uncompressed data for the requested revision.
2068 1465 """
2069 1466 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2070 1467
2071 1468 def _chunks(self, revs, df=None, targetsize=None):
2072 1469 """Obtain decompressed chunks for the specified revisions.
2073 1470
2074 1471 Accepts an iterable of numeric revisions that are assumed to be in
2075 1472 ascending order. Also accepts an optional already-open file handle
2076 1473 to be used for reading. If used, the seek position of the file will
2077 1474 not be preserved.
2078 1475
2079 1476 This function is similar to calling ``self._chunk()`` multiple times,
2080 1477 but is faster.
2081 1478
2082 1479 Returns a list with decompressed data for each requested revision.
2083 1480 """
2084 1481 if not revs:
2085 1482 return []
2086 1483 start = self.start
2087 1484 length = self.length
2088 1485 inline = self._inline
2089 1486 iosize = self._io.size
2090 1487 buffer = util.buffer
2091 1488
2092 1489 l = []
2093 1490 ladd = l.append
2094 1491
2095 1492 if not self._withsparseread:
2096 1493 slicedchunks = (revs,)
2097 1494 else:
2098 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
1495 slicedchunks = deltautil.slicechunk(self, revs,
1496 targetsize=targetsize)
2099 1497
2100 1498 for revschunk in slicedchunks:
2101 1499 firstrev = revschunk[0]
2102 1500 # Skip trailing revisions with empty diff
2103 1501 for lastrev in revschunk[::-1]:
2104 1502 if length(lastrev) != 0:
2105 1503 break
2106 1504
2107 1505 try:
2108 1506 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2109 1507 except OverflowError:
2110 1508 # issue4215 - we can't cache a run of chunks greater than
2111 1509 # 2G on Windows
2112 1510 return [self._chunk(rev, df=df) for rev in revschunk]
2113 1511
2114 1512 decomp = self.decompress
2115 1513 for rev in revschunk:
2116 1514 chunkstart = start(rev)
2117 1515 if inline:
2118 1516 chunkstart += (rev + 1) * iosize
2119 1517 chunklength = length(rev)
2120 1518 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2121 1519
2122 1520 return l
2123 1521
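The sparse-read path above now leans on the helper relocated by this change; a hedged consumption sketch, with `rl`, `revs` and `targetsize` assumed:

from mercurial.revlogutils import deltas as deltautil

for revschunk in deltautil.slicechunk(rl, revs, targetsize=targetsize):
    # each slice is a dense run of revisions that a single contiguous read
    # of the data file can satisfy (see _getsegmentforrevs above)
    offset, data = rl._getsegmentforrevs(revschunk[0], revschunk[-1])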
2124 1522 def _chunkclear(self):
2125 1523 """Clear the raw chunk cache."""
2126 1524 self._chunkcache = (0, '')
2127 1525
2128 1526 def deltaparent(self, rev):
2129 1527 """return deltaparent of the given revision"""
2130 1528 base = self.index[rev][3]
2131 1529 if base == rev:
2132 1530 return nullrev
2133 1531 elif self._generaldelta:
2134 1532 return base
2135 1533 else:
2136 1534 return rev - 1
2137 1535
2138 1536 def issnapshot(self, rev):
2139 1537 """tells whether rev is a snapshot
2140 1538 """
2141 1539 if rev == nullrev:
2142 1540 return True
2143 1541 deltap = self.deltaparent(rev)
2144 1542 if deltap == nullrev:
2145 1543 return True
2146 1544 p1, p2 = self.parentrevs(rev)
2147 1545 if deltap in (p1, p2):
2148 1546 return False
2149 1547 return self.issnapshot(deltap)
2150 1548
2151 1549 def snapshotdepth(self, rev):
2152 1550 """number of snapshot in the chain before this one"""
2153 1551 if not self.issnapshot(rev):
2154 1552 raise ProgrammingError('revision %d not a snapshot' % rev)
2155 1553 return len(self._deltachain(rev)[0]) - 1
2156 1554
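A small sketch tying the two snapshot helpers together; `rl` and `rev` are assumed:

# a revision is a snapshot when nothing in its delta chain is a delta
# against a parent; depth 0 means a full snapshot stored against nullrev
if rl.issnapshot(rev):
    depth = rl.snapshotdepth(rev)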
2157 1555 def revdiff(self, rev1, rev2):
2158 1556 """return or calculate a delta between two revisions
2159 1557
2160 1558 The delta calculated is in binary form and is intended to be written to
2161 1559 revlog data directly. So this function needs raw revision data.
2162 1560 """
2163 1561 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2164 1562 return bytes(self._chunk(rev2))
2165 1563
2166 1564 return mdiff.textdiff(self.revision(rev1, raw=True),
2167 1565 self.revision(rev2, raw=True))
2168 1566
2169 1567 def revision(self, nodeorrev, _df=None, raw=False):
2170 1568 """return an uncompressed revision of a given node or revision
2171 1569 number.
2172 1570
2173 1571 _df - an existing file handle to read from. (internal-only)
2174 1572 raw - an optional argument specifying if the revision data is to be
2175 1573 treated as raw data when applying flag transforms. 'raw' should be set
2176 1574 to True when generating changegroups or in debug commands.
2177 1575 """
2178 1576 if isinstance(nodeorrev, int):
2179 1577 rev = nodeorrev
2180 1578 node = self.node(rev)
2181 1579 else:
2182 1580 node = nodeorrev
2183 1581 rev = None
2184 1582
2185 1583 cachedrev = None
2186 1584 flags = None
2187 1585 rawtext = None
2188 1586 if node == nullid:
2189 1587 return ""
2190 1588 if self._cache:
2191 1589 if self._cache[0] == node:
2192 1590 # _cache only stores rawtext
2193 1591 if raw:
2194 1592 return self._cache[2]
2195 1593 # duplicated, but good for perf
2196 1594 if rev is None:
2197 1595 rev = self.rev(node)
2198 1596 if flags is None:
2199 1597 flags = self.flags(rev)
2200 1598 # no extra flags set, no flag processor runs, text = rawtext
2201 1599 if flags == REVIDX_DEFAULT_FLAGS:
2202 1600 return self._cache[2]
2203 1601 # rawtext is reusable. need to run flag processor
2204 1602 rawtext = self._cache[2]
2205 1603
2206 1604 cachedrev = self._cache[1]
2207 1605
2208 1606 # look up what we need to read
2209 1607 if rawtext is None:
2210 1608 if rev is None:
2211 1609 rev = self.rev(node)
2212 1610
2213 1611 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2214 1612 if stopped:
2215 1613 rawtext = self._cache[2]
2216 1614
2217 1615 # drop cache to save memory
2218 1616 self._cache = None
2219 1617
2220 1618 targetsize = None
2221 1619 rawsize = self.index[rev][2]
2222 1620 if 0 <= rawsize:
2223 1621 targetsize = 4 * rawsize
2224 1622
2225 1623 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2226 1624 if rawtext is None:
2227 1625 rawtext = bytes(bins[0])
2228 1626 bins = bins[1:]
2229 1627
2230 1628 rawtext = mdiff.patches(rawtext, bins)
2231 1629 self._cache = (node, rev, rawtext)
2232 1630
2233 1631 if flags is None:
2234 1632 if rev is None:
2235 1633 rev = self.rev(node)
2236 1634 flags = self.flags(rev)
2237 1635
2238 1636 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2239 1637 if validatehash:
2240 1638 self.checkhash(text, node, rev=rev)
2241 1639
2242 1640 return text
2243 1641
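A usage sketch of the two read modes; `rl` and `node` are assumed:

text = rl.revision(node)               # 'read' flag transforms applied
rawtext = rl.revision(node, raw=True)  # exactly what the delta chain rebuilds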
2244 1642 def hash(self, text, p1, p2):
2245 1643 """Compute a node hash.
2246 1644
2247 1645 Available as a function so that subclasses can replace the hash
2248 1646 as needed.
2249 1647 """
2250 1648 return hash(text, p1, p2)
2251 1649
2252 1650 def _processflags(self, text, flags, operation, raw=False):
2253 1651 """Inspect revision data flags and applies transforms defined by
2254 1652 registered flag processors.
2255 1653
2256 1654 ``text`` - the revision data to process
2257 1655 ``flags`` - the revision flags
2258 1656 ``operation`` - the operation being performed (read or write)
2259 1657 ``raw`` - an optional argument describing if the raw transform should be
2260 1658 applied.
2261 1659
2262 1660 This method processes the flags in the order (or reverse order if
2263 1661 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2264 1662 flag processors registered for present flags. The order of flags defined
2265 1663 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2266 1664
2267 1665 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2268 1666 processed text and ``validatehash`` is a bool indicating whether the
2269 1667 returned text should be checked for hash integrity.
2270 1668
2271 1669 Note: If the ``raw`` argument is set, it has precedence over the
2272 1670 operation and will only update the value of ``validatehash``.
2273 1671 """
2274 1672 # fast path: no flag processors will run
2275 1673 if flags == 0:
2276 1674 return text, True
2277 1675 if operation not in ('read', 'write'):
2278 1676 raise ProgrammingError(_("invalid '%s' operation") % operation)
2279 1677 # Check all flags are known.
2280 1678 if flags & ~REVIDX_KNOWN_FLAGS:
2281 1679 raise RevlogError(_("incompatible revision flag '%#x'") %
2282 1680 (flags & ~REVIDX_KNOWN_FLAGS))
2283 1681 validatehash = True
2284 1682 # Depending on the operation (read or write), the order might be
2285 1683 # reversed due to non-commutative transforms.
2286 1684 orderedflags = REVIDX_FLAGS_ORDER
2287 1685 if operation == 'write':
2288 1686 orderedflags = reversed(orderedflags)
2289 1687
2290 1688 for flag in orderedflags:
2291 1689 # If a flagprocessor has been registered for a known flag, apply the
2292 1690 # related operation transform and update result tuple.
2293 1691 if flag & flags:
2294 1692 vhash = True
2295 1693
2296 1694 if flag not in _flagprocessors:
2297 1695 message = _("missing processor for flag '%#x'") % (flag)
2298 1696 raise RevlogError(message)
2299 1697
2300 1698 processor = _flagprocessors[flag]
2301 1699 if processor is not None:
2302 1700 readtransform, writetransform, rawtransform = processor
2303 1701
2304 1702 if raw:
2305 1703 vhash = rawtransform(self, text)
2306 1704 elif operation == 'read':
2307 1705 text, vhash = readtransform(self, text)
2308 1706 else: # write operation
2309 1707 text, vhash = writetransform(self, text)
2310 1708 validatehash = validatehash and vhash
2311 1709
2312 1710 return text, validatehash
2313 1711
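A hedged round-trip sketch of the flag pipeline; `rl`, `text` and `someflags` are hypothetical, and the point is only the ordering noted in the docstring ('write' applies transforms in reverse so a later 'read' can undo them):

rawtext, validatehash = rl._processflags(text, someflags, 'write')
text2, validatehash = rl._processflags(rawtext, someflags, 'read')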
2314 1712 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2315 1713 """Check node hash integrity.
2316 1714
2317 1715 Available as a function so that subclasses can extend hash mismatch
2318 1716 behaviors as needed.
2319 1717 """
2320 1718 try:
2321 1719 if p1 is None and p2 is None:
2322 1720 p1, p2 = self.parents(node)
2323 1721 if node != self.hash(text, p1, p2):
2324 1722 revornode = rev
2325 1723 if revornode is None:
2326 1724 revornode = templatefilters.short(hex(node))
2327 1725 raise RevlogError(_("integrity check failed on %s:%s")
2328 1726 % (self.indexfile, pycompat.bytestr(revornode)))
2329 1727 except RevlogError:
2330 1728 if self._censorable and _censoredtext(text):
2331 1729 raise error.CensoredNodeError(self.indexfile, node, text)
2332 1730 raise
2333 1731
2334 1732 def _enforceinlinesize(self, tr, fp=None):
2335 1733 """Check if the revlog is too big for inline and convert if so.
2336 1734
2337 1735 This should be called after revisions are added to the revlog. If the
2338 1736 revlog has grown too large to be an inline revlog, it will convert it
2339 1737 to use multiple index and data files.
2340 1738 """
2341 1739 tiprev = len(self) - 1
2342 1740 if (not self._inline or
2343 1741 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2344 1742 return
2345 1743
2346 1744 trinfo = tr.find(self.indexfile)
2347 1745 if trinfo is None:
2348 1746 raise RevlogError(_("%s not found in the transaction")
2349 1747 % self.indexfile)
2350 1748
2351 1749 trindex = trinfo[2]
2352 1750 if trindex is not None:
2353 1751 dataoff = self.start(trindex)
2354 1752 else:
2355 1753 # revlog was stripped at start of transaction, use all leftover data
2356 1754 trindex = len(self) - 1
2357 1755 dataoff = self.end(tiprev)
2358 1756
2359 1757 tr.add(self.datafile, dataoff)
2360 1758
2361 1759 if fp:
2362 1760 fp.flush()
2363 1761 fp.close()
2364 1762
2365 1763 with self._datafp('w') as df:
2366 1764 for r in self:
2367 1765 df.write(self._getsegmentforrevs(r, r)[1])
2368 1766
2369 1767 with self._indexfp('w') as fp:
2370 1768 self.version &= ~FLAG_INLINE_DATA
2371 1769 self._inline = False
2372 1770 io = self._io
2373 1771 for i in self:
2374 1772 e = io.packentry(self.index[i], self.node, self.version, i)
2375 1773 fp.write(e)
2376 1774
2377 1775 # the temp file replaces the real index when we exit the context
2378 1776 # manager
2379 1777
2380 1778 tr.replace(self.indexfile, trindex * self._io.size)
2381 1779 self._chunkclear()
2382 1780
2383 1781 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2384 1782 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2385 1783 """add a revision to the log
2386 1784
2387 1785 text - the revision data to add
2388 1786 transaction - the transaction object used for rollback
2389 1787 link - the linkrev data to add
2390 1788 p1, p2 - the parent nodeids of the revision
2391 1789 cachedelta - an optional precomputed delta
2392 1790 node - nodeid of revision; typically node is not specified, and it is
2393 1791 computed by default as hash(text, p1, p2), however subclasses might
2394 1792 use a different hashing method (and override checkhash() in that case)
2395 1793 flags - the known flags to set on the revision
2396 deltacomputer - an optional _deltacomputer instance shared between
1794 deltacomputer - an optional deltacomputer instance shared between
2397 1795 multiple calls
2398 1796 """
2399 1797 if link == nullrev:
2400 1798 raise RevlogError(_("attempted to add linkrev -1 to %s")
2401 1799 % self.indexfile)
2402 1800
2403 1801 if flags:
2404 1802 node = node or self.hash(text, p1, p2)
2405 1803
2406 1804 rawtext, validatehash = self._processflags(text, flags, 'write')
2407 1805
2408 1806 # If the flag processor modifies the revision data, ignore any provided
2409 1807 # cachedelta.
2410 1808 if rawtext != text:
2411 1809 cachedelta = None
2412 1810
2413 1811 if len(rawtext) > _maxentrysize:
2414 1812 raise RevlogError(
2415 1813 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2416 1814 % (self.indexfile, len(rawtext)))
2417 1815
2418 1816 node = node or self.hash(rawtext, p1, p2)
2419 1817 if node in self.nodemap:
2420 1818 return node
2421 1819
2422 1820 if validatehash:
2423 1821 self.checkhash(rawtext, node, p1=p1, p2=p2)
2424 1822
2425 1823 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2426 1824 flags, cachedelta=cachedelta,
2427 1825 deltacomputer=deltacomputer)
2428 1826
2429 1827 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2430 1828 cachedelta=None, deltacomputer=None):
2431 1829 """add a raw revision with known flags, node and parents
2432 1830 useful when reusing a revision not stored in this revlog (ex: received
2433 1831 over wire, or read from an external bundle).
2434 1832 """
2435 1833 dfh = None
2436 1834 if not self._inline:
2437 1835 dfh = self._datafp("a+")
2438 1836 ifh = self._indexfp("a+")
2439 1837 try:
2440 1838 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2441 1839 flags, cachedelta, ifh, dfh,
2442 1840 deltacomputer=deltacomputer)
2443 1841 finally:
2444 1842 if dfh:
2445 1843 dfh.close()
2446 1844 ifh.close()
2447 1845
2448 1846 def compress(self, data):
2449 1847 """Generate a possibly-compressed representation of data."""
2450 1848 if not data:
2451 1849 return '', data
2452 1850
2453 1851 compressed = self._compressor.compress(data)
2454 1852
2455 1853 if compressed:
2456 1854 # The revlog compressor added the header in the returned data.
2457 1855 return '', compressed
2458 1856
2459 1857 if data[0:1] == '\0':
2460 1858 return '', data
2461 1859 return 'u', data
2462 1860
2463 1861 def decompress(self, data):
2464 1862 """Decompress a revlog chunk.
2465 1863
2466 1864 The chunk is expected to begin with a header identifying the
2467 1865 format type so it can be routed to an appropriate decompressor.
2468 1866 """
2469 1867 if not data:
2470 1868 return data
2471 1869
2472 1870 # Revlogs are read much more frequently than they are written and many
2473 1871 # chunks only take microseconds to decompress, so performance is
2474 1872 # important here.
2475 1873 #
2476 1874 # We can make a few assumptions about revlogs:
2477 1875 #
2478 1876 # 1) the majority of chunks will be compressed (as opposed to inline
2479 1877 # raw data).
2480 1878 # 2) decompressing *any* data will likely be at least 10x slower than
2481 1879 # returning raw inline data.
2482 1880 # 3) we want to prioritize common and officially supported compression
2483 1881 # engines
2484 1882 #
2485 1883 # It follows that we want to optimize for "decompress compressed data
2486 1884 # when encoded with common and officially supported compression engines"
2487 1885 # case over "raw data" and "data encoded by less common or non-official
2488 1886 # compression engines." That is why we have the inline lookup first
2489 1887 # followed by the compengines lookup.
2490 1888 #
2491 1889 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2492 1890 # compressed chunks. And this matters for changelog and manifest reads.
2493 1891 t = data[0:1]
2494 1892
2495 1893 if t == 'x':
2496 1894 try:
2497 1895 return _zlibdecompress(data)
2498 1896 except zlib.error as e:
2499 1897 raise RevlogError(_('revlog decompress error: %s') %
2500 1898 stringutil.forcebytestr(e))
2501 1899 # '\0' is more common than 'u' so it goes first.
2502 1900 elif t == '\0':
2503 1901 return data
2504 1902 elif t == 'u':
2505 1903 return util.buffer(data, 1)
2506 1904
2507 1905 try:
2508 1906 compressor = self._decompressors[t]
2509 1907 except KeyError:
2510 1908 try:
2511 1909 engine = util.compengines.forrevlogheader(t)
2512 1910 compressor = engine.revlogcompressor()
2513 1911 self._decompressors[t] = compressor
2514 1912 except KeyError:
2515 1913 raise RevlogError(_('unknown compression type %r') % t)
2516 1914
2517 1915 return compressor.decompress(data)
2518 1916
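A round-trip sketch for the pair above; `rl` is assumed and `sometext` is any hypothetical bytes payload:

# compress() returns a (header, data) pair; the two parts are concatenated
# on disk, and decompress() reverses whichever encoding was chosen
header, data = rl.compress(sometext)
restored = rl.decompress(header + data)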
2519 def _isgooddeltainfo(self, deltainfo, revinfo):
2520 """Returns True if the given delta is good. Good means that it is within
2521 the disk span, disk size, and chain length bounds that we know to be
2522 performant."""
2523 if deltainfo is None:
2524 return False
2525
2526 # - 'deltainfo.distance' is the distance from the base revision --
2527 # bounding it limits the amount of I/O we need to do.
2528 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2529 # deltas we need to apply -- bounding it limits the amount of CPU
2530 # we consume.
2531
2532 if self._sparserevlog:
2533 # As sparse-read will be used, we can consider that the distance,
2534 # instead of being the span of the whole chunk,
2535 # is the span of the largest read chunk
2536 base = deltainfo.base
2537
2538 if base != nullrev:
2539 deltachain = self._deltachain(base)[0]
2540 else:
2541 deltachain = []
2542
2543 # search for the first non-snapshot revision
2544 for idx, r in enumerate(deltachain):
2545 if not self.issnapshot(r):
2546 break
2547 deltachain = deltachain[idx:]
2548 chunks = _slicechunk(self, deltachain, deltainfo)
2549 all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
2550 distance = max(all_span)
2551 else:
2552 distance = deltainfo.distance
2553
2554 textlen = revinfo.textlen
2555 defaultmax = textlen * 4
2556 maxdist = self._maxdeltachainspan
2557 if not maxdist:
2558 maxdist = distance # ensure the conditional pass
2559 maxdist = max(maxdist, defaultmax)
2560 if self._sparserevlog and maxdist < self._srmingapsize:
2561 # In multiple places, we ignore irrelevant data ranges below a
2562 # certain size. We also apply this tradeoff here and relax the span
2563 # constraint for small enough content.
2564 maxdist = self._srmingapsize
2565
2566 # Bad delta from read span:
2567 #
2568 # If the span of data read is larger than the maximum allowed.
2569 if maxdist < distance:
2570 return False
2571
2572 # Bad delta from new delta size:
2573 #
2574 # If the delta size is larger than the target text, storing the
2575 # delta will be inefficient.
2576 if textlen < deltainfo.deltalen:
2577 return False
2578
2579 # Bad delta from cumulated payload size:
2580 #
2581 # If the sum of the deltas gets larger than K * the target text length.
2582 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
2583 return False
2584
2585 # Bad delta from chain length:
2586 #
2587 # If the number of deltas in the chain gets too high.
2588 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2589 return False
2590
2591 # bad delta from intermediate snapshot size limit
2592 #
2593 # If an intermediate snapshot size is higher than the limit. The
2594 # limit exists to prevent an endless chain of intermediate deltas
2595 # from being created.
2596 if (deltainfo.snapshotdepth is not None and
2597 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
2598 return False
2599
2600 # bad delta if new intermediate snapshot is larger than the previous
2601 # snapshot
2602 if (deltainfo.snapshotdepth
2603 and self.length(deltainfo.base) < deltainfo.deltalen):
2604 return False
2605
2606 return True
2607
2608 1917 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2609 1918 cachedelta, ifh, dfh, alwayscache=False,
2610 1919 deltacomputer=None):
2611 1920 """internal function to add revisions to the log
2612 1921
2613 1922 see addrevision for argument descriptions.
2614 1923
2615 1924 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2616 1925
2617 1926 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2618 1927 be used.
2619 1928
2620 1929 invariants:
2621 1930 - rawtext is optional (can be None); if not set, cachedelta must be set.
2622 1931 if both are set, they must correspond to each other.
2623 1932 """
2624 1933 if node == nullid:
2625 1934 raise RevlogError(_("%s: attempt to add null revision") %
2626 1935 (self.indexfile))
2627 1936 if node == wdirid or node in wdirfilenodeids:
2628 1937 raise RevlogError(_("%s: attempt to add wdir revision") %
2629 1938 (self.indexfile))
2630 1939
2631 1940 if self._inline:
2632 1941 fh = ifh
2633 1942 else:
2634 1943 fh = dfh
2635 1944
2636 1945 btext = [rawtext]
2637 1946
2638 1947 curr = len(self)
2639 1948 prev = curr - 1
2640 1949 offset = self.end(prev)
2641 1950 p1r, p2r = self.rev(p1), self.rev(p2)
2642 1951
2643 1952 # full versions are inserted when the needed deltas
2644 1953 # become comparable to the uncompressed text
2645 1954 if rawtext is None:
2646 1955 # need rawtext size, before changed by flag processors, which is
2647 1956 # the non-raw size. use revlog explicitly to avoid filelog's extra
2648 1957 # logic that might remove metadata size.
2649 1958 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2650 1959 cachedelta[1])
2651 1960 else:
2652 1961 textlen = len(rawtext)
2653 1962
2654 1963 if deltacomputer is None:
2655 deltacomputer = _deltacomputer(self)
1964 deltacomputer = deltautil.deltacomputer(self)
2656 1965
2657 1966 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2658 1967
2659 1968 # no delta for flag processor revision (see "candelta" for why)
2660 1969 # not calling candelta since only one revision needs test, also to
2661 1970 # avoid overhead fetching flags again.
2662 1971 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2663 1972 deltainfo = None
2664 1973 else:
2665 1974 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2666 1975
2667 1976 if deltainfo is not None:
2668 1977 base = deltainfo.base
2669 1978 chainbase = deltainfo.chainbase
2670 1979 data = deltainfo.data
2671 1980 l = deltainfo.deltalen
2672 1981 else:
2673 1982 rawtext = deltacomputer.buildtext(revinfo, fh)
2674 1983 data = self.compress(rawtext)
2675 1984 l = len(data[1]) + len(data[0])
2676 1985 base = chainbase = curr
2677 1986
2678 1987 e = (offset_type(offset, flags), l, textlen,
2679 1988 base, link, p1r, p2r, node)
2680 1989 self.index.append(e)
2681 1990 self.nodemap[node] = curr
2682 1991
2683 1992 entry = self._io.packentry(e, self.node, self.version, curr)
2684 1993 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2685 1994
2686 1995 if alwayscache and rawtext is None:
2687 1996 rawtext = deltacomputer.buildtext(revinfo, fh)
2688 1997
2689 1998 if type(rawtext) == bytes: # only accept immutable objects
2690 1999 self._cache = (node, curr, rawtext)
2691 2000 self._chainbasecache[curr] = chainbase
2692 2001 return node
2693 2002
2694 2003 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2695 2004 # Files opened in a+ mode have inconsistent behavior on various
2696 2005 # platforms. Windows requires that a file positioning call be made
2697 2006 # when the file handle transitions between reads and writes. See
2698 2007 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2699 2008 # platforms, Python or the platform itself can be buggy. Some versions
2700 2009 # of Solaris have been observed to not append at the end of the file
2701 2010 # if the file was seeked to before the end. See issue4943 for more.
2702 2011 #
2703 2012 # We work around this issue by inserting a seek() before writing.
2704 2013 # Note: This is likely not necessary on Python 3.
2705 2014 ifh.seek(0, os.SEEK_END)
2706 2015 if dfh:
2707 2016 dfh.seek(0, os.SEEK_END)
2708 2017
2709 2018 curr = len(self) - 1
2710 2019 if not self._inline:
2711 2020 transaction.add(self.datafile, offset)
2712 2021 transaction.add(self.indexfile, curr * len(entry))
2713 2022 if data[0]:
2714 2023 dfh.write(data[0])
2715 2024 dfh.write(data[1])
2716 2025 ifh.write(entry)
2717 2026 else:
2718 2027 offset += curr * self._io.size
2719 2028 transaction.add(self.indexfile, offset, curr)
2720 2029 ifh.write(entry)
2721 2030 ifh.write(data[0])
2722 2031 ifh.write(data[1])
2723 2032 self._enforceinlinesize(transaction, ifh)
2724 2033
2725 2034 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2726 2035 """
2727 2036 add a delta group
2728 2037
2729 2038 given a set of deltas, add them to the revision log. the
2730 2039 first delta is against its parent, which should be in our
2731 2040 log, the rest are against the previous delta.
2732 2041
2733 2042 If ``addrevisioncb`` is defined, it will be called with arguments of
2734 2043 this revlog and the node that was added.
2735 2044 """
2736 2045
2737 2046 nodes = []
2738 2047
2739 2048 r = len(self)
2740 2049 end = 0
2741 2050 if r:
2742 2051 end = self.end(r - 1)
2743 2052 ifh = self._indexfp("a+")
2744 2053 isize = r * self._io.size
2745 2054 if self._inline:
2746 2055 transaction.add(self.indexfile, end + isize, r)
2747 2056 dfh = None
2748 2057 else:
2749 2058 transaction.add(self.indexfile, isize, r)
2750 2059 transaction.add(self.datafile, end)
2751 2060 dfh = self._datafp("a+")
2752 2061 def flush():
2753 2062 if dfh:
2754 2063 dfh.flush()
2755 2064 ifh.flush()
2756 2065 try:
2757 deltacomputer = _deltacomputer(self)
2066 deltacomputer = deltautil.deltacomputer(self)
2758 2067 # loop through our set of deltas
2759 2068 for data in deltas:
2760 2069 node, p1, p2, linknode, deltabase, delta, flags = data
2761 2070 link = linkmapper(linknode)
2762 2071 flags = flags or REVIDX_DEFAULT_FLAGS
2763 2072
2764 2073 nodes.append(node)
2765 2074
2766 2075 if node in self.nodemap:
2767 2076 # this can happen if two branches make the same change
2768 2077 continue
2769 2078
2770 2079 for p in (p1, p2):
2771 2080 if p not in self.nodemap:
2772 2081 raise LookupError(p, self.indexfile,
2773 2082 _('unknown parent'))
2774 2083
2775 2084 if deltabase not in self.nodemap:
2776 2085 raise LookupError(deltabase, self.indexfile,
2777 2086 _('unknown delta base'))
2778 2087
2779 2088 baserev = self.rev(deltabase)
2780 2089
2781 2090 if baserev != nullrev and self.iscensored(baserev):
2782 2091 # if base is censored, delta must be full replacement in a
2783 2092 # single patch operation
2784 2093 hlen = struct.calcsize(">lll")
2785 2094 oldlen = self.rawsize(baserev)
2786 2095 newlen = len(delta) - hlen
2787 2096 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2788 2097 raise error.CensoredBaseError(self.indexfile,
2789 2098 self.node(baserev))
2790 2099
2791 2100 if not flags and self._peek_iscensored(baserev, delta, flush):
2792 2101 flags |= REVIDX_ISCENSORED
2793 2102
2794 2103 # We assume consumers of addrevisioncb will want to retrieve
2795 2104 # the added revision, which will require a call to
2796 2105 # revision(). revision() will fast path if there is a cache
2797 2106 # hit. So, we tell _addrevision() to always cache in this case.
2798 2107 # We're only using addgroup() in the context of changegroup
2799 2108 # generation so the revision data can always be handled as raw
2800 2109 # by the flagprocessor.
2801 2110 self._addrevision(node, None, transaction, link,
2802 2111 p1, p2, flags, (baserev, delta),
2803 2112 ifh, dfh,
2804 2113 alwayscache=bool(addrevisioncb),
2805 2114 deltacomputer=deltacomputer)
2806 2115
2807 2116 if addrevisioncb:
2808 2117 addrevisioncb(self, node)
2809 2118
2810 2119 if not dfh and not self._inline:
2811 2120 # addrevision switched from inline to conventional
2812 2121 # reopen the index
2813 2122 ifh.close()
2814 2123 dfh = self._datafp("a+")
2815 2124 ifh = self._indexfp("a+")
2816 2125 finally:
2817 2126 if dfh:
2818 2127 dfh.close()
2819 2128 ifh.close()
2820 2129
2821 2130 return nodes
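
As an illustrative sketch only (none of these names come from the change itself), the iterable consumed by addgroup() is a series of 7-tuples, and the linkmapper turns each linknode into the changelog revision it is bound to:

    # Hypothetical caller-side sketch; `fl`, `repo` and the node/delta values
    # are assumed to exist, and `deltabytes` to be a valid bdiff patch.
    deltas = [
        # (node, p1, p2, linknode, deltabase, delta, flags)
        (newnode, p1node, p2node, linknode, basenode, deltabytes, 0),
    ]
    with repo.transaction('illustration') as tr:
        added = fl.addgroup(deltas, repo.changelog.rev, tr)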
2822 2131
2823 2132 def iscensored(self, rev):
2824 2133 """Check if a file revision is censored."""
2825 2134 if not self._censorable:
2826 2135 return False
2827 2136
2828 2137 return self.flags(rev) & REVIDX_ISCENSORED
2829 2138
2830 2139 def _peek_iscensored(self, baserev, delta, flush):
2831 2140 """Quickly check if a delta produces a censored revision."""
2832 2141 if not self._censorable:
2833 2142 return False
2834 2143
2835 2144 # Fragile heuristic: unless new file meta keys are added alphabetically
2836 2145 # preceding "censored", all censored revisions are prefixed by
2837 2146 # "\1\ncensored:". A delta producing such a censored revision must be a
2838 2147 # full-replacement delta, so we inspect the first and only patch in the
2839 2148 # delta for this prefix.
2840 2149 hlen = struct.calcsize(">lll")
2841 2150 if len(delta) <= hlen:
2842 2151 return False
2843 2152
2844 2153 oldlen = self.rawsize(baserev)
2845 2154 newlen = len(delta) - hlen
2846 2155 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2847 2156 return False
2848 2157
2849 2158 add = "\1\ncensored:"
2850 2159 addlen = len(add)
2851 2160 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2852 2161
2853 2162 def getstrippoint(self, minlink):
2854 2163 """find the minimum rev that must be stripped to strip the linkrev
2855 2164
2856 2165 Returns a tuple containing the minimum rev and a set of all revs that
2857 2166 have linkrevs that will be broken by this strip.
2858 2167 """
2859 2168 brokenrevs = set()
2860 2169 strippoint = len(self)
2861 2170
2862 2171 heads = {}
2863 2172 futurelargelinkrevs = set()
2864 2173 for head in self.headrevs():
2865 2174 headlinkrev = self.linkrev(head)
2866 2175 heads[head] = headlinkrev
2867 2176 if headlinkrev >= minlink:
2868 2177 futurelargelinkrevs.add(headlinkrev)
2869 2178
2870 2179 # This algorithm involves walking down the rev graph, starting at the
2871 2180 # heads. Since the revs are topologically sorted according to linkrev,
2872 2181 # once all head linkrevs are below the minlink, we know there are
2873 2182 # no more revs that could have a linkrev greater than minlink.
2874 2183 # So we can stop walking.
2875 2184 while futurelargelinkrevs:
2876 2185 strippoint -= 1
2877 2186 linkrev = heads.pop(strippoint)
2878 2187
2879 2188 if linkrev < minlink:
2880 2189 brokenrevs.add(strippoint)
2881 2190 else:
2882 2191 futurelargelinkrevs.remove(linkrev)
2883 2192
2884 2193 for p in self.parentrevs(strippoint):
2885 2194 if p != nullrev:
2886 2195 plinkrev = self.linkrev(p)
2887 2196 heads[p] = plinkrev
2888 2197 if plinkrev >= minlink:
2889 2198 futurelargelinkrevs.add(plinkrev)
2890 2199
2891 2200 return strippoint, brokenrevs
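
A usage sketch, assuming `rl` is an open revlog and `minlink` is the first changelog revision being stripped; the result feeds directly into strip() below:

    strippoint, brokenrevs = rl.getstrippoint(minlink)
    # revisions >= strippoint will be truncated by rl.strip(minlink, tr);
    # brokenrevs are revisions that get truncated even though their linked
    # changeset survives, so the caller must save and re-add them afterwards.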
2892 2201
2893 2202 def strip(self, minlink, transaction):
2894 2203 """truncate the revlog on the first revision with a linkrev >= minlink
2895 2204
2896 2205 This function is called when we're stripping revision minlink and
2897 2206 its descendants from the repository.
2898 2207
2899 2208 We have to remove all revisions with linkrev >= minlink, because
2900 2209 the equivalent changelog revisions will be renumbered after the
2901 2210 strip.
2902 2211
2903 2212 So we truncate the revlog on the first of these revisions, and
2904 2213 trust that the caller has saved the revisions that shouldn't be
2905 2214 removed and that it'll re-add them after this truncation.
2906 2215 """
2907 2216 if len(self) == 0:
2908 2217 return
2909 2218
2910 2219 rev, _ = self.getstrippoint(minlink)
2911 2220 if rev == len(self):
2912 2221 return
2913 2222
2914 2223 # first truncate the files on disk
2915 2224 end = self.start(rev)
2916 2225 if not self._inline:
2917 2226 transaction.add(self.datafile, end)
2918 2227 end = rev * self._io.size
2919 2228 else:
2920 2229 end += rev * self._io.size
2921 2230
2922 2231 transaction.add(self.indexfile, end)
2923 2232
2924 2233 # then reset internal state in memory to forget those revisions
2925 2234 self._cache = None
2926 2235 self._chaininfocache = {}
2927 2236 self._chunkclear()
2928 2237 for x in pycompat.xrange(rev, len(self)):
2929 2238 del self.nodemap[self.node(x)]
2930 2239
2931 2240 del self.index[rev:-1]
2932 2241 self._nodepos = None
2933 2242
2934 2243 def checksize(self):
2935 2244 expected = 0
2936 2245 if len(self):
2937 2246 expected = max(0, self.end(len(self) - 1))
2938 2247
2939 2248 try:
2940 2249 with self._datafp() as f:
2941 2250 f.seek(0, 2)
2942 2251 actual = f.tell()
2943 2252 dd = actual - expected
2944 2253 except IOError as inst:
2945 2254 if inst.errno != errno.ENOENT:
2946 2255 raise
2947 2256 dd = 0
2948 2257
2949 2258 try:
2950 2259 f = self.opener(self.indexfile)
2951 2260 f.seek(0, 2)
2952 2261 actual = f.tell()
2953 2262 f.close()
2954 2263 s = self._io.size
2955 2264 i = max(0, actual // s)
2956 2265 di = actual - (i * s)
2957 2266 if self._inline:
2958 2267 databytes = 0
2959 2268 for r in self:
2960 2269 databytes += max(0, self.length(r))
2961 2270 dd = 0
2962 2271 di = actual - len(self) * s - databytes
2963 2272 except IOError as inst:
2964 2273 if inst.errno != errno.ENOENT:
2965 2274 raise
2966 2275 di = 0
2967 2276
2968 2277 return (dd, di)
2969 2278
2970 2279 def files(self):
2971 2280 res = [self.indexfile]
2972 2281 if not self._inline:
2973 2282 res.append(self.datafile)
2974 2283 return res
2975 2284
2976 2285 def emitrevisiondeltas(self, requests):
2977 2286 frev = self.rev
2978 2287
2979 2288 prevrev = None
2980 2289 for request in requests:
2981 2290 node = request.node
2982 2291 rev = frev(node)
2983 2292
2984 2293 if prevrev is None:
2985 2294 prevrev = self.index[rev][5]
2986 2295
2987 2296 # Requesting a full revision.
2988 2297 if request.basenode == nullid:
2989 2298 baserev = nullrev
2990 2299 # Requesting an explicit revision.
2991 2300 elif request.basenode is not None:
2992 2301 baserev = frev(request.basenode)
2993 2302 # Allowing us to choose.
2994 2303 else:
2995 2304 p1rev, p2rev = self.parentrevs(rev)
2996 2305 deltaparentrev = self.deltaparent(rev)
2997 2306
2998 2307 # Avoid sending full revisions when delta parent is null. Pick
2999 2308 # prev in that case. It's tempting to pick p1 in this case, as
3000 2309 # p1 will be smaller in the common case. However, computing a
3001 2310 # delta against p1 may require resolving the raw text of p1,
3002 2311 # which could be expensive. The revlog caches should have prev
3003 2312 # cached, meaning less CPU for delta generation. There is
3004 2313 # likely room to add a flag and/or config option to control this
3005 2314 # behavior.
3006 2315 if deltaparentrev == nullrev and self._storedeltachains:
3007 2316 baserev = prevrev
3008 2317
3009 2318 # Revlog is configured to use full snapshot for a reason.
3010 2319 # Stick to full snapshot.
3011 2320 elif deltaparentrev == nullrev:
3012 2321 baserev = nullrev
3013 2322
3014 2323 # Pick previous when we can't be sure the base is available
3015 2324 # on consumer.
3016 2325 elif deltaparentrev not in (p1rev, p2rev, prevrev):
3017 2326 baserev = prevrev
3018 2327 else:
3019 2328 baserev = deltaparentrev
3020 2329
3021 2330 if baserev != nullrev and not self.candelta(baserev, rev):
3022 2331 baserev = nullrev
3023 2332
3024 2333 revision = None
3025 2334 delta = None
3026 2335 baserevisionsize = None
3027 2336
3028 2337 if self.iscensored(baserev) or self.iscensored(rev):
3029 2338 try:
3030 2339 revision = self.revision(node, raw=True)
3031 2340 except error.CensoredNodeError as e:
3032 2341 revision = e.tombstone
3033 2342
3034 2343 if baserev != nullrev:
3035 2344 baserevisionsize = self.rawsize(baserev)
3036 2345
3037 2346 elif baserev == nullrev:
3038 2347 revision = self.revision(node, raw=True)
3039 2348 else:
3040 2349 delta = self.revdiff(baserev, rev)
3041 2350
3042 2351 extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0
3043 2352
3044 2353 yield revlogrevisiondelta(
3045 2354 node=node,
3046 2355 p1node=request.p1node,
3047 2356 p2node=request.p2node,
3048 2357 linknode=request.linknode,
3049 2358 basenode=self.node(baserev),
3050 2359 flags=self.flags(rev) | extraflags,
3051 2360 baserevisionsize=baserevisionsize,
3052 2361 revision=revision,
3053 2362 delta=delta)
3054 2363
3055 2364 prevrev = rev
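
A hedged consumer sketch; the request objects and the consume_* helpers are hypothetical, only the attributes read above (node, basenode, p1node, p2node, linknode, ellipsis) are assumed:

    for rd in rl.emitrevisiondeltas(requests):
        if rd.delta is not None:
            consume_delta(rd.basenode, rd.delta)   # apply delta on top of basenode
        else:
            consume_fulltext(rd.revision)          # full (possibly censored) text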
3056 2365
3057 2366 DELTAREUSEALWAYS = 'always'
3058 2367 DELTAREUSESAMEREVS = 'samerevs'
3059 2368 DELTAREUSENEVER = 'never'
3060 2369
3061 2370 DELTAREUSEFULLADD = 'fulladd'
3062 2371
3063 2372 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
3064 2373
3065 2374 def clone(self, tr, destrevlog, addrevisioncb=None,
3066 2375 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
3067 2376 """Copy this revlog to another, possibly with format changes.
3068 2377
3069 2378 The destination revlog will contain the same revisions and nodes.
3070 2379 However, it may not be bit-for-bit identical due to e.g. delta encoding
3071 2380 differences.
3072 2381
3073 2382 The ``deltareuse`` argument controls how deltas from the existing revlog
3074 2383 are preserved in the destination revlog. The argument can have the
3075 2384 following values:
3076 2385
3077 2386 DELTAREUSEALWAYS
3078 2387 Deltas will always be reused (if possible), even if the destination
3079 2388 revlog would not select the same revisions for the delta. This is the
3080 2389 fastest mode of operation.
3081 2390 DELTAREUSESAMEREVS
3082 2391 Deltas will be reused if the destination revlog would pick the same
3083 2392 revisions for the delta. This mode strikes a balance between speed
3084 2393 and optimization.
3085 2394 DELTAREUSENEVER
3086 2395 Deltas will never be reused. This is the slowest mode of execution.
3087 2396 This mode can be used to recompute deltas (e.g. if the diff/delta
3088 2397 algorithm changes).
3089 2398
3090 2399 Delta computation can be slow, so the choice of delta reuse policy can
3091 2400 significantly affect run time.
3092 2401
3093 2402 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3094 2403 two extremes. Deltas will be reused if they are appropriate. But if the
3095 2404 delta could choose a better revision, it will do so. This means if you
3096 2405 are converting a non-generaldelta revlog to a generaldelta revlog,
3097 2406 deltas will be recomputed if the delta's parent isn't a parent of the
3098 2407 revision.
3099 2408
3100 2409 In addition to the delta policy, the ``deltabothparents`` argument
3101 2410 controls whether to compute deltas against both parents for merges.
3102 2411 If ``None``, the destination revlog's current setting is kept.
3103 2412 """
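
For example, a minimal sketch of copying into an empty destination revlog (`src`, `dst` and `tr` are assumed to exist):

    # slow, but recomputes every delta with the destination's settings
    src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER, deltabothparents=True)
    # alternatively, reuse existing deltas wherever possible (fastest):
    # src.clone(tr, dst, deltareuse=src.DELTAREUSEALWAYS)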
3104 2413 if deltareuse not in self.DELTAREUSEALL:
3105 2414 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
3106 2415
3107 2416 if len(destrevlog):
3108 2417 raise ValueError(_('destination revlog is not empty'))
3109 2418
3110 2419 if getattr(self, 'filteredrevs', None):
3111 2420 raise ValueError(_('source revlog has filtered revisions'))
3112 2421 if getattr(destrevlog, 'filteredrevs', None):
3113 2422 raise ValueError(_('destination revlog has filtered revisions'))
3114 2423
3115 2424 # lazydeltabase controls whether to reuse a cached delta, if possible.
3116 2425 oldlazydeltabase = destrevlog._lazydeltabase
3117 2426 oldamd = destrevlog._deltabothparents
3118 2427
3119 2428 try:
3120 2429 if deltareuse == self.DELTAREUSEALWAYS:
3121 2430 destrevlog._lazydeltabase = True
3122 2431 elif deltareuse == self.DELTAREUSESAMEREVS:
3123 2432 destrevlog._lazydeltabase = False
3124 2433
3125 2434 destrevlog._deltabothparents = deltabothparents or oldamd
3126 2435
3127 2436 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
3128 2437 self.DELTAREUSESAMEREVS)
3129 2438
3130 deltacomputer = _deltacomputer(destrevlog)
2439 deltacomputer = deltautil.deltacomputer(destrevlog)
3131 2440 index = self.index
3132 2441 for rev in self:
3133 2442 entry = index[rev]
3134 2443
3135 2444 # Some classes override linkrev to take filtered revs into
3136 2445 # account. Use raw entry from index.
3137 2446 flags = entry[0] & 0xffff
3138 2447 linkrev = entry[4]
3139 2448 p1 = index[entry[5]][7]
3140 2449 p2 = index[entry[6]][7]
3141 2450 node = entry[7]
3142 2451
3143 2452 # (Possibly) reuse the delta from the revlog if allowed and
3144 2453 # the revlog chunk is a delta.
3145 2454 cachedelta = None
3146 2455 rawtext = None
3147 2456 if populatecachedelta:
3148 2457 dp = self.deltaparent(rev)
3149 2458 if dp != nullrev:
3150 2459 cachedelta = (dp, bytes(self._chunk(rev)))
3151 2460
3152 2461 if not cachedelta:
3153 2462 rawtext = self.revision(rev, raw=True)
3154 2463
3155 2464
3156 2465 if deltareuse == self.DELTAREUSEFULLADD:
3157 2466 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
3158 2467 cachedelta=cachedelta,
3159 2468 node=node, flags=flags,
3160 2469 deltacomputer=deltacomputer)
3161 2470 else:
3162 2471 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
3163 2472 checkambig=False)
3164 2473 dfh = None
3165 2474 if not destrevlog._inline:
3166 2475 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
3167 2476 try:
3168 2477 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
3169 2478 p2, flags, cachedelta, ifh, dfh,
3170 2479 deltacomputer=deltacomputer)
3171 2480 finally:
3172 2481 if dfh:
3173 2482 dfh.close()
3174 2483 ifh.close()
3175 2484
3176 2485 if addrevisioncb:
3177 2486 addrevisioncb(self, rev, node)
3178 2487 finally:
3179 2488 destrevlog._lazydeltabase = oldlazydeltabase
3180 2489 destrevlog._deltabothparents = oldamd
@@ -1,46 +1,43 b''
1 1 # revlogdeltas.py - constant used for revlog logic
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 from .. import (
13 13 util,
14 14 )
15 15
16 16 # revlog header flags
17 17 REVLOGV0 = 0
18 18 REVLOGV1 = 1
19 19 # Dummy value until file format is finalized.
20 20 # Reminder: change the bounds check in revlog.__init__ when this is changed.
21 21 REVLOGV2 = 0xDEAD
22 22 FLAG_INLINE_DATA = (1 << 16)
23 23 FLAG_GENERALDELTA = (1 << 17)
24 24 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
25 25 REVLOG_DEFAULT_FORMAT = REVLOGV1
26 26 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
27 27 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
28 28 REVLOGV2_FLAGS = REVLOGV1_FLAGS
29 29
30 30 # revlog index flags
31 31 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
32 32 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
33 33 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
34 34 REVIDX_DEFAULT_FLAGS = 0
35 35 # stable order in which flags need to be processed and their processors applied
36 36 REVIDX_FLAGS_ORDER = [
37 37 REVIDX_ISCENSORED,
38 38 REVIDX_ELLIPSIS,
39 39 REVIDX_EXTSTORED,
40 40 ]
41 41 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
42 42 # bitmark for flags that could cause rawdata content change
43 43 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
44
45 # maximum <delta-chain-data>/<revision-text-length> ratio
46 LIMIT_DELTA2TEXT = 2
@@ -1,3180 +1,734 b''
1 # revlog.py - storage back-end for mercurial
1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 5 #
5 6 # This software may be used and distributed according to the terms of the
6 7 # GNU General Public License version 2 or any later version.
7
8 """Storage back-end for Mercurial.
9
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
12 """
8 """Helper class to compute deltas stored inside revlogs"""
13 9
14 10 from __future__ import absolute_import
15 11
16 import collections
17 import contextlib
18 import errno
19 import hashlib
20 12 import heapq
21 import os
22 import re
23 13 import struct
24 import zlib
25 14
26 15 # import stuff from node for others to import from revlog
27 from .node import (
28 bin,
29 hex,
30 nullhex,
31 nullid,
16 from ..node import (
32 17 nullrev,
33 wdirfilenodeids,
34 wdirhex,
35 wdirid,
36 wdirrev,
37 18 )
38 from .i18n import _
39 from .revlogutils.constants import (
40 FLAG_GENERALDELTA,
41 FLAG_INLINE_DATA,
42 LIMIT_DELTA2TEXT,
43 REVIDX_DEFAULT_FLAGS,
44 REVIDX_ELLIPSIS,
45 REVIDX_EXTSTORED,
46 REVIDX_FLAGS_ORDER,
19 from ..i18n import _
20
21 from .constants import (
47 22 REVIDX_ISCENSORED,
48 REVIDX_KNOWN_FLAGS,
49 23 REVIDX_RAWTEXT_CHANGING_FLAGS,
50 REVLOGV0,
51 REVLOGV1,
52 REVLOGV1_FLAGS,
53 REVLOGV2,
54 REVLOGV2_FLAGS,
55 REVLOG_DEFAULT_FLAGS,
56 REVLOG_DEFAULT_FORMAT,
57 REVLOG_DEFAULT_VERSION,
58 24 )
59 from .thirdparty import (
25
26 from ..thirdparty import (
60 27 attr,
61 28 )
62 from . import (
63 ancestor,
29
30 from .. import (
64 31 error,
65 32 mdiff,
66 policy,
67 pycompat,
68 repository,
69 templatefilters,
70 util,
71 33 )
72 from .utils import (
73 interfaceutil,
74 stringutil,
75 )
76
77 # blanked usage of all the name to prevent pyflakes constraints
78 # We need these name available in the module for extensions.
79 REVLOGV0
80 REVLOGV1
81 REVLOGV2
82 FLAG_INLINE_DATA
83 FLAG_GENERALDELTA
84 REVLOG_DEFAULT_FLAGS
85 REVLOG_DEFAULT_FORMAT
86 REVLOG_DEFAULT_VERSION
87 REVLOGV1_FLAGS
88 REVLOGV2_FLAGS
89 REVIDX_ISCENSORED
90 REVIDX_ELLIPSIS
91 REVIDX_EXTSTORED
92 REVIDX_DEFAULT_FLAGS
93 REVIDX_FLAGS_ORDER
94 REVIDX_KNOWN_FLAGS
95 REVIDX_RAWTEXT_CHANGING_FLAGS
96
97 parsers = policy.importmod(r'parsers')
98
99 # Aliased for performance.
100 _zlibdecompress = zlib.decompress
101
102 # max size of revlog with inline data
103 _maxinline = 131072
104 _chunksize = 1048576
105 34
106 35 RevlogError = error.RevlogError
107 LookupError = error.LookupError
108 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
109 36 CensoredNodeError = error.CensoredNodeError
110 ProgrammingError = error.ProgrammingError
111
112 # Store flag processors (cf. 'addflagprocessor()' to register)
113 _flagprocessors = {
114 REVIDX_ISCENSORED: None,
115 }
116
117 _mdre = re.compile('\1\n')
118 def parsemeta(text):
119 """return (metadatadict, metadatasize)"""
120 # text can be buffer, so we can't use .startswith or .index
121 if text[:2] != '\1\n':
122 return None, None
123 s = _mdre.search(text, 2).start()
124 mtext = text[2:s]
125 meta = {}
126 for l in mtext.splitlines():
127 k, v = l.split(": ", 1)
128 meta[k] = v
129 return meta, (s + 2)
130
131 def packmeta(meta, text):
132 keys = sorted(meta)
133 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
134 return "\1\n%s\1\n%s" % (metatext, text)
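
An illustrative round trip through these helpers (values are made up); the censored-revision prefix checked elsewhere in this file is just such a metadata block carrying a "censored" key:

    meta = {'copy': 'foo.txt', 'copyrev': '0' * 40}
    raw = packmeta(meta, 'file content\n')
    parsed, size = parsemeta(raw)
    assert parsed == meta and raw[size:] == 'file content\n'
    # a censored tombstone starts with "\1\ncensored:", which is what
    # _censoredtext() and _peek_iscensored() rely on
    assert packmeta({'censored': 'reason'}, '').startswith('\1\ncensored:')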
135
136 def _censoredtext(text):
137 m, offs = parsemeta(text)
138 return m and "censored" in m
139
140 def addflagprocessor(flag, processor):
141 """Register a flag processor on a revision data flag.
142 37
143 Invariant:
144 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
145 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
146 - Only one flag processor can be registered on a specific flag.
147 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
148 following signatures:
149 - (read) f(self, rawtext) -> text, bool
150 - (write) f(self, text) -> rawtext, bool
151 - (raw) f(self, rawtext) -> bool
152 "text" is presented to the user. "rawtext" is stored in revlog data, not
153 directly visible to the user.
154 The boolean returned by these transforms is used to determine whether
155 the returned text can be used for hash integrity checking. For example,
156 if "write" returns False, then "text" is used to generate hash. If
157 "write" returns True, that basically means "rawtext" returned by "write"
158 should be used to generate hash. Usually, "write" and "read" return
159 different booleans. And "raw" returns the same boolean as "write".
160
161 Note: The 'raw' transform is used for changegroup generation and in some
162 debug commands. In this case the transform only indicates whether the
163 contents can be used for hash integrity checks.
164 """
165 if not flag & REVIDX_KNOWN_FLAGS:
166 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
167 raise ProgrammingError(msg)
168 if flag not in REVIDX_FLAGS_ORDER:
169 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
170 raise ProgrammingError(msg)
171 if flag in _flagprocessors:
172 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
173 raise error.Abort(msg)
174 _flagprocessors[flag] = processor
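
A registration sketch; REVIDX_EXTSTORED is used purely as an example flag and the no-op transforms are hypothetical:

    def _readext(self, rawtext):
        return rawtext, True    # returned text may be used for hash checking
    def _writeext(self, text):
        return text, False      # hash is computed from "text", not "rawtext"
    def _rawext(self, rawtext):
        return False            # same boolean as "write"
    addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))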
175
176 def getoffset(q):
177 return int(q >> 16)
178
179 def gettype(q):
180 return int(q & 0xFFFF)
181
182 def offset_type(offset, type):
183 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
184 raise ValueError('unknown revlog index flags')
185 return int(int(offset) << 16 | type)
186
187 _nullhash = hashlib.sha1(nullid)
188
189 def hash(text, p1, p2):
190 """generate a hash from the given text and its parent hashes
191
192 This hash combines both the current file contents and its history
193 in a manner that makes it easy to distinguish nodes with the same
194 content in the revision graph.
195 """
196 # As of now, if one of the parent node is null, p2 is null
197 if p2 == nullid:
198 # deep copy of a hash is faster than creating one
199 s = _nullhash.copy()
200 s.update(p1)
201 else:
202 # none of the parent nodes are nullid
203 if p1 < p2:
204 a = p1
205 b = p2
206 else:
207 a = p2
208 b = p1
209 s = hashlib.sha1(a)
210 s.update(b)
211 s.update(text)
212 return s.digest()
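
One consequence worth illustrating: because the two non-null parents are sorted before hashing, the resulting node id does not depend on the order in which they are passed (`pa` and `pb` stand for arbitrary non-null 20-byte parent ids):

    # parent order is irrelevant for non-null parents
    assert hash('some text', pa, pb) == hash('some text', pb, pa)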
38 # maximum <delta-chain-data>/<revision-text-length> ratio
39 LIMIT_DELTA2TEXT = 2
213 40
214 41 class _testrevlog(object):
215 42 """minimalist fake revlog to use in doctests"""
216 43
217 44 def __init__(self, data, density=0.5, mingap=0):
218 45 """data is an list of revision payload boundaries"""
219 46 self._data = data
220 47 self._srdensitythreshold = density
221 48 self._srmingapsize = mingap
222 49
223 50 def start(self, rev):
224 51 if rev == 0:
225 52 return 0
226 53 return self._data[rev - 1]
227 54
228 55 def end(self, rev):
229 56 return self._data[rev]
230 57
231 58 def length(self, rev):
232 59 return self.end(rev) - self.start(rev)
233 60
234 61 def __len__(self):
235 62 return len(self._data)
236 63
237 def _trimchunk(revlog, revs, startidx, endidx=None):
238 """returns revs[startidx:endidx] without empty trailing revs
239
240 Doctest Setup
241 >>> revlog = _testrevlog([
242 ... 5, #0
243 ... 10, #1
244 ... 12, #2
245 ... 12, #3 (empty)
246 ... 17, #4
247 ... 21, #5
248 ... 21, #6 (empty)
249 ... ])
250
251 Contiguous cases:
252 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
253 [0, 1, 2, 3, 4, 5]
254 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
255 [0, 1, 2, 3, 4]
256 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
257 [0, 1, 2]
258 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
259 [2]
260 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
261 [3, 4, 5]
262 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
263 [3, 4]
264
265 Discontiguous cases:
266 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
267 [1, 3, 5]
268 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
269 [1]
270 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
271 [3, 5]
272 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
273 [3, 5]
274 """
275 length = revlog.length
276
277 if endidx is None:
278 endidx = len(revs)
279
280 # If we have a non-empty delta candidate, there is nothing to trim
281 if revs[endidx - 1] < len(revlog):
282 # Trim empty revs at the end, except the very first revision of a chain
283 while (endidx > 1
284 and endidx > startidx
285 and length(revs[endidx - 1]) == 0):
286 endidx -= 1
287
288 return revs[startidx:endidx]
289
290 def _segmentspan(revlog, revs, deltainfo=None):
291 """Get the byte span of a segment of revisions
292
293 revs is a sorted array of revision numbers
294
295 >>> revlog = _testrevlog([
296 ... 5, #0
297 ... 10, #1
298 ... 12, #2
299 ... 12, #3 (empty)
300 ... 17, #4
301 ... ])
302
303 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
304 17
305 >>> _segmentspan(revlog, [0, 4])
306 17
307 >>> _segmentspan(revlog, [3, 4])
308 5
309 >>> _segmentspan(revlog, [1, 2, 3,])
310 7
311 >>> _segmentspan(revlog, [1, 3])
312 7
313 """
314 if not revs:
315 return 0
316 if deltainfo is not None and len(revlog) <= revs[-1]:
317 if len(revs) == 1:
318 return deltainfo.deltalen
319 offset = revlog.end(len(revlog) - 1)
320 end = deltainfo.deltalen + offset
321 else:
322 end = revlog.end(revs[-1])
323 return end - revlog.start(revs[0])
324
325 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
64 def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
326 65 """slice revs to reduce the amount of unrelated data to be read from disk.
327 66
328 67 ``revs`` is sliced into groups that should be read in one time.
329 68 Assume that revs are sorted.
330 69
331 70 The initial chunk is sliced until the overall density (payload/chunks-span
332 71 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
333 72 `revlog._srmingapsize` is skipped.
334 73
335 74 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
336 75 For consistency with other slicing choices, this limit won't go lower than
337 76 `revlog._srmingapsize`.
338 77
339 78 If individual revision chunks are larger than this limit, they will still
340 79 be yielded individually.
341 80
342 81 >>> revlog = _testrevlog([
343 82 ... 5, #00 (5)
344 83 ... 10, #01 (5)
345 84 ... 12, #02 (2)
346 85 ... 12, #03 (empty)
347 86 ... 27, #04 (15)
348 87 ... 31, #05 (4)
349 88 ... 31, #06 (empty)
350 89 ... 42, #07 (11)
351 90 ... 47, #08 (5)
352 91 ... 47, #09 (empty)
353 92 ... 48, #10 (1)
354 93 ... 51, #11 (3)
355 94 ... 74, #12 (23)
356 95 ... 85, #13 (11)
357 96 ... 86, #14 (1)
358 97 ... 91, #15 (5)
359 98 ... ])
360 99
361 >>> list(_slicechunk(revlog, list(range(16))))
100 >>> list(slicechunk(revlog, list(range(16))))
362 101 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
363 >>> list(_slicechunk(revlog, [0, 15]))
102 >>> list(slicechunk(revlog, [0, 15]))
364 103 [[0], [15]]
365 >>> list(_slicechunk(revlog, [0, 11, 15]))
104 >>> list(slicechunk(revlog, [0, 11, 15]))
366 105 [[0], [11], [15]]
367 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
106 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
368 107 [[0], [11, 13, 15]]
369 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
108 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
370 109 [[1, 2], [5, 8, 10, 11], [14]]
371 110
372 111 Slicing with a maximum chunk size
373 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
112 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
374 113 [[0], [11], [13], [15]]
375 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
114 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
376 115 [[0], [11], [13, 15]]
377 116 """
378 117 if targetsize is not None:
379 118 targetsize = max(targetsize, revlog._srmingapsize)
380 119 # targetsize should not be specified when evaluating delta candidates:
381 120 # * targetsize is used to ensure we stay within specification when reading,
382 121 # * deltainfo is used to pick a good delta chain when writing.
383 122 if not (deltainfo is None or targetsize is None):
384 123 msg = 'cannot use `targetsize` with a `deltainfo`'
385 124 raise error.ProgrammingError(msg)
386 125 for chunk in _slicechunktodensity(revlog, revs,
387 126 deltainfo,
388 127 revlog._srdensitythreshold,
389 128 revlog._srmingapsize):
390 129 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
391 130 yield subchunk
392 131
393 132 def _slicechunktosize(revlog, revs, targetsize=None):
394 133 """slice revs to match the target size
395 134
396 135 This is intended to be used on chunk that density slicing selected by that
397 136 are still too large compared to the read garantee of revlog. This might
398 137 happens when "minimal gap size" interrupted the slicing or when chain are
399 138 built in a way that create large blocks next to each other.
400 139
401 140 >>> revlog = _testrevlog([
402 141 ... 3, #0 (3)
403 142 ... 5, #1 (2)
404 143 ... 6, #2 (1)
405 144 ... 8, #3 (2)
406 145 ... 8, #4 (empty)
407 146 ... 11, #5 (3)
408 147 ... 12, #6 (1)
409 148 ... 13, #7 (1)
410 149 ... 14, #8 (1)
411 150 ... ])
412 151
413 152 Cases where chunk is already small enough
414 153 >>> list(_slicechunktosize(revlog, [0], 3))
415 154 [[0]]
416 155 >>> list(_slicechunktosize(revlog, [6, 7], 3))
417 156 [[6, 7]]
418 157 >>> list(_slicechunktosize(revlog, [0], None))
419 158 [[0]]
420 159 >>> list(_slicechunktosize(revlog, [6, 7], None))
421 160 [[6, 7]]
422 161
423 162 cases where we need actual slicing
424 163 >>> list(_slicechunktosize(revlog, [0, 1], 3))
425 164 [[0], [1]]
426 165 >>> list(_slicechunktosize(revlog, [1, 3], 3))
427 166 [[1], [3]]
428 167 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
429 168 [[1, 2], [3]]
430 169 >>> list(_slicechunktosize(revlog, [3, 5], 3))
431 170 [[3], [5]]
432 171 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
433 172 [[3], [5]]
434 173 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
435 174 [[5], [6, 7, 8]]
436 175 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
437 176 [[0], [1, 2], [3], [5], [6, 7, 8]]
438 177
439 178 Case with too large individual chunk (must return valid chunk)
440 179 >>> list(_slicechunktosize(revlog, [0, 1], 2))
441 180 [[0], [1]]
442 181 >>> list(_slicechunktosize(revlog, [1, 3], 1))
443 182 [[1], [3]]
444 183 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
445 184 [[3], [5]]
446 185 """
447 186 assert targetsize is None or 0 <= targetsize
448 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
187 if targetsize is None or segmentspan(revlog, revs) <= targetsize:
449 188 yield revs
450 189 return
451 190
452 191 startrevidx = 0
453 192 startdata = revlog.start(revs[0])
454 193 endrevidx = 0
455 194 iterrevs = enumerate(revs)
456 195 next(iterrevs) # skip first rev.
457 196 for idx, r in iterrevs:
458 197 span = revlog.end(r) - startdata
459 198 if span <= targetsize:
460 199 endrevidx = idx
461 200 else:
462 201 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
463 202 if chunk:
464 203 yield chunk
465 204 startrevidx = idx
466 205 startdata = revlog.start(r)
467 206 endrevidx = idx
468 207 yield _trimchunk(revlog, revs, startrevidx)
469 208
470 209 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
471 210 mingapsize=0):
472 211 """slice revs to reduce the amount of unrelated data to be read from disk.
473 212
474 213 ``revs`` is sliced into groups that should be read in one time.
475 214 Assume that revs are sorted.
476 215
477 216 ``deltainfo`` is a _deltainfo instance of a revision that we would append
478 217 to the top of the revlog.
479 218
480 219 The initial chunk is sliced until the overall density (payload/chunks-span
481 220 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
482 221 skipped.
483 222
484 223 >>> revlog = _testrevlog([
485 224 ... 5, #00 (5)
486 225 ... 10, #01 (5)
487 226 ... 12, #02 (2)
488 227 ... 12, #03 (empty)
489 228 ... 27, #04 (15)
490 229 ... 31, #05 (4)
491 230 ... 31, #06 (empty)
492 231 ... 42, #07 (11)
493 232 ... 47, #08 (5)
494 233 ... 47, #09 (empty)
495 234 ... 48, #10 (1)
496 235 ... 51, #11 (3)
497 236 ... 74, #12 (23)
498 237 ... 85, #13 (11)
499 238 ... 86, #14 (1)
500 239 ... 91, #15 (5)
501 240 ... ])
502 241
503 242 >>> list(_slicechunktodensity(revlog, list(range(16))))
504 243 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
505 244 >>> list(_slicechunktodensity(revlog, [0, 15]))
506 245 [[0], [15]]
507 246 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
508 247 [[0], [11], [15]]
509 248 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
510 249 [[0], [11, 13, 15]]
511 250 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
512 251 [[1, 2], [5, 8, 10, 11], [14]]
513 252 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
514 253 ... mingapsize=20))
515 254 [[1, 2, 3, 5, 8, 10, 11], [14]]
516 255 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
517 256 ... targetdensity=0.95))
518 257 [[1, 2], [5], [8, 10, 11], [14]]
519 258 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
520 259 ... targetdensity=0.95, mingapsize=12))
521 260 [[1, 2], [5, 8, 10, 11], [14]]
522 261 """
523 262 start = revlog.start
524 263 length = revlog.length
525 264
526 265 if len(revs) <= 1:
527 266 yield revs
528 267 return
529 268
530 269 nextrev = len(revlog)
531 270 nextoffset = revlog.end(nextrev - 1)
532 271
533 272 if deltainfo is None:
534 deltachainspan = _segmentspan(revlog, revs)
273 deltachainspan = segmentspan(revlog, revs)
535 274 chainpayload = sum(length(r) for r in revs)
536 275 else:
537 276 deltachainspan = deltainfo.distance
538 277 chainpayload = deltainfo.compresseddeltalen
539 278
540 279 if deltachainspan < mingapsize:
541 280 yield revs
542 281 return
543 282
544 283 readdata = deltachainspan
545 284
546 285 if deltachainspan:
547 286 density = chainpayload / float(deltachainspan)
548 287 else:
549 288 density = 1.0
550 289
551 290 if density >= targetdensity:
552 291 yield revs
553 292 return
554 293
555 294 if deltainfo is not None and deltainfo.deltalen:
556 295 revs = list(revs)
557 296 revs.append(nextrev)
558 297
559 298 # Store the gaps in a heap to have them sorted by decreasing size
560 299 gapsheap = []
561 300 heapq.heapify(gapsheap)
562 301 prevend = None
563 302 for i, rev in enumerate(revs):
564 303 if rev < nextrev:
565 304 revstart = start(rev)
566 305 revlen = length(rev)
567 306 else:
568 307 revstart = nextoffset
569 308 revlen = deltainfo.deltalen
570 309
571 310 # Skip empty revisions to form larger holes
572 311 if revlen == 0:
573 312 continue
574 313
575 314 if prevend is not None:
576 315 gapsize = revstart - prevend
577 316 # only consider holes that are large enough
578 317 if gapsize > mingapsize:
579 318 heapq.heappush(gapsheap, (-gapsize, i))
580 319
581 320 prevend = revstart + revlen
582 321
583 322 # Collect the indices of the largest holes until the density is acceptable
584 323 indicesheap = []
585 324 heapq.heapify(indicesheap)
586 325 while gapsheap and density < targetdensity:
587 326 oppgapsize, gapidx = heapq.heappop(gapsheap)
588 327
589 328 heapq.heappush(indicesheap, gapidx)
590 329
591 330 # the gap sizes are stored as negatives to be sorted decreasingly
592 331 # by the heap
593 332 readdata -= (-oppgapsize)
594 333 if readdata > 0:
595 334 density = chainpayload / float(readdata)
596 335 else:
597 336 density = 1.0
598 337
599 338 # Cut the revs at collected indices
600 339 previdx = 0
601 340 while indicesheap:
602 341 idx = heapq.heappop(indicesheap)
603 342
604 343 chunk = _trimchunk(revlog, revs, previdx, idx)
605 344 if chunk:
606 345 yield chunk
607 346
608 347 previdx = idx
609 348
610 349 chunk = _trimchunk(revlog, revs, previdx)
611 350 if chunk:
612 351 yield chunk
613 352
353 def _trimchunk(revlog, revs, startidx, endidx=None):
354 """returns revs[startidx:endidx] without empty trailing revs
355
356 Doctest Setup
357 >>> revlog = _testrevlog([
358 ... 5, #0
359 ... 10, #1
360 ... 12, #2
361 ... 12, #3 (empty)
362 ... 17, #4
363 ... 21, #5
364 ... 21, #6 (empty)
365 ... ])
366
367 Contiguous cases:
368 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
369 [0, 1, 2, 3, 4, 5]
370 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
371 [0, 1, 2, 3, 4]
372 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
373 [0, 1, 2]
374 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
375 [2]
376 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
377 [3, 4, 5]
378 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
379 [3, 4]
380
381 Discontiguous cases:
382 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
383 [1, 3, 5]
384 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
385 [1]
386 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
387 [3, 5]
388 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
389 [3, 5]
390 """
391 length = revlog.length
392
393 if endidx is None:
394 endidx = len(revs)
395
396 # If we have a non-empty delta candidate, there is nothing to trim
397 if revs[endidx - 1] < len(revlog):
398 # Trim empty revs at the end, except the very first revision of a chain
399 while (endidx > 1
400 and endidx > startidx
401 and length(revs[endidx - 1]) == 0):
402 endidx -= 1
403
404 return revs[startidx:endidx]
405
406 def segmentspan(revlog, revs, deltainfo=None):
407 """Get the byte span of a segment of revisions
408
409 revs is a sorted array of revision numbers
410
411 >>> revlog = _testrevlog([
412 ... 5, #0
413 ... 10, #1
414 ... 12, #2
415 ... 12, #3 (empty)
416 ... 17, #4
417 ... ])
418
419 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
420 17
421 >>> segmentspan(revlog, [0, 4])
422 17
423 >>> segmentspan(revlog, [3, 4])
424 5
425 >>> segmentspan(revlog, [1, 2, 3,])
426 7
427 >>> segmentspan(revlog, [1, 3])
428 7
429 """
430 if not revs:
431 return 0
432 if deltainfo is not None and len(revlog) <= revs[-1]:
433 if len(revs) == 1:
434 return deltainfo.deltalen
435 offset = revlog.end(len(revlog) - 1)
436 end = deltainfo.deltalen + offset
437 else:
438 end = revlog.end(revs[-1])
439 return end - revlog.start(revs[0])
440
614 441 @attr.s(slots=True, frozen=True)
615 442 class _deltainfo(object):
616 443 distance = attr.ib()
617 444 deltalen = attr.ib()
618 445 data = attr.ib()
619 446 base = attr.ib()
620 447 chainbase = attr.ib()
621 448 chainlen = attr.ib()
622 449 compresseddeltalen = attr.ib()
623 450 snapshotdepth = attr.ib()
624 451
625 class _deltacomputer(object):
452 def isgooddeltainfo(revlog, deltainfo, revinfo):
453 """Returns True if the given delta is good. Good means that it is within
454 the disk span, disk size, and chain length bounds that we know to be
455 performant."""
456 if deltainfo is None:
457 return False
458
459 # - 'deltainfo.distance' is the distance from the base revision --
460 # bounding it limits the amount of I/O we need to do.
461 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
462 # deltas we need to apply -- bounding it limits the amount of CPU
463 # we consume.
464
465 if revlog._sparserevlog:
466 # As sparse-read will be used, we can consider that the distance,
467 # instead of being the span of the whole chunk,
468 # is the span of the largest read chunk
469 base = deltainfo.base
470
471 if base != nullrev:
472 deltachain = revlog._deltachain(base)[0]
473 else:
474 deltachain = []
475
476 # search for the first non-snapshot revision
477 for idx, r in enumerate(deltachain):
478 if not revlog.issnapshot(r):
479 break
480 deltachain = deltachain[idx:]
481 chunks = slicechunk(revlog, deltachain, deltainfo)
482 all_span = [segmentspan(revlog, revs, deltainfo)
483 for revs in chunks]
484 distance = max(all_span)
485 else:
486 distance = deltainfo.distance
487
488 textlen = revinfo.textlen
489 defaultmax = textlen * 4
490 maxdist = revlog._maxdeltachainspan
491 if not maxdist:
492 maxdist = distance # ensure the conditional passes
493 maxdist = max(maxdist, defaultmax)
494 if revlog._sparserevlog and maxdist < revlog._srmingapsize:
495 # In multiple places, we ignore irrelevant data ranges below a
496 # certain size. We also apply that tradeoff here and relax the span
497 # constraint for small enough content.
498 maxdist = revlog._srmingapsize
499
500 # Bad delta from read span:
501 #
502 # If the span of data read is larger than the maximum allowed.
503 if maxdist < distance:
504 return False
505
506 # Bad delta from new delta size:
507 #
508 # If the delta size is larger than the target text, storing the
509 # delta will be inefficient.
510 if textlen < deltainfo.deltalen:
511 return False
512
513 # Bad delta from cumulated payload size:
514 #
515 # If the sum of the deltas gets larger than K * the target text length.
516 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
517 return False
518
519 # Bad delta from chain length:
520 #
521 # If the number of deltas in the chain gets too high.
522 if (revlog._maxchainlen
523 and revlog._maxchainlen < deltainfo.chainlen):
524 return False
525
526 # bad delta from intermediate snapshot size limit
527 #
528 # Reject the delta if an intermediate snapshot's size exceeds the limit.
529 # The limit exists to prevent endless chains of intermediate deltas from
530 # being created.
531 if (deltainfo.snapshotdepth is not None and
532 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
533 return False
534
535 # bad delta if new intermediate snapshot is larger than the previous
536 # snapshot
537 if (deltainfo.snapshotdepth
538 and revlog.length(deltainfo.base) < deltainfo.deltalen):
539 return False
540
541 return True
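
Illustrative arithmetic for the two size bounds above, using a made-up 1000-byte revision text:

    textlen = 1000
    assert textlen * 4 == 4000                  # default read-span ceiling (defaultmax)
    assert textlen * LIMIT_DELTA2TEXT == 2000   # cumulated compressed-delta ceiling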
542
543 class deltacomputer(object):
626 544 def __init__(self, revlog):
627 545 self.revlog = revlog
628 546
629 547 def _getcandidaterevs(self, p1, p2, cachedelta):
630 548 """
631 549 Provides revisions that present an interest to be diffed against,
632 550 grouped by level of easiness.
633 551 """
634 552 revlog = self.revlog
635 553 gdelta = revlog._generaldelta
636 554 curr = len(revlog)
637 555 prev = curr - 1
638 556 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
639 557
640 558 # should we try to build a delta?
641 559 if prev != nullrev and revlog._storedeltachains:
642 560 tested = set()
643 561 # This condition is true most of the time when processing
644 562 # changegroup data into a generaldelta repo. The only time it
645 563 # isn't true is if this is the first revision in a delta chain
646 564 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
647 565 if cachedelta and gdelta and revlog._lazydeltabase:
648 566 # Assume what we received from the server is a good choice
649 567 # build delta will reuse the cache
650 568 yield (cachedelta[0],)
651 569 tested.add(cachedelta[0])
652 570
653 571 if gdelta:
654 572 # exclude already lazy tested base if any
655 573 parents = [p for p in (p1r, p2r)
656 574 if p != nullrev and p not in tested]
657 575
658 576 if not revlog._deltabothparents and len(parents) == 2:
659 577 parents.sort()
660 578 # To minimize the chance of having to build a fulltext,
661 579 # pick first whichever parent is closest to us (max rev)
662 580 yield (parents[1],)
663 581 # then the other one (min rev) if the first did not fit
664 582 yield (parents[0],)
665 583 tested.update(parents)
666 584 elif len(parents) > 0:
667 585 # Test all parents (1 or 2), and keep the best candidate
668 586 yield parents
669 587 tested.update(parents)
670 588
671 589 if prev not in tested:
672 590 # other approaches failed; try against prev to hopefully save us a
673 591 # fulltext.
674 592 yield (prev,)
675 593 tested.add(prev)
676 594
677 595 def buildtext(self, revinfo, fh):
678 596 """Builds a fulltext version of a revision
679 597
680 598 revinfo: _revisioninfo instance that contains all needed info
681 599 fh: file handle to either the .i or the .d revlog file,
682 600 depending on whether it is inlined or not
683 601 """
684 602 btext = revinfo.btext
685 603 if btext[0] is not None:
686 604 return btext[0]
687 605
688 606 revlog = self.revlog
689 607 cachedelta = revinfo.cachedelta
690 608 flags = revinfo.flags
691 609 node = revinfo.node
692 610
693 611 baserev = cachedelta[0]
694 612 delta = cachedelta[1]
695 613 # special case deltas which replace entire base; no need to decode
696 614 # base revision. this neatly avoids censored bases, which throw when
697 615 # they're decoded.
698 616 hlen = struct.calcsize(">lll")
699 617 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
700 618 len(delta) - hlen):
701 619 btext[0] = delta[hlen:]
702 620 else:
703 621 # deltabase is rawtext before changed by flag processors, which is
704 622 # equivalent to non-raw text
705 623 basetext = revlog.revision(baserev, _df=fh, raw=False)
706 624 btext[0] = mdiff.patch(basetext, delta)
707 625
708 626 try:
709 627 res = revlog._processflags(btext[0], flags, 'read', raw=True)
710 628 btext[0], validatehash = res
711 629 if validatehash:
712 630 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
713 631 if flags & REVIDX_ISCENSORED:
714 632 raise RevlogError(_('node %s is not censored') % node)
715 633 except CensoredNodeError:
716 634 # must pass the censored index flag to add censored revisions
717 635 if not flags & REVIDX_ISCENSORED:
718 636 raise
719 637 return btext[0]
720 638
721 639 def _builddeltadiff(self, base, revinfo, fh):
722 640 revlog = self.revlog
723 641 t = self.buildtext(revinfo, fh)
724 642 if revlog.iscensored(base):
725 643 # deltas based on a censored revision must replace the
726 644 # full content in one patch, so delta works everywhere
727 645 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
728 646 delta = header + t
729 647 else:
730 648 ptext = revlog.revision(base, _df=fh, raw=True)
731 649 delta = mdiff.textdiff(ptext, t)
732 650
733 651 return delta
734 652
735 653 def _builddeltainfo(self, revinfo, base, fh):
736 654 # can we use the cached delta?
737 655 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
738 656 delta = revinfo.cachedelta[1]
739 657 else:
740 658 delta = self._builddeltadiff(base, revinfo, fh)
741 659 revlog = self.revlog
742 660 header, data = revlog.compress(delta)
743 661 deltalen = len(header) + len(data)
744 662 chainbase = revlog.chainbase(base)
745 663 offset = revlog.end(len(revlog) - 1)
746 664 dist = deltalen + offset - revlog.start(chainbase)
747 665 if revlog._generaldelta:
748 666 deltabase = base
749 667 else:
750 668 deltabase = chainbase
751 669 chainlen, compresseddeltalen = revlog._chaininfo(base)
752 670 chainlen += 1
753 671 compresseddeltalen += deltalen
754 672
755 673 revlog = self.revlog
756 674 snapshotdepth = None
757 675 if deltabase == nullrev:
758 676 snapshotdepth = 0
759 677 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
760 678 # A delta chain should always be one full snapshot,
761 679 # zero or more semi-snapshots, and zero or more deltas
762 680 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
763 681 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
764 682 snapshotdepth = len(revlog._deltachain(deltabase)[0])
765 683
766 684 return _deltainfo(dist, deltalen, (header, data), deltabase,
767 685 chainbase, chainlen, compresseddeltalen,
768 686 snapshotdepth)
769 687
770 688 def finddeltainfo(self, revinfo, fh):
771 689 """Find an acceptable delta against a candidate revision
772 690
773 691 revinfo: information about the revision (instance of _revisioninfo)
774 692 fh: file handle to either the .i or the .d revlog file,
775 693 depending on whether it is inlined or not
776 694
777 695 Returns the first acceptable candidate revision, as ordered by
778 696 _getcandidaterevs
779 697 """
780 698 if not revinfo.textlen:
781 699 return None # empty files do not need a delta
782 700
783 701 cachedelta = revinfo.cachedelta
784 702 p1 = revinfo.p1
785 703 p2 = revinfo.p2
786 704 revlog = self.revlog
787 705
788 706 deltalength = self.revlog.length
789 707 deltaparent = self.revlog.deltaparent
790 708
791 709 deltainfo = None
792 710 deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
793 711 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
794 712 # filter out delta bases that will never produce a good delta
795 713 candidaterevs = [r for r in candidaterevs
796 714 if self.revlog.length(r) <= deltas_limit]
797 715 nominateddeltas = []
798 716 for candidaterev in candidaterevs:
799 717 # skip over empty deltas (no need to include them in a chain)
800 718 while candidaterev != nullrev and not deltalength(candidaterev):
801 719 candidaterev = deltaparent(candidaterev)
802 720 # no need to try a delta against nullid, this will be handled
803 721 # by fulltext later.
804 722 if candidaterev == nullrev:
805 723 continue
806 724 # no delta for rawtext-changing revs (see "candelta" for why)
807 725 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
808 726 continue
809 727 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
810 if revlog._isgooddeltainfo(candidatedelta, revinfo):
728 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
811 729 nominateddeltas.append(candidatedelta)
812 730 if nominateddeltas:
813 731 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
814 732 break
815 733
816 734 return deltainfo
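
A caller-side sketch of the new API (assuming `rl` is an open revlog and `revinfo` a _revisioninfo describing the revision being stored):

    dc = deltacomputer(rl)
    with rl._datareadfp() as fh:
        info = dc.finddeltainfo(revinfo, fh)
    if info is None:
        pass  # no acceptable delta: the revision is stored as a full snapshot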
817
818 @attr.s(slots=True, frozen=True)
819 class _revisioninfo(object):
820 """Information about a revision that allows building its fulltext
821 node: expected hash of the revision
822 p1, p2: parent revs of the revision
823 btext: built text cache consisting of a one-element list
824 cachedelta: (baserev, uncompressed_delta) or None
825 flags: flags associated to the revision storage
826
827 One of btext[0] or cachedelta must be set.
828 """
829 node = attr.ib()
830 p1 = attr.ib()
831 p2 = attr.ib()
832 btext = attr.ib()
833 textlen = attr.ib()
834 cachedelta = attr.ib()
835 flags = attr.ib()
836
837 @interfaceutil.implementer(repository.irevisiondelta)
838 @attr.s(slots=True, frozen=True)
839 class revlogrevisiondelta(object):
840 node = attr.ib()
841 p1node = attr.ib()
842 p2node = attr.ib()
843 basenode = attr.ib()
844 linknode = attr.ib()
845 flags = attr.ib()
846 baserevisionsize = attr.ib()
847 revision = attr.ib()
848 delta = attr.ib()
849
850 # index v0:
851 # 4 bytes: offset
852 # 4 bytes: compressed length
853 # 4 bytes: base rev
854 # 4 bytes: link rev
855 # 20 bytes: parent 1 nodeid
856 # 20 bytes: parent 2 nodeid
857 # 20 bytes: nodeid
858 indexformatv0 = struct.Struct(">4l20s20s20s")
859 indexformatv0_pack = indexformatv0.pack
860 indexformatv0_unpack = indexformatv0.unpack
861
862 class revlogoldindex(list):
863 def __getitem__(self, i):
864 if i == -1:
865 return (0, 0, 0, -1, -1, -1, -1, nullid)
866 return list.__getitem__(self, i)
867
868 class revlogoldio(object):
869 def __init__(self):
870 self.size = indexformatv0.size
871
872 def parseindex(self, data, inline):
873 s = self.size
874 index = []
875 nodemap = {nullid: nullrev}
876 n = off = 0
877 l = len(data)
878 while off + s <= l:
879 cur = data[off:off + s]
880 off += s
881 e = indexformatv0_unpack(cur)
882 # transform to revlogv1 format
883 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
884 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
885 index.append(e2)
886 nodemap[e[6]] = n
887 n += 1
888
889 return revlogoldindex(index), nodemap, None
890
891 def packentry(self, entry, node, version, rev):
892 if gettype(entry[0]):
893 raise RevlogError(_('index entry flags need revlog version 1'))
894 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
895 node(entry[5]), node(entry[6]), entry[7])
896 return indexformatv0_pack(*e2)
897
898 # index ng:
899 # 6 bytes: offset
900 # 2 bytes: flags
901 # 4 bytes: compressed length
902 # 4 bytes: uncompressed length
903 # 4 bytes: base rev
904 # 4 bytes: link rev
905 # 4 bytes: parent 1 rev
906 # 4 bytes: parent 2 rev
907 # 32 bytes: nodeid
908 indexformatng = struct.Struct(">Qiiiiii20s12x")
909 indexformatng_pack = indexformatng.pack
910 versionformat = struct.Struct(">I")
911 versionformat_pack = versionformat.pack
912 versionformat_unpack = versionformat.unpack
913
914 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
915 # signed integer)
916 _maxentrysize = 0x7fffffff
917
918 class revlogio(object):
919 def __init__(self):
920 self.size = indexformatng.size
921
922 def parseindex(self, data, inline):
923 # call the C implementation to parse the index data
924 index, cache = parsers.parse_index2(data, inline)
925 return index, getattr(index, 'nodemap', None), cache
926
927 def packentry(self, entry, node, version, rev):
928 p = indexformatng_pack(*entry)
929 if rev == 0:
930 p = versionformat_pack(version) + p[4:]
931 return p
932
933 class revlog(object):
934 """
935 the underlying revision storage object
936
937 A revlog consists of two parts, an index and the revision data.
938
939 The index is a file with a fixed record size containing
940 information on each revision, including its nodeid (hash), the
941 nodeids of its parents, the position and offset of its data within
942 the data file, and the revision it's based on. Finally, each entry
943 contains a linkrev entry that can serve as a pointer to external
944 data.
945
946 The revision data itself is a linear collection of data chunks.
947 Each chunk represents a revision and is usually represented as a
948 delta against the previous chunk. To bound lookup time, runs of
949 deltas are limited to about 2 times the length of the original
950 version data. This makes retrieval of a version proportional to
951 its size, or O(1) relative to the number of revisions.
952
953 Both pieces of the revlog are written to in an append-only
954 fashion, which means we never need to rewrite a file to insert or
955 remove data, and can use some simple techniques to avoid the need
956 for locking while reading.
957
958 If checkambig, indexfile is opened with checkambig=True at
959 writing, to avoid file stat ambiguity.
960
961 If mmaplargeindex is True, and an mmapindexthreshold is set, the
962 index will be mmapped rather than read if it is larger than the
963 configured threshold.
964
965 If censorable is True, the revlog can have censored revisions.
966 """
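
A usage sketch, assuming `opener` is a Mercurial vfs (for example repo.svfs):

    rl = revlog(opener, 'data/foo.txt.i')   # datafile defaults to 'data/foo.txt.d'
    tipnode = rl.tip()
    text = rl.revision(tipnode)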
967 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
968 mmaplargeindex=False, censorable=False):
969 """
970 create a revlog object
971
972 opener is a function that abstracts the file opening operation
973 and can be used to implement COW semantics or the like.
974 """
975 self.indexfile = indexfile
976 self.datafile = datafile or (indexfile[:-2] + ".d")
977 self.opener = opener
978 # When True, indexfile is opened with checkambig=True at writing, to
979 # avoid file stat ambiguity.
980 self._checkambig = checkambig
981 self._censorable = censorable
982 # 3-tuple of (node, rev, text) for a raw revision.
983 self._cache = None
984 # Maps rev to chain base rev.
985 self._chainbasecache = util.lrucachedict(100)
986 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
987 self._chunkcache = (0, '')
988 # How much data to read and cache into the raw revlog data cache.
989 self._chunkcachesize = 65536
990 self._maxchainlen = None
991 self._deltabothparents = True
992 self.index = []
993 # Mapping of partial identifiers to full nodes.
994 self._pcache = {}
995 # Mapping of revision integer to full node.
996 self._nodecache = {nullid: nullrev}
997 self._nodepos = None
998 self._compengine = 'zlib'
999 self._maxdeltachainspan = -1
1000 self._withsparseread = False
1001 self._sparserevlog = False
1002 self._srdensitythreshold = 0.50
1003 self._srmingapsize = 262144
1004
1005 mmapindexthreshold = None
1006 v = REVLOG_DEFAULT_VERSION
1007 opts = getattr(opener, 'options', None)
1008 if opts is not None:
1009 if 'revlogv2' in opts:
1010 # version 2 revlogs always use generaldelta.
1011 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
1012 elif 'revlogv1' in opts:
1013 if 'generaldelta' in opts:
1014 v |= FLAG_GENERALDELTA
1015 else:
1016 v = 0
1017 if 'chunkcachesize' in opts:
1018 self._chunkcachesize = opts['chunkcachesize']
1019 if 'maxchainlen' in opts:
1020 self._maxchainlen = opts['maxchainlen']
1021 if 'deltabothparents' in opts:
1022 self._deltabothparents = opts['deltabothparents']
1023 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
1024 if 'compengine' in opts:
1025 self._compengine = opts['compengine']
1026 if 'maxdeltachainspan' in opts:
1027 self._maxdeltachainspan = opts['maxdeltachainspan']
1028 if mmaplargeindex and 'mmapindexthreshold' in opts:
1029 mmapindexthreshold = opts['mmapindexthreshold']
1030 self._sparserevlog = bool(opts.get('sparse-revlog', False))
1031 withsparseread = bool(opts.get('with-sparse-read', False))
1032 # sparse-revlog forces sparse-read
1033 self._withsparseread = self._sparserevlog or withsparseread
1034 if 'sparse-read-density-threshold' in opts:
1035 self._srdensitythreshold = opts['sparse-read-density-threshold']
1036 if 'sparse-read-min-gap-size' in opts:
1037 self._srmingapsize = opts['sparse-read-min-gap-size']
1038
1039 if self._chunkcachesize <= 0:
1040 raise RevlogError(_('revlog chunk cache size %r is not greater '
1041 'than 0') % self._chunkcachesize)
1042 elif self._chunkcachesize & (self._chunkcachesize - 1):
1043 raise RevlogError(_('revlog chunk cache size %r is not a power '
1044 'of 2') % self._chunkcachesize)
1045
1046 indexdata = ''
1047 self._initempty = True
1048 try:
1049 with self._indexfp() as f:
1050 if (mmapindexthreshold is not None and
1051 self.opener.fstat(f).st_size >= mmapindexthreshold):
1052 indexdata = util.buffer(util.mmapread(f))
1053 else:
1054 indexdata = f.read()
1055 if len(indexdata) > 0:
1056 v = versionformat_unpack(indexdata[:4])[0]
1057 self._initempty = False
1058 except IOError as inst:
1059 if inst.errno != errno.ENOENT:
1060 raise
1061
1062 self.version = v
1063 self._inline = v & FLAG_INLINE_DATA
1064 self._generaldelta = v & FLAG_GENERALDELTA
1065 flags = v & ~0xFFFF
1066 fmt = v & 0xFFFF
1067 if fmt == REVLOGV0:
1068 if flags:
1069 raise RevlogError(_('unknown flags (%#04x) in version %d '
1070 'revlog %s') %
1071 (flags >> 16, fmt, self.indexfile))
1072 elif fmt == REVLOGV1:
1073 if flags & ~REVLOGV1_FLAGS:
1074 raise RevlogError(_('unknown flags (%#04x) in version %d '
1075 'revlog %s') %
1076 (flags >> 16, fmt, self.indexfile))
1077 elif fmt == REVLOGV2:
1078 if flags & ~REVLOGV2_FLAGS:
1079 raise RevlogError(_('unknown flags (%#04x) in version %d '
1080 'revlog %s') %
1081 (flags >> 16, fmt, self.indexfile))
1082 else:
1083 raise RevlogError(_('unknown version (%d) in revlog %s') %
1084 (fmt, self.indexfile))
1085
1086 self._storedeltachains = True
1087
1088 self._io = revlogio()
1089 if self.version == REVLOGV0:
1090 self._io = revlogoldio()
1091 try:
1092 d = self._io.parseindex(indexdata, self._inline)
1093 except (ValueError, IndexError):
1094 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1095 self.index, nodemap, self._chunkcache = d
1096 if nodemap is not None:
1097 self.nodemap = self._nodecache = nodemap
1098 if not self._chunkcache:
1099 self._chunkclear()
1100 # revnum -> (chain-length, sum-delta-length)
1101 self._chaininfocache = {}
1102 # revlog header -> revlog compressor
1103 self._decompressors = {}
1104
1105 @util.propertycache
1106 def _compressor(self):
1107 return util.compengines[self._compengine].revlogcompressor()
1108
1109 def _indexfp(self, mode='r'):
1110 """file object for the revlog's index file"""
1111 args = {r'mode': mode}
1112 if mode != 'r':
1113 args[r'checkambig'] = self._checkambig
1114 if mode == 'w':
1115 args[r'atomictemp'] = True
1116 return self.opener(self.indexfile, **args)
1117
1118 def _datafp(self, mode='r'):
1119 """file object for the revlog's data file"""
1120 return self.opener(self.datafile, mode=mode)
1121
1122 @contextlib.contextmanager
1123 def _datareadfp(self, existingfp=None):
1124 """file object suitable to read data"""
1125 if existingfp is not None:
1126 yield existingfp
1127 else:
1128 if self._inline:
1129 func = self._indexfp
1130 else:
1131 func = self._datafp
1132 with func() as fp:
1133 yield fp
1134
1135 def tip(self):
1136 return self.node(len(self.index) - 1)
1137 def __contains__(self, rev):
1138 return 0 <= rev < len(self)
1139 def __len__(self):
1140 return len(self.index)
1141 def __iter__(self):
1142 return iter(pycompat.xrange(len(self)))
1143 def revs(self, start=0, stop=None):
1144 """iterate over all rev in this revlog (from start to stop)"""
1145 step = 1
1146 length = len(self)
1147 if stop is not None:
1148 if start > stop:
1149 step = -1
1150 stop += step
1151 if stop > length:
1152 stop = length
1153 else:
1154 stop = length
1155 return pycompat.xrange(start, stop, step)
1156
1157 @util.propertycache
1158 def nodemap(self):
1159 if self.index:
1160 # populate mapping down to the initial node
1161 node0 = self.index[0][7] # get around changelog filtering
1162 self.rev(node0)
1163 return self._nodecache
1164
1165 def hasnode(self, node):
1166 try:
1167 self.rev(node)
1168 return True
1169 except KeyError:
1170 return False
1171
1172 def candelta(self, baserev, rev):
1173 """whether two revisions (baserev, rev) can be delta-ed or not"""
1174 # Disable delta if either rev requires a content-changing flag
1175 # processor (ex. LFS). This is because such flag processor can alter
1176 # the rawtext content that the delta will be based on, and two clients
1177 # could have a same revlog node with different flags (i.e. different
1178 # rawtext contents) and the delta could be incompatible.
1179 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1180 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1181 return False
1182 return True
1183
1184 def clearcaches(self):
1185 self._cache = None
1186 self._chainbasecache.clear()
1187 self._chunkcache = (0, '')
1188 self._pcache = {}
1189
1190 try:
1191 self._nodecache.clearcaches()
1192 except AttributeError:
1193 self._nodecache = {nullid: nullrev}
1194 self._nodepos = None
1195
1196 def rev(self, node):
1197 try:
1198 return self._nodecache[node]
1199 except TypeError:
1200 raise
1201 except RevlogError:
1202 # parsers.c radix tree lookup failed
1203 if node == wdirid or node in wdirfilenodeids:
1204 raise error.WdirUnsupported
1205 raise LookupError(node, self.indexfile, _('no node'))
1206 except KeyError:
1207 # pure python cache lookup failed
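# Fall back to a linear scan: walk the index backwards from the last
# known position, caching each node -> rev mapping as we go, until the
# requested node is found.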
1208 n = self._nodecache
1209 i = self.index
1210 p = self._nodepos
1211 if p is None:
1212 p = len(i) - 1
1213 else:
1214 assert p < len(i)
1215 for r in pycompat.xrange(p, -1, -1):
1216 v = i[r][7]
1217 n[v] = r
1218 if v == node:
1219 self._nodepos = r - 1
1220 return r
1221 if node == wdirid or node in wdirfilenodeids:
1222 raise error.WdirUnsupported
1223 raise LookupError(node, self.indexfile, _('no node'))
1224
1225 # Accessors for index entries.
1226
1227 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1228 # are flags.
1229 def start(self, rev):
1230 return int(self.index[rev][0] >> 16)
1231
1232 def flags(self, rev):
1233 return self.index[rev][0] & 0xFFFF
1234
1235 def length(self, rev):
1236 return self.index[rev][1]
1237
1238 def rawsize(self, rev):
1239 """return the length of the uncompressed text for a given revision"""
1240 l = self.index[rev][2]
1241 if l >= 0:
1242 return l
1243
1244 t = self.revision(rev, raw=True)
1245 return len(t)
1246
1247 def size(self, rev):
1248 """length of non-raw text (processed by a "read" flag processor)"""
1249 # fast path: if no "read" flag processor could change the content,
1250 # size is rawsize. note: ELLIPSIS is known to not change the content.
1251 flags = self.flags(rev)
1252 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1253 return self.rawsize(rev)
1254
1255 return len(self.revision(rev, raw=False))
1256
1257 def chainbase(self, rev):
1258 base = self._chainbasecache.get(rev)
1259 if base is not None:
1260 return base
1261
1262 index = self.index
1263 iterrev = rev
1264 base = index[iterrev][3]
1265 while base != iterrev:
1266 iterrev = base
1267 base = index[iterrev][3]
1268
1269 self._chainbasecache[rev] = base
1270 return base
1271
1272 def linkrev(self, rev):
1273 return self.index[rev][4]
1274
1275 def parentrevs(self, rev):
1276 try:
1277 entry = self.index[rev]
1278 except IndexError:
1279 if rev == wdirrev:
1280 raise error.WdirUnsupported
1281 raise
1282
1283 return entry[5], entry[6]
1284
1285 def node(self, rev):
1286 try:
1287 return self.index[rev][7]
1288 except IndexError:
1289 if rev == wdirrev:
1290 raise error.WdirUnsupported
1291 raise
1292
1293 # Derived from index values.
1294
1295 def end(self, rev):
1296 return self.start(rev) + self.length(rev)
1297
1298 def parents(self, node):
1299 i = self.index
1300 d = i[self.rev(node)]
1301 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1302
1303 def chainlen(self, rev):
1304 return self._chaininfo(rev)[0]
1305
1306 def _chaininfo(self, rev):
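# Compute (chain length, total compressed delta size) for ``rev`` by
# walking its delta chain, reusing cached results from revisions further
# down the chain when available.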
1307 chaininfocache = self._chaininfocache
1308 if rev in chaininfocache:
1309 return chaininfocache[rev]
1310 index = self.index
1311 generaldelta = self._generaldelta
1312 iterrev = rev
1313 e = index[iterrev]
1314 clen = 0
1315 compresseddeltalen = 0
1316 while iterrev != e[3]:
1317 clen += 1
1318 compresseddeltalen += e[1]
1319 if generaldelta:
1320 iterrev = e[3]
1321 else:
1322 iterrev -= 1
1323 if iterrev in chaininfocache:
1324 t = chaininfocache[iterrev]
1325 clen += t[0]
1326 compresseddeltalen += t[1]
1327 break
1328 e = index[iterrev]
1329 else:
1330 # Add text length of base since decompressing that also takes
1331 # work. For cache hits the length is already included.
1332 compresseddeltalen += e[1]
1333 r = (clen, compresseddeltalen)
1334 chaininfocache[rev] = r
1335 return r
1336
1337 def _deltachain(self, rev, stoprev=None):
1338 """Obtain the delta chain for a revision.
1339
1340 ``stoprev`` specifies a revision to stop at. If not specified, we
1341 stop at the base of the chain.
1342
1343 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1344 revs in ascending order and ``stopped`` is a bool indicating whether
1345 ``stoprev`` was hit.
1346 """
1347 # Try C implementation.
1348 try:
1349 return self.index.deltachain(rev, stoprev, self._generaldelta)
1350 except AttributeError:
1351 pass
1352
1353 chain = []
1354
1355 # Alias to prevent attribute lookup in tight loop.
1356 index = self.index
1357 generaldelta = self._generaldelta
1358
1359 iterrev = rev
1360 e = index[iterrev]
1361 while iterrev != e[3] and iterrev != stoprev:
1362 chain.append(iterrev)
1363 if generaldelta:
1364 iterrev = e[3]
1365 else:
1366 iterrev -= 1
1367 e = index[iterrev]
1368
1369 if iterrev == stoprev:
1370 stopped = True
1371 else:
1372 chain.append(iterrev)
1373 stopped = False
1374
1375 chain.reverse()
1376 return chain, stopped
1377
1378 def ancestors(self, revs, stoprev=0, inclusive=False):
1379 """Generate the ancestors of 'revs' in reverse topological order.
1380 Does not generate revs lower than stoprev.
1381
1382 See the documentation for ancestor.lazyancestors for more details."""
1383
1384 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1385 inclusive=inclusive)
1386
1387 def descendants(self, revs):
1388 """Generate the descendants of 'revs' in revision order.
1389
1390 Yield a sequence of revision numbers starting with a child of
1391 some rev in revs, i.e., each revision is *not* considered a
1392 descendant of itself. Results are ordered by revision number (a
1393 topological sort)."""
1394 first = min(revs)
1395 if first == nullrev:
1396 for i in self:
1397 yield i
1398 return
1399
1400 seen = set(revs)
1401 for i in self.revs(start=first + 1):
1402 for x in self.parentrevs(i):
1403 if x != nullrev and x in seen:
1404 seen.add(i)
1405 yield i
1406 break
1407
1408 def findcommonmissing(self, common=None, heads=None):
1409 """Return a tuple of the ancestors of common and the ancestors of heads
1410 that are not ancestors of common. In revset terminology, we return the
1411 tuple:
1412
1413 ::common, (::heads) - (::common)
1414
1415 The list is sorted by revision number, meaning it is
1416 topologically sorted.
1417
1418 'heads' and 'common' are both lists of node IDs. If heads is
1419 not supplied, uses all of the revlog's heads. If common is not
1420 supplied, uses nullid."""
1421 if common is None:
1422 common = [nullid]
1423 if heads is None:
1424 heads = self.heads()
1425
1426 common = [self.rev(n) for n in common]
1427 heads = [self.rev(n) for n in heads]
1428
1429 # we want the ancestors, but inclusive
1430 class lazyset(object):
1431 def __init__(self, lazyvalues):
1432 self.addedvalues = set()
1433 self.lazyvalues = lazyvalues
1434
1435 def __contains__(self, value):
1436 return value in self.addedvalues or value in self.lazyvalues
1437
1438 def __iter__(self):
1439 added = self.addedvalues
1440 for r in added:
1441 yield r
1442 for r in self.lazyvalues:
1443 if r not in added:
1444 yield r
1445
1446 def add(self, value):
1447 self.addedvalues.add(value)
1448
1449 def update(self, values):
1450 self.addedvalues.update(values)
1451
1452 has = lazyset(self.ancestors(common))
1453 has.add(nullrev)
1454 has.update(common)
1455
1456 # take all ancestors from heads that aren't in has
1457 missing = set()
1458 visit = collections.deque(r for r in heads if r not in has)
1459 while visit:
1460 r = visit.popleft()
1461 if r in missing:
1462 continue
1463 else:
1464 missing.add(r)
1465 for p in self.parentrevs(r):
1466 if p not in has:
1467 visit.append(p)
1468 missing = list(missing)
1469 missing.sort()
1470 return has, [self.node(miss) for miss in missing]
1471
1472 def incrementalmissingrevs(self, common=None):
1473 """Return an object that can be used to incrementally compute the
1474 revision numbers of the ancestors of arbitrary sets that are not
1475 ancestors of common. This is an ancestor.incrementalmissingancestors
1476 object.
1477
1478 'common' is a list of revision numbers. If common is not supplied, uses
1479 nullrev.
1480 """
1481 if common is None:
1482 common = [nullrev]
1483
1484 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1485
1486 def findmissingrevs(self, common=None, heads=None):
1487 """Return the revision numbers of the ancestors of heads that
1488 are not ancestors of common.
1489
1490 More specifically, return a list of revision numbers corresponding to
1491 nodes N such that every N satisfies the following constraints:
1492
1493 1. N is an ancestor of some node in 'heads'
1494 2. N is not an ancestor of any node in 'common'
1495
1496 The list is sorted by revision number, meaning it is
1497 topologically sorted.
1498
1499 'heads' and 'common' are both lists of revision numbers. If heads is
1500 not supplied, uses all of the revlog's heads. If common is not
1501 supplied, uses nullrev."""
1502 if common is None:
1503 common = [nullrev]
1504 if heads is None:
1505 heads = self.headrevs()
1506
1507 inc = self.incrementalmissingrevs(common=common)
1508 return inc.missingancestors(heads)
1509
1510 def findmissing(self, common=None, heads=None):
1511 """Return the ancestors of heads that are not ancestors of common.
1512
1513 More specifically, return a list of nodes N such that every N
1514 satisfies the following constraints:
1515
1516 1. N is an ancestor of some node in 'heads'
1517 2. N is not an ancestor of any node in 'common'
1518
1519 The list is sorted by revision number, meaning it is
1520 topologically sorted.
1521
1522 'heads' and 'common' are both lists of node IDs. If heads is
1523 not supplied, uses all of the revlog's heads. If common is not
1524 supplied, uses nullid."""
1525 if common is None:
1526 common = [nullid]
1527 if heads is None:
1528 heads = self.heads()
1529
1530 common = [self.rev(n) for n in common]
1531 heads = [self.rev(n) for n in heads]
1532
1533 inc = self.incrementalmissingrevs(common=common)
1534 return [self.node(r) for r in inc.missingancestors(heads)]
1535
1536 def nodesbetween(self, roots=None, heads=None):
1537 """Return a topological path from 'roots' to 'heads'.
1538
1539 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1540 topologically sorted list of all nodes N that satisfy both of
1541 these constraints:
1542
1543 1. N is a descendant of some node in 'roots'
1544 2. N is an ancestor of some node in 'heads'
1545
1546 Every node is considered to be both a descendant and an ancestor
1547 of itself, so every reachable node in 'roots' and 'heads' will be
1548 included in 'nodes'.
1549
1550 'outroots' is the list of reachable nodes in 'roots', i.e., the
1551 subset of 'roots' that is returned in 'nodes'. Likewise,
1552 'outheads' is the subset of 'heads' that is also in 'nodes'.
1553
1554 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1555 unspecified, uses nullid as the only root. If 'heads' is
1556 unspecified, uses list of all of the revlog's heads."""
1557 nonodes = ([], [], [])
1558 if roots is not None:
1559 roots = list(roots)
1560 if not roots:
1561 return nonodes
1562 lowestrev = min([self.rev(n) for n in roots])
1563 else:
1564 roots = [nullid] # Everybody's a descendant of nullid
1565 lowestrev = nullrev
1566 if (lowestrev == nullrev) and (heads is None):
1567 # We want _all_ the nodes!
1568 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1569 if heads is None:
1570 # All nodes are ancestors, so the latest ancestor is the last
1571 # node.
1572 highestrev = len(self) - 1
1573 # Set ancestors to None to signal that every node is an ancestor.
1574 ancestors = None
1575 # Set heads to an empty dictionary for later discovery of heads
1576 heads = {}
1577 else:
1578 heads = list(heads)
1579 if not heads:
1580 return nonodes
1581 ancestors = set()
1582 # Turn heads into a dictionary so we can remove 'fake' heads.
1583 # Also, later we will be using it to filter out the heads we can't
1584 # find from roots.
1585 heads = dict.fromkeys(heads, False)
1586 # Start at the top and keep marking parents until we're done.
1587 nodestotag = set(heads)
1588 # Remember where the top was so we can use it as a limit later.
1589 highestrev = max([self.rev(n) for n in nodestotag])
1590 while nodestotag:
1591 # grab a node to tag
1592 n = nodestotag.pop()
1593 # Never tag nullid
1594 if n == nullid:
1595 continue
1596 # A node's revision number represents its place in a
1597 # topologically sorted list of nodes.
1598 r = self.rev(n)
1599 if r >= lowestrev:
1600 if n not in ancestors:
1601 # If we are possibly a descendant of one of the roots
1602 # and we haven't already been marked as an ancestor
1603 ancestors.add(n) # Mark as ancestor
1604 # Add non-nullid parents to list of nodes to tag.
1605 nodestotag.update([p for p in self.parents(n) if
1606 p != nullid])
1607 elif n in heads: # We've seen it before, is it a fake head?
1608 # So it is, real heads should not be the ancestors of
1609 # any other heads.
1610 heads.pop(n)
1611 if not ancestors:
1612 return nonodes
1613 # Now that we have our set of ancestors, we want to remove any
1614 # roots that are not ancestors.
1615
1616 # If one of the roots was nullid, everything is included anyway.
1617 if lowestrev > nullrev:
1618 # But, since we weren't, let's recompute the lowest rev to not
1619 # include roots that aren't ancestors.
1620
1621 # Filter out roots that aren't ancestors of heads
1622 roots = [root for root in roots if root in ancestors]
1623 # Recompute the lowest revision
1624 if roots:
1625 lowestrev = min([self.rev(root) for root in roots])
1626 else:
1627 # No more roots? Return empty list
1628 return nonodes
1629 else:
1630 # We are descending from nullid, and don't need to care about
1631 # any other roots.
1632 lowestrev = nullrev
1633 roots = [nullid]
1634 # Transform our roots list into a set.
1635 descendants = set(roots)
1636 # Also, keep the original roots so we can filter out roots that aren't
1637 # 'real' roots (i.e. are descended from other roots).
1638 roots = descendants.copy()
1639 # Our topologically sorted list of output nodes.
1640 orderedout = []
1641 # Don't start at nullid since we don't want nullid in our output list,
1642 # and if nullid shows up in descendants, empty parents will look like
1643 # they're descendants.
1644 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1645 n = self.node(r)
1646 isdescendant = False
1647 if lowestrev == nullrev: # Everybody is a descendant of nullid
1648 isdescendant = True
1649 elif n in descendants:
1650 # n is already a descendant
1651 isdescendant = True
1652 # This check only needs to be done here because all the roots
1653 # will start being marked as descendants before the loop.
1654 if n in roots:
1655 # If n was a root, check if it's a 'real' root.
1656 p = tuple(self.parents(n))
1657 # If any of its parents are descendants, it's not a root.
1658 if (p[0] in descendants) or (p[1] in descendants):
1659 roots.remove(n)
1660 else:
1661 p = tuple(self.parents(n))
1662 # A node is a descendant if either of its parents is a
1663 # descendant. (We seeded the descendants set with the roots
1664 # up there, remember?)
1665 if (p[0] in descendants) or (p[1] in descendants):
1666 descendants.add(n)
1667 isdescendant = True
1668 if isdescendant and ((ancestors is None) or (n in ancestors)):
1669 # Only include nodes that are both descendants and ancestors.
1670 orderedout.append(n)
1671 if (ancestors is not None) and (n in heads):
1672 # We're trying to figure out which heads are reachable
1673 # from roots.
1674 # Mark this head as having been reached
1675 heads[n] = True
1676 elif ancestors is None:
1677 # Otherwise, we're trying to discover the heads.
1678 # Assume this is a head because if it isn't, the next step
1679 # will eventually remove it.
1680 heads[n] = True
1681 # But, obviously its parents aren't.
1682 for p in self.parents(n):
1683 heads.pop(p, None)
1684 heads = [head for head, flag in heads.iteritems() if flag]
1685 roots = list(roots)
1686 assert orderedout
1687 assert roots
1688 assert heads
1689 return (orderedout, roots, heads)
1690
1691 def headrevs(self):
1692 try:
1693 return self.index.headrevs()
1694 except AttributeError:
1695 return self._headrevs()
1696
1697 def computephases(self, roots):
1698 return self.index.computephasesmapsets(roots)
1699
1700 def _headrevs(self):
1701 count = len(self)
1702 if not count:
1703 return [nullrev]
1704 # we won't iterate over filtered revs, so nobody is a head at the start
1705 ishead = [0] * (count + 1)
1706 index = self.index
1707 for r in self:
1708 ishead[r] = 1 # I may be a head
1709 e = index[r]
1710 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1711 return [r for r, val in enumerate(ishead) if val]
1712
1713 def heads(self, start=None, stop=None):
1714 """return the list of all nodes that have no children
1715
1716 if start is specified, only heads that are descendants of
1717 start will be returned
1718 if stop is specified, it will consider all the revs from stop
1719 as if they had no children
1720 """
1721 if start is None and stop is None:
1722 if not len(self):
1723 return [nullid]
1724 return [self.node(r) for r in self.headrevs()]
1725
1726 if start is None:
1727 start = nullid
1728 if stop is None:
1729 stop = []
1730 stoprevs = set([self.rev(n) for n in stop])
1731 startrev = self.rev(start)
1732 reachable = {startrev}
1733 heads = {startrev}
1734
1735 parentrevs = self.parentrevs
1736 for r in self.revs(start=startrev + 1):
1737 for p in parentrevs(r):
1738 if p in reachable:
1739 if r not in stoprevs:
1740 reachable.add(r)
1741 heads.add(r)
1742 if p in heads and p not in stoprevs:
1743 heads.remove(p)
1744
1745 return [self.node(r) for r in heads]
1746
1747 def children(self, node):
1748 """find the children of a given node"""
1749 c = []
1750 p = self.rev(node)
1751 for r in self.revs(start=p + 1):
1752 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1753 if prevs:
1754 for pr in prevs:
1755 if pr == p:
1756 c.append(self.node(r))
1757 elif p == nullrev:
1758 c.append(self.node(r))
1759 return c
1760
1761 def commonancestorsheads(self, a, b):
1762 """calculate all the heads of the common ancestors of nodes a and b"""
1763 a, b = self.rev(a), self.rev(b)
1764 ancs = self._commonancestorsheads(a, b)
1765 return pycompat.maplist(self.node, ancs)
1766
1767 def _commonancestorsheads(self, *revs):
1768 """calculate all the heads of the common ancestors of revs"""
1769 try:
1770 ancs = self.index.commonancestorsheads(*revs)
1771 except (AttributeError, OverflowError): # C implementation failed
1772 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1773 return ancs
1774
1775 def isancestor(self, a, b):
1776 """return True if node a is an ancestor of node b
1777
1778 A revision is considered an ancestor of itself."""
1779 a, b = self.rev(a), self.rev(b)
1780 return self.isancestorrev(a, b)
1781
1782 def isancestorrev(self, a, b):
1783 """return True if revision a is an ancestor of revision b
1784
1785 A revision is considered an ancestor of itself.
1786
1787 The implementation of this is trivial but the use of
1788 commonancestorsheads is not."""
1789 if a == nullrev:
1790 return True
1791 elif a == b:
1792 return True
1793 elif a > b:
1794 return False
1795 return a in self._commonancestorsheads(a, b)
1796
1797 def ancestor(self, a, b):
1798 """calculate the "best" common ancestor of nodes a and b"""
1799
1800 a, b = self.rev(a), self.rev(b)
1801 try:
1802 ancs = self.index.ancestors(a, b)
1803 except (AttributeError, OverflowError):
1804 ancs = ancestor.ancestors(self.parentrevs, a, b)
1805 if ancs:
1806 # choose a consistent winner when there's a tie
1807 return min(map(self.node, ancs))
1808 return nullid
1809
1810 def _match(self, id):
1811 if isinstance(id, int):
1812 # rev
1813 return self.node(id)
1814 if len(id) == 20:
1815 # possibly a binary node
1816 # odds of a binary node being all hex in ASCII are 1 in 10**25
1817 try:
1818 node = id
1819 self.rev(node) # quick search the index
1820 return node
1821 except LookupError:
1822 pass # may be partial hex id
1823 try:
1824 # str(rev)
1825 rev = int(id)
1826 if "%d" % rev != id:
1827 raise ValueError
1828 if rev < 0:
1829 rev = len(self) + rev
1830 if rev < 0 or rev >= len(self):
1831 raise ValueError
1832 return self.node(rev)
1833 except (ValueError, OverflowError):
1834 pass
1835 if len(id) == 40:
1836 try:
1837 # a full hex nodeid?
1838 node = bin(id)
1839 self.rev(node)
1840 return node
1841 except (TypeError, LookupError):
1842 pass
1843
1844 def _partialmatch(self, id):
1845 # we don't care about wdirfilenodeids as they should always be full hashes
1846 maybewdir = wdirhex.startswith(id)
1847 try:
1848 partial = self.index.partialmatch(id)
1849 if partial and self.hasnode(partial):
1850 if maybewdir:
1851 # single 'ff...' match in radix tree, ambiguous with wdir
1852 raise RevlogError
1853 return partial
1854 if maybewdir:
1855 # no 'ff...' match in radix tree, wdir identified
1856 raise error.WdirUnsupported
1857 return None
1858 except RevlogError:
1859 # parsers.c radix tree lookup gave multiple matches
1860 # fast path: for unfiltered changelog, radix tree is accurate
1861 if not getattr(self, 'filteredrevs', None):
1862 raise AmbiguousPrefixLookupError(id, self.indexfile,
1863 _('ambiguous identifier'))
1864 # fall through to slow path that filters hidden revisions
1865 except (AttributeError, ValueError):
1866 # we are pure python, or key was too short to search radix tree
1867 pass
1868
1869 if id in self._pcache:
1870 return self._pcache[id]
1871
1872 if len(id) <= 40:
1873 try:
1874 # hex(node)[:...]
1875 l = len(id) // 2 # grab an even number of digits
1876 prefix = bin(id[:l * 2])
1877 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1878 nl = [n for n in nl if hex(n).startswith(id) and
1879 self.hasnode(n)]
1880 if nullhex.startswith(id):
1881 nl.append(nullid)
1882 if len(nl) > 0:
1883 if len(nl) == 1 and not maybewdir:
1884 self._pcache[id] = nl[0]
1885 return nl[0]
1886 raise AmbiguousPrefixLookupError(id, self.indexfile,
1887 _('ambiguous identifier'))
1888 if maybewdir:
1889 raise error.WdirUnsupported
1890 return None
1891 except TypeError:
1892 pass
1893
1894 def lookup(self, id):
1895 """locate a node based on:
1896 - revision number or str(revision number)
1897 - nodeid or subset of hex nodeid
1898 """
1899 n = self._match(id)
1900 if n is not None:
1901 return n
1902 n = self._partialmatch(id)
1903 if n:
1904 return n
1905
1906 raise LookupError(id, self.indexfile, _('no match found'))
1907
1908 def shortest(self, node, minlength=1):
1909 """Find the shortest unambiguous prefix that matches node."""
1910 def isvalid(prefix):
1911 try:
1912 node = self._partialmatch(prefix)
1913 except error.RevlogError:
1914 return False
1915 except error.WdirUnsupported:
1916 # single 'ff...' match
1917 return True
1918 if node is None:
1919 raise LookupError(node, self.indexfile, _('no node'))
1920 return True
1921
1922 def maybewdir(prefix):
1923 return all(c == 'f' for c in prefix)
1924
1925 hexnode = hex(node)
1926
1927 def disambiguate(hexnode, minlength):
1928 """Disambiguate against wdirid."""
1929 for length in range(minlength, 41):
1930 prefix = hexnode[:length]
1931 if not maybewdir(prefix):
1932 return prefix
1933
1934 if not getattr(self, 'filteredrevs', None):
1935 try:
1936 length = max(self.index.shortest(node), minlength)
1937 return disambiguate(hexnode, length)
1938 except RevlogError:
1939 if node != wdirid:
1940 raise LookupError(node, self.indexfile, _('no node'))
1941 except AttributeError:
1942 # Fall through to pure code
1943 pass
1944
1945 if node == wdirid:
1946 for length in range(minlength, 41):
1947 prefix = hexnode[:length]
1948 if isvalid(prefix):
1949 return prefix
1950
1951 for length in range(minlength, 41):
1952 prefix = hexnode[:length]
1953 if isvalid(prefix):
1954 return disambiguate(hexnode, length)
1955
1956 def cmp(self, node, text):
1957 """compare text with a given file revision
1958
1959 returns True if text is different than what is stored.
1960 """
1961 p1, p2 = self.parents(node)
1962 return hash(text, p1, p2) != node
1963
1964 def _cachesegment(self, offset, data):
1965 """Add a segment to the revlog cache.
1966
1967 Accepts an absolute offset and the data that is at that location.
1968 """
1969 o, d = self._chunkcache
1970 # try to add to existing cache
1971 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1972 self._chunkcache = o, d + data
1973 else:
1974 self._chunkcache = offset, data
1975
1976 def _readsegment(self, offset, length, df=None):
1977 """Load a segment of raw data from the revlog.
1978
1979 Accepts an absolute offset, length to read, and an optional existing
1980 file handle to read from.
1981
1982 If an existing file handle is passed, it will be seeked and the
1983 original seek position will NOT be restored.
1984
1985 Returns a str or buffer of raw byte data.
1986 """
1987 # Cache data both forward and backward around the requested
1988 # data, in a fixed size window. This helps speed up operations
1989 # involving reading the revlog backwards.
1990 cachesize = self._chunkcachesize
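# Align the read window to cache-size boundaries: rounding the start
# down and the end up relies on _chunkcachesize being a power of two
# (enforced in __init__), so "& ~(cachesize - 1)" clears the low bits.
# For example, with a 64 KiB cache an offset of 70000 rounds down to 65536.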
1991 realoffset = offset & ~(cachesize - 1)
1992 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1993 - realoffset)
1994 with self._datareadfp(df) as df:
1995 df.seek(realoffset)
1996 d = df.read(reallength)
1997 self._cachesegment(realoffset, d)
1998 if offset != realoffset or reallength != length:
1999 return util.buffer(d, offset - realoffset, length)
2000 return d
2001
2002 def _getsegment(self, offset, length, df=None):
2003 """Obtain a segment of raw data from the revlog.
2004
2005 Accepts an absolute offset, length of bytes to obtain, and an
2006 optional file handle to the already-opened revlog. If the file
2007 handle is used, its original seek position will not be preserved.
2008
2009 Requests for data may be returned from a cache.
2010
2011 Returns a str or a buffer instance of raw byte data.
2012 """
2013 o, d = self._chunkcache
2014 l = len(d)
2015
2016 # is it in the cache?
2017 cachestart = offset - o
2018 cacheend = cachestart + length
2019 if cachestart >= 0 and cacheend <= l:
2020 if cachestart == 0 and cacheend == l:
2021 return d # avoid a copy
2022 return util.buffer(d, cachestart, cacheend - cachestart)
2023
2024 return self._readsegment(offset, length, df=df)
2025
2026 def _getsegmentforrevs(self, startrev, endrev, df=None):
2027 """Obtain a segment of raw data corresponding to a range of revisions.
2028
2029 Accepts the start and end revisions and an optional already-open
2030 file handle to be used for reading. If the file handle is read, its
2031 seek position will not be preserved.
2032
2033 Requests for data may be satisfied by a cache.
2034
2035 Returns a 2-tuple of (offset, data) for the requested range of
2036 revisions. Offset is the integer offset from the beginning of the
2037 revlog and data is a str or buffer of the raw byte data.
2038
2039 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2040 to determine where each revision's data begins and ends.
2041 """
2042 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2043 # (functions are expensive).
2044 index = self.index
2045 istart = index[startrev]
2046 start = int(istart[0] >> 16)
2047 if startrev == endrev:
2048 end = start + istart[1]
2049 else:
2050 iend = index[endrev]
2051 end = int(iend[0] >> 16) + iend[1]
2052
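# For inline revlogs, index entries and data chunks are interleaved in
# a single file, so shift the offsets past the index entries that
# precede the requested data.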
2053 if self._inline:
2054 start += (startrev + 1) * self._io.size
2055 end += (endrev + 1) * self._io.size
2056 length = end - start
2057
2058 return start, self._getsegment(start, length, df=df)
2059
2060 def _chunk(self, rev, df=None):
2061 """Obtain a single decompressed chunk for a revision.
2062
2063 Accepts an integer revision and an optional already-open file handle
2064 to be used for reading. If used, the seek position of the file will not
2065 be preserved.
2066
2067 Returns a str holding uncompressed data for the requested revision.
2068 """
2069 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2070
2071 def _chunks(self, revs, df=None, targetsize=None):
2072 """Obtain decompressed chunks for the specified revisions.
2073
2074 Accepts an iterable of numeric revisions that are assumed to be in
2075 ascending order. Also accepts an optional already-open file handle
2076 to be used for reading. If used, the seek position of the file will
2077 not be preserved.
2078
2079 This function is similar to calling ``self._chunk()`` multiple times,
2080 but is faster.
2081
2082 Returns a list with decompressed data for each requested revision.
2083 """
2084 if not revs:
2085 return []
2086 start = self.start
2087 length = self.length
2088 inline = self._inline
2089 iosize = self._io.size
2090 buffer = util.buffer
2091
2092 l = []
2093 ladd = l.append
2094
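# With sparse reads enabled, split the revision run into slices that are
# dense enough to read in one go; otherwise read the whole run at once.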
2095 if not self._withsparseread:
2096 slicedchunks = (revs,)
2097 else:
2098 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2099
2100 for revschunk in slicedchunks:
2101 firstrev = revschunk[0]
2102 # Skip trailing revisions with empty diff
2103 for lastrev in revschunk[::-1]:
2104 if length(lastrev) != 0:
2105 break
2106
2107 try:
2108 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2109 except OverflowError:
2110 # issue4215 - we can't cache a run of chunks greater than
2111 # 2G on Windows
2112 return [self._chunk(rev, df=df) for rev in revschunk]
2113
2114 decomp = self.decompress
2115 for rev in revschunk:
2116 chunkstart = start(rev)
2117 if inline:
2118 chunkstart += (rev + 1) * iosize
2119 chunklength = length(rev)
2120 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2121
2122 return l
2123
2124 def _chunkclear(self):
2125 """Clear the raw chunk cache."""
2126 self._chunkcache = (0, '')
2127
2128 def deltaparent(self, rev):
2129 """return deltaparent of the given revision"""
2130 base = self.index[rev][3]
2131 if base == rev:
2132 return nullrev
2133 elif self._generaldelta:
2134 return base
2135 else:
2136 return rev - 1
2137
2138 def issnapshot(self, rev):
2139 """tells whether rev is a snapshot
2140 """
2141 if rev == nullrev:
2142 return True
2143 deltap = self.deltaparent(rev)
2144 if deltap == nullrev:
2145 return True
2146 p1, p2 = self.parentrevs(rev)
2147 if deltap in (p1, p2):
2148 return False
2149 return self.issnapshot(deltap)
2150
2151 def snapshotdepth(self, rev):
2152 """number of snapshot in the chain before this one"""
2153 if not self.issnapshot(rev):
2154 raise ProgrammingError('revision %d not a snapshot' % rev)
2155 return len(self._deltachain(rev)[0]) - 1
2156
2157 def revdiff(self, rev1, rev2):
2158 """return or calculate a delta between two revisions
2159
2160 The delta calculated is in binary form and is intended to be written to
2161 revlog data directly. So this function needs raw revision data.
2162 """
2163 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2164 return bytes(self._chunk(rev2))
2165
2166 return mdiff.textdiff(self.revision(rev1, raw=True),
2167 self.revision(rev2, raw=True))
2168
2169 def revision(self, nodeorrev, _df=None, raw=False):
2170 """return an uncompressed revision of a given node or revision
2171 number.
2172
2173 _df - an existing file handle to read from. (internal-only)
2174 raw - an optional argument specifying if the revision data is to be
2175 treated as raw data when applying flag transforms. 'raw' should be set
2176 to True when generating changegroups or in debug commands.
2177 """
2178 if isinstance(nodeorrev, int):
2179 rev = nodeorrev
2180 node = self.node(rev)
2181 else:
2182 node = nodeorrev
2183 rev = None
2184
2185 cachedrev = None
2186 flags = None
2187 rawtext = None
2188 if node == nullid:
2189 return ""
2190 if self._cache:
2191 if self._cache[0] == node:
2192 # _cache only stores rawtext
2193 if raw:
2194 return self._cache[2]
2195 # duplicated, but good for perf
2196 if rev is None:
2197 rev = self.rev(node)
2198 if flags is None:
2199 flags = self.flags(rev)
2200 # no extra flags set, no flag processor runs, text = rawtext
2201 if flags == REVIDX_DEFAULT_FLAGS:
2202 return self._cache[2]
2203 # rawtext is reusable. need to run flag processor
2204 rawtext = self._cache[2]
2205
2206 cachedrev = self._cache[1]
2207
2208 # look up what we need to read
2209 if rawtext is None:
2210 if rev is None:
2211 rev = self.rev(node)
2212
2213 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2214 if stopped:
2215 rawtext = self._cache[2]
2216
2217 # drop cache to save memory
2218 self._cache = None
2219
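# Limit the span of each sliced read to roughly four times the expected
# raw text size of the revision being reconstructed.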
2220 targetsize = None
2221 rawsize = self.index[rev][2]
2222 if 0 <= rawsize:
2223 targetsize = 4 * rawsize
2224
2225 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2226 if rawtext is None:
2227 rawtext = bytes(bins[0])
2228 bins = bins[1:]
2229
2230 rawtext = mdiff.patches(rawtext, bins)
2231 self._cache = (node, rev, rawtext)
2232
2233 if flags is None:
2234 if rev is None:
2235 rev = self.rev(node)
2236 flags = self.flags(rev)
2237
2238 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2239 if validatehash:
2240 self.checkhash(text, node, rev=rev)
2241
2242 return text
2243
2244 def hash(self, text, p1, p2):
2245 """Compute a node hash.
2246
2247 Available as a function so that subclasses can replace the hash
2248 as needed.
2249 """
2250 return hash(text, p1, p2)
2251
2252 def _processflags(self, text, flags, operation, raw=False):
2253 """Inspect revision data flags and applies transforms defined by
2254 registered flag processors.
2255
2256 ``text`` - the revision data to process
2257 ``flags`` - the revision flags
2258 ``operation`` - the operation being performed (read or write)
2259 ``raw`` - an optional argument describing if the raw transform should be
2260 applied.
2261
2262 This method processes the flags in the order (or reverse order if
2263 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2264 flag processors registered for present flags. The order of flags defined
2265 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2266
2267 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2268 processed text and ``validatehash`` is a bool indicating whether the
2269 returned text should be checked for hash integrity.
2270
2271 Note: If the ``raw`` argument is set, it has precedence over the
2272 operation and will only update the value of ``validatehash``.
2273 """
2274 # fast path: no flag processors will run
2275 if flags == 0:
2276 return text, True
2277 if operation not in ('read', 'write'):
2278 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
2279 # Check all flags are known.
2280 if flags & ~REVIDX_KNOWN_FLAGS:
2281 raise RevlogError(_("incompatible revision flag '%#x'") %
2282 (flags & ~REVIDX_KNOWN_FLAGS))
2283 validatehash = True
2284 # Depending on the operation (read or write), the order might be
2285 # reversed due to non-commutative transforms.
2286 orderedflags = REVIDX_FLAGS_ORDER
2287 if operation == 'write':
2288 orderedflags = reversed(orderedflags)
2289
2290 for flag in orderedflags:
2291 # If a flagprocessor has been registered for a known flag, apply the
2292 # related operation transform and update result tuple.
2293 if flag & flags:
2294 vhash = True
2295
2296 if flag not in _flagprocessors:
2297 message = _("missing processor for flag '%#x'") % (flag)
2298 raise RevlogError(message)
2299
2300 processor = _flagprocessors[flag]
2301 if processor is not None:
2302 readtransform, writetransform, rawtransform = processor
2303
2304 if raw:
2305 vhash = rawtransform(self, text)
2306 elif operation == 'read':
2307 text, vhash = readtransform(self, text)
2308 else: # write operation
2309 text, vhash = writetransform(self, text)
2310 validatehash = validatehash and vhash
2311
2312 return text, validatehash
2313
2314 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2315 """Check node hash integrity.
2316
2317 Available as a function so that subclasses can extend hash mismatch
2318 behaviors as needed.
2319 """
2320 try:
2321 if p1 is None and p2 is None:
2322 p1, p2 = self.parents(node)
2323 if node != self.hash(text, p1, p2):
2324 revornode = rev
2325 if revornode is None:
2326 revornode = templatefilters.short(hex(node))
2327 raise RevlogError(_("integrity check failed on %s:%s")
2328 % (self.indexfile, pycompat.bytestr(revornode)))
2329 except RevlogError:
2330 if self._censorable and _censoredtext(text):
2331 raise error.CensoredNodeError(self.indexfile, node, text)
2332 raise
2333
2334 def _enforceinlinesize(self, tr, fp=None):
2335 """Check if the revlog is too big for inline and convert if so.
2336
2337 This should be called after revisions are added to the revlog. If the
2338 revlog has grown too large to be an inline revlog, it will convert it
2339 to use multiple index and data files.
2340 """
2341 tiprev = len(self) - 1
2342 if (not self._inline or
2343 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2344 return
2345
2346 trinfo = tr.find(self.indexfile)
2347 if trinfo is None:
2348 raise RevlogError(_("%s not found in the transaction")
2349 % self.indexfile)
2350
2351 trindex = trinfo[2]
2352 if trindex is not None:
2353 dataoff = self.start(trindex)
2354 else:
2355 # revlog was stripped at start of transaction, use all leftover data
2356 trindex = len(self) - 1
2357 dataoff = self.end(tiprev)
2358
2359 tr.add(self.datafile, dataoff)
2360
2361 if fp:
2362 fp.flush()
2363 fp.close()
2364
2365 with self._datafp('w') as df:
2366 for r in self:
2367 df.write(self._getsegmentforrevs(r, r)[1])
2368
2369 with self._indexfp('w') as fp:
2370 self.version &= ~FLAG_INLINE_DATA
2371 self._inline = False
2372 io = self._io
2373 for i in self:
2374 e = io.packentry(self.index[i], self.node, self.version, i)
2375 fp.write(e)
2376
2377 # the temp file replaces the real index when we exit the context
2378 # manager
2379
2380 tr.replace(self.indexfile, trindex * self._io.size)
2381 self._chunkclear()
2382
2383 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2384 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2385 """add a revision to the log
2386
2387 text - the revision data to add
2388 transaction - the transaction object used for rollback
2389 link - the linkrev data to add
2390 p1, p2 - the parent nodeids of the revision
2391 cachedelta - an optional precomputed delta
2392 node - nodeid of revision; typically node is not specified, and it is
2393 computed by default as hash(text, p1, p2), however subclasses might
2394 use different hashing method (and override checkhash() in such case)
2395 flags - the known flags to set on the revision
2396 deltacomputer - an optional _deltacomputer instance shared between
2397 multiple calls
2398 """
2399 if link == nullrev:
2400 raise RevlogError(_("attempted to add linkrev -1 to %s")
2401 % self.indexfile)
2402
2403 if flags:
2404 node = node or self.hash(text, p1, p2)
2405
2406 rawtext, validatehash = self._processflags(text, flags, 'write')
2407
2408 # If the flag processor modifies the revision data, ignore any provided
2409 # cachedelta.
2410 if rawtext != text:
2411 cachedelta = None
2412
2413 if len(rawtext) > _maxentrysize:
2414 raise RevlogError(
2415 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2416 % (self.indexfile, len(rawtext)))
2417
2418 node = node or self.hash(rawtext, p1, p2)
2419 if node in self.nodemap:
2420 return node
2421
2422 if validatehash:
2423 self.checkhash(rawtext, node, p1=p1, p2=p2)
2424
2425 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2426 flags, cachedelta=cachedelta,
2427 deltacomputer=deltacomputer)
2428
2429 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2430 cachedelta=None, deltacomputer=None):
2431 """add a raw revision with known flags, node and parents
2432 useful when reusing a revision not stored in this revlog (ex: received
2433 over wire, or read from an external bundle).
2434 """
2435 dfh = None
2436 if not self._inline:
2437 dfh = self._datafp("a+")
2438 ifh = self._indexfp("a+")
2439 try:
2440 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2441 flags, cachedelta, ifh, dfh,
2442 deltacomputer=deltacomputer)
2443 finally:
2444 if dfh:
2445 dfh.close()
2446 ifh.close()
2447
2448 def compress(self, data):
2449 """Generate a possibly-compressed representation of data."""
2450 if not data:
2451 return '', data
2452
2453 compressed = self._compressor.compress(data)
2454
2455 if compressed:
2456 # The revlog compressor added the header in the returned data.
2457 return '', compressed
2458
2459 if data[0:1] == '\0':
2460 return '', data
2461 return 'u', data
2462
2463 def decompress(self, data):
2464 """Decompress a revlog chunk.
2465
2466 The chunk is expected to begin with a header identifying the
2467 format type so it can be routed to an appropriate decompressor.
2468 """
2469 if not data:
2470 return data
2471
2472 # Revlogs are read much more frequently than they are written and many
2473 # chunks only take microseconds to decompress, so performance is
2474 # important here.
2475 #
2476 # We can make a few assumptions about revlogs:
2477 #
2478 # 1) the majority of chunks will be compressed (as opposed to inline
2479 # raw data).
2480 # 2) decompressing *any* data will likely be at least 10x slower than
2481 # returning raw inline data.
2482 # 3) we want to prioritize common and officially supported compression
2483 # engines
2484 #
2485 # It follows that we want to optimize for "decompress compressed data
2486 # when encoded with common and officially supported compression engines"
2487 # case over "raw data" and "data encoded by less common or non-official
2488 # compression engines." That is why we have the inline lookup first
2489 # followed by the compengines lookup.
2490 #
2491 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2492 # compressed chunks. And this matters for changelog and manifest reads.
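# Header byte meanings: 'x' is zlib-compressed data (zlib streams start
# with 0x78), '\0' is raw data stored as-is (it already starts with a
# NUL), 'u' marks explicitly uncompressed data, and any other byte is
# resolved through the compression engine registry.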
2493 t = data[0:1]
2494
2495 if t == 'x':
2496 try:
2497 return _zlibdecompress(data)
2498 except zlib.error as e:
2499 raise RevlogError(_('revlog decompress error: %s') %
2500 stringutil.forcebytestr(e))
2501 # '\0' is more common than 'u' so it goes first.
2502 elif t == '\0':
2503 return data
2504 elif t == 'u':
2505 return util.buffer(data, 1)
2506
2507 try:
2508 compressor = self._decompressors[t]
2509 except KeyError:
2510 try:
2511 engine = util.compengines.forrevlogheader(t)
2512 compressor = engine.revlogcompressor()
2513 self._decompressors[t] = compressor
2514 except KeyError:
2515 raise RevlogError(_('unknown compression type %r') % t)
2516
2517 return compressor.decompress(data)
2518
2519 def _isgooddeltainfo(self, deltainfo, revinfo):
2520 """Returns True if the given delta is good. Good means that it is within
2521 the disk span, disk size, and chain length bounds that we know to be
2522 performant."""
2523 if deltainfo is None:
2524 return False
2525
2526 # - 'deltainfo.distance' is the distance from the base revision --
2527 # bounding it limits the amount of I/O we need to do.
2528 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2529 # deltas we need to apply -- bounding it limits the amount of CPU
2530 # we consume.
2531
2532 if self._sparserevlog:
2533 # As sparse-read will be used, we can consider that the distance,
2534 # instead of being the span of the whole chunk,
2535 # is the span of the largest read chunk
2536 base = deltainfo.base
2537
2538 if base != nullrev:
2539 deltachain = self._deltachain(base)[0]
2540 else:
2541 deltachain = []
2542
2543 # search for the first non-snapshot revision
2544 for idx, r in enumerate(deltachain):
2545 if not self.issnapshot(r):
2546 break
2547 deltachain = deltachain[idx:]
2548 chunks = _slicechunk(self, deltachain, deltainfo)
2549 all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
2550 distance = max(all_span)
2551 else:
2552 distance = deltainfo.distance
2553
2554 textlen = revinfo.textlen
2555 defaultmax = textlen * 4
2556 maxdist = self._maxdeltachainspan
2557 if not maxdist:
2558 maxdist = distance # ensure the conditional pass
2559 maxdist = max(maxdist, defaultmax)
2560 if self._sparserevlog and maxdist < self._srmingapsize:
2561 # In multiple places, we ignore irrelevant data ranges below a
2562 # certain size. We also apply this tradeoff here and relax the span
2563 # constraint for small enough content.
2564 maxdist = self._srmingapsize
2565
2566 # Bad delta from read span:
2567 #
2568 # If the span of data read is larger than the maximum allowed.
2569 if maxdist < distance:
2570 return False
2571
2572 # Bad delta from new delta size:
2573 #
2574 # If the delta size is larger than the target text, storing the
2575 # delta will be inefficient.
2576 if textlen < deltainfo.deltalen:
2577 return False
2578
2579 # Bad delta from cumulative payload size:
2580 #
2581 # If the sum of the deltas gets larger than K * the target text length.
2582 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
2583 return False
2584
2585 # Bad delta from chain length:
2586 #
2587 # If the number of deltas in the chain gets too high.
2588 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2589 return False
2590
2591 # bad delta from intermediate snapshot size limit
2592 #
2593 # If an intermediate snapshot's size is higher than the limit. The
2594 # limit exists to prevent endless chains of intermediate deltas from
2595 # being created.
2596 if (deltainfo.snapshotdepth is not None and
2597 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
2598 return False
2599
2600 # bad delta if new intermediate snapshot is larger than the previous
2601 # snapshot
2602 if (deltainfo.snapshotdepth
2603 and self.length(deltainfo.base) < deltainfo.deltalen):
2604 return False
2605
2606 return True
2607
2608 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2609 cachedelta, ifh, dfh, alwayscache=False,
2610 deltacomputer=None):
2611 """internal function to add revisions to the log
2612
2613 see addrevision for argument descriptions.
2614
2615 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2616
2617 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2618 be used.
2619
2620 invariants:
2621 - rawtext is optional (can be None); if not set, cachedelta must be set.
2622 if both are set, they must correspond to each other.
2623 """
2624 if node == nullid:
2625 raise RevlogError(_("%s: attempt to add null revision") %
2626 (self.indexfile))
2627 if node == wdirid or node in wdirfilenodeids:
2628 raise RevlogError(_("%s: attempt to add wdir revision") %
2629 (self.indexfile))
2630
2631 if self._inline:
2632 fh = ifh
2633 else:
2634 fh = dfh
2635
2636 btext = [rawtext]
2637
2638 curr = len(self)
2639 prev = curr - 1
2640 offset = self.end(prev)
2641 p1r, p2r = self.rev(p1), self.rev(p2)
2642
2643 # full versions are inserted when the needed deltas
2644 # become comparable to the uncompressed text
2645 if rawtext is None:
2646 # need the rawtext size before it is changed by flag processors, which is
2647 # the non-raw size. use revlog explicitly to avoid filelog's extra
2648 # logic that might remove metadata size.
2649 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2650 cachedelta[1])
2651 else:
2652 textlen = len(rawtext)
2653
2654 if deltacomputer is None:
2655 deltacomputer = _deltacomputer(self)
2656
2657 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2658
2659 # no delta for flag processor revision (see "candelta" for why)
2660 # not calling candelta since only one revision needs testing, also to
2661 # avoid the overhead of fetching flags again.
2662 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2663 deltainfo = None
2664 else:
2665 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2666
2667 if deltainfo is not None:
2668 base = deltainfo.base
2669 chainbase = deltainfo.chainbase
2670 data = deltainfo.data
2671 l = deltainfo.deltalen
2672 else:
2673 rawtext = deltacomputer.buildtext(revinfo, fh)
2674 data = self.compress(rawtext)
2675 l = len(data[1]) + len(data[0])
2676 base = chainbase = curr
2677
2678 e = (offset_type(offset, flags), l, textlen,
2679 base, link, p1r, p2r, node)
2680 self.index.append(e)
2681 self.nodemap[node] = curr
2682
2683 entry = self._io.packentry(e, self.node, self.version, curr)
2684 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2685
2686 if alwayscache and rawtext is None:
2687 rawtext = deltacomputer.buildtext(revinfo, fh)
2688
2689 if type(rawtext) == bytes: # only accept immutable objects
2690 self._cache = (node, curr, rawtext)
2691 self._chainbasecache[curr] = chainbase
2692 return node
2693
2694 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2695 # Files opened in a+ mode have inconsistent behavior on various
2696 # platforms. Windows requires that a file positioning call be made
2697 # when the file handle transitions between reads and writes. See
2698 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2699 # platforms, Python or the platform itself can be buggy. Some versions
2700 # of Solaris have been observed to not append at the end of the file
2701 # if the file was seeked to before the end. See issue4943 for more.
2702 #
2703 # We work around this issue by inserting a seek() before writing.
2704 # Note: This is likely not necessary on Python 3.
2705 ifh.seek(0, os.SEEK_END)
2706 if dfh:
2707 dfh.seek(0, os.SEEK_END)
2708
2709 curr = len(self) - 1
2710 if not self._inline:
2711 transaction.add(self.datafile, offset)
2712 transaction.add(self.indexfile, curr * len(entry))
2713 if data[0]:
2714 dfh.write(data[0])
2715 dfh.write(data[1])
2716 ifh.write(entry)
2717 else:
2718 offset += curr * self._io.size
2719 transaction.add(self.indexfile, offset, curr)
2720 ifh.write(entry)
2721 ifh.write(data[0])
2722 ifh.write(data[1])
2723 self._enforceinlinesize(transaction, ifh)
2724
2725 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2726 """
2727 add a delta group
2728
2729 given a set of deltas, add them to the revision log. the
2730 first delta is against its parent, which should be in our
2731 log, the rest are against the previous delta.
2732
2733 If ``addrevisioncb`` is defined, it will be called with arguments of
2734 this revlog and the node that was added.
2735 """
2736
2737 nodes = []
2738
2739 r = len(self)
2740 end = 0
2741 if r:
2742 end = self.end(r - 1)
2743 ifh = self._indexfp("a+")
2744 isize = r * self._io.size
2745 if self._inline:
2746 transaction.add(self.indexfile, end + isize, r)
2747 dfh = None
2748 else:
2749 transaction.add(self.indexfile, isize, r)
2750 transaction.add(self.datafile, end)
2751 dfh = self._datafp("a+")
2752 def flush():
2753 if dfh:
2754 dfh.flush()
2755 ifh.flush()
2756 try:
2757 deltacomputer = _deltacomputer(self)
2758 # loop through our set of deltas
2759 for data in deltas:
2760 node, p1, p2, linknode, deltabase, delta, flags = data
2761 link = linkmapper(linknode)
2762 flags = flags or REVIDX_DEFAULT_FLAGS
2763
2764 nodes.append(node)
2765
2766 if node in self.nodemap:
2767 # this can happen if two branches make the same change
2768 continue
2769
2770 for p in (p1, p2):
2771 if p not in self.nodemap:
2772 raise LookupError(p, self.indexfile,
2773 _('unknown parent'))
2774
2775 if deltabase not in self.nodemap:
2776 raise LookupError(deltabase, self.indexfile,
2777 _('unknown delta base'))
2778
2779 baserev = self.rev(deltabase)
2780
2781 if baserev != nullrev and self.iscensored(baserev):
2782 # if base is censored, delta must be full replacement in a
2783 # single patch operation
2784 hlen = struct.calcsize(">lll")
2785 oldlen = self.rawsize(baserev)
2786 newlen = len(delta) - hlen
2787 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2788 raise error.CensoredBaseError(self.indexfile,
2789 self.node(baserev))
2790
2791 if not flags and self._peek_iscensored(baserev, delta, flush):
2792 flags |= REVIDX_ISCENSORED
2793
2794 # We assume consumers of addrevisioncb will want to retrieve
2795 # the added revision, which will require a call to
2796 # revision(). revision() will fast path if there is a cache
2797 # hit. So, we tell _addrevision() to always cache in this case.
2798 # We're only using addgroup() in the context of changegroup
2799 # generation so the revision data can always be handled as raw
2800 # by the flagprocessor.
2801 self._addrevision(node, None, transaction, link,
2802 p1, p2, flags, (baserev, delta),
2803 ifh, dfh,
2804 alwayscache=bool(addrevisioncb),
2805 deltacomputer=deltacomputer)
2806
2807 if addrevisioncb:
2808 addrevisioncb(self, node)
2809
2810 if not dfh and not self._inline:
2811 # addrevision switched from inline to conventional
2812 # reopen the index
2813 ifh.close()
2814 dfh = self._datafp("a+")
2815 ifh = self._indexfp("a+")
2816 finally:
2817 if dfh:
2818 dfh.close()
2819 ifh.close()
2820
2821 return nodes
2822
2823 def iscensored(self, rev):
2824 """Check if a file revision is censored."""
2825 if not self._censorable:
2826 return False
2827
2828 return self.flags(rev) & REVIDX_ISCENSORED
2829
2830 def _peek_iscensored(self, baserev, delta, flush):
2831 """Quickly check if a delta produces a censored revision."""
2832 if not self._censorable:
2833 return False
2834
2835 # Fragile heuristic: unless new file meta keys are added alphabetically
2836 # preceding "censored", all censored revisions are prefixed by
2837 # "\1\ncensored:". A delta producing such a censored revision must be a
2838 # full-replacement delta, so we inspect the first and only patch in the
2839 # delta for this prefix.
2840 hlen = struct.calcsize(">lll")
2841 if len(delta) <= hlen:
2842 return False
2843
2844 oldlen = self.rawsize(baserev)
2845 newlen = len(delta) - hlen
2846 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2847 return False
2848
2849 add = "\1\ncensored:"
2850 addlen = len(add)
2851 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2852
2853 def getstrippoint(self, minlink):
2854 """find the minimum rev that must be stripped to strip the linkrev
2855
2856 Returns a tuple containing the minimum rev and a set of all revs that
2857 have linkrevs that will be broken by this strip.
2858 """
2859 brokenrevs = set()
2860 strippoint = len(self)
2861
2862 heads = {}
2863 futurelargelinkrevs = set()
2864 for head in self.headrevs():
2865 headlinkrev = self.linkrev(head)
2866 heads[head] = headlinkrev
2867 if headlinkrev >= minlink:
2868 futurelargelinkrevs.add(headlinkrev)
2869
2870 # This algorithm involves walking down the rev graph, starting at the
2871 # heads. Since the revs are topologically sorted according to linkrev,
2872 # once all head linkrevs are below the minlink, we know there are
2873 # no more revs that could have a linkrev greater than minlink.
2874 # So we can stop walking.
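# For example, in a linear revlog of four revisions whose linkrevs equal
# their revs, getstrippoint(2) walks down from the head, pops revs 3 and 2
# (their linkrevs are >= 2, so nothing is "broken") and stops once no
# pending head carries a linkrev >= 2, returning (2, set()).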
2875 while futurelargelinkrevs:
2876 strippoint -= 1
2877 linkrev = heads.pop(strippoint)
2878
2879 if linkrev < minlink:
2880 brokenrevs.add(strippoint)
2881 else:
2882 futurelargelinkrevs.remove(linkrev)
2883
2884 for p in self.parentrevs(strippoint):
2885 if p != nullrev:
2886 plinkrev = self.linkrev(p)
2887 heads[p] = plinkrev
2888 if plinkrev >= minlink:
2889 futurelargelinkrevs.add(plinkrev)
2890
2891 return strippoint, brokenrevs
2892
2893 def strip(self, minlink, transaction):
2894 """truncate the revlog on the first revision with a linkrev >= minlink
2895
2896 This function is called when we're stripping revision minlink and
2897 its descendants from the repository.
2898
2899 We have to remove all revisions with linkrev >= minlink, because
2900 the equivalent changelog revisions will be renumbered after the
2901 strip.
2902
2903 So we truncate the revlog on the first of these revisions, and
2904 trust that the caller has saved the revisions that shouldn't be
2905 removed and that it'll re-add them after this truncation.
2906 """
2907 if len(self) == 0:
2908 return
2909
2910 rev, _ = self.getstrippoint(minlink)
2911 if rev == len(self):
2912 return
2913
2914 # first truncate the files on disk
2915 end = self.start(rev)
2916 if not self._inline:
2917 transaction.add(self.datafile, end)
2918 end = rev * self._io.size
2919 else:
2920 end += rev * self._io.size
2921
2922 transaction.add(self.indexfile, end)
2923
2924 # then reset internal state in memory to forget those revisions
2925 self._cache = None
2926 self._chaininfocache = {}
2927 self._chunkclear()
2928 for x in pycompat.xrange(rev, len(self)):
2929 del self.nodemap[self.node(x)]
2930
2931 del self.index[rev:-1]
2932 self._nodepos = None
2933
2934 def checksize(self):
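# Compare the on-disk file sizes with what the index describes and return
# (data excess, index excess): the number of unexpected trailing bytes in
# the data file and the index file. Missing files count as empty, and for
# inline revlogs the combined file is reported through the second value.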
2935 expected = 0
2936 if len(self):
2937 expected = max(0, self.end(len(self) - 1))
2938
2939 try:
2940 with self._datafp() as f:
2941 f.seek(0, 2)
2942 actual = f.tell()
2943 dd = actual - expected
2944 except IOError as inst:
2945 if inst.errno != errno.ENOENT:
2946 raise
2947 dd = 0
2948
2949 try:
2950 f = self.opener(self.indexfile)
2951 f.seek(0, 2)
2952 actual = f.tell()
2953 f.close()
2954 s = self._io.size
2955 i = max(0, actual // s)
2956 di = actual - (i * s)
2957 if self._inline:
2958 databytes = 0
2959 for r in self:
2960 databytes += max(0, self.length(r))
2961 dd = 0
2962 di = actual - len(self) * s - databytes
2963 except IOError as inst:
2964 if inst.errno != errno.ENOENT:
2965 raise
2966 di = 0
2967
2968 return (dd, di)
2969
2970 def files(self):
2971 res = [self.indexfile]
2972 if not self._inline:
2973 res.append(self.datafile)
2974 return res
2975
2976 def emitrevisiondeltas(self, requests):
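# For each requested node this picks a delta base (honouring an explicit
# basenode, otherwise falling back to the stored delta parent or the
# previously emitted revision) and yields a revlogrevisiondelta carrying
# either a delta or, when no usable base exists or censorship is
# involved, the full raw revision.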
2977 frev = self.rev
2978
2979 prevrev = None
2980 for request in requests:
2981 node = request.node
2982 rev = frev(node)
2983
2984 if prevrev is None:
2985 prevrev = self.index[rev][5]
2986
2987 # Requesting a full revision.
2988 if request.basenode == nullid:
2989 baserev = nullrev
2990 # Requesting an explicit revision.
2991 elif request.basenode is not None:
2992 baserev = frev(request.basenode)
2993 # Allowing us to choose.
2994 else:
2995 p1rev, p2rev = self.parentrevs(rev)
2996 deltaparentrev = self.deltaparent(rev)
2997
2998 # Avoid sending full revisions when delta parent is null. Pick
2999 # prev in that case. It's tempting to pick p1 in this case, as
3000 # p1 will be smaller in the common case. However, computing a
3001 # delta against p1 may require resolving the raw text of p1,
3002 # which could be expensive. The revlog caches should have prev
3003 # cached, meaning less CPU for delta generation. There is
3004 # likely room to add a flag and/or config option to control this
3005 # behavior.
3006 if deltaparentrev == nullrev and self._storedeltachains:
3007 baserev = prevrev
3008
3009 # Revlog is configured to use full snapshot for a reason.
3010 # Stick to full snapshot.
3011 elif deltaparentrev == nullrev:
3012 baserev = nullrev
3013
3014 # Pick previous when we can't be sure the base is available
3015 # on consumer.
3016 elif deltaparentrev not in (p1rev, p2rev, prevrev):
3017 baserev = prevrev
3018 else:
3019 baserev = deltaparentrev
3020
3021 if baserev != nullrev and not self.candelta(baserev, rev):
3022 baserev = nullrev
3023
3024 revision = None
3025 delta = None
3026 baserevisionsize = None
3027
3028 if self.iscensored(baserev) or self.iscensored(rev):
3029 try:
3030 revision = self.revision(node, raw=True)
3031 except error.CensoredNodeError as e:
3032 revision = e.tombstone
3033
3034 if baserev != nullrev:
3035 baserevisionsize = self.rawsize(baserev)
3036
3037 elif baserev == nullrev:
3038 revision = self.revision(node, raw=True)
3039 else:
3040 delta = self.revdiff(baserev, rev)
3041
3042 extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0
3043
3044 yield revlogrevisiondelta(
3045 node=node,
3046 p1node=request.p1node,
3047 p2node=request.p2node,
3048 linknode=request.linknode,
3049 basenode=self.node(baserev),
3050 flags=self.flags(rev) | extraflags,
3051 baserevisionsize=baserevisionsize,
3052 revision=revision,
3053 delta=delta)
3054
3055 prevrev = rev
3056
3057 DELTAREUSEALWAYS = 'always'
3058 DELTAREUSESAMEREVS = 'samerevs'
3059 DELTAREUSENEVER = 'never'
3060
3061 DELTAREUSEFULLADD = 'fulladd'
3062
3063 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
3064
3065 def clone(self, tr, destrevlog, addrevisioncb=None,
3066 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
3067 """Copy this revlog to another, possibly with format changes.
3068
3069 The destination revlog will contain the same revisions and nodes.
3070 However, it may not be bit-for-bit identical due to e.g. delta encoding
3071 differences.
3072
3073 The ``deltareuse`` argument controls how deltas from the existing revlog
3074 are preserved in the destination revlog. The argument can have the
3075 following values:
3076
3077 DELTAREUSEALWAYS
3078 Deltas will always be reused (if possible), even if the destination
3079 revlog would not select the same revisions for the delta. This is the
3080 fastest mode of operation.
3081 DELTAREUSESAMEREVS
3082 Deltas will be reused if the destination revlog would pick the same
3083 revisions for the delta. This mode strikes a balance between speed
3084 and optimization.
3085 DELTAREUSENEVER
3086 Deltas will never be reused. This is the slowest mode of execution.
3087 This mode can be used to recompute deltas (e.g. if the diff/delta
3088 algorithm changes).
3089
3090 Delta computation can be slow, so the choice of delta reuse policy can
3091 significantly affect run time.
3092
3093 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3094 two extremes. Deltas will be reused if they are appropriate. But if a
3095 better base revision would be chosen for the delta, it is used. This means if you
3096 are converting a non-generaldelta revlog to a generaldelta revlog,
3097 deltas will be recomputed if the delta's parent isn't a parent of the
3098 revision.
3099
3100 In addition to the delta policy, the ``deltabothparents`` argument
3101 controls whether to compute deltas against both parents for merges.
3102 If it is not set, the destination revlog's existing setting is kept.
3103 """
3104 if deltareuse not in self.DELTAREUSEALL:
3105 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
3106
3107 if len(destrevlog):
3108 raise ValueError(_('destination revlog is not empty'))
3109
3110 if getattr(self, 'filteredrevs', None):
3111 raise ValueError(_('source revlog has filtered revisions'))
3112 if getattr(destrevlog, 'filteredrevs', None):
3113 raise ValueError(_('destination revlog has filtered revisions'))
3114
3115 # lazydeltabase controls whether to reuse a cached delta, if possible.
3116 oldlazydeltabase = destrevlog._lazydeltabase
3117 oldamd = destrevlog._deltabothparents
3118
3119 try:
3120 if deltareuse == self.DELTAREUSEALWAYS:
3121 destrevlog._lazydeltabase = True
3122 elif deltareuse == self.DELTAREUSESAMEREVS:
3123 destrevlog._lazydeltabase = False
3124
3125 destrevlog._deltabothparents = deltabothparents or oldamd
3126
3127 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
3128 self.DELTAREUSESAMEREVS)
3129
3130 deltacomputer = _deltacomputer(destrevlog)
3131 index = self.index
3132 for rev in self:
3133 entry = index[rev]
3134
3135 # Some classes override linkrev to take filtered revs into
3136 # account. Use raw entry from index.
3137 flags = entry[0] & 0xffff
3138 linkrev = entry[4]
3139 p1 = index[entry[5]][7]
3140 p2 = index[entry[6]][7]
3141 node = entry[7]
3142
3143 # (Possibly) reuse the delta from the revlog if allowed and
3144 # the revlog chunk is a delta.
3145 cachedelta = None
3146 rawtext = None
3147 if populatecachedelta:
3148 dp = self.deltaparent(rev)
3149 if dp != nullrev:
3150 cachedelta = (dp, bytes(self._chunk(rev)))
3151
3152 if not cachedelta:
3153 rawtext = self.revision(rev, raw=True)
3154
3155
3156 if deltareuse == self.DELTAREUSEFULLADD:
3157 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
3158 cachedelta=cachedelta,
3159 node=node, flags=flags,
3160 deltacomputer=deltacomputer)
3161 else:
3162 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
3163 checkambig=False)
3164 dfh = None
3165 if not destrevlog._inline:
3166 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
3167 try:
3168 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
3169 p2, flags, cachedelta, ifh, dfh,
3170 deltacomputer=deltacomputer)
3171 finally:
3172 if dfh:
3173 dfh.close()
3174 ifh.close()
3175
3176 if addrevisioncb:
3177 addrevisioncb(self, rev, node)
3178 finally:
3179 destrevlog._lazydeltabase = oldlazydeltabase
3180 destrevlog._deltabothparents = oldamd