revlog: introduce an explicit tracking of what the revlog is about...
marmoute
r47838:4c041c71 default

The requested changes are too big and the content was truncated.

@@ -1,47 +1,55 b''
1 1 #!/usr/bin/env python3
2 2 # Dump revlogs as raw data stream
3 3 # $ find .hg/store/ -name "*.i" | xargs dumprevlog > repo.dump
4 4
5 5 from __future__ import absolute_import, print_function
6 6
7 7 import sys
8 8 from mercurial.node import hex
9 9 from mercurial import (
10 10 encoding,
11 11 pycompat,
12 12 revlog,
13 13 )
14 14 from mercurial.utils import procutil
15 15
16 from mercurial.revlogutils import (
17 constants as revlog_constants,
18 )
19
16 20 for fp in (sys.stdin, sys.stdout, sys.stderr):
17 21 procutil.setbinary(fp)
18 22
19 23
20 24 def binopen(path, mode=b'rb'):
21 25 if b'b' not in mode:
22 26 mode = mode + b'b'
23 27 return open(path, pycompat.sysstr(mode))
24 28
25 29
26 30 binopen.options = {}
27 31
28 32
29 33 def printb(data, end=b'\n'):
30 34 sys.stdout.flush()
31 35 procutil.stdout.write(data + end)
32 36
33 37
34 38 for f in sys.argv[1:]:
35 r = revlog.revlog(binopen, encoding.strtolocal(f))
39 r = revlog.revlog(
40 binopen,
41 target=(revlog_constants.KIND_OTHER, b'dump-revlog'),
42 indexfile=encoding.strtolocal(f),
43 )
36 44 print("file:", f)
37 45 for i in r:
38 46 n = r.node(i)
39 47 p = r.parents(n)
40 48 d = r.revision(n)
41 49 printb(b"node: %s" % hex(n))
42 50 printb(b"linkrev: %d" % r.linkrev(i))
43 51 printb(b"parents: %s %s" % (hex(p[0]), hex(p[1])))
44 52 printb(b"length: %d" % len(d))
45 53 printb(b"-start-")
46 54 printb(d)
47 55 printb(b"-end-")
@@ -1,3927 +1,3944 b''
1 1 # perf.py - performance test routines
2 2 '''helper extension to measure performance
3 3
4 4 Configurations
5 5 ==============
6 6
7 7 ``perf``
8 8 --------
9 9
10 10 ``all-timing``
11 11 When set, additional statistics will be reported for each benchmark: best,
11 11 worst, median, average. If not set, only the best timing is reported
13 13 (default: off).
14 14
15 15 ``presleep``
16 16 number of seconds to wait before any group of runs (default: 1)
17 17
18 18 ``pre-run``
19 19 number of runs to perform before starting measurement.
20 20
21 21 ``profile-benchmark``
22 22 Enable profiling for the benchmarked section.
23 23 (The first iteration is benchmarked)
24 24
25 25 ``run-limits``
26 26 Control the number of runs each benchmark will perform. The option value
27 27 should be a list of `<time>-<numberofrun>` pairs. After each run the
28 28 conditions are considered in order with the following logic:
29 29
30 30 If the benchmark has been running for <time> seconds and we have performed
31 31 <numberofrun> iterations, stop the benchmark.
32 32
33 33 The default value is: `3.0-100, 10.0-3`
34 34
35 35 ``stub``
36 36 When set, benchmarks will only be run once, useful for testing
37 37 (default: off)
38 38 '''
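
The `run-limits` pairs described in the docstring above map directly onto the DEFAULTLIMITS / _timer() stop condition that appears further down in this file. A small illustrative sketch (not part of the diff) of how a value such as `3.0-100, 10.0-3` behaves:

    # Each pair is (elapsed seconds, minimum run count); the benchmark stops
    # as soon as any pair is satisfied, mirroring the loop in _timer().
    limits = [(3.0, 100), (10.0, 3)]

    def should_stop(elapsed, count):
        return any(elapsed >= t and count >= mincount for t, mincount in limits)

    assert not should_stop(2.5, 500)  # under 3s: keep measuring
    assert should_stop(3.2, 150)      # 3s elapsed and at least 100 runs done
    assert should_stop(11.0, 5)       # slow benchmark: 3 runs are enough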
39 39
40 40 # "historical portability" policy of perf.py:
41 41 #
42 42 # We have to do:
43 43 # - make perf.py "loadable" with as wide Mercurial version as possible
44 44 # This doesn't mean that perf commands work correctly with that Mercurial.
45 45 # BTW, perf.py itself has been available since 1.1 (or eb240755386d).
46 46 # - make historical perf command work correctly with as wide Mercurial
47 47 # version as possible
48 48 #
49 49 # We have to do, if possible with reasonable cost:
50 50 # - make recent perf command for historical feature work correctly
51 51 # with early Mercurial
52 52 #
53 53 # We don't have to do:
54 54 # - make perf command for recent feature work correctly with early
55 55 # Mercurial
56 56
57 57 from __future__ import absolute_import
58 58 import contextlib
59 59 import functools
60 60 import gc
61 61 import os
62 62 import random
63 63 import shutil
64 64 import struct
65 65 import sys
66 66 import tempfile
67 67 import threading
68 68 import time
69
70 import mercurial.revlog
69 71 from mercurial import (
70 72 changegroup,
71 73 cmdutil,
72 74 commands,
73 75 copies,
74 76 error,
75 77 extensions,
76 78 hg,
77 79 mdiff,
78 80 merge,
79 revlog,
80 81 util,
81 82 )
82 83
83 84 # for "historical portability":
84 85 # try to import modules separately (in dict order), and ignore
85 86 # failure, because these aren't available with early Mercurial
86 87 try:
87 88 from mercurial import branchmap # since 2.5 (or bcee63733aad)
88 89 except ImportError:
89 90 pass
90 91 try:
91 92 from mercurial import obsolete # since 2.3 (or ad0d6c2b3279)
92 93 except ImportError:
93 94 pass
94 95 try:
95 96 from mercurial import registrar # since 3.7 (or 37d50250b696)
96 97
97 98 dir(registrar) # forcibly load it
98 99 except ImportError:
99 100 registrar = None
100 101 try:
101 102 from mercurial import repoview # since 2.5 (or 3a6ddacb7198)
102 103 except ImportError:
103 104 pass
104 105 try:
105 106 from mercurial.utils import repoviewutil # since 5.0
106 107 except ImportError:
107 108 repoviewutil = None
108 109 try:
109 110 from mercurial import scmutil # since 1.9 (or 8b252e826c68)
110 111 except ImportError:
111 112 pass
112 113 try:
113 114 from mercurial import setdiscovery # since 1.9 (or cb98fed52495)
114 115 except ImportError:
115 116 pass
116 117
117 118 try:
118 119 from mercurial import profiling
119 120 except ImportError:
120 121 profiling = None
121 122
123 try:
124 from mercurial.revlogutils import constants as revlog_constants
125
126 perf_rl_kind = (revlog_constants.KIND_OTHER, b'created-by-perf')
127
128 def revlog(opener, *args, **kwargs):
129 return mercurial.revlog.revlog(opener, perf_rl_kind, *args, **kwargs)
130
131
132 except (ImportError, AttributeError):
133 perf_rl_kind = None
134
135 def revlog(opener, *args, **kwargs):
136 return mercurial.revlog.revlog(opener, *args, **kwargs)
137
122 138
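
The wrapper above is what keeps the rest of perf.py version-agnostic: call sites use the local revlog() helper instead of mercurial.revlog.revlog directly, and the helper injects the (KIND_OTHER, b'created-by-perf') target only when this changeset's constants exist. A sketch of a call site, mirroring the perfnodelookup change later in this diff (repo and getsvfs come from the surrounding file):

    # On a Mercurial with revlogutils.constants this resolves to
    # revlog.revlog(svfs, (KIND_OTHER, b'created-by-perf'), indexfile=...);
    # on an older Mercurial the helper is a plain pass-through.
    cl = revlog(getsvfs(repo), indexfile=b"00changelog.i")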
123 139 def identity(a):
124 140 return a
125 141
126 142
127 143 try:
128 144 from mercurial import pycompat
129 145
130 146 getargspec = pycompat.getargspec # added to module after 4.5
131 147 _byteskwargs = pycompat.byteskwargs # since 4.1 (or fbc3f73dc802)
132 148 _sysstr = pycompat.sysstr # since 4.0 (or 2219f4f82ede)
133 149 _bytestr = pycompat.bytestr # since 4.2 (or b70407bd84d5)
134 150 _xrange = pycompat.xrange # since 4.8 (or 7eba8f83129b)
135 151 fsencode = pycompat.fsencode # since 3.9 (or f4a5e0e86a7e)
136 152 if pycompat.ispy3:
137 153 _maxint = sys.maxsize # per py3 docs for replacing maxint
138 154 else:
139 155 _maxint = sys.maxint
140 156 except (NameError, ImportError, AttributeError):
141 157 import inspect
142 158
143 159 getargspec = inspect.getargspec
144 160 _byteskwargs = identity
145 161 _bytestr = str
146 162 fsencode = identity # no py3 support
147 163 _maxint = sys.maxint # no py3 support
148 164 _sysstr = lambda x: x # no py3 support
149 165 _xrange = xrange
150 166
151 167 try:
152 168 # 4.7+
153 169 queue = pycompat.queue.Queue
154 170 except (NameError, AttributeError, ImportError):
155 171 # <4.7.
156 172 try:
157 173 queue = pycompat.queue
158 174 except (NameError, AttributeError, ImportError):
159 175 import Queue as queue
160 176
161 177 try:
162 178 from mercurial import logcmdutil
163 179
164 180 makelogtemplater = logcmdutil.maketemplater
165 181 except (AttributeError, ImportError):
166 182 try:
167 183 makelogtemplater = cmdutil.makelogtemplater
168 184 except (AttributeError, ImportError):
169 185 makelogtemplater = None
170 186
171 187 # for "historical portability":
172 188 # define util.safehasattr forcibly, because util.safehasattr has been
173 189 # available since 1.9.3 (or 94b200a11cf7)
174 190 _undefined = object()
175 191
176 192
177 193 def safehasattr(thing, attr):
178 194 return getattr(thing, _sysstr(attr), _undefined) is not _undefined
179 195
180 196
181 197 setattr(util, 'safehasattr', safehasattr)
182 198
183 199 # for "historical portability":
184 200 # define util.timer forcibly, because util.timer has been available
185 201 # since ae5d60bb70c9
186 202 if safehasattr(time, 'perf_counter'):
187 203 util.timer = time.perf_counter
188 204 elif os.name == b'nt':
189 205 util.timer = time.clock
190 206 else:
191 207 util.timer = time.time
192 208
193 209 # for "historical portability":
194 210 # use locally defined empty option list, if formatteropts isn't
195 211 # available, because commands.formatteropts has been available since
196 212 # 3.2 (or 7a7eed5176a4), even though formatting itself has been
197 213 # available since 2.2 (or ae5f92e154d3)
198 214 formatteropts = getattr(
199 215 cmdutil, "formatteropts", getattr(commands, "formatteropts", [])
200 216 )
201 217
202 218 # for "historical portability":
203 219 # use locally defined option list, if debugrevlogopts isn't available,
204 220 # because commands.debugrevlogopts has been available since 3.7 (or
205 221 # 5606f7d0d063), even though cmdutil.openrevlog() has been available
206 222 # since 1.9 (or a79fea6b3e77).
207 223 revlogopts = getattr(
208 224 cmdutil,
209 225 "debugrevlogopts",
210 226 getattr(
211 227 commands,
212 228 "debugrevlogopts",
213 229 [
214 230 (b'c', b'changelog', False, b'open changelog'),
215 231 (b'm', b'manifest', False, b'open manifest'),
216 232 (b'', b'dir', False, b'open directory manifest'),
217 233 ],
218 234 ),
219 235 )
220 236
221 237 cmdtable = {}
222 238
223 239 # for "historical portability":
224 240 # define parsealiases locally, because cmdutil.parsealiases has been
225 241 # available since 1.5 (or 6252852b4332)
226 242 def parsealiases(cmd):
227 243 return cmd.split(b"|")
228 244
229 245
230 246 if safehasattr(registrar, 'command'):
231 247 command = registrar.command(cmdtable)
232 248 elif safehasattr(cmdutil, 'command'):
233 249 command = cmdutil.command(cmdtable)
234 250 if 'norepo' not in getargspec(command).args:
235 251 # for "historical portability":
236 252 # wrap original cmdutil.command, because "norepo" option has
237 253 # been available since 3.1 (or 75a96326cecb)
238 254 _command = command
239 255
240 256 def command(name, options=(), synopsis=None, norepo=False):
241 257 if norepo:
242 258 commands.norepo += b' %s' % b' '.join(parsealiases(name))
243 259 return _command(name, list(options), synopsis)
244 260
245 261
246 262 else:
247 263 # for "historical portability":
248 264 # define "@command" annotation locally, because cmdutil.command
249 265 # has been available since 1.9 (or 2daa5179e73f)
250 266 def command(name, options=(), synopsis=None, norepo=False):
251 267 def decorator(func):
252 268 if synopsis:
253 269 cmdtable[name] = func, list(options), synopsis
254 270 else:
255 271 cmdtable[name] = func, list(options)
256 272 if norepo:
257 273 commands.norepo += b' %s' % b' '.join(parsealiases(name))
258 274 return func
259 275
260 276 return decorator
261 277
262 278
263 279 try:
264 280 import mercurial.registrar
265 281 import mercurial.configitems
266 282
267 283 configtable = {}
268 284 configitem = mercurial.registrar.configitem(configtable)
269 285 configitem(
270 286 b'perf',
271 287 b'presleep',
272 288 default=mercurial.configitems.dynamicdefault,
273 289 experimental=True,
274 290 )
275 291 configitem(
276 292 b'perf',
277 293 b'stub',
278 294 default=mercurial.configitems.dynamicdefault,
279 295 experimental=True,
280 296 )
281 297 configitem(
282 298 b'perf',
283 299 b'parentscount',
284 300 default=mercurial.configitems.dynamicdefault,
285 301 experimental=True,
286 302 )
287 303 configitem(
288 304 b'perf',
289 305 b'all-timing',
290 306 default=mercurial.configitems.dynamicdefault,
291 307 experimental=True,
292 308 )
293 309 configitem(
294 310 b'perf',
295 311 b'pre-run',
296 312 default=mercurial.configitems.dynamicdefault,
297 313 )
298 314 configitem(
299 315 b'perf',
300 316 b'profile-benchmark',
301 317 default=mercurial.configitems.dynamicdefault,
302 318 )
303 319 configitem(
304 320 b'perf',
305 321 b'run-limits',
306 322 default=mercurial.configitems.dynamicdefault,
307 323 experimental=True,
308 324 )
309 325 except (ImportError, AttributeError):
310 326 pass
311 327 except TypeError:
312 328 # compatibility fix for a11fd395e83f
313 329 # hg version: 5.2
314 330 configitem(
315 331 b'perf',
316 332 b'presleep',
317 333 default=mercurial.configitems.dynamicdefault,
318 334 )
319 335 configitem(
320 336 b'perf',
321 337 b'stub',
322 338 default=mercurial.configitems.dynamicdefault,
323 339 )
324 340 configitem(
325 341 b'perf',
326 342 b'parentscount',
327 343 default=mercurial.configitems.dynamicdefault,
328 344 )
329 345 configitem(
330 346 b'perf',
331 347 b'all-timing',
332 348 default=mercurial.configitems.dynamicdefault,
333 349 )
334 350 configitem(
335 351 b'perf',
336 352 b'pre-run',
337 353 default=mercurial.configitems.dynamicdefault,
338 354 )
339 355 configitem(
340 356 b'perf',
341 357 b'profile-benchmark',
342 358 default=mercurial.configitems.dynamicdefault,
343 359 )
344 360 configitem(
345 361 b'perf',
346 362 b'run-limits',
347 363 default=mercurial.configitems.dynamicdefault,
348 364 )
349 365
350 366
351 367 def getlen(ui):
352 368 if ui.configbool(b"perf", b"stub", False):
353 369 return lambda x: 1
354 370 return len
355 371
356 372
357 373 class noop(object):
358 374 """dummy context manager"""
359 375
360 376 def __enter__(self):
361 377 pass
362 378
363 379 def __exit__(self, *args):
364 380 pass
365 381
366 382
367 383 NOOPCTX = noop()
368 384
369 385
370 386 def gettimer(ui, opts=None):
371 387 """return a timer function and formatter: (timer, formatter)
372 388
373 389 This function exists to gather the creation of formatter in a single
374 390 place instead of duplicating it in all performance commands."""
375 391
376 392 # enforce an idle period before execution to counteract power management
377 393 # experimental config: perf.presleep
378 394 time.sleep(getint(ui, b"perf", b"presleep", 1))
379 395
380 396 if opts is None:
381 397 opts = {}
382 398 # redirect all to stderr unless buffer api is in use
383 399 if not ui._buffers:
384 400 ui = ui.copy()
385 401 uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
386 402 if uifout:
387 403 # for "historical portability":
388 404 # ui.fout/ferr have been available since 1.9 (or 4e1ccd4c2b6d)
389 405 uifout.set(ui.ferr)
390 406
391 407 # get a formatter
392 408 uiformatter = getattr(ui, 'formatter', None)
393 409 if uiformatter:
394 410 fm = uiformatter(b'perf', opts)
395 411 else:
396 412 # for "historical portability":
397 413 # define formatter locally, because ui.formatter has been
398 414 # available since 2.2 (or ae5f92e154d3)
399 415 from mercurial import node
400 416
401 417 class defaultformatter(object):
402 418 """Minimized composition of baseformatter and plainformatter"""
403 419
404 420 def __init__(self, ui, topic, opts):
405 421 self._ui = ui
406 422 if ui.debugflag:
407 423 self.hexfunc = node.hex
408 424 else:
409 425 self.hexfunc = node.short
410 426
411 427 def __nonzero__(self):
412 428 return False
413 429
414 430 __bool__ = __nonzero__
415 431
416 432 def startitem(self):
417 433 pass
418 434
419 435 def data(self, **data):
420 436 pass
421 437
422 438 def write(self, fields, deftext, *fielddata, **opts):
423 439 self._ui.write(deftext % fielddata, **opts)
424 440
425 441 def condwrite(self, cond, fields, deftext, *fielddata, **opts):
426 442 if cond:
427 443 self._ui.write(deftext % fielddata, **opts)
428 444
429 445 def plain(self, text, **opts):
430 446 self._ui.write(text, **opts)
431 447
432 448 def end(self):
433 449 pass
434 450
435 451 fm = defaultformatter(ui, b'perf', opts)
436 452
437 453 # stub function, runs code only once instead of in a loop
438 454 # experimental config: perf.stub
439 455 if ui.configbool(b"perf", b"stub", False):
440 456 return functools.partial(stub_timer, fm), fm
441 457
442 458 # experimental config: perf.all-timing
443 459 displayall = ui.configbool(b"perf", b"all-timing", False)
444 460
445 461 # experimental config: perf.run-limits
446 462 limitspec = ui.configlist(b"perf", b"run-limits", [])
447 463 limits = []
448 464 for item in limitspec:
449 465 parts = item.split(b'-', 1)
450 466 if len(parts) < 2:
451 467 ui.warn((b'malformatted run limit entry, missing "-": %s\n' % item))
452 468 continue
453 469 try:
454 470 time_limit = float(_sysstr(parts[0]))
455 471 except ValueError as e:
456 472 ui.warn(
457 473 (
458 474 b'malformatted run limit entry, %s: %s\n'
459 475 % (_bytestr(e), item)
460 476 )
461 477 )
462 478 continue
463 479 try:
464 480 run_limit = int(_sysstr(parts[1]))
465 481 except ValueError as e:
466 482 ui.warn(
467 483 (
468 484 b'malformatted run limit entry, %s: %s\n'
469 485 % (_bytestr(e), item)
470 486 )
471 487 )
472 488 continue
473 489 limits.append((time_limit, run_limit))
474 490 if not limits:
475 491 limits = DEFAULTLIMITS
476 492
477 493 profiler = None
478 494 if profiling is not None:
479 495 if ui.configbool(b"perf", b"profile-benchmark", False):
480 496 profiler = profiling.profile(ui)
481 497
482 498 prerun = getint(ui, b"perf", b"pre-run", 0)
483 499 t = functools.partial(
484 500 _timer,
485 501 fm,
486 502 displayall=displayall,
487 503 limits=limits,
488 504 prerun=prerun,
489 505 profiler=profiler,
490 506 )
491 507 return t, fm
492 508
493 509
494 510 def stub_timer(fm, func, setup=None, title=None):
495 511 if setup is not None:
496 512 setup()
497 513 func()
498 514
499 515
500 516 @contextlib.contextmanager
501 517 def timeone():
502 518 r = []
503 519 ostart = os.times()
504 520 cstart = util.timer()
505 521 yield r
506 522 cstop = util.timer()
507 523 ostop = os.times()
508 524 a, b = ostart, ostop
509 525 r.append((cstop - cstart, b[0] - a[0], b[1] - a[1]))
510 526
511 527
512 528 # list of stop condition (elapsed time, minimal run count)
513 529 DEFAULTLIMITS = (
514 530 (3.0, 100),
515 531 (10.0, 3),
516 532 )
517 533
518 534
519 535 def _timer(
520 536 fm,
521 537 func,
522 538 setup=None,
523 539 title=None,
524 540 displayall=False,
525 541 limits=DEFAULTLIMITS,
526 542 prerun=0,
527 543 profiler=None,
528 544 ):
529 545 gc.collect()
530 546 results = []
531 547 begin = util.timer()
532 548 count = 0
533 549 if profiler is None:
534 550 profiler = NOOPCTX
535 551 for i in range(prerun):
536 552 if setup is not None:
537 553 setup()
538 554 func()
539 555 keepgoing = True
540 556 while keepgoing:
541 557 if setup is not None:
542 558 setup()
543 559 with profiler:
544 560 with timeone() as item:
545 561 r = func()
546 562 profiler = NOOPCTX
547 563 count += 1
548 564 results.append(item[0])
549 565 cstop = util.timer()
550 566 # Look for a stop condition.
551 567 elapsed = cstop - begin
552 568 for t, mincount in limits:
553 569 if elapsed >= t and count >= mincount:
554 570 keepgoing = False
555 571 break
556 572
557 573 formatone(fm, results, title=title, result=r, displayall=displayall)
558 574
559 575
560 576 def formatone(fm, timings, title=None, result=None, displayall=False):
561 577
562 578 count = len(timings)
563 579
564 580 fm.startitem()
565 581
566 582 if title:
567 583 fm.write(b'title', b'! %s\n', title)
568 584 if result:
569 585 fm.write(b'result', b'! result: %s\n', result)
570 586
571 587 def display(role, entry):
572 588 prefix = b''
573 589 if role != b'best':
574 590 prefix = b'%s.' % role
575 591 fm.plain(b'!')
576 592 fm.write(prefix + b'wall', b' wall %f', entry[0])
577 593 fm.write(prefix + b'comb', b' comb %f', entry[1] + entry[2])
578 594 fm.write(prefix + b'user', b' user %f', entry[1])
579 595 fm.write(prefix + b'sys', b' sys %f', entry[2])
580 596 fm.write(prefix + b'count', b' (%s of %%d)' % role, count)
581 597 fm.plain(b'\n')
582 598
583 599 timings.sort()
584 600 min_val = timings[0]
585 601 display(b'best', min_val)
586 602 if displayall:
587 603 max_val = timings[-1]
588 604 display(b'max', max_val)
589 605 avg = tuple([sum(x) / count for x in zip(*timings)])
590 606 display(b'avg', avg)
591 607 median = timings[len(timings) // 2]
592 608 display(b'median', median)
593 609
594 610
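
timeone() records one (wall, user, sys) tuple per run and formatone() turns the sorted list into the `! wall ... comb ... user ... sys ...` output lines. A tiny sketch of that data flow with made-up numbers, showing only the "best" row, which is always printed:

    # Entries mimic what timeone() appends: (wall clock, user cpu, system cpu).
    timings = [(0.012, 0.010, 0.001), (0.011, 0.009, 0.001), (0.015, 0.012, 0.002)]
    timings.sort()
    wall, user, system = timings[0]  # best run
    print('! wall %f comb %f user %f sys %f (best of %d)'
          % (wall, user + system, user, system, len(timings)))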
595 611 # utilities for historical portability
596 612
597 613
598 614 def getint(ui, section, name, default):
599 615 # for "historical portability":
600 616 # ui.configint has been available since 1.9 (or fa2b596db182)
601 617 v = ui.config(section, name, None)
602 618 if v is None:
603 619 return default
604 620 try:
605 621 return int(v)
606 622 except ValueError:
607 623 raise error.ConfigError(
608 624 b"%s.%s is not an integer ('%s')" % (section, name, v)
609 625 )
610 626
611 627
612 628 def safeattrsetter(obj, name, ignoremissing=False):
613 629 """Ensure that 'obj' has 'name' attribute before subsequent setattr
614 630
615 631 This function aborts if 'obj' doesn't have the 'name' attribute
616 632 at runtime. This avoids overlooking future removal of an attribute,
617 633 which would break an assumption of the performance measurement.
618 634
619 635 This function returns the object to (1) assign a new value, and
620 636 (2) restore an original value to the attribute.
621 637
622 638 If 'ignoremissing' is true, a missing 'name' attribute doesn't cause
623 639 an abort, and this function returns None. This is useful for
624 640 examining an attribute which isn't guaranteed to exist in all Mercurial
625 641 versions.
626 642 """
627 643 if not util.safehasattr(obj, name):
628 644 if ignoremissing:
629 645 return None
630 646 raise error.Abort(
631 647 (
632 648 b"missing attribute %s of %s might break assumption"
633 649 b" of performance measurement"
634 650 )
635 651 % (name, obj)
636 652 )
637 653
638 654 origvalue = getattr(obj, _sysstr(name))
639 655
640 656 class attrutil(object):
641 657 def set(self, newvalue):
642 658 setattr(obj, _sysstr(name), newvalue)
643 659
644 660 def restore(self):
645 661 setattr(obj, _sysstr(name), origvalue)
646 662
647 663 return attrutil()
648 664
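
safeattrsetter() is the handle most of the monkeypatching in this file goes through: set() swaps an attribute in, restore() puts the original back, and a missing attribute fails loudly instead of being silently created. A short usage sketch, assuming a hypothetical run_benchmark() body (the ui.fout redirection itself mirrors gettimer() above):

    # Redirect benchmark output to stderr for the duration of a run.
    uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
    if uifout:
        uifout.set(ui.ferr)       # as gettimer() does
        try:
            run_benchmark()       # hypothetical benchmark body
        finally:
            uifout.restore()      # put the original stream back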
649 665
650 666 # utilities to examine each internal API changes
651 667
652 668
653 669 def getbranchmapsubsettable():
654 670 # for "historical portability":
655 671 # subsettable is defined in:
656 672 # - branchmap since 2.9 (or 175c6fd8cacc)
657 673 # - repoview since 2.5 (or 59a9f18d4587)
658 674 # - repoviewutil since 5.0
659 675 for mod in (branchmap, repoview, repoviewutil):
660 676 subsettable = getattr(mod, 'subsettable', None)
661 677 if subsettable:
662 678 return subsettable
663 679
664 680 # bisecting in bcee63733aad::59a9f18d4587 can reach here (both
665 681 # branchmap and repoview modules exist, but subsettable attribute
666 682 # doesn't)
667 683 raise error.Abort(
668 684 b"perfbranchmap not available with this Mercurial",
669 685 hint=b"use 2.5 or later",
670 686 )
671 687
672 688
673 689 def getsvfs(repo):
674 690 """Return appropriate object to access files under .hg/store"""
675 691 # for "historical portability":
676 692 # repo.svfs has been available since 2.3 (or 7034365089bf)
677 693 svfs = getattr(repo, 'svfs', None)
678 694 if svfs:
679 695 return svfs
680 696 else:
681 697 return getattr(repo, 'sopener')
682 698
683 699
684 700 def getvfs(repo):
685 701 """Return appropriate object to access files under .hg"""
686 702 # for "historical portability":
687 703 # repo.vfs has been available since 2.3 (or 7034365089bf)
688 704 vfs = getattr(repo, 'vfs', None)
689 705 if vfs:
690 706 return vfs
691 707 else:
692 708 return getattr(repo, 'opener')
693 709
694 710
695 711 def repocleartagscachefunc(repo):
696 712 """Return the function to clear tags cache according to repo internal API"""
697 713 if util.safehasattr(repo, b'_tagscache'): # since 2.0 (or 9dca7653b525)
698 714 # in this case, setattr(repo, '_tagscache', None) or so isn't
699 715 # correct way to clear tags cache, because existing code paths
700 716 # expect _tagscache to be a structured object.
701 717 def clearcache():
702 718 # _tagscache has been filteredpropertycache since 2.5 (or
703 719 # 98c867ac1330), and delattr() can't work in such case
704 720 if '_tagscache' in vars(repo):
705 721 del repo.__dict__['_tagscache']
706 722
707 723 return clearcache
708 724
709 725 repotags = safeattrsetter(repo, b'_tags', ignoremissing=True)
710 726 if repotags: # since 1.4 (or 5614a628d173)
711 727 return lambda: repotags.set(None)
712 728
713 729 repotagscache = safeattrsetter(repo, b'tagscache', ignoremissing=True)
714 730 if repotagscache: # since 0.6 (or d7df759d0e97)
715 731 return lambda: repotagscache.set(None)
716 732
717 733 # Mercurial earlier than 0.6 (or d7df759d0e97) logically reaches
718 734 # this point, but it isn't so problematic, because:
719 735 # - repo.tags of such Mercurial isn't "callable", and repo.tags()
720 736 # in perftags() causes failure soon
721 737 # - perf.py itself has been available since 1.1 (or eb240755386d)
722 738 raise error.Abort(b"tags API of this hg command is unknown")
723 739
724 740
725 741 # utilities to clear cache
726 742
727 743
728 744 def clearfilecache(obj, attrname):
729 745 unfiltered = getattr(obj, 'unfiltered', None)
730 746 if unfiltered is not None:
731 747 obj = obj.unfiltered()
732 748 if attrname in vars(obj):
733 749 delattr(obj, attrname)
734 750 obj._filecache.pop(attrname, None)
735 751
736 752
737 753 def clearchangelog(repo):
738 754 if repo is not repo.unfiltered():
739 755 object.__setattr__(repo, '_clcachekey', None)
740 756 object.__setattr__(repo, '_clcache', None)
741 757 clearfilecache(repo.unfiltered(), 'changelog')
742 758
743 759
744 760 # perf commands
745 761
746 762
747 763 @command(b'perf::walk|perfwalk', formatteropts)
748 764 def perfwalk(ui, repo, *pats, **opts):
749 765 opts = _byteskwargs(opts)
750 766 timer, fm = gettimer(ui, opts)
751 767 m = scmutil.match(repo[None], pats, {})
752 768 timer(
753 769 lambda: len(
754 770 list(
755 771 repo.dirstate.walk(m, subrepos=[], unknown=True, ignored=False)
756 772 )
757 773 )
758 774 )
759 775 fm.end()
760 776
761 777
762 778 @command(b'perf::annotate|perfannotate', formatteropts)
763 779 def perfannotate(ui, repo, f, **opts):
764 780 opts = _byteskwargs(opts)
765 781 timer, fm = gettimer(ui, opts)
766 782 fc = repo[b'.'][f]
767 783 timer(lambda: len(fc.annotate(True)))
768 784 fm.end()
769 785
770 786
771 787 @command(
772 788 b'perf::status|perfstatus',
773 789 [
774 790 (b'u', b'unknown', False, b'ask status to look for unknown files'),
775 791 (b'', b'dirstate', False, b'benchmark the internal dirstate call'),
776 792 ]
777 793 + formatteropts,
778 794 )
779 795 def perfstatus(ui, repo, **opts):
780 796 """benchmark the performance of a single status call
781 797
782 798 The repository data are preserved between each call.
783 799
784 800 By default, only the status of the tracked files is requested. If
785 801 `--unknown` is passed, the "unknown" files are also tracked.
786 802 """
787 803 opts = _byteskwargs(opts)
788 804 # m = match.always(repo.root, repo.getcwd())
789 805 # timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
790 806 # False))))
791 807 timer, fm = gettimer(ui, opts)
792 808 if opts[b'dirstate']:
793 809 dirstate = repo.dirstate
794 810 m = scmutil.matchall(repo)
795 811 unknown = opts[b'unknown']
796 812
797 813 def status_dirstate():
798 814 s = dirstate.status(
799 815 m, subrepos=[], ignored=False, clean=False, unknown=unknown
800 816 )
801 817 sum(map(bool, s))
802 818
803 819 timer(status_dirstate)
804 820 else:
805 821 timer(lambda: sum(map(len, repo.status(unknown=opts[b'unknown']))))
806 822 fm.end()
807 823
808 824
809 825 @command(b'perf::addremove|perfaddremove', formatteropts)
810 826 def perfaddremove(ui, repo, **opts):
811 827 opts = _byteskwargs(opts)
812 828 timer, fm = gettimer(ui, opts)
813 829 try:
814 830 oldquiet = repo.ui.quiet
815 831 repo.ui.quiet = True
816 832 matcher = scmutil.match(repo[None])
817 833 opts[b'dry_run'] = True
818 834 if 'uipathfn' in getargspec(scmutil.addremove).args:
819 835 uipathfn = scmutil.getuipathfn(repo)
820 836 timer(lambda: scmutil.addremove(repo, matcher, b"", uipathfn, opts))
821 837 else:
822 838 timer(lambda: scmutil.addremove(repo, matcher, b"", opts))
823 839 finally:
824 840 repo.ui.quiet = oldquiet
825 841 fm.end()
826 842
827 843
828 844 def clearcaches(cl):
829 845 # behave somewhat consistently across internal API changes
830 846 if util.safehasattr(cl, b'clearcaches'):
831 847 cl.clearcaches()
832 848 elif util.safehasattr(cl, b'_nodecache'):
833 849 # <= hg-5.2
834 850 from mercurial.node import nullid, nullrev
835 851
836 852 cl._nodecache = {nullid: nullrev}
837 853 cl._nodepos = None
838 854
839 855
840 856 @command(b'perf::heads|perfheads', formatteropts)
841 857 def perfheads(ui, repo, **opts):
842 858 """benchmark the computation of a changelog heads"""
843 859 opts = _byteskwargs(opts)
844 860 timer, fm = gettimer(ui, opts)
845 861 cl = repo.changelog
846 862
847 863 def s():
848 864 clearcaches(cl)
849 865
850 866 def d():
851 867 len(cl.headrevs())
852 868
853 869 timer(d, setup=s)
854 870 fm.end()
855 871
856 872
857 873 @command(
858 874 b'perf::tags|perftags',
859 875 formatteropts
860 876 + [
861 877 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
862 878 ],
863 879 )
864 880 def perftags(ui, repo, **opts):
865 881 opts = _byteskwargs(opts)
866 882 timer, fm = gettimer(ui, opts)
867 883 repocleartagscache = repocleartagscachefunc(repo)
868 884 clearrevlogs = opts[b'clear_revlogs']
869 885
870 886 def s():
871 887 if clearrevlogs:
872 888 clearchangelog(repo)
873 889 clearfilecache(repo.unfiltered(), 'manifest')
874 890 repocleartagscache()
875 891
876 892 def t():
877 893 return len(repo.tags())
878 894
879 895 timer(t, setup=s)
880 896 fm.end()
881 897
882 898
883 899 @command(b'perf::ancestors|perfancestors', formatteropts)
884 900 def perfancestors(ui, repo, **opts):
885 901 opts = _byteskwargs(opts)
886 902 timer, fm = gettimer(ui, opts)
887 903 heads = repo.changelog.headrevs()
888 904
889 905 def d():
890 906 for a in repo.changelog.ancestors(heads):
891 907 pass
892 908
893 909 timer(d)
894 910 fm.end()
895 911
896 912
897 913 @command(b'perf::ancestorset|perfancestorset', formatteropts)
898 914 def perfancestorset(ui, repo, revset, **opts):
899 915 opts = _byteskwargs(opts)
900 916 timer, fm = gettimer(ui, opts)
901 917 revs = repo.revs(revset)
902 918 heads = repo.changelog.headrevs()
903 919
904 920 def d():
905 921 s = repo.changelog.ancestors(heads)
906 922 for rev in revs:
907 923 rev in s
908 924
909 925 timer(d)
910 926 fm.end()
911 927
912 928
913 929 @command(b'perf::discovery|perfdiscovery', formatteropts, b'PATH')
914 930 def perfdiscovery(ui, repo, path, **opts):
915 931 """benchmark discovery between local repo and the peer at given path"""
916 932 repos = [repo, None]
917 933 timer, fm = gettimer(ui, opts)
918 934
919 935 try:
920 936 from mercurial.utils.urlutil import get_unique_pull_path
921 937
922 938 path = get_unique_pull_path(b'perfdiscovery', repo, ui, path)[0]
923 939 except ImportError:
924 940 path = ui.expandpath(path)
925 941
926 942 def s():
927 943 repos[1] = hg.peer(ui, opts, path)
928 944
929 945 def d():
930 946 setdiscovery.findcommonheads(ui, *repos)
931 947
932 948 timer(d, setup=s)
933 949 fm.end()
934 950
935 951
936 952 @command(
937 953 b'perf::bookmarks|perfbookmarks',
938 954 formatteropts
939 955 + [
940 956 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
941 957 ],
942 958 )
943 959 def perfbookmarks(ui, repo, **opts):
944 960 """benchmark parsing bookmarks from disk to memory"""
945 961 opts = _byteskwargs(opts)
946 962 timer, fm = gettimer(ui, opts)
947 963
948 964 clearrevlogs = opts[b'clear_revlogs']
949 965
950 966 def s():
951 967 if clearrevlogs:
952 968 clearchangelog(repo)
953 969 clearfilecache(repo, b'_bookmarks')
954 970
955 971 def d():
956 972 repo._bookmarks
957 973
958 974 timer(d, setup=s)
959 975 fm.end()
960 976
961 977
962 978 @command(b'perf::bundleread|perfbundleread', formatteropts, b'BUNDLE')
963 979 def perfbundleread(ui, repo, bundlepath, **opts):
964 980 """Benchmark reading of bundle files.
965 981
966 982 This command is meant to isolate the I/O part of bundle reading as
967 983 much as possible.
968 984 """
969 985 from mercurial import (
970 986 bundle2,
971 987 exchange,
972 988 streamclone,
973 989 )
974 990
975 991 opts = _byteskwargs(opts)
976 992
977 993 def makebench(fn):
978 994 def run():
979 995 with open(bundlepath, b'rb') as fh:
980 996 bundle = exchange.readbundle(ui, fh, bundlepath)
981 997 fn(bundle)
982 998
983 999 return run
984 1000
985 1001 def makereadnbytes(size):
986 1002 def run():
987 1003 with open(bundlepath, b'rb') as fh:
988 1004 bundle = exchange.readbundle(ui, fh, bundlepath)
989 1005 while bundle.read(size):
990 1006 pass
991 1007
992 1008 return run
993 1009
994 1010 def makestdioread(size):
995 1011 def run():
996 1012 with open(bundlepath, b'rb') as fh:
997 1013 while fh.read(size):
998 1014 pass
999 1015
1000 1016 return run
1001 1017
1002 1018 # bundle1
1003 1019
1004 1020 def deltaiter(bundle):
1005 1021 for delta in bundle.deltaiter():
1006 1022 pass
1007 1023
1008 1024 def iterchunks(bundle):
1009 1025 for chunk in bundle.getchunks():
1010 1026 pass
1011 1027
1012 1028 # bundle2
1013 1029
1014 1030 def forwardchunks(bundle):
1015 1031 for chunk in bundle._forwardchunks():
1016 1032 pass
1017 1033
1018 1034 def iterparts(bundle):
1019 1035 for part in bundle.iterparts():
1020 1036 pass
1021 1037
1022 1038 def iterpartsseekable(bundle):
1023 1039 for part in bundle.iterparts(seekable=True):
1024 1040 pass
1025 1041
1026 1042 def seek(bundle):
1027 1043 for part in bundle.iterparts(seekable=True):
1028 1044 part.seek(0, os.SEEK_END)
1029 1045
1030 1046 def makepartreadnbytes(size):
1031 1047 def run():
1032 1048 with open(bundlepath, b'rb') as fh:
1033 1049 bundle = exchange.readbundle(ui, fh, bundlepath)
1034 1050 for part in bundle.iterparts():
1035 1051 while part.read(size):
1036 1052 pass
1037 1053
1038 1054 return run
1039 1055
1040 1056 benches = [
1041 1057 (makestdioread(8192), b'read(8k)'),
1042 1058 (makestdioread(16384), b'read(16k)'),
1043 1059 (makestdioread(32768), b'read(32k)'),
1044 1060 (makestdioread(131072), b'read(128k)'),
1045 1061 ]
1046 1062
1047 1063 with open(bundlepath, b'rb') as fh:
1048 1064 bundle = exchange.readbundle(ui, fh, bundlepath)
1049 1065
1050 1066 if isinstance(bundle, changegroup.cg1unpacker):
1051 1067 benches.extend(
1052 1068 [
1053 1069 (makebench(deltaiter), b'cg1 deltaiter()'),
1054 1070 (makebench(iterchunks), b'cg1 getchunks()'),
1055 1071 (makereadnbytes(8192), b'cg1 read(8k)'),
1056 1072 (makereadnbytes(16384), b'cg1 read(16k)'),
1057 1073 (makereadnbytes(32768), b'cg1 read(32k)'),
1058 1074 (makereadnbytes(131072), b'cg1 read(128k)'),
1059 1075 ]
1060 1076 )
1061 1077 elif isinstance(bundle, bundle2.unbundle20):
1062 1078 benches.extend(
1063 1079 [
1064 1080 (makebench(forwardchunks), b'bundle2 forwardchunks()'),
1065 1081 (makebench(iterparts), b'bundle2 iterparts()'),
1066 1082 (
1067 1083 makebench(iterpartsseekable),
1068 1084 b'bundle2 iterparts() seekable',
1069 1085 ),
1070 1086 (makebench(seek), b'bundle2 part seek()'),
1071 1087 (makepartreadnbytes(8192), b'bundle2 part read(8k)'),
1072 1088 (makepartreadnbytes(16384), b'bundle2 part read(16k)'),
1073 1089 (makepartreadnbytes(32768), b'bundle2 part read(32k)'),
1074 1090 (makepartreadnbytes(131072), b'bundle2 part read(128k)'),
1075 1091 ]
1076 1092 )
1077 1093 elif isinstance(bundle, streamclone.streamcloneapplier):
1078 1094 raise error.Abort(b'stream clone bundles not supported')
1079 1095 else:
1080 1096 raise error.Abort(b'unhandled bundle type: %s' % type(bundle))
1081 1097
1082 1098 for fn, title in benches:
1083 1099 timer, fm = gettimer(ui, opts)
1084 1100 timer(fn, title=title)
1085 1101 fm.end()
1086 1102
1087 1103
1088 1104 @command(
1089 1105 b'perf::changegroupchangelog|perfchangegroupchangelog',
1090 1106 formatteropts
1091 1107 + [
1092 1108 (b'', b'cgversion', b'02', b'changegroup version'),
1093 1109 (b'r', b'rev', b'', b'revisions to add to changegroup'),
1094 1110 ],
1095 1111 )
1096 1112 def perfchangegroupchangelog(ui, repo, cgversion=b'02', rev=None, **opts):
1097 1113 """Benchmark producing a changelog group for a changegroup.
1098 1114
1099 1115 This measures the time spent processing the changelog during a
1100 1116 bundle operation. This occurs during `hg bundle` and on a server
1101 1117 processing a `getbundle` wire protocol request (handles clones
1102 1118 and pull requests).
1103 1119
1104 1120 By default, all revisions are added to the changegroup.
1105 1121 """
1106 1122 opts = _byteskwargs(opts)
1107 1123 cl = repo.changelog
1108 1124 nodes = [cl.lookup(r) for r in repo.revs(rev or b'all()')]
1109 1125 bundler = changegroup.getbundler(cgversion, repo)
1110 1126
1111 1127 def d():
1112 1128 state, chunks = bundler._generatechangelog(cl, nodes)
1113 1129 for chunk in chunks:
1114 1130 pass
1115 1131
1116 1132 timer, fm = gettimer(ui, opts)
1117 1133
1118 1134 # Terminal printing can interfere with timing. So disable it.
1119 1135 with ui.configoverride({(b'progress', b'disable'): True}):
1120 1136 timer(d)
1121 1137
1122 1138 fm.end()
1123 1139
1124 1140
1125 1141 @command(b'perf::dirs|perfdirs', formatteropts)
1126 1142 def perfdirs(ui, repo, **opts):
1127 1143 opts = _byteskwargs(opts)
1128 1144 timer, fm = gettimer(ui, opts)
1129 1145 dirstate = repo.dirstate
1130 1146 b'a' in dirstate
1131 1147
1132 1148 def d():
1133 1149 dirstate.hasdir(b'a')
1134 1150 del dirstate._map._dirs
1135 1151
1136 1152 timer(d)
1137 1153 fm.end()
1138 1154
1139 1155
1140 1156 @command(
1141 1157 b'perf::dirstate|perfdirstate',
1142 1158 [
1143 1159 (
1144 1160 b'',
1145 1161 b'iteration',
1146 1162 None,
1147 1163 b'benchmark a full iteration for the dirstate',
1148 1164 ),
1149 1165 (
1150 1166 b'',
1151 1167 b'contains',
1152 1168 None,
1153 1169 b'benchmark a large amount of `nf in dirstate` calls',
1154 1170 ),
1155 1171 ]
1156 1172 + formatteropts,
1157 1173 )
1158 1174 def perfdirstate(ui, repo, **opts):
1159 1175 """benchmap the time of various distate operations
1160 1176
1161 1177 By default benchmark the time necessary to load a dirstate from scratch.
1162 1178 The dirstate is loaded to the point were a "contains" request can be
1163 1179 answered.
1164 1180 """
1165 1181 opts = _byteskwargs(opts)
1166 1182 timer, fm = gettimer(ui, opts)
1167 1183 b"a" in repo.dirstate
1168 1184
1169 1185 if opts[b'iteration'] and opts[b'contains']:
1170 1186 msg = b'only specify one of --iteration or --contains'
1171 1187 raise error.Abort(msg)
1172 1188
1173 1189 if opts[b'iteration']:
1174 1190 setup = None
1175 1191 dirstate = repo.dirstate
1176 1192
1177 1193 def d():
1178 1194 for f in dirstate:
1179 1195 pass
1180 1196
1181 1197 elif opts[b'contains']:
1182 1198 setup = None
1183 1199 dirstate = repo.dirstate
1184 1200 allfiles = list(dirstate)
1185 1201 # also add file path that will be "missing" from the dirstate
1186 1202 allfiles.extend([f[::-1] for f in allfiles])
1187 1203
1188 1204 def d():
1189 1205 for f in allfiles:
1190 1206 f in dirstate
1191 1207
1192 1208 else:
1193 1209
1194 1210 def setup():
1195 1211 repo.dirstate.invalidate()
1196 1212
1197 1213 def d():
1198 1214 b"a" in repo.dirstate
1199 1215
1200 1216 timer(d, setup=setup)
1201 1217 fm.end()
1202 1218
1203 1219
1204 1220 @command(b'perf::dirstatedirs|perfdirstatedirs', formatteropts)
1205 1221 def perfdirstatedirs(ui, repo, **opts):
1206 1222 """benchmap a 'dirstate.hasdir' call from an empty `dirs` cache"""
1207 1223 opts = _byteskwargs(opts)
1208 1224 timer, fm = gettimer(ui, opts)
1209 1225 repo.dirstate.hasdir(b"a")
1210 1226
1211 1227 def setup():
1212 1228 del repo.dirstate._map._dirs
1213 1229
1214 1230 def d():
1215 1231 repo.dirstate.hasdir(b"a")
1216 1232
1217 1233 timer(d, setup=setup)
1218 1234 fm.end()
1219 1235
1220 1236
1221 1237 @command(b'perf::dirstatefoldmap|perfdirstatefoldmap', formatteropts)
1222 1238 def perfdirstatefoldmap(ui, repo, **opts):
1223 1239 """benchmap a `dirstate._map.filefoldmap.get()` request
1224 1240
1225 1241 The dirstate filefoldmap cache is dropped between every request.
1226 1242 """
1227 1243 opts = _byteskwargs(opts)
1228 1244 timer, fm = gettimer(ui, opts)
1229 1245 dirstate = repo.dirstate
1230 1246 dirstate._map.filefoldmap.get(b'a')
1231 1247
1232 1248 def setup():
1233 1249 del dirstate._map.filefoldmap
1234 1250
1235 1251 def d():
1236 1252 dirstate._map.filefoldmap.get(b'a')
1237 1253
1238 1254 timer(d, setup=setup)
1239 1255 fm.end()
1240 1256
1241 1257
1242 1258 @command(b'perf::dirfoldmap|perfdirfoldmap', formatteropts)
1243 1259 def perfdirfoldmap(ui, repo, **opts):
1244 1260 """benchmap a `dirstate._map.dirfoldmap.get()` request
1245 1261
1246 1262 The dirstate dirfoldmap cache is dropped between every request.
1247 1263 """
1248 1264 opts = _byteskwargs(opts)
1249 1265 timer, fm = gettimer(ui, opts)
1250 1266 dirstate = repo.dirstate
1251 1267 dirstate._map.dirfoldmap.get(b'a')
1252 1268
1253 1269 def setup():
1254 1270 del dirstate._map.dirfoldmap
1255 1271 del dirstate._map._dirs
1256 1272
1257 1273 def d():
1258 1274 dirstate._map.dirfoldmap.get(b'a')
1259 1275
1260 1276 timer(d, setup=setup)
1261 1277 fm.end()
1262 1278
1263 1279
1264 1280 @command(b'perf::dirstatewrite|perfdirstatewrite', formatteropts)
1265 1281 def perfdirstatewrite(ui, repo, **opts):
1266 1282 """benchmap the time it take to write a dirstate on disk"""
1267 1283 opts = _byteskwargs(opts)
1268 1284 timer, fm = gettimer(ui, opts)
1269 1285 ds = repo.dirstate
1270 1286 b"a" in ds
1271 1287
1272 1288 def setup():
1273 1289 ds._dirty = True
1274 1290
1275 1291 def d():
1276 1292 ds.write(repo.currenttransaction())
1277 1293
1278 1294 timer(d, setup=setup)
1279 1295 fm.end()
1280 1296
1281 1297
1282 1298 def _getmergerevs(repo, opts):
1283 1299 """parse command argument to return rev involved in merge
1284 1300
1285 1301 input: options dictionary with `rev`, `from` and `base`
1286 1302 output: (localctx, otherctx, basectx)
1287 1303 """
1288 1304 if opts[b'from']:
1289 1305 fromrev = scmutil.revsingle(repo, opts[b'from'])
1290 1306 wctx = repo[fromrev]
1291 1307 else:
1292 1308 wctx = repo[None]
1293 1309 # we don't want working dir files to be stat'd in the benchmark, so
1294 1310 # prime that cache
1295 1311 wctx.dirty()
1296 1312 rctx = scmutil.revsingle(repo, opts[b'rev'], opts[b'rev'])
1297 1313 if opts[b'base']:
1298 1314 fromrev = scmutil.revsingle(repo, opts[b'base'])
1299 1315 ancestor = repo[fromrev]
1300 1316 else:
1301 1317 ancestor = wctx.ancestor(rctx)
1302 1318 return (wctx, rctx, ancestor)
1303 1319
1304 1320
1305 1321 @command(
1306 1322 b'perf::mergecalculate|perfmergecalculate',
1307 1323 [
1308 1324 (b'r', b'rev', b'.', b'rev to merge against'),
1309 1325 (b'', b'from', b'', b'rev to merge from'),
1310 1326 (b'', b'base', b'', b'the revision to use as base'),
1311 1327 ]
1312 1328 + formatteropts,
1313 1329 )
1314 1330 def perfmergecalculate(ui, repo, **opts):
1315 1331 opts = _byteskwargs(opts)
1316 1332 timer, fm = gettimer(ui, opts)
1317 1333
1318 1334 wctx, rctx, ancestor = _getmergerevs(repo, opts)
1319 1335
1320 1336 def d():
1321 1337 # acceptremote is True because we don't want prompts in the middle of
1322 1338 # our benchmark
1323 1339 merge.calculateupdates(
1324 1340 repo,
1325 1341 wctx,
1326 1342 rctx,
1327 1343 [ancestor],
1328 1344 branchmerge=False,
1329 1345 force=False,
1330 1346 acceptremote=True,
1331 1347 followcopies=True,
1332 1348 )
1333 1349
1334 1350 timer(d)
1335 1351 fm.end()
1336 1352
1337 1353
1338 1354 @command(
1339 1355 b'perf::mergecopies|perfmergecopies',
1340 1356 [
1341 1357 (b'r', b'rev', b'.', b'rev to merge against'),
1342 1358 (b'', b'from', b'', b'rev to merge from'),
1343 1359 (b'', b'base', b'', b'the revision to use as base'),
1344 1360 ]
1345 1361 + formatteropts,
1346 1362 )
1347 1363 def perfmergecopies(ui, repo, **opts):
1348 1364 """measure runtime of `copies.mergecopies`"""
1349 1365 opts = _byteskwargs(opts)
1350 1366 timer, fm = gettimer(ui, opts)
1351 1367 wctx, rctx, ancestor = _getmergerevs(repo, opts)
1352 1368
1353 1369 def d():
1354 1370 # acceptremote is True because we don't want prompts in the middle of
1355 1371 # our benchmark
1356 1372 copies.mergecopies(repo, wctx, rctx, ancestor)
1357 1373
1358 1374 timer(d)
1359 1375 fm.end()
1360 1376
1361 1377
1362 1378 @command(b'perf::pathcopies|perfpathcopies', [], b"REV REV")
1363 1379 def perfpathcopies(ui, repo, rev1, rev2, **opts):
1364 1380 """benchmark the copy tracing logic"""
1365 1381 opts = _byteskwargs(opts)
1366 1382 timer, fm = gettimer(ui, opts)
1367 1383 ctx1 = scmutil.revsingle(repo, rev1, rev1)
1368 1384 ctx2 = scmutil.revsingle(repo, rev2, rev2)
1369 1385
1370 1386 def d():
1371 1387 copies.pathcopies(ctx1, ctx2)
1372 1388
1373 1389 timer(d)
1374 1390 fm.end()
1375 1391
1376 1392
1377 1393 @command(
1378 1394 b'perf::phases|perfphases',
1379 1395 [
1380 1396 (b'', b'full', False, b'include file reading time too'),
1381 1397 ],
1382 1398 b"",
1383 1399 )
1384 1400 def perfphases(ui, repo, **opts):
1385 1401 """benchmark phasesets computation"""
1386 1402 opts = _byteskwargs(opts)
1387 1403 timer, fm = gettimer(ui, opts)
1388 1404 _phases = repo._phasecache
1389 1405 full = opts.get(b'full')
1390 1406
1391 1407 def d():
1392 1408 phases = _phases
1393 1409 if full:
1394 1410 clearfilecache(repo, b'_phasecache')
1395 1411 phases = repo._phasecache
1396 1412 phases.invalidate()
1397 1413 phases.loadphaserevs(repo)
1398 1414
1399 1415 timer(d)
1400 1416 fm.end()
1401 1417
1402 1418
1403 1419 @command(b'perf::phasesremote|perfphasesremote', [], b"[DEST]")
1404 1420 def perfphasesremote(ui, repo, dest=None, **opts):
1405 1421 """benchmark time needed to analyse phases of the remote server"""
1406 1422 from mercurial.node import bin
1407 1423 from mercurial import (
1408 1424 exchange,
1409 1425 hg,
1410 1426 phases,
1411 1427 )
1412 1428
1413 1429 opts = _byteskwargs(opts)
1414 1430 timer, fm = gettimer(ui, opts)
1415 1431
1416 1432 path = ui.getpath(dest, default=(b'default-push', b'default'))
1417 1433 if not path:
1418 1434 raise error.Abort(
1419 1435 b'default repository not configured!',
1420 1436 hint=b"see 'hg help config.paths'",
1421 1437 )
1422 1438 dest = path.pushloc or path.loc
1423 1439 ui.statusnoi18n(b'analysing phase of %s\n' % util.hidepassword(dest))
1424 1440 other = hg.peer(repo, opts, dest)
1425 1441
1426 1442 # easier to perform discovery through the operation
1427 1443 op = exchange.pushoperation(repo, other)
1428 1444 exchange._pushdiscoverychangeset(op)
1429 1445
1430 1446 remotesubset = op.fallbackheads
1431 1447
1432 1448 with other.commandexecutor() as e:
1433 1449 remotephases = e.callcommand(
1434 1450 b'listkeys', {b'namespace': b'phases'}
1435 1451 ).result()
1436 1452 del other
1437 1453 publishing = remotephases.get(b'publishing', False)
1438 1454 if publishing:
1439 1455 ui.statusnoi18n(b'publishing: yes\n')
1440 1456 else:
1441 1457 ui.statusnoi18n(b'publishing: no\n')
1442 1458
1443 1459 has_node = getattr(repo.changelog.index, 'has_node', None)
1444 1460 if has_node is None:
1445 1461 has_node = repo.changelog.nodemap.__contains__
1446 1462 nonpublishroots = 0
1447 1463 for nhex, phase in remotephases.iteritems():
1448 1464 if nhex == b'publishing': # ignore data related to publish option
1449 1465 continue
1450 1466 node = bin(nhex)
1451 1467 if has_node(node) and int(phase):
1452 1468 nonpublishroots += 1
1453 1469 ui.statusnoi18n(b'number of roots: %d\n' % len(remotephases))
1454 1470 ui.statusnoi18n(b'number of known non public roots: %d\n' % nonpublishroots)
1455 1471
1456 1472 def d():
1457 1473 phases.remotephasessummary(repo, remotesubset, remotephases)
1458 1474
1459 1475 timer(d)
1460 1476 fm.end()
1461 1477
1462 1478
1463 1479 @command(
1464 1480 b'perf::manifest|perfmanifest',
1465 1481 [
1466 1482 (b'm', b'manifest-rev', False, b'Look up a manifest node revision'),
1467 1483 (b'', b'clear-disk', False, b'clear on-disk caches too'),
1468 1484 ]
1469 1485 + formatteropts,
1470 1486 b'REV|NODE',
1471 1487 )
1472 1488 def perfmanifest(ui, repo, rev, manifest_rev=False, clear_disk=False, **opts):
1473 1489 """benchmark the time to read a manifest from disk and return a usable
1474 1490 dict-like object
1475 1491
1476 1492 Manifest caches are cleared before retrieval."""
1477 1493 opts = _byteskwargs(opts)
1478 1494 timer, fm = gettimer(ui, opts)
1479 1495 if not manifest_rev:
1480 1496 ctx = scmutil.revsingle(repo, rev, rev)
1481 1497 t = ctx.manifestnode()
1482 1498 else:
1483 1499 from mercurial.node import bin
1484 1500
1485 1501 if len(rev) == 40:
1486 1502 t = bin(rev)
1487 1503 else:
1488 1504 try:
1489 1505 rev = int(rev)
1490 1506
1491 1507 if util.safehasattr(repo.manifestlog, b'getstorage'):
1492 1508 t = repo.manifestlog.getstorage(b'').node(rev)
1493 1509 else:
1494 1510 t = repo.manifestlog._revlog.lookup(rev)
1495 1511 except ValueError:
1496 1512 raise error.Abort(
1497 1513 b'manifest revision must be integer or full node'
1498 1514 )
1499 1515
1500 1516 def d():
1501 1517 repo.manifestlog.clearcaches(clear_persisted_data=clear_disk)
1502 1518 repo.manifestlog[t].read()
1503 1519
1504 1520 timer(d)
1505 1521 fm.end()
1506 1522
1507 1523
1508 1524 @command(b'perf::changeset|perfchangeset', formatteropts)
1509 1525 def perfchangeset(ui, repo, rev, **opts):
1510 1526 opts = _byteskwargs(opts)
1511 1527 timer, fm = gettimer(ui, opts)
1512 1528 n = scmutil.revsingle(repo, rev).node()
1513 1529
1514 1530 def d():
1515 1531 repo.changelog.read(n)
1516 1532 # repo.changelog._cache = None
1517 1533
1518 1534 timer(d)
1519 1535 fm.end()
1520 1536
1521 1537
1522 1538 @command(b'perf::ignore|perfignore', formatteropts)
1523 1539 def perfignore(ui, repo, **opts):
1524 1540 """benchmark operation related to computing ignore"""
1525 1541 opts = _byteskwargs(opts)
1526 1542 timer, fm = gettimer(ui, opts)
1527 1543 dirstate = repo.dirstate
1528 1544
1529 1545 def setupone():
1530 1546 dirstate.invalidate()
1531 1547 clearfilecache(dirstate, b'_ignore')
1532 1548
1533 1549 def runone():
1534 1550 dirstate._ignore
1535 1551
1536 1552 timer(runone, setup=setupone, title=b"load")
1537 1553 fm.end()
1538 1554
1539 1555
1540 1556 @command(
1541 1557 b'perf::index|perfindex',
1542 1558 [
1543 1559 (b'', b'rev', [], b'revision to be looked up (default tip)'),
1544 1560 (b'', b'no-lookup', None, b'do not revision lookup post creation'),
1545 1561 ]
1546 1562 + formatteropts,
1547 1563 )
1548 1564 def perfindex(ui, repo, **opts):
1549 1565 """benchmark index creation time followed by a lookup
1550 1566
1551 1567 The default is to look `tip` up. Depending on the index implementation,
1552 1568 the revision looked up can matter. For example, an implementation
1553 1569 scanning the index will have a faster lookup time for `--rev tip` than for
1554 1570 `--rev 0`. The number of looked up revisions and their order can also
1555 1571 matter.
1556 1572
1557 1573 Examples of useful sets to test:
1558 1574
1559 1575 * tip
1560 1576 * 0
1561 1577 * -10:
1562 1578 * :10
1563 1579 * -10: + :10
1564 1580 * :10: + -10:
1565 1581 * -10000:
1566 1582 * -10000: + 0
1567 1583
1568 1584 It is not currently possible to check for lookup of a missing node. For
1569 1585 deeper lookup benchmarking, check out the `perfnodemap` command."""
1570 1586 import mercurial.revlog
1571 1587
1572 1588 opts = _byteskwargs(opts)
1573 1589 timer, fm = gettimer(ui, opts)
1574 1590 mercurial.revlog._prereadsize = 2 ** 24 # disable lazy parser in old hg
1575 1591 if opts[b'no_lookup']:
1576 1592 if opts['rev']:
1577 1593 raise error.Abort('--no-lookup and --rev are mutually exclusive')
1578 1594 nodes = []
1579 1595 elif not opts[b'rev']:
1580 1596 nodes = [repo[b"tip"].node()]
1581 1597 else:
1582 1598 revs = scmutil.revrange(repo, opts[b'rev'])
1583 1599 cl = repo.changelog
1584 1600 nodes = [cl.node(r) for r in revs]
1585 1601
1586 1602 unfi = repo.unfiltered()
1587 1603 # find the filecache func directly
1588 1604 # This avoid polluting the benchmark with the filecache logic
1589 1605 makecl = unfi.__class__.changelog.func
1590 1606
1591 1607 def setup():
1592 1608 # probably not necessary, but for good measure
1593 1609 clearchangelog(unfi)
1594 1610
1595 1611 def d():
1596 1612 cl = makecl(unfi)
1597 1613 for n in nodes:
1598 1614 cl.rev(n)
1599 1615
1600 1616 timer(d, setup=setup)
1601 1617 fm.end()
1602 1618
1603 1619
1604 1620 @command(
1605 1621 b'perf::nodemap|perfnodemap',
1606 1622 [
1607 1623 (b'', b'rev', [], b'revision to be looked up (default tip)'),
1608 1624 (b'', b'clear-caches', True, b'clear revlog cache between calls'),
1609 1625 ]
1610 1626 + formatteropts,
1611 1627 )
1612 1628 def perfnodemap(ui, repo, **opts):
1613 1629 """benchmark the time necessary to look up revision from a cold nodemap
1614 1630
1615 1631 Depending on the implementation, the amount and order of revision we look
1616 1632 up can varies. Example of useful set to test:
1617 1633 * tip
1618 1634 * 0
1619 1635 * -10:
1620 1636 * :10
1621 1637 * -10: + :10
1622 1638 * :10: + -10:
1623 1639 * -10000:
1624 1640 * -10000: + 0
1625 1641
1626 1642 The command currently focuses on valid binary lookups. Benchmarking for
1627 1643 hexlookup, prefix lookup and missing lookup would also be valuable.
1628 1644 """
1629 1645 import mercurial.revlog
1630 1646
1631 1647 opts = _byteskwargs(opts)
1632 1648 timer, fm = gettimer(ui, opts)
1633 1649 mercurial.revlog._prereadsize = 2 ** 24 # disable lazy parser in old hg
1634 1650
1635 1651 unfi = repo.unfiltered()
1636 1652 clearcaches = opts[b'clear_caches']
1637 1653 # find the filecache func directly
1638 1654 # This avoid polluting the benchmark with the filecache logic
1639 1655 makecl = unfi.__class__.changelog.func
1640 1656 if not opts[b'rev']:
1641 1657 raise error.Abort(b'use --rev to specify revisions to look up')
1642 1658 revs = scmutil.revrange(repo, opts[b'rev'])
1643 1659 cl = repo.changelog
1644 1660 nodes = [cl.node(r) for r in revs]
1645 1661
1646 1662 # use a list to pass reference to a nodemap from one closure to the next
1647 1663 nodeget = [None]
1648 1664
1649 1665 def setnodeget():
1650 1666 # probably not necessary, but for good measure
1651 1667 clearchangelog(unfi)
1652 1668 cl = makecl(unfi)
1653 1669 if util.safehasattr(cl.index, 'get_rev'):
1654 1670 nodeget[0] = cl.index.get_rev
1655 1671 else:
1656 1672 nodeget[0] = cl.nodemap.get
1657 1673
1658 1674 def d():
1659 1675 get = nodeget[0]
1660 1676 for n in nodes:
1661 1677 get(n)
1662 1678
1663 1679 setup = None
1664 1680 if clearcaches:
1665 1681
1666 1682 def setup():
1667 1683 setnodeget()
1668 1684
1669 1685 else:
1670 1686 setnodeget()
1671 1687 d() # prewarm the data structure
1672 1688 timer(d, setup=setup)
1673 1689 fm.end()
1674 1690
1675 1691
1676 1692 @command(b'perf::startup|perfstartup', formatteropts)
1677 1693 def perfstartup(ui, repo, **opts):
1678 1694 opts = _byteskwargs(opts)
1679 1695 timer, fm = gettimer(ui, opts)
1680 1696
1681 1697 def d():
1682 1698 if os.name != 'nt':
1683 1699 os.system(
1684 1700 b"HGRCPATH= %s version -q > /dev/null" % fsencode(sys.argv[0])
1685 1701 )
1686 1702 else:
1687 1703 os.environ['HGRCPATH'] = r' '
1688 1704 os.system("%s version -q > NUL" % sys.argv[0])
1689 1705
1690 1706 timer(d)
1691 1707 fm.end()
1692 1708
1693 1709
1694 1710 @command(b'perf::parents|perfparents', formatteropts)
1695 1711 def perfparents(ui, repo, **opts):
1696 1712 """benchmark the time necessary to fetch one changeset's parents.
1697 1713
1698 1714 The fetch is done using the `node identifier`, traversing all object layers
1699 1715 from the repository object. The first N revisions will be used for this
1700 1716 benchmark. N is controlled by the ``perf.parentscount`` config option
1701 1717 (default: 1000).
1702 1718 """
1703 1719 opts = _byteskwargs(opts)
1704 1720 timer, fm = gettimer(ui, opts)
1705 1721 # control the number of commits perfparents iterates over
1706 1722 # experimental config: perf.parentscount
1707 1723 count = getint(ui, b"perf", b"parentscount", 1000)
1708 1724 if len(repo.changelog) < count:
1709 1725 raise error.Abort(b"repo needs %d commits for this test" % count)
1710 1726 repo = repo.unfiltered()
1711 1727 nl = [repo.changelog.node(i) for i in _xrange(count)]
1712 1728
1713 1729 def d():
1714 1730 for n in nl:
1715 1731 repo.changelog.parents(n)
1716 1732
1717 1733 timer(d)
1718 1734 fm.end()
1719 1735
1720 1736
1721 1737 @command(b'perf::ctxfiles|perfctxfiles', formatteropts)
1722 1738 def perfctxfiles(ui, repo, x, **opts):
1723 1739 opts = _byteskwargs(opts)
1724 1740 x = int(x)
1725 1741 timer, fm = gettimer(ui, opts)
1726 1742
1727 1743 def d():
1728 1744 len(repo[x].files())
1729 1745
1730 1746 timer(d)
1731 1747 fm.end()
1732 1748
1733 1749
1734 1750 @command(b'perf::rawfiles|perfrawfiles', formatteropts)
1735 1751 def perfrawfiles(ui, repo, x, **opts):
1736 1752 opts = _byteskwargs(opts)
1737 1753 x = int(x)
1738 1754 timer, fm = gettimer(ui, opts)
1739 1755 cl = repo.changelog
1740 1756
1741 1757 def d():
1742 1758 len(cl.read(x)[3])
1743 1759
1744 1760 timer(d)
1745 1761 fm.end()
1746 1762
1747 1763
1748 1764 @command(b'perf::lookup|perflookup', formatteropts)
1749 1765 def perflookup(ui, repo, rev, **opts):
1750 1766 opts = _byteskwargs(opts)
1751 1767 timer, fm = gettimer(ui, opts)
1752 1768 timer(lambda: len(repo.lookup(rev)))
1753 1769 fm.end()
1754 1770
1755 1771
1756 1772 @command(
1757 1773 b'perf::linelogedits|perflinelogedits',
1758 1774 [
1759 1775 (b'n', b'edits', 10000, b'number of edits'),
1760 1776 (b'', b'max-hunk-lines', 10, b'max lines in a hunk'),
1761 1777 ],
1762 1778 norepo=True,
1763 1779 )
1764 1780 def perflinelogedits(ui, **opts):
1765 1781 from mercurial import linelog
1766 1782
1767 1783 opts = _byteskwargs(opts)
1768 1784
1769 1785 edits = opts[b'edits']
1770 1786 maxhunklines = opts[b'max_hunk_lines']
1771 1787
1772 1788 maxb1 = 100000
1773 1789 random.seed(0)
1774 1790 randint = random.randint
1775 1791 currentlines = 0
1776 1792 arglist = []
1777 1793 for rev in _xrange(edits):
1778 1794 a1 = randint(0, currentlines)
1779 1795 a2 = randint(a1, min(currentlines, a1 + maxhunklines))
1780 1796 b1 = randint(0, maxb1)
1781 1797 b2 = randint(b1, b1 + maxhunklines)
1782 1798 currentlines += (b2 - b1) - (a2 - a1)
1783 1799 arglist.append((rev, a1, a2, b1, b2))
1784 1800
1785 1801 def d():
1786 1802 ll = linelog.linelog()
1787 1803 for args in arglist:
1788 1804 ll.replacelines(*args)
1789 1805
1790 1806 timer, fm = gettimer(ui, opts)
1791 1807 timer(d)
1792 1808 fm.end()
1793 1809
1794 1810
1795 1811 @command(b'perf::revrange|perfrevrange', formatteropts)
1796 1812 def perfrevrange(ui, repo, *specs, **opts):
1797 1813 opts = _byteskwargs(opts)
1798 1814 timer, fm = gettimer(ui, opts)
1799 1815 revrange = scmutil.revrange
1800 1816 timer(lambda: len(revrange(repo, specs)))
1801 1817 fm.end()
1802 1818
1803 1819
1804 1820 @command(b'perf::nodelookup|perfnodelookup', formatteropts)
1805 1821 def perfnodelookup(ui, repo, rev, **opts):
1806 1822 opts = _byteskwargs(opts)
1807 1823 timer, fm = gettimer(ui, opts)
1808 1824 import mercurial.revlog
1809 1825
1810 1826 mercurial.revlog._prereadsize = 2 ** 24 # disable lazy parser in old hg
1811 1827 n = scmutil.revsingle(repo, rev).node()
1812 cl = mercurial.revlog.revlog(getsvfs(repo), b"00changelog.i")
1828
1829 cl = revlog(getsvfs(repo), indexfile=b"00changelog.i")
1813 1830
1814 1831 def d():
1815 1832 cl.rev(n)
1816 1833 clearcaches(cl)
1817 1834
1818 1835 timer(d)
1819 1836 fm.end()
1820 1837
1821 1838
1822 1839 @command(
1823 1840 b'perf::log|perflog',
1824 1841 [(b'', b'rename', False, b'ask log to follow renames')] + formatteropts,
1825 1842 )
1826 1843 def perflog(ui, repo, rev=None, **opts):
1827 1844 opts = _byteskwargs(opts)
1828 1845 if rev is None:
1829 1846 rev = []
1830 1847 timer, fm = gettimer(ui, opts)
1831 1848 ui.pushbuffer()
1832 1849 timer(
1833 1850 lambda: commands.log(
1834 1851 ui, repo, rev=rev, date=b'', user=b'', copies=opts.get(b'rename')
1835 1852 )
1836 1853 )
1837 1854 ui.popbuffer()
1838 1855 fm.end()
1839 1856
1840 1857
1841 1858 @command(b'perf::moonwalk|perfmoonwalk', formatteropts)
1842 1859 def perfmoonwalk(ui, repo, **opts):
1843 1860 """benchmark walking the changelog backwards
1844 1861
1845 1862 This also loads the changelog data for each revision in the changelog.
1846 1863 """
1847 1864 opts = _byteskwargs(opts)
1848 1865 timer, fm = gettimer(ui, opts)
1849 1866
1850 1867 def moonwalk():
1851 1868 for i in repo.changelog.revs(start=(len(repo) - 1), stop=-1):
1852 1869 ctx = repo[i]
1853 1870 ctx.branch() # read changelog data (in addition to the index)
1854 1871
1855 1872 timer(moonwalk)
1856 1873 fm.end()
1857 1874
1858 1875
1859 1876 @command(
1860 1877 b'perf::templating|perftemplating',
1861 1878 [
1862 1879 (b'r', b'rev', [], b'revisions to run the template on'),
1863 1880 ]
1864 1881 + formatteropts,
1865 1882 )
1866 1883 def perftemplating(ui, repo, testedtemplate=None, **opts):
1867 1884 """test the rendering time of a given template"""
1868 1885 if makelogtemplater is None:
1869 1886 raise error.Abort(
1870 1887 b"perftemplating not available with this Mercurial",
1871 1888 hint=b"use 4.3 or later",
1872 1889 )
1873 1890
1874 1891 opts = _byteskwargs(opts)
1875 1892
1876 1893 nullui = ui.copy()
1877 1894 nullui.fout = open(os.devnull, 'wb')
1878 1895 nullui.disablepager()
1879 1896 revs = opts.get(b'rev')
1880 1897 if not revs:
1881 1898 revs = [b'all()']
1882 1899 revs = list(scmutil.revrange(repo, revs))
1883 1900
1884 1901 defaulttemplate = (
1885 1902 b'{date|shortdate} [{rev}:{node|short}]'
1886 1903 b' {author|person}: {desc|firstline}\n'
1887 1904 )
1888 1905 if testedtemplate is None:
1889 1906 testedtemplate = defaulttemplate
1890 1907 displayer = makelogtemplater(nullui, repo, testedtemplate)
1891 1908
1892 1909 def format():
1893 1910 for r in revs:
1894 1911 ctx = repo[r]
1895 1912 displayer.show(ctx)
1896 1913 displayer.flush(ctx)
1897 1914
1898 1915 timer, fm = gettimer(ui, opts)
1899 1916 timer(format)
1900 1917 fm.end()
1901 1918
1902 1919
1903 1920 def _displaystats(ui, opts, entries, data):
1904 1921 # use a second formatter because the data are quite different, not sure
1905 1922 # how it flies with the templater.
1906 1923 fm = ui.formatter(b'perf-stats', opts)
1907 1924 for key, title in entries:
1908 1925 values = data[key]
1909 1926 nbvalues = len(values)
1910 1927 values.sort()
1911 1928 stats = {
1912 1929 'key': key,
1913 1930 'title': title,
1914 1931 'nbitems': len(values),
1915 1932 'min': values[0][0],
1916 1933 '10%': values[(nbvalues * 10) // 100][0],
1917 1934 '25%': values[(nbvalues * 25) // 100][0],
1918 1935 '50%': values[(nbvalues * 50) // 100][0],
1919 1936 '75%': values[(nbvalues * 75) // 100][0],
1920 1937 '80%': values[(nbvalues * 80) // 100][0],
1921 1938 '85%': values[(nbvalues * 85) // 100][0],
1922 1939 '90%': values[(nbvalues * 90) // 100][0],
1923 1940 '95%': values[(nbvalues * 95) // 100][0],
1924 1941 '99%': values[(nbvalues * 99) // 100][0],
1925 1942 'max': values[-1][0],
1926 1943 }
1927 1944 fm.startitem()
1928 1945 fm.data(**stats)
1929 1946 # make node pretty for the human output
1930 1947 fm.plain('### %s (%d items)\n' % (title, len(values)))
1931 1948 lines = [
1932 1949 'min',
1933 1950 '10%',
1934 1951 '25%',
1935 1952 '50%',
1936 1953 '75%',
1937 1954 '80%',
1938 1955 '85%',
1939 1956 '90%',
1940 1957 '95%',
1941 1958 '99%',
1942 1959 'max',
1943 1960 ]
1944 1961 for l in lines:
1945 1962 fm.plain('%s: %s\n' % (l, stats[l]))
1946 1963 fm.end()
1947 1964
1948 1965
1949 1966 @command(
1950 1967 b'perf::helper-mergecopies|perfhelper-mergecopies',
1951 1968 formatteropts
1952 1969 + [
1953 1970 (b'r', b'revs', [], b'restrict search to these revisions'),
1954 1971 (b'', b'timing', False, b'provides extra data (costly)'),
1955 1972 (b'', b'stats', False, b'provides statistic about the measured data'),
1956 1973 ],
1957 1974 )
1958 1975 def perfhelpermergecopies(ui, repo, revs=[], **opts):
1959 1976 """find statistics about potential parameters for `perfmergecopies`
1960 1977
1961 1978 This command finds (base, p1, p2) triplets relevant for copytracing
1962 1979 benchmarking in the context of a merge. It reports values for some of the
1963 1980 parameters that impact merge copy tracing time during merge.
1964 1981
1965 1982 If `--timing` is set, rename detection is run and the associated timing
1966 1983 will be reported. The extra details come at the cost of slower command
1967 1984 execution.
1968 1985
1969 1986 Since rename detection is only run once, other factors might easily
1970 1987 affect the precision of the timing. However it should give a good
1971 1988 approximation of which revision triplets are very costly.
1972 1989 """
1973 1990 opts = _byteskwargs(opts)
1974 1991 fm = ui.formatter(b'perf', opts)
1975 1992 dotiming = opts[b'timing']
1976 1993 dostats = opts[b'stats']
1977 1994
1978 1995 output_template = [
1979 1996 ("base", "%(base)12s"),
1980 1997 ("p1", "%(p1.node)12s"),
1981 1998 ("p2", "%(p2.node)12s"),
1982 1999 ("p1.nb-revs", "%(p1.nbrevs)12d"),
1983 2000 ("p1.nb-files", "%(p1.nbmissingfiles)12d"),
1984 2001 ("p1.renames", "%(p1.renamedfiles)12d"),
1985 2002 ("p1.time", "%(p1.time)12.3f"),
1986 2003 ("p2.nb-revs", "%(p2.nbrevs)12d"),
1987 2004 ("p2.nb-files", "%(p2.nbmissingfiles)12d"),
1988 2005 ("p2.renames", "%(p2.renamedfiles)12d"),
1989 2006 ("p2.time", "%(p2.time)12.3f"),
1990 2007 ("renames", "%(nbrenamedfiles)12d"),
1991 2008 ("total.time", "%(time)12.3f"),
1992 2009 ]
1993 2010 if not dotiming:
1994 2011 output_template = [
1995 2012 i
1996 2013 for i in output_template
1997 2014 if not ('time' in i[0] or 'renames' in i[0])
1998 2015 ]
1999 2016 header_names = [h for (h, v) in output_template]
2000 2017 output = ' '.join([v for (h, v) in output_template]) + '\n'
2001 2018 header = ' '.join(['%12s'] * len(header_names)) + '\n'
2002 2019 fm.plain(header % tuple(header_names))
2003 2020
2004 2021 if not revs:
2005 2022 revs = ['all()']
2006 2023 revs = scmutil.revrange(repo, revs)
2007 2024
2008 2025 if dostats:
2009 2026 alldata = {
2010 2027 'nbrevs': [],
2011 2028 'nbmissingfiles': [],
2012 2029 }
2013 2030 if dotiming:
2014 2031 alldata['parentnbrenames'] = []
2015 2032 alldata['totalnbrenames'] = []
2016 2033 alldata['parenttime'] = []
2017 2034 alldata['totaltime'] = []
2018 2035
2019 2036 roi = repo.revs('merge() and %ld', revs)
2020 2037 for r in roi:
2021 2038 ctx = repo[r]
2022 2039 p1 = ctx.p1()
2023 2040 p2 = ctx.p2()
2024 2041 bases = repo.changelog._commonancestorsheads(p1.rev(), p2.rev())
2025 2042 for b in bases:
2026 2043 b = repo[b]
2027 2044 p1missing = copies._computeforwardmissing(b, p1)
2028 2045 p2missing = copies._computeforwardmissing(b, p2)
2029 2046 data = {
2030 2047 b'base': b.hex(),
2031 2048 b'p1.node': p1.hex(),
2032 2049 b'p1.nbrevs': len(repo.revs('only(%d, %d)', p1.rev(), b.rev())),
2033 2050 b'p1.nbmissingfiles': len(p1missing),
2034 2051 b'p2.node': p2.hex(),
2035 2052 b'p2.nbrevs': len(repo.revs('only(%d, %d)', p2.rev(), b.rev())),
2036 2053 b'p2.nbmissingfiles': len(p2missing),
2037 2054 }
2038 2055 if dostats:
2039 2056 if p1missing:
2040 2057 alldata['nbrevs'].append(
2041 2058 (data['p1.nbrevs'], b.hex(), p1.hex())
2042 2059 )
2043 2060 alldata['nbmissingfiles'].append(
2044 2061 (data['p1.nbmissingfiles'], b.hex(), p1.hex())
2045 2062 )
2046 2063 if p2missing:
2047 2064 alldata['nbrevs'].append(
2048 2065 (data['p2.nbrevs'], b.hex(), p2.hex())
2049 2066 )
2050 2067 alldata['nbmissingfiles'].append(
2051 2068 (data['p2.nbmissingfiles'], b.hex(), p2.hex())
2052 2069 )
2053 2070 if dotiming:
2054 2071 begin = util.timer()
2055 2072 mergedata = copies.mergecopies(repo, p1, p2, b)
2056 2073 end = util.timer()
2057 2074 # not very stable timing since we did only one run
2058 2075 data['time'] = end - begin
2059 2076 # mergedata contains five dicts: "copy", "movewithdir",
2060 2077 # "diverge", "renamedelete" and "dirmove".
2061 2078 # The first 4 are about renamed files, so let's count them.
2062 2079 renames = len(mergedata[0])
2063 2080 renames += len(mergedata[1])
2064 2081 renames += len(mergedata[2])
2065 2082 renames += len(mergedata[3])
2066 2083 data['nbrenamedfiles'] = renames
2067 2084 begin = util.timer()
2068 2085 p1renames = copies.pathcopies(b, p1)
2069 2086 end = util.timer()
2070 2087 data['p1.time'] = end - begin
2071 2088 begin = util.timer()
2072 2089 p2renames = copies.pathcopies(b, p2)
2073 2090 end = util.timer()
2074 2091 data['p2.time'] = end - begin
2075 2092 data['p1.renamedfiles'] = len(p1renames)
2076 2093 data['p2.renamedfiles'] = len(p2renames)
2077 2094
2078 2095 if dostats:
2079 2096 if p1missing:
2080 2097 alldata['parentnbrenames'].append(
2081 2098 (data['p1.renamedfiles'], b.hex(), p1.hex())
2082 2099 )
2083 2100 alldata['parenttime'].append(
2084 2101 (data['p1.time'], b.hex(), p1.hex())
2085 2102 )
2086 2103 if p2missing:
2087 2104 alldata['parentnbrenames'].append(
2088 2105 (data['p2.renamedfiles'], b.hex(), p2.hex())
2089 2106 )
2090 2107 alldata['parenttime'].append(
2091 2108 (data['p2.time'], b.hex(), p2.hex())
2092 2109 )
2093 2110 if p1missing or p2missing:
2094 2111 alldata['totalnbrenames'].append(
2095 2112 (
2096 2113 data['nbrenamedfiles'],
2097 2114 b.hex(),
2098 2115 p1.hex(),
2099 2116 p2.hex(),
2100 2117 )
2101 2118 )
2102 2119 alldata['totaltime'].append(
2103 2120 (data['time'], b.hex(), p1.hex(), p2.hex())
2104 2121 )
2105 2122 fm.startitem()
2106 2123 fm.data(**data)
2107 2124 # make node pretty for the human output
2108 2125 out = data.copy()
2109 2126 out['base'] = fm.hexfunc(b.node())
2110 2127 out['p1.node'] = fm.hexfunc(p1.node())
2111 2128 out['p2.node'] = fm.hexfunc(p2.node())
2112 2129 fm.plain(output % out)
2113 2130
2114 2131 fm.end()
2115 2132 if dostats:
2116 2133 # use a second formatter because the data are quite different, not sure
2117 2134 # how it flies with the templater.
2118 2135 entries = [
2119 2136 ('nbrevs', 'number of revisions covered'),
2120 2137 ('nbmissingfiles', 'number of missing files at head'),
2121 2138 ]
2122 2139 if dotiming:
2123 2140 entries.append(
2124 2141 ('parentnbrenames', 'rename from one parent to base')
2125 2142 )
2126 2143 entries.append(('totalnbrenames', 'total number of renames'))
2127 2144 entries.append(('parenttime', 'time for one parent'))
2128 2145 entries.append(('totaltime', 'time for both parents'))
2129 2146 _displaystats(ui, opts, entries, alldata)
2130 2147
2131 2148
2132 2149 @command(
2133 2150 b'perf::helper-pathcopies|perfhelper-pathcopies',
2134 2151 formatteropts
2135 2152 + [
2136 2153 (b'r', b'revs', [], b'restrict search to these revisions'),
2137 2154 (b'', b'timing', False, b'provides extra data (costly)'),
2138 2155 (b'', b'stats', False, b'provides statistic about the measured data'),
2139 2156 ],
2140 2157 )
2141 2158 def perfhelperpathcopies(ui, repo, revs=[], **opts):
2142 2159 """find statistic about potential parameters for the `perftracecopies`
2143 2160
2144 2161 This command finds source-destination pairs relevant for copytracing testing.
2145 2162 It reports values for some of the parameters that impact copy tracing time.
2146 2163
2147 2164 If `--timing` is set, rename detection is run and the associated timing
2148 2165 will be reported. The extra details come at the cost of a slower command
2149 2166 execution.
2150 2167
2151 2168 Since the rename detection is only run once, other factors might easily
2152 2169 affect the precision of the timing. However it should give a good
2153 2170 approximation of which revision pairs are very costly.
2154 2171 """
2155 2172 opts = _byteskwargs(opts)
2156 2173 fm = ui.formatter(b'perf', opts)
2157 2174 dotiming = opts[b'timing']
2158 2175 dostats = opts[b'stats']
2159 2176
2160 2177 if dotiming:
2161 2178 header = '%12s %12s %12s %12s %12s %12s\n'
2162 2179 output = (
2163 2180 "%(source)12s %(destination)12s "
2164 2181 "%(nbrevs)12d %(nbmissingfiles)12d "
2165 2182 "%(nbrenamedfiles)12d %(time)18.5f\n"
2166 2183 )
2167 2184 header_names = (
2168 2185 "source",
2169 2186 "destination",
2170 2187 "nb-revs",
2171 2188 "nb-files",
2172 2189 "nb-renames",
2173 2190 "time",
2174 2191 )
2175 2192 fm.plain(header % header_names)
2176 2193 else:
2177 2194 header = '%12s %12s %12s %12s\n'
2178 2195 output = (
2179 2196 "%(source)12s %(destination)12s "
2180 2197 "%(nbrevs)12d %(nbmissingfiles)12d\n"
2181 2198 )
2182 2199 fm.plain(header % ("source", "destination", "nb-revs", "nb-files"))
2183 2200
2184 2201 if not revs:
2185 2202 revs = ['all()']
2186 2203 revs = scmutil.revrange(repo, revs)
2187 2204
2188 2205 if dostats:
2189 2206 alldata = {
2190 2207 'nbrevs': [],
2191 2208 'nbmissingfiles': [],
2192 2209 }
2193 2210 if dotiming:
2194 2211 alldata['nbrenames'] = []
2195 2212 alldata['time'] = []
2196 2213
2197 2214 roi = repo.revs('merge() and %ld', revs)
2198 2215 for r in roi:
2199 2216 ctx = repo[r]
2200 2217 p1 = ctx.p1().rev()
2201 2218 p2 = ctx.p2().rev()
2202 2219 bases = repo.changelog._commonancestorsheads(p1, p2)
2203 2220 for p in (p1, p2):
2204 2221 for b in bases:
2205 2222 base = repo[b]
2206 2223 parent = repo[p]
2207 2224 missing = copies._computeforwardmissing(base, parent)
2208 2225 if not missing:
2209 2226 continue
2210 2227 data = {
2211 2228 b'source': base.hex(),
2212 2229 b'destination': parent.hex(),
2213 2230 b'nbrevs': len(repo.revs('only(%d, %d)', p, b)),
2214 2231 b'nbmissingfiles': len(missing),
2215 2232 }
2216 2233 if dostats:
2217 2234 alldata['nbrevs'].append(
2218 2235 (
2219 2236 data['nbrevs'],
2220 2237 base.hex(),
2221 2238 parent.hex(),
2222 2239 )
2223 2240 )
2224 2241 alldata['nbmissingfiles'].append(
2225 2242 (
2226 2243 data['nbmissingfiles'],
2227 2244 base.hex(),
2228 2245 parent.hex(),
2229 2246 )
2230 2247 )
2231 2248 if dotiming:
2232 2249 begin = util.timer()
2233 2250 renames = copies.pathcopies(base, parent)
2234 2251 end = util.timer()
2235 2252 # not very stable timing since we did only one run
2236 2253 data['time'] = end - begin
2237 2254 data['nbrenamedfiles'] = len(renames)
2238 2255 if dostats:
2239 2256 alldata['time'].append(
2240 2257 (
2241 2258 data['time'],
2242 2259 base.hex(),
2243 2260 parent.hex(),
2244 2261 )
2245 2262 )
2246 2263 alldata['nbrenames'].append(
2247 2264 (
2248 2265 data['nbrenamedfiles'],
2249 2266 base.hex(),
2250 2267 parent.hex(),
2251 2268 )
2252 2269 )
2253 2270 fm.startitem()
2254 2271 fm.data(**data)
2255 2272 out = data.copy()
2256 2273 out['source'] = fm.hexfunc(base.node())
2257 2274 out['destination'] = fm.hexfunc(parent.node())
2258 2275 fm.plain(output % out)
2259 2276
2260 2277 fm.end()
2261 2278 if dostats:
2262 2279 entries = [
2263 2280 ('nbrevs', 'number of revisions covered'),
2264 2281 ('nbmissingfiles', 'number of missing files at head'),
2265 2282 ]
2266 2283 if dotiming:
2267 2284 entries.append(('nbrenames', 'renamed files'))
2268 2285 entries.append(('time', 'time'))
2269 2286 _displaystats(ui, opts, entries, alldata)
2270 2287
2271 2288
2272 2289 @command(b'perf::cca|perfcca', formatteropts)
2273 2290 def perfcca(ui, repo, **opts):
2274 2291 opts = _byteskwargs(opts)
2275 2292 timer, fm = gettimer(ui, opts)
2276 2293 timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
2277 2294 fm.end()
2278 2295
2279 2296
2280 2297 @command(b'perf::fncacheload|perffncacheload', formatteropts)
2281 2298 def perffncacheload(ui, repo, **opts):
2282 2299 opts = _byteskwargs(opts)
2283 2300 timer, fm = gettimer(ui, opts)
2284 2301 s = repo.store
2285 2302
2286 2303 def d():
2287 2304 s.fncache._load()
2288 2305
2289 2306 timer(d)
2290 2307 fm.end()
2291 2308
2292 2309
2293 2310 @command(b'perf::fncachewrite|perffncachewrite', formatteropts)
2294 2311 def perffncachewrite(ui, repo, **opts):
2295 2312 opts = _byteskwargs(opts)
2296 2313 timer, fm = gettimer(ui, opts)
2297 2314 s = repo.store
2298 2315 lock = repo.lock()
2299 2316 s.fncache._load()
2300 2317 tr = repo.transaction(b'perffncachewrite')
2301 2318 tr.addbackup(b'fncache')
2302 2319
2303 2320 def d():
2304 2321 s.fncache._dirty = True
2305 2322 s.fncache.write(tr)
2306 2323
2307 2324 timer(d)
2308 2325 tr.close()
2309 2326 lock.release()
2310 2327 fm.end()
2311 2328
2312 2329
2313 2330 @command(b'perf::fncacheencode|perffncacheencode', formatteropts)
2314 2331 def perffncacheencode(ui, repo, **opts):
2315 2332 opts = _byteskwargs(opts)
2316 2333 timer, fm = gettimer(ui, opts)
2317 2334 s = repo.store
2318 2335 s.fncache._load()
2319 2336
2320 2337 def d():
2321 2338 for p in s.fncache.entries:
2322 2339 s.encode(p)
2323 2340
2324 2341 timer(d)
2325 2342 fm.end()
2326 2343
2327 2344
2328 2345 def _bdiffworker(q, blocks, xdiff, ready, done):
2329 2346 while not done.is_set():
2330 2347 pair = q.get()
2331 2348 while pair is not None:
2332 2349 if xdiff:
2333 2350 mdiff.bdiff.xdiffblocks(*pair)
2334 2351 elif blocks:
2335 2352 mdiff.bdiff.blocks(*pair)
2336 2353 else:
2337 2354 mdiff.textdiff(*pair)
2338 2355 q.task_done()
2339 2356 pair = q.get()
2340 2357 q.task_done() # for the None one
2341 2358 with ready:
2342 2359 ready.wait()
2343 2360
2344 2361
2345 2362 def _manifestrevision(repo, mnode):
2346 2363 ml = repo.manifestlog
2347 2364
2348 2365 if util.safehasattr(ml, b'getstorage'):
2349 2366 store = ml.getstorage(b'')
2350 2367 else:
2351 2368 store = ml._revlog
2352 2369
2353 2370 return store.revision(mnode)
2354 2371
2355 2372
2356 2373 @command(
2357 2374 b'perf::bdiff|perfbdiff',
2358 2375 revlogopts
2359 2376 + formatteropts
2360 2377 + [
2361 2378 (
2362 2379 b'',
2363 2380 b'count',
2364 2381 1,
2365 2382 b'number of revisions to test (when using --startrev)',
2366 2383 ),
2367 2384 (b'', b'alldata', False, b'test bdiffs for all associated revisions'),
2368 2385 (b'', b'threads', 0, b'number of thread to use (disable with 0)'),
2369 2386 (b'', b'blocks', False, b'test computing diffs into blocks'),
2370 2387 (b'', b'xdiff', False, b'use xdiff algorithm'),
2371 2388 ],
2372 2389 b'-c|-m|FILE REV',
2373 2390 )
2374 2391 def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts):
2375 2392 """benchmark a bdiff between revisions
2376 2393
2377 2394 By default, benchmark a bdiff between its delta parent and itself.
2378 2395
2379 2396 With ``--count``, benchmark bdiffs between delta parents and self for N
2380 2397 revisions starting at the specified revision.
2381 2398
2382 2399 With ``--alldata``, assume the requested revision is a changeset and
2383 2400 measure bdiffs for all changes related to that changeset (manifest
2384 2401 and filelogs).
2385 2402 """
2386 2403 opts = _byteskwargs(opts)
2387 2404
2388 2405 if opts[b'xdiff'] and not opts[b'blocks']:
2389 2406 raise error.CommandError(b'perfbdiff', b'--xdiff requires --blocks')
2390 2407
2391 2408 if opts[b'alldata']:
2392 2409 opts[b'changelog'] = True
2393 2410
2394 2411 if opts.get(b'changelog') or opts.get(b'manifest'):
2395 2412 file_, rev = None, file_
2396 2413 elif rev is None:
2397 2414 raise error.CommandError(b'perfbdiff', b'invalid arguments')
2398 2415
2399 2416 blocks = opts[b'blocks']
2400 2417 xdiff = opts[b'xdiff']
2401 2418 textpairs = []
2402 2419
2403 2420 r = cmdutil.openrevlog(repo, b'perfbdiff', file_, opts)
2404 2421
2405 2422 startrev = r.rev(r.lookup(rev))
2406 2423 for rev in range(startrev, min(startrev + count, len(r) - 1)):
2407 2424 if opts[b'alldata']:
2408 2425 # Load revisions associated with changeset.
2409 2426 ctx = repo[rev]
2410 2427 mtext = _manifestrevision(repo, ctx.manifestnode())
2411 2428 for pctx in ctx.parents():
2412 2429 pman = _manifestrevision(repo, pctx.manifestnode())
2413 2430 textpairs.append((pman, mtext))
2414 2431
2415 2432 # Load filelog revisions by iterating manifest delta.
2416 2433 man = ctx.manifest()
2417 2434 pman = ctx.p1().manifest()
2418 2435 for filename, change in pman.diff(man).items():
2419 2436 fctx = repo.file(filename)
2420 2437 f1 = fctx.revision(change[0][0] or -1)
2421 2438 f2 = fctx.revision(change[1][0] or -1)
2422 2439 textpairs.append((f1, f2))
2423 2440 else:
2424 2441 dp = r.deltaparent(rev)
2425 2442 textpairs.append((r.revision(dp), r.revision(rev)))
2426 2443
2427 2444 withthreads = threads > 0
2428 2445 if not withthreads:
2429 2446
2430 2447 def d():
2431 2448 for pair in textpairs:
2432 2449 if xdiff:
2433 2450 mdiff.bdiff.xdiffblocks(*pair)
2434 2451 elif blocks:
2435 2452 mdiff.bdiff.blocks(*pair)
2436 2453 else:
2437 2454 mdiff.textdiff(*pair)
2438 2455
2439 2456 else:
2440 2457 q = queue()
2441 2458 for i in _xrange(threads):
2442 2459 q.put(None)
2443 2460 ready = threading.Condition()
2444 2461 done = threading.Event()
2445 2462 for i in _xrange(threads):
2446 2463 threading.Thread(
2447 2464 target=_bdiffworker, args=(q, blocks, xdiff, ready, done)
2448 2465 ).start()
2449 2466 q.join()
2450 2467
2451 2468 def d():
2452 2469 for pair in textpairs:
2453 2470 q.put(pair)
2454 2471 for i in _xrange(threads):
2455 2472 q.put(None)
2456 2473 with ready:
2457 2474 ready.notify_all()
2458 2475 q.join()
2459 2476
2460 2477 timer, fm = gettimer(ui, opts)
2461 2478 timer(d)
2462 2479 fm.end()
2463 2480
2464 2481 if withthreads:
2465 2482 done.set()
2466 2483 for i in _xrange(threads):
2467 2484 q.put(None)
2468 2485 with ready:
2469 2486 ready.notify_all()
2470 2487
2471 2488
2472 2489 @command(
2473 2490 b'perf::unidiff|perfunidiff',
2474 2491 revlogopts
2475 2492 + formatteropts
2476 2493 + [
2477 2494 (
2478 2495 b'',
2479 2496 b'count',
2480 2497 1,
2481 2498 b'number of revisions to test (when using --startrev)',
2482 2499 ),
2483 2500 (b'', b'alldata', False, b'test unidiffs for all associated revisions'),
2484 2501 ],
2485 2502 b'-c|-m|FILE REV',
2486 2503 )
2487 2504 def perfunidiff(ui, repo, file_, rev=None, count=None, **opts):
2488 2505 """benchmark a unified diff between revisions
2489 2506
2490 2507 This doesn't include any copy tracing - it's just a unified diff
2491 2508 of the texts.
2492 2509
2493 2510 By default, benchmark a diff between its delta parent and itself.
2494 2511
2495 2512 With ``--count``, benchmark diffs between delta parents and self for N
2496 2513 revisions starting at the specified revision.
2497 2514
2498 2515 With ``--alldata``, assume the requested revision is a changeset and
2499 2516 measure diffs for all changes related to that changeset (manifest
2500 2517 and filelogs).
2501 2518 """
2502 2519 opts = _byteskwargs(opts)
2503 2520 if opts[b'alldata']:
2504 2521 opts[b'changelog'] = True
2505 2522
2506 2523 if opts.get(b'changelog') or opts.get(b'manifest'):
2507 2524 file_, rev = None, file_
2508 2525 elif rev is None:
2509 2526 raise error.CommandError(b'perfunidiff', b'invalid arguments')
2510 2527
2511 2528 textpairs = []
2512 2529
2513 2530 r = cmdutil.openrevlog(repo, b'perfunidiff', file_, opts)
2514 2531
2515 2532 startrev = r.rev(r.lookup(rev))
2516 2533 for rev in range(startrev, min(startrev + count, len(r) - 1)):
2517 2534 if opts[b'alldata']:
2518 2535 # Load revisions associated with changeset.
2519 2536 ctx = repo[rev]
2520 2537 mtext = _manifestrevision(repo, ctx.manifestnode())
2521 2538 for pctx in ctx.parents():
2522 2539 pman = _manifestrevision(repo, pctx.manifestnode())
2523 2540 textpairs.append((pman, mtext))
2524 2541
2525 2542 # Load filelog revisions by iterating manifest delta.
2526 2543 man = ctx.manifest()
2527 2544 pman = ctx.p1().manifest()
2528 2545 for filename, change in pman.diff(man).items():
2529 2546 fctx = repo.file(filename)
2530 2547 f1 = fctx.revision(change[0][0] or -1)
2531 2548 f2 = fctx.revision(change[1][0] or -1)
2532 2549 textpairs.append((f1, f2))
2533 2550 else:
2534 2551 dp = r.deltaparent(rev)
2535 2552 textpairs.append((r.revision(dp), r.revision(rev)))
2536 2553
2537 2554 def d():
2538 2555 for left, right in textpairs:
2539 2556 # The date strings don't matter, so we pass empty strings.
2540 2557 headerlines, hunks = mdiff.unidiff(
2541 2558 left, b'', right, b'', b'left', b'right', binary=False
2542 2559 )
2543 2560 # consume iterators in roughly the way patch.py does
2544 2561 b'\n'.join(headerlines)
2545 2562 b''.join(sum((list(hlines) for hrange, hlines in hunks), []))
2546 2563
2547 2564 timer, fm = gettimer(ui, opts)
2548 2565 timer(d)
2549 2566 fm.end()
2550 2567
2551 2568
2552 2569 @command(b'perf::diffwd|perfdiffwd', formatteropts)
2553 2570 def perfdiffwd(ui, repo, **opts):
2554 2571 """Profile diff of working directory changes"""
2555 2572 opts = _byteskwargs(opts)
2556 2573 timer, fm = gettimer(ui, opts)
2557 2574 options = {
2558 2575 'w': 'ignore_all_space',
2559 2576 'b': 'ignore_space_change',
2560 2577 'B': 'ignore_blank_lines',
2561 2578 }
2562 2579
2563 2580 for diffopt in ('', 'w', 'b', 'B', 'wB'):
2564 2581 opts = {options[c]: b'1' for c in diffopt}
2565 2582
2566 2583 def d():
2567 2584 ui.pushbuffer()
2568 2585 commands.diff(ui, repo, **opts)
2569 2586 ui.popbuffer()
2570 2587
2571 2588 diffopt = diffopt.encode('ascii')
2572 2589 title = b'diffopts: %s' % (diffopt and (b'-' + diffopt) or b'none')
2573 2590 timer(d, title=title)
2574 2591 fm.end()
2575 2592
2576 2593
2577 2594 @command(
2578 2595 b'perf::revlogindex|perfrevlogindex',
2579 2596 revlogopts + formatteropts,
2580 2597 b'-c|-m|FILE',
2581 2598 )
2582 2599 def perfrevlogindex(ui, repo, file_=None, **opts):
2583 2600 """Benchmark operations against a revlog index.
2584 2601
2585 2602 This tests constructing a revlog instance, reading index data,
2586 2603 parsing index data, and performing various operations related to
2587 2604 index data.
2588 2605 """
2589 2606
2590 2607 opts = _byteskwargs(opts)
2591 2608
2592 2609 rl = cmdutil.openrevlog(repo, b'perfrevlogindex', file_, opts)
2593 2610
2594 2611 opener = getattr(rl, 'opener') # trick linter
2595 2612 indexfile = rl.indexfile
2596 2613 data = opener.read(indexfile)
2597 2614
2598 2615 header = struct.unpack(b'>I', data[0:4])[0]
2599 2616 version = header & 0xFFFF
2600 2617 if version == 1:
2601 2618 inline = header & (1 << 16)
2602 2619 else:
2603 2620 raise error.Abort(b'unsupported revlog version: %d' % version)
2604 2621
2605 parse_index_v1 = getattr(revlog, 'parse_index_v1', None)
2622 parse_index_v1 = getattr(mercurial.revlog, 'parse_index_v1', None)
2606 2623 if parse_index_v1 is None:
2607 parse_index_v1 = revlog.revlogio().parseindex
2624 parse_index_v1 = mercurial.revlog.revlogio().parseindex
2608 2625
2609 2626 rllen = len(rl)
2610 2627
2611 2628 node0 = rl.node(0)
2612 2629 node25 = rl.node(rllen // 4)
2613 2630 node50 = rl.node(rllen // 2)
2614 2631 node75 = rl.node(rllen // 4 * 3)
2615 2632 node100 = rl.node(rllen - 1)
2616 2633
2617 2634 allrevs = range(rllen)
2618 2635 allrevsrev = list(reversed(allrevs))
2619 2636 allnodes = [rl.node(rev) for rev in range(rllen)]
2620 2637 allnodesrev = list(reversed(allnodes))
2621 2638
2622 2639 def constructor():
2623 revlog.revlog(opener, indexfile)
2640 revlog(opener, indexfile=indexfile)
2624 2641
2625 2642 def read():
2626 2643 with opener(indexfile) as fh:
2627 2644 fh.read()
2628 2645
2629 2646 def parseindex():
2630 2647 parse_index_v1(data, inline)
2631 2648
2632 2649 def getentry(revornode):
2633 2650 index = parse_index_v1(data, inline)[0]
2634 2651 index[revornode]
2635 2652
2636 2653 def getentries(revs, count=1):
2637 2654 index = parse_index_v1(data, inline)[0]
2638 2655
2639 2656 for i in range(count):
2640 2657 for rev in revs:
2641 2658 index[rev]
2642 2659
2643 2660 def resolvenode(node):
2644 2661 index = parse_index_v1(data, inline)[0]
2645 2662 rev = getattr(index, 'rev', None)
2646 2663 if rev is None:
2647 2664 nodemap = getattr(parse_index_v1(data, inline)[0], 'nodemap', None)
2648 2665 # This only works for the C code.
2649 2666 if nodemap is None:
2650 2667 return
2651 2668 rev = nodemap.__getitem__
2652 2669
2653 2670 try:
2654 2671 rev(node)
2655 2672 except error.RevlogError:
2656 2673 pass
2657 2674
2658 2675 def resolvenodes(nodes, count=1):
2659 2676 index = parse_index_v1(data, inline)[0]
2660 2677 rev = getattr(index, 'rev', None)
2661 2678 if rev is None:
2662 2679 nodemap = getattr(parse_index_v1(data, inline)[0], 'nodemap', None)
2663 2680 # This only works for the C code.
2664 2681 if nodemap is None:
2665 2682 return
2666 2683 rev = nodemap.__getitem__
2667 2684
2668 2685 for i in range(count):
2669 2686 for node in nodes:
2670 2687 try:
2671 2688 rev(node)
2672 2689 except error.RevlogError:
2673 2690 pass
2674 2691
2675 2692 benches = [
2676 2693 (constructor, b'revlog constructor'),
2677 2694 (read, b'read'),
2678 2695 (parseindex, b'create index object'),
2679 2696 (lambda: getentry(0), b'retrieve index entry for rev 0'),
2680 2697 (lambda: resolvenode(b'a' * 20), b'look up missing node'),
2681 2698 (lambda: resolvenode(node0), b'look up node at rev 0'),
2682 2699 (lambda: resolvenode(node25), b'look up node at 1/4 len'),
2683 2700 (lambda: resolvenode(node50), b'look up node at 1/2 len'),
2684 2701 (lambda: resolvenode(node75), b'look up node at 3/4 len'),
2685 2702 (lambda: resolvenode(node100), b'look up node at tip'),
2686 2703 # 2x variation is to measure caching impact.
2687 2704 (lambda: resolvenodes(allnodes), b'look up all nodes (forward)'),
2688 2705 (lambda: resolvenodes(allnodes, 2), b'look up all nodes 2x (forward)'),
2689 2706 (lambda: resolvenodes(allnodesrev), b'look up all nodes (reverse)'),
2690 2707 (
2691 2708 lambda: resolvenodes(allnodesrev, 2),
2692 2709 b'look up all nodes 2x (reverse)',
2693 2710 ),
2694 2711 (lambda: getentries(allrevs), b'retrieve all index entries (forward)'),
2695 2712 (
2696 2713 lambda: getentries(allrevs, 2),
2697 2714 b'retrieve all index entries 2x (forward)',
2698 2715 ),
2699 2716 (
2700 2717 lambda: getentries(allrevsrev),
2701 2718 b'retrieve all index entries (reverse)',
2702 2719 ),
2703 2720 (
2704 2721 lambda: getentries(allrevsrev, 2),
2705 2722 b'retrieve all index entries 2x (reverse)',
2706 2723 ),
2707 2724 ]
2708 2725
2709 2726 for fn, title in benches:
2710 2727 timer, fm = gettimer(ui, opts)
2711 2728 timer(fn, title=title)
2712 2729 fm.end()
2713 2730
2714 2731
2715 2732 @command(
2716 2733 b'perf::revlogrevisions|perfrevlogrevisions',
2717 2734 revlogopts
2718 2735 + formatteropts
2719 2736 + [
2720 2737 (b'd', b'dist', 100, b'distance between the revisions'),
2721 2738 (b's', b'startrev', 0, b'revision to start reading at'),
2722 2739 (b'', b'reverse', False, b'read in reverse'),
2723 2740 ],
2724 2741 b'-c|-m|FILE',
2725 2742 )
2726 2743 def perfrevlogrevisions(
2727 2744 ui, repo, file_=None, startrev=0, reverse=False, **opts
2728 2745 ):
2729 2746 """Benchmark reading a series of revisions from a revlog.
2730 2747
2731 2748 By default, we read every ``-d/--dist`` revision from 0 to tip of
2732 2749 the specified revlog.
2733 2750
2734 2751 The start revision can be defined via ``-s/--startrev``.
2735 2752 """
2736 2753 opts = _byteskwargs(opts)
2737 2754
2738 2755 rl = cmdutil.openrevlog(repo, b'perfrevlogrevisions', file_, opts)
2739 2756 rllen = getlen(ui)(rl)
2740 2757
2741 2758 if startrev < 0:
2742 2759 startrev = rllen + startrev
2743 2760
2744 2761 def d():
2745 2762 rl.clearcaches()
2746 2763
2747 2764 beginrev = startrev
2748 2765 endrev = rllen
2749 2766 dist = opts[b'dist']
2750 2767
2751 2768 if reverse:
2752 2769 beginrev, endrev = endrev - 1, beginrev - 1
2753 2770 dist = -1 * dist
2754 2771
2755 2772 for x in _xrange(beginrev, endrev, dist):
2756 2773 # Old revisions don't support passing int.
2757 2774 n = rl.node(x)
2758 2775 rl.revision(n)
2759 2776
2760 2777 timer, fm = gettimer(ui, opts)
2761 2778 timer(d)
2762 2779 fm.end()
2763 2780
2764 2781
2765 2782 @command(
2766 2783 b'perf::revlogwrite|perfrevlogwrite',
2767 2784 revlogopts
2768 2785 + formatteropts
2769 2786 + [
2770 2787 (b's', b'startrev', 1000, b'revision to start writing at'),
2771 2788 (b'', b'stoprev', -1, b'last revision to write'),
2772 2789 (b'', b'count', 3, b'number of passes to perform'),
2773 2790 (b'', b'details', False, b'print timing for every revisions tested'),
2774 2791 (b'', b'source', b'full', b'the kind of data feed in the revlog'),
2775 2792 (b'', b'lazydeltabase', True, b'try the provided delta first'),
2776 2793 (b'', b'clear-caches', True, b'clear revlog cache between calls'),
2777 2794 ],
2778 2795 b'-c|-m|FILE',
2779 2796 )
2780 2797 def perfrevlogwrite(ui, repo, file_=None, startrev=1000, stoprev=-1, **opts):
2781 2798 """Benchmark writing a series of revisions to a revlog.
2782 2799
2783 2800 Possible source values are:
2784 2801 * `full`: add from a full text (default).
2785 2802 * `parent-1`: add from a delta to the first parent
2786 2803 * `parent-2`: add from a delta to the second parent if it exists
2787 2804 (use a delta from the first parent otherwise)
2788 2805 * `parent-smallest`: add from the smallest delta (either p1 or p2)
2789 2806 * `storage`: add from the existing precomputed deltas
2790 2807
2791 2808 Note: This performance command measures performance in a custom way. As a
2792 2809 result some of the global configuration of the 'perf' command does not
2793 2810 apply to it:
2794 2811
2795 2812 * ``pre-run``: disabled
2796 2813
2797 2814 * ``profile-benchmark``: disabled
2798 2815
2799 2816 * ``run-limits``: disabled, use --count instead
2800 2817 """
2801 2818 opts = _byteskwargs(opts)
2802 2819
2803 2820 rl = cmdutil.openrevlog(repo, b'perfrevlogwrite', file_, opts)
2804 2821 rllen = getlen(ui)(rl)
2805 2822 if startrev < 0:
2806 2823 startrev = rllen + startrev
2807 2824 if stoprev < 0:
2808 2825 stoprev = rllen + stoprev
2809 2826
2810 2827 lazydeltabase = opts['lazydeltabase']
2811 2828 source = opts['source']
2812 2829 clearcaches = opts['clear_caches']
2813 2830 validsource = (
2814 2831 b'full',
2815 2832 b'parent-1',
2816 2833 b'parent-2',
2817 2834 b'parent-smallest',
2818 2835 b'storage',
2819 2836 )
2820 2837 if source not in validsource:
2821 2838 raise error.Abort('invalid source type: %s' % source)
2822 2839
2823 2840 ### actually gather results
2824 2841 count = opts['count']
2825 2842 if count <= 0:
2826 2843 raise error.Abort('invalid run count: %d' % count)
2827 2844 allresults = []
2828 2845 for c in range(count):
2829 2846 timing = _timeonewrite(
2830 2847 ui,
2831 2848 rl,
2832 2849 source,
2833 2850 startrev,
2834 2851 stoprev,
2835 2852 c + 1,
2836 2853 lazydeltabase=lazydeltabase,
2837 2854 clearcaches=clearcaches,
2838 2855 )
2839 2856 allresults.append(timing)
2840 2857
2841 2858 ### consolidate the results in a single list
2842 2859 results = []
2843 2860 for idx, (rev, t) in enumerate(allresults[0]):
2844 2861 ts = [t]
2845 2862 for other in allresults[1:]:
2846 2863 orev, ot = other[idx]
2847 2864 assert orev == rev
2848 2865 ts.append(ot)
2849 2866 results.append((rev, ts))
2850 2867 resultcount = len(results)
2851 2868
2852 2869 ### Compute and display relevant statistics
2853 2870
2854 2871 # get a formatter
2855 2872 fm = ui.formatter(b'perf', opts)
2856 2873 displayall = ui.configbool(b"perf", b"all-timing", False)
2857 2874
2858 2875 # print individual details if requested
2859 2876 if opts['details']:
2860 2877 for idx, item in enumerate(results, 1):
2861 2878 rev, data = item
2862 2879 title = 'revisions #%d of %d, rev %d' % (idx, resultcount, rev)
2863 2880 formatone(fm, data, title=title, displayall=displayall)
2864 2881
2865 2882 # sorts results by median time
2866 2883 results.sort(key=lambda x: sorted(x[1])[len(x[1]) // 2])
2867 2884 # list of (name, index) to display
2868 2885 relevants = [
2869 2886 ("min", 0),
2870 2887 ("10%", resultcount * 10 // 100),
2871 2888 ("25%", resultcount * 25 // 100),
2872 2889 ("50%", resultcount * 70 // 100),
2873 2890 ("75%", resultcount * 75 // 100),
2874 2891 ("90%", resultcount * 90 // 100),
2875 2892 ("95%", resultcount * 95 // 100),
2876 2893 ("99%", resultcount * 99 // 100),
2877 2894 ("99.9%", resultcount * 999 // 1000),
2878 2895 ("99.99%", resultcount * 9999 // 10000),
2879 2896 ("99.999%", resultcount * 99999 // 100000),
2880 2897 ("max", -1),
2881 2898 ]
2882 2899 if not ui.quiet:
2883 2900 for name, idx in relevants:
2884 2901 data = results[idx]
2885 2902 title = '%s of %d, rev %d' % (name, resultcount, data[0])
2886 2903 formatone(fm, data[1], title=title, displayall=displayall)
2887 2904
2888 2905 # XXX summing that many floats will not be very precise, we ignore this fact
2889 2906 # for now
2890 2907 totaltime = []
2891 2908 for item in allresults:
2892 2909 totaltime.append(
2893 2910 (
2894 2911 sum(x[1][0] for x in item),
2895 2912 sum(x[1][1] for x in item),
2896 2913 sum(x[1][2] for x in item),
2897 2914 )
2898 2915 )
2899 2916 formatone(
2900 2917 fm,
2901 2918 totaltime,
2902 2919 title="total time (%d revs)" % resultcount,
2903 2920 displayall=displayall,
2904 2921 )
2905 2922 fm.end()
2906 2923
2907 2924
2908 2925 class _faketr(object):
2909 2926 def add(s, x, y, z=None):
2910 2927 return None
2911 2928
2912 2929
2913 2930 def _timeonewrite(
2914 2931 ui,
2915 2932 orig,
2916 2933 source,
2917 2934 startrev,
2918 2935 stoprev,
2919 2936 runidx=None,
2920 2937 lazydeltabase=True,
2921 2938 clearcaches=True,
2922 2939 ):
2923 2940 timings = []
2924 2941 tr = _faketr()
2925 2942 with _temprevlog(ui, orig, startrev) as dest:
2926 2943 dest._lazydeltabase = lazydeltabase
2927 2944 revs = list(orig.revs(startrev, stoprev))
2928 2945 total = len(revs)
2929 2946 topic = 'adding'
2930 2947 if runidx is not None:
2931 2948 topic += ' (run #%d)' % runidx
2932 2949 # Support both old and new progress API
2933 2950 if util.safehasattr(ui, 'makeprogress'):
2934 2951 progress = ui.makeprogress(topic, unit='revs', total=total)
2935 2952
2936 2953 def updateprogress(pos):
2937 2954 progress.update(pos)
2938 2955
2939 2956 def completeprogress():
2940 2957 progress.complete()
2941 2958
2942 2959 else:
2943 2960
2944 2961 def updateprogress(pos):
2945 2962 ui.progress(topic, pos, unit='revs', total=total)
2946 2963
2947 2964 def completeprogress():
2948 2965 ui.progress(topic, None, unit='revs', total=total)
2949 2966
2950 2967 for idx, rev in enumerate(revs):
2951 2968 updateprogress(idx)
2952 2969 addargs, addkwargs = _getrevisionseed(orig, rev, tr, source)
2953 2970 if clearcaches:
2954 2971 dest.index.clearcaches()
2955 2972 dest.clearcaches()
2956 2973 with timeone() as r:
2957 2974 dest.addrawrevision(*addargs, **addkwargs)
2958 2975 timings.append((rev, r[0]))
2959 2976 updateprogress(total)
2960 2977 completeprogress()
2961 2978 return timings
2962 2979
2963 2980
2964 2981 def _getrevisionseed(orig, rev, tr, source):
2965 2982 from mercurial.node import nullid
2966 2983
2967 2984 linkrev = orig.linkrev(rev)
2968 2985 node = orig.node(rev)
2969 2986 p1, p2 = orig.parents(node)
2970 2987 flags = orig.flags(rev)
2971 2988 cachedelta = None
2972 2989 text = None
2973 2990
2974 2991 if source == b'full':
2975 2992 text = orig.revision(rev)
2976 2993 elif source == b'parent-1':
2977 2994 baserev = orig.rev(p1)
2978 2995 cachedelta = (baserev, orig.revdiff(p1, rev))
2979 2996 elif source == b'parent-2':
2980 2997 parent = p2
2981 2998 if p2 == nullid:
2982 2999 parent = p1
2983 3000 baserev = orig.rev(parent)
2984 3001 cachedelta = (baserev, orig.revdiff(parent, rev))
2985 3002 elif source == b'parent-smallest':
2986 3003 p1diff = orig.revdiff(p1, rev)
2987 3004 parent = p1
2988 3005 diff = p1diff
2989 3006 if p2 != nullid:
2990 3007 p2diff = orig.revdiff(p2, rev)
2991 3008 if len(p1diff) > len(p2diff):
2992 3009 parent = p2
2993 3010 diff = p2diff
2994 3011 baserev = orig.rev(parent)
2995 3012 cachedelta = (baserev, diff)
2996 3013 elif source == b'storage':
2997 3014 baserev = orig.deltaparent(rev)
2998 3015 cachedelta = (baserev, orig.revdiff(orig.node(baserev), rev))
2999 3016
3000 3017 return (
3001 3018 (text, tr, linkrev, p1, p2),
3002 3019 {'node': node, 'flags': flags, 'cachedelta': cachedelta},
3003 3020 )
3004 3021
3005 3022
3006 3023 @contextlib.contextmanager
3007 3024 def _temprevlog(ui, orig, truncaterev):
3008 3025 from mercurial import vfs as vfsmod
3009 3026
3010 3027 if orig._inline:
3011 3028 raise error.Abort('not supporting inline revlog (yet)')
3012 3029 revlogkwargs = {}
3013 3030 k = 'upperboundcomp'
3014 3031 if util.safehasattr(orig, k):
3015 3032 revlogkwargs[k] = getattr(orig, k)
3016 3033
3017 3034 origindexpath = orig.opener.join(orig.indexfile)
3018 3035 origdatapath = orig.opener.join(orig.datafile)
3019 3036 indexname = 'revlog.i'
3020 3037 dataname = 'revlog.d'
3021 3038
3022 3039 tmpdir = tempfile.mkdtemp(prefix='tmp-hgperf-')
3023 3040 try:
3024 3041 # copy the data file in a temporary directory
3025 3042 ui.debug('copying data in %s\n' % tmpdir)
3026 3043 destindexpath = os.path.join(tmpdir, 'revlog.i')
3027 3044 destdatapath = os.path.join(tmpdir, 'revlog.d')
3028 3045 shutil.copyfile(origindexpath, destindexpath)
3029 3046 shutil.copyfile(origdatapath, destdatapath)
3030 3047
3031 3048 # remove the data we want to add again
3032 3049 ui.debug('truncating data to be rewritten\n')
3033 3050 with open(destindexpath, 'ab') as index:
3034 3051 index.seek(0)
3035 3052 index.truncate(truncaterev * orig._io.size)
3036 3053 with open(destdatapath, 'ab') as data:
3037 3054 data.seek(0)
3038 3055 data.truncate(orig.start(truncaterev))
3039 3056
3040 3057 # instantiate a new revlog from the temporary copy
3042 3059 ui.debug('instantiating a new revlog from the truncated copy\n')
3042 3059 vfs = vfsmod.vfs(tmpdir)
3043 3060 vfs.options = getattr(orig.opener, 'options', None)
3044 3061
3045 dest = revlog.revlog(
3062 dest = revlog(
3046 3063 vfs, indexfile=indexname, datafile=dataname, **revlogkwargs
3047 3064 )
3048 3065 if dest._inline:
3049 3066 raise error.Abort('not supporting inline revlog (yet)')
3050 3067 # make sure internals are initialized
3051 3068 dest.revision(len(dest) - 1)
3052 3069 yield dest
3053 3070 del dest, vfs
3054 3071 finally:
3055 3072 shutil.rmtree(tmpdir, True)
3056 3073
3057 3074
3058 3075 @command(
3059 3076 b'perf::revlogchunks|perfrevlogchunks',
3060 3077 revlogopts
3061 3078 + formatteropts
3062 3079 + [
3063 3080 (b'e', b'engines', b'', b'compression engines to use'),
3064 3081 (b's', b'startrev', 0, b'revision to start at'),
3065 3082 ],
3066 3083 b'-c|-m|FILE',
3067 3084 )
3068 3085 def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts):
3069 3086 """Benchmark operations on revlog chunks.
3070 3087
3071 3088 Logically, each revlog is a collection of fulltext revisions. However,
3072 3089 stored within each revlog are "chunks" of possibly compressed data. This
3073 3090 data needs to be read and decompressed or compressed and written.
3074 3091
3075 3092 This command measures the time it takes to read+decompress and recompress
3076 3093 chunks in a revlog. It effectively isolates I/O and compression performance.
3077 3094 For measurements of higher-level operations like resolving revisions,
3078 3095 see ``perfrevlogrevisions`` and ``perfrevlogrevision``.
3079 3096 """
3080 3097 opts = _byteskwargs(opts)
3081 3098
3082 3099 rl = cmdutil.openrevlog(repo, b'perfrevlogchunks', file_, opts)
3083 3100
3084 3101 # _chunkraw was renamed to _getsegmentforrevs.
3085 3102 try:
3086 3103 segmentforrevs = rl._getsegmentforrevs
3087 3104 except AttributeError:
3088 3105 segmentforrevs = rl._chunkraw
3089 3106
3090 3107 # Verify engines argument.
3091 3108 if engines:
3092 3109 engines = {e.strip() for e in engines.split(b',')}
3093 3110 for engine in engines:
3094 3111 try:
3095 3112 util.compressionengines[engine]
3096 3113 except KeyError:
3097 3114 raise error.Abort(b'unknown compression engine: %s' % engine)
3098 3115 else:
3099 3116 engines = []
3100 3117 for e in util.compengines:
3101 3118 engine = util.compengines[e]
3102 3119 try:
3103 3120 if engine.available():
3104 3121 engine.revlogcompressor().compress(b'dummy')
3105 3122 engines.append(e)
3106 3123 except NotImplementedError:
3107 3124 pass
3108 3125
3109 3126 revs = list(rl.revs(startrev, len(rl) - 1))
3110 3127
3111 3128 def rlfh(rl):
3112 3129 if rl._inline:
3113 3130 return getsvfs(repo)(rl.indexfile)
3114 3131 else:
3115 3132 return getsvfs(repo)(rl.datafile)
3116 3133
3117 3134 def doread():
3118 3135 rl.clearcaches()
3119 3136 for rev in revs:
3120 3137 segmentforrevs(rev, rev)
3121 3138
3122 3139 def doreadcachedfh():
3123 3140 rl.clearcaches()
3124 3141 fh = rlfh(rl)
3125 3142 for rev in revs:
3126 3143 segmentforrevs(rev, rev, df=fh)
3127 3144
3128 3145 def doreadbatch():
3129 3146 rl.clearcaches()
3130 3147 segmentforrevs(revs[0], revs[-1])
3131 3148
3132 3149 def doreadbatchcachedfh():
3133 3150 rl.clearcaches()
3134 3151 fh = rlfh(rl)
3135 3152 segmentforrevs(revs[0], revs[-1], df=fh)
3136 3153
3137 3154 def dochunk():
3138 3155 rl.clearcaches()
3139 3156 fh = rlfh(rl)
3140 3157 for rev in revs:
3141 3158 rl._chunk(rev, df=fh)
3142 3159
3143 3160 chunks = [None]
3144 3161
3145 3162 def dochunkbatch():
3146 3163 rl.clearcaches()
3147 3164 fh = rlfh(rl)
3148 3165 # Save chunks as a side-effect.
3149 3166 chunks[0] = rl._chunks(revs, df=fh)
3150 3167
3151 3168 def docompress(compressor):
3152 3169 rl.clearcaches()
3153 3170
3154 3171 try:
3155 3172 # Swap in the requested compression engine.
3156 3173 oldcompressor = rl._compressor
3157 3174 rl._compressor = compressor
3158 3175 for chunk in chunks[0]:
3159 3176 rl.compress(chunk)
3160 3177 finally:
3161 3178 rl._compressor = oldcompressor
3162 3179
3163 3180 benches = [
3164 3181 (lambda: doread(), b'read'),
3165 3182 (lambda: doreadcachedfh(), b'read w/ reused fd'),
3166 3183 (lambda: doreadbatch(), b'read batch'),
3167 3184 (lambda: doreadbatchcachedfh(), b'read batch w/ reused fd'),
3168 3185 (lambda: dochunk(), b'chunk'),
3169 3186 (lambda: dochunkbatch(), b'chunk batch'),
3170 3187 ]
3171 3188
3172 3189 for engine in sorted(engines):
3173 3190 compressor = util.compengines[engine].revlogcompressor()
3174 3191 benches.append(
3175 3192 (
3176 3193 functools.partial(docompress, compressor),
3177 3194 b'compress w/ %s' % engine,
3178 3195 )
3179 3196 )
3180 3197
3181 3198 for fn, title in benches:
3182 3199 timer, fm = gettimer(ui, opts)
3183 3200 timer(fn, title=title)
3184 3201 fm.end()
3185 3202
3186 3203
3187 3204 @command(
3188 3205 b'perf::revlogrevision|perfrevlogrevision',
3189 3206 revlogopts
3190 3207 + formatteropts
3191 3208 + [(b'', b'cache', False, b'use caches instead of clearing')],
3192 3209 b'-c|-m|FILE REV',
3193 3210 )
3194 3211 def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
3195 3212 """Benchmark obtaining a revlog revision.
3196 3213
3197 3214 Obtaining a revlog revision consists of roughly the following steps:
3198 3215
3199 3216 1. Compute the delta chain
3200 3217 2. Slice the delta chain if applicable
3201 3218 3. Obtain the raw chunks for that delta chain
3202 3219 4. Decompress each raw chunk
3203 3220 5. Apply binary patches to obtain fulltext
3204 3221 6. Verify hash of fulltext
3205 3222
3206 3223 This command measures the time spent in each of these phases.
3207 3224 """
3208 3225 opts = _byteskwargs(opts)
3209 3226
3210 3227 if opts.get(b'changelog') or opts.get(b'manifest'):
3211 3228 file_, rev = None, file_
3212 3229 elif rev is None:
3213 3230 raise error.CommandError(b'perfrevlogrevision', b'invalid arguments')
3214 3231
3215 3232 r = cmdutil.openrevlog(repo, b'perfrevlogrevision', file_, opts)
3216 3233
3217 3234 # _chunkraw was renamed to _getsegmentforrevs.
3218 3235 try:
3219 3236 segmentforrevs = r._getsegmentforrevs
3220 3237 except AttributeError:
3221 3238 segmentforrevs = r._chunkraw
3222 3239
3223 3240 node = r.lookup(rev)
3224 3241 rev = r.rev(node)
3225 3242
3226 3243 def getrawchunks(data, chain):
3227 3244 start = r.start
3228 3245 length = r.length
3229 3246 inline = r._inline
3230 3247 try:
3231 3248 iosize = r.index.entry_size
3232 3249 except AttributeError:
3233 3250 iosize = r._io.size
3234 3251 buffer = util.buffer
3235 3252
3236 3253 chunks = []
3237 3254 ladd = chunks.append
3238 3255 for idx, item in enumerate(chain):
3239 3256 offset = start(item[0])
3240 3257 bits = data[idx]
3241 3258 for rev in item:
3242 3259 chunkstart = start(rev)
3243 3260 if inline:
3244 3261 chunkstart += (rev + 1) * iosize
3245 3262 chunklength = length(rev)
3246 3263 ladd(buffer(bits, chunkstart - offset, chunklength))
3247 3264
3248 3265 return chunks
3249 3266
3250 3267 def dodeltachain(rev):
3251 3268 if not cache:
3252 3269 r.clearcaches()
3253 3270 r._deltachain(rev)
3254 3271
3255 3272 def doread(chain):
3256 3273 if not cache:
3257 3274 r.clearcaches()
3258 3275 for item in slicedchain:
3259 3276 segmentforrevs(item[0], item[-1])
3260 3277
3261 3278 def doslice(r, chain, size):
3262 3279 for s in slicechunk(r, chain, targetsize=size):
3263 3280 pass
3264 3281
3265 3282 def dorawchunks(data, chain):
3266 3283 if not cache:
3267 3284 r.clearcaches()
3268 3285 getrawchunks(data, chain)
3269 3286
3270 3287 def dodecompress(chunks):
3271 3288 decomp = r.decompress
3272 3289 for chunk in chunks:
3273 3290 decomp(chunk)
3274 3291
3275 3292 def dopatch(text, bins):
3276 3293 if not cache:
3277 3294 r.clearcaches()
3278 3295 mdiff.patches(text, bins)
3279 3296
3280 3297 def dohash(text):
3281 3298 if not cache:
3282 3299 r.clearcaches()
3283 3300 r.checkhash(text, node, rev=rev)
3284 3301
3285 3302 def dorevision():
3286 3303 if not cache:
3287 3304 r.clearcaches()
3288 3305 r.revision(node)
3289 3306
3290 3307 try:
3291 3308 from mercurial.revlogutils.deltas import slicechunk
3292 3309 except ImportError:
3293 3310 slicechunk = getattr(revlog, '_slicechunk', None)
3294 3311
3295 3312 size = r.length(rev)
3296 3313 chain = r._deltachain(rev)[0]
3297 3314 if not getattr(r, '_withsparseread', False):
3298 3315 slicedchain = (chain,)
3299 3316 else:
3300 3317 slicedchain = tuple(slicechunk(r, chain, targetsize=size))
3301 3318 data = [segmentforrevs(seg[0], seg[-1])[1] for seg in slicedchain]
3302 3319 rawchunks = getrawchunks(data, slicedchain)
3303 3320 bins = r._chunks(chain)
3304 3321 text = bytes(bins[0])
3305 3322 bins = bins[1:]
3306 3323 text = mdiff.patches(text, bins)
3307 3324
3308 3325 benches = [
3309 3326 (lambda: dorevision(), b'full'),
3310 3327 (lambda: dodeltachain(rev), b'deltachain'),
3311 3328 (lambda: doread(chain), b'read'),
3312 3329 ]
3313 3330
3314 3331 if getattr(r, '_withsparseread', False):
3315 3332 slicing = (lambda: doslice(r, chain, size), b'slice-sparse-chain')
3316 3333 benches.append(slicing)
3317 3334
3318 3335 benches.extend(
3319 3336 [
3320 3337 (lambda: dorawchunks(data, slicedchain), b'rawchunks'),
3321 3338 (lambda: dodecompress(rawchunks), b'decompress'),
3322 3339 (lambda: dopatch(text, bins), b'patch'),
3323 3340 (lambda: dohash(text), b'hash'),
3324 3341 ]
3325 3342 )
3326 3343
3327 3344 timer, fm = gettimer(ui, opts)
3328 3345 for fn, title in benches:
3329 3346 timer(fn, title=title)
3330 3347 fm.end()
3331 3348
3332 3349
3333 3350 @command(
3334 3351 b'perf::revset|perfrevset',
3335 3352 [
3336 3353 (b'C', b'clear', False, b'clear volatile cache between each call.'),
3337 3354 (b'', b'contexts', False, b'obtain changectx for each revision'),
3338 3355 ]
3339 3356 + formatteropts,
3340 3357 b"REVSET",
3341 3358 )
3342 3359 def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
3343 3360 """benchmark the execution time of a revset
3344 3361
3345 3362 Use the --clear option if you need to evaluate the impact of rebuilding the
3346 3363 volatile revision set caches on revset execution. Volatile caches hold the
3347 3364 filtered and obsolescence related revision sets."""
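# Illustrative invocations (assumed from the options above):
#   $ hg perfrevset 'draft() and head()'
#   $ hg perfrevset --clear --contexts 'all()'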
3348 3365 opts = _byteskwargs(opts)
3349 3366
3350 3367 timer, fm = gettimer(ui, opts)
3351 3368
3352 3369 def d():
3353 3370 if clear:
3354 3371 repo.invalidatevolatilesets()
3355 3372 if contexts:
3356 3373 for ctx in repo.set(expr):
3357 3374 pass
3358 3375 else:
3359 3376 for r in repo.revs(expr):
3360 3377 pass
3361 3378
3362 3379 timer(d)
3363 3380 fm.end()
3364 3381
3365 3382
3366 3383 @command(
3367 3384 b'perf::volatilesets|perfvolatilesets',
3368 3385 [
3369 3386 (b'', b'clear-obsstore', False, b'drop obsstore between each call.'),
3370 3387 ]
3371 3388 + formatteropts,
3372 3389 )
3373 3390 def perfvolatilesets(ui, repo, *names, **opts):
3374 3391 """benchmark the computation of various volatile set
3375 3392
3376 3393 Volatile set computes element related to filtering and obsolescence."""
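# Illustrative invocation (assumed; 'obsolete' and 'visible' are standard set
# and filter names, availability may vary across versions):
#   $ hg perfvolatilesets obsolete visible --clear-obsstore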
3377 3394 opts = _byteskwargs(opts)
3378 3395 timer, fm = gettimer(ui, opts)
3379 3396 repo = repo.unfiltered()
3380 3397
3381 3398 def getobs(name):
3382 3399 def d():
3383 3400 repo.invalidatevolatilesets()
3384 3401 if opts[b'clear_obsstore']:
3385 3402 clearfilecache(repo, b'obsstore')
3386 3403 obsolete.getrevs(repo, name)
3387 3404
3388 3405 return d
3389 3406
3390 3407 allobs = sorted(obsolete.cachefuncs)
3391 3408 if names:
3392 3409 allobs = [n for n in allobs if n in names]
3393 3410
3394 3411 for name in allobs:
3395 3412 timer(getobs(name), title=name)
3396 3413
3397 3414 def getfiltered(name):
3398 3415 def d():
3399 3416 repo.invalidatevolatilesets()
3400 3417 if opts[b'clear_obsstore']:
3401 3418 clearfilecache(repo, b'obsstore')
3402 3419 repoview.filterrevs(repo, name)
3403 3420
3404 3421 return d
3405 3422
3406 3423 allfilter = sorted(repoview.filtertable)
3407 3424 if names:
3408 3425 allfilter = [n for n in allfilter if n in names]
3409 3426
3410 3427 for name in allfilter:
3411 3428 timer(getfiltered(name), title=name)
3412 3429 fm.end()
3413 3430
3414 3431
3415 3432 @command(
3416 3433 b'perf::branchmap|perfbranchmap',
3417 3434 [
3418 3435 (b'f', b'full', False, b'Includes build time of subset'),
3419 3436 (
3420 3437 b'',
3421 3438 b'clear-revbranch',
3422 3439 False,
3423 3440 b'purge the revbranch cache between computation',
3424 3441 ),
3425 3442 ]
3426 3443 + formatteropts,
3427 3444 )
3428 3445 def perfbranchmap(ui, repo, *filternames, **opts):
3429 3446 """benchmark the update of a branchmap
3430 3447
3431 3448 This benchmarks the full repo.branchmap() call with read and write disabled
3432 3449 """
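# Illustrative invocations only (options as defined in the decorator above):
#   $ hg perf::branchmap
#   $ hg perf::branchmap --full served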
3433 3450 opts = _byteskwargs(opts)
3434 3451 full = opts.get(b"full", False)
3435 3452 clear_revbranch = opts.get(b"clear_revbranch", False)
3436 3453 timer, fm = gettimer(ui, opts)
3437 3454
3438 3455 def getbranchmap(filtername):
3439 3456 """generate a benchmark function for the filtername"""
3440 3457 if filtername is None:
3441 3458 view = repo
3442 3459 else:
3443 3460 view = repo.filtered(filtername)
3444 3461 if util.safehasattr(view._branchcaches, '_per_filter'):
3445 3462 filtered = view._branchcaches._per_filter
3446 3463 else:
3447 3464 # older versions
3448 3465 filtered = view._branchcaches
3449 3466
3450 3467 def d():
3451 3468 if clear_revbranch:
3452 3469 repo.revbranchcache()._clear()
3453 3470 if full:
3454 3471 view._branchcaches.clear()
3455 3472 else:
3456 3473 filtered.pop(filtername, None)
3457 3474 view.branchmap()
3458 3475
3459 3476 return d
3460 3477
3461 3478 # add filter in smaller subset to bigger subset
3462 3479 possiblefilters = set(repoview.filtertable)
3463 3480 if filternames:
3464 3481 possiblefilters &= set(filternames)
3465 3482 subsettable = getbranchmapsubsettable()
3466 3483 allfilters = []
3467 3484 while possiblefilters:
3468 3485 for name in possiblefilters:
3469 3486 subset = subsettable.get(name)
3470 3487 if subset not in possiblefilters:
3471 3488 break
3472 3489 else:
3473 3490 assert False, b'subset cycle %s!' % possiblefilters
3474 3491 allfilters.append(name)
3475 3492 possiblefilters.remove(name)
3476 3493
3477 3494 # warm the cache
3478 3495 if not full:
3479 3496 for name in allfilters:
3480 3497 repo.filtered(name).branchmap()
3481 3498 if not filternames or b'unfiltered' in filternames:
3482 3499 # add unfiltered
3483 3500 allfilters.append(None)
3484 3501
3485 3502 if util.safehasattr(branchmap.branchcache, 'fromfile'):
3486 3503 branchcacheread = safeattrsetter(branchmap.branchcache, b'fromfile')
3487 3504 branchcacheread.set(classmethod(lambda *args: None))
3488 3505 else:
3489 3506 # older versions
3490 3507 branchcacheread = safeattrsetter(branchmap, b'read')
3491 3508 branchcacheread.set(lambda *args: None)
3492 3509 branchcachewrite = safeattrsetter(branchmap.branchcache, b'write')
3493 3510 branchcachewrite.set(lambda *args: None)
3494 3511 try:
3495 3512 for name in allfilters:
3496 3513 printname = name
3497 3514 if name is None:
3498 3515 printname = b'unfiltered'
3499 3516 timer(getbranchmap(name), title=printname)
3500 3517 finally:
3501 3518 branchcacheread.restore()
3502 3519 branchcachewrite.restore()
3503 3520 fm.end()
3504 3521
3505 3522
3506 3523 @command(
3507 3524 b'perf::branchmapupdate|perfbranchmapupdate',
3508 3525 [
3509 3526 (b'', b'base', [], b'subset of revisions to start from'),
3510 3527 (b'', b'target', [], b'subset of revisions to end with'),
3511 3528 (b'', b'clear-caches', False, b'clear caches between each run'),
3512 3529 ]
3513 3530 + formatteropts,
3514 3531 )
3515 3532 def perfbranchmapupdate(ui, repo, base=(), target=(), **opts):
3516 3533 """benchmark branchmap update from <base> revs to <target> revs
3517 3534
3518 3535 If `--clear-caches` is passed, the following items will be reset before
3519 3536 each update:
3520 3537 * the changelog instance and associated indexes
3521 3538 * the rev-branch-cache instance
3522 3539
3523 3540 Examples:
3524 3541
3525 3542 # update for the one last revision
3526 3543 $ hg perfbranchmapupdate --base 'not tip' --target 'tip'
3527 3544
3528 3545 # update for a change coming with a new branch
3529 3546 $ hg perfbranchmapupdate --base 'stable' --target 'default'
3530 3547 """
3531 3548 from mercurial import branchmap
3532 3549 from mercurial import repoview
3533 3550
3534 3551 opts = _byteskwargs(opts)
3535 3552 timer, fm = gettimer(ui, opts)
3536 3553 clearcaches = opts[b'clear_caches']
3537 3554 unfi = repo.unfiltered()
3538 3555 x = [None]  # used to pass data between closures
3539 3556
3540 3557 # we use a `list` here to avoid possible side effect from smartset
3541 3558 baserevs = list(scmutil.revrange(repo, base))
3542 3559 targetrevs = list(scmutil.revrange(repo, target))
3543 3560 if not baserevs:
3544 3561 raise error.Abort(b'no revisions selected for --base')
3545 3562 if not targetrevs:
3546 3563 raise error.Abort(b'no revisions selected for --target')
3547 3564
3548 3565 # make sure the target branchmap also contains the one in the base
3549 3566 targetrevs = list(set(baserevs) | set(targetrevs))
3550 3567 targetrevs.sort()
3551 3568
3552 3569 cl = repo.changelog
3553 3570 allbaserevs = list(cl.ancestors(baserevs, inclusive=True))
3554 3571 allbaserevs.sort()
3555 3572 alltargetrevs = frozenset(cl.ancestors(targetrevs, inclusive=True))
3556 3573
3557 3574 newrevs = list(alltargetrevs.difference(allbaserevs))
3558 3575 newrevs.sort()
3559 3576
3560 3577 allrevs = frozenset(unfi.changelog.revs())
3561 3578 basefilterrevs = frozenset(allrevs.difference(allbaserevs))
3562 3579 targetfilterrevs = frozenset(allrevs.difference(alltargetrevs))
3563 3580
3564 3581 def basefilter(repo, visibilityexceptions=None):
3565 3582 return basefilterrevs
3566 3583
3567 3584 def targetfilter(repo, visibilityexceptions=None):
3568 3585 return targetfilterrevs
3569 3586
3570 3587 msg = b'benchmark of branchmap with %d revisions with %d new ones\n'
3571 3588 ui.status(msg % (len(allbaserevs), len(newrevs)))
3572 3589 if targetfilterrevs:
3573 3590 msg = b'(%d revisions still filtered)\n'
3574 3591 ui.status(msg % len(targetfilterrevs))
3575 3592
3576 3593 try:
3577 3594 repoview.filtertable[b'__perf_branchmap_update_base'] = basefilter
3578 3595 repoview.filtertable[b'__perf_branchmap_update_target'] = targetfilter
3579 3596
3580 3597 baserepo = repo.filtered(b'__perf_branchmap_update_base')
3581 3598 targetrepo = repo.filtered(b'__perf_branchmap_update_target')
3582 3599
3583 3600 # try to find an existing branchmap to reuse
3584 3601 subsettable = getbranchmapsubsettable()
3585 3602 candidatefilter = subsettable.get(None)
3586 3603 while candidatefilter is not None:
3587 3604 candidatebm = repo.filtered(candidatefilter).branchmap()
3588 3605 if candidatebm.validfor(baserepo):
3589 3606 filtered = repoview.filterrevs(repo, candidatefilter)
3590 3607 missing = [r for r in allbaserevs if r in filtered]
3591 3608 base = candidatebm.copy()
3592 3609 base.update(baserepo, missing)
3593 3610 break
3594 3611 candidatefilter = subsettable.get(candidatefilter)
3595 3612 else:
3596 3613 # no suitable subset was found
3597 3614 base = branchmap.branchcache()
3598 3615 base.update(baserepo, allbaserevs)
3599 3616
3600 3617 def setup():
3601 3618 x[0] = base.copy()
3602 3619 if clearcaches:
3603 3620 unfi._revbranchcache = None
3604 3621 clearchangelog(repo)
3605 3622
3606 3623 def bench():
3607 3624 x[0].update(targetrepo, newrevs)
3608 3625
3609 3626 timer(bench, setup=setup)
3610 3627 fm.end()
3611 3628 finally:
3612 3629 repoview.filtertable.pop(b'__perf_branchmap_update_base', None)
3613 3630 repoview.filtertable.pop(b'__perf_branchmap_update_target', None)
3614 3631
3615 3632
3616 3633 @command(
3617 3634 b'perf::branchmapload|perfbranchmapload',
3618 3635 [
3619 3636 (b'f', b'filter', b'', b'Specify repoview filter'),
3620 3637 (b'', b'list', False, b'List branchmap filter caches'),
3621 3638 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
3622 3639 ]
3623 3640 + formatteropts,
3624 3641 )
3625 3642 def perfbranchmapload(ui, repo, filter=b'', list=False, **opts):
3626 3643 """benchmark reading the branchmap"""
3627 3644 opts = _byteskwargs(opts)
3628 3645 clearrevlogs = opts[b'clear_revlogs']
3629 3646
3630 3647 if list:
3631 3648 for name, kind, st in repo.cachevfs.readdir(stat=True):
3632 3649 if name.startswith(b'branch2'):
3633 3650 filtername = name.partition(b'-')[2] or b'unfiltered'
3634 3651 ui.status(
3635 3652 b'%s - %s\n' % (filtername, util.bytecount(st.st_size))
3636 3653 )
3637 3654 return
3638 3655 if not filter:
3639 3656 filter = None
3640 3657 subsettable = getbranchmapsubsettable()
3641 3658 if filter is None:
3642 3659 repo = repo.unfiltered()
3643 3660 else:
3644 3661 repo = repoview.repoview(repo, filter)
3645 3662
3646 3663 repo.branchmap() # make sure we have a relevant, up to date branchmap
3647 3664
3648 3665 try:
3649 3666 fromfile = branchmap.branchcache.fromfile
3650 3667 except AttributeError:
3651 3668 # older versions
3652 3669 fromfile = branchmap.read
3653 3670
3654 3671 currentfilter = filter
3655 3672 # try once without timer, the filter may not be cached
3656 3673 while fromfile(repo) is None:
3657 3674 currentfilter = subsettable.get(currentfilter)
3658 3675 if currentfilter is None:
3659 3676 raise error.Abort(
3660 3677 b'No branchmap cached for %s repo' % (filter or b'unfiltered')
3661 3678 )
3662 3679 repo = repo.filtered(currentfilter)
3663 3680 timer, fm = gettimer(ui, opts)
3664 3681
3665 3682 def setup():
3666 3683 if clearrevlogs:
3667 3684 clearchangelog(repo)
3668 3685
3669 3686 def bench():
3670 3687 fromfile(repo)
3671 3688
3672 3689 timer(bench, setup=setup)
3673 3690 fm.end()
3674 3691
3675 3692
3676 3693 @command(b'perf::loadmarkers|perfloadmarkers')
3677 3694 def perfloadmarkers(ui, repo):
3678 3695 """benchmark the time to parse the on-disk markers for a repo
3679 3696
3680 3697 Result is the number of markers in the repo."""
3681 3698 timer, fm = gettimer(ui)
3682 3699 svfs = getsvfs(repo)
3683 3700 timer(lambda: len(obsolete.obsstore(repo, svfs)))
3684 3701 fm.end()
3685 3702
3686 3703
3687 3704 @command(
3688 3705 b'perf::lrucachedict|perflrucachedict',
3689 3706 formatteropts
3690 3707 + [
3691 3708 (b'', b'costlimit', 0, b'maximum total cost of items in cache'),
3692 3709 (b'', b'mincost', 0, b'smallest cost of items in cache'),
3693 3710 (b'', b'maxcost', 100, b'maximum cost of items in cache'),
3694 3711 (b'', b'size', 4, b'size of cache'),
3695 3712 (b'', b'gets', 10000, b'number of key lookups'),
3696 3713 (b'', b'sets', 10000, b'number of key sets'),
3697 3714 (b'', b'mixed', 10000, b'number of mixed mode operations'),
3698 3715 (
3699 3716 b'',
3700 3717 b'mixedgetfreq',
3701 3718 50,
3702 3719 b'frequency of get vs set ops in mixed mode',
3703 3720 ),
3704 3721 ],
3705 3722 norepo=True,
3706 3723 )
3707 3724 def perflrucache(
3708 3725 ui,
3709 3726 mincost=0,
3710 3727 maxcost=100,
3711 3728 costlimit=0,
3712 3729 size=4,
3713 3730 gets=10000,
3714 3731 sets=10000,
3715 3732 mixed=10000,
3716 3733 mixedgetfreq=50,
3717 3734 **opts
3718 3735 ):
3719 3736 opts = _byteskwargs(opts)
3720 3737
3721 3738 def doinit():
3722 3739 for i in _xrange(10000):
3723 3740 util.lrucachedict(size)
3724 3741
3725 3742 costrange = list(range(mincost, maxcost + 1))
3726 3743
3727 3744 values = []
3728 3745 for i in _xrange(size):
3729 3746 values.append(random.randint(0, _maxint))
3730 3747
3731 3748 # Get mode fills the cache and tests raw lookup performance with no
3732 3749 # eviction.
3733 3750 getseq = []
3734 3751 for i in _xrange(gets):
3735 3752 getseq.append(random.choice(values))
3736 3753
3737 3754 def dogets():
3738 3755 d = util.lrucachedict(size)
3739 3756 for v in values:
3740 3757 d[v] = v
3741 3758 for key in getseq:
3742 3759 value = d[key]
3743 3760 value # silence pyflakes warning
3744 3761
3745 3762 def dogetscost():
3746 3763 d = util.lrucachedict(size, maxcost=costlimit)
3747 3764 for i, v in enumerate(values):
3748 3765 d.insert(v, v, cost=costs[i])
3749 3766 for key in getseq:
3750 3767 try:
3751 3768 value = d[key]
3752 3769 value # silence pyflakes warning
3753 3770 except KeyError:
3754 3771 pass
3755 3772
3756 3773 # Set mode tests insertion speed with cache eviction.
3757 3774 setseq = []
3758 3775 costs = []
3759 3776 for i in _xrange(sets):
3760 3777 setseq.append(random.randint(0, _maxint))
3761 3778 costs.append(random.choice(costrange))
3762 3779
3763 3780 def doinserts():
3764 3781 d = util.lrucachedict(size)
3765 3782 for v in setseq:
3766 3783 d.insert(v, v)
3767 3784
3768 3785 def doinsertscost():
3769 3786 d = util.lrucachedict(size, maxcost=costlimit)
3770 3787 for i, v in enumerate(setseq):
3771 3788 d.insert(v, v, cost=costs[i])
3772 3789
3773 3790 def dosets():
3774 3791 d = util.lrucachedict(size)
3775 3792 for v in setseq:
3776 3793 d[v] = v
3777 3794
3778 3795 # Mixed mode randomly performs gets and sets with eviction.
3779 3796 mixedops = []
3780 3797 for i in _xrange(mixed):
3781 3798 r = random.randint(0, 100)
3782 3799 if r < mixedgetfreq:
3783 3800 op = 0
3784 3801 else:
3785 3802 op = 1
3786 3803
3787 3804 mixedops.append(
3788 3805 (op, random.randint(0, size * 2), random.choice(costrange))
3789 3806 )
3790 3807
3791 3808 def domixed():
3792 3809 d = util.lrucachedict(size)
3793 3810
3794 3811 for op, v, cost in mixedops:
3795 3812 if op == 0:
3796 3813 try:
3797 3814 d[v]
3798 3815 except KeyError:
3799 3816 pass
3800 3817 else:
3801 3818 d[v] = v
3802 3819
3803 3820 def domixedcost():
3804 3821 d = util.lrucachedict(size, maxcost=costlimit)
3805 3822
3806 3823 for op, v, cost in mixedops:
3807 3824 if op == 0:
3808 3825 try:
3809 3826 d[v]
3810 3827 except KeyError:
3811 3828 pass
3812 3829 else:
3813 3830 d.insert(v, v, cost=cost)
3814 3831
3815 3832 benches = [
3816 3833 (doinit, b'init'),
3817 3834 ]
3818 3835
3819 3836 if costlimit:
3820 3837 benches.extend(
3821 3838 [
3822 3839 (dogetscost, b'gets w/ cost limit'),
3823 3840 (doinsertscost, b'inserts w/ cost limit'),
3824 3841 (domixedcost, b'mixed w/ cost limit'),
3825 3842 ]
3826 3843 )
3827 3844 else:
3828 3845 benches.extend(
3829 3846 [
3830 3847 (dogets, b'gets'),
3831 3848 (doinserts, b'inserts'),
3832 3849 (dosets, b'sets'),
3833 3850 (domixed, b'mixed'),
3834 3851 ]
3835 3852 )
3836 3853
3837 3854 for fn, title in benches:
3838 3855 timer, fm = gettimer(ui, opts)
3839 3856 timer(fn, title=title)
3840 3857 fm.end()
3841 3858
3842 3859
3843 3860 @command(
3844 3861 b'perf::write|perfwrite',
3845 3862 formatteropts
3846 3863 + [
3847 3864 (b'', b'write-method', b'write', b'ui write method'),
3848 3865 (b'', b'nlines', 100, b'number of lines'),
3849 3866 (b'', b'nitems', 100, b'number of items (per line)'),
3850 3867 (b'', b'item', b'x', b'item that is written'),
3851 3868 (b'', b'batch-line', None, b'pass whole line to write method at once'),
3852 3869 (b'', b'flush-line', None, b'flush after each line'),
3853 3870 ],
3854 3871 )
3855 3872 def perfwrite(ui, repo, **opts):
3856 3873 """microbenchmark ui.write (and others)"""
3857 3874 opts = _byteskwargs(opts)
3858 3875
3859 3876 write = getattr(ui, _sysstr(opts[b'write_method']))
3860 3877 nlines = int(opts[b'nlines'])
3861 3878 nitems = int(opts[b'nitems'])
3862 3879 item = opts[b'item']
3863 3880 batch_line = opts.get(b'batch_line')
3864 3881 flush_line = opts.get(b'flush_line')
3865 3882
3866 3883 if batch_line:
3867 3884 line = item * nitems + b'\n'
3868 3885
3869 3886 def benchmark():
3870 3887 for i in pycompat.xrange(nlines):
3871 3888 if batch_line:
3872 3889 write(line)
3873 3890 else:
3874 3891 for i in pycompat.xrange(nitems):
3875 3892 write(item)
3876 3893 write(b'\n')
3877 3894 if flush_line:
3878 3895 ui.flush()
3879 3896 ui.flush()
3880 3897
3881 3898 timer, fm = gettimer(ui, opts)
3882 3899 timer(benchmark)
3883 3900 fm.end()
3884 3901
3885 3902
3886 3903 def uisetup(ui):
3887 3904 if util.safehasattr(cmdutil, b'openrevlog') and not util.safehasattr(
3888 3905 commands, b'debugrevlogopts'
3889 3906 ):
3890 3907 # for "historical portability":
3891 3908 # In this case, Mercurial should be 1.9 (or a79fea6b3e77) -
3892 3909 # 3.7 (or 5606f7d0d063). Therefore, '--dir' option for
3893 3910 # openrevlog() should cause failure, because it has been
3894 3911 # available since 3.5 (or 49c583ca48c4).
3895 3912 def openrevlog(orig, repo, cmd, file_, opts):
3896 3913 if opts.get(b'dir') and not util.safehasattr(repo, b'dirlog'):
3897 3914 raise error.Abort(
3898 3915 b"This version doesn't support --dir option",
3899 3916 hint=b"use 3.5 or later",
3900 3917 )
3901 3918 return orig(repo, cmd, file_, opts)
3902 3919
3903 3920 extensions.wrapfunction(cmdutil, b'openrevlog', openrevlog)
3904 3921
3905 3922
3906 3923 @command(
3907 3924 b'perf::progress|perfprogress',
3908 3925 formatteropts
3909 3926 + [
3910 3927 (b'', b'topic', b'topic', b'topic for progress messages'),
3911 3928 (b'c', b'total', 1000000, b'total value we are progressing to'),
3912 3929 ],
3913 3930 norepo=True,
3914 3931 )
3915 3932 def perfprogress(ui, topic=None, total=None, **opts):
3916 3933 """printing of progress bars"""
3917 3934 opts = _byteskwargs(opts)
3918 3935
3919 3936 timer, fm = gettimer(ui, opts)
3920 3937
3921 3938 def doprogress():
3922 3939 with ui.makeprogress(topic, total=total) as progress:
3923 3940 for i in _xrange(total):
3924 3941 progress.increment()
3925 3942
3926 3943 timer(doprogress)
3927 3944 fm.end()
@@ -1,48 +1,56 b''
1 1 #!/usr/bin/env python3
2 2 # Undump a dump from dumprevlog
3 3 # $ hg init
4 4 # $ undumprevlog < repo.dump
5 5
6 6 from __future__ import absolute_import, print_function
7 7
8 8 import sys
9 9 from mercurial.node import bin
10 10 from mercurial import (
11 11 encoding,
12 12 revlog,
13 13 transaction,
14 14 vfs as vfsmod,
15 15 )
16 16 from mercurial.utils import procutil
17 17
18 from mercurial.revlogutils import (
19 constants as revlog_constants,
20 )
21
18 22 for fp in (sys.stdin, sys.stdout, sys.stderr):
19 23 procutil.setbinary(fp)
20 24
21 25 opener = vfsmod.vfs(b'.', False)
22 26 tr = transaction.transaction(
23 27 sys.stderr.write, opener, {b'store': opener}, b"undump.journal"
24 28 )
25 29 while True:
26 30 l = sys.stdin.readline()
27 31 if not l:
28 32 break
29 33 if l.startswith("file:"):
30 34 f = encoding.strtolocal(l[6:-1])
31 r = revlog.revlog(opener, f)
35 r = revlog.revlog(
36 opener,
37 target=(revlog_constants.KIND_OTHER, b'undump-revlog'),
38 indexfile=f,
39 )
32 40 procutil.stdout.write(b'%s\n' % f)
33 41 elif l.startswith("node:"):
34 42 n = bin(l[6:-1])
35 43 elif l.startswith("linkrev:"):
36 44 lr = int(l[9:-1])
37 45 elif l.startswith("parents:"):
38 46 p = l[9:-1].split()
39 47 p1 = bin(p[0])
40 48 p2 = bin(p[1])
41 49 elif l.startswith("length:"):
42 50 length = int(l[8:-1])
43 51 sys.stdin.readline() # start marker
44 52 d = encoding.strtolocal(sys.stdin.read(length))
45 53 sys.stdin.readline() # end marker
46 54 r.addrevision(d, tr, lr, p1, p2)
47 55
48 56 tr.close()
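Both contrib scripts above now pass an explicit target when constructing a revlog. A minimal standalone sketch of the same pattern, with illustrative file names and relying on the constructor keywords shown in these hunks:

from mercurial import revlog, vfs as vfsmod
from mercurial.revlogutils import constants as revlog_constants

opener = vfsmod.vfs(b'.', False)
# KIND_OTHER marks a revlog that is neither a changelog, a manifest, nor a filelog
r = revlog.revlog(
    opener,
    target=(revlog_constants.KIND_OTHER, b'example-revlog'),
    indexfile=b'example.i',
)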
@@ -1,693 +1,712 b''
1 1 # bundlerepo.py - repository class for viewing uncompressed bundles
2 2 #
3 3 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Repository class for viewing uncompressed bundles.
9 9
10 10 This provides a read-only repository interface to bundles as if they
11 11 were part of the actual repository.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import os
17 17 import shutil
18 18
19 19 from .i18n import _
20 20 from .node import (
21 21 hex,
22 22 nullrev,
23 23 )
24 24
25 25 from . import (
26 26 bundle2,
27 27 changegroup,
28 28 changelog,
29 29 cmdutil,
30 30 discovery,
31 31 encoding,
32 32 error,
33 33 exchange,
34 34 filelog,
35 35 localrepo,
36 36 manifest,
37 37 mdiff,
38 38 pathutil,
39 39 phases,
40 40 pycompat,
41 41 revlog,
42 42 util,
43 43 vfs as vfsmod,
44 44 )
45 45 from .utils import (
46 46 urlutil,
47 47 )
48 48
49 from .revlogutils import (
50 constants as revlog_constants,
51 )
52
49 53
50 54 class bundlerevlog(revlog.revlog):
51 def __init__(self, opener, indexfile, cgunpacker, linkmapper):
55 def __init__(self, opener, target, indexfile, cgunpacker, linkmapper):
52 56 # How it works:
53 57 # To retrieve a revision, we need to know the offset of the revision in
54 58 # the bundle (an unbundle object). We store this offset in the index
55 59 # (start). The base of the delta is stored in the base field.
56 60 #
57 61 # To differentiate a rev in the bundle from a rev in the revlog, we
58 62 # check revision against repotiprev.
59 63 opener = vfsmod.readonlyvfs(opener)
60 revlog.revlog.__init__(self, opener, indexfile)
64 revlog.revlog.__init__(self, opener, target=target, indexfile=indexfile)
61 65 self.bundle = cgunpacker
62 66 n = len(self)
63 67 self.repotiprev = n - 1
64 68 self.bundlerevs = set() # used by 'bundle()' revset expression
65 69 for deltadata in cgunpacker.deltaiter():
66 70 node, p1, p2, cs, deltabase, delta, flags, sidedata = deltadata
67 71
68 72 size = len(delta)
69 73 start = cgunpacker.tell() - size
70 74
71 75 if self.index.has_node(node):
72 76 # this can happen if two branches make the same change
73 77 self.bundlerevs.add(self.index.rev(node))
74 78 continue
75 79 if cs == node:
76 80 linkrev = nullrev
77 81 else:
78 82 linkrev = linkmapper(cs)
79 83
80 84 for p in (p1, p2):
81 85 if not self.index.has_node(p):
82 86 raise error.LookupError(
83 87 p, self.indexfile, _(b"unknown parent")
84 88 )
85 89
86 90 if not self.index.has_node(deltabase):
87 91 raise LookupError(
88 92 deltabase, self.indexfile, _(b'unknown delta base')
89 93 )
90 94
91 95 baserev = self.rev(deltabase)
92 96 # start, size, full unc. size, base (unused), link, p1, p2, node
93 97 e = (
94 98 revlog.offset_type(start, flags),
95 99 size,
96 100 -1,
97 101 baserev,
98 102 linkrev,
99 103 self.rev(p1),
100 104 self.rev(p2),
101 105 node,
102 106 )
103 107 self.index.append(e)
104 108 self.bundlerevs.add(n)
105 109 n += 1
106 110
107 111 def _chunk(self, rev, df=None):
108 112 # Warning: in case of bundle, the diff is against what we stored as
109 113 # delta base, not against rev - 1
110 114 # XXX: could use some caching
111 115 if rev <= self.repotiprev:
112 116 return revlog.revlog._chunk(self, rev)
113 117 self.bundle.seek(self.start(rev))
114 118 return self.bundle.read(self.length(rev))
115 119
116 120 def revdiff(self, rev1, rev2):
117 121 """return or calculate a delta between two revisions"""
118 122 if rev1 > self.repotiprev and rev2 > self.repotiprev:
119 123 # hot path for bundle
120 124 revb = self.index[rev2][3]
121 125 if revb == rev1:
122 126 return self._chunk(rev2)
123 127 elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
124 128 return revlog.revlog.revdiff(self, rev1, rev2)
125 129
126 130 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
127 131
128 132 def _rawtext(self, node, rev, _df=None):
129 133 if rev is None:
130 134 rev = self.rev(node)
131 135 validated = False
132 136 rawtext = None
133 137 chain = []
134 138 iterrev = rev
135 139 # reconstruct the revision if it is from a changegroup
136 140 while iterrev > self.repotiprev:
137 141 if self._revisioncache and self._revisioncache[1] == iterrev:
138 142 rawtext = self._revisioncache[2]
139 143 break
140 144 chain.append(iterrev)
141 145 iterrev = self.index[iterrev][3]
142 146 if iterrev == nullrev:
143 147 rawtext = b''
144 148 elif rawtext is None:
145 149 r = super(bundlerevlog, self)._rawtext(
146 150 self.node(iterrev), iterrev, _df=_df
147 151 )
148 152 __, rawtext, validated = r
149 153 if chain:
150 154 validated = False
151 155 while chain:
152 156 delta = self._chunk(chain.pop())
153 157 rawtext = mdiff.patches(rawtext, [delta])
154 158 return rev, rawtext, validated
155 159
156 160 def addrevision(self, *args, **kwargs):
157 161 raise NotImplementedError
158 162
159 163 def addgroup(self, *args, **kwargs):
160 164 raise NotImplementedError
161 165
162 166 def strip(self, *args, **kwargs):
163 167 raise NotImplementedError
164 168
165 169 def checksize(self):
166 170 raise NotImplementedError
167 171
168 172
169 173 class bundlechangelog(bundlerevlog, changelog.changelog):
170 174 def __init__(self, opener, cgunpacker):
171 175 changelog.changelog.__init__(self, opener)
172 176 linkmapper = lambda x: x
173 177 bundlerevlog.__init__(
174 self, opener, self.indexfile, cgunpacker, linkmapper
178 self,
179 opener,
180 (revlog_constants.KIND_CHANGELOG, None),
181 self.indexfile,
182 cgunpacker,
183 linkmapper,
175 184 )
176 185
177 186
178 187 class bundlemanifest(bundlerevlog, manifest.manifestrevlog):
179 188 def __init__(
180 189 self,
181 190 nodeconstants,
182 191 opener,
183 192 cgunpacker,
184 193 linkmapper,
185 194 dirlogstarts=None,
186 195 dir=b'',
187 196 ):
188 197 manifest.manifestrevlog.__init__(self, nodeconstants, opener, tree=dir)
189 198 bundlerevlog.__init__(
190 self, opener, self.indexfile, cgunpacker, linkmapper
199 self,
200 opener,
201 (revlog_constants.KIND_MANIFESTLOG, dir),
202 self.indexfile,
203 cgunpacker,
204 linkmapper,
191 205 )
192 206 if dirlogstarts is None:
193 207 dirlogstarts = {}
194 208 if self.bundle.version == b"03":
195 209 dirlogstarts = _getfilestarts(self.bundle)
196 210 self._dirlogstarts = dirlogstarts
197 211 self._linkmapper = linkmapper
198 212
199 213 def dirlog(self, d):
200 214 if d in self._dirlogstarts:
201 215 self.bundle.seek(self._dirlogstarts[d])
202 216 return bundlemanifest(
203 217 self.nodeconstants,
204 218 self.opener,
205 219 self.bundle,
206 220 self._linkmapper,
207 221 self._dirlogstarts,
208 222 dir=d,
209 223 )
210 224 return super(bundlemanifest, self).dirlog(d)
211 225
212 226
213 227 class bundlefilelog(filelog.filelog):
214 228 def __init__(self, opener, path, cgunpacker, linkmapper):
215 229 filelog.filelog.__init__(self, opener, path)
216 230 self._revlog = bundlerevlog(
217 opener, self.indexfile, cgunpacker, linkmapper
231 opener,
232 # XXX should use the unencoded path
233 target=(revlog_constants.KIND_FILELOG, path),
234 indexfile=self.indexfile,
235 cgunpacker=cgunpacker,
236 linkmapper=linkmapper,
218 237 )
219 238
220 239
221 240 class bundlepeer(localrepo.localpeer):
222 241 def canpush(self):
223 242 return False
224 243
225 244
226 245 class bundlephasecache(phases.phasecache):
227 246 def __init__(self, *args, **kwargs):
228 247 super(bundlephasecache, self).__init__(*args, **kwargs)
229 248 if util.safehasattr(self, 'opener'):
230 249 self.opener = vfsmod.readonlyvfs(self.opener)
231 250
232 251 def write(self):
233 252 raise NotImplementedError
234 253
235 254 def _write(self, fp):
236 255 raise NotImplementedError
237 256
238 257 def _updateroots(self, phase, newroots, tr):
239 258 self.phaseroots[phase] = newroots
240 259 self.invalidate()
241 260 self.dirty = True
242 261
243 262
244 263 def _getfilestarts(cgunpacker):
245 264 filespos = {}
246 265 for chunkdata in iter(cgunpacker.filelogheader, {}):
247 266 fname = chunkdata[b'filename']
248 267 filespos[fname] = cgunpacker.tell()
249 268 for chunk in iter(lambda: cgunpacker.deltachunk(None), {}):
250 269 pass
251 270 return filespos
252 271
253 272
254 273 class bundlerepository(object):
255 274 """A repository instance that is a union of a local repo and a bundle.
256 275
257 276 Instances represent a read-only repository composed of a local repository
258 277 with the contents of a bundle file applied. The repository instance is
259 278 conceptually similar to the state of a repository after an
260 279 ``hg unbundle`` operation. However, the contents of the bundle are never
261 280 applied to the actual base repository.
262 281
263 282 Instances constructed directly are not usable as repository objects.
264 283 Use instance() or makebundlerepository() to create instances.
265 284 """
266 285
267 286 def __init__(self, bundlepath, url, tempparent):
268 287 self._tempparent = tempparent
269 288 self._url = url
270 289
271 290 self.ui.setconfig(b'phases', b'publish', False, b'bundlerepo')
272 291
273 292 self.tempfile = None
274 293 f = util.posixfile(bundlepath, b"rb")
275 294 bundle = exchange.readbundle(self.ui, f, bundlepath)
276 295
277 296 if isinstance(bundle, bundle2.unbundle20):
278 297 self._bundlefile = bundle
279 298 self._cgunpacker = None
280 299
281 300 cgpart = None
282 301 for part in bundle.iterparts(seekable=True):
283 302 if part.type == b'changegroup':
284 303 if cgpart:
285 304 raise NotImplementedError(
286 305 b"can't process multiple changegroups"
287 306 )
288 307 cgpart = part
289 308
290 309 self._handlebundle2part(bundle, part)
291 310
292 311 if not cgpart:
293 312 raise error.Abort(_(b"No changegroups found"))
294 313
295 314 # This is required to placate a later consumer, which expects
296 315 # the payload offset to be at the beginning of the changegroup.
297 316 # We need to do this after the iterparts() generator advances
298 317 # because iterparts() will seek to end of payload after the
299 318 # generator returns control to iterparts().
300 319 cgpart.seek(0, os.SEEK_SET)
301 320
302 321 elif isinstance(bundle, changegroup.cg1unpacker):
303 322 if bundle.compressed():
304 323 f = self._writetempbundle(
305 324 bundle.read, b'.hg10un', header=b'HG10UN'
306 325 )
307 326 bundle = exchange.readbundle(self.ui, f, bundlepath, self.vfs)
308 327
309 328 self._bundlefile = bundle
310 329 self._cgunpacker = bundle
311 330 else:
312 331 raise error.Abort(
313 332 _(b'bundle type %s cannot be read') % type(bundle)
314 333 )
315 334
316 335 # dict with the mapping 'filename' -> position in the changegroup.
317 336 self._cgfilespos = {}
318 337
319 338 self.firstnewrev = self.changelog.repotiprev + 1
320 339 phases.retractboundary(
321 340 self,
322 341 None,
323 342 phases.draft,
324 343 [ctx.node() for ctx in self[self.firstnewrev :]],
325 344 )
326 345
327 346 def _handlebundle2part(self, bundle, part):
328 347 if part.type != b'changegroup':
329 348 return
330 349
331 350 cgstream = part
332 351 version = part.params.get(b'version', b'01')
333 352 legalcgvers = changegroup.supportedincomingversions(self)
334 353 if version not in legalcgvers:
335 354 msg = _(b'Unsupported changegroup version: %s')
336 355 raise error.Abort(msg % version)
337 356 if bundle.compressed():
338 357 cgstream = self._writetempbundle(part.read, b'.cg%sun' % version)
339 358
340 359 self._cgunpacker = changegroup.getunbundler(version, cgstream, b'UN')
341 360
342 361 def _writetempbundle(self, readfn, suffix, header=b''):
343 362 """Write a temporary file to disk"""
344 363 fdtemp, temp = self.vfs.mkstemp(prefix=b"hg-bundle-", suffix=suffix)
345 364 self.tempfile = temp
346 365
347 366 with os.fdopen(fdtemp, 'wb') as fptemp:
348 367 fptemp.write(header)
349 368 while True:
350 369 chunk = readfn(2 ** 18)
351 370 if not chunk:
352 371 break
353 372 fptemp.write(chunk)
354 373
355 374 return self.vfs.open(self.tempfile, mode=b"rb")
356 375
357 376 @localrepo.unfilteredpropertycache
358 377 def _phasecache(self):
359 378 return bundlephasecache(self, self._phasedefaults)
360 379
361 380 @localrepo.unfilteredpropertycache
362 381 def changelog(self):
363 382 # consume the header if it exists
364 383 self._cgunpacker.changelogheader()
365 384 c = bundlechangelog(self.svfs, self._cgunpacker)
366 385 self.manstart = self._cgunpacker.tell()
367 386 return c
368 387
369 388 def _refreshchangelog(self):
370 389 # changelog for bundle repo are not filecache, this method is not
371 390 # applicable.
372 391 pass
373 392
374 393 @localrepo.unfilteredpropertycache
375 394 def manifestlog(self):
376 395 self._cgunpacker.seek(self.manstart)
377 396 # consume the header if it exists
378 397 self._cgunpacker.manifestheader()
379 398 linkmapper = self.unfiltered().changelog.rev
380 399 rootstore = bundlemanifest(
381 400 self.nodeconstants, self.svfs, self._cgunpacker, linkmapper
382 401 )
383 402 self.filestart = self._cgunpacker.tell()
384 403
385 404 return manifest.manifestlog(
386 405 self.svfs, self, rootstore, self.narrowmatch()
387 406 )
388 407
389 408 def _consumemanifest(self):
390 409 """Consumes the manifest portion of the bundle, setting filestart so the
391 410 file portion can be read."""
392 411 self._cgunpacker.seek(self.manstart)
393 412 self._cgunpacker.manifestheader()
394 413 for delta in self._cgunpacker.deltaiter():
395 414 pass
396 415 self.filestart = self._cgunpacker.tell()
397 416
398 417 @localrepo.unfilteredpropertycache
399 418 def manstart(self):
400 419 self.changelog
401 420 return self.manstart
402 421
403 422 @localrepo.unfilteredpropertycache
404 423 def filestart(self):
405 424 self.manifestlog
406 425
407 426 # If filestart was not set by self.manifestlog, that means the
408 427 # manifestlog implementation did not consume the manifests from the
409 428 # changegroup (ex: it might be consuming trees from a separate bundle2
410 429 # part instead). So we need to manually consume it.
411 430 if 'filestart' not in self.__dict__:
412 431 self._consumemanifest()
413 432
414 433 return self.filestart
415 434
416 435 def url(self):
417 436 return self._url
418 437
419 438 def file(self, f):
420 439 if not self._cgfilespos:
421 440 self._cgunpacker.seek(self.filestart)
422 441 self._cgfilespos = _getfilestarts(self._cgunpacker)
423 442
424 443 if f in self._cgfilespos:
425 444 self._cgunpacker.seek(self._cgfilespos[f])
426 445 linkmapper = self.unfiltered().changelog.rev
427 446 return bundlefilelog(self.svfs, f, self._cgunpacker, linkmapper)
428 447 else:
429 448 return super(bundlerepository, self).file(f)
430 449
431 450 def close(self):
432 451 """Close assigned bundle file immediately."""
433 452 self._bundlefile.close()
434 453 if self.tempfile is not None:
435 454 self.vfs.unlink(self.tempfile)
436 455 if self._tempparent:
437 456 shutil.rmtree(self._tempparent, True)
438 457
439 458 def cancopy(self):
440 459 return False
441 460
442 461 def peer(self):
443 462 return bundlepeer(self)
444 463
445 464 def getcwd(self):
446 465 return encoding.getcwd() # always outside the repo
447 466
448 467 # Check if parents exist in localrepo before setting
449 468 def setparents(self, p1, p2=None):
450 469 if p2 is None:
451 470 p2 = self.nullid
452 471 p1rev = self.changelog.rev(p1)
453 472 p2rev = self.changelog.rev(p2)
454 473 msg = _(b"setting parent to node %s that only exists in the bundle\n")
455 474 if self.changelog.repotiprev < p1rev:
456 475 self.ui.warn(msg % hex(p1))
457 476 if self.changelog.repotiprev < p2rev:
458 477 self.ui.warn(msg % hex(p2))
459 478 return super(bundlerepository, self).setparents(p1, p2)
460 479
461 480
462 481 def instance(ui, path, create, intents=None, createopts=None):
463 482 if create:
464 483 raise error.Abort(_(b'cannot create new bundle repository'))
465 484 # internal config: bundle.mainreporoot
466 485 parentpath = ui.config(b"bundle", b"mainreporoot")
467 486 if not parentpath:
468 487 # try to find the correct path to the working directory repo
469 488 parentpath = cmdutil.findrepo(encoding.getcwd())
470 489 if parentpath is None:
471 490 parentpath = b''
472 491 if parentpath:
473 492 # Try to make the full path relative so we get a nice, short URL.
474 493 # In particular, we don't want temp dir names in test outputs.
475 494 cwd = encoding.getcwd()
476 495 if parentpath == cwd:
477 496 parentpath = b''
478 497 else:
479 498 cwd = pathutil.normasprefix(cwd)
480 499 if parentpath.startswith(cwd):
481 500 parentpath = parentpath[len(cwd) :]
482 501 u = urlutil.url(path)
483 502 path = u.localpath()
484 503 if u.scheme == b'bundle':
485 504 s = path.split(b"+", 1)
486 505 if len(s) == 1:
487 506 repopath, bundlename = parentpath, s[0]
488 507 else:
489 508 repopath, bundlename = s
490 509 else:
491 510 repopath, bundlename = parentpath, path
492 511
493 512 return makebundlerepository(ui, repopath, bundlename)
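# Illustrative sketch only (paths are hypothetical): callers normally reach the
# instance() function above through hg.repository() with a "bundle:" URL of the
# form "bundle:<base-repo>+<bundle-file>", as parsed a few lines up.
#
#     from mercurial import hg, ui as uimod
#
#     ui = uimod.ui.load()
#     repo = hg.repository(ui, b'bundle:/path/to/base-repo+/path/to/incoming.hg')
#     # repo now provides read-only access to local plus bundled revisions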
494 513
495 514
496 515 def makebundlerepository(ui, repopath, bundlepath):
497 516 """Make a bundle repository object based on repo and bundle paths."""
498 517 if repopath:
499 518 url = b'bundle:%s+%s' % (util.expandpath(repopath), bundlepath)
500 519 else:
501 520 url = b'bundle:%s' % bundlepath
502 521
503 522 # Because we can't make any guarantees about the type of the base
504 523 # repository, we can't have a static class representing the bundle
505 524 # repository. We also can't make any guarantees about how to even
506 525 # call the base repository's constructor!
507 526 #
508 527 # So, our strategy is to go through ``localrepo.instance()`` to construct
509 528 # a repo instance. Then, we dynamically create a new type derived from
510 529 # both it and our ``bundlerepository`` class which overrides some
511 530 # functionality. We then change the type of the constructed repository
512 531 # to this new type and initialize the bundle-specific bits of it.
513 532
514 533 try:
515 534 repo = localrepo.instance(ui, repopath, create=False)
516 535 tempparent = None
517 536 except error.RepoError:
518 537 tempparent = pycompat.mkdtemp()
519 538 try:
520 539 repo = localrepo.instance(ui, tempparent, create=True)
521 540 except Exception:
522 541 shutil.rmtree(tempparent)
523 542 raise
524 543
525 544 class derivedbundlerepository(bundlerepository, repo.__class__):
526 545 pass
527 546
528 547 repo.__class__ = derivedbundlerepository
529 548 bundlerepository.__init__(repo, bundlepath, url, tempparent)
530 549
531 550 return repo
532 551
533 552
534 553 class bundletransactionmanager(object):
535 554 def transaction(self):
536 555 return None
537 556
538 557 def close(self):
539 558 raise NotImplementedError
540 559
541 560 def release(self):
542 561 raise NotImplementedError
543 562
544 563
545 564 def getremotechanges(
546 565 ui, repo, peer, onlyheads=None, bundlename=None, force=False
547 566 ):
548 567 """obtains a bundle of changes incoming from peer
549 568
550 569 "onlyheads" restricts the returned changes to those reachable from the
551 570 specified heads.
552 571 "bundlename", if given, stores the bundle to this file path permanently;
553 572 otherwise it's stored to a temp file and gets deleted again when you call
554 573 the returned "cleanupfn".
555 574 "force" indicates whether to proceed on unrelated repos.
556 575
557 576 Returns a tuple (local, csets, cleanupfn):
558 577
559 578 "local" is a local repo from which to obtain the actual incoming
560 579 changesets; it is a bundlerepo for the obtained bundle when the
561 580 original "peer" is remote.
562 581 "csets" lists the incoming changeset node ids.
563 582 "cleanupfn" must be called without arguments when you're done processing
564 583 the changes; it closes both the original "peer" and the one returned
565 584 here.
566 585 """
567 586 tmp = discovery.findcommonincoming(repo, peer, heads=onlyheads, force=force)
568 587 common, incoming, rheads = tmp
569 588 if not incoming:
570 589 try:
571 590 if bundlename:
572 591 os.unlink(bundlename)
573 592 except OSError:
574 593 pass
575 594 return repo, [], peer.close
576 595
577 596 commonset = set(common)
578 597 rheads = [x for x in rheads if x not in commonset]
579 598
580 599 bundle = None
581 600 bundlerepo = None
582 601 localrepo = peer.local()
583 602 if bundlename or not localrepo:
584 603 # create a bundle (uncompressed if peer repo is not local)
585 604
586 605 # developer config: devel.legacy.exchange
587 606 legexc = ui.configlist(b'devel', b'legacy.exchange')
588 607 forcebundle1 = b'bundle2' not in legexc and b'bundle1' in legexc
589 608 canbundle2 = (
590 609 not forcebundle1
591 610 and peer.capable(b'getbundle')
592 611 and peer.capable(b'bundle2')
593 612 )
594 613 if canbundle2:
595 614 with peer.commandexecutor() as e:
596 615 b2 = e.callcommand(
597 616 b'getbundle',
598 617 {
599 618 b'source': b'incoming',
600 619 b'common': common,
601 620 b'heads': rheads,
602 621 b'bundlecaps': exchange.caps20to10(
603 622 repo, role=b'client'
604 623 ),
605 624 b'cg': True,
606 625 },
607 626 ).result()
608 627
609 628 fname = bundle = changegroup.writechunks(
610 629 ui, b2._forwardchunks(), bundlename
611 630 )
612 631 else:
613 632 if peer.capable(b'getbundle'):
614 633 with peer.commandexecutor() as e:
615 634 cg = e.callcommand(
616 635 b'getbundle',
617 636 {
618 637 b'source': b'incoming',
619 638 b'common': common,
620 639 b'heads': rheads,
621 640 },
622 641 ).result()
623 642 elif onlyheads is None and not peer.capable(b'changegroupsubset'):
624 643 # compat with older servers when pulling all remote heads
625 644
626 645 with peer.commandexecutor() as e:
627 646 cg = e.callcommand(
628 647 b'changegroup',
629 648 {
630 649 b'nodes': incoming,
631 650 b'source': b'incoming',
632 651 },
633 652 ).result()
634 653
635 654 rheads = None
636 655 else:
637 656 with peer.commandexecutor() as e:
638 657 cg = e.callcommand(
639 658 b'changegroupsubset',
640 659 {
641 660 b'bases': incoming,
642 661 b'heads': rheads,
643 662 b'source': b'incoming',
644 663 },
645 664 ).result()
646 665
647 666 if localrepo:
648 667 bundletype = b"HG10BZ"
649 668 else:
650 669 bundletype = b"HG10UN"
651 670 fname = bundle = bundle2.writebundle(ui, cg, bundlename, bundletype)
652 671 # keep written bundle?
653 672 if bundlename:
654 673 bundle = None
655 674 if not localrepo:
656 675 # use the created uncompressed bundlerepo
657 676 localrepo = bundlerepo = makebundlerepository(
658 677 repo.baseui, repo.root, fname
659 678 )
660 679
661 680 # this repo contains local and peer now, so filter out local again
662 681 common = repo.heads()
663 682 if localrepo:
664 683 # Part of common may be remotely filtered
665 684 # So use an unfiltered version
666 685 # The discovery process probably needs cleanup to avoid that
667 686 localrepo = localrepo.unfiltered()
668 687
669 688 csets = localrepo.changelog.findmissing(common, rheads)
670 689
671 690 if bundlerepo:
672 691 reponodes = [ctx.node() for ctx in bundlerepo[bundlerepo.firstnewrev :]]
673 692
674 693 with peer.commandexecutor() as e:
675 694 remotephases = e.callcommand(
676 695 b'listkeys',
677 696 {
678 697 b'namespace': b'phases',
679 698 },
680 699 ).result()
681 700
682 701 pullop = exchange.pulloperation(bundlerepo, peer, heads=reponodes)
683 702 pullop.trmanager = bundletransactionmanager()
684 703 exchange._pullapplyphases(pullop, remotephases)
685 704
686 705 def cleanup():
687 706 if bundlerepo:
688 707 bundlerepo.close()
689 708 if bundle:
690 709 os.unlink(bundle)
691 710 peer.close()
692 711
693 712 return (localrepo, csets, cleanup)
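A hedged usage sketch of getremotechanges() as described by its docstring; the peer URL and the helper function are illustrative, not part of this module:

from mercurial import bundlerepo, hg
from mercurial.node import hex


def show_incoming(ui, repo, remote_url):
    # obtain a peer for the remote location (URL supplied by the caller)
    other = hg.peer(repo, {}, remote_url)
    local, csets, cleanupfn = bundlerepo.getremotechanges(ui, repo, other)
    try:
        for node in csets:
            ui.write(b'%s\n' % hex(node))
    finally:
        # closes the peer(s) and removes any temporary bundle file
        cleanupfn()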
@@ -1,622 +1,626 b''
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 )
15 15 from .thirdparty import attr
16 16
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 metadata,
21 21 pycompat,
22 22 revlog,
23 23 )
24 24 from .utils import (
25 25 dateutil,
26 26 stringutil,
27 27 )
28 from .revlogutils import flagutil
28 from .revlogutils import (
29 constants as revlog_constants,
30 flagutil,
31 )
29 32
30 33 _defaultextra = {b'branch': b'default'}
31 34
32 35
33 36 def _string_escape(text):
34 37 """
35 38 >>> from .pycompat import bytechr as chr
36 39 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
37 40 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
38 41 >>> s
39 42 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
40 43 >>> res = _string_escape(s)
41 44 >>> s == _string_unescape(res)
42 45 True
43 46 """
44 47 # subset of the string_escape codec
45 48 text = (
46 49 text.replace(b'\\', b'\\\\')
47 50 .replace(b'\n', b'\\n')
48 51 .replace(b'\r', b'\\r')
49 52 )
50 53 return text.replace(b'\0', b'\\0')
51 54
52 55
53 56 def _string_unescape(text):
54 57 if b'\\0' in text:
55 58 # fix up \0 without getting into trouble with \\0
56 59 text = text.replace(b'\\\\', b'\\\\\n')
57 60 text = text.replace(b'\\0', b'\0')
58 61 text = text.replace(b'\n', b'')
59 62 return stringutil.unescapestr(text)
60 63
61 64
62 65 def decodeextra(text):
63 66 """
64 67 >>> from .pycompat import bytechr as chr
65 68 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
66 69 ... ).items())
67 70 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
68 71 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
69 72 ... b'baz': chr(92) + chr(0) + b'2'})
70 73 ... ).items())
71 74 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
72 75 """
73 76 extra = _defaultextra.copy()
74 77 for l in text.split(b'\0'):
75 78 if l:
76 79 k, v = _string_unescape(l).split(b':', 1)
77 80 extra[k] = v
78 81 return extra
79 82
80 83
81 84 def encodeextra(d):
82 85 # keys must be sorted to produce a deterministic changelog entry
83 86 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
84 87 return b"\0".join(items)
85 88
86 89
87 90 def stripdesc(desc):
88 91 """strip trailing whitespace and leading and trailing empty lines"""
89 92 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
90 93
91 94
92 95 class appender(object):
93 96 """the changelog index must be updated last on disk, so we use this class
94 97 to delay writes to it"""
95 98
96 99 def __init__(self, vfs, name, mode, buf):
97 100 self.data = buf
98 101 fp = vfs(name, mode)
99 102 self.fp = fp
100 103 self.offset = fp.tell()
101 104 self.size = vfs.fstat(fp).st_size
102 105 self._end = self.size
103 106
104 107 def end(self):
105 108 return self._end
106 109
107 110 def tell(self):
108 111 return self.offset
109 112
110 113 def flush(self):
111 114 pass
112 115
113 116 @property
114 117 def closed(self):
115 118 return self.fp.closed
116 119
117 120 def close(self):
118 121 self.fp.close()
119 122
120 123 def seek(self, offset, whence=0):
121 124 '''virtual file offset spans real file and data'''
122 125 if whence == 0:
123 126 self.offset = offset
124 127 elif whence == 1:
125 128 self.offset += offset
126 129 elif whence == 2:
127 130 self.offset = self.end() + offset
128 131 if self.offset < self.size:
129 132 self.fp.seek(self.offset)
130 133
131 134 def read(self, count=-1):
132 135 '''only trick here is reads that span real file and data'''
133 136 ret = b""
134 137 if self.offset < self.size:
135 138 s = self.fp.read(count)
136 139 ret = s
137 140 self.offset += len(s)
138 141 if count > 0:
139 142 count -= len(s)
140 143 if count != 0:
141 144 doff = self.offset - self.size
142 145 self.data.insert(0, b"".join(self.data))
143 146 del self.data[1:]
144 147 s = self.data[0][doff : doff + count]
145 148 self.offset += len(s)
146 149 ret += s
147 150 return ret
148 151
149 152 def write(self, s):
150 153 self.data.append(bytes(s))
151 154 self.offset += len(s)
152 155 self._end += len(s)
153 156
154 157 def __enter__(self):
155 158 self.fp.__enter__()
156 159 return self
157 160
158 161 def __exit__(self, *args):
159 162 return self.fp.__exit__(*args)
160 163
161 164
162 165 class _divertopener(object):
163 166 def __init__(self, opener, target):
164 167 self._opener = opener
165 168 self._target = target
166 169
167 170 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
168 171 if name != self._target:
169 172 return self._opener(name, mode, **kwargs)
170 173 return self._opener(name + b".a", mode, **kwargs)
171 174
172 175 def __getattr__(self, attr):
173 176 return getattr(self._opener, attr)
174 177
175 178
176 179 def _delayopener(opener, target, buf):
177 180 """build an opener that stores chunks in 'buf' instead of 'target'"""
178 181
179 182 def _delay(name, mode=b'r', checkambig=False, **kwargs):
180 183 if name != target:
181 184 return opener(name, mode, **kwargs)
182 185 assert not kwargs
183 186 return appender(opener, name, mode, buf)
184 187
185 188 return _delay
186 189
187 190
188 191 @attr.s
189 192 class _changelogrevision(object):
190 193 # Extensions might modify _defaultextra, so let the constructor below pass
191 194 # it in
192 195 extra = attr.ib()
193 196 manifest = attr.ib()
194 197 user = attr.ib(default=b'')
195 198 date = attr.ib(default=(0, 0))
196 199 files = attr.ib(default=attr.Factory(list))
197 200 filesadded = attr.ib(default=None)
198 201 filesremoved = attr.ib(default=None)
199 202 p1copies = attr.ib(default=None)
200 203 p2copies = attr.ib(default=None)
201 204 description = attr.ib(default=b'')
202 205 branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))
203 206
204 207
205 208 class changelogrevision(object):
206 209 """Holds results of a parsed changelog revision.
207 210
208 211 Changelog revisions consist of multiple pieces of data, including
209 212 the manifest node, user, and date. This object exposes a view into
210 213 the parsed object.
211 214 """
212 215
213 216 __slots__ = (
214 217 '_offsets',
215 218 '_text',
216 219 '_sidedata',
217 220 '_cpsd',
218 221 '_changes',
219 222 )
220 223
221 224 def __new__(cls, cl, text, sidedata, cpsd):
222 225 if not text:
223 226 return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)
224 227
225 228 self = super(changelogrevision, cls).__new__(cls)
226 229 # We could return here and implement the following as an __init__.
227 230 # But doing it here is equivalent and saves an extra function call.
228 231
229 232 # format used:
230 233 # nodeid\n : manifest node in ascii
231 234 # user\n : user, no \n or \r allowed
232 235 # time tz extra\n : date (time is int or float, timezone is int)
233 236 # : extra is metadata, encoded and separated by '\0'
234 237 # : older versions ignore it
235 238 # files\n\n : files modified by the cset, no \n or \r allowed
236 239 # (.*) : comment (free text, ideally utf-8)
237 240 #
238 241 # changelog v0 doesn't use extra
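# For illustration only (hypothetical values), such a text therefore looks like:
#
#   9bd0fa2e9d4b... (40 hex characters of the manifest node)
#   Alice <alice@example.com>
#   1500000000 0 branch:stable
#   file-a.txt
#   file-b.txt
#
#   commit message describing the change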
239 242
240 243 nl1 = text.index(b'\n')
241 244 nl2 = text.index(b'\n', nl1 + 1)
242 245 nl3 = text.index(b'\n', nl2 + 1)
243 246
244 247 # The list of files may be empty. Which means nl3 is the first of the
245 248 # double newline that precedes the description.
246 249 if text[nl3 + 1 : nl3 + 2] == b'\n':
247 250 doublenl = nl3
248 251 else:
249 252 doublenl = text.index(b'\n\n', nl3 + 1)
250 253
251 254 self._offsets = (nl1, nl2, nl3, doublenl)
252 255 self._text = text
253 256 self._sidedata = sidedata
254 257 self._cpsd = cpsd
255 258 self._changes = None
256 259
257 260 return self
258 261
259 262 @property
260 263 def manifest(self):
261 264 return bin(self._text[0 : self._offsets[0]])
262 265
263 266 @property
264 267 def user(self):
265 268 off = self._offsets
266 269 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
267 270
268 271 @property
269 272 def _rawdate(self):
270 273 off = self._offsets
271 274 dateextra = self._text[off[1] + 1 : off[2]]
272 275 return dateextra.split(b' ', 2)[0:2]
273 276
274 277 @property
275 278 def _rawextra(self):
276 279 off = self._offsets
277 280 dateextra = self._text[off[1] + 1 : off[2]]
278 281 fields = dateextra.split(b' ', 2)
279 282 if len(fields) != 3:
280 283 return None
281 284
282 285 return fields[2]
283 286
284 287 @property
285 288 def date(self):
286 289 raw = self._rawdate
287 290 time = float(raw[0])
288 291 # Various tools did silly things with the timezone.
289 292 try:
290 293 timezone = int(raw[1])
291 294 except ValueError:
292 295 timezone = 0
293 296
294 297 return time, timezone
295 298
296 299 @property
297 300 def extra(self):
298 301 raw = self._rawextra
299 302 if raw is None:
300 303 return _defaultextra
301 304
302 305 return decodeextra(raw)
303 306
304 307 @property
305 308 def changes(self):
306 309 if self._changes is not None:
307 310 return self._changes
308 311 if self._cpsd:
309 312 changes = metadata.decode_files_sidedata(self._sidedata)
310 313 else:
311 314 changes = metadata.ChangingFiles(
312 315 touched=self.files or (),
313 316 added=self.filesadded or (),
314 317 removed=self.filesremoved or (),
315 318 p1_copies=self.p1copies or {},
316 319 p2_copies=self.p2copies or {},
317 320 )
318 321 self._changes = changes
319 322 return changes
320 323
321 324 @property
322 325 def files(self):
323 326 if self._cpsd:
324 327 return sorted(self.changes.touched)
325 328 off = self._offsets
326 329 if off[2] == off[3]:
327 330 return []
328 331
329 332 return self._text[off[2] + 1 : off[3]].split(b'\n')
330 333
331 334 @property
332 335 def filesadded(self):
333 336 if self._cpsd:
334 337 return self.changes.added
335 338 else:
336 339 rawindices = self.extra.get(b'filesadded')
337 340 if rawindices is None:
338 341 return None
339 342 return metadata.decodefileindices(self.files, rawindices)
340 343
341 344 @property
342 345 def filesremoved(self):
343 346 if self._cpsd:
344 347 return self.changes.removed
345 348 else:
346 349 rawindices = self.extra.get(b'filesremoved')
347 350 if rawindices is None:
348 351 return None
349 352 return metadata.decodefileindices(self.files, rawindices)
350 353
351 354 @property
352 355 def p1copies(self):
353 356 if self._cpsd:
354 357 return self.changes.copied_from_p1
355 358 else:
356 359 rawcopies = self.extra.get(b'p1copies')
357 360 if rawcopies is None:
358 361 return None
359 362 return metadata.decodecopies(self.files, rawcopies)
360 363
361 364 @property
362 365 def p2copies(self):
363 366 if self._cpsd:
364 367 return self.changes.copied_from_p2
365 368 else:
366 369 rawcopies = self.extra.get(b'p2copies')
367 370 if rawcopies is None:
368 371 return None
369 372 return metadata.decodecopies(self.files, rawcopies)
370 373
371 374 @property
372 375 def description(self):
373 376 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
374 377
375 378 @property
376 379 def branchinfo(self):
377 380 extra = self.extra
378 381 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
379 382
380 383
381 384 class changelog(revlog.revlog):
382 385 def __init__(self, opener, trypending=False, concurrencychecker=None):
383 386 """Load a changelog revlog using an opener.
384 387
385 388 If ``trypending`` is true, we attempt to load the index from a
386 389 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
387 390 The ``00changelog.i.a`` file contains index (and possibly inline
388 391 revision) data for a transaction that hasn't been finalized yet.
389 392 It exists in a separate file to facilitate readers (such as
390 393 hooks processes) accessing data before a transaction is finalized.
391 394
392 395 ``concurrencychecker`` will be passed to the revlog init function, see
393 396 the documentation there.
394 397 """
395 398 if trypending and opener.exists(b'00changelog.i.a'):
396 399 indexfile = b'00changelog.i.a'
397 400 else:
398 401 indexfile = b'00changelog.i'
399 402
400 403 datafile = b'00changelog.d'
401 404 revlog.revlog.__init__(
402 405 self,
403 406 opener,
404 indexfile,
407 target=(revlog_constants.KIND_CHANGELOG, None),
408 indexfile=indexfile,
405 409 datafile=datafile,
406 410 checkambig=True,
407 411 mmaplargeindex=True,
408 412 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
409 413 concurrencychecker=concurrencychecker,
410 414 )
411 415
412 416 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
413 417 # changelogs don't benefit from generaldelta.
414 418
415 419 self.version &= ~revlog.FLAG_GENERALDELTA
416 420 self._generaldelta = False
417 421
418 422 # Delta chains for changelogs tend to be very small because entries
419 423 # tend to be small and don't delta well with each other. So disable delta
420 424 # chains.
421 425 self._storedeltachains = False
422 426
423 427 self._realopener = opener
424 428 self._delayed = False
425 429 self._delaybuf = None
426 430 self._divert = False
427 431 self._filteredrevs = frozenset()
428 432 self._filteredrevs_hashcache = {}
429 433 self._copiesstorage = opener.options.get(b'copies-storage')
430 434 self.revlog_kind = b'changelog'
431 435
432 436 @property
433 437 def filteredrevs(self):
434 438 return self._filteredrevs
435 439
436 440 @filteredrevs.setter
437 441 def filteredrevs(self, val):
438 442 # Ensure all updates go through this function
439 443 assert isinstance(val, frozenset)
440 444 self._filteredrevs = val
441 445 self._filteredrevs_hashcache = {}
442 446
443 447 def delayupdate(self, tr):
444 448 """delay visibility of index updates to other readers"""
445 449
446 450 if not self._delayed:
447 451 if len(self) == 0:
448 452 self._divert = True
449 453 if self._realopener.exists(self.indexfile + b'.a'):
450 454 self._realopener.unlink(self.indexfile + b'.a')
451 455 self.opener = _divertopener(self._realopener, self.indexfile)
452 456 else:
453 457 self._delaybuf = []
454 458 self.opener = _delayopener(
455 459 self._realopener, self.indexfile, self._delaybuf
456 460 )
457 461 self._delayed = True
458 462 tr.addpending(b'cl-%i' % id(self), self._writepending)
459 463 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
460 464
461 465 def _finalize(self, tr):
462 466 """finalize index updates"""
463 467 self._delayed = False
464 468 self.opener = self._realopener
465 469 # move redirected index data back into place
466 470 if self._divert:
467 471 assert not self._delaybuf
468 472 tmpname = self.indexfile + b".a"
469 473 nfile = self.opener.open(tmpname)
470 474 nfile.close()
471 475 self.opener.rename(tmpname, self.indexfile, checkambig=True)
472 476 elif self._delaybuf:
473 477 fp = self.opener(self.indexfile, b'a', checkambig=True)
474 478 fp.write(b"".join(self._delaybuf))
475 479 fp.close()
476 480 self._delaybuf = None
477 481 self._divert = False
478 482 # split when we're done
479 483 self._enforceinlinesize(tr)
480 484
481 485 def _writepending(self, tr):
482 486 """create a file containing the unfinalized state for
483 487 pretxnchangegroup"""
484 488 if self._delaybuf:
485 489 # make a temporary copy of the index
486 490 fp1 = self._realopener(self.indexfile)
487 491 pendingfilename = self.indexfile + b".a"
488 492 # register as a temp file to ensure cleanup on failure
489 493 tr.registertmp(pendingfilename)
490 494 # write existing data
491 495 fp2 = self._realopener(pendingfilename, b"w")
492 496 fp2.write(fp1.read())
493 497 # add pending data
494 498 fp2.write(b"".join(self._delaybuf))
495 499 fp2.close()
496 500 # switch modes so finalize can simply rename
497 501 self._delaybuf = None
498 502 self._divert = True
499 503 self.opener = _divertopener(self._realopener, self.indexfile)
500 504
501 505 if self._divert:
502 506 return True
503 507
504 508 return False
505 509
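A compressed sketch of how the three methods above cooperate with the transaction (file names as in the ``__init__`` docstring):

    # delayupdate(tr):   stop writing to 00changelog.i; buffer new entries in
    #                    memory (or divert them to 00changelog.i.a when the
    #                    repo is empty) and register _writepending/_finalize
    #                    with the transaction
    # _writepending(tr): copy the current index plus the buffered entries to
    #                    00changelog.i.a so pretxnchangegroup hooks can see
    #                    the pending data
    # _finalize(tr):     restore the real opener, move the diverted file back
    #                    to 00changelog.i (or append the buffer), then
    #                    re-check the inline-size limit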
506 510 def _enforceinlinesize(self, tr, fp=None):
507 511 if not self._delayed:
508 512 revlog.revlog._enforceinlinesize(self, tr, fp)
509 513
510 514 def read(self, nodeorrev):
511 515 """Obtain data from a parsed changelog revision.
512 516
513 517 Returns a 6-tuple of:
514 518
515 519 - manifest node in binary
516 520 - author/user as a localstr
517 521 - date as a 2-tuple of (time, timezone)
518 522 - list of files
519 523 - commit message as a localstr
520 524 - dict of extra metadata
521 525
522 526 Unless you need to access all fields, consider calling
523 527 ``changelogrevision`` instead, as it is faster for partial object
524 528 access.
525 529 """
526 530 d, s = self._revisiondata(nodeorrev)
527 531 c = changelogrevision(
528 532 self, d, s, self._copiesstorage == b'changeset-sidedata'
529 533 )
530 534 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
531 535
532 536 def changelogrevision(self, nodeorrev):
533 537 """Obtain a ``changelogrevision`` for a node or revision."""
534 538 text, sidedata = self._revisiondata(nodeorrev)
535 539 return changelogrevision(
536 540 self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
537 541 )
538 542
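A usage sketch for the two accessors above (this assumes an existing repository with at least one revision; the path b'.' is an assumption):

    from mercurial import hg, ui as uimod

    repo = hg.repository(uimod.ui.load(), b'.')
    node = repo.changelog.node(0)
    manifest, user, date, files, desc, extra = repo.changelog.read(node)
    # cheaper when only a few fields are needed:
    desc_only = repo.changelog.changelogrevision(node).description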
539 543 def readfiles(self, nodeorrev):
540 544 """
541 545 short version of read that only returns the files modified by the cset
542 546 """
543 547 text = self.revision(nodeorrev)
544 548 if not text:
545 549 return []
546 550 last = text.index(b"\n\n")
547 551 l = text[:last].split(b'\n')
548 552 return l[3:]
549 553
550 554 def add(
551 555 self,
552 556 manifest,
553 557 files,
554 558 desc,
555 559 transaction,
556 560 p1,
557 561 p2,
558 562 user,
559 563 date=None,
560 564 extra=None,
561 565 ):
562 566 # Convert to UTF-8 encoded bytestrings as the very first
563 567 # thing: calling any method on a localstr object will turn it
564 568 # into a str object and the cached UTF-8 string is thus lost.
565 569 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
566 570
567 571 user = user.strip()
568 572 # An empty username or a username with a "\n" will make the
569 573 # revision text contain two "\n\n" sequences -> corrupt
570 574 # repository since read cannot unpack the revision.
571 575 if not user:
572 576 raise error.StorageError(_(b"empty username"))
573 577 if b"\n" in user:
574 578 raise error.StorageError(
575 579 _(b"username %r contains a newline") % pycompat.bytestr(user)
576 580 )
577 581
578 582 desc = stripdesc(desc)
579 583
580 584 if date:
581 585 parseddate = b"%d %d" % dateutil.parsedate(date)
582 586 else:
583 587 parseddate = b"%d %d" % dateutil.makedate()
584 588 if extra:
585 589 branch = extra.get(b"branch")
586 590 if branch in (b"default", b""):
587 591 del extra[b"branch"]
588 592 elif branch in (b".", b"null", b"tip"):
589 593 raise error.StorageError(
590 594 _(b'the name \'%s\' is reserved') % branch
591 595 )
592 596 sortedfiles = sorted(files.touched)
593 597 flags = 0
594 598 sidedata = None
595 599 if self._copiesstorage == b'changeset-sidedata':
596 600 if files.has_copies_info:
597 601 flags |= flagutil.REVIDX_HASCOPIESINFO
598 602 sidedata = metadata.encode_files_sidedata(files)
599 603
600 604 if extra:
601 605 extra = encodeextra(extra)
602 606 parseddate = b"%s %s" % (parseddate, extra)
603 607 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
604 608 text = b"\n".join(l)
605 609 rev = self.addrevision(
606 610 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
607 611 )
608 612 return self.node(rev)
609 613
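For reference, the changeset text assembled by ``add()`` above has the following layout, which is also what ``readfiles()`` relies on when it returns ``l[3:]``:

    # <manifest node as hex>
    # <user>
    # <time> <timezone>[ <encoded extra>]
    # <touched file 1>
    # ...
    # <touched file N>
    # <blank line>
    # <description>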
610 614 def branchinfo(self, rev):
611 615 """return the branch name and open/close state of a revision
612 616
613 617 This function exists because creating a changectx object
614 618 just to access this is costly."""
615 619 return self.changelogrevision(rev).branchinfo
616 620
617 621 def _nodeduplicatecallback(self, transaction, rev):
618 622 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
619 623 #
620 624 # We track them in a list to preserve their order from the source bundle
621 625 duplicates = transaction.changes.setdefault(b'revduplicates', [])
622 626 duplicates.append(rev)
@@ -1,3920 +1,3928 b''
1 1 # cmdutil.py - help for command processing in mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import copy as copymod
11 11 import errno
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 hex,
18 18 nullrev,
19 19 short,
20 20 )
21 21 from .pycompat import (
22 22 getattr,
23 23 open,
24 24 setattr,
25 25 )
26 26 from .thirdparty import attr
27 27
28 28 from . import (
29 29 bookmarks,
30 30 changelog,
31 31 copies,
32 32 crecord as crecordmod,
33 33 dirstateguard,
34 34 encoding,
35 35 error,
36 36 formatter,
37 37 logcmdutil,
38 38 match as matchmod,
39 39 merge as mergemod,
40 40 mergestate as mergestatemod,
41 41 mergeutil,
42 42 obsolete,
43 43 patch,
44 44 pathutil,
45 45 phases,
46 46 pycompat,
47 47 repair,
48 48 revlog,
49 49 rewriteutil,
50 50 scmutil,
51 51 state as statemod,
52 52 subrepoutil,
53 53 templatekw,
54 54 templater,
55 55 util,
56 56 vfs as vfsmod,
57 57 )
58 58
59 59 from .utils import (
60 60 dateutil,
61 61 stringutil,
62 62 )
63 63
64 from .revlogutils import (
65 constants as revlog_constants,
66 )
67
64 68 if pycompat.TYPE_CHECKING:
65 69 from typing import (
66 70 Any,
67 71 Dict,
68 72 )
69 73
70 74 for t in (Any, Dict):
71 75 assert t
72 76
73 77 stringio = util.stringio
74 78
75 79 # templates of common command options
76 80
77 81 dryrunopts = [
78 82 (b'n', b'dry-run', None, _(b'do not perform actions, just print output')),
79 83 ]
80 84
81 85 confirmopts = [
82 86 (b'', b'confirm', None, _(b'ask before applying actions')),
83 87 ]
84 88
85 89 remoteopts = [
86 90 (b'e', b'ssh', b'', _(b'specify ssh command to use'), _(b'CMD')),
87 91 (
88 92 b'',
89 93 b'remotecmd',
90 94 b'',
91 95 _(b'specify hg command to run on the remote side'),
92 96 _(b'CMD'),
93 97 ),
94 98 (
95 99 b'',
96 100 b'insecure',
97 101 None,
98 102 _(b'do not verify server certificate (ignoring web.cacerts config)'),
99 103 ),
100 104 ]
101 105
102 106 walkopts = [
103 107 (
104 108 b'I',
105 109 b'include',
106 110 [],
107 111 _(b'include names matching the given patterns'),
108 112 _(b'PATTERN'),
109 113 ),
110 114 (
111 115 b'X',
112 116 b'exclude',
113 117 [],
114 118 _(b'exclude names matching the given patterns'),
115 119 _(b'PATTERN'),
116 120 ),
117 121 ]
118 122
119 123 commitopts = [
120 124 (b'm', b'message', b'', _(b'use text as commit message'), _(b'TEXT')),
121 125 (b'l', b'logfile', b'', _(b'read commit message from file'), _(b'FILE')),
122 126 ]
123 127
124 128 commitopts2 = [
125 129 (
126 130 b'd',
127 131 b'date',
128 132 b'',
129 133 _(b'record the specified date as commit date'),
130 134 _(b'DATE'),
131 135 ),
132 136 (
133 137 b'u',
134 138 b'user',
135 139 b'',
136 140 _(b'record the specified user as committer'),
137 141 _(b'USER'),
138 142 ),
139 143 ]
140 144
141 145 commitopts3 = [
142 146 (b'D', b'currentdate', None, _(b'record the current date as commit date')),
143 147 (b'U', b'currentuser', None, _(b'record the current user as committer')),
144 148 ]
145 149
146 150 formatteropts = [
147 151 (b'T', b'template', b'', _(b'display with template'), _(b'TEMPLATE')),
148 152 ]
149 153
150 154 templateopts = [
151 155 (
152 156 b'',
153 157 b'style',
154 158 b'',
155 159 _(b'display using template map file (DEPRECATED)'),
156 160 _(b'STYLE'),
157 161 ),
158 162 (b'T', b'template', b'', _(b'display with template'), _(b'TEMPLATE')),
159 163 ]
160 164
161 165 logopts = [
162 166 (b'p', b'patch', None, _(b'show patch')),
163 167 (b'g', b'git', None, _(b'use git extended diff format')),
164 168 (b'l', b'limit', b'', _(b'limit number of changes displayed'), _(b'NUM')),
165 169 (b'M', b'no-merges', None, _(b'do not show merges')),
166 170 (b'', b'stat', None, _(b'output diffstat-style summary of changes')),
167 171 (b'G', b'graph', None, _(b"show the revision DAG")),
168 172 ] + templateopts
169 173
170 174 diffopts = [
171 175 (b'a', b'text', None, _(b'treat all files as text')),
172 176 (
173 177 b'g',
174 178 b'git',
175 179 None,
176 180 _(b'use git extended diff format (DEFAULT: diff.git)'),
177 181 ),
178 182 (b'', b'binary', None, _(b'generate binary diffs in git mode (default)')),
179 183 (b'', b'nodates', None, _(b'omit dates from diff headers')),
180 184 ]
181 185
182 186 diffwsopts = [
183 187 (
184 188 b'w',
185 189 b'ignore-all-space',
186 190 None,
187 191 _(b'ignore white space when comparing lines'),
188 192 ),
189 193 (
190 194 b'b',
191 195 b'ignore-space-change',
192 196 None,
193 197 _(b'ignore changes in the amount of white space'),
194 198 ),
195 199 (
196 200 b'B',
197 201 b'ignore-blank-lines',
198 202 None,
199 203 _(b'ignore changes whose lines are all blank'),
200 204 ),
201 205 (
202 206 b'Z',
203 207 b'ignore-space-at-eol',
204 208 None,
205 209 _(b'ignore changes in whitespace at EOL'),
206 210 ),
207 211 ]
208 212
209 213 diffopts2 = (
210 214 [
211 215 (b'', b'noprefix', None, _(b'omit a/ and b/ prefixes from filenames')),
212 216 (
213 217 b'p',
214 218 b'show-function',
215 219 None,
216 220 _(
217 221 b'show which function each change is in (DEFAULT: diff.showfunc)'
218 222 ),
219 223 ),
220 224 (b'', b'reverse', None, _(b'produce a diff that undoes the changes')),
221 225 ]
222 226 + diffwsopts
223 227 + [
224 228 (
225 229 b'U',
226 230 b'unified',
227 231 b'',
228 232 _(b'number of lines of context to show'),
229 233 _(b'NUM'),
230 234 ),
231 235 (b'', b'stat', None, _(b'output diffstat-style summary of changes')),
232 236 (
233 237 b'',
234 238 b'root',
235 239 b'',
236 240 _(b'produce diffs relative to subdirectory'),
237 241 _(b'DIR'),
238 242 ),
239 243 ]
240 244 )
241 245
242 246 mergetoolopts = [
243 247 (b't', b'tool', b'', _(b'specify merge tool'), _(b'TOOL')),
244 248 ]
245 249
246 250 similarityopts = [
247 251 (
248 252 b's',
249 253 b'similarity',
250 254 b'',
251 255 _(b'guess renamed files by similarity (0<=s<=100)'),
252 256 _(b'SIMILARITY'),
253 257 )
254 258 ]
255 259
256 260 subrepoopts = [(b'S', b'subrepos', None, _(b'recurse into subrepositories'))]
257 261
258 262 debugrevlogopts = [
259 263 (b'c', b'changelog', False, _(b'open changelog')),
260 264 (b'm', b'manifest', False, _(b'open manifest')),
261 265 (b'', b'dir', b'', _(b'open directory manifest')),
262 266 ]
263 267
264 268 # special string such that everything below this line will be ignored in the
265 269 # editor text
266 270 _linebelow = b"^HG: ------------------------ >8 ------------------------$"
267 271
268 272
269 273 def check_at_most_one_arg(opts, *args):
270 274 """abort if more than one of the arguments are in opts
271 275
272 276 Returns the unique argument or None if none of them were specified.
273 277 """
274 278
275 279 def to_display(name):
276 280 return pycompat.sysbytes(name).replace(b'_', b'-')
277 281
278 282 previous = None
279 283 for x in args:
280 284 if opts.get(x):
281 285 if previous:
282 286 raise error.InputError(
283 287 _(b'cannot specify both --%s and --%s')
284 288 % (to_display(previous), to_display(x))
285 289 )
286 290 previous = x
287 291 return previous
288 292
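A small sketch of the contract (the option values are made up):

    opts = {b'date': b'2020-01-01', b'currentdate': None}
    check_at_most_one_arg(opts, b'date', b'currentdate')   # -> b'date'
    check_at_most_one_arg(opts, b'user', b'currentuser')   # -> None
    # two truthy options raise error.InputError:
    #   "cannot specify both --date and --currentdate"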
289 293
290 294 def check_incompatible_arguments(opts, first, others):
291 295 """abort if the first argument is given along with any of the others
292 296
293 297 Unlike check_at_most_one_arg(), `others` are not mutually exclusive
294 298 among themselves, and they're passed as a single collection.
295 299 """
296 300 for other in others:
297 301 check_at_most_one_arg(opts, first, other)
298 302
299 303
300 304 def resolvecommitoptions(ui, opts):
301 305 """modify commit options dict to handle related options
302 306
303 307 The return value indicates that ``rewrite.update-timestamp`` is the reason
304 308 the ``date`` option is set.
305 309 """
306 310 check_at_most_one_arg(opts, b'date', b'currentdate')
307 311 check_at_most_one_arg(opts, b'user', b'currentuser')
308 312
309 313 datemaydiffer = False # date-only change should be ignored?
310 314
311 315 if opts.get(b'currentdate'):
312 316 opts[b'date'] = b'%d %d' % dateutil.makedate()
313 317 elif (
314 318 not opts.get(b'date')
315 319 and ui.configbool(b'rewrite', b'update-timestamp')
316 320 and opts.get(b'currentdate') is None
317 321 ):
318 322 opts[b'date'] = b'%d %d' % dateutil.makedate()
319 323 datemaydiffer = True
320 324
321 325 if opts.get(b'currentuser'):
322 326 opts[b'user'] = ui.username()
323 327
324 328 return datemaydiffer
325 329
326 330
327 331 def checknotesize(ui, opts):
328 332 """ make sure note is of valid format """
329 333
330 334 note = opts.get(b'note')
331 335 if not note:
332 336 return
333 337
334 338 if len(note) > 255:
335 339 raise error.InputError(_(b"cannot store a note of more than 255 bytes"))
336 340 if b'\n' in note:
337 341 raise error.InputError(_(b"note cannot contain a newline"))
338 342
339 343
340 344 def ishunk(x):
341 345 hunkclasses = (crecordmod.uihunk, patch.recordhunk)
342 346 return isinstance(x, hunkclasses)
343 347
344 348
345 349 def newandmodified(chunks, originalchunks):
346 350 newlyaddedandmodifiedfiles = set()
347 351 alsorestore = set()
348 352 for chunk in chunks:
349 353 if (
350 354 ishunk(chunk)
351 355 and chunk.header.isnewfile()
352 356 and chunk not in originalchunks
353 357 ):
354 358 newlyaddedandmodifiedfiles.add(chunk.header.filename())
355 359 alsorestore.update(
356 360 set(chunk.header.files()) - {chunk.header.filename()}
357 361 )
358 362 return newlyaddedandmodifiedfiles, alsorestore
359 363
360 364
361 365 def parsealiases(cmd):
362 366 base_aliases = cmd.split(b"|")
363 367 all_aliases = set(base_aliases)
364 368 extra_aliases = []
365 369 for alias in base_aliases:
366 370 if b'-' in alias:
367 371 folded_alias = alias.replace(b'-', b'')
368 372 if folded_alias not in all_aliases:
369 373 all_aliases.add(folded_alias)
370 374 extra_aliases.append(folded_alias)
371 375 base_aliases.extend(extra_aliases)
372 376 return base_aliases
373 377
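Sketch of the dash folding performed above (the command name is hypothetical):

    parsealiases(b'debug-revlog|dr')
    # -> [b'debug-revlog', b'dr', b'debugrevlog']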
374 378
375 379 def setupwrapcolorwrite(ui):
376 380 # wrap ui.write so diff output can be labeled/colorized
377 381 def wrapwrite(orig, *args, **kw):
378 382 label = kw.pop('label', b'')
379 383 for chunk, l in patch.difflabel(lambda: args):
380 384 orig(chunk, label=label + l)
381 385
382 386 oldwrite = ui.write
383 387
384 388 def wrap(*args, **kwargs):
385 389 return wrapwrite(oldwrite, *args, **kwargs)
386 390
387 391 setattr(ui, 'write', wrap)
388 392 return oldwrite
389 393
390 394
391 395 def filterchunks(ui, originalhunks, usecurses, testfile, match, operation=None):
392 396 try:
393 397 if usecurses:
394 398 if testfile:
395 399 recordfn = crecordmod.testdecorator(
396 400 testfile, crecordmod.testchunkselector
397 401 )
398 402 else:
399 403 recordfn = crecordmod.chunkselector
400 404
401 405 return crecordmod.filterpatch(
402 406 ui, originalhunks, recordfn, operation
403 407 )
404 408 except crecordmod.fallbackerror as e:
405 409 ui.warn(b'%s\n' % e)
406 410 ui.warn(_(b'falling back to text mode\n'))
407 411
408 412 return patch.filterpatch(ui, originalhunks, match, operation)
409 413
410 414
411 415 def recordfilter(ui, originalhunks, match, operation=None):
412 416 """Prompts the user to filter the originalhunks and return a list of
413 417 selected hunks.
414 418 *operation* is used to build ui messages indicating to the user what
415 419 kind of filtering they are doing: reverting, committing, shelving, etc.
416 420 (see patch.filterpatch).
417 421 """
418 422 usecurses = crecordmod.checkcurses(ui)
419 423 testfile = ui.config(b'experimental', b'crecordtest')
420 424 oldwrite = setupwrapcolorwrite(ui)
421 425 try:
422 426 newchunks, newopts = filterchunks(
423 427 ui, originalhunks, usecurses, testfile, match, operation
424 428 )
425 429 finally:
426 430 ui.write = oldwrite
427 431 return newchunks, newopts
428 432
429 433
430 434 def dorecord(
431 435 ui, repo, commitfunc, cmdsuggest, backupall, filterfn, *pats, **opts
432 436 ):
433 437 opts = pycompat.byteskwargs(opts)
434 438 if not ui.interactive():
435 439 if cmdsuggest:
436 440 msg = _(b'running non-interactively, use %s instead') % cmdsuggest
437 441 else:
438 442 msg = _(b'running non-interactively')
439 443 raise error.InputError(msg)
440 444
441 445 # make sure username is set before going interactive
442 446 if not opts.get(b'user'):
443 447 ui.username() # raise exception, username not provided
444 448
445 449 def recordfunc(ui, repo, message, match, opts):
446 450 """This is generic record driver.
447 451
448 452 Its job is to interactively filter local changes, and
449 453 accordingly prepare the working directory into a state in which the
450 454 job can be delegated to a non-interactive commit command such as
451 455 'commit' or 'qrefresh'.
452 456
453 457 After the actual job is done by the non-interactive command, the
454 458 working directory is restored to its original state.
455 459
456 460 In the end we'll record interesting changes, and everything else
457 461 will be left in place, so the user can continue working.
458 462 """
459 463 if not opts.get(b'interactive-unshelve'):
460 464 checkunfinished(repo, commit=True)
461 465 wctx = repo[None]
462 466 merge = len(wctx.parents()) > 1
463 467 if merge:
464 468 raise error.InputError(
465 469 _(
466 470 b'cannot partially commit a merge '
467 471 b'(use "hg commit" instead)'
468 472 )
469 473 )
470 474
471 475 def fail(f, msg):
472 476 raise error.InputError(b'%s: %s' % (f, msg))
473 477
474 478 force = opts.get(b'force')
475 479 if not force:
476 480 match = matchmod.badmatch(match, fail)
477 481
478 482 status = repo.status(match=match)
479 483
480 484 overrides = {(b'ui', b'commitsubrepos'): True}
481 485
482 486 with repo.ui.configoverride(overrides, b'record'):
483 487 # subrepoutil.precommit() modifies the status
484 488 tmpstatus = scmutil.status(
485 489 copymod.copy(status.modified),
486 490 copymod.copy(status.added),
487 491 copymod.copy(status.removed),
488 492 copymod.copy(status.deleted),
489 493 copymod.copy(status.unknown),
490 494 copymod.copy(status.ignored),
491 495 copymod.copy(status.clean), # pytype: disable=wrong-arg-count
492 496 )
493 497
494 498 # Force allows -X subrepo to skip the subrepo.
495 499 subs, commitsubs, newstate = subrepoutil.precommit(
496 500 repo.ui, wctx, tmpstatus, match, force=True
497 501 )
498 502 for s in subs:
499 503 if s in commitsubs:
500 504 dirtyreason = wctx.sub(s).dirtyreason(True)
501 505 raise error.Abort(dirtyreason)
502 506
503 507 if not force:
504 508 repo.checkcommitpatterns(wctx, match, status, fail)
505 509 diffopts = patch.difffeatureopts(
506 510 ui,
507 511 opts=opts,
508 512 whitespace=True,
509 513 section=b'commands',
510 514 configprefix=b'commit.interactive.',
511 515 )
512 516 diffopts.nodates = True
513 517 diffopts.git = True
514 518 diffopts.showfunc = True
515 519 originaldiff = patch.diff(repo, changes=status, opts=diffopts)
516 520 originalchunks = patch.parsepatch(originaldiff)
517 521 match = scmutil.match(repo[None], pats)
518 522
519 523 # 1. filter patch, since we are intending to apply subset of it
520 524 try:
521 525 chunks, newopts = filterfn(ui, originalchunks, match)
522 526 except error.PatchError as err:
523 527 raise error.InputError(_(b'error parsing patch: %s') % err)
524 528 opts.update(newopts)
525 529
526 530 # We need to keep a backup of files that have been newly added and
527 531 # modified during the recording process because there is a previous
528 532 # version without the edit in the workdir. We also will need to restore
529 533 # files that were the sources of renames so that the patch application
530 534 # works.
531 535 newlyaddedandmodifiedfiles, alsorestore = newandmodified(
532 536 chunks, originalchunks
533 537 )
534 538 contenders = set()
535 539 for h in chunks:
536 540 try:
537 541 contenders.update(set(h.files()))
538 542 except AttributeError:
539 543 pass
540 544
541 545 changed = status.modified + status.added + status.removed
542 546 newfiles = [f for f in changed if f in contenders]
543 547 if not newfiles:
544 548 ui.status(_(b'no changes to record\n'))
545 549 return 0
546 550
547 551 modified = set(status.modified)
548 552
549 553 # 2. backup changed files, so we can restore them in the end
550 554
551 555 if backupall:
552 556 tobackup = changed
553 557 else:
554 558 tobackup = [
555 559 f
556 560 for f in newfiles
557 561 if f in modified or f in newlyaddedandmodifiedfiles
558 562 ]
559 563 backups = {}
560 564 if tobackup:
561 565 backupdir = repo.vfs.join(b'record-backups')
562 566 try:
563 567 os.mkdir(backupdir)
564 568 except OSError as err:
565 569 if err.errno != errno.EEXIST:
566 570 raise
567 571 try:
568 572 # backup continues
569 573 for f in tobackup:
570 574 fd, tmpname = pycompat.mkstemp(
571 575 prefix=os.path.basename(f) + b'.', dir=backupdir
572 576 )
573 577 os.close(fd)
574 578 ui.debug(b'backup %r as %r\n' % (f, tmpname))
575 579 util.copyfile(repo.wjoin(f), tmpname, copystat=True)
576 580 backups[f] = tmpname
577 581
578 582 fp = stringio()
579 583 for c in chunks:
580 584 fname = c.filename()
581 585 if fname in backups:
582 586 c.write(fp)
583 587 dopatch = fp.tell()
584 588 fp.seek(0)
585 589
586 590 # 2.5 optionally review / modify patch in text editor
587 591 if opts.get(b'review', False):
588 592 patchtext = (
589 593 crecordmod.diffhelptext
590 594 + crecordmod.patchhelptext
591 595 + fp.read()
592 596 )
593 597 reviewedpatch = ui.edit(
594 598 patchtext, b"", action=b"diff", repopath=repo.path
595 599 )
596 600 fp.truncate(0)
597 601 fp.write(reviewedpatch)
598 602 fp.seek(0)
599 603
600 604 [os.unlink(repo.wjoin(c)) for c in newlyaddedandmodifiedfiles]
601 605 # 3a. apply filtered patch to clean repo (clean)
602 606 if backups:
603 607 m = scmutil.matchfiles(repo, set(backups.keys()) | alsorestore)
604 608 mergemod.revert_to(repo[b'.'], matcher=m)
605 609
606 610 # 3b. (apply)
607 611 if dopatch:
608 612 try:
609 613 ui.debug(b'applying patch\n')
610 614 ui.debug(fp.getvalue())
611 615 patch.internalpatch(ui, repo, fp, 1, eolmode=None)
612 616 except error.PatchError as err:
613 617 raise error.InputError(pycompat.bytestr(err))
614 618 del fp
615 619
616 620 # 4. We prepared working directory according to filtered
617 621 # patch. Now is the time to delegate the job to
618 622 # commit/qrefresh or the like!
619 623
620 624 # Make all of the pathnames absolute.
621 625 newfiles = [repo.wjoin(nf) for nf in newfiles]
622 626 return commitfunc(ui, repo, *newfiles, **pycompat.strkwargs(opts))
623 627 finally:
624 628 # 5. finally restore backed-up files
625 629 try:
626 630 dirstate = repo.dirstate
627 631 for realname, tmpname in pycompat.iteritems(backups):
628 632 ui.debug(b'restoring %r to %r\n' % (tmpname, realname))
629 633
630 634 if dirstate[realname] == b'n':
631 635 # without normallookup, restoring timestamp
632 636 # may cause partially committed files
633 637 # to be treated as unmodified
634 638 dirstate.normallookup(realname)
635 639
636 640 # copystat=True here and above are a hack to trick any
637 641 # editors that have f open into thinking we haven't modified them.
638 642 #
639 643 # Also note that this is racy, as an editor could notice the
640 644 # file's mtime before we've finished writing it.
641 645 util.copyfile(tmpname, repo.wjoin(realname), copystat=True)
642 646 os.unlink(tmpname)
643 647 if tobackup:
644 648 os.rmdir(backupdir)
645 649 except OSError:
646 650 pass
647 651
648 652 def recordinwlock(ui, repo, message, match, opts):
649 653 with repo.wlock():
650 654 return recordfunc(ui, repo, message, match, opts)
651 655
652 656 return commit(ui, repo, recordinwlock, pats, opts)
653 657
654 658
655 659 class dirnode(object):
656 660 """
656 660 Represent a directory in the user's working copy with information required for
658 662 the purpose of tersing its status.
659 663
660 664 path is the path to the directory, without a trailing '/'
661 665
662 666 statuses is a set of statuses of all files in this directory (this includes
663 667 all the files in all the subdirectories too)
664 668
665 669 files is a list of files which are direct children of this directory
666 670
667 671 subdirs is a dictionary with the sub-directory name as the key and its own
668 672 dirnode object as the value
669 673 """
670 674
671 675 def __init__(self, dirpath):
672 676 self.path = dirpath
673 677 self.statuses = set()
674 678 self.files = []
675 679 self.subdirs = {}
676 680
677 681 def _addfileindir(self, filename, status):
678 682 """Add a file in this directory as a direct child."""
679 683 self.files.append((filename, status))
680 684
681 685 def addfile(self, filename, status):
682 686 """
683 687 Add a file to this directory or to its direct parent directory.
684 688
685 689 If the file is not a direct child of this directory, we traverse to the
686 690 directory of which this file is a direct child and add the file
687 691 there.
688 692 """
689 693
690 694 # if the filename contains a path separator, it means it's not the direct
691 695 # child of this directory
692 696 if b'/' in filename:
693 697 subdir, filep = filename.split(b'/', 1)
694 698
695 699 # does the dirnode object for subdir exist
696 700 if subdir not in self.subdirs:
697 701 subdirpath = pathutil.join(self.path, subdir)
698 702 self.subdirs[subdir] = dirnode(subdirpath)
699 703
700 704 # try adding the file in subdir
701 705 self.subdirs[subdir].addfile(filep, status)
702 706
703 707 else:
704 708 self._addfileindir(filename, status)
705 709
706 710 if status not in self.statuses:
707 711 self.statuses.add(status)
708 712
709 713 def iterfilepaths(self):
710 714 """Yield (status, path) for files directly under this directory."""
711 715 for f, st in self.files:
712 716 yield st, pathutil.join(self.path, f)
713 717
714 718 def tersewalk(self, terseargs):
715 719 """
716 720 Yield (status, path) obtained by processing the status of this
717 721 dirnode.
718 722
719 723 terseargs is the string of arguments passed by the user with `--terse`
720 724 flag.
721 725
722 726 Following are the cases which can happen:
723 727
724 728 1) All the files in the directory (including all the files in its
725 729 subdirectories) share the same status and the user has asked us to terse
726 730 that status. -> yield (status, dirpath). dirpath will end in '/'.
727 731
728 732 2) Otherwise, we do the following:
729 733
730 734 a) Yield (status, filepath) for all the files which are in this
731 735 directory (only the ones in this directory, not the subdirs)
732 736
733 737 b) Recurse the function on all the subdirectories of this
734 738 directory
735 739 """
736 740
737 741 if len(self.statuses) == 1:
738 742 onlyst = self.statuses.pop()
739 743
740 744 # Making sure we terse only when the status abbreviation is
741 745 # passed as terse argument
742 746 if onlyst in terseargs:
743 747 yield onlyst, self.path + b'/'
744 748 return
745 749
746 750 # add the files to status list
747 751 for st, fpath in self.iterfilepaths():
748 752 yield st, fpath
749 753
750 754 # recurse on the subdirs
751 755 for dirobj in self.subdirs.values():
752 756 for st, fpath in dirobj.tersewalk(terseargs):
753 757 yield st, fpath
754 758
755 759
756 760 def tersedir(statuslist, terseargs):
757 761 """
757 761 Terse the status if all the files in a directory share the same status.
758 762
759 763 statuslist is a scmutil.status() object which contains a list of files for
760 764 each status.
761 765 terseargs is the string which is passed by the user as the argument to `--terse`
763 767 flag.
764 768
765 769 The function makes a tree of objects of dirnode class, and at each node it
766 770 stores the information required to know whether we can terse a certain
767 771 directory or not.
768 772 """
769 773 # the order matters here as that is used to produce the final list
770 774 allst = (b'm', b'a', b'r', b'd', b'u', b'i', b'c')
771 775
772 776 # checking the argument validity
773 777 for s in pycompat.bytestr(terseargs):
774 778 if s not in allst:
775 779 raise error.InputError(_(b"'%s' not recognized") % s)
776 780
777 781 # creating a dirnode object for the root of the repo
778 782 rootobj = dirnode(b'')
779 783 pstatus = (
780 784 b'modified',
781 785 b'added',
782 786 b'deleted',
783 787 b'clean',
784 788 b'unknown',
785 789 b'ignored',
786 790 b'removed',
787 791 )
788 792
789 793 tersedict = {}
790 794 for attrname in pstatus:
791 795 statuschar = attrname[0:1]
792 796 for f in getattr(statuslist, attrname):
793 797 rootobj.addfile(f, statuschar)
794 798 tersedict[statuschar] = []
795 799
796 800 # we won't be tersing the root dir, so add files in it
797 801 for st, fpath in rootobj.iterfilepaths():
798 802 tersedict[st].append(fpath)
799 803
800 804 # process each sub-directory and build tersedict
801 805 for subdir in rootobj.subdirs.values():
802 806 for st, f in subdir.tersewalk(terseargs):
803 807 tersedict[st].append(f)
804 808
805 809 tersedlist = []
806 810 for st in allst:
807 811 tersedict[st].sort()
808 812 tersedlist.append(tersedict[st])
809 813
810 814 return scmutil.status(*tersedlist)
811 815
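An illustrative sketch of the effect (paths and statuses are made up): with ``--terse u``, if every file under ``build/`` is unknown, the directory is collapsed to a single entry:

    # input:                ? build/a.o    ? build/sub/b.o    M README
    # tersedir(..., b'u'):  ? build/       M README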
812 816
813 817 def _commentlines(raw):
814 818 '''Surround lines with a comment char and a new line'''
815 819 lines = raw.splitlines()
816 820 commentedlines = [b'# %s' % line for line in lines]
817 821 return b'\n'.join(commentedlines) + b'\n'
818 822
819 823
820 824 @attr.s(frozen=True)
821 825 class morestatus(object):
822 826 reporoot = attr.ib()
823 827 unfinishedop = attr.ib()
824 828 unfinishedmsg = attr.ib()
825 829 activemerge = attr.ib()
826 830 unresolvedpaths = attr.ib()
827 831 _formattedpaths = attr.ib(init=False, default=set())
828 832 _label = b'status.morestatus'
829 833
830 834 def formatfile(self, path, fm):
831 835 self._formattedpaths.add(path)
832 836 if self.activemerge and path in self.unresolvedpaths:
833 837 fm.data(unresolved=True)
834 838
835 839 def formatfooter(self, fm):
836 840 if self.unfinishedop or self.unfinishedmsg:
837 841 fm.startitem()
838 842 fm.data(itemtype=b'morestatus')
839 843
840 844 if self.unfinishedop:
841 845 fm.data(unfinished=self.unfinishedop)
842 846 statemsg = (
843 847 _(b'The repository is in an unfinished *%s* state.')
844 848 % self.unfinishedop
845 849 )
846 850 fm.plain(b'%s\n' % _commentlines(statemsg), label=self._label)
847 851 if self.unfinishedmsg:
848 852 fm.data(unfinishedmsg=self.unfinishedmsg)
849 853
850 854 # May also start new data items.
851 855 self._formatconflicts(fm)
852 856
853 857 if self.unfinishedmsg:
854 858 fm.plain(
855 859 b'%s\n' % _commentlines(self.unfinishedmsg), label=self._label
856 860 )
857 861
858 862 def _formatconflicts(self, fm):
859 863 if not self.activemerge:
860 864 return
861 865
862 866 if self.unresolvedpaths:
863 867 mergeliststr = b'\n'.join(
864 868 [
865 869 b' %s'
866 870 % util.pathto(self.reporoot, encoding.getcwd(), path)
867 871 for path in self.unresolvedpaths
868 872 ]
869 873 )
870 874 msg = (
871 875 _(
872 876 b'''Unresolved merge conflicts:
873 877
874 878 %s
875 879
876 880 To mark files as resolved: hg resolve --mark FILE'''
877 881 )
878 882 % mergeliststr
879 883 )
880 884
881 885 # If any paths with unresolved conflicts were not previously
882 886 # formatted, output them now.
883 887 for f in self.unresolvedpaths:
884 888 if f in self._formattedpaths:
885 889 # Already output.
886 890 continue
887 891 fm.startitem()
888 892 # We can't claim to know the status of the file - it may just
889 893 # have been in one of the states that were not requested for
890 894 # display, so it could be anything.
891 895 fm.data(itemtype=b'file', path=f, unresolved=True)
892 896
893 897 else:
894 898 msg = _(b'No unresolved merge conflicts.')
895 899
896 900 fm.plain(b'%s\n' % _commentlines(msg), label=self._label)
897 901
898 902
899 903 def readmorestatus(repo):
900 904 """Returns a morestatus object if the repo has unfinished state."""
901 905 statetuple = statemod.getrepostate(repo)
902 906 mergestate = mergestatemod.mergestate.read(repo)
903 907 activemerge = mergestate.active()
904 908 if not statetuple and not activemerge:
905 909 return None
906 910
907 911 unfinishedop = unfinishedmsg = unresolved = None
908 912 if statetuple:
909 913 unfinishedop, unfinishedmsg = statetuple
910 914 if activemerge:
911 915 unresolved = sorted(mergestate.unresolved())
912 916 return morestatus(
913 917 repo.root, unfinishedop, unfinishedmsg, activemerge, unresolved
914 918 )
915 919
916 920
917 921 def findpossible(cmd, table, strict=False):
918 922 """
919 923 Return cmd -> (aliases, command table entry)
920 924 for each matching command.
921 925 Return debug commands (or their aliases) only if no normal command matches.
922 926 """
923 927 choice = {}
924 928 debugchoice = {}
925 929
926 930 if cmd in table:
927 931 # short-circuit exact matches, "log" alias beats "log|history"
928 932 keys = [cmd]
929 933 else:
930 934 keys = table.keys()
931 935
932 936 allcmds = []
933 937 for e in keys:
934 938 aliases = parsealiases(e)
935 939 allcmds.extend(aliases)
936 940 found = None
937 941 if cmd in aliases:
938 942 found = cmd
939 943 elif not strict:
940 944 for a in aliases:
941 945 if a.startswith(cmd):
942 946 found = a
943 947 break
944 948 if found is not None:
945 949 if aliases[0].startswith(b"debug") or found.startswith(b"debug"):
946 950 debugchoice[found] = (aliases, table[e])
947 951 else:
948 952 choice[found] = (aliases, table[e])
949 953
950 954 if not choice and debugchoice:
951 955 choice = debugchoice
952 956
953 957 return choice, allcmds
954 958
955 959
956 960 def findcmd(cmd, table, strict=True):
957 961 """Return (aliases, command table entry) for command string."""
958 962 choice, allcmds = findpossible(cmd, table, strict)
959 963
960 964 if cmd in choice:
961 965 return choice[cmd]
962 966
963 967 if len(choice) > 1:
964 968 clist = sorted(choice)
965 969 raise error.AmbiguousCommand(cmd, clist)
966 970
967 971 if choice:
968 972 return list(choice.values())[0]
969 973
970 974 raise error.UnknownCommand(cmd, allcmds)
971 975
972 976
973 977 def changebranch(ui, repo, revs, label, opts):
974 978 """ Change the branch name of given revs to label """
975 979
976 980 with repo.wlock(), repo.lock(), repo.transaction(b'branches'):
977 981 # abort in case of uncommitted merge or dirty wdir
978 982 bailifchanged(repo)
979 983 revs = scmutil.revrange(repo, revs)
980 984 if not revs:
981 985 raise error.InputError(b"empty revision set")
982 986 roots = repo.revs(b'roots(%ld)', revs)
983 987 if len(roots) > 1:
984 988 raise error.InputError(
985 989 _(b"cannot change branch of non-linear revisions")
986 990 )
987 991 rewriteutil.precheck(repo, revs, b'change branch of')
988 992
989 993 root = repo[roots.first()]
990 994 rpb = {parent.branch() for parent in root.parents()}
991 995 if (
992 996 not opts.get(b'force')
993 997 and label not in rpb
994 998 and label in repo.branchmap()
995 999 ):
996 1000 raise error.InputError(
997 1001 _(b"a branch of the same name already exists")
998 1002 )
999 1003
1000 1004 # make sure only topological heads
1001 1005 if repo.revs(b'heads(%ld) - head()', revs):
1002 1006 raise error.InputError(
1003 1007 _(b"cannot change branch in middle of a stack")
1004 1008 )
1005 1009
1006 1010 replacements = {}
1007 1011 # avoid import cycle mercurial.cmdutil -> mercurial.context ->
1008 1012 # mercurial.subrepo -> mercurial.cmdutil
1009 1013 from . import context
1010 1014
1011 1015 for rev in revs:
1012 1016 ctx = repo[rev]
1013 1017 oldbranch = ctx.branch()
1014 1018 # check if ctx has same branch
1015 1019 if oldbranch == label:
1016 1020 continue
1017 1021
1018 1022 def filectxfn(repo, newctx, path):
1019 1023 try:
1020 1024 return ctx[path]
1021 1025 except error.ManifestLookupError:
1022 1026 return None
1023 1027
1024 1028 ui.debug(
1025 1029 b"changing branch of '%s' from '%s' to '%s'\n"
1026 1030 % (hex(ctx.node()), oldbranch, label)
1027 1031 )
1028 1032 extra = ctx.extra()
1029 1033 extra[b'branch_change'] = hex(ctx.node())
1030 1034 # While changing the branch of a set of linear commits, make sure that
1031 1035 # we base our commits on the new parent rather than the old parent which
1032 1036 # was obsoleted while changing the branch
1033 1037 p1 = ctx.p1().node()
1034 1038 p2 = ctx.p2().node()
1035 1039 if p1 in replacements:
1036 1040 p1 = replacements[p1][0]
1037 1041 if p2 in replacements:
1038 1042 p2 = replacements[p2][0]
1039 1043
1040 1044 mc = context.memctx(
1041 1045 repo,
1042 1046 (p1, p2),
1043 1047 ctx.description(),
1044 1048 ctx.files(),
1045 1049 filectxfn,
1046 1050 user=ctx.user(),
1047 1051 date=ctx.date(),
1048 1052 extra=extra,
1049 1053 branch=label,
1050 1054 )
1051 1055
1052 1056 newnode = repo.commitctx(mc)
1053 1057 replacements[ctx.node()] = (newnode,)
1054 1058 ui.debug(b'new node id is %s\n' % hex(newnode))
1055 1059
1056 1060 # create obsmarkers and move bookmarks
1057 1061 scmutil.cleanupnodes(
1058 1062 repo, replacements, b'branch-change', fixphase=True
1059 1063 )
1060 1064
1061 1065 # move the working copy too
1062 1066 wctx = repo[None]
1063 1067 # in-progress merge is a bit too complex for now.
1064 1068 if len(wctx.parents()) == 1:
1065 1069 newid = replacements.get(wctx.p1().node())
1066 1070 if newid is not None:
1067 1071 # avoid import cycle mercurial.cmdutil -> mercurial.hg ->
1068 1072 # mercurial.cmdutil
1069 1073 from . import hg
1070 1074
1071 1075 hg.update(repo, newid[0], quietempty=True)
1072 1076
1073 1077 ui.status(_(b"changed branch on %d changesets\n") % len(replacements))
1074 1078
1075 1079
1076 1080 def findrepo(p):
1077 1081 while not os.path.isdir(os.path.join(p, b".hg")):
1078 1082 oldp, p = p, os.path.dirname(p)
1079 1083 if p == oldp:
1080 1084 return None
1081 1085
1082 1086 return p
1083 1087
1084 1088
1085 1089 def bailifchanged(repo, merge=True, hint=None):
1086 1090 """enforce the precondition that working directory must be clean.
1087 1091
1088 1092 'merge' can be set to false if a pending uncommitted merge should be
1089 1093 ignored (such as when 'update --check' runs).
1090 1094
1091 1095 'hint' is the usual hint given to Abort exception.
1092 1096 """
1093 1097
1094 1098 if merge and repo.dirstate.p2() != repo.nullid:
1095 1099 raise error.StateError(_(b'outstanding uncommitted merge'), hint=hint)
1096 1100 st = repo.status()
1097 1101 if st.modified or st.added or st.removed or st.deleted:
1098 1102 raise error.StateError(_(b'uncommitted changes'), hint=hint)
1099 1103 ctx = repo[None]
1100 1104 for s in sorted(ctx.substate):
1101 1105 ctx.sub(s).bailifchanged(hint=hint)
1102 1106
1103 1107
1104 1108 def logmessage(ui, opts):
1105 1109 """ get the log message according to -m and -l option """
1106 1110
1107 1111 check_at_most_one_arg(opts, b'message', b'logfile')
1108 1112
1109 1113 message = opts.get(b'message')
1110 1114 logfile = opts.get(b'logfile')
1111 1115
1112 1116 if not message and logfile:
1113 1117 try:
1114 1118 if isstdiofilename(logfile):
1115 1119 message = ui.fin.read()
1116 1120 else:
1117 1121 message = b'\n'.join(util.readfile(logfile).splitlines())
1118 1122 except IOError as inst:
1119 1123 raise error.Abort(
1120 1124 _(b"can't read commit message '%s': %s")
1121 1125 % (logfile, encoding.strtolocal(inst.strerror))
1122 1126 )
1123 1127 return message
1124 1128
1125 1129
1126 1130 def mergeeditform(ctxorbool, baseformname):
1127 1131 """return appropriate editform name (referencing a committemplate)
1128 1132
1129 1133 'ctxorbool' is either a ctx to be committed, or a bool indicating whether
1130 1134 a merge is being committed.
1131 1135
1132 1136 This returns baseformname with '.merge' appended if it is a merge,
1133 1137 otherwise '.normal' is appended.
1134 1138 """
1135 1139 if isinstance(ctxorbool, bool):
1136 1140 if ctxorbool:
1137 1141 return baseformname + b".merge"
1138 1142 elif len(ctxorbool.parents()) > 1:
1139 1143 return baseformname + b".merge"
1140 1144
1141 1145 return baseformname + b".normal"
1142 1146
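Sketch of the resulting editform names (the base name b'commit' is only an example):

    mergeeditform(True, b'commit')    # -> b'commit.merge'
    mergeeditform(False, b'commit')   # -> b'commit.normal'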
1143 1147
1144 1148 def getcommiteditor(
1145 1149 edit=False, finishdesc=None, extramsg=None, editform=b'', **opts
1146 1150 ):
1147 1151 """get appropriate commit message editor according to '--edit' option
1148 1152
1149 1153 'finishdesc' is a function to be called with the edited commit message
1150 1154 (= 'description' of the new changeset) just after editing, but
1151 1155 before checking emptiness. It should return the actual text to be
1152 1156 stored into history. This allows changing the description before
1153 1157 storing.
1154 1158
1155 1159 'extramsg' is an extra message to be shown in the editor instead of
1156 1160 the 'Leave message empty to abort commit' line. The 'HG: ' prefix and EOL
1157 1161 are automatically added.
1158 1162
1159 1163 'editform' is a dot-separated list of names, to distinguish
1160 1164 the purpose of commit text editing.
1161 1165
1162 1166 'getcommiteditor' returns 'commitforceeditor' regardless of
1163 1167 'edit', if one of 'finishdesc' or 'extramsg' is specified, because
1164 1168 they are specific for usage in MQ.
1165 1169 """
1166 1170 if edit or finishdesc or extramsg:
1167 1171 return lambda r, c, s: commitforceeditor(
1168 1172 r, c, s, finishdesc=finishdesc, extramsg=extramsg, editform=editform
1169 1173 )
1170 1174 elif editform:
1171 1175 return lambda r, c, s: commiteditor(r, c, s, editform=editform)
1172 1176 else:
1173 1177 return commiteditor
1174 1178
1175 1179
1176 1180 def _escapecommandtemplate(tmpl):
1177 1181 parts = []
1178 1182 for typ, start, end in templater.scantemplate(tmpl, raw=True):
1179 1183 if typ == b'string':
1180 1184 parts.append(stringutil.escapestr(tmpl[start:end]))
1181 1185 else:
1182 1186 parts.append(tmpl[start:end])
1183 1187 return b''.join(parts)
1184 1188
1185 1189
1186 1190 def rendercommandtemplate(ui, tmpl, props):
1187 1191 r"""Expand a literal template 'tmpl' in a way suitable for command line
1188 1192
1189 1193 '\' in outermost string is not taken as an escape character because it
1190 1194 is a directory separator on Windows.
1191 1195
1192 1196 >>> from . import ui as uimod
1193 1197 >>> ui = uimod.ui()
1194 1198 >>> rendercommandtemplate(ui, b'c:\\{path}', {b'path': b'foo'})
1195 1199 'c:\\foo'
1196 1200 >>> rendercommandtemplate(ui, b'{"c:\\{path}"}', {'path': b'foo'})
1197 1201 'c:{path}'
1198 1202 """
1199 1203 if not tmpl:
1200 1204 return tmpl
1201 1205 t = formatter.maketemplater(ui, _escapecommandtemplate(tmpl))
1202 1206 return t.renderdefault(props)
1203 1207
1204 1208
1205 1209 def rendertemplate(ctx, tmpl, props=None):
1206 1210 """Expand a literal template 'tmpl' byte-string against one changeset
1207 1211
1208 1212 Each props item must be a stringify-able value or a callable returning
1209 1213 such value, i.e. no bare list nor dict should be passed.
1210 1214 """
1211 1215 repo = ctx.repo()
1212 1216 tres = formatter.templateresources(repo.ui, repo)
1213 1217 t = formatter.maketemplater(
1214 1218 repo.ui, tmpl, defaults=templatekw.keywords, resources=tres
1215 1219 )
1216 1220 mapping = {b'ctx': ctx}
1217 1221 if props:
1218 1222 mapping.update(props)
1219 1223 return t.renderdefault(mapping)
1220 1224
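Sketch of calling the helper above (assumes an existing changectx ``ctx``, e.g. ``repo[b'tip']``; the output is illustrative):

    rendertemplate(ctx, b'{rev}:{node|short} {desc|firstline}')
    # -> e.g. b'0:1234567890ab initial commit'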
1221 1225
1222 1226 def format_changeset_summary(ui, ctx, command=None, default_spec=None):
1223 1227 """Format a changeset summary (one line)."""
1224 1228 spec = None
1225 1229 if command:
1226 1230 spec = ui.config(
1227 1231 b'command-templates', b'oneline-summary.%s' % command, None
1228 1232 )
1229 1233 if not spec:
1230 1234 spec = ui.config(b'command-templates', b'oneline-summary')
1231 1235 if not spec:
1232 1236 spec = default_spec
1233 1237 if not spec:
1234 1238 spec = (
1235 1239 b'{separate(" ", '
1236 1240 b'label("oneline-summary.changeset", "{rev}:{node|short}")'
1237 1241 b', '
1238 1242 b'join(filter(namespaces % "{ifeq(namespace, "branches", "", join(names % "{label("oneline-summary.{namespace}", name)}", " "))}"), " ")'
1239 1243 b')} '
1240 1244 b'"{label("oneline-summary.desc", desc|firstline)}"'
1241 1245 )
1242 1246 text = rendertemplate(ctx, spec)
1243 1247 return text.split(b'\n')[0]
1244 1248
1245 1249
1246 1250 def _buildfntemplate(pat, total=None, seqno=None, revwidth=None, pathname=None):
1247 1251 r"""Convert old-style filename format string to template string
1248 1252
1249 1253 >>> _buildfntemplate(b'foo-%b-%n.patch', seqno=0)
1250 1254 'foo-{reporoot|basename}-{seqno}.patch'
1251 1255 >>> _buildfntemplate(b'%R{tags % "{tag}"}%H')
1252 1256 '{rev}{tags % "{tag}"}{node}'
1253 1257
1254 1258 '\' in outermost strings has to be escaped because it is a directory
1255 1259 separator on Windows:
1256 1260
1257 1261 >>> _buildfntemplate(b'c:\\tmp\\%R\\%n.patch', seqno=0)
1258 1262 'c:\\\\tmp\\\\{rev}\\\\{seqno}.patch'
1259 1263 >>> _buildfntemplate(b'\\\\foo\\bar.patch')
1260 1264 '\\\\\\\\foo\\\\bar.patch'
1261 1265 >>> _buildfntemplate(b'\\{tags % "{tag}"}')
1262 1266 '\\\\{tags % "{tag}"}'
1263 1267
1264 1268 but inner strings follow the template rules (i.e. '\' is taken as an
1265 1269 escape character):
1266 1270
1267 1271 >>> _buildfntemplate(br'{"c:\tmp"}', seqno=0)
1268 1272 '{"c:\\tmp"}'
1269 1273 """
1270 1274 expander = {
1271 1275 b'H': b'{node}',
1272 1276 b'R': b'{rev}',
1273 1277 b'h': b'{node|short}',
1274 1278 b'm': br'{sub(r"[^\w]", "_", desc|firstline)}',
1275 1279 b'r': b'{if(revwidth, pad(rev, revwidth, "0", left=True), rev)}',
1276 1280 b'%': b'%',
1277 1281 b'b': b'{reporoot|basename}',
1278 1282 }
1279 1283 if total is not None:
1280 1284 expander[b'N'] = b'{total}'
1281 1285 if seqno is not None:
1282 1286 expander[b'n'] = b'{seqno}'
1283 1287 if total is not None and seqno is not None:
1284 1288 expander[b'n'] = b'{pad(seqno, total|stringify|count, "0", left=True)}'
1285 1289 if pathname is not None:
1286 1290 expander[b's'] = b'{pathname|basename}'
1287 1291 expander[b'd'] = b'{if(pathname|dirname, pathname|dirname, ".")}'
1288 1292 expander[b'p'] = b'{pathname}'
1289 1293
1290 1294 newname = []
1291 1295 for typ, start, end in templater.scantemplate(pat, raw=True):
1292 1296 if typ != b'string':
1293 1297 newname.append(pat[start:end])
1294 1298 continue
1295 1299 i = start
1296 1300 while i < end:
1297 1301 n = pat.find(b'%', i, end)
1298 1302 if n < 0:
1299 1303 newname.append(stringutil.escapestr(pat[i:end]))
1300 1304 break
1301 1305 newname.append(stringutil.escapestr(pat[i:n]))
1302 1306 if n + 2 > end:
1303 1307 raise error.Abort(
1304 1308 _(b"incomplete format spec in output filename")
1305 1309 )
1306 1310 c = pat[n + 1 : n + 2]
1307 1311 i = n + 2
1308 1312 try:
1309 1313 newname.append(expander[c])
1310 1314 except KeyError:
1311 1315 raise error.Abort(
1312 1316 _(b"invalid format spec '%%%s' in output filename") % c
1313 1317 )
1314 1318 return b''.join(newname)
1315 1319
1316 1320
1317 1321 def makefilename(ctx, pat, **props):
1318 1322 if not pat:
1319 1323 return pat
1320 1324 tmpl = _buildfntemplate(pat, **props)
1321 1325 # BUG: alias expansion shouldn't be made against template fragments
1322 1326 # rewritten from %-format strings, but we have no easy way to partially
1323 1327 # disable the expansion.
1324 1328 return rendertemplate(ctx, tmpl, pycompat.byteskwargs(props))
1325 1329
1326 1330
1327 1331 def isstdiofilename(pat):
1328 1332 """True if the given pat looks like a filename denoting stdin/stdout"""
1329 1333 return not pat or pat == b'-'
1330 1334
1331 1335
1332 1336 class _unclosablefile(object):
1333 1337 def __init__(self, fp):
1334 1338 self._fp = fp
1335 1339
1336 1340 def close(self):
1337 1341 pass
1338 1342
1339 1343 def __iter__(self):
1340 1344 return iter(self._fp)
1341 1345
1342 1346 def __getattr__(self, attr):
1343 1347 return getattr(self._fp, attr)
1344 1348
1345 1349 def __enter__(self):
1346 1350 return self
1347 1351
1348 1352 def __exit__(self, exc_type, exc_value, exc_tb):
1349 1353 pass
1350 1354
1351 1355
1352 1356 def makefileobj(ctx, pat, mode=b'wb', **props):
1353 1357 writable = mode not in (b'r', b'rb')
1354 1358
1355 1359 if isstdiofilename(pat):
1356 1360 repo = ctx.repo()
1357 1361 if writable:
1358 1362 fp = repo.ui.fout
1359 1363 else:
1360 1364 fp = repo.ui.fin
1361 1365 return _unclosablefile(fp)
1362 1366 fn = makefilename(ctx, pat, **props)
1363 1367 return open(fn, mode)
1364 1368
1365 1369
1366 1370 def openstorage(repo, cmd, file_, opts, returnrevlog=False):
1367 1371 """opens the changelog, manifest, a filelog or a given revlog"""
1368 1372 cl = opts[b'changelog']
1369 1373 mf = opts[b'manifest']
1370 1374 dir = opts[b'dir']
1371 1375 msg = None
1372 1376 if cl and mf:
1373 1377 msg = _(b'cannot specify --changelog and --manifest at the same time')
1374 1378 elif cl and dir:
1375 1379 msg = _(b'cannot specify --changelog and --dir at the same time')
1376 1380 elif cl or mf or dir:
1377 1381 if file_:
1378 1382 msg = _(b'cannot specify filename with --changelog or --manifest')
1379 1383 elif not repo:
1380 1384 msg = _(
1381 1385 b'cannot specify --changelog or --manifest or --dir '
1382 1386 b'without a repository'
1383 1387 )
1384 1388 if msg:
1385 1389 raise error.InputError(msg)
1386 1390
1387 1391 r = None
1388 1392 if repo:
1389 1393 if cl:
1390 1394 r = repo.unfiltered().changelog
1391 1395 elif dir:
1392 1396 if not scmutil.istreemanifest(repo):
1393 1397 raise error.InputError(
1394 1398 _(
1395 1399 b"--dir can only be used on repos with "
1396 1400 b"treemanifest enabled"
1397 1401 )
1398 1402 )
1399 1403 if not dir.endswith(b'/'):
1400 1404 dir = dir + b'/'
1401 1405 dirlog = repo.manifestlog.getstorage(dir)
1402 1406 if len(dirlog):
1403 1407 r = dirlog
1404 1408 elif mf:
1405 1409 r = repo.manifestlog.getstorage(b'')
1406 1410 elif file_:
1407 1411 filelog = repo.file(file_)
1408 1412 if len(filelog):
1409 1413 r = filelog
1410 1414
1411 1415 # Not all storage may be revlogs. If requested, try to return an actual
1412 1416 # revlog instance.
1413 1417 if returnrevlog:
1414 1418 if isinstance(r, revlog.revlog):
1415 1419 pass
1416 1420 elif util.safehasattr(r, b'_revlog'):
1417 1421 r = r._revlog # pytype: disable=attribute-error
1418 1422 elif r is not None:
1419 1423 raise error.InputError(
1420 1424 _(b'%r does not appear to be a revlog') % r
1421 1425 )
1422 1426
1423 1427 if not r:
1424 1428 if not returnrevlog:
1425 1429 raise error.InputError(_(b'cannot give path to non-revlog'))
1426 1430
1427 1431 if not file_:
1428 1432 raise error.CommandError(cmd, _(b'invalid arguments'))
1429 1433 if not os.path.isfile(file_):
1430 1434 raise error.InputError(_(b"revlog '%s' not found") % file_)
1435
1436 target = (revlog_constants.KIND_OTHER, b'free-form:%s' % file_)
1431 1437 r = revlog.revlog(
1432 vfsmod.vfs(encoding.getcwd(), audit=False), file_[:-2] + b".i"
1438 vfsmod.vfs(encoding.getcwd(), audit=False),
1439 target=target,
1440 indexfile=file_[:-2] + b".i",
1433 1441 )
1434 1442 return r
1435 1443
1436 1444
1437 1445 def openrevlog(repo, cmd, file_, opts):
1438 1446 """Obtain a revlog backing storage of an item.
1439 1447
1440 1448 This is similar to ``openstorage()`` except it always returns a revlog.
1441 1449
1442 1450 In most cases, a caller cares about the main storage object - not the
1443 1451 revlog backing it. Therefore, this function should only be used by code
1444 1452 that needs to examine low-level revlog implementation details. e.g. debug
1445 1453 commands.
1446 1454 """
1447 1455 return openstorage(repo, cmd, file_, opts, returnrevlog=True)
1448 1456
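A hedged usage sketch for the helpers above (``repo`` and the file path are assumptions; the opts dict mirrors ``debugrevlogopts``):

    opts = {b'changelog': True, b'manifest': False, b'dir': b''}
    cl_revlog = openrevlog(repo, b'debugrevlog', None, opts)

    # without a repository, an on-disk path is opened directly and, with this
    # change, tagged as (KIND_OTHER, b'free-form:<path>'):
    opts = {b'changelog': False, b'manifest': False, b'dir': b''}
    raw = openrevlog(None, b'debugrevlog', b'data/foo.txt.i', opts)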
1449 1457
1450 1458 def copy(ui, repo, pats, opts, rename=False):
1451 1459 check_incompatible_arguments(opts, b'forget', [b'dry_run'])
1452 1460
1453 1461 # called with the repo lock held
1454 1462 #
1455 1463 # hgsep => pathname that uses "/" to separate directories
1456 1464 # ossep => pathname that uses os.sep to separate directories
1457 1465 cwd = repo.getcwd()
1458 1466 targets = {}
1459 1467 forget = opts.get(b"forget")
1460 1468 after = opts.get(b"after")
1461 1469 dryrun = opts.get(b"dry_run")
1462 1470 rev = opts.get(b'at_rev')
1463 1471 if rev:
1464 1472 if not forget and not after:
1465 1473 # TODO: Remove this restriction and make it also create the copy
1466 1474 # targets (and remove the rename source if rename==True).
1467 1475 raise error.InputError(_(b'--at-rev requires --after'))
1468 1476 ctx = scmutil.revsingle(repo, rev)
1469 1477 if len(ctx.parents()) > 1:
1470 1478 raise error.InputError(
1471 1479 _(b'cannot mark/unmark copy in merge commit')
1472 1480 )
1473 1481 else:
1474 1482 ctx = repo[None]
1475 1483
1476 1484 pctx = ctx.p1()
1477 1485
1478 1486 uipathfn = scmutil.getuipathfn(repo, legacyrelativevalue=True)
1479 1487
1480 1488 if forget:
1481 1489 if ctx.rev() is None:
1482 1490 new_ctx = ctx
1483 1491 else:
1484 1492 if len(ctx.parents()) > 1:
1485 1493 raise error.InputError(_(b'cannot unmark copy in merge commit'))
1486 1494 # avoid cycle context -> subrepo -> cmdutil
1487 1495 from . import context
1488 1496
1489 1497 rewriteutil.precheck(repo, [ctx.rev()], b'uncopy')
1490 1498 new_ctx = context.overlayworkingctx(repo)
1491 1499 new_ctx.setbase(ctx.p1())
1492 1500 mergemod.graft(repo, ctx, wctx=new_ctx)
1493 1501
1494 1502 match = scmutil.match(ctx, pats, opts)
1495 1503
1496 1504 current_copies = ctx.p1copies()
1497 1505 current_copies.update(ctx.p2copies())
1498 1506
1499 1507 uipathfn = scmutil.getuipathfn(repo)
1500 1508 for f in ctx.walk(match):
1501 1509 if f in current_copies:
1502 1510 new_ctx[f].markcopied(None)
1503 1511 elif match.exact(f):
1504 1512 ui.warn(
1505 1513 _(
1506 1514 b'%s: not unmarking as copy - file is not marked as copied\n'
1507 1515 )
1508 1516 % uipathfn(f)
1509 1517 )
1510 1518
1511 1519 if ctx.rev() is not None:
1512 1520 with repo.lock():
1513 1521 mem_ctx = new_ctx.tomemctx_for_amend(ctx)
1514 1522 new_node = mem_ctx.commit()
1515 1523
1516 1524 if repo.dirstate.p1() == ctx.node():
1517 1525 with repo.dirstate.parentchange():
1518 1526 scmutil.movedirstate(repo, repo[new_node])
1519 1527 replacements = {ctx.node(): [new_node]}
1520 1528 scmutil.cleanupnodes(
1521 1529 repo, replacements, b'uncopy', fixphase=True
1522 1530 )
1523 1531
1524 1532 return
1525 1533
1526 1534 pats = scmutil.expandpats(pats)
1527 1535 if not pats:
1528 1536 raise error.InputError(_(b'no source or destination specified'))
1529 1537 if len(pats) == 1:
1530 1538 raise error.InputError(_(b'no destination specified'))
1531 1539 dest = pats.pop()
1532 1540
1533 1541 def walkpat(pat):
1534 1542 srcs = []
1535 1543 # TODO: Inline and simplify the non-working-copy version of this code
1536 1544 # since it shares very little with the working-copy version of it.
1537 1545 ctx_to_walk = ctx if ctx.rev() is None else pctx
1538 1546 m = scmutil.match(ctx_to_walk, [pat], opts, globbed=True)
1539 1547 for abs in ctx_to_walk.walk(m):
1540 1548 rel = uipathfn(abs)
1541 1549 exact = m.exact(abs)
1542 1550 if abs not in ctx:
1543 1551 if abs in pctx:
1544 1552 if not after:
1545 1553 if exact:
1546 1554 ui.warn(
1547 1555 _(
1548 1556 b'%s: not copying - file has been marked '
1549 1557 b'for remove\n'
1550 1558 )
1551 1559 % rel
1552 1560 )
1553 1561 continue
1554 1562 else:
1555 1563 if exact:
1556 1564 ui.warn(
1557 1565 _(b'%s: not copying - file is not managed\n') % rel
1558 1566 )
1559 1567 continue
1560 1568
1561 1569 # abs: hgsep
1562 1570 # rel: ossep
1563 1571 srcs.append((abs, rel, exact))
1564 1572 return srcs
1565 1573
1566 1574 if ctx.rev() is not None:
1567 1575 rewriteutil.precheck(repo, [ctx.rev()], b'uncopy')
1568 1576 absdest = pathutil.canonpath(repo.root, cwd, dest)
1569 1577 if ctx.hasdir(absdest):
1570 1578 raise error.InputError(
1571 1579 _(b'%s: --at-rev does not support a directory as destination')
1572 1580 % uipathfn(absdest)
1573 1581 )
1574 1582 if absdest not in ctx:
1575 1583 raise error.InputError(
1576 1584 _(b'%s: copy destination does not exist in %s')
1577 1585 % (uipathfn(absdest), ctx)
1578 1586 )
1579 1587
1580 1588 # avoid cycle context -> subrepo -> cmdutil
1581 1589 from . import context
1582 1590
1583 1591 copylist = []
1584 1592 for pat in pats:
1585 1593 srcs = walkpat(pat)
1586 1594 if not srcs:
1587 1595 continue
1588 1596 for abs, rel, exact in srcs:
1589 1597 copylist.append(abs)
1590 1598
1591 1599 if not copylist:
1592 1600 raise error.InputError(_(b'no files to copy'))
1593 1601 # TODO: Add support for `hg cp --at-rev . foo bar dir` and
1594 1602 # `hg cp --at-rev . dir1 dir2`, preferably unifying the code with the
1595 1603 # existing functions below.
1596 1604 if len(copylist) != 1:
1597 1605 raise error.InputError(_(b'--at-rev requires a single source'))
1598 1606
1599 1607 new_ctx = context.overlayworkingctx(repo)
1600 1608 new_ctx.setbase(ctx.p1())
1601 1609 mergemod.graft(repo, ctx, wctx=new_ctx)
1602 1610
1603 1611 new_ctx.markcopied(absdest, copylist[0])
1604 1612
1605 1613 with repo.lock():
1606 1614 mem_ctx = new_ctx.tomemctx_for_amend(ctx)
1607 1615 new_node = mem_ctx.commit()
1608 1616
1609 1617 if repo.dirstate.p1() == ctx.node():
1610 1618 with repo.dirstate.parentchange():
1611 1619 scmutil.movedirstate(repo, repo[new_node])
1612 1620 replacements = {ctx.node(): [new_node]}
1613 1621 scmutil.cleanupnodes(repo, replacements, b'copy', fixphase=True)
1614 1622
1615 1623 return
1616 1624
1617 1625 # abssrc: hgsep
1618 1626 # relsrc: ossep
1619 1627 # otarget: ossep
1620 1628 def copyfile(abssrc, relsrc, otarget, exact):
1621 1629 abstarget = pathutil.canonpath(repo.root, cwd, otarget)
1622 1630 if b'/' in abstarget:
1623 1631 # We cannot normalize abstarget itself; this would prevent
1624 1632 # case-only renames, like a => A.
1625 1633 abspath, absname = abstarget.rsplit(b'/', 1)
1626 1634 abstarget = repo.dirstate.normalize(abspath) + b'/' + absname
1627 1635 reltarget = repo.pathto(abstarget, cwd)
1628 1636 target = repo.wjoin(abstarget)
1629 1637 src = repo.wjoin(abssrc)
1630 1638 state = repo.dirstate[abstarget]
1631 1639
1632 1640 scmutil.checkportable(ui, abstarget)
1633 1641
1634 1642 # check for collisions
1635 1643 prevsrc = targets.get(abstarget)
1636 1644 if prevsrc is not None:
1637 1645 ui.warn(
1638 1646 _(b'%s: not overwriting - %s collides with %s\n')
1639 1647 % (
1640 1648 reltarget,
1641 1649 repo.pathto(abssrc, cwd),
1642 1650 repo.pathto(prevsrc, cwd),
1643 1651 )
1644 1652 )
1645 1653 return True # report a failure
1646 1654
1647 1655 # check for overwrites
1648 1656 exists = os.path.lexists(target)
1649 1657 samefile = False
1650 1658 if exists and abssrc != abstarget:
1651 1659 if repo.dirstate.normalize(abssrc) == repo.dirstate.normalize(
1652 1660 abstarget
1653 1661 ):
1654 1662 if not rename:
1655 1663 ui.warn(_(b"%s: can't copy - same file\n") % reltarget)
1656 1664 return True # report a failure
1657 1665 exists = False
1658 1666 samefile = True
1659 1667
1660 1668 if not after and exists or after and state in b'mn':
1661 1669 if not opts[b'force']:
1662 1670 if state in b'mn':
1663 1671 msg = _(b'%s: not overwriting - file already committed\n')
1664 1672 if after:
1665 1673 flags = b'--after --force'
1666 1674 else:
1667 1675 flags = b'--force'
1668 1676 if rename:
1669 1677 hint = (
1670 1678 _(
1671 1679 b"('hg rename %s' to replace the file by "
1672 1680 b'recording a rename)\n'
1673 1681 )
1674 1682 % flags
1675 1683 )
1676 1684 else:
1677 1685 hint = (
1678 1686 _(
1679 1687 b"('hg copy %s' to replace the file by "
1680 1688 b'recording a copy)\n'
1681 1689 )
1682 1690 % flags
1683 1691 )
1684 1692 else:
1685 1693 msg = _(b'%s: not overwriting - file exists\n')
1686 1694 if rename:
1687 1695 hint = _(
1688 1696 b"('hg rename --after' to record the rename)\n"
1689 1697 )
1690 1698 else:
1691 1699 hint = _(b"('hg copy --after' to record the copy)\n")
1692 1700 ui.warn(msg % reltarget)
1693 1701 ui.warn(hint)
1694 1702 return True # report a failure
1695 1703
1696 1704 if after:
1697 1705 if not exists:
1698 1706 if rename:
1699 1707 ui.warn(
1700 1708 _(b'%s: not recording move - %s does not exist\n')
1701 1709 % (relsrc, reltarget)
1702 1710 )
1703 1711 else:
1704 1712 ui.warn(
1705 1713 _(b'%s: not recording copy - %s does not exist\n')
1706 1714 % (relsrc, reltarget)
1707 1715 )
1708 1716 return True # report a failure
1709 1717 elif not dryrun:
1710 1718 try:
1711 1719 if exists:
1712 1720 os.unlink(target)
1713 1721 targetdir = os.path.dirname(target) or b'.'
1714 1722 if not os.path.isdir(targetdir):
1715 1723 os.makedirs(targetdir)
1716 1724 if samefile:
1717 1725 tmp = target + b"~hgrename"
1718 1726 os.rename(src, tmp)
1719 1727 os.rename(tmp, target)
1720 1728 else:
1721 1729 # Preserve stat info on renames, not on copies; this matches
1722 1730 # Linux CLI behavior.
1723 1731 util.copyfile(src, target, copystat=rename)
1724 1732 srcexists = True
1725 1733 except IOError as inst:
1726 1734 if inst.errno == errno.ENOENT:
1727 1735 ui.warn(_(b'%s: deleted in working directory\n') % relsrc)
1728 1736 srcexists = False
1729 1737 else:
1730 1738 ui.warn(
1731 1739 _(b'%s: cannot copy - %s\n')
1732 1740 % (relsrc, encoding.strtolocal(inst.strerror))
1733 1741 )
1734 1742 return True # report a failure
1735 1743
1736 1744 if ui.verbose or not exact:
1737 1745 if rename:
1738 1746 ui.status(_(b'moving %s to %s\n') % (relsrc, reltarget))
1739 1747 else:
1740 1748 ui.status(_(b'copying %s to %s\n') % (relsrc, reltarget))
1741 1749
1742 1750 targets[abstarget] = abssrc
1743 1751
1744 1752 # fix up dirstate
1745 1753 scmutil.dirstatecopy(
1746 1754 ui, repo, ctx, abssrc, abstarget, dryrun=dryrun, cwd=cwd
1747 1755 )
1748 1756 if rename and not dryrun:
1749 1757 if not after and srcexists and not samefile:
1750 1758 rmdir = repo.ui.configbool(b'experimental', b'removeemptydirs')
1751 1759 repo.wvfs.unlinkpath(abssrc, rmdir=rmdir)
1752 1760 ctx.forget([abssrc])
1753 1761
1754 1762 # pat: ossep
1755 1763 # dest: ossep
1756 1764 # srcs: list of (hgsep, hgsep, ossep, bool)
1757 1765 # return: function that takes hgsep and returns ossep
1758 1766 def targetpathfn(pat, dest, srcs):
1759 1767 if os.path.isdir(pat):
1760 1768 abspfx = pathutil.canonpath(repo.root, cwd, pat)
1761 1769 abspfx = util.localpath(abspfx)
1762 1770 if destdirexists:
1763 1771 striplen = len(os.path.split(abspfx)[0])
1764 1772 else:
1765 1773 striplen = len(abspfx)
1766 1774 if striplen:
1767 1775 striplen += len(pycompat.ossep)
1768 1776 res = lambda p: os.path.join(dest, util.localpath(p)[striplen:])
1769 1777 elif destdirexists:
1770 1778 res = lambda p: os.path.join(
1771 1779 dest, os.path.basename(util.localpath(p))
1772 1780 )
1773 1781 else:
1774 1782 res = lambda p: dest
1775 1783 return res
1776 1784
1777 1785 # pat: ossep
1778 1786 # dest: ossep
1779 1787 # srcs: list of (hgsep, hgsep, ossep, bool)
1780 1788 # return: function that takes hgsep and returns ossep
1781 1789 def targetpathafterfn(pat, dest, srcs):
1782 1790 if matchmod.patkind(pat):
1783 1791 # a mercurial pattern
1784 1792 res = lambda p: os.path.join(
1785 1793 dest, os.path.basename(util.localpath(p))
1786 1794 )
1787 1795 else:
1788 1796 abspfx = pathutil.canonpath(repo.root, cwd, pat)
1789 1797 if len(abspfx) < len(srcs[0][0]):
1790 1798 # A directory. Either the target path contains the last
1791 1799 # component of the source path or it does not.
1792 1800 def evalpath(striplen):
1793 1801 score = 0
1794 1802 for s in srcs:
1795 1803 t = os.path.join(dest, util.localpath(s[0])[striplen:])
1796 1804 if os.path.lexists(t):
1797 1805 score += 1
1798 1806 return score
1799 1807
1800 1808 abspfx = util.localpath(abspfx)
1801 1809 striplen = len(abspfx)
1802 1810 if striplen:
1803 1811 striplen += len(pycompat.ossep)
1804 1812 if os.path.isdir(os.path.join(dest, os.path.split(abspfx)[1])):
1805 1813 score = evalpath(striplen)
1806 1814 striplen1 = len(os.path.split(abspfx)[0])
1807 1815 if striplen1:
1808 1816 striplen1 += len(pycompat.ossep)
1809 1817 if evalpath(striplen1) > score:
1810 1818 striplen = striplen1
1811 1819 res = lambda p: os.path.join(dest, util.localpath(p)[striplen:])
1812 1820 else:
1813 1821 # a file
1814 1822 if destdirexists:
1815 1823 res = lambda p: os.path.join(
1816 1824 dest, os.path.basename(util.localpath(p))
1817 1825 )
1818 1826 else:
1819 1827 res = lambda p: dest
1820 1828 return res
1821 1829
1822 1830 destdirexists = os.path.isdir(dest) and not os.path.islink(dest)
1823 1831 if not destdirexists:
1824 1832 if len(pats) > 1 or matchmod.patkind(pats[0]):
1825 1833 raise error.InputError(
1826 1834 _(
1827 1835 b'with multiple sources, destination must be an '
1828 1836 b'existing directory'
1829 1837 )
1830 1838 )
1831 1839 if util.endswithsep(dest):
1832 1840 raise error.InputError(
1833 1841 _(b'destination %s is not a directory') % dest
1834 1842 )
1835 1843
1836 1844 tfn = targetpathfn
1837 1845 if after:
1838 1846 tfn = targetpathafterfn
1839 1847 copylist = []
1840 1848 for pat in pats:
1841 1849 srcs = walkpat(pat)
1842 1850 if not srcs:
1843 1851 continue
1844 1852 copylist.append((tfn(pat, dest, srcs), srcs))
1845 1853 if not copylist:
1846 1854 raise error.InputError(_(b'no files to copy'))
1847 1855
1848 1856 errors = 0
1849 1857 for targetpath, srcs in copylist:
1850 1858 for abssrc, relsrc, exact in srcs:
1851 1859 if copyfile(abssrc, relsrc, targetpath(abssrc), exact):
1852 1860 errors += 1
1853 1861
1854 1862 return errors != 0
1855 1863
1856 1864
1857 1865 ## facility to let extension process additional data into an import patch
1858 1866 # list of identifier to be executed in order
1859 1867 extrapreimport = [] # run before commit
1860 1868 extrapostimport = [] # run after commit
1861 1869 # mapping from identifier to actual import function
1862 1870 #
1863 1871 # 'preimport' are run before the commit is made and are provided the following
1864 1872 # arguments:
1865 1873 # - repo: the localrepository instance,
1866 1874 # - patchdata: data extracted from patch header (cf m.patch.patchheadermap),
1867 1875 # - extra: the future extra dictionary of the changeset, please mutate it,
1868 1876 # - opts: the import options.
1869 1877 # XXX ideally, we would just pass a ctx ready to be computed, that would allow
1870 1878 # mutation of in memory commit and more. Feel free to rework the code to get
1871 1879 # there.
1872 1880 extrapreimportmap = {}
1873 1881 # 'postimport' are run after the commit is made and are provided the following
1874 1882 # argument:
1875 1883 # - ctx: the changectx created by import.
1876 1884 extrapostimportmap = {}
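
# Example (hypothetical extension code, for illustration only; the identifier
# b'recordfilename' and both helper names are made up): an extension can add
# data to the future changeset's extra dict with a 'preimport' hook, and
# inspect the resulting changectx with a 'postimport' hook.
#
#     from mercurial import cmdutil
#
#     def _preimport_hook(repo, patchdata, extra, opts):
#         # 'extra' is mutated in place and becomes the changeset extras
#         extra[b'patch-filename'] = patchdata.get(b'filename', b'')
#
#     def _postimport_hook(ctx):
#         ctx.repo().ui.note(b'imported %s\n' % ctx.hex())
#
#     cmdutil.extrapreimport.append(b'recordfilename')
#     cmdutil.extrapreimportmap[b'recordfilename'] = _preimport_hook
#     cmdutil.extrapostimport.append(b'recordfilename')
#     cmdutil.extrapostimportmap[b'recordfilename'] = _postimport_hook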
1877 1885
1878 1886
1879 1887 def tryimportone(ui, repo, patchdata, parents, opts, msgs, updatefunc):
1880 1888 """Utility function used by commands.import to import a single patch
1881 1889
1882 1890 This function is explicitly defined here to help the evolve extension to
1883 1891 wrap this part of the import logic.
1884 1892
1885 1893 The API is currently a bit ugly because it is a simple code translation from
1886 1894 the import command. Feel free to make it better.
1887 1895
1888 1896 :patchdata: a dictionary containing parsed patch data (such as from
1889 1897 ``patch.extract()``)
1890 1898 :parents: nodes that will be parent of the created commit
1891 1899 :opts: the full dict of options passed to the import command
1892 1900 :msgs: list to save commit message to.
1893 1901 (used in case we need to save it when failing)
1894 1902 :updatefunc: a function that updates a repo to a given node
1895 1903 updatefunc(<repo>, <node>)
1896 1904 """
1897 1905 # avoid cycle context -> subrepo -> cmdutil
1898 1906 from . import context
1899 1907
1900 1908 tmpname = patchdata.get(b'filename')
1901 1909 message = patchdata.get(b'message')
1902 1910 user = opts.get(b'user') or patchdata.get(b'user')
1903 1911 date = opts.get(b'date') or patchdata.get(b'date')
1904 1912 branch = patchdata.get(b'branch')
1905 1913 nodeid = patchdata.get(b'nodeid')
1906 1914 p1 = patchdata.get(b'p1')
1907 1915 p2 = patchdata.get(b'p2')
1908 1916
1909 1917 nocommit = opts.get(b'no_commit')
1910 1918 importbranch = opts.get(b'import_branch')
1911 1919 update = not opts.get(b'bypass')
1912 1920 strip = opts[b"strip"]
1913 1921 prefix = opts[b"prefix"]
1914 1922 sim = float(opts.get(b'similarity') or 0)
1915 1923
1916 1924 if not tmpname:
1917 1925 return None, None, False
1918 1926
1919 1927 rejects = False
1920 1928
1921 1929 cmdline_message = logmessage(ui, opts)
1922 1930 if cmdline_message:
1923 1931 # pickup the cmdline msg
1924 1932 message = cmdline_message
1925 1933 elif message:
1926 1934 # pickup the patch msg
1927 1935 message = message.strip()
1928 1936 else:
1929 1937 # launch the editor
1930 1938 message = None
1931 1939 ui.debug(b'message:\n%s\n' % (message or b''))
1932 1940
1933 1941 if len(parents) == 1:
1934 1942 parents.append(repo[nullrev])
1935 1943 if opts.get(b'exact'):
1936 1944 if not nodeid or not p1:
1937 1945 raise error.InputError(_(b'not a Mercurial patch'))
1938 1946 p1 = repo[p1]
1939 1947 p2 = repo[p2 or nullrev]
1940 1948 elif p2:
1941 1949 try:
1942 1950 p1 = repo[p1]
1943 1951 p2 = repo[p2]
1944 1952 # Without any options, consider p2 only if the
1945 1953 # patch is being applied on top of the recorded
1946 1954 # first parent.
1947 1955 if p1 != parents[0]:
1948 1956 p1 = parents[0]
1949 1957 p2 = repo[nullrev]
1950 1958 except error.RepoError:
1951 1959 p1, p2 = parents
1952 1960 if p2.rev() == nullrev:
1953 1961 ui.warn(
1954 1962 _(
1955 1963 b"warning: import the patch as a normal revision\n"
1956 1964 b"(use --exact to import the patch as a merge)\n"
1957 1965 )
1958 1966 )
1959 1967 else:
1960 1968 p1, p2 = parents
1961 1969
1962 1970 n = None
1963 1971 if update:
1964 1972 if p1 != parents[0]:
1965 1973 updatefunc(repo, p1.node())
1966 1974 if p2 != parents[1]:
1967 1975 repo.setparents(p1.node(), p2.node())
1968 1976
1969 1977 if opts.get(b'exact') or importbranch:
1970 1978 repo.dirstate.setbranch(branch or b'default')
1971 1979
1972 1980 partial = opts.get(b'partial', False)
1973 1981 files = set()
1974 1982 try:
1975 1983 patch.patch(
1976 1984 ui,
1977 1985 repo,
1978 1986 tmpname,
1979 1987 strip=strip,
1980 1988 prefix=prefix,
1981 1989 files=files,
1982 1990 eolmode=None,
1983 1991 similarity=sim / 100.0,
1984 1992 )
1985 1993 except error.PatchError as e:
1986 1994 if not partial:
1987 1995 raise error.Abort(pycompat.bytestr(e))
1988 1996 if partial:
1989 1997 rejects = True
1990 1998
1991 1999 files = list(files)
1992 2000 if nocommit:
1993 2001 if message:
1994 2002 msgs.append(message)
1995 2003 else:
1996 2004 if opts.get(b'exact') or p2:
1997 2005 # If you got here, you either used --force and know what
1998 2006 # you are doing, or used --exact or a merge patch while
1999 2007 # being updated to its first parent.
2000 2008 m = None
2001 2009 else:
2002 2010 m = scmutil.matchfiles(repo, files or [])
2003 2011 editform = mergeeditform(repo[None], b'import.normal')
2004 2012 if opts.get(b'exact'):
2005 2013 editor = None
2006 2014 else:
2007 2015 editor = getcommiteditor(
2008 2016 editform=editform, **pycompat.strkwargs(opts)
2009 2017 )
2010 2018 extra = {}
2011 2019 for idfunc in extrapreimport:
2012 2020 extrapreimportmap[idfunc](repo, patchdata, extra, opts)
2013 2021 overrides = {}
2014 2022 if partial:
2015 2023 overrides[(b'ui', b'allowemptycommit')] = True
2016 2024 if opts.get(b'secret'):
2017 2025 overrides[(b'phases', b'new-commit')] = b'secret'
2018 2026 with repo.ui.configoverride(overrides, b'import'):
2019 2027 n = repo.commit(
2020 2028 message, user, date, match=m, editor=editor, extra=extra
2021 2029 )
2022 2030 for idfunc in extrapostimport:
2023 2031 extrapostimportmap[idfunc](repo[n])
2024 2032 else:
2025 2033 if opts.get(b'exact') or importbranch:
2026 2034 branch = branch or b'default'
2027 2035 else:
2028 2036 branch = p1.branch()
2029 2037 store = patch.filestore()
2030 2038 try:
2031 2039 files = set()
2032 2040 try:
2033 2041 patch.patchrepo(
2034 2042 ui,
2035 2043 repo,
2036 2044 p1,
2037 2045 store,
2038 2046 tmpname,
2039 2047 strip,
2040 2048 prefix,
2041 2049 files,
2042 2050 eolmode=None,
2043 2051 )
2044 2052 except error.PatchError as e:
2045 2053 raise error.Abort(stringutil.forcebytestr(e))
2046 2054 if opts.get(b'exact'):
2047 2055 editor = None
2048 2056 else:
2049 2057 editor = getcommiteditor(editform=b'import.bypass')
2050 2058 memctx = context.memctx(
2051 2059 repo,
2052 2060 (p1.node(), p2.node()),
2053 2061 message,
2054 2062 files=files,
2055 2063 filectxfn=store,
2056 2064 user=user,
2057 2065 date=date,
2058 2066 branch=branch,
2059 2067 editor=editor,
2060 2068 )
2061 2069
2062 2070 overrides = {}
2063 2071 if opts.get(b'secret'):
2064 2072 overrides[(b'phases', b'new-commit')] = b'secret'
2065 2073 with repo.ui.configoverride(overrides, b'import'):
2066 2074 n = memctx.commit()
2067 2075 finally:
2068 2076 store.close()
2069 2077 if opts.get(b'exact') and nocommit:
2070 2078 # --exact with --no-commit is still useful in that it does merge
2071 2079 # and branch bits
2072 2080 ui.warn(_(b"warning: can't check exact import with --no-commit\n"))
2073 2081 elif opts.get(b'exact') and (not n or hex(n) != nodeid):
2074 2082 raise error.Abort(_(b'patch is damaged or loses information'))
2075 2083 msg = _(b'applied to working directory')
2076 2084 if n:
2077 2085 # i18n: refers to a short changeset id
2078 2086 msg = _(b'created %s') % short(n)
2079 2087 return msg, n, rejects
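
# Example (sketch of the calling convention only; the real wiring, including
# how 'patchdata' comes out of patch.extract() and what 'updatefunc' does,
# lives in commands.import):
#
#     msg, node, rejects = cmdutil.tryimportone(
#         ui, repo, patchdata, parents, opts, msgs, updatefunc
#     )
#     if rejects:
#         ui.warn(b'patch applied partially\n')
#     elif msg:
#         ui.status(msg + b'\n')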
2080 2088
2081 2089
2082 2090 # facility to let extensions include additional data in an exported patch
2083 2091 # list of identifiers to be executed in order
2084 2092 extraexport = []
2085 2093 # mapping from identifier to actual export function
2086 2094 # function has to return a string to be added to the header or None
2087 2095 # it is given two arguments (sequencenumber, changectx)
2088 2096 extraexportmap = {}
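
# Example (hypothetical extension code, for illustration only; the b'topic'
# identifier and extras key are made up): adding an extra header line to each
# exported patch. The returned bytes get the leading '# ' from _exportsingle
# below; returning None skips the header for that changeset.
#
#     from mercurial import cmdutil
#
#     def _topicheader(seqno, ctx):
#         topic = ctx.extra().get(b'topic')
#         return b'Topic %s' % topic if topic else None
#
#     cmdutil.extraexport.append(b'topic')
#     cmdutil.extraexportmap[b'topic'] = _topicheader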
2089 2097
2090 2098
2091 2099 def _exportsingle(repo, ctx, fm, match, switch_parent, seqno, diffopts):
2092 2100 node = scmutil.binnode(ctx)
2093 2101 parents = [p.node() for p in ctx.parents() if p]
2094 2102 branch = ctx.branch()
2095 2103 if switch_parent:
2096 2104 parents.reverse()
2097 2105
2098 2106 if parents:
2099 2107 prev = parents[0]
2100 2108 else:
2101 2109 prev = repo.nullid
2102 2110
2103 2111 fm.context(ctx=ctx)
2104 2112 fm.plain(b'# HG changeset patch\n')
2105 2113 fm.write(b'user', b'# User %s\n', ctx.user())
2106 2114 fm.plain(b'# Date %d %d\n' % ctx.date())
2107 2115 fm.write(b'date', b'# %s\n', fm.formatdate(ctx.date()))
2108 2116 fm.condwrite(
2109 2117 branch and branch != b'default', b'branch', b'# Branch %s\n', branch
2110 2118 )
2111 2119 fm.write(b'node', b'# Node ID %s\n', hex(node))
2112 2120 fm.plain(b'# Parent %s\n' % hex(prev))
2113 2121 if len(parents) > 1:
2114 2122 fm.plain(b'# Parent %s\n' % hex(parents[1]))
2115 2123 fm.data(parents=fm.formatlist(pycompat.maplist(hex, parents), name=b'node'))
2116 2124
2117 2125 # TODO: redesign extraexportmap function to support formatter
2118 2126 for headerid in extraexport:
2119 2127 header = extraexportmap[headerid](seqno, ctx)
2120 2128 if header is not None:
2121 2129 fm.plain(b'# %s\n' % header)
2122 2130
2123 2131 fm.write(b'desc', b'%s\n', ctx.description().rstrip())
2124 2132 fm.plain(b'\n')
2125 2133
2126 2134 if fm.isplain():
2127 2135 chunkiter = patch.diffui(repo, prev, node, match, opts=diffopts)
2128 2136 for chunk, label in chunkiter:
2129 2137 fm.plain(chunk, label=label)
2130 2138 else:
2131 2139 chunkiter = patch.diff(repo, prev, node, match, opts=diffopts)
2132 2140 # TODO: make it structured?
2133 2141 fm.data(diff=b''.join(chunkiter))
2134 2142
2135 2143
2136 2144 def _exportfile(repo, revs, fm, dest, switch_parent, diffopts, match):
2137 2145 """Export changesets to stdout or a single file"""
2138 2146 for seqno, rev in enumerate(revs, 1):
2139 2147 ctx = repo[rev]
2140 2148 if not dest.startswith(b'<'):
2141 2149 repo.ui.note(b"%s\n" % dest)
2142 2150 fm.startitem()
2143 2151 _exportsingle(repo, ctx, fm, match, switch_parent, seqno, diffopts)
2144 2152
2145 2153
2146 2154 def _exportfntemplate(
2147 2155 repo, revs, basefm, fntemplate, switch_parent, diffopts, match
2148 2156 ):
2149 2157 """Export changesets to possibly multiple files"""
2150 2158 total = len(revs)
2151 2159 revwidth = max(len(str(rev)) for rev in revs)
2152 2160 filemap = util.sortdict() # filename: [(seqno, rev), ...]
2153 2161
2154 2162 for seqno, rev in enumerate(revs, 1):
2155 2163 ctx = repo[rev]
2156 2164 dest = makefilename(
2157 2165 ctx, fntemplate, total=total, seqno=seqno, revwidth=revwidth
2158 2166 )
2159 2167 filemap.setdefault(dest, []).append((seqno, rev))
2160 2168
2161 2169 for dest in filemap:
2162 2170 with formatter.maybereopen(basefm, dest) as fm:
2163 2171 repo.ui.note(b"%s\n" % dest)
2164 2172 for seqno, rev in filemap[dest]:
2165 2173 fm.startitem()
2166 2174 ctx = repo[rev]
2167 2175 _exportsingle(
2168 2176 repo, ctx, fm, match, switch_parent, seqno, diffopts
2169 2177 )
2170 2178
2171 2179
2172 2180 def _prefetchchangedfiles(repo, revs, match):
2173 2181 allfiles = set()
2174 2182 for rev in revs:
2175 2183 for file in repo[rev].files():
2176 2184 if not match or match(file):
2177 2185 allfiles.add(file)
2178 2186 match = scmutil.matchfiles(repo, allfiles)
2179 2187 revmatches = [(rev, match) for rev in revs]
2180 2188 scmutil.prefetchfiles(repo, revmatches)
2181 2189
2182 2190
2183 2191 def export(
2184 2192 repo,
2185 2193 revs,
2186 2194 basefm,
2187 2195 fntemplate=b'hg-%h.patch',
2188 2196 switch_parent=False,
2189 2197 opts=None,
2190 2198 match=None,
2191 2199 ):
2192 2200 """export changesets as hg patches
2193 2201
2194 2202 Args:
2195 2203 repo: The repository from which we're exporting revisions.
2196 2204 revs: A list of revisions to export as revision numbers.
2197 2205 basefm: A formatter to which patches should be written.
2198 2206 fntemplate: An optional string to use for generating patch file names.
2199 2207 switch_parent: If True, show diffs against second parent when not nullid.
2200 2208 Default is false, which always shows diff against p1.
2201 2209 opts: diff options to use for generating the patch.
2202 2210 match: If specified, only export changes to files matching this matcher.
2203 2211
2204 2212 Returns:
2205 2213 Nothing.
2206 2214
2207 2215 Side Effect:
2208 2216 "HG Changeset Patch" data is emitted to one of the following
2209 2217 destinations:
2210 2218 fntemplate specified: Each rev is written to a unique file named using
2211 2219 the given template.
2212 2220 Otherwise: All revs will be written to basefm.
2213 2221 """
2214 2222 _prefetchchangedfiles(repo, revs, match)
2215 2223
2216 2224 if not fntemplate:
2217 2225 _exportfile(
2218 2226 repo, revs, basefm, b'<unnamed>', switch_parent, opts, match
2219 2227 )
2220 2228 else:
2221 2229 _exportfntemplate(
2222 2230 repo, revs, basefm, fntemplate, switch_parent, opts, match
2223 2231 )
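
# Example (sketch, for illustration only; the revset and diff options are
# arbitrary, and the usual 'from mercurial import cmdutil, patch, scmutil'
# imports are assumed): a caller-side view of export(). With a file name
# template one patch file is written per revision; without one, everything
# goes to the formatter passed as basefm.
#
#     revs = scmutil.revrange(repo, [b'draft()'])
#     with ui.formatter(b'export', {}) as basefm:
#         cmdutil.export(
#             repo,
#             revs,
#             basefm,
#             fntemplate=b'hg-%h.patch',
#             opts=patch.diffallopts(ui),
#         )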
2224 2232
2225 2233
2226 2234 def exportfile(repo, revs, fp, switch_parent=False, opts=None, match=None):
2227 2235 """Export changesets to the given file stream"""
2228 2236 _prefetchchangedfiles(repo, revs, match)
2229 2237
2230 2238 dest = getattr(fp, 'name', b'<unnamed>')
2231 2239 with formatter.formatter(repo.ui, fp, b'export', {}) as fm:
2232 2240 _exportfile(repo, revs, fm, dest, switch_parent, opts, match)
2233 2241
2234 2242
2235 2243 def showmarker(fm, marker, index=None):
2236 2244 """utility function to display obsolescence marker in a readable way
2237 2245
2238 2246 To be used by debug function."""
2239 2247 if index is not None:
2240 2248 fm.write(b'index', b'%i ', index)
2241 2249 fm.write(b'prednode', b'%s ', hex(marker.prednode()))
2242 2250 succs = marker.succnodes()
2243 2251 fm.condwrite(
2244 2252 succs,
2245 2253 b'succnodes',
2246 2254 b'%s ',
2247 2255 fm.formatlist(map(hex, succs), name=b'node'),
2248 2256 )
2249 2257 fm.write(b'flag', b'%X ', marker.flags())
2250 2258 parents = marker.parentnodes()
2251 2259 if parents is not None:
2252 2260 fm.write(
2253 2261 b'parentnodes',
2254 2262 b'{%s} ',
2255 2263 fm.formatlist(map(hex, parents), name=b'node', sep=b', '),
2256 2264 )
2257 2265 fm.write(b'date', b'(%s) ', fm.formatdate(marker.date()))
2258 2266 meta = marker.metadata().copy()
2259 2267 meta.pop(b'date', None)
2260 2268 smeta = pycompat.rapply(pycompat.maybebytestr, meta)
2261 2269 fm.write(
2262 2270 b'metadata', b'{%s}', fm.formatdict(smeta, fmt=b'%r: %r', sep=b', ')
2263 2271 )
2264 2272 fm.plain(b'\n')
2265 2273
2266 2274
2267 2275 def finddate(ui, repo, date):
2268 2276 """Find the tipmost changeset that matches the given date spec"""
2269 2277 mrevs = repo.revs(b'date(%s)', date)
2270 2278 try:
2271 2279 rev = mrevs.max()
2272 2280 except ValueError:
2273 2281 raise error.InputError(_(b"revision matching date not found"))
2274 2282
2275 2283 ui.status(
2276 2284 _(b"found revision %d from %s\n")
2277 2285 % (rev, dateutil.datestr(repo[rev].date()))
2278 2286 )
2279 2287 return b'%d' % rev
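
# Example (sketch; the date spec is illustrative): the spec is anything the
# 'date(...)' revset predicate accepts, and the return value is the matching
# revision number rendered as bytes, so it can be fed back into lookups:
#
#     rev = cmdutil.finddate(ui, repo, b'may 2021 to jul 2021')
#     ctx = scmutil.revsingle(repo, rev)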
2280 2288
2281 2289
2282 2290 def add(ui, repo, match, prefix, uipathfn, explicitonly, **opts):
2283 2291 bad = []
2284 2292
2285 2293 badfn = lambda x, y: bad.append(x) or match.bad(x, y)
2286 2294 names = []
2287 2295 wctx = repo[None]
2288 2296 cca = None
2289 2297 abort, warn = scmutil.checkportabilityalert(ui)
2290 2298 if abort or warn:
2291 2299 cca = scmutil.casecollisionauditor(ui, abort, repo.dirstate)
2292 2300
2293 2301 match = repo.narrowmatch(match, includeexact=True)
2294 2302 badmatch = matchmod.badmatch(match, badfn)
2295 2303 dirstate = repo.dirstate
2296 2304 # We don't want to just call wctx.walk here, since it would return a lot of
2297 2305 # clean files, which we aren't interested in and takes time.
2298 2306 for f in sorted(
2299 2307 dirstate.walk(
2300 2308 badmatch,
2301 2309 subrepos=sorted(wctx.substate),
2302 2310 unknown=True,
2303 2311 ignored=False,
2304 2312 full=False,
2305 2313 )
2306 2314 ):
2307 2315 exact = match.exact(f)
2308 2316 if exact or not explicitonly and f not in wctx and repo.wvfs.lexists(f):
2309 2317 if cca:
2310 2318 cca(f)
2311 2319 names.append(f)
2312 2320 if ui.verbose or not exact:
2313 2321 ui.status(
2314 2322 _(b'adding %s\n') % uipathfn(f), label=b'ui.addremove.added'
2315 2323 )
2316 2324
2317 2325 for subpath in sorted(wctx.substate):
2318 2326 sub = wctx.sub(subpath)
2319 2327 try:
2320 2328 submatch = matchmod.subdirmatcher(subpath, match)
2321 2329 subprefix = repo.wvfs.reljoin(prefix, subpath)
2322 2330 subuipathfn = scmutil.subdiruipathfn(subpath, uipathfn)
2323 2331 if opts.get('subrepos'):
2324 2332 bad.extend(
2325 2333 sub.add(ui, submatch, subprefix, subuipathfn, False, **opts)
2326 2334 )
2327 2335 else:
2328 2336 bad.extend(
2329 2337 sub.add(ui, submatch, subprefix, subuipathfn, True, **opts)
2330 2338 )
2331 2339 except error.LookupError:
2332 2340 ui.status(
2333 2341 _(b"skipping missing subrepository: %s\n") % uipathfn(subpath)
2334 2342 )
2335 2343
2336 2344 if not opts.get('dry_run'):
2337 2345 rejected = wctx.add(names, prefix)
2338 2346 bad.extend(f for f in rejected if f in match.files())
2339 2347 return bad
2340 2348
2341 2349
2342 2350 def addwebdirpath(repo, serverpath, webconf):
2343 2351 webconf[serverpath] = repo.root
2344 2352 repo.ui.debug(b'adding %s = %s\n' % (serverpath, repo.root))
2345 2353
2346 2354 for r in repo.revs(b'filelog("path:.hgsub")'):
2347 2355 ctx = repo[r]
2348 2356 for subpath in ctx.substate:
2349 2357 ctx.sub(subpath).addwebdirpath(serverpath, webconf)
2350 2358
2351 2359
2352 2360 def forget(
2353 2361 ui, repo, match, prefix, uipathfn, explicitonly, dryrun, interactive
2354 2362 ):
2355 2363 if dryrun and interactive:
2356 2364 raise error.InputError(
2357 2365 _(b"cannot specify both --dry-run and --interactive")
2358 2366 )
2359 2367 bad = []
2360 2368 badfn = lambda x, y: bad.append(x) or match.bad(x, y)
2361 2369 wctx = repo[None]
2362 2370 forgot = []
2363 2371
2364 2372 s = repo.status(match=matchmod.badmatch(match, badfn), clean=True)
2365 2373 forget = sorted(s.modified + s.added + s.deleted + s.clean)
2366 2374 if explicitonly:
2367 2375 forget = [f for f in forget if match.exact(f)]
2368 2376
2369 2377 for subpath in sorted(wctx.substate):
2370 2378 sub = wctx.sub(subpath)
2371 2379 submatch = matchmod.subdirmatcher(subpath, match)
2372 2380 subprefix = repo.wvfs.reljoin(prefix, subpath)
2373 2381 subuipathfn = scmutil.subdiruipathfn(subpath, uipathfn)
2374 2382 try:
2375 2383 subbad, subforgot = sub.forget(
2376 2384 submatch,
2377 2385 subprefix,
2378 2386 subuipathfn,
2379 2387 dryrun=dryrun,
2380 2388 interactive=interactive,
2381 2389 )
2382 2390 bad.extend([subpath + b'/' + f for f in subbad])
2383 2391 forgot.extend([subpath + b'/' + f for f in subforgot])
2384 2392 except error.LookupError:
2385 2393 ui.status(
2386 2394 _(b"skipping missing subrepository: %s\n") % uipathfn(subpath)
2387 2395 )
2388 2396
2389 2397 if not explicitonly:
2390 2398 for f in match.files():
2391 2399 if f not in repo.dirstate and not repo.wvfs.isdir(f):
2392 2400 if f not in forgot:
2393 2401 if repo.wvfs.exists(f):
2394 2402 # Don't complain if the exact case match wasn't given.
2395 2403 # But don't do this until after checking 'forgot', so
2396 2404 # that subrepo files aren't normalized, and this op is
2397 2405 # purely from data cached by the status walk above.
2398 2406 if repo.dirstate.normalize(f) in repo.dirstate:
2399 2407 continue
2400 2408 ui.warn(
2401 2409 _(
2402 2410 b'not removing %s: '
2403 2411 b'file is already untracked\n'
2404 2412 )
2405 2413 % uipathfn(f)
2406 2414 )
2407 2415 bad.append(f)
2408 2416
2409 2417 if interactive:
2410 2418 responses = _(
2411 2419 b'[Ynsa?]'
2412 2420 b'$$ &Yes, forget this file'
2413 2421 b'$$ &No, skip this file'
2414 2422 b'$$ &Skip remaining files'
2415 2423 b'$$ Include &all remaining files'
2416 2424 b'$$ &? (display help)'
2417 2425 )
2418 2426 for filename in forget[:]:
2419 2427 r = ui.promptchoice(
2420 2428 _(b'forget %s %s') % (uipathfn(filename), responses)
2421 2429 )
2422 2430 if r == 4: # ?
2423 2431 while r == 4:
2424 2432 for c, t in ui.extractchoices(responses)[1]:
2425 2433 ui.write(b'%s - %s\n' % (c, encoding.lower(t)))
2426 2434 r = ui.promptchoice(
2427 2435 _(b'forget %s %s') % (uipathfn(filename), responses)
2428 2436 )
2429 2437 if r == 0: # yes
2430 2438 continue
2431 2439 elif r == 1: # no
2432 2440 forget.remove(filename)
2433 2441 elif r == 2: # Skip
2434 2442 fnindex = forget.index(filename)
2435 2443 del forget[fnindex:]
2436 2444 break
2437 2445 elif r == 3: # All
2438 2446 break
2439 2447
2440 2448 for f in forget:
2441 2449 if ui.verbose or not match.exact(f) or interactive:
2442 2450 ui.status(
2443 2451 _(b'removing %s\n') % uipathfn(f), label=b'ui.addremove.removed'
2444 2452 )
2445 2453
2446 2454 if not dryrun:
2447 2455 rejected = wctx.forget(forget, prefix)
2448 2456 bad.extend(f for f in rejected if f in match.files())
2449 2457 forgot.extend(f for f in forget if f not in rejected)
2450 2458 return bad, forgot
2451 2459
2452 2460
2453 2461 def files(ui, ctx, m, uipathfn, fm, fmt, subrepos):
2454 2462 ret = 1
2455 2463
2456 2464 needsfctx = ui.verbose or {b'size', b'flags'} & fm.datahint()
2457 2465 if fm.isplain() and not needsfctx:
2458 2466 # Fast path. The speed-up comes from skipping the formatter, and batching
2459 2467 # calls to ui.write.
2460 2468 buf = []
2461 2469 for f in ctx.matches(m):
2462 2470 buf.append(fmt % uipathfn(f))
2463 2471 if len(buf) > 100:
2464 2472 ui.write(b''.join(buf))
2465 2473 del buf[:]
2466 2474 ret = 0
2467 2475 if buf:
2468 2476 ui.write(b''.join(buf))
2469 2477 else:
2470 2478 for f in ctx.matches(m):
2471 2479 fm.startitem()
2472 2480 fm.context(ctx=ctx)
2473 2481 if needsfctx:
2474 2482 fc = ctx[f]
2475 2483 fm.write(b'size flags', b'% 10d % 1s ', fc.size(), fc.flags())
2476 2484 fm.data(path=f)
2477 2485 fm.plain(fmt % uipathfn(f))
2478 2486 ret = 0
2479 2487
2480 2488 for subpath in sorted(ctx.substate):
2481 2489 submatch = matchmod.subdirmatcher(subpath, m)
2482 2490 subuipathfn = scmutil.subdiruipathfn(subpath, uipathfn)
2483 2491 if subrepos or m.exact(subpath) or any(submatch.files()):
2484 2492 sub = ctx.sub(subpath)
2485 2493 try:
2486 2494 recurse = m.exact(subpath) or subrepos
2487 2495 if (
2488 2496 sub.printfiles(ui, submatch, subuipathfn, fm, fmt, recurse)
2489 2497 == 0
2490 2498 ):
2491 2499 ret = 0
2492 2500 except error.LookupError:
2493 2501 ui.status(
2494 2502 _(b"skipping missing subrepository: %s\n")
2495 2503 % uipathfn(subpath)
2496 2504 )
2497 2505
2498 2506 return ret
2499 2507
2500 2508
2501 2509 def remove(
2502 2510 ui, repo, m, prefix, uipathfn, after, force, subrepos, dryrun, warnings=None
2503 2511 ):
2504 2512 ret = 0
2505 2513 s = repo.status(match=m, clean=True)
2506 2514 modified, added, deleted, clean = s.modified, s.added, s.deleted, s.clean
2507 2515
2508 2516 wctx = repo[None]
2509 2517
2510 2518 if warnings is None:
2511 2519 warnings = []
2512 2520 warn = True
2513 2521 else:
2514 2522 warn = False
2515 2523
2516 2524 subs = sorted(wctx.substate)
2517 2525 progress = ui.makeprogress(
2518 2526 _(b'searching'), total=len(subs), unit=_(b'subrepos')
2519 2527 )
2520 2528 for subpath in subs:
2521 2529 submatch = matchmod.subdirmatcher(subpath, m)
2522 2530 subprefix = repo.wvfs.reljoin(prefix, subpath)
2523 2531 subuipathfn = scmutil.subdiruipathfn(subpath, uipathfn)
2524 2532 if subrepos or m.exact(subpath) or any(submatch.files()):
2525 2533 progress.increment()
2526 2534 sub = wctx.sub(subpath)
2527 2535 try:
2528 2536 if sub.removefiles(
2529 2537 submatch,
2530 2538 subprefix,
2531 2539 subuipathfn,
2532 2540 after,
2533 2541 force,
2534 2542 subrepos,
2535 2543 dryrun,
2536 2544 warnings,
2537 2545 ):
2538 2546 ret = 1
2539 2547 except error.LookupError:
2540 2548 warnings.append(
2541 2549 _(b"skipping missing subrepository: %s\n")
2542 2550 % uipathfn(subpath)
2543 2551 )
2544 2552 progress.complete()
2545 2553
2546 2554 # warn about failure to delete explicit files/dirs
2547 2555 deleteddirs = pathutil.dirs(deleted)
2548 2556 files = m.files()
2549 2557 progress = ui.makeprogress(
2550 2558 _(b'deleting'), total=len(files), unit=_(b'files')
2551 2559 )
2552 2560 for f in files:
2553 2561
2554 2562 def insubrepo():
2555 2563 for subpath in wctx.substate:
2556 2564 if f.startswith(subpath + b'/'):
2557 2565 return True
2558 2566 return False
2559 2567
2560 2568 progress.increment()
2561 2569 isdir = f in deleteddirs or wctx.hasdir(f)
2562 2570 if f in repo.dirstate or isdir or f == b'.' or insubrepo() or f in subs:
2563 2571 continue
2564 2572
2565 2573 if repo.wvfs.exists(f):
2566 2574 if repo.wvfs.isdir(f):
2567 2575 warnings.append(
2568 2576 _(b'not removing %s: no tracked files\n') % uipathfn(f)
2569 2577 )
2570 2578 else:
2571 2579 warnings.append(
2572 2580 _(b'not removing %s: file is untracked\n') % uipathfn(f)
2573 2581 )
2574 2582 # missing files will generate a warning elsewhere
2575 2583 ret = 1
2576 2584 progress.complete()
2577 2585
2578 2586 if force:
2579 2587 list = modified + deleted + clean + added
2580 2588 elif after:
2581 2589 list = deleted
2582 2590 remaining = modified + added + clean
2583 2591 progress = ui.makeprogress(
2584 2592 _(b'skipping'), total=len(remaining), unit=_(b'files')
2585 2593 )
2586 2594 for f in remaining:
2587 2595 progress.increment()
2588 2596 if ui.verbose or (f in files):
2589 2597 warnings.append(
2590 2598 _(b'not removing %s: file still exists\n') % uipathfn(f)
2591 2599 )
2592 2600 ret = 1
2593 2601 progress.complete()
2594 2602 else:
2595 2603 list = deleted + clean
2596 2604 progress = ui.makeprogress(
2597 2605 _(b'skipping'), total=(len(modified) + len(added)), unit=_(b'files')
2598 2606 )
2599 2607 for f in modified:
2600 2608 progress.increment()
2601 2609 warnings.append(
2602 2610 _(
2603 2611 b'not removing %s: file is modified (use -f'
2604 2612 b' to force removal)\n'
2605 2613 )
2606 2614 % uipathfn(f)
2607 2615 )
2608 2616 ret = 1
2609 2617 for f in added:
2610 2618 progress.increment()
2611 2619 warnings.append(
2612 2620 _(
2613 2621 b"not removing %s: file has been marked for add"
2614 2622 b" (use 'hg forget' to undo add)\n"
2615 2623 )
2616 2624 % uipathfn(f)
2617 2625 )
2618 2626 ret = 1
2619 2627 progress.complete()
2620 2628
2621 2629 list = sorted(list)
2622 2630 progress = ui.makeprogress(
2623 2631 _(b'deleting'), total=len(list), unit=_(b'files')
2624 2632 )
2625 2633 for f in list:
2626 2634 if ui.verbose or not m.exact(f):
2627 2635 progress.increment()
2628 2636 ui.status(
2629 2637 _(b'removing %s\n') % uipathfn(f), label=b'ui.addremove.removed'
2630 2638 )
2631 2639 progress.complete()
2632 2640
2633 2641 if not dryrun:
2634 2642 with repo.wlock():
2635 2643 if not after:
2636 2644 for f in list:
2637 2645 if f in added:
2638 2646 continue # we never unlink added files on remove
2639 2647 rmdir = repo.ui.configbool(
2640 2648 b'experimental', b'removeemptydirs'
2641 2649 )
2642 2650 repo.wvfs.unlinkpath(f, ignoremissing=True, rmdir=rmdir)
2643 2651 repo[None].forget(list)
2644 2652
2645 2653 if warn:
2646 2654 for warning in warnings:
2647 2655 ui.warn(warning)
2648 2656
2649 2657 return ret
2650 2658
2651 2659
2652 2660 def _catfmtneedsdata(fm):
2653 2661 return not fm.datahint() or b'data' in fm.datahint()
2654 2662
2655 2663
2656 2664 def _updatecatformatter(fm, ctx, matcher, path, decode):
2657 2665 """Hook for adding data to the formatter used by ``hg cat``.
2658 2666
2659 2667 Extensions (e.g., lfs) can wrap this to inject keywords/data, but must call
2660 2668 this method first."""
2661 2669
2662 2670 # data() can be expensive to fetch (e.g. lfs), so don't fetch it if it
2663 2671 # wasn't requested.
2664 2672 data = b''
2665 2673 if _catfmtneedsdata(fm):
2666 2674 data = ctx[path].data()
2667 2675 if decode:
2668 2676 data = ctx.repo().wwritedata(path, data)
2669 2677 fm.startitem()
2670 2678 fm.context(ctx=ctx)
2671 2679 fm.write(b'data', b'%s', data)
2672 2680 fm.data(path=path)
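
# Example (hypothetical extension code, for illustration only): wrapping the
# hook above to expose an extra field to 'hg cat -T', calling the original
# first as the docstring requires.
#
#     from mercurial import cmdutil, extensions
#
#     def _wrappedcatformatter(orig, fm, ctx, matcher, path, decode):
#         orig(fm, ctx, matcher, path, decode)
#         fm.data(node=ctx.hex())
#
#     def extsetup(ui):
#         extensions.wrapfunction(
#             cmdutil, '_updatecatformatter', _wrappedcatformatter
#         )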
2673 2681
2674 2682
2675 2683 def cat(ui, repo, ctx, matcher, basefm, fntemplate, prefix, **opts):
2676 2684 err = 1
2677 2685 opts = pycompat.byteskwargs(opts)
2678 2686
2679 2687 def write(path):
2680 2688 filename = None
2681 2689 if fntemplate:
2682 2690 filename = makefilename(
2683 2691 ctx, fntemplate, pathname=os.path.join(prefix, path)
2684 2692 )
2685 2693 # attempt to create the directory if it does not already exist
2686 2694 try:
2687 2695 os.makedirs(os.path.dirname(filename))
2688 2696 except OSError:
2689 2697 pass
2690 2698 with formatter.maybereopen(basefm, filename) as fm:
2691 2699 _updatecatformatter(fm, ctx, matcher, path, opts.get(b'decode'))
2692 2700
2693 2701 # Automation often uses hg cat on single files, so special case it
2694 2702 # for performance to avoid the cost of parsing the manifest.
2695 2703 if len(matcher.files()) == 1 and not matcher.anypats():
2696 2704 file = matcher.files()[0]
2697 2705 mfl = repo.manifestlog
2698 2706 mfnode = ctx.manifestnode()
2699 2707 try:
2700 2708 if mfnode and mfl[mfnode].find(file)[0]:
2701 2709 if _catfmtneedsdata(basefm):
2702 2710 scmutil.prefetchfiles(repo, [(ctx.rev(), matcher)])
2703 2711 write(file)
2704 2712 return 0
2705 2713 except KeyError:
2706 2714 pass
2707 2715
2708 2716 if _catfmtneedsdata(basefm):
2709 2717 scmutil.prefetchfiles(repo, [(ctx.rev(), matcher)])
2710 2718
2711 2719 for abs in ctx.walk(matcher):
2712 2720 write(abs)
2713 2721 err = 0
2714 2722
2715 2723 uipathfn = scmutil.getuipathfn(repo, legacyrelativevalue=True)
2716 2724 for subpath in sorted(ctx.substate):
2717 2725 sub = ctx.sub(subpath)
2718 2726 try:
2719 2727 submatch = matchmod.subdirmatcher(subpath, matcher)
2720 2728 subprefix = os.path.join(prefix, subpath)
2721 2729 if not sub.cat(
2722 2730 submatch,
2723 2731 basefm,
2724 2732 fntemplate,
2725 2733 subprefix,
2726 2734 **pycompat.strkwargs(opts)
2727 2735 ):
2728 2736 err = 0
2729 2737 except error.RepoLookupError:
2730 2738 ui.status(
2731 2739 _(b"skipping missing subrepository: %s\n") % uipathfn(subpath)
2732 2740 )
2733 2741
2734 2742 return err
2735 2743
2736 2744
2737 2745 def commit(ui, repo, commitfunc, pats, opts):
2738 2746 '''commit the specified files or all outstanding changes'''
2739 2747 date = opts.get(b'date')
2740 2748 if date:
2741 2749 opts[b'date'] = dateutil.parsedate(date)
2742 2750 message = logmessage(ui, opts)
2743 2751 matcher = scmutil.match(repo[None], pats, opts)
2744 2752
2745 2753 dsguard = None
2746 2754 # extract addremove carefully -- this function can be called from a command
2747 2755 # that doesn't support addremove
2748 2756 if opts.get(b'addremove'):
2749 2757 dsguard = dirstateguard.dirstateguard(repo, b'commit')
2750 2758 with dsguard or util.nullcontextmanager():
2751 2759 if dsguard:
2752 2760 relative = scmutil.anypats(pats, opts)
2753 2761 uipathfn = scmutil.getuipathfn(repo, legacyrelativevalue=relative)
2754 2762 if scmutil.addremove(repo, matcher, b"", uipathfn, opts) != 0:
2755 2763 raise error.Abort(
2756 2764 _(b"failed to mark all new/missing files as added/removed")
2757 2765 )
2758 2766
2759 2767 return commitfunc(ui, repo, message, matcher, opts)
2760 2768
2761 2769
2762 2770 def samefile(f, ctx1, ctx2):
2763 2771 if f in ctx1.manifest():
2764 2772 a = ctx1.filectx(f)
2765 2773 if f in ctx2.manifest():
2766 2774 b = ctx2.filectx(f)
2767 2775 return not a.cmp(b) and a.flags() == b.flags()
2768 2776 else:
2769 2777 return False
2770 2778 else:
2771 2779 return f not in ctx2.manifest()
2772 2780
2773 2781
2774 2782 def amend(ui, repo, old, extra, pats, opts):
2775 2783 # avoid cycle context -> subrepo -> cmdutil
2776 2784 from . import context
2777 2785
2778 2786 # amend will reuse the existing user if not specified, but the obsolete
2779 2787 # marker creation requires that the current user's name is specified.
2780 2788 if obsolete.isenabled(repo, obsolete.createmarkersopt):
2781 2789 ui.username() # raise exception if username not set
2782 2790
2783 2791 ui.note(_(b'amending changeset %s\n') % old)
2784 2792 base = old.p1()
2785 2793
2786 2794 with repo.wlock(), repo.lock(), repo.transaction(b'amend'):
2787 2795 # Participating changesets:
2788 2796 #
2789 2797 # wctx o - workingctx that contains changes from working copy
2790 2798 # | to go into amending commit
2791 2799 # |
2792 2800 # old o - changeset to amend
2793 2801 # |
2794 2802 # base o - first parent of the changeset to amend
2795 2803 wctx = repo[None]
2796 2804
2797 2805 # Copy to avoid mutating input
2798 2806 extra = extra.copy()
2799 2807 # Update extra dict from amended commit (e.g. to preserve graft
2800 2808 # source)
2801 2809 extra.update(old.extra())
2802 2810
2803 2811 # Also update it from the wctx
2804 2812 extra.update(wctx.extra())
2805 2813
2806 2814 # date-only change should be ignored?
2807 2815 datemaydiffer = resolvecommitoptions(ui, opts)
2808 2816
2809 2817 date = old.date()
2810 2818 if opts.get(b'date'):
2811 2819 date = dateutil.parsedate(opts.get(b'date'))
2812 2820 user = opts.get(b'user') or old.user()
2813 2821
2814 2822 if len(old.parents()) > 1:
2815 2823 # ctx.files() isn't reliable for merges, so fall back to the
2816 2824 # slower repo.status() method
2817 2825 st = base.status(old)
2818 2826 files = set(st.modified) | set(st.added) | set(st.removed)
2819 2827 else:
2820 2828 files = set(old.files())
2821 2829
2822 2830 # add/remove the files to the working copy if the "addremove" option
2823 2831 # was specified.
2824 2832 matcher = scmutil.match(wctx, pats, opts)
2825 2833 relative = scmutil.anypats(pats, opts)
2826 2834 uipathfn = scmutil.getuipathfn(repo, legacyrelativevalue=relative)
2827 2835 if opts.get(b'addremove') and scmutil.addremove(
2828 2836 repo, matcher, b"", uipathfn, opts
2829 2837 ):
2830 2838 raise error.Abort(
2831 2839 _(b"failed to mark all new/missing files as added/removed")
2832 2840 )
2833 2841
2834 2842 # Check subrepos. This depends on in-place wctx._status update in
2835 2843 # subrepo.precommit(). To minimize the risk of this hack, we do
2836 2844 # nothing if .hgsub does not exist.
2837 2845 if b'.hgsub' in wctx or b'.hgsub' in old:
2838 2846 subs, commitsubs, newsubstate = subrepoutil.precommit(
2839 2847 ui, wctx, wctx._status, matcher
2840 2848 )
2841 2849 # amend should abort if commitsubrepos is enabled
2842 2850 assert not commitsubs
2843 2851 if subs:
2844 2852 subrepoutil.writestate(repo, newsubstate)
2845 2853
2846 2854 ms = mergestatemod.mergestate.read(repo)
2847 2855 mergeutil.checkunresolved(ms)
2848 2856
2849 2857 filestoamend = {f for f in wctx.files() if matcher(f)}
2850 2858
2851 2859 changes = len(filestoamend) > 0
2852 2860 if changes:
2853 2861 # Recompute copies (avoid recording a -> b -> a)
2854 2862 copied = copies.pathcopies(base, wctx, matcher)
2855 2863 if old.p2().rev() != nullrev:
2856 2864 copied.update(copies.pathcopies(old.p2(), wctx, matcher))
2857 2865
2858 2866 # Prune files which were reverted by the updates: if old
2859 2867 # introduced file X and the file was reverted in the working
2860 2868 # copy, then those two files are the same and
2861 2869 # we can discard X from our list of files. Likewise if X
2862 2870 # was removed, it's no longer relevant. If X is missing (aka
2863 2871 # deleted), old X must be preserved.
2864 2872 files.update(filestoamend)
2865 2873 files = [
2866 2874 f
2867 2875 for f in files
2868 2876 if (f not in filestoamend or not samefile(f, wctx, base))
2869 2877 ]
2870 2878
2871 2879 def filectxfn(repo, ctx_, path):
2872 2880 try:
2873 2881 # If the file being considered is not amongst the files
2874 2882 # to be amended, we should return the file context from the
2875 2883 # old changeset. This avoids issues when only some files in
2876 2884 # the working copy are being amended but there are also
2877 2885 # changes to other files from the old changeset.
2878 2886 if path not in filestoamend:
2879 2887 return old.filectx(path)
2880 2888
2881 2889 # Return None for removed files.
2882 2890 if path in wctx.removed():
2883 2891 return None
2884 2892
2885 2893 fctx = wctx[path]
2886 2894 flags = fctx.flags()
2887 2895 mctx = context.memfilectx(
2888 2896 repo,
2889 2897 ctx_,
2890 2898 fctx.path(),
2891 2899 fctx.data(),
2892 2900 islink=b'l' in flags,
2893 2901 isexec=b'x' in flags,
2894 2902 copysource=copied.get(path),
2895 2903 )
2896 2904 return mctx
2897 2905 except KeyError:
2898 2906 return None
2899 2907
2900 2908 else:
2901 2909 ui.note(_(b'copying changeset %s to %s\n') % (old, base))
2902 2910
2903 2911 # Use version of files as in the old cset
2904 2912 def filectxfn(repo, ctx_, path):
2905 2913 try:
2906 2914 return old.filectx(path)
2907 2915 except KeyError:
2908 2916 return None
2909 2917
2910 2918 # See if we got a message from -m or -l, if not, open the editor with
2911 2919 # the message of the changeset to amend.
2912 2920 message = logmessage(ui, opts)
2913 2921
2914 2922 editform = mergeeditform(old, b'commit.amend')
2915 2923
2916 2924 if not message:
2917 2925 message = old.description()
2918 2926 # Default if message isn't provided and --edit is not passed is to
2919 2927 # invoke editor, but allow --no-edit. If somehow we don't have any
2920 2928 # description, let's always start the editor.
2921 2929 doedit = not message or opts.get(b'edit') in [True, None]
2922 2930 else:
2923 2931 # Default if message is provided is to not invoke editor, but allow
2924 2932 # --edit.
2925 2933 doedit = opts.get(b'edit') is True
2926 2934 editor = getcommiteditor(edit=doedit, editform=editform)
2927 2935
2928 2936 pureextra = extra.copy()
2929 2937 extra[b'amend_source'] = old.hex()
2930 2938
2931 2939 new = context.memctx(
2932 2940 repo,
2933 2941 parents=[base.node(), old.p2().node()],
2934 2942 text=message,
2935 2943 files=files,
2936 2944 filectxfn=filectxfn,
2937 2945 user=user,
2938 2946 date=date,
2939 2947 extra=extra,
2940 2948 editor=editor,
2941 2949 )
2942 2950
2943 2951 newdesc = changelog.stripdesc(new.description())
2944 2952 if (
2945 2953 (not changes)
2946 2954 and newdesc == old.description()
2947 2955 and user == old.user()
2948 2956 and (date == old.date() or datemaydiffer)
2949 2957 and pureextra == old.extra()
2950 2958 ):
2951 2959 # nothing changed. continuing here would create a new node
2952 2960 # anyway because of the amend_source noise.
2953 2961 #
2954 2962 # This is not what we expect from amend.
2955 2963 return old.node()
2956 2964
2957 2965 commitphase = None
2958 2966 if opts.get(b'secret'):
2959 2967 commitphase = phases.secret
2960 2968 newid = repo.commitctx(new)
2961 2969 ms.reset()
2962 2970
2963 2971 # Reroute the working copy parent to the new changeset
2964 2972 repo.setparents(newid, repo.nullid)
2965 2973
2966 2974 # Fixing the dirstate because localrepo.commitctx does not update
2967 2975 # it. This is rather convenient because we did not need to update
2968 2976 # the dirstate for all the files in the new commit which commitctx
2969 2977 # could have done if it updated the dirstate. Now, we can
2970 2978 # selectively update the dirstate only for the amended files.
2971 2979 dirstate = repo.dirstate
2972 2980
2973 2981 # Update the state of the files which were added and modified in the
2974 2982 # amend to "normal" in the dirstate. We need to use "normallookup" since
2975 2983 # the files may have changed since the command started; using "normal"
2976 2984 # would mark them as clean but with uncommitted contents.
2977 2985 normalfiles = set(wctx.modified() + wctx.added()) & filestoamend
2978 2986 for f in normalfiles:
2979 2987 dirstate.normallookup(f)
2980 2988
2981 2989 # Update the state of files which were removed in the amend
2982 2990 # to "removed" in the dirstate.
2983 2991 removedfiles = set(wctx.removed()) & filestoamend
2984 2992 for f in removedfiles:
2985 2993 dirstate.drop(f)
2986 2994
2987 2995 mapping = {old.node(): (newid,)}
2988 2996 obsmetadata = None
2989 2997 if opts.get(b'note'):
2990 2998 obsmetadata = {b'note': encoding.fromlocal(opts[b'note'])}
2991 2999 backup = ui.configbool(b'rewrite', b'backup-bundle')
2992 3000 scmutil.cleanupnodes(
2993 3001 repo,
2994 3002 mapping,
2995 3003 b'amend',
2996 3004 metadata=obsmetadata,
2997 3005 fixphase=True,
2998 3006 targetphase=commitphase,
2999 3007 backup=backup,
3000 3008 )
3001 3009
3002 3010 return newid
3003 3011
3004 3012
3005 3013 def commiteditor(repo, ctx, subs, editform=b''):
3006 3014 if ctx.description():
3007 3015 return ctx.description()
3008 3016 return commitforceeditor(
3009 3017 repo, ctx, subs, editform=editform, unchangedmessagedetection=True
3010 3018 )
3011 3019
3012 3020
3013 3021 def commitforceeditor(
3014 3022 repo,
3015 3023 ctx,
3016 3024 subs,
3017 3025 finishdesc=None,
3018 3026 extramsg=None,
3019 3027 editform=b'',
3020 3028 unchangedmessagedetection=False,
3021 3029 ):
3022 3030 if not extramsg:
3023 3031 extramsg = _(b"Leave message empty to abort commit.")
3024 3032
3025 3033 forms = [e for e in editform.split(b'.') if e]
3026 3034 forms.insert(0, b'changeset')
3027 3035 templatetext = None
3028 3036 while forms:
3029 3037 ref = b'.'.join(forms)
3030 3038 if repo.ui.config(b'committemplate', ref):
3031 3039 templatetext = committext = buildcommittemplate(
3032 3040 repo, ctx, subs, extramsg, ref
3033 3041 )
3034 3042 break
3035 3043 forms.pop()
3036 3044 else:
3037 3045 committext = buildcommittext(repo, ctx, subs, extramsg)
3038 3046
3039 3047 # run editor in the repository root
3040 3048 olddir = encoding.getcwd()
3041 3049 os.chdir(repo.root)
3042 3050
3043 3051 # make in-memory changes visible to external process
3044 3052 tr = repo.currenttransaction()
3045 3053 repo.dirstate.write(tr)
3046 3054 pending = tr and tr.writepending() and repo.root
3047 3055
3048 3056 editortext = repo.ui.edit(
3049 3057 committext,
3050 3058 ctx.user(),
3051 3059 ctx.extra(),
3052 3060 editform=editform,
3053 3061 pending=pending,
3054 3062 repopath=repo.path,
3055 3063 action=b'commit',
3056 3064 )
3057 3065 text = editortext
3058 3066
3059 3067 # strip away anything below this special string (used for editors that want
3060 3068 # to display the diff)
3061 3069 stripbelow = re.search(_linebelow, text, flags=re.MULTILINE)
3062 3070 if stripbelow:
3063 3071 text = text[: stripbelow.start()]
3064 3072
3065 3073 text = re.sub(b"(?m)^HG:.*(\n|$)", b"", text)
3066 3074 os.chdir(olddir)
3067 3075
3068 3076 if finishdesc:
3069 3077 text = finishdesc(text)
3070 3078 if not text.strip():
3071 3079 raise error.InputError(_(b"empty commit message"))
3072 3080 if unchangedmessagedetection and editortext == templatetext:
3073 3081 raise error.InputError(_(b"commit message unchanged"))
3074 3082
3075 3083 return text
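
# Example (illustration of the editform lookup above; the template bodies are
# made up): with configuration like the following, an amend (editform
# b'commit.amend') tries 'changeset.commit.amend', then 'changeset.commit',
# then 'changeset', and the first key that is set is rendered by
# buildcommittemplate() below.
#
#     [committemplate]
#     changeset.commit.amend = {desc}\n\nHG: amending changeset {node|short}\n
#     changeset = {desc}\n\nHG: Enter commit message.\n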
3076 3084
3077 3085
3078 3086 def buildcommittemplate(repo, ctx, subs, extramsg, ref):
3079 3087 ui = repo.ui
3080 3088 spec = formatter.reference_templatespec(ref)
3081 3089 t = logcmdutil.changesettemplater(ui, repo, spec)
3082 3090 t.t.cache.update(
3083 3091 (k, templater.unquotestring(v))
3084 3092 for k, v in repo.ui.configitems(b'committemplate')
3085 3093 )
3086 3094
3087 3095 if not extramsg:
3088 3096 extramsg = b'' # ensure that extramsg is string
3089 3097
3090 3098 ui.pushbuffer()
3091 3099 t.show(ctx, extramsg=extramsg)
3092 3100 return ui.popbuffer()
3093 3101
3094 3102
3095 3103 def hgprefix(msg):
3096 3104 return b"\n".join([b"HG: %s" % a for a in msg.split(b"\n") if a])
3097 3105
3098 3106
3099 3107 def buildcommittext(repo, ctx, subs, extramsg):
3100 3108 edittext = []
3101 3109 modified, added, removed = ctx.modified(), ctx.added(), ctx.removed()
3102 3110 if ctx.description():
3103 3111 edittext.append(ctx.description())
3104 3112 edittext.append(b"")
3105 3113 edittext.append(b"") # Empty line between message and comments.
3106 3114 edittext.append(
3107 3115 hgprefix(
3108 3116 _(
3109 3117 b"Enter commit message."
3110 3118 b" Lines beginning with 'HG:' are removed."
3111 3119 )
3112 3120 )
3113 3121 )
3114 3122 edittext.append(hgprefix(extramsg))
3115 3123 edittext.append(b"HG: --")
3116 3124 edittext.append(hgprefix(_(b"user: %s") % ctx.user()))
3117 3125 if ctx.p2():
3118 3126 edittext.append(hgprefix(_(b"branch merge")))
3119 3127 if ctx.branch():
3120 3128 edittext.append(hgprefix(_(b"branch '%s'") % ctx.branch()))
3121 3129 if bookmarks.isactivewdirparent(repo):
3122 3130 edittext.append(hgprefix(_(b"bookmark '%s'") % repo._activebookmark))
3123 3131 edittext.extend([hgprefix(_(b"subrepo %s") % s) for s in subs])
3124 3132 edittext.extend([hgprefix(_(b"added %s") % f) for f in added])
3125 3133 edittext.extend([hgprefix(_(b"changed %s") % f) for f in modified])
3126 3134 edittext.extend([hgprefix(_(b"removed %s") % f) for f in removed])
3127 3135 if not added and not modified and not removed:
3128 3136 edittext.append(hgprefix(_(b"no files changed")))
3129 3137 edittext.append(b"")
3130 3138
3131 3139 return b"\n".join(edittext)
3132 3140
3133 3141
3134 3142 def commitstatus(repo, node, branch, bheads=None, tip=None, opts=None):
3135 3143 if opts is None:
3136 3144 opts = {}
3137 3145 ctx = repo[node]
3138 3146 parents = ctx.parents()
3139 3147
3140 3148 if tip is not None and repo.changelog.tip() == tip:
3141 3149 # avoid reporting something like "committed new head" when
3142 3150 # recommitting old changesets, and issue a helpful warning
3143 3151 # for most instances
3144 3152 repo.ui.warn(_(b"warning: commit already existed in the repository!\n"))
3145 3153 elif (
3146 3154 not opts.get(b'amend')
3147 3155 and bheads
3148 3156 and node not in bheads
3149 3157 and not any(
3150 3158 p.node() in bheads and p.branch() == branch for p in parents
3151 3159 )
3152 3160 ):
3153 3161 repo.ui.status(_(b'created new head\n'))
3154 3162 # The message is not printed for initial roots. For the other
3155 3163 # changesets, it is printed in the following situations:
3156 3164 #
3157 3165 # Par column: for the 2 parents with ...
3158 3166 # N: null or no parent
3159 3167 # B: parent is on another named branch
3160 3168 # C: parent is a regular non head changeset
3161 3169 # H: parent was a branch head of the current branch
3162 3170 # Msg column: whether we print "created new head" message
3163 3171 # In the following, it is assumed that there already exists some
3164 3172 # initial branch heads of the current branch, otherwise nothing is
3165 3173 # printed anyway.
3166 3174 #
3167 3175 # Par Msg Comment
3168 3176 # N N y additional topo root
3169 3177 #
3170 3178 # B N y additional branch root
3171 3179 # C N y additional topo head
3172 3180 # H N n usual case
3173 3181 #
3174 3182 # B B y weird additional branch root
3175 3183 # C B y branch merge
3176 3184 # H B n merge with named branch
3177 3185 #
3178 3186 # C C y additional head from merge
3179 3187 # C H n merge with a head
3180 3188 #
3181 3189 # H H n head merge: head count decreases
3182 3190
3183 3191 if not opts.get(b'close_branch'):
3184 3192 for r in parents:
3185 3193 if r.closesbranch() and r.branch() == branch:
3186 3194 repo.ui.status(
3187 3195 _(b'reopening closed branch head %d\n') % r.rev()
3188 3196 )
3189 3197
3190 3198 if repo.ui.debugflag:
3191 3199 repo.ui.write(
3192 3200 _(b'committed changeset %d:%s\n') % (ctx.rev(), ctx.hex())
3193 3201 )
3194 3202 elif repo.ui.verbose:
3195 3203 repo.ui.write(_(b'committed changeset %d:%s\n') % (ctx.rev(), ctx))
3196 3204
3197 3205
3198 3206 def postcommitstatus(repo, pats, opts):
3199 3207 return repo.status(match=scmutil.match(repo[None], pats, opts))
3200 3208
3201 3209
3202 3210 def revert(ui, repo, ctx, *pats, **opts):
3203 3211 opts = pycompat.byteskwargs(opts)
3204 3212 parent, p2 = repo.dirstate.parents()
3205 3213 node = ctx.node()
3206 3214
3207 3215 mf = ctx.manifest()
3208 3216 if node == p2:
3209 3217 parent = p2
3210 3218
3211 3219 # need all matching names in dirstate and manifest of target rev,
3212 3220 # so have to walk both. do not print errors if files exist in one
3213 3221 # but not other. in both cases, filesets should be evaluated against
3214 3222 # workingctx to get consistent result (issue4497). this means 'set:**'
3215 3223 # cannot be used to select missing files from target rev.
3216 3224
3217 3225 # `names` is a mapping for all elements in working copy and target revision
3218 3226 # The mapping is in the form:
3219 3227 # <abs path in repo> -> (<path from CWD>, <exactly specified by matcher?>)
3220 3228 names = {}
3221 3229 uipathfn = scmutil.getuipathfn(repo, legacyrelativevalue=True)
3222 3230
3223 3231 with repo.wlock():
3224 3232 ## filling of the `names` mapping
3225 3233 # walk dirstate to fill `names`
3226 3234
3227 3235 interactive = opts.get(b'interactive', False)
3228 3236 wctx = repo[None]
3229 3237 m = scmutil.match(wctx, pats, opts)
3230 3238
3231 3239 # we'll need this later
3232 3240 targetsubs = sorted(s for s in wctx.substate if m(s))
3233 3241
3234 3242 if not m.always():
3235 3243 matcher = matchmod.badmatch(m, lambda x, y: False)
3236 3244 for abs in wctx.walk(matcher):
3237 3245 names[abs] = m.exact(abs)
3238 3246
3239 3247 # walk target manifest to fill `names`
3240 3248
3241 3249 def badfn(path, msg):
3242 3250 if path in names:
3243 3251 return
3244 3252 if path in ctx.substate:
3245 3253 return
3246 3254 path_ = path + b'/'
3247 3255 for f in names:
3248 3256 if f.startswith(path_):
3249 3257 return
3250 3258 ui.warn(b"%s: %s\n" % (uipathfn(path), msg))
3251 3259
3252 3260 for abs in ctx.walk(matchmod.badmatch(m, badfn)):
3253 3261 if abs not in names:
3254 3262 names[abs] = m.exact(abs)
3255 3263
3256 3264 # Find the status of all files in `names`.
3257 3265 m = scmutil.matchfiles(repo, names)
3258 3266
3259 3267 changes = repo.status(
3260 3268 node1=node, match=m, unknown=True, ignored=True, clean=True
3261 3269 )
3262 3270 else:
3263 3271 changes = repo.status(node1=node, match=m)
3264 3272 for kind in changes:
3265 3273 for abs in kind:
3266 3274 names[abs] = m.exact(abs)
3267 3275
3268 3276 m = scmutil.matchfiles(repo, names)
3269 3277
3270 3278 modified = set(changes.modified)
3271 3279 added = set(changes.added)
3272 3280 removed = set(changes.removed)
3273 3281 _deleted = set(changes.deleted)
3274 3282 unknown = set(changes.unknown)
3275 3283 unknown.update(changes.ignored)
3276 3284 clean = set(changes.clean)
3277 3285 modadded = set()
3278 3286
3279 3287 # We need to account for the state of the file in the dirstate,
3280 3288 # even when we revert against something other than the parent. This will
3281 3289 # slightly alter the behavior of revert (doing a backup or not, delete
3282 3290 # or just forget etc).
3283 3291 if parent == node:
3284 3292 dsmodified = modified
3285 3293 dsadded = added
3286 3294 dsremoved = removed
3287 3295 # store all local modifications, useful later for rename detection
3288 3296 localchanges = dsmodified | dsadded
3289 3297 modified, added, removed = set(), set(), set()
3290 3298 else:
3291 3299 changes = repo.status(node1=parent, match=m)
3292 3300 dsmodified = set(changes.modified)
3293 3301 dsadded = set(changes.added)
3294 3302 dsremoved = set(changes.removed)
3295 3303 # store all local modifications, useful later for rename detection
3296 3304 localchanges = dsmodified | dsadded
3297 3305
3298 3306 # only take into account for removes between wc and target
3299 3307 clean |= dsremoved - removed
3300 3308 dsremoved &= removed
3301 3309 # distinguish between dirstate removes and other removes
3302 3310 removed -= dsremoved
3303 3311
3304 3312 modadded = added & dsmodified
3305 3313 added -= modadded
3306 3314
3307 3315 # tell newly modified files apart.
3308 3316 dsmodified &= modified
3309 3317 dsmodified |= modified & dsadded # dirstate added may need backup
3310 3318 modified -= dsmodified
3311 3319
3312 3320 # We need to wait for some post-processing to update this set
3313 3321 # before making the distinction. The dirstate will be used for
3314 3322 # that purpose.
3315 3323 dsadded = added
3316 3324
3317 3325 # in case of merge, files that are actually added can be reported as
3318 3326 # modified, we need to post process the result
3319 3327 if p2 != repo.nullid:
3320 3328 mergeadd = set(dsmodified)
3321 3329 for path in dsmodified:
3322 3330 if path in mf:
3323 3331 mergeadd.remove(path)
3324 3332 dsadded |= mergeadd
3325 3333 dsmodified -= mergeadd
3326 3334
3327 3335 # if f is a rename, update `names` to also revert the source
3328 3336 for f in localchanges:
3329 3337 src = repo.dirstate.copied(f)
3330 3338 # XXX should we check for rename down to target node?
3331 3339 if src and src not in names and repo.dirstate[src] == b'r':
3332 3340 dsremoved.add(src)
3333 3341 names[src] = True
3334 3342
3335 3343 # determine the exact nature of the deleted changesets
3336 3344 deladded = set(_deleted)
3337 3345 for path in _deleted:
3338 3346 if path in mf:
3339 3347 deladded.remove(path)
3340 3348 deleted = _deleted - deladded
3341 3349
3342 3350 # distinguish between files to forget and the others
3343 3351 added = set()
3344 3352 for abs in dsadded:
3345 3353 if repo.dirstate[abs] != b'a':
3346 3354 added.add(abs)
3347 3355 dsadded -= added
3348 3356
3349 3357 for abs in deladded:
3350 3358 if repo.dirstate[abs] == b'a':
3351 3359 dsadded.add(abs)
3352 3360 deladded -= dsadded
3353 3361
3354 3362 # For files marked as removed, we check if an unknown file is present at
3355 3363 # the same path. If such a file exists, it may need to be backed up.
3356 3364 # Making the distinction at this stage helps keep the backup
3357 3365 # logic simpler.
3358 3366 removunk = set()
3359 3367 for abs in removed:
3360 3368 target = repo.wjoin(abs)
3361 3369 if os.path.lexists(target):
3362 3370 removunk.add(abs)
3363 3371 removed -= removunk
3364 3372
3365 3373 dsremovunk = set()
3366 3374 for abs in dsremoved:
3367 3375 target = repo.wjoin(abs)
3368 3376 if os.path.lexists(target):
3369 3377 dsremovunk.add(abs)
3370 3378 dsremoved -= dsremovunk
3371 3379
3372 3380 # action to be actually performed by revert
3373 3381 # (<list of files>, <message>) tuple
3374 3382 actions = {
3375 3383 b'revert': ([], _(b'reverting %s\n')),
3376 3384 b'add': ([], _(b'adding %s\n')),
3377 3385 b'remove': ([], _(b'removing %s\n')),
3378 3386 b'drop': ([], _(b'removing %s\n')),
3379 3387 b'forget': ([], _(b'forgetting %s\n')),
3380 3388 b'undelete': ([], _(b'undeleting %s\n')),
3381 3389 b'noop': (None, _(b'no changes needed to %s\n')),
3382 3390 b'unknown': (None, _(b'file not managed: %s\n')),
3383 3391 }
3384 3392
3385 3393 # "constants" that convey the backup strategy.
3386 3394 # All set to `discard` if `no-backup` is set, to avoid checking
3387 3395 # no_backup lower in the code.
3388 3396 # These values are ordered for comparison purposes
3389 3397 backupinteractive = 3 # do backup if interactively modified
3390 3398 backup = 2 # unconditionally do backup
3391 3399 check = 1 # check if the existing file differs from target
3392 3400 discard = 0 # never do backup
3393 3401 if opts.get(b'no_backup'):
3394 3402 backupinteractive = backup = check = discard
3395 3403 if interactive:
3396 3404 dsmodifiedbackup = backupinteractive
3397 3405 else:
3398 3406 dsmodifiedbackup = backup
3399 3407 tobackup = set()
3400 3408
3401 3409 backupanddel = actions[b'remove']
3402 3410 if not opts.get(b'no_backup'):
3403 3411 backupanddel = actions[b'drop']
3404 3412
3405 3413 disptable = (
3406 3414 # dispatch table:
3407 3415 # file state
3408 3416 # action
3409 3417 # make backup
3410 3418 ## Sets whose results will change files on disk
3411 3419 # Modified compared to target, no local change
3412 3420 (modified, actions[b'revert'], discard),
3413 3421 # Modified compared to target, but local file is deleted
3414 3422 (deleted, actions[b'revert'], discard),
3415 3423 # Modified compared to target, local change
3416 3424 (dsmodified, actions[b'revert'], dsmodifiedbackup),
3417 3425 # Added since target
3418 3426 (added, actions[b'remove'], discard),
3419 3427 # Added in working directory
3420 3428 (dsadded, actions[b'forget'], discard),
3421 3429 # Added since target, have local modification
3422 3430 (modadded, backupanddel, backup),
3423 3431 # Added since target but file is missing in working directory
3424 3432 (deladded, actions[b'drop'], discard),
3425 3433 # Removed since target, before working copy parent
3426 3434 (removed, actions[b'add'], discard),
3427 3435 # Same as `removed` but an unknown file exists at the same path
3428 3436 (removunk, actions[b'add'], check),
3429 3437 # Removed since target, marked as such in working copy parent
3430 3438 (dsremoved, actions[b'undelete'], discard),
3431 3439 # Same as `dsremoved` but an unknown file exists at the same path
3432 3440 (dsremovunk, actions[b'undelete'], check),
3433 3441 ## the following sets do not result in any file changes
3434 3442 # File with no modification
3435 3443 (clean, actions[b'noop'], discard),
3436 3444 # Existing file, not tracked anywhere
3437 3445 (unknown, actions[b'unknown'], discard),
3438 3446 )
3439 3447
3440 3448 for abs, exact in sorted(names.items()):
3441 3449 # target file to be touched on disk (relative to cwd)
3442 3450 target = repo.wjoin(abs)
3443 3451 # search the entry in the dispatch table.
3444 3452 # if the file is in any of these sets, it was touched in the working
3445 3453 # directory parent and we are sure it needs to be reverted.
3446 3454 for table, (xlist, msg), dobackup in disptable:
3447 3455 if abs not in table:
3448 3456 continue
3449 3457 if xlist is not None:
3450 3458 xlist.append(abs)
3451 3459 if dobackup:
3452 3460 # If in interactive mode, don't automatically create
3453 3461 # .orig files (issue4793)
3454 3462 if dobackup == backupinteractive:
3455 3463 tobackup.add(abs)
3456 3464 elif backup <= dobackup or wctx[abs].cmp(ctx[abs]):
3457 3465 absbakname = scmutil.backuppath(ui, repo, abs)
3458 3466 bakname = os.path.relpath(
3459 3467 absbakname, start=repo.root
3460 3468 )
3461 3469 ui.note(
3462 3470 _(b'saving current version of %s as %s\n')
3463 3471 % (uipathfn(abs), uipathfn(bakname))
3464 3472 )
3465 3473 if not opts.get(b'dry_run'):
3466 3474 if interactive:
3467 3475 util.copyfile(target, absbakname)
3468 3476 else:
3469 3477 util.rename(target, absbakname)
3470 3478 if opts.get(b'dry_run'):
3471 3479 if ui.verbose or not exact:
3472 3480 ui.status(msg % uipathfn(abs))
3473 3481 elif exact:
3474 3482 ui.warn(msg % uipathfn(abs))
3475 3483 break
3476 3484
3477 3485 if not opts.get(b'dry_run'):
3478 3486 needdata = (b'revert', b'add', b'undelete')
3479 3487 oplist = [actions[name][0] for name in needdata]
3480 3488 prefetch = scmutil.prefetchfiles
3481 3489 matchfiles = scmutil.matchfiles(
3482 3490 repo, [f for sublist in oplist for f in sublist]
3483 3491 )
3484 3492 prefetch(
3485 3493 repo,
3486 3494 [(ctx.rev(), matchfiles)],
3487 3495 )
3488 3496 match = scmutil.match(repo[None], pats)
3489 3497 _performrevert(
3490 3498 repo,
3491 3499 ctx,
3492 3500 names,
3493 3501 uipathfn,
3494 3502 actions,
3495 3503 match,
3496 3504 interactive,
3497 3505 tobackup,
3498 3506 )
3499 3507
3500 3508 if targetsubs:
3501 3509 # Revert the subrepos on the revert list
3502 3510 for sub in targetsubs:
3503 3511 try:
3504 3512 wctx.sub(sub).revert(
3505 3513 ctx.substate[sub], *pats, **pycompat.strkwargs(opts)
3506 3514 )
3507 3515 except KeyError:
3508 3516 raise error.Abort(
3509 3517 b"subrepository '%s' does not exist in %s!"
3510 3518 % (sub, short(ctx.node()))
3511 3519 )
3512 3520
3513 3521
3514 3522 def _performrevert(
3515 3523 repo,
3516 3524 ctx,
3517 3525 names,
3518 3526 uipathfn,
3519 3527 actions,
3520 3528 match,
3521 3529 interactive=False,
3522 3530 tobackup=None,
3523 3531 ):
3524 3532 """function that actually performs all the actions computed for revert
3525 3533
3526 3534 This is an independent function to let extensions plug in and react to
3527 3535 the imminent revert.
3528 3536
3529 3537 Make sure you have the working directory locked when calling this function.
3530 3538 """
3531 3539 parent, p2 = repo.dirstate.parents()
3532 3540 node = ctx.node()
3533 3541 excluded_files = []
3534 3542
3535 3543 def checkout(f):
3536 3544 fc = ctx[f]
3537 3545 repo.wwrite(f, fc.data(), fc.flags())
3538 3546
3539 3547 def doremove(f):
3540 3548 try:
3541 3549 rmdir = repo.ui.configbool(b'experimental', b'removeemptydirs')
3542 3550 repo.wvfs.unlinkpath(f, rmdir=rmdir)
3543 3551 except OSError:
3544 3552 pass
3545 3553 repo.dirstate.remove(f)
3546 3554
3547 3555 def prntstatusmsg(action, f):
3548 3556 exact = names[f]
3549 3557 if repo.ui.verbose or not exact:
3550 3558 repo.ui.status(actions[action][1] % uipathfn(f))
3551 3559
3552 3560 audit_path = pathutil.pathauditor(repo.root, cached=True)
3553 3561 for f in actions[b'forget'][0]:
3554 3562 if interactive:
3555 3563 choice = repo.ui.promptchoice(
3556 3564 _(b"forget added file %s (Yn)?$$ &Yes $$ &No") % uipathfn(f)
3557 3565 )
3558 3566 if choice == 0:
3559 3567 prntstatusmsg(b'forget', f)
3560 3568 repo.dirstate.drop(f)
3561 3569 else:
3562 3570 excluded_files.append(f)
3563 3571 else:
3564 3572 prntstatusmsg(b'forget', f)
3565 3573 repo.dirstate.drop(f)
3566 3574 for f in actions[b'remove'][0]:
3567 3575 audit_path(f)
3568 3576 if interactive:
3569 3577 choice = repo.ui.promptchoice(
3570 3578 _(b"remove added file %s (Yn)?$$ &Yes $$ &No") % uipathfn(f)
3571 3579 )
3572 3580 if choice == 0:
3573 3581 prntstatusmsg(b'remove', f)
3574 3582 doremove(f)
3575 3583 else:
3576 3584 excluded_files.append(f)
3577 3585 else:
3578 3586 prntstatusmsg(b'remove', f)
3579 3587 doremove(f)
3580 3588 for f in actions[b'drop'][0]:
3581 3589 audit_path(f)
3582 3590 prntstatusmsg(b'drop', f)
3583 3591 repo.dirstate.remove(f)
3584 3592
3585 3593 normal = None
3586 3594 if node == parent:
3587 3595 # We're reverting to our parent. If possible, we'd like status
3588 3596 # to report the file as clean. We have to use normallookup for
3589 3597 # merges to avoid losing information about merged/dirty files.
3590 3598 if p2 != repo.nullid:
3591 3599 normal = repo.dirstate.normallookup
3592 3600 else:
3593 3601 normal = repo.dirstate.normal
3594 3602
3595 3603 newlyaddedandmodifiedfiles = set()
3596 3604 if interactive:
3597 3605 # Prompt the user for changes to revert
3598 3606 torevert = [f for f in actions[b'revert'][0] if f not in excluded_files]
3599 3607 m = scmutil.matchfiles(repo, torevert)
3600 3608 diffopts = patch.difffeatureopts(
3601 3609 repo.ui,
3602 3610 whitespace=True,
3603 3611 section=b'commands',
3604 3612 configprefix=b'revert.interactive.',
3605 3613 )
3606 3614 diffopts.nodates = True
3607 3615 diffopts.git = True
3608 3616 operation = b'apply'
3609 3617 if node == parent:
3610 3618 if repo.ui.configbool(
3611 3619 b'experimental', b'revert.interactive.select-to-keep'
3612 3620 ):
3613 3621 operation = b'keep'
3614 3622 else:
3615 3623 operation = b'discard'
3616 3624
3617 3625 if operation == b'apply':
3618 3626 diff = patch.diff(repo, None, ctx.node(), m, opts=diffopts)
3619 3627 else:
3620 3628 diff = patch.diff(repo, ctx.node(), None, m, opts=diffopts)
3621 3629 originalchunks = patch.parsepatch(diff)
3622 3630
3623 3631 try:
3624 3632
3625 3633 chunks, opts = recordfilter(
3626 3634 repo.ui, originalchunks, match, operation=operation
3627 3635 )
3628 3636 if operation == b'discard':
3629 3637 chunks = patch.reversehunks(chunks)
3630 3638
3631 3639 except error.PatchError as err:
3632 3640 raise error.Abort(_(b'error parsing patch: %s') % err)
3633 3641
3634 3642 # FIXME: when doing an interactive revert of a copy, there's no way of
3635 3643 # performing a partial revert of the added file, the only option is
3636 3644 # "remove added file <name> (Yn)?", so we don't need to worry about the
3637 3645 # alsorestore value. Ideally we'd be able to partially revert
3638 3646 # copied/renamed files.
3639 3647 newlyaddedandmodifiedfiles, unusedalsorestore = newandmodified(
3640 3648 chunks, originalchunks
3641 3649 )
3642 3650 if tobackup is None:
3643 3651 tobackup = set()
3644 3652 # Apply changes
3645 3653 fp = stringio()
3646 3654 # chunks are serialized per file, but files aren't sorted
3647 3655 for f in sorted({c.header.filename() for c in chunks if ishunk(c)}):
3648 3656 prntstatusmsg(b'revert', f)
3649 3657 files = set()
3650 3658 for c in chunks:
3651 3659 if ishunk(c):
3652 3660 abs = c.header.filename()
3653 3661 # Create a backup file only if this hunk should be backed up
3654 3662 if c.header.filename() in tobackup:
3655 3663 target = repo.wjoin(abs)
3656 3664 bakname = scmutil.backuppath(repo.ui, repo, abs)
3657 3665 util.copyfile(target, bakname)
3658 3666 tobackup.remove(abs)
3659 3667 if abs not in files:
3660 3668 files.add(abs)
3661 3669 if operation == b'keep':
3662 3670 checkout(abs)
3663 3671 c.write(fp)
3664 3672 dopatch = fp.tell()
3665 3673 fp.seek(0)
3666 3674 if dopatch:
3667 3675 try:
3668 3676 patch.internalpatch(repo.ui, repo, fp, 1, eolmode=None)
3669 3677 except error.PatchError as err:
3670 3678 raise error.Abort(pycompat.bytestr(err))
3671 3679 del fp
3672 3680 else:
3673 3681 for f in actions[b'revert'][0]:
3674 3682 prntstatusmsg(b'revert', f)
3675 3683 checkout(f)
3676 3684 if normal:
3677 3685 normal(f)
3678 3686
3679 3687 for f in actions[b'add'][0]:
3680 3688 # Don't check out modified files; they are already created by the diff
3681 3689 if f not in newlyaddedandmodifiedfiles:
3682 3690 prntstatusmsg(b'add', f)
3683 3691 checkout(f)
3684 3692 repo.dirstate.add(f)
3685 3693
3686 3694 normal = repo.dirstate.normallookup
3687 3695 if node == parent and p2 == repo.nullid:
3688 3696 normal = repo.dirstate.normal
3689 3697 for f in actions[b'undelete'][0]:
3690 3698 if interactive:
3691 3699 choice = repo.ui.promptchoice(
3692 3700 _(b"add back removed file %s (Yn)?$$ &Yes $$ &No") % f
3693 3701 )
3694 3702 if choice == 0:
3695 3703 prntstatusmsg(b'undelete', f)
3696 3704 checkout(f)
3697 3705 normal(f)
3698 3706 else:
3699 3707 excluded_files.append(f)
3700 3708 else:
3701 3709 prntstatusmsg(b'undelete', f)
3702 3710 checkout(f)
3703 3711 normal(f)
3704 3712
3705 3713 copied = copies.pathcopies(repo[parent], ctx)
3706 3714
3707 3715 for f in (
3708 3716 actions[b'add'][0] + actions[b'undelete'][0] + actions[b'revert'][0]
3709 3717 ):
3710 3718 if f in copied:
3711 3719 repo.dirstate.copy(copied[f], f)
3712 3720
3713 3721
3714 3722 # a list of (ui, repo, otherpeer, opts, missing) functions called by
3715 3723 # commands.outgoing. "missing" is the "missing" attribute of the result
3716 3724 # of "findcommonoutgoing()"
3717 3725 outgoinghooks = util.hooks()
3718 3726
3719 3727 # a list of (ui, repo) functions called by commands.summary
3720 3728 summaryhooks = util.hooks()
3721 3729
3722 3730 # a list of (ui, repo, opts, changes) functions called by commands.summary.
3723 3731 #
3724 3732 # functions should return a tuple of the booleans below if 'changes' is None:
3725 3733 # (whether-incomings-are-needed, whether-outgoings-are-needed)
3726 3734 #
3727 3735 # otherwise, 'changes' is a tuple of tuples below:
3728 3736 # - (sourceurl, sourcebranch, sourcepeer, incoming)
3729 3737 # - (desturl, destbranch, destpeer, outgoing)
3730 3738 summaryremotehooks = util.hooks()
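# A minimal sketch of a hook honoring the summaryremotehooks contract above;
# the extension and function names are hypothetical:
#
#     def _remotesummary(ui, repo, opts, changes):
#         if changes is None:
#             # first pass: (need-incoming-info, need-outgoing-info)
#             return (False, True)
#         (srcurl, srcbranch, srcpeer, incoming) = changes[0]
#         (dsturl, dstbranch, dstpeer, outgoing) = changes[1]
#         ui.note(b'summary: outgoing destination is %s\n' % dsturl)
#
#     summaryremotehooks.add(b'myextension', _remotesummary)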
3731 3739
3732 3740
3733 3741 def checkunfinished(repo, commit=False, skipmerge=False):
3734 3742 """Look for an unfinished multistep operation, like graft, and abort
3735 3743 if found. It's probably good to check this right before
3736 3744 bailifchanged().
3737 3745 """
3738 3746 # Check for non-clearable states first, so things like rebase will take
3739 3747 # precedence over update.
3740 3748 for state in statemod._unfinishedstates:
3741 3749 if (
3742 3750 state._clearable
3743 3751 or (commit and state._allowcommit)
3744 3752 or state._reportonly
3745 3753 ):
3746 3754 continue
3747 3755 if state.isunfinished(repo):
3748 3756 raise error.StateError(state.msg(), hint=state.hint())
3749 3757
3750 3758 for s in statemod._unfinishedstates:
3751 3759 if (
3752 3760 not s._clearable
3753 3761 or (commit and s._allowcommit)
3754 3762 or (s._opname == b'merge' and skipmerge)
3755 3763 or s._reportonly
3756 3764 ):
3757 3765 continue
3758 3766 if s.isunfinished(repo):
3759 3767 raise error.StateError(s.msg(), hint=s.hint())
3760 3768
3761 3769
3762 3770 def clearunfinished(repo):
3763 3771 """Check for unfinished operations (as above), and clear the ones
3764 3772 that are clearable.
3765 3773 """
3766 3774 for state in statemod._unfinishedstates:
3767 3775 if state._reportonly:
3768 3776 continue
3769 3777 if not state._clearable and state.isunfinished(repo):
3770 3778 raise error.StateError(state.msg(), hint=state.hint())
3771 3779
3772 3780 for s in statemod._unfinishedstates:
3773 3781 if s._opname == b'merge' or s._reportonly:
3774 3782 continue
3775 3783 if s._clearable and s.isunfinished(repo):
3776 3784 util.unlink(repo.vfs.join(s._fname))
3777 3785
3778 3786
3779 3787 def getunfinishedstate(repo):
3780 3788 """Checks for unfinished operations and returns statecheck object
3781 3789 for it"""
3782 3790 for state in statemod._unfinishedstates:
3783 3791 if state.isunfinished(repo):
3784 3792 return state
3785 3793 return None
3786 3794
3787 3795
3788 3796 def howtocontinue(repo):
3789 3797 """Check for an unfinished operation and return the command to finish
3790 3798 it.
3791 3799
3792 3800 statemod._unfinishedstates list is checked for an unfinished operation
3793 3801 and the corresponding message to finish it is generated if a method to
3794 3802 continue is supported by the operation.
3795 3803
3796 3804 Returns a (msg, warning) tuple. 'msg' is a string and 'warning' is
3797 3805 a boolean.
3798 3806 """
3799 3807 contmsg = _(b"continue: %s")
3800 3808 for state in statemod._unfinishedstates:
3801 3809 if not state._continueflag:
3802 3810 continue
3803 3811 if state.isunfinished(repo):
3804 3812 return contmsg % state.continuemsg(), True
3805 3813 if repo[None].dirty(missing=True, merge=False, branch=False):
3806 3814 return contmsg % _(b"hg commit"), False
3807 3815 return None, None
3808 3816
3809 3817
3810 3818 def checkafterresolved(repo):
3811 3819 """Inform the user about the next action after completing hg resolve
3812 3820
3813 3821 If there's an unfinished operation that supports the continue flag,
3814 3822 howtocontinue will yield repo.ui.warn as the reporter.
3815 3823
3816 3824 Otherwise, it will yield repo.ui.note.
3817 3825 """
3818 3826 msg, warning = howtocontinue(repo)
3819 3827 if msg is not None:
3820 3828 if warning:
3821 3829 repo.ui.warn(b"%s\n" % msg)
3822 3830 else:
3823 3831 repo.ui.note(b"%s\n" % msg)
3824 3832
3825 3833
3826 3834 def wrongtooltocontinue(repo, task):
3827 3835 """Raise an abort suggesting how to properly continue if there is an
3828 3836 active task.
3829 3837
3830 3838 Uses howtocontinue() to find the active task.
3831 3839
3832 3840 If there's no task (repo.ui.note for 'hg commit'), it does not offer
3833 3841 a hint.
3834 3842 """
3835 3843 after = howtocontinue(repo)
3836 3844 hint = None
3837 3845 if after[1]:
3838 3846 hint = after[0]
3839 3847 raise error.StateError(_(b'no %s in progress') % task, hint=hint)
3840 3848
3841 3849
3842 3850 def abortgraft(ui, repo, graftstate):
3843 3851 """abort the interrupted graft and roll back to the state before the
3844 3852 interrupted graft"""
3845 3853 if not graftstate.exists():
3846 3854 raise error.StateError(_(b"no interrupted graft to abort"))
3847 3855 statedata = readgraftstate(repo, graftstate)
3848 3856 newnodes = statedata.get(b'newnodes')
3849 3857 if newnodes is None:
3850 3858 # an old graft state which does not have all the data required to abort
3851 3859 # the graft
3852 3860 raise error.Abort(_(b"cannot abort using an old graftstate"))
3853 3861
3854 3862 # changeset from which graft operation was started
3855 3863 if len(newnodes) > 0:
3856 3864 startctx = repo[newnodes[0]].p1()
3857 3865 else:
3858 3866 startctx = repo[b'.']
3859 3867 # whether to strip or not
3860 3868 cleanup = False
3861 3869
3862 3870 if newnodes:
3863 3871 newnodes = [repo[r].rev() for r in newnodes]
3864 3872 cleanup = True
3865 3873 # checking that none of the newnodes have turned public or are public
3866 3874 immutable = [c for c in newnodes if not repo[c].mutable()]
3867 3875 if immutable:
3868 3876 repo.ui.warn(
3869 3877 _(b"cannot clean up public changesets %s\n")
3870 3878 % b', '.join(bytes(repo[r]) for r in immutable),
3871 3879 hint=_(b"see 'hg help phases' for details"),
3872 3880 )
3873 3881 cleanup = False
3874 3882
3875 3883 # checking that no new nodes are created on top of grafted revs
3876 3884 desc = set(repo.changelog.descendants(newnodes))
3877 3885 if desc - set(newnodes):
3878 3886 repo.ui.warn(
3879 3887 _(
3880 3888 b"new changesets detected on destination "
3881 3889 b"branch, can't strip\n"
3882 3890 )
3883 3891 )
3884 3892 cleanup = False
3885 3893
3886 3894 if cleanup:
3887 3895 with repo.wlock(), repo.lock():
3888 3896 mergemod.clean_update(startctx)
3889 3897 # stripping the new nodes created
3890 3898 strippoints = [
3891 3899 c.node() for c in repo.set(b"roots(%ld)", newnodes)
3892 3900 ]
3893 3901 repair.strip(repo.ui, repo, strippoints, backup=False)
3894 3902
3895 3903 if not cleanup:
3896 3904 # we don't update to the startnode if we can't strip
3897 3905 startctx = repo[b'.']
3898 3906 mergemod.clean_update(startctx)
3899 3907
3900 3908 ui.status(_(b"graft aborted\n"))
3901 3909 ui.status(_(b"working directory is now at %s\n") % startctx.hex()[:12])
3902 3910 graftstate.delete()
3903 3911 return 0
3904 3912
3905 3913
3906 3914 def readgraftstate(repo, graftstate):
3907 3915 # type: (Any, statemod.cmdstate) -> Dict[bytes, Any]
3908 3916 """read the graft state file and return a dict of the data stored in it"""
3909 3917 try:
3910 3918 return graftstate.read()
3911 3919 except error.CorruptedState:
3912 3920 nodes = repo.vfs.read(b'graftstate').splitlines()
3913 3921 return {b'nodes': nodes}
3914 3922
3915 3923
3916 3924 def hgabortgraft(ui, repo):
3917 3925 """ abort logic for aborting graft using 'hg abort'"""
3918 3926 with repo.wlock():
3919 3927 graftstate = statemod.cmdstate(repo, b'graftstate')
3920 3928 return abortgraft(ui, repo, graftstate)
@@ -1,292 +1,299 b''
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import nullrev
12 12 from . import (
13 13 error,
14 14 revlog,
15 15 )
16 16 from .interfaces import (
17 17 repository,
18 18 util as interfaceutil,
19 19 )
20 20 from .utils import storageutil
21 from .revlogutils import (
22 constants as revlog_constants,
23 )
21 24
22 25
23 26 @interfaceutil.implementer(repository.ifilestorage)
24 27 class filelog(object):
25 28 def __init__(self, opener, path):
26 29 self._revlog = revlog.revlog(
27 opener, b'/'.join((b'data', path + b'.i')), censorable=True
30 opener,
31 # XXX should use the unencoded path
32 target=(revlog_constants.KIND_FILELOG, path),
33 indexfile=b'/'.join((b'data', path + b'.i')),
34 censorable=True,
28 35 )
29 36 # Full name of the user visible file, relative to the repository root.
30 37 # Used by LFS.
31 38 self._revlog.filename = path
32 39 self._revlog.revlog_kind = b'filelog'
33 40 self.nullid = self._revlog.nullid
34 41
35 42 def __len__(self):
36 43 return len(self._revlog)
37 44
38 45 def __iter__(self):
39 46 return self._revlog.__iter__()
40 47
41 48 def hasnode(self, node):
42 49 if node in (self.nullid, nullrev):
43 50 return False
44 51
45 52 try:
46 53 self._revlog.rev(node)
47 54 return True
48 55 except (TypeError, ValueError, IndexError, error.LookupError):
49 56 return False
50 57
51 58 def revs(self, start=0, stop=None):
52 59 return self._revlog.revs(start=start, stop=stop)
53 60
54 61 def parents(self, node):
55 62 return self._revlog.parents(node)
56 63
57 64 def parentrevs(self, rev):
58 65 return self._revlog.parentrevs(rev)
59 66
60 67 def rev(self, node):
61 68 return self._revlog.rev(node)
62 69
63 70 def node(self, rev):
64 71 return self._revlog.node(rev)
65 72
66 73 def lookup(self, node):
67 74 return storageutil.fileidlookup(
68 75 self._revlog, node, self._revlog.indexfile
69 76 )
70 77
71 78 def linkrev(self, rev):
72 79 return self._revlog.linkrev(rev)
73 80
74 81 def commonancestorsheads(self, node1, node2):
75 82 return self._revlog.commonancestorsheads(node1, node2)
76 83
77 84 # Used by dagop.blockdescendants().
78 85 def descendants(self, revs):
79 86 return self._revlog.descendants(revs)
80 87
81 88 def heads(self, start=None, stop=None):
82 89 return self._revlog.heads(start, stop)
83 90
84 91 # Used by hgweb, children extension.
85 92 def children(self, node):
86 93 return self._revlog.children(node)
87 94
88 95 def iscensored(self, rev):
89 96 return self._revlog.iscensored(rev)
90 97
91 98 def revision(self, node, _df=None, raw=False):
92 99 return self._revlog.revision(node, _df=_df, raw=raw)
93 100
94 101 def rawdata(self, node, _df=None):
95 102 return self._revlog.rawdata(node, _df=_df)
96 103
97 104 def emitrevisions(
98 105 self,
99 106 nodes,
100 107 nodesorder=None,
101 108 revisiondata=False,
102 109 assumehaveparentrevisions=False,
103 110 deltamode=repository.CG_DELTAMODE_STD,
104 111 sidedata_helpers=None,
105 112 ):
106 113 return self._revlog.emitrevisions(
107 114 nodes,
108 115 nodesorder=nodesorder,
109 116 revisiondata=revisiondata,
110 117 assumehaveparentrevisions=assumehaveparentrevisions,
111 118 deltamode=deltamode,
112 119 sidedata_helpers=sidedata_helpers,
113 120 )
114 121
115 122 def addrevision(
116 123 self,
117 124 revisiondata,
118 125 transaction,
119 126 linkrev,
120 127 p1,
121 128 p2,
122 129 node=None,
123 130 flags=revlog.REVIDX_DEFAULT_FLAGS,
124 131 cachedelta=None,
125 132 ):
126 133 return self._revlog.addrevision(
127 134 revisiondata,
128 135 transaction,
129 136 linkrev,
130 137 p1,
131 138 p2,
132 139 node=node,
133 140 flags=flags,
134 141 cachedelta=cachedelta,
135 142 )
136 143
137 144 def addgroup(
138 145 self,
139 146 deltas,
140 147 linkmapper,
141 148 transaction,
142 149 addrevisioncb=None,
143 150 duplicaterevisioncb=None,
144 151 maybemissingparents=False,
145 152 ):
146 153 if maybemissingparents:
147 154 raise error.Abort(
148 155 _(
149 156 b'revlog storage does not support missing '
150 157 b'parents write mode'
151 158 )
152 159 )
153 160
154 161 return self._revlog.addgroup(
155 162 deltas,
156 163 linkmapper,
157 164 transaction,
158 165 addrevisioncb=addrevisioncb,
159 166 duplicaterevisioncb=duplicaterevisioncb,
160 167 )
161 168
162 169 def getstrippoint(self, minlink):
163 170 return self._revlog.getstrippoint(minlink)
164 171
165 172 def strip(self, minlink, transaction):
166 173 return self._revlog.strip(minlink, transaction)
167 174
168 175 def censorrevision(self, tr, node, tombstone=b''):
169 176 return self._revlog.censorrevision(tr, node, tombstone=tombstone)
170 177
171 178 def files(self):
172 179 return self._revlog.files()
173 180
174 181 def read(self, node):
175 182 return storageutil.filtermetadata(self.revision(node))
176 183
177 184 def add(self, text, meta, transaction, link, p1=None, p2=None):
178 185 if meta or text.startswith(b'\1\n'):
179 186 text = storageutil.packmeta(meta, text)
180 187 rev = self.addrevision(text, transaction, link, p1, p2)
181 188 return self.node(rev)
182 189
183 190 def renamed(self, node):
184 191 return storageutil.filerevisioncopied(self, node)
185 192
186 193 def size(self, rev):
187 194 """return the size of a given revision"""
188 195
189 196 # for revisions with renames, we have to go the slow way
190 197 node = self.node(rev)
191 198 if self.renamed(node):
192 199 return len(self.read(node))
193 200 if self.iscensored(rev):
194 201 return 0
195 202
196 203 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
197 204 return self._revlog.size(rev)
198 205
199 206 def cmp(self, node, text):
200 207 """compare text with a given file revision
201 208
202 209 returns True if text is different than what is stored.
203 210 """
204 211 return not storageutil.filedataequivalent(self, node, text)
205 212
206 213 def verifyintegrity(self, state):
207 214 return self._revlog.verifyintegrity(state)
208 215
209 216 def storageinfo(
210 217 self,
211 218 exclusivefiles=False,
212 219 sharedfiles=False,
213 220 revisionscount=False,
214 221 trackedsize=False,
215 222 storedsize=False,
216 223 ):
217 224 return self._revlog.storageinfo(
218 225 exclusivefiles=exclusivefiles,
219 226 sharedfiles=sharedfiles,
220 227 revisionscount=revisionscount,
221 228 trackedsize=trackedsize,
222 229 storedsize=storedsize,
223 230 )
224 231
225 232 # TODO these aren't part of the interface and aren't internal methods.
226 233 # Callers should be fixed to not use them.
227 234
228 235 # Used by bundlefilelog, unionfilelog.
229 236 @property
230 237 def indexfile(self):
231 238 return self._revlog.indexfile
232 239
233 240 @indexfile.setter
234 241 def indexfile(self, value):
235 242 self._revlog.indexfile = value
236 243
237 244 # Used by repo upgrade.
238 245 def clone(self, tr, destrevlog, **kwargs):
239 246 if not isinstance(destrevlog, filelog):
240 247 raise error.ProgrammingError(b'expected filelog to clone()')
241 248
242 249 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
243 250
244 251
245 252 class narrowfilelog(filelog):
246 253 """Filelog variation to be used with narrow stores."""
247 254
248 255 def __init__(self, opener, path, narrowmatch):
249 256 super(narrowfilelog, self).__init__(opener, path)
250 257 self._narrowmatch = narrowmatch
251 258
252 259 def renamed(self, node):
253 260 res = super(narrowfilelog, self).renamed(node)
254 261
255 262 # Renames that come from outside the narrowspec are problematic
256 263 # because we may lack the base text for the rename. This can result
257 264 # in code attempting to walk the ancestry or compute a diff
258 265 # encountering a missing revision. We address this by silently
259 266 # removing rename metadata if the source file is outside the
260 267 # narrow spec.
261 268 #
262 269 # A better solution would be to see if the base revision is available,
263 270 # rather than assuming it isn't.
264 271 #
265 272 # An even better solution would be to teach all consumers of rename
266 273 # metadata that the base revision may not be available.
267 274 #
268 275 # TODO consider better ways of doing this.
269 276 if res and not self._narrowmatch(res[0]):
270 277 return None
271 278
272 279 return res
273 280
274 281 def size(self, rev):
275 282 # Because we have a custom renamed() that may lie, we need to call
276 283 # the base renamed() to report accurate results.
277 284 node = self.node(rev)
278 285 if super(narrowfilelog, self).renamed(node):
279 286 return len(self.read(node))
280 287 else:
281 288 return super(narrowfilelog, self).size(rev)
282 289
283 290 def cmp(self, node, text):
284 291 # We don't call `super` because narrow parents can be buggy in case of an
285 292 # ambiguous dirstate. Always take the slow path until there is a better
286 293 # fix, see issue6150.
287 294
288 295 # Censored files compare against the empty file.
289 296 if self.iscensored(self.rev(node)):
290 297 return text != b''
291 298
292 299 return self.read(node) != text
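The filelog constructor change above shows the new calling convention: the revlog is
created with an explicit (kind, identifier) target describing what it tracks, next to
the index file name. A minimal sketch of that call shape, using only what the hunk
shows (KIND_FILELOG from revlogutils.constants and the target/indexfile/censorable
keyword arguments; the helper name is hypothetical):

    from mercurial import revlog
    from mercurial.revlogutils import constants as revlog_constants

    def _open_file_revlog(opener, path):
        # opener: a store vfs; path: repository-relative path of the tracked file
        return revlog.revlog(
            opener,
            target=(revlog_constants.KIND_FILELOG, path),
            indexfile=b'/'.join((b'data', path + b'.i')),
            censorable=True,
        )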
@@ -1,2382 +1,2386 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullrev,
20 20 )
21 21 from .pycompat import getattr
22 22 from . import (
23 23 encoding,
24 24 error,
25 25 match as matchmod,
26 26 mdiff,
27 27 pathutil,
28 28 policy,
29 29 pycompat,
30 30 revlog,
31 31 util,
32 32 )
33 33 from .interfaces import (
34 34 repository,
35 35 util as interfaceutil,
36 36 )
37 from .revlogutils import (
38 constants as revlog_constants,
39 )
37 40
38 41 parsers = policy.importmod('parsers')
39 42 propertycache = util.propertycache
40 43
41 44 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
42 45 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
43 46
44 47
45 48 def _parse(nodelen, data):
46 49 # This method does a little bit of excessive-looking
47 50 # precondition checking. This is so that the behavior of this
48 51 # class exactly matches its C counterpart to try and help
49 52 # prevent surprise breakage for anyone that develops against
50 53 # the pure version.
51 54 if data and data[-1:] != b'\n':
52 55 raise ValueError(b'Manifest did not end in a newline.')
53 56 prev = None
54 57 for l in data.splitlines():
55 58 if prev is not None and prev > l:
56 59 raise ValueError(b'Manifest lines not in sorted order.')
57 60 prev = l
58 61 f, n = l.split(b'\0')
59 62 nl = len(n)
60 63 flags = n[-1:]
61 64 if flags in _manifestflags:
62 65 n = n[:-1]
63 66 nl -= 1
64 67 else:
65 68 flags = b''
66 69 if nl != 2 * nodelen:
67 70 raise ValueError(b'Invalid manifest line')
68 71
69 72 yield f, bin(n), flags
70 73
71 74
72 75 def _text(it):
73 76 files = []
74 77 lines = []
75 78 for f, n, fl in it:
76 79 files.append(f)
77 80 # if this is changed to support newlines in filenames,
78 81 # be sure to check the templates/ dir again (especially *-raw.tmpl)
79 82 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
80 83
81 84 _checkforbidden(files)
82 85 return b''.join(lines)
83 86
84 87
85 88 class lazymanifestiter(object):
86 89 def __init__(self, lm):
87 90 self.pos = 0
88 91 self.lm = lm
89 92
90 93 def __iter__(self):
91 94 return self
92 95
93 96 def next(self):
94 97 try:
95 98 data, pos = self.lm._get(self.pos)
96 99 except IndexError:
97 100 raise StopIteration
98 101 if pos == -1:
99 102 self.pos += 1
100 103 return data[0]
101 104 self.pos += 1
102 105 zeropos = data.find(b'\x00', pos)
103 106 return data[pos:zeropos]
104 107
105 108 __next__ = next
106 109
107 110
108 111 class lazymanifestiterentries(object):
109 112 def __init__(self, lm):
110 113 self.lm = lm
111 114 self.pos = 0
112 115
113 116 def __iter__(self):
114 117 return self
115 118
116 119 def next(self):
117 120 try:
118 121 data, pos = self.lm._get(self.pos)
119 122 except IndexError:
120 123 raise StopIteration
121 124 if pos == -1:
122 125 self.pos += 1
123 126 return data
124 127 zeropos = data.find(b'\x00', pos)
125 128 nlpos = data.find(b'\n', pos)
126 129 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
127 130 raise error.StorageError(b'Invalid manifest line')
128 131 flags = data[nlpos - 1 : nlpos]
129 132 if flags in _manifestflags:
130 133 hlen = nlpos - zeropos - 2
131 134 else:
132 135 hlen = nlpos - zeropos - 1
133 136 flags = b''
134 137 if hlen != 2 * self.lm._nodelen:
135 138 raise error.StorageError(b'Invalid manifest line')
136 139 hashval = unhexlify(
137 140 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
138 141 )
139 142 self.pos += 1
140 143 return (data[pos:zeropos], hashval, flags)
141 144
142 145 __next__ = next
143 146
144 147
145 148 def unhexlify(data, extra, pos, length):
146 149 s = bin(data[pos : pos + length])
147 150 if extra:
148 151 s += chr(extra & 0xFF)
149 152 return s
150 153
151 154
152 155 def _cmp(a, b):
153 156 return (a > b) - (a < b)
154 157
155 158
156 159 _manifestflags = {b'', b'l', b't', b'x'}
157 160
158 161
159 162 class _lazymanifest(object):
160 163 """A pure python manifest backed by a byte string. It is supplemented with
161 164 internal lists as it is modified, until it is compacted back to a pure byte
162 165 string.
163 166
164 167 ``data`` is the initial manifest data.
165 168
166 169 ``positions`` is a list of offsets, one per manifest entry. Positive
167 170 values are offsets into ``data``, negative values are offsets into the
168 171 ``extradata`` list. When an entry is removed, its entry is dropped from
169 172 ``positions``. The values are encoded such that when walking the list and
170 173 indexing into ``data`` or ``extradata`` as appropriate, the entries are
171 174 sorted by filename.
172 175
173 176 ``extradata`` is a list of (key, hash, flags) for entries that were added or
174 177 modified since the manifest was created or compacted.
175 178 """
176 179
177 180 def __init__(
178 181 self,
179 182 nodelen,
180 183 data,
181 184 positions=None,
182 185 extrainfo=None,
183 186 extradata=None,
184 187 hasremovals=False,
185 188 ):
186 189 self._nodelen = nodelen
187 190 if positions is None:
188 191 self.positions = self.findlines(data)
189 192 self.extrainfo = [0] * len(self.positions)
190 193 self.data = data
191 194 self.extradata = []
192 195 self.hasremovals = False
193 196 else:
194 197 self.positions = positions[:]
195 198 self.extrainfo = extrainfo[:]
196 199 self.extradata = extradata[:]
197 200 self.data = data
198 201 self.hasremovals = hasremovals
199 202
200 203 def findlines(self, data):
201 204 if not data:
202 205 return []
203 206 pos = data.find(b"\n")
204 207 if pos == -1 or data[-1:] != b'\n':
205 208 raise ValueError(b"Manifest did not end in a newline.")
206 209 positions = [0]
207 210 prev = data[: data.find(b'\x00')]
208 211 while pos < len(data) - 1 and pos != -1:
209 212 positions.append(pos + 1)
210 213 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
211 214 if nexts < prev:
212 215 raise ValueError(b"Manifest lines not in sorted order.")
213 216 prev = nexts
214 217 pos = data.find(b"\n", pos + 1)
215 218 return positions
216 219
217 220 def _get(self, index):
218 221 # get the position encoded in pos:
219 222 # positive number is an index in 'data'
220 223 # negative number is an index into 'extradata'
221 224 pos = self.positions[index]
222 225 if pos >= 0:
223 226 return self.data, pos
224 227 return self.extradata[-pos - 1], -1
225 228
226 229 def _getkey(self, pos):
227 230 if pos >= 0:
228 231 return self.data[pos : self.data.find(b'\x00', pos + 1)]
229 232 return self.extradata[-pos - 1][0]
230 233
231 234 def bsearch(self, key):
232 235 first = 0
233 236 last = len(self.positions) - 1
234 237
235 238 while first <= last:
236 239 midpoint = (first + last) // 2
237 240 nextpos = self.positions[midpoint]
238 241 candidate = self._getkey(nextpos)
239 242 r = _cmp(key, candidate)
240 243 if r == 0:
241 244 return midpoint
242 245 else:
243 246 if r < 0:
244 247 last = midpoint - 1
245 248 else:
246 249 first = midpoint + 1
247 250 return -1
248 251
249 252 def bsearch2(self, key):
250 253 # same as the above, but will always return the position
251 254 # done for performance reasons
252 255 first = 0
253 256 last = len(self.positions) - 1
254 257
255 258 while first <= last:
256 259 midpoint = (first + last) // 2
257 260 nextpos = self.positions[midpoint]
258 261 candidate = self._getkey(nextpos)
259 262 r = _cmp(key, candidate)
260 263 if r == 0:
261 264 return (midpoint, True)
262 265 else:
263 266 if r < 0:
264 267 last = midpoint - 1
265 268 else:
266 269 first = midpoint + 1
267 270 return (first, False)
268 271
269 272 def __contains__(self, key):
270 273 return self.bsearch(key) != -1
271 274
272 275 def __getitem__(self, key):
273 276 if not isinstance(key, bytes):
274 277 raise TypeError(b"getitem: manifest keys must be a bytes.")
275 278 needle = self.bsearch(key)
276 279 if needle == -1:
277 280 raise KeyError
278 281 data, pos = self._get(needle)
279 282 if pos == -1:
280 283 return (data[1], data[2])
281 284 zeropos = data.find(b'\x00', pos)
282 285 nlpos = data.find(b'\n', zeropos)
283 286 assert 0 <= needle <= len(self.positions)
284 287 assert len(self.extrainfo) == len(self.positions)
285 288 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
286 289 raise error.StorageError(b'Invalid manifest line')
287 290 hlen = nlpos - zeropos - 1
288 291 flags = data[nlpos - 1 : nlpos]
289 292 if flags in _manifestflags:
290 293 hlen -= 1
291 294 else:
292 295 flags = b''
293 296 if hlen != 2 * self._nodelen:
294 297 raise error.StorageError(b'Invalid manifest line')
295 298 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
296 299 return (hashval, flags)
297 300
298 301 def __delitem__(self, key):
299 302 needle, found = self.bsearch2(key)
300 303 if not found:
301 304 raise KeyError
302 305 cur = self.positions[needle]
303 306 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
304 307 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
305 308 if cur >= 0:
306 309 # This does NOT unsort the list as far as the search functions are
307 310 # concerned, as they only examine lines mapped by self.positions.
308 311 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
309 312 self.hasremovals = True
310 313
311 314 def __setitem__(self, key, value):
312 315 if not isinstance(key, bytes):
313 316 raise TypeError(b"setitem: manifest keys must be a byte string.")
314 317 if not isinstance(value, tuple) or len(value) != 2:
315 318 raise TypeError(
316 319 b"Manifest values must be a tuple of (node, flags)."
317 320 )
318 321 hashval = value[0]
319 322 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
320 323 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
321 324 flags = value[1]
322 325 if not isinstance(flags, bytes) or len(flags) > 1:
323 326 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
324 327 needle, found = self.bsearch2(key)
325 328 if found:
326 329 # put the item
327 330 pos = self.positions[needle]
328 331 if pos < 0:
329 332 self.extradata[-pos - 1] = (key, hashval, value[1])
330 333 else:
331 334 # just don't bother
332 335 self.extradata.append((key, hashval, value[1]))
333 336 self.positions[needle] = -len(self.extradata)
334 337 else:
335 338 # not found, put it in with extra positions
336 339 self.extradata.append((key, hashval, value[1]))
337 340 self.positions = (
338 341 self.positions[:needle]
339 342 + [-len(self.extradata)]
340 343 + self.positions[needle:]
341 344 )
342 345 self.extrainfo = (
343 346 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
344 347 )
345 348
346 349 def copy(self):
347 350 # XXX call _compact like in C?
348 351 return _lazymanifest(
349 352 self._nodelen,
350 353 self.data,
351 354 self.positions,
352 355 self.extrainfo,
353 356 self.extradata,
354 357 self.hasremovals,
355 358 )
356 359
357 360 def _compact(self):
358 361 # hopefully not called TOO often
359 362 if len(self.extradata) == 0 and not self.hasremovals:
360 363 return
361 364 l = []
362 365 i = 0
363 366 offset = 0
364 367 self.extrainfo = [0] * len(self.positions)
365 368 while i < len(self.positions):
366 369 if self.positions[i] >= 0:
367 370 cur = self.positions[i]
368 371 last_cut = cur
369 372
370 373 # Collect all contiguous entries in the buffer at the current
371 374 # offset, breaking out only for added/modified items held in
372 375 # extradata, or a deleted line prior to the next position.
373 376 while True:
374 377 self.positions[i] = offset
375 378 i += 1
376 379 if i == len(self.positions) or self.positions[i] < 0:
377 380 break
378 381
379 382 # A removed file has no positions[] entry, but does have an
380 383 # overwritten first byte. Break out and find the end of the
381 384 # current good entry/entries if there is a removed file
382 385 # before the next position.
383 386 if (
384 387 self.hasremovals
385 388 and self.data.find(b'\n\x00', cur, self.positions[i])
386 389 != -1
387 390 ):
388 391 break
389 392
390 393 offset += self.positions[i] - cur
391 394 cur = self.positions[i]
392 395 end_cut = self.data.find(b'\n', cur)
393 396 if end_cut != -1:
394 397 end_cut += 1
395 398 offset += end_cut - cur
396 399 l.append(self.data[last_cut:end_cut])
397 400 else:
398 401 while i < len(self.positions) and self.positions[i] < 0:
399 402 cur = self.positions[i]
400 403 t = self.extradata[-cur - 1]
401 404 l.append(self._pack(t))
402 405 self.positions[i] = offset
403 406 # Hashes are either 20 bytes (old sha1s) or 32
404 407 # bytes (new non-sha1).
405 408 hlen = 20
406 409 if len(t[1]) > 25:
407 410 hlen = 32
408 411 if len(t[1]) > hlen:
409 412 self.extrainfo[i] = ord(t[1][hlen + 1])
410 413 offset += len(l[-1])
411 414 i += 1
412 415 self.data = b''.join(l)
413 416 self.hasremovals = False
414 417 self.extradata = []
415 418
416 419 def _pack(self, d):
417 420 n = d[1]
418 421 assert len(n) in (20, 32)
419 422 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
420 423
421 424 def text(self):
422 425 self._compact()
423 426 return self.data
424 427
425 428 def diff(self, m2, clean=False):
426 429 '''Finds changes between the current manifest and m2.'''
427 430 # XXX think whether efficiency matters here
428 431 diff = {}
429 432
430 433 for fn, e1, flags in self.iterentries():
431 434 if fn not in m2:
432 435 diff[fn] = (e1, flags), (None, b'')
433 436 else:
434 437 e2 = m2[fn]
435 438 if (e1, flags) != e2:
436 439 diff[fn] = (e1, flags), e2
437 440 elif clean:
438 441 diff[fn] = None
439 442
440 443 for fn, e2, flags in m2.iterentries():
441 444 if fn not in self:
442 445 diff[fn] = (None, b''), (e2, flags)
443 446
444 447 return diff
445 448
446 449 def iterentries(self):
447 450 return lazymanifestiterentries(self)
448 451
449 452 def iterkeys(self):
450 453 return lazymanifestiter(self)
451 454
452 455 def __iter__(self):
453 456 return lazymanifestiter(self)
454 457
455 458 def __len__(self):
456 459 return len(self.positions)
457 460
458 461 def filtercopy(self, filterfn):
459 462 # XXX should be optimized
460 463 c = _lazymanifest(self._nodelen, b'')
461 464 for f, n, fl in self.iterentries():
462 465 if filterfn(f):
463 466 c[f] = n, fl
464 467 return c
465 468
466 469
467 470 try:
468 471 _lazymanifest = parsers.lazymanifest
469 472 except AttributeError:
470 473 pass
471 474
472 475
473 476 @interfaceutil.implementer(repository.imanifestdict)
474 477 class manifestdict(object):
475 478 def __init__(self, nodelen, data=b''):
476 479 self._nodelen = nodelen
477 480 self._lm = _lazymanifest(nodelen, data)
478 481
479 482 def __getitem__(self, key):
480 483 return self._lm[key][0]
481 484
482 485 def find(self, key):
483 486 return self._lm[key]
484 487
485 488 def __len__(self):
486 489 return len(self._lm)
487 490
488 491 def __nonzero__(self):
489 492 # nonzero is covered by the __len__ function, but implementing it here
490 493 # makes it easier for extensions to override.
491 494 return len(self._lm) != 0
492 495
493 496 __bool__ = __nonzero__
494 497
495 498 def __setitem__(self, key, node):
496 499 self._lm[key] = node, self.flags(key)
497 500
498 501 def __contains__(self, key):
499 502 if key is None:
500 503 return False
501 504 return key in self._lm
502 505
503 506 def __delitem__(self, key):
504 507 del self._lm[key]
505 508
506 509 def __iter__(self):
507 510 return self._lm.__iter__()
508 511
509 512 def iterkeys(self):
510 513 return self._lm.iterkeys()
511 514
512 515 def keys(self):
513 516 return list(self.iterkeys())
514 517
515 518 def filesnotin(self, m2, match=None):
516 519 '''Set of files in this manifest that are not in the other'''
517 520 if match is not None:
518 521 match = matchmod.badmatch(match, lambda path, msg: None)
519 522 sm2 = set(m2.walk(match))
520 523 return {f for f in self.walk(match) if f not in sm2}
521 524 return {f for f in self if f not in m2}
522 525
523 526 @propertycache
524 527 def _dirs(self):
525 528 return pathutil.dirs(self)
526 529
527 530 def dirs(self):
528 531 return self._dirs
529 532
530 533 def hasdir(self, dir):
531 534 return dir in self._dirs
532 535
533 536 def _filesfastpath(self, match):
534 537 """Checks whether we can correctly and quickly iterate over matcher
535 538 files instead of over manifest files."""
536 539 files = match.files()
537 540 return len(files) < 100 and (
538 541 match.isexact()
539 542 or (match.prefix() and all(fn in self for fn in files))
540 543 )
541 544
542 545 def walk(self, match):
543 546 """Generates matching file names.
544 547
545 548 Equivalent to manifest.matches(match).iterkeys(), but without creating
546 549 an entirely new manifest.
547 550
548 551 It also reports nonexistent files by marking them bad with match.bad().
549 552 """
550 553 if match.always():
551 554 for f in iter(self):
552 555 yield f
553 556 return
554 557
555 558 fset = set(match.files())
556 559
557 560 # avoid the entire walk if we're only looking for specific files
558 561 if self._filesfastpath(match):
559 562 for fn in sorted(fset):
560 563 if fn in self:
561 564 yield fn
562 565 return
563 566
564 567 for fn in self:
565 568 if fn in fset:
566 569 # specified pattern is the exact name
567 570 fset.remove(fn)
568 571 if match(fn):
569 572 yield fn
570 573
571 574 # for dirstate.walk, files=[''] means "walk the whole tree".
572 575 # follow that here, too
573 576 fset.discard(b'')
574 577
575 578 for fn in sorted(fset):
576 579 if not self.hasdir(fn):
577 580 match.bad(fn, None)
578 581
579 582 def _matches(self, match):
580 583 '''generate a new manifest filtered by the match argument'''
581 584 if match.always():
582 585 return self.copy()
583 586
584 587 if self._filesfastpath(match):
585 588 m = manifestdict(self._nodelen)
586 589 lm = self._lm
587 590 for fn in match.files():
588 591 if fn in lm:
589 592 m._lm[fn] = lm[fn]
590 593 return m
591 594
592 595 m = manifestdict(self._nodelen)
593 596 m._lm = self._lm.filtercopy(match)
594 597 return m
595 598
596 599 def diff(self, m2, match=None, clean=False):
597 600 """Finds changes between the current manifest and m2.
598 601
599 602 Args:
600 603 m2: the manifest to which this manifest should be compared.
601 604 clean: if true, include files unchanged between these manifests
602 605 with a None value in the returned dictionary.
603 606
604 607 The result is returned as a dict with filename as key and
605 608 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
606 609 nodeid in the current/other manifest and fl1/fl2 is the flag
607 610 in the current/other manifest. Where the file does not exist,
608 611 the nodeid will be None and the flags will be the empty
609 612 string.
610 613 """
611 614 if match:
612 615 m1 = self._matches(match)
613 616 m2 = m2._matches(match)
614 617 return m1.diff(m2, clean=clean)
615 618 return self._lm.diff(m2._lm, clean)
616 619
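# Shape of the dict returned by diff() above, with hypothetical entries:
#
#     {
#         b'a.txt': ((node1, b''), (node2, b'x')),  # differs (here: flags changed)
#         b'b.txt': ((None, b''), (node3, b'')),    # only present in m2
#         b'c.txt': None,                           # unchanged; reported only with clean=True
#     }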
617 620 def setflag(self, key, flag):
618 621 if flag not in _manifestflags:
619 622 raise TypeError(b"Invalid manifest flag set.")
620 623 self._lm[key] = self[key], flag
621 624
622 625 def get(self, key, default=None):
623 626 try:
624 627 return self._lm[key][0]
625 628 except KeyError:
626 629 return default
627 630
628 631 def flags(self, key):
629 632 try:
630 633 return self._lm[key][1]
631 634 except KeyError:
632 635 return b''
633 636
634 637 def copy(self):
635 638 c = manifestdict(self._nodelen)
636 639 c._lm = self._lm.copy()
637 640 return c
638 641
639 642 def items(self):
640 643 return (x[:2] for x in self._lm.iterentries())
641 644
642 645 def iteritems(self):
643 646 return (x[:2] for x in self._lm.iterentries())
644 647
645 648 def iterentries(self):
646 649 return self._lm.iterentries()
647 650
648 651 def text(self):
649 652 # most likely uses native version
650 653 return self._lm.text()
651 654
652 655 def fastdelta(self, base, changes):
653 656 """Given a base manifest text as a bytearray and a list of changes
654 657 relative to that text, compute a delta that can be used by revlog.
655 658 """
656 659 delta = []
657 660 dstart = None
658 661 dend = None
659 662 dline = [b""]
660 663 start = 0
661 664 # zero copy representation of base as a buffer
662 665 addbuf = util.buffer(base)
663 666
664 667 changes = list(changes)
665 668 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
666 669 # start with a readonly loop that finds the offset of
667 670 # each line and creates the deltas
668 671 for f, todelete in changes:
669 672 # bs will either be the index of the item or the insert point
670 673 start, end = _msearch(addbuf, f, start)
671 674 if not todelete:
672 675 h, fl = self._lm[f]
673 676 l = b"%s\0%s%s\n" % (f, hex(h), fl)
674 677 else:
675 678 if start == end:
676 679 # item we want to delete was not found, error out
677 680 raise AssertionError(
678 681 _(b"failed to remove %s from manifest") % f
679 682 )
680 683 l = b""
681 684 if dstart is not None and dstart <= start and dend >= start:
682 685 if dend < end:
683 686 dend = end
684 687 if l:
685 688 dline.append(l)
686 689 else:
687 690 if dstart is not None:
688 691 delta.append([dstart, dend, b"".join(dline)])
689 692 dstart = start
690 693 dend = end
691 694 dline = [l]
692 695
693 696 if dstart is not None:
694 697 delta.append([dstart, dend, b"".join(dline)])
695 698 # apply the delta to the base, and get a delta for addrevision
696 699 deltatext, arraytext = _addlistdelta(base, delta)
697 700 else:
698 701 # For large changes, it's much cheaper to just build the text and
699 702 # diff it.
700 703 arraytext = bytearray(self.text())
701 704 deltatext = mdiff.textdiff(
702 705 util.buffer(base), util.buffer(arraytext)
703 706 )
704 707
705 708 return arraytext, deltatext
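A hedged sketch of the `changes` argument: each entry pairs a path with a "to delete" flag, already sorted (manifestrevlog.add() further down builds exactly this shape with heapq.merge). The paths here are hypothetical.

# hypothetical change list: one added/changed path, one removed path
work = [(b'added.txt', False), (b'removed.txt', True)]
# fastdelta(base, work) then returns the new manifest text (as a bytearray)
# and a binary delta suitable for passing to revlog.addrevision as cachedelta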
706 709
707 710
708 711 def _msearch(m, s, lo=0, hi=None):
709 712 """return a tuple (start, end) that says where to find s within m.
710 713
711 714 If the string is found m[start:end] are the line containing
712 715 that string. If start == end the string was not found and
713 716 they indicate the proper sorted insertion point.
714 717
715 718 m should be a buffer, a memoryview or a byte string.
716 719 s is a byte string"""
717 720
718 721 def advance(i, c):
719 722 while i < lenm and m[i : i + 1] != c:
720 723 i += 1
721 724 return i
722 725
723 726 if not s:
724 727 return (lo, lo)
725 728 lenm = len(m)
726 729 if not hi:
727 730 hi = lenm
728 731 while lo < hi:
729 732 mid = (lo + hi) // 2
730 733 start = mid
731 734 while start > 0 and m[start - 1 : start] != b'\n':
732 735 start -= 1
733 736 end = advance(start, b'\0')
734 737 if bytes(m[start:end]) < s:
735 738 # we know that after the null there are 40 bytes of sha1
736 739 # this translates to the bisect lo = mid + 1
737 740 lo = advance(end + 40, b'\n') + 1
738 741 else:
739 742 # this translates to the bisect hi = mid
740 743 hi = start
741 744 end = advance(lo, b'\0')
742 745 found = m[lo:end]
743 746 if s == found:
744 747 # we know that after the null there are 40 bytes of sha1
745 748 end = advance(end + 40, b'\n')
746 749 return (lo, end + 1)
747 750 else:
748 751 return (lo, lo)
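A hedged example of the (start, end) contract on a hand-built two-line manifest buffer; the file names and 40-character hex nodes are fabricated for illustration only.

# two manifest lines: "bar" then "foo", each name NUL-separated from a fake node
m = b"bar\x00" + b"1" * 40 + b"\n" + b"foo\x00" + b"2" * 40 + b"\n"
start, end = _msearch(m, b"foo")
assert m[start:end] == b"foo\x00" + b"2" * 40 + b"\n"   # the full matching line
start, end = _msearch(m, b"baz")                        # absent entry
assert start == end   # sorted insertion point, right after the b"bar" line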
749 752
750 753
751 754 def _checkforbidden(l):
752 755 """Check filenames for illegal characters."""
753 756 for f in l:
754 757 if b'\n' in f or b'\r' in f:
755 758 raise error.StorageError(
756 759 _(b"'\\n' and '\\r' disallowed in filenames: %r")
757 760 % pycompat.bytestr(f)
758 761 )
759 762
760 763
761 764 # apply the changes collected during the bisect loop to our addlist
762 765 # return a delta suitable for addrevision
763 766 def _addlistdelta(addlist, x):
764 767 # for large addlist arrays, building a new array is cheaper
765 768 # than repeatedly modifying the existing one
766 769 currentposition = 0
767 770 newaddlist = bytearray()
768 771
769 772 for start, end, content in x:
770 773 newaddlist += addlist[currentposition:start]
771 774 if content:
772 775 newaddlist += bytearray(content)
773 776
774 777 currentposition = end
775 778
776 779 newaddlist += addlist[currentposition:]
777 780
778 781 deltatext = b"".join(
779 782 struct.pack(b">lll", start, end, len(content)) + content
780 783 for start, end, content in x
781 784 )
782 785 return deltatext, newaddlist
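A hedged decoding sketch for the record format produced just above: each record is a 12-byte big-endian ">lll" header (start, end, content length) followed by the replacement content.

import struct

def iter_delta_records(deltatext):
    # walk the concatenated (header, content) records in order
    offset = 0
    while offset < len(deltatext):
        start, end, length = struct.unpack_from(b">lll", deltatext, offset)
        yield start, end, deltatext[offset + 12 : offset + 12 + length]
        offset += 12 + length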
783 786
784 787
785 788 def _splittopdir(f):
786 789 if b'/' in f:
787 790 dir, subpath = f.split(b'/', 1)
788 791 return dir + b'/', subpath
789 792 else:
790 793 return b'', f
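A hedged illustration of the split behaviour (hypothetical paths): only the first path component is separated, and it keeps its trailing slash.

assert _splittopdir(b'dir/sub/file.txt') == (b'dir/', b'sub/file.txt')
assert _splittopdir(b'file.txt') == (b'', b'file.txt')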
791 794
792 795
793 796 _noop = lambda s: None
794 797
795 798
796 799 @interfaceutil.implementer(repository.imanifestdict)
797 800 class treemanifest(object):
798 801 def __init__(self, nodeconstants, dir=b'', text=b''):
799 802 self._dir = dir
800 803 self.nodeconstants = nodeconstants
801 804 self._node = self.nodeconstants.nullid
802 805 self._nodelen = self.nodeconstants.nodelen
803 806 self._loadfunc = _noop
804 807 self._copyfunc = _noop
805 808 self._dirty = False
806 809 self._dirs = {}
807 810 self._lazydirs = {}
808 811 # Using _lazymanifest here is a little slower than plain old dicts
809 812 self._files = {}
810 813 self._flags = {}
811 814 if text:
812 815
813 816 def readsubtree(subdir, subm):
814 817 raise AssertionError(
815 818 b'treemanifest constructor only accepts flat manifests'
816 819 )
817 820
818 821 self.parse(text, readsubtree)
819 822 self._dirty = True # Mark flat manifest dirty after parsing
820 823
821 824 def _subpath(self, path):
822 825 return self._dir + path
823 826
824 827 def _loadalllazy(self):
825 828 selfdirs = self._dirs
826 829 subpath = self._subpath
827 830 for d, (node, readsubtree, docopy) in pycompat.iteritems(
828 831 self._lazydirs
829 832 ):
830 833 if docopy:
831 834 selfdirs[d] = readsubtree(subpath(d), node).copy()
832 835 else:
833 836 selfdirs[d] = readsubtree(subpath(d), node)
834 837 self._lazydirs = {}
835 838
836 839 def _loadlazy(self, d):
837 840 v = self._lazydirs.get(d)
838 841 if v:
839 842 node, readsubtree, docopy = v
840 843 if docopy:
841 844 self._dirs[d] = readsubtree(self._subpath(d), node).copy()
842 845 else:
843 846 self._dirs[d] = readsubtree(self._subpath(d), node)
844 847 del self._lazydirs[d]
845 848
846 849 def _loadchildrensetlazy(self, visit):
847 850 if not visit:
848 851 return None
849 852 if visit == b'all' or visit == b'this':
850 853 self._loadalllazy()
851 854 return None
852 855
853 856 loadlazy = self._loadlazy
854 857 for k in visit:
855 858 loadlazy(k + b'/')
856 859 return visit
857 860
858 861 def _loaddifflazy(self, t1, t2):
859 862 """load items in t1 and t2 if they're needed for diffing.
860 863
861 864 The criteria currently are:
862 865 - if it's not present in _lazydirs in either t1 or t2, load it in the
863 866 other (it may already be loaded or it may not exist, doesn't matter)
864 867 - if it's present in _lazydirs in both, compare the nodeid; if it
865 868 differs, load it in both
866 869 """
867 870 toloadlazy = []
868 871 for d, v1 in pycompat.iteritems(t1._lazydirs):
869 872 v2 = t2._lazydirs.get(d)
870 873 if not v2 or v2[0] != v1[0]:
871 874 toloadlazy.append(d)
872 875 for d, v1 in pycompat.iteritems(t2._lazydirs):
873 876 if d not in t1._lazydirs:
874 877 toloadlazy.append(d)
875 878
876 879 for d in toloadlazy:
877 880 t1._loadlazy(d)
878 881 t2._loadlazy(d)
879 882
880 883 def __len__(self):
881 884 self._load()
882 885 size = len(self._files)
883 886 self._loadalllazy()
884 887 for m in self._dirs.values():
885 888 size += m.__len__()
886 889 return size
887 890
888 891 def __nonzero__(self):
889 892 # Faster than "__len__() != 0" since it avoids loading sub-manifests
890 893 return not self._isempty()
891 894
892 895 __bool__ = __nonzero__
893 896
894 897 def _isempty(self):
895 898 self._load() # for consistency; already loaded by all callers
896 899 # See if we can skip loading everything.
897 900 if self._files or (
898 901 self._dirs and any(not m._isempty() for m in self._dirs.values())
899 902 ):
900 903 return False
901 904 self._loadalllazy()
902 905 return not self._dirs or all(m._isempty() for m in self._dirs.values())
903 906
904 907 @encoding.strmethod
905 908 def __repr__(self):
906 909 return (
907 910 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
908 911 % (
909 912 self._dir,
910 913 hex(self._node),
911 914 bool(self._loadfunc is _noop),
912 915 self._dirty,
913 916 id(self),
914 917 )
915 918 )
916 919
917 920 def dir(self):
918 921 """The directory that this tree manifest represents, including a
919 922 trailing '/'. Empty string for the repo root directory."""
920 923 return self._dir
921 924
922 925 def node(self):
923 926 """This node of this instance. nullid for unsaved instances. Should
924 927 be updated when the instance is read or written from a revlog.
925 928 """
926 929 assert not self._dirty
927 930 return self._node
928 931
929 932 def setnode(self, node):
930 933 self._node = node
931 934 self._dirty = False
932 935
933 936 def iterentries(self):
934 937 self._load()
935 938 self._loadalllazy()
936 939 for p, n in sorted(
937 940 itertools.chain(self._dirs.items(), self._files.items())
938 941 ):
939 942 if p in self._files:
940 943 yield self._subpath(p), n, self._flags.get(p, b'')
941 944 else:
942 945 for x in n.iterentries():
943 946 yield x
944 947
945 948 def items(self):
946 949 self._load()
947 950 self._loadalllazy()
948 951 for p, n in sorted(
949 952 itertools.chain(self._dirs.items(), self._files.items())
950 953 ):
951 954 if p in self._files:
952 955 yield self._subpath(p), n
953 956 else:
954 957 for f, sn in pycompat.iteritems(n):
955 958 yield f, sn
956 959
957 960 iteritems = items
958 961
959 962 def iterkeys(self):
960 963 self._load()
961 964 self._loadalllazy()
962 965 for p in sorted(itertools.chain(self._dirs, self._files)):
963 966 if p in self._files:
964 967 yield self._subpath(p)
965 968 else:
966 969 for f in self._dirs[p]:
967 970 yield f
968 971
969 972 def keys(self):
970 973 return list(self.iterkeys())
971 974
972 975 def __iter__(self):
973 976 return self.iterkeys()
974 977
975 978 def __contains__(self, f):
976 979 if f is None:
977 980 return False
978 981 self._load()
979 982 dir, subpath = _splittopdir(f)
980 983 if dir:
981 984 self._loadlazy(dir)
982 985
983 986 if dir not in self._dirs:
984 987 return False
985 988
986 989 return self._dirs[dir].__contains__(subpath)
987 990 else:
988 991 return f in self._files
989 992
990 993 def get(self, f, default=None):
991 994 self._load()
992 995 dir, subpath = _splittopdir(f)
993 996 if dir:
994 997 self._loadlazy(dir)
995 998
996 999 if dir not in self._dirs:
997 1000 return default
998 1001 return self._dirs[dir].get(subpath, default)
999 1002 else:
1000 1003 return self._files.get(f, default)
1001 1004
1002 1005 def __getitem__(self, f):
1003 1006 self._load()
1004 1007 dir, subpath = _splittopdir(f)
1005 1008 if dir:
1006 1009 self._loadlazy(dir)
1007 1010
1008 1011 return self._dirs[dir].__getitem__(subpath)
1009 1012 else:
1010 1013 return self._files[f]
1011 1014
1012 1015 def flags(self, f):
1013 1016 self._load()
1014 1017 dir, subpath = _splittopdir(f)
1015 1018 if dir:
1016 1019 self._loadlazy(dir)
1017 1020
1018 1021 if dir not in self._dirs:
1019 1022 return b''
1020 1023 return self._dirs[dir].flags(subpath)
1021 1024 else:
1022 1025 if f in self._lazydirs or f in self._dirs:
1023 1026 return b''
1024 1027 return self._flags.get(f, b'')
1025 1028
1026 1029 def find(self, f):
1027 1030 self._load()
1028 1031 dir, subpath = _splittopdir(f)
1029 1032 if dir:
1030 1033 self._loadlazy(dir)
1031 1034
1032 1035 return self._dirs[dir].find(subpath)
1033 1036 else:
1034 1037 return self._files[f], self._flags.get(f, b'')
1035 1038
1036 1039 def __delitem__(self, f):
1037 1040 self._load()
1038 1041 dir, subpath = _splittopdir(f)
1039 1042 if dir:
1040 1043 self._loadlazy(dir)
1041 1044
1042 1045 self._dirs[dir].__delitem__(subpath)
1043 1046 # If the directory is now empty, remove it
1044 1047 if self._dirs[dir]._isempty():
1045 1048 del self._dirs[dir]
1046 1049 else:
1047 1050 del self._files[f]
1048 1051 if f in self._flags:
1049 1052 del self._flags[f]
1050 1053 self._dirty = True
1051 1054
1052 1055 def __setitem__(self, f, n):
1053 1056 assert n is not None
1054 1057 self._load()
1055 1058 dir, subpath = _splittopdir(f)
1056 1059 if dir:
1057 1060 self._loadlazy(dir)
1058 1061 if dir not in self._dirs:
1059 1062 self._dirs[dir] = treemanifest(
1060 1063 self.nodeconstants, self._subpath(dir)
1061 1064 )
1062 1065 self._dirs[dir].__setitem__(subpath, n)
1063 1066 else:
1064 1067 # manifest nodes are either 20 bytes or 32 bytes,
1065 1068 # depending on the hash in use. Assert this as historically
1066 1069 # sometimes extra bytes were added.
1067 1070 assert len(n) in (20, 32)
1068 1071 self._files[f] = n
1069 1072 self._dirty = True
1070 1073
1071 1074 def _load(self):
1072 1075 if self._loadfunc is not _noop:
1073 1076 lf, self._loadfunc = self._loadfunc, _noop
1074 1077 lf(self)
1075 1078 elif self._copyfunc is not _noop:
1076 1079 cf, self._copyfunc = self._copyfunc, _noop
1077 1080 cf(self)
1078 1081
1079 1082 def setflag(self, f, flags):
1080 1083 """Set the flags (symlink, executable) for path f."""
1081 1084 if flags not in _manifestflags:
1082 1085 raise TypeError(b"Invalid manifest flag set.")
1083 1086 self._load()
1084 1087 dir, subpath = _splittopdir(f)
1085 1088 if dir:
1086 1089 self._loadlazy(dir)
1087 1090 if dir not in self._dirs:
1088 1091 self._dirs[dir] = treemanifest(
1089 1092 self.nodeconstants, self._subpath(dir)
1090 1093 )
1091 1094 self._dirs[dir].setflag(subpath, flags)
1092 1095 else:
1093 1096 self._flags[f] = flags
1094 1097 self._dirty = True
1095 1098
1096 1099 def copy(self):
1097 1100 copy = treemanifest(self.nodeconstants, self._dir)
1098 1101 copy._node = self._node
1099 1102 copy._dirty = self._dirty
1100 1103 if self._copyfunc is _noop:
1101 1104
1102 1105 def _copyfunc(s):
1103 1106 self._load()
1104 1107 s._lazydirs = {
1105 1108 d: (n, r, True)
1106 1109 for d, (n, r, c) in pycompat.iteritems(self._lazydirs)
1107 1110 }
1108 1111 sdirs = s._dirs
1109 1112 for d, v in pycompat.iteritems(self._dirs):
1110 1113 sdirs[d] = v.copy()
1111 1114 s._files = dict.copy(self._files)
1112 1115 s._flags = dict.copy(self._flags)
1113 1116
1114 1117 if self._loadfunc is _noop:
1115 1118 _copyfunc(copy)
1116 1119 else:
1117 1120 copy._copyfunc = _copyfunc
1118 1121 else:
1119 1122 copy._copyfunc = self._copyfunc
1120 1123 return copy
1121 1124
1122 1125 def filesnotin(self, m2, match=None):
1123 1126 '''Set of files in this manifest that are not in the other'''
1124 1127 if match and not match.always():
1125 1128 m1 = self._matches(match)
1126 1129 m2 = m2._matches(match)
1127 1130 return m1.filesnotin(m2)
1128 1131
1129 1132 files = set()
1130 1133
1131 1134 def _filesnotin(t1, t2):
1132 1135 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1133 1136 return
1134 1137 t1._load()
1135 1138 t2._load()
1136 1139 self._loaddifflazy(t1, t2)
1137 1140 for d, m1 in pycompat.iteritems(t1._dirs):
1138 1141 if d in t2._dirs:
1139 1142 m2 = t2._dirs[d]
1140 1143 _filesnotin(m1, m2)
1141 1144 else:
1142 1145 files.update(m1.iterkeys())
1143 1146
1144 1147 for fn in t1._files:
1145 1148 if fn not in t2._files:
1146 1149 files.add(t1._subpath(fn))
1147 1150
1148 1151 _filesnotin(self, m2)
1149 1152 return files
1150 1153
1151 1154 @propertycache
1152 1155 def _alldirs(self):
1153 1156 return pathutil.dirs(self)
1154 1157
1155 1158 def dirs(self):
1156 1159 return self._alldirs
1157 1160
1158 1161 def hasdir(self, dir):
1159 1162 self._load()
1160 1163 topdir, subdir = _splittopdir(dir)
1161 1164 if topdir:
1162 1165 self._loadlazy(topdir)
1163 1166 if topdir in self._dirs:
1164 1167 return self._dirs[topdir].hasdir(subdir)
1165 1168 return False
1166 1169 dirslash = dir + b'/'
1167 1170 return dirslash in self._dirs or dirslash in self._lazydirs
1168 1171
1169 1172 def walk(self, match):
1170 1173 """Generates matching file names.
1171 1174
1172 1175 It also reports nonexistent files by marking them bad with match.bad().
1173 1176 """
1174 1177 if match.always():
1175 1178 for f in iter(self):
1176 1179 yield f
1177 1180 return
1178 1181
1179 1182 fset = set(match.files())
1180 1183
1181 1184 for fn in self._walk(match):
1182 1185 if fn in fset:
1183 1186 # specified pattern is the exact name
1184 1187 fset.remove(fn)
1185 1188 yield fn
1186 1189
1187 1190 # for dirstate.walk, files=[''] means "walk the whole tree".
1188 1191 # follow that here, too
1189 1192 fset.discard(b'')
1190 1193
1191 1194 for fn in sorted(fset):
1192 1195 if not self.hasdir(fn):
1193 1196 match.bad(fn, None)
1194 1197
1195 1198 def _walk(self, match):
1196 1199 '''Recursively generates matching file names for walk().'''
1197 1200 visit = match.visitchildrenset(self._dir[:-1])
1198 1201 if not visit:
1199 1202 return
1200 1203
1201 1204 # yield this dir's files and walk its submanifests
1202 1205 self._load()
1203 1206 visit = self._loadchildrensetlazy(visit)
1204 1207 for p in sorted(list(self._dirs) + list(self._files)):
1205 1208 if p in self._files:
1206 1209 fullp = self._subpath(p)
1207 1210 if match(fullp):
1208 1211 yield fullp
1209 1212 else:
1210 1213 if not visit or p[:-1] in visit:
1211 1214 for f in self._dirs[p]._walk(match):
1212 1215 yield f
1213 1216
1214 1217 def _matches(self, match):
1215 1218 """recursively generate a new manifest filtered by the match argument."""
1216 1219 if match.always():
1217 1220 return self.copy()
1218 1221 return self._matches_inner(match)
1219 1222
1220 1223 def _matches_inner(self, match):
1221 1224 if match.always():
1222 1225 return self.copy()
1223 1226
1224 1227 visit = match.visitchildrenset(self._dir[:-1])
1225 1228 if visit == b'all':
1226 1229 return self.copy()
1227 1230 ret = treemanifest(self.nodeconstants, self._dir)
1228 1231 if not visit:
1229 1232 return ret
1230 1233
1231 1234 self._load()
1232 1235 for fn in self._files:
1233 1236 # While visitchildrenset *usually* lists only subdirs, this is
1234 1237 # actually up to the matcher and may have some files in the set().
1235 1238 # If visit == 'this', we should obviously look at the files in this
1236 1239 # directory; if visit is a set, and fn is in it, we should inspect
1237 1240 # fn (but no need to inspect things not in the set).
1238 1241 if visit != b'this' and fn not in visit:
1239 1242 continue
1240 1243 fullp = self._subpath(fn)
1241 1244 # visitchildrenset isn't perfect, we still need to call the regular
1242 1245 # matcher code to further filter results.
1243 1246 if not match(fullp):
1244 1247 continue
1245 1248 ret._files[fn] = self._files[fn]
1246 1249 if fn in self._flags:
1247 1250 ret._flags[fn] = self._flags[fn]
1248 1251
1249 1252 visit = self._loadchildrensetlazy(visit)
1250 1253 for dir, subm in pycompat.iteritems(self._dirs):
1251 1254 if visit and dir[:-1] not in visit:
1252 1255 continue
1253 1256 m = subm._matches_inner(match)
1254 1257 if not m._isempty():
1255 1258 ret._dirs[dir] = m
1256 1259
1257 1260 if not ret._isempty():
1258 1261 ret._dirty = True
1259 1262 return ret
1260 1263
1261 1264 def fastdelta(self, base, changes):
1262 1265 raise FastdeltaUnavailable()
1263 1266
1264 1267 def diff(self, m2, match=None, clean=False):
1265 1268 """Finds changes between the current manifest and m2.
1266 1269
1267 1270 Args:
1268 1271 m2: the manifest to which this manifest should be compared.
1269 1272 clean: if true, include files unchanged between these manifests
1270 1273 with a None value in the returned dictionary.
1271 1274
1272 1275 The result is returned as a dict with filename as key and
1273 1276 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1274 1277 nodeid in the current/other manifest and fl1/fl2 is the flag
1275 1278 in the current/other manifest. Where the file does not exist,
1276 1279 the nodeid will be None and the flags will be the empty
1277 1280 string.
1278 1281 """
1279 1282 if match and not match.always():
1280 1283 m1 = self._matches(match)
1281 1284 m2 = m2._matches(match)
1282 1285 return m1.diff(m2, clean=clean)
1283 1286 result = {}
1284 1287 emptytree = treemanifest(self.nodeconstants)
1285 1288
1286 1289 def _iterativediff(t1, t2, stack):
1287 1290 """compares two tree manifests and append new tree-manifests which
1288 1291 needs to be compared to stack"""
1289 1292 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1290 1293 return
1291 1294 t1._load()
1292 1295 t2._load()
1293 1296 self._loaddifflazy(t1, t2)
1294 1297
1295 1298 for d, m1 in pycompat.iteritems(t1._dirs):
1296 1299 m2 = t2._dirs.get(d, emptytree)
1297 1300 stack.append((m1, m2))
1298 1301
1299 1302 for d, m2 in pycompat.iteritems(t2._dirs):
1300 1303 if d not in t1._dirs:
1301 1304 stack.append((emptytree, m2))
1302 1305
1303 1306 for fn, n1 in pycompat.iteritems(t1._files):
1304 1307 fl1 = t1._flags.get(fn, b'')
1305 1308 n2 = t2._files.get(fn, None)
1306 1309 fl2 = t2._flags.get(fn, b'')
1307 1310 if n1 != n2 or fl1 != fl2:
1308 1311 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1309 1312 elif clean:
1310 1313 result[t1._subpath(fn)] = None
1311 1314
1312 1315 for fn, n2 in pycompat.iteritems(t2._files):
1313 1316 if fn not in t1._files:
1314 1317 fl2 = t2._flags.get(fn, b'')
1315 1318 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1316 1319
1317 1320 stackls = []
1318 1321 _iterativediff(self, m2, stackls)
1319 1322 while stackls:
1320 1323 t1, t2 = stackls.pop()
1321 1324 # stackls is populated in the function call
1322 1325 _iterativediff(t1, t2, stackls)
1323 1326 return result
1324 1327
1325 1328 def unmodifiedsince(self, m2):
1326 1329 return not self._dirty and not m2._dirty and self._node == m2._node
1327 1330
1328 1331 def parse(self, text, readsubtree):
1329 1332 selflazy = self._lazydirs
1330 1333 for f, n, fl in _parse(self._nodelen, text):
1331 1334 if fl == b't':
1332 1335 f = f + b'/'
1333 1336 # False below means "doesn't need to be copied" and can use the
1334 1337 # cached value from readsubtree directly.
1335 1338 selflazy[f] = (n, readsubtree, False)
1336 1339 elif b'/' in f:
1337 1340 # This is a flat manifest, so use __setitem__ and setflag rather
1338 1341 # than assigning directly to _files and _flags, so we can
1339 1342 # assign a path in a subdirectory, and to mark dirty (compared
1340 1343 # to nullid).
1341 1344 self[f] = n
1342 1345 if fl:
1343 1346 self.setflag(f, fl)
1344 1347 else:
1345 1348 # Assigning to _files and _flags avoids marking as dirty,
1346 1349 # and should be a little faster.
1347 1350 self._files[f] = n
1348 1351 if fl:
1349 1352 self._flags[f] = fl
1350 1353
1351 1354 def text(self):
1352 1355 """Get the full data of this manifest as a bytestring."""
1353 1356 self._load()
1354 1357 return _text(self.iterentries())
1355 1358
1356 1359 def dirtext(self):
1357 1360 """Get the full data of this directory as a bytestring. Make sure that
1358 1361 any submanifests have been written first, so their nodeids are correct.
1359 1362 """
1360 1363 self._load()
1361 1364 flags = self.flags
1362 1365 lazydirs = [
1363 1366 (d[:-1], v[0], b't') for d, v in pycompat.iteritems(self._lazydirs)
1364 1367 ]
1365 1368 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1366 1369 files = [(f, self._files[f], flags(f)) for f in self._files]
1367 1370 return _text(sorted(dirs + files + lazydirs))
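A hedged illustration of one dirtext() entry for a subdirectory (hypothetical name, fake hex node): the trailing 't' flag is what parse() above recognises as a sub-tree rather than a regular file.

# one sub-tree line in the directory manifest text (placeholder node)
line = b"subdir\x00" + b"a" * 40 + b"t\n"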
1368 1371
1369 1372 def read(self, gettext, readsubtree):
1370 1373 def _load_for_read(s):
1371 1374 s.parse(gettext(), readsubtree)
1372 1375 s._dirty = False
1373 1376
1374 1377 self._loadfunc = _load_for_read
1375 1378
1376 1379 def writesubtrees(self, m1, m2, writesubtree, match):
1377 1380 self._load() # for consistency; should never have any effect here
1378 1381 m1._load()
1379 1382 m2._load()
1380 1383 emptytree = treemanifest(self.nodeconstants)
1381 1384
1382 1385 def getnode(m, d):
1383 1386 ld = m._lazydirs.get(d)
1384 1387 if ld:
1385 1388 return ld[0]
1386 1389 return m._dirs.get(d, emptytree)._node
1387 1390
1388 1391 # let's skip investigating things that `match` says we do not need.
1389 1392 visit = match.visitchildrenset(self._dir[:-1])
1390 1393 visit = self._loadchildrensetlazy(visit)
1391 1394 if visit == b'this' or visit == b'all':
1392 1395 visit = None
1393 1396 for d, subm in pycompat.iteritems(self._dirs):
1394 1397 if visit and d[:-1] not in visit:
1395 1398 continue
1396 1399 subp1 = getnode(m1, d)
1397 1400 subp2 = getnode(m2, d)
1398 1401 if subp1 == self.nodeconstants.nullid:
1399 1402 subp1, subp2 = subp2, subp1
1400 1403 writesubtree(subm, subp1, subp2, match)
1401 1404
1402 1405 def walksubtrees(self, matcher=None):
1403 1406 """Returns an iterator of the subtrees of this manifest, including this
1404 1407 manifest itself.
1405 1408
1406 1409 If `matcher` is provided, it only returns subtrees that match.
1407 1410 """
1408 1411 if matcher and not matcher.visitdir(self._dir[:-1]):
1409 1412 return
1410 1413 if not matcher or matcher(self._dir[:-1]):
1411 1414 yield self
1412 1415
1413 1416 self._load()
1414 1417 # OPT: use visitchildrenset to avoid loading everything.
1415 1418 self._loadalllazy()
1416 1419 for d, subm in pycompat.iteritems(self._dirs):
1417 1420 for subtree in subm.walksubtrees(matcher=matcher):
1418 1421 yield subtree
1419 1422
1420 1423
1421 1424 class manifestfulltextcache(util.lrucachedict):
1422 1425 """File-backed LRU cache for the manifest cache
1423 1426
1424 1427 File consists of entries, up to EOF:
1425 1428
1426 1429 - 20 bytes node, 4 bytes length, <length> manifest data
1427 1430
1428 1431 These are written in reverse cache order (oldest to newest).
1429 1432
1430 1433 """
1431 1434
1432 1435 _file = b'manifestfulltextcache'
1433 1436
1434 1437 def __init__(self, max):
1435 1438 super(manifestfulltextcache, self).__init__(max)
1436 1439 self._dirty = False
1437 1440 self._read = False
1438 1441 self._opener = None
1439 1442
1440 1443 def read(self):
1441 1444 if self._read or self._opener is None:
1442 1445 return
1443 1446
1444 1447 try:
1445 1448 with self._opener(self._file) as fp:
1446 1449 set = super(manifestfulltextcache, self).__setitem__
1447 1450 # ignore trailing data; this is a cache, so corruption is simply skipped
1448 1451 while True:
1449 1452 # TODO do we need to do work here for sha1 portability?
1450 1453 node = fp.read(20)
1451 1454 if len(node) < 20:
1452 1455 break
1453 1456 try:
1454 1457 size = struct.unpack(b'>L', fp.read(4))[0]
1455 1458 except struct.error:
1456 1459 break
1457 1460 value = bytearray(fp.read(size))
1458 1461 if len(value) != size:
1459 1462 break
1460 1463 set(node, value)
1461 1464 except IOError:
1462 1465 # the file is allowed to be missing
1463 1466 pass
1464 1467
1465 1468 self._read = True
1466 1469 self._dirty = False
1467 1470
1468 1471 def write(self):
1469 1472 if not self._dirty or self._opener is None:
1470 1473 return
1471 1474 # rotate backwards to the first used node
1472 1475 try:
1473 1476 with self._opener(
1474 1477 self._file, b'w', atomictemp=True, checkambig=True
1475 1478 ) as fp:
1476 1479 node = self._head.prev
1477 1480 while True:
1478 1481 if node.key in self._cache:
1479 1482 fp.write(node.key)
1480 1483 fp.write(struct.pack(b'>L', len(node.value)))
1481 1484 fp.write(node.value)
1482 1485 if node is self._head:
1483 1486 break
1484 1487 node = node.prev
1485 1488 except IOError:
1486 1489 # We could not write the cache (e.g. a permission error);
1487 1490 # it is fine for the cached content to be missing.
1488 1491 #
1489 1492 # We could try harder and see if we could recreate a wcache
1490 1493 # directory where we could write to.
1491 1494 #
1492 1495 # XXX the error passes silently; having some way to report it via
1493 1496 # `ui.log` would be nice.
1494 1497 pass
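A hedged sketch of a single on-disk cache entry, matching what read() and write() above handle: a 20-byte node, a big-endian 32-bit length, then the raw manifest text (placeholder values below).

import struct

node = b"\x00" * 20                      # hypothetical manifest node
text = b"foo\x00" + b"1" * 40 + b"\n"    # hypothetical manifest text
entry = node + struct.pack(b">L", len(text)) + text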
1495 1498
1496 1499 def __len__(self):
1497 1500 if not self._read:
1498 1501 self.read()
1499 1502 return super(manifestfulltextcache, self).__len__()
1500 1503
1501 1504 def __contains__(self, k):
1502 1505 if not self._read:
1503 1506 self.read()
1504 1507 return super(manifestfulltextcache, self).__contains__(k)
1505 1508
1506 1509 def __iter__(self):
1507 1510 if not self._read:
1508 1511 self.read()
1509 1512 return super(manifestfulltextcache, self).__iter__()
1510 1513
1511 1514 def __getitem__(self, k):
1512 1515 if not self._read:
1513 1516 self.read()
1514 1517 # the cache lru order can change on read
1515 1518 setdirty = self._cache.get(k) is not self._head
1516 1519 value = super(manifestfulltextcache, self).__getitem__(k)
1517 1520 if setdirty:
1518 1521 self._dirty = True
1519 1522 return value
1520 1523
1521 1524 def __setitem__(self, k, v):
1522 1525 if not self._read:
1523 1526 self.read()
1524 1527 super(manifestfulltextcache, self).__setitem__(k, v)
1525 1528 self._dirty = True
1526 1529
1527 1530 def __delitem__(self, k):
1528 1531 if not self._read:
1529 1532 self.read()
1530 1533 super(manifestfulltextcache, self).__delitem__(k)
1531 1534 self._dirty = True
1532 1535
1533 1536 def get(self, k, default=None):
1534 1537 if not self._read:
1535 1538 self.read()
1536 1539 return super(manifestfulltextcache, self).get(k, default=default)
1537 1540
1538 1541 def clear(self, clear_persisted_data=False):
1539 1542 super(manifestfulltextcache, self).clear()
1540 1543 if clear_persisted_data:
1541 1544 self._dirty = True
1542 1545 self.write()
1543 1546 self._read = False
1544 1547
1545 1548
1546 1549 # an upper bound of what we expect from compression
1547 1550 # (the real-life value seems to be "3")
1548 1551 MAXCOMPRESSION = 3
1549 1552
1550 1553
1551 1554 class FastdeltaUnavailable(Exception):
1552 1555 """Exception raised when fastdelta isn't usable on a manifest."""
1553 1556
1554 1557
1555 1558 @interfaceutil.implementer(repository.imanifeststorage)
1556 1559 class manifestrevlog(object):
1557 1560 """A revlog that stores manifest texts. This is responsible for caching the
1558 1561 full-text manifest contents.
1559 1562 """
1560 1563
1561 1564 def __init__(
1562 1565 self,
1563 1566 nodeconstants,
1564 1567 opener,
1565 1568 tree=b'',
1566 1569 dirlogcache=None,
1567 1570 indexfile=None,
1568 1571 treemanifest=False,
1569 1572 ):
1570 1573 """Constructs a new manifest revlog
1571 1574
1572 1575 `indexfile` - used by extensions to have two manifests at once, like
1573 1576 when transitioning between flat manifests and tree manifests.
1574 1577
1575 1578 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1576 1579 options can also be used to make this a tree manifest revlog. The opener
1577 1580 option takes precedence, so if it is set to True, we ignore whatever
1578 1581 value is passed in to the constructor.
1579 1582 """
1580 1583 self.nodeconstants = nodeconstants
1581 1584 # During normal operations, we expect to deal with no more than four
1582 1585 # revs at a time (such as during commit --amend). When rebasing large
1583 1586 # stacks of commits, the number can go up, hence the config knob below.
1584 1587 cachesize = 4
1585 1588 optiontreemanifest = False
1586 1589 opts = getattr(opener, 'options', None)
1587 1590 if opts is not None:
1588 1591 cachesize = opts.get(b'manifestcachesize', cachesize)
1589 1592 optiontreemanifest = opts.get(b'treemanifest', False)
1590 1593
1591 1594 self._treeondisk = optiontreemanifest or treemanifest
1592 1595
1593 1596 self._fulltextcache = manifestfulltextcache(cachesize)
1594 1597
1595 1598 if tree:
1596 1599 assert self._treeondisk, b'opts is %r' % opts
1597 1600
1598 1601 if indexfile is None:
1599 1602 indexfile = b'00manifest.i'
1600 1603 if tree:
1601 1604 indexfile = b"meta/" + tree + indexfile
1602 1605
1603 1606 self.tree = tree
1604 1607
1605 1608 # The dirlogcache is kept on the root manifest log
1606 1609 if tree:
1607 1610 self._dirlogcache = dirlogcache
1608 1611 else:
1609 1612 self._dirlogcache = {b'': self}
1610 1613
1611 1614 self._revlog = revlog.revlog(
1612 1615 opener,
1613 indexfile,
1616 target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
1617 indexfile=indexfile,
1614 1618 # only root indexfile is cached
1615 1619 checkambig=not bool(tree),
1616 1620 mmaplargeindex=True,
1617 1621 upperboundcomp=MAXCOMPRESSION,
1618 1622 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
1619 1623 )
1620 1624
1621 1625 self.index = self._revlog.index
1622 1626 self.version = self._revlog.version
1623 1627 self._generaldelta = self._revlog._generaldelta
1624 1628 self._revlog.revlog_kind = b'manifest'
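A hedged sketch of the calling convention introduced just above: callers now pass a `target` tuple describing what the revlog tracks, a KIND_* constant plus an identifying value (here the manifest tree path). `opener` and `tree` stand in for the values already available in this constructor.

rl = revlog.revlog(
    opener,
    target=(revlog_constants.KIND_MANIFESTLOG, tree),
    indexfile=b'00manifest.i',
)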
1625 1629
1626 1630 def _setupmanifestcachehooks(self, repo):
1627 1631 """Persist the manifestfulltextcache on lock release"""
1628 1632 if not util.safehasattr(repo, b'_wlockref'):
1629 1633 return
1630 1634
1631 1635 self._fulltextcache._opener = repo.wcachevfs
1632 1636 if repo._currentlock(repo._wlockref) is None:
1633 1637 return
1634 1638
1635 1639 reporef = weakref.ref(repo)
1636 1640 manifestrevlogref = weakref.ref(self)
1637 1641
1638 1642 def persistmanifestcache(success):
1639 1643 # Repo is in an unknown state, do not persist.
1640 1644 if not success:
1641 1645 return
1642 1646
1643 1647 repo = reporef()
1644 1648 self = manifestrevlogref()
1645 1649 if repo is None or self is None:
1646 1650 return
1647 1651 if repo.manifestlog.getstorage(b'') is not self:
1648 1652 # there's a different manifest in play now, abort
1649 1653 return
1650 1654 self._fulltextcache.write()
1651 1655
1652 1656 repo._afterlock(persistmanifestcache)
1653 1657
1654 1658 @property
1655 1659 def fulltextcache(self):
1656 1660 return self._fulltextcache
1657 1661
1658 1662 def clearcaches(self, clear_persisted_data=False):
1659 1663 self._revlog.clearcaches()
1660 1664 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1661 1665 self._dirlogcache = {self.tree: self}
1662 1666
1663 1667 def dirlog(self, d):
1664 1668 if d:
1665 1669 assert self._treeondisk
1666 1670 if d not in self._dirlogcache:
1667 1671 mfrevlog = manifestrevlog(
1668 1672 self.nodeconstants,
1669 1673 self.opener,
1670 1674 d,
1671 1675 self._dirlogcache,
1672 1676 treemanifest=self._treeondisk,
1673 1677 )
1674 1678 self._dirlogcache[d] = mfrevlog
1675 1679 return self._dirlogcache[d]
1676 1680
1677 1681 def add(
1678 1682 self,
1679 1683 m,
1680 1684 transaction,
1681 1685 link,
1682 1686 p1,
1683 1687 p2,
1684 1688 added,
1685 1689 removed,
1686 1690 readtree=None,
1687 1691 match=None,
1688 1692 ):
1689 1693 """add some manifest entry in to the manifest log
1690 1694
1691 1695 input:
1692 1696
1693 1697 m: the manifest dict we want to store
1694 1698 transaction: the open transaction
1695 1699 p1: manifest-node of p1
1696 1700 p2: manifest-node of p2
1697 1701 added: files added/changed compared to the parent
1698 1702 removed: files removed compared to the parent
1699 1703
1700 1704 tree manifest input:
1701 1705
1702 1706 readtree: a function to read a subtree
1703 1707 match: a filematcher for the subpart of the tree manifest
1704 1708 """
1705 1709 try:
1706 1710 if p1 not in self.fulltextcache:
1707 1711 raise FastdeltaUnavailable()
1708 1712 # If our first parent is in the manifest cache, we can
1709 1713 # compute a delta here using properties we know about the
1710 1714 # manifest up-front, which may save time later for the
1711 1715 # revlog layer.
1712 1716
1713 1717 _checkforbidden(added)
1714 1718 # combine the changed lists into one sorted iterator
1715 1719 work = heapq.merge(
1716 1720 [(x, False) for x in sorted(added)],
1717 1721 [(x, True) for x in sorted(removed)],
1718 1722 )
1719 1723
1720 1724 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1721 1725 cachedelta = self._revlog.rev(p1), deltatext
1722 1726 text = util.buffer(arraytext)
1723 1727 rev = self._revlog.addrevision(
1724 1728 text, transaction, link, p1, p2, cachedelta
1725 1729 )
1726 1730 n = self._revlog.node(rev)
1727 1731 except FastdeltaUnavailable:
1728 1732 # The first parent manifest isn't already loaded or the
1729 1733 # manifest implementation doesn't support fastdelta, so
1730 1734 # we'll just encode a fulltext of the manifest and pass
1731 1735 # that through to the revlog layer, and let it handle the
1732 1736 # delta process.
1733 1737 if self._treeondisk:
1734 1738 assert readtree, b"readtree must be set for treemanifest writes"
1735 1739 assert match, b"match must be specified for treemanifest writes"
1736 1740 m1 = readtree(self.tree, p1)
1737 1741 m2 = readtree(self.tree, p2)
1738 1742 n = self._addtree(
1739 1743 m, transaction, link, m1, m2, readtree, match=match
1740 1744 )
1741 1745 arraytext = None
1742 1746 else:
1743 1747 text = m.text()
1744 1748 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1745 1749 n = self._revlog.node(rev)
1746 1750 arraytext = bytearray(text)
1747 1751
1748 1752 if arraytext is not None:
1749 1753 self.fulltextcache[n] = arraytext
1750 1754
1751 1755 return n
1752 1756
1753 1757 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1754 1758 # If the manifest is unchanged compared to one parent,
1755 1759 # don't write a new revision
1756 1760 if self.tree != b'' and (
1757 1761 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1758 1762 ):
1759 1763 return m.node()
1760 1764
1761 1765 def writesubtree(subm, subp1, subp2, match):
1762 1766 sublog = self.dirlog(subm.dir())
1763 1767 sublog.add(
1764 1768 subm,
1765 1769 transaction,
1766 1770 link,
1767 1771 subp1,
1768 1772 subp2,
1769 1773 None,
1770 1774 None,
1771 1775 readtree=readtree,
1772 1776 match=match,
1773 1777 )
1774 1778
1775 1779 m.writesubtrees(m1, m2, writesubtree, match)
1776 1780 text = m.dirtext()
1777 1781 n = None
1778 1782 if self.tree != b'':
1779 1783 # Double-check whether contents are unchanged compared to one parent
1780 1784 if text == m1.dirtext():
1781 1785 n = m1.node()
1782 1786 elif text == m2.dirtext():
1783 1787 n = m2.node()
1784 1788
1785 1789 if not n:
1786 1790 rev = self._revlog.addrevision(
1787 1791 text, transaction, link, m1.node(), m2.node()
1788 1792 )
1789 1793 n = self._revlog.node(rev)
1790 1794
1791 1795 # Save nodeid so parent manifest can calculate its nodeid
1792 1796 m.setnode(n)
1793 1797 return n
1794 1798
1795 1799 def __len__(self):
1796 1800 return len(self._revlog)
1797 1801
1798 1802 def __iter__(self):
1799 1803 return self._revlog.__iter__()
1800 1804
1801 1805 def rev(self, node):
1802 1806 return self._revlog.rev(node)
1803 1807
1804 1808 def node(self, rev):
1805 1809 return self._revlog.node(rev)
1806 1810
1807 1811 def lookup(self, value):
1808 1812 return self._revlog.lookup(value)
1809 1813
1810 1814 def parentrevs(self, rev):
1811 1815 return self._revlog.parentrevs(rev)
1812 1816
1813 1817 def parents(self, node):
1814 1818 return self._revlog.parents(node)
1815 1819
1816 1820 def linkrev(self, rev):
1817 1821 return self._revlog.linkrev(rev)
1818 1822
1819 1823 def checksize(self):
1820 1824 return self._revlog.checksize()
1821 1825
1822 1826 def revision(self, node, _df=None, raw=False):
1823 1827 return self._revlog.revision(node, _df=_df, raw=raw)
1824 1828
1825 1829 def rawdata(self, node, _df=None):
1826 1830 return self._revlog.rawdata(node, _df=_df)
1827 1831
1828 1832 def revdiff(self, rev1, rev2):
1829 1833 return self._revlog.revdiff(rev1, rev2)
1830 1834
1831 1835 def cmp(self, node, text):
1832 1836 return self._revlog.cmp(node, text)
1833 1837
1834 1838 def deltaparent(self, rev):
1835 1839 return self._revlog.deltaparent(rev)
1836 1840
1837 1841 def emitrevisions(
1838 1842 self,
1839 1843 nodes,
1840 1844 nodesorder=None,
1841 1845 revisiondata=False,
1842 1846 assumehaveparentrevisions=False,
1843 1847 deltamode=repository.CG_DELTAMODE_STD,
1844 1848 sidedata_helpers=None,
1845 1849 ):
1846 1850 return self._revlog.emitrevisions(
1847 1851 nodes,
1848 1852 nodesorder=nodesorder,
1849 1853 revisiondata=revisiondata,
1850 1854 assumehaveparentrevisions=assumehaveparentrevisions,
1851 1855 deltamode=deltamode,
1852 1856 sidedata_helpers=sidedata_helpers,
1853 1857 )
1854 1858
1855 1859 def addgroup(
1856 1860 self,
1857 1861 deltas,
1858 1862 linkmapper,
1859 1863 transaction,
1860 1864 alwayscache=False,
1861 1865 addrevisioncb=None,
1862 1866 duplicaterevisioncb=None,
1863 1867 ):
1864 1868 return self._revlog.addgroup(
1865 1869 deltas,
1866 1870 linkmapper,
1867 1871 transaction,
1868 1872 alwayscache=alwayscache,
1869 1873 addrevisioncb=addrevisioncb,
1870 1874 duplicaterevisioncb=duplicaterevisioncb,
1871 1875 )
1872 1876
1873 1877 def rawsize(self, rev):
1874 1878 return self._revlog.rawsize(rev)
1875 1879
1876 1880 def getstrippoint(self, minlink):
1877 1881 return self._revlog.getstrippoint(minlink)
1878 1882
1879 1883 def strip(self, minlink, transaction):
1880 1884 return self._revlog.strip(minlink, transaction)
1881 1885
1882 1886 def files(self):
1883 1887 return self._revlog.files()
1884 1888
1885 1889 def clone(self, tr, destrevlog, **kwargs):
1886 1890 if not isinstance(destrevlog, manifestrevlog):
1887 1891 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1888 1892
1889 1893 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1890 1894
1891 1895 def storageinfo(
1892 1896 self,
1893 1897 exclusivefiles=False,
1894 1898 sharedfiles=False,
1895 1899 revisionscount=False,
1896 1900 trackedsize=False,
1897 1901 storedsize=False,
1898 1902 ):
1899 1903 return self._revlog.storageinfo(
1900 1904 exclusivefiles=exclusivefiles,
1901 1905 sharedfiles=sharedfiles,
1902 1906 revisionscount=revisionscount,
1903 1907 trackedsize=trackedsize,
1904 1908 storedsize=storedsize,
1905 1909 )
1906 1910
1907 1911 @property
1908 1912 def indexfile(self):
1909 1913 return self._revlog.indexfile
1910 1914
1911 1915 @indexfile.setter
1912 1916 def indexfile(self, value):
1913 1917 self._revlog.indexfile = value
1914 1918
1915 1919 @property
1916 1920 def opener(self):
1917 1921 return self._revlog.opener
1918 1922
1919 1923 @opener.setter
1920 1924 def opener(self, value):
1921 1925 self._revlog.opener = value
1922 1926
1923 1927
1924 1928 @interfaceutil.implementer(repository.imanifestlog)
1925 1929 class manifestlog(object):
1926 1930 """A collection class representing the collection of manifest snapshots
1927 1931 referenced by commits in the repository.
1928 1932
1929 1933 In this situation, 'manifest' refers to the abstract concept of a snapshot
1930 1934 of the list of files in the given commit. Consumers of the output of this
1931 1935 class do not care about the implementation details of the actual manifests
1932 1936 they receive (i.e. tree or flat or lazily loaded, etc)."""
1933 1937
1934 1938 def __init__(self, opener, repo, rootstore, narrowmatch):
1935 1939 self.nodeconstants = repo.nodeconstants
1936 1940 usetreemanifest = False
1937 1941 cachesize = 4
1938 1942
1939 1943 opts = getattr(opener, 'options', None)
1940 1944 if opts is not None:
1941 1945 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1942 1946 cachesize = opts.get(b'manifestcachesize', cachesize)
1943 1947
1944 1948 self._treemanifests = usetreemanifest
1945 1949
1946 1950 self._rootstore = rootstore
1947 1951 self._rootstore._setupmanifestcachehooks(repo)
1948 1952 self._narrowmatch = narrowmatch
1949 1953
1950 1954 # A cache of the manifestctx or treemanifestctx for each directory
1951 1955 self._dirmancache = {}
1952 1956 self._dirmancache[b''] = util.lrucachedict(cachesize)
1953 1957
1954 1958 self._cachesize = cachesize
1955 1959
1956 1960 def __getitem__(self, node):
1957 1961 """Retrieves the manifest instance for the given node. Throws a
1958 1962 LookupError if not found.
1959 1963 """
1960 1964 return self.get(b'', node)
1961 1965
1962 1966 def get(self, tree, node, verify=True):
1963 1967 """Retrieves the manifest instance for the given node. Throws a
1964 1968 LookupError if not found.
1965 1969
1966 1970 `verify` - if True an exception will be thrown if the node is not in
1967 1971 the revlog
1968 1972 """
1969 1973 if node in self._dirmancache.get(tree, ()):
1970 1974 return self._dirmancache[tree][node]
1971 1975
1972 1976 if not self._narrowmatch.always():
1973 1977 if not self._narrowmatch.visitdir(tree[:-1]):
1974 1978 return excludeddirmanifestctx(self.nodeconstants, tree, node)
1975 1979 if tree:
1976 1980 if self._rootstore._treeondisk:
1977 1981 if verify:
1978 1982 # Side-effect is LookupError is raised if node doesn't
1979 1983 # exist.
1980 1984 self.getstorage(tree).rev(node)
1981 1985
1982 1986 m = treemanifestctx(self, tree, node)
1983 1987 else:
1984 1988 raise error.Abort(
1985 1989 _(
1986 1990 b"cannot ask for manifest directory '%s' in a flat "
1987 1991 b"manifest"
1988 1992 )
1989 1993 % tree
1990 1994 )
1991 1995 else:
1992 1996 if verify:
1993 1997 # Side-effect is LookupError is raised if node doesn't exist.
1994 1998 self._rootstore.rev(node)
1995 1999
1996 2000 if self._treemanifests:
1997 2001 m = treemanifestctx(self, b'', node)
1998 2002 else:
1999 2003 m = manifestctx(self, node)
2000 2004
2001 2005 if node != self.nodeconstants.nullid:
2002 2006 mancache = self._dirmancache.get(tree)
2003 2007 if not mancache:
2004 2008 mancache = util.lrucachedict(self._cachesize)
2005 2009 self._dirmancache[tree] = mancache
2006 2010 mancache[node] = m
2007 2011 return m
2008 2012
2009 2013 def getstorage(self, tree):
2010 2014 return self._rootstore.dirlog(tree)
2011 2015
2012 2016 def clearcaches(self, clear_persisted_data=False):
2013 2017 self._dirmancache.clear()
2014 2018 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2015 2019
2016 2020 def rev(self, node):
2017 2021 return self._rootstore.rev(node)
2018 2022
2019 2023 def update_caches(self, transaction):
2020 2024 return self._rootstore._revlog.update_caches(transaction=transaction)
2021 2025
2022 2026
2023 2027 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2024 2028 class memmanifestctx(object):
2025 2029 def __init__(self, manifestlog):
2026 2030 self._manifestlog = manifestlog
2027 2031 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2028 2032
2029 2033 def _storage(self):
2030 2034 return self._manifestlog.getstorage(b'')
2031 2035
2032 2036 def copy(self):
2033 2037 memmf = memmanifestctx(self._manifestlog)
2034 2038 memmf._manifestdict = self.read().copy()
2035 2039 return memmf
2036 2040
2037 2041 def read(self):
2038 2042 return self._manifestdict
2039 2043
2040 2044 def write(self, transaction, link, p1, p2, added, removed, match=None):
2041 2045 return self._storage().add(
2042 2046 self._manifestdict,
2043 2047 transaction,
2044 2048 link,
2045 2049 p1,
2046 2050 p2,
2047 2051 added,
2048 2052 removed,
2049 2053 match=match,
2050 2054 )
2051 2055
2052 2056
2053 2057 @interfaceutil.implementer(repository.imanifestrevisionstored)
2054 2058 class manifestctx(object):
2055 2059 """A class representing a single revision of a manifest, including its
2056 2060 contents, its parent revs, and its linkrev.
2057 2061 """
2058 2062
2059 2063 def __init__(self, manifestlog, node):
2060 2064 self._manifestlog = manifestlog
2061 2065 self._data = None
2062 2066
2063 2067 self._node = node
2064 2068
2065 2069 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2066 2070 # but let's add it later when something needs it and we can load it
2067 2071 # lazily.
2068 2072 # self.p1, self.p2 = store.parents(node)
2069 2073 # rev = store.rev(node)
2070 2074 # self.linkrev = store.linkrev(rev)
2071 2075
2072 2076 def _storage(self):
2073 2077 return self._manifestlog.getstorage(b'')
2074 2078
2075 2079 def node(self):
2076 2080 return self._node
2077 2081
2078 2082 def copy(self):
2079 2083 memmf = memmanifestctx(self._manifestlog)
2080 2084 memmf._manifestdict = self.read().copy()
2081 2085 return memmf
2082 2086
2083 2087 @propertycache
2084 2088 def parents(self):
2085 2089 return self._storage().parents(self._node)
2086 2090
2087 2091 def read(self):
2088 2092 if self._data is None:
2089 2093 nc = self._manifestlog.nodeconstants
2090 2094 if self._node == nc.nullid:
2091 2095 self._data = manifestdict(nc.nodelen)
2092 2096 else:
2093 2097 store = self._storage()
2094 2098 if self._node in store.fulltextcache:
2095 2099 text = pycompat.bytestr(store.fulltextcache[self._node])
2096 2100 else:
2097 2101 text = store.revision(self._node)
2098 2102 arraytext = bytearray(text)
2099 2103 store.fulltextcache[self._node] = arraytext
2100 2104 self._data = manifestdict(nc.nodelen, text)
2101 2105 return self._data
2102 2106
2103 2107 def readfast(self, shallow=False):
2104 2108 """Calls either readdelta or read, based on which would be less work.
2105 2109 readdelta is called if the delta is against the p1, and therefore can be
2106 2110 read quickly.
2107 2111
2108 2112 If `shallow` is True, nothing changes since this is a flat manifest.
2109 2113 """
2110 2114 store = self._storage()
2111 2115 r = store.rev(self._node)
2112 2116 deltaparent = store.deltaparent(r)
2113 2117 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2114 2118 return self.readdelta()
2115 2119 return self.read()
2116 2120
2117 2121 def readdelta(self, shallow=False):
2118 2122 """Returns a manifest containing just the entries that are present
2119 2123 in this manifest, but not in its p1 manifest. This is efficient to read
2120 2124 if the revlog delta is already p1.
2121 2125
2122 2126 Changing the value of `shallow` has no effect on flat manifests.
2123 2127 """
2124 2128 store = self._storage()
2125 2129 r = store.rev(self._node)
2126 2130 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2127 2131 return manifestdict(store.nodeconstants.nodelen, d)
2128 2132
2129 2133 def find(self, key):
2130 2134 return self.read().find(key)
2131 2135
2132 2136
2133 2137 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2134 2138 class memtreemanifestctx(object):
2135 2139 def __init__(self, manifestlog, dir=b''):
2136 2140 self._manifestlog = manifestlog
2137 2141 self._dir = dir
2138 2142 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2139 2143
2140 2144 def _storage(self):
2141 2145 return self._manifestlog.getstorage(b'')
2142 2146
2143 2147 def copy(self):
2144 2148 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2145 2149 memmf._treemanifest = self._treemanifest.copy()
2146 2150 return memmf
2147 2151
2148 2152 def read(self):
2149 2153 return self._treemanifest
2150 2154
2151 2155 def write(self, transaction, link, p1, p2, added, removed, match=None):
2152 2156 def readtree(dir, node):
2153 2157 return self._manifestlog.get(dir, node).read()
2154 2158
2155 2159 return self._storage().add(
2156 2160 self._treemanifest,
2157 2161 transaction,
2158 2162 link,
2159 2163 p1,
2160 2164 p2,
2161 2165 added,
2162 2166 removed,
2163 2167 readtree=readtree,
2164 2168 match=match,
2165 2169 )
2166 2170
2167 2171
2168 2172 @interfaceutil.implementer(repository.imanifestrevisionstored)
2169 2173 class treemanifestctx(object):
2170 2174 def __init__(self, manifestlog, dir, node):
2171 2175 self._manifestlog = manifestlog
2172 2176 self._dir = dir
2173 2177 self._data = None
2174 2178
2175 2179 self._node = node
2176 2180
2177 2181 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2178 2182 # we can instantiate treemanifestctx objects for directories we don't
2179 2183 # have on disk.
2180 2184 # self.p1, self.p2 = store.parents(node)
2181 2185 # rev = store.rev(node)
2182 2186 # self.linkrev = store.linkrev(rev)
2183 2187
2184 2188 def _storage(self):
2185 2189 narrowmatch = self._manifestlog._narrowmatch
2186 2190 if not narrowmatch.always():
2187 2191 if not narrowmatch.visitdir(self._dir[:-1]):
2188 2192 return excludedmanifestrevlog(
2189 2193 self._manifestlog.nodeconstants, self._dir
2190 2194 )
2191 2195 return self._manifestlog.getstorage(self._dir)
2192 2196
2193 2197 def read(self):
2194 2198 if self._data is None:
2195 2199 store = self._storage()
2196 2200 if self._node == self._manifestlog.nodeconstants.nullid:
2197 2201 self._data = treemanifest(self._manifestlog.nodeconstants)
2198 2202 # TODO accessing non-public API
2199 2203 elif store._treeondisk:
2200 2204 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2201 2205
2202 2206 def gettext():
2203 2207 return store.revision(self._node)
2204 2208
2205 2209 def readsubtree(dir, subm):
2206 2210 # Set verify to False since we need to be able to create
2207 2211 # subtrees for trees that don't exist on disk.
2208 2212 return self._manifestlog.get(dir, subm, verify=False).read()
2209 2213
2210 2214 m.read(gettext, readsubtree)
2211 2215 m.setnode(self._node)
2212 2216 self._data = m
2213 2217 else:
2214 2218 if self._node in store.fulltextcache:
2215 2219 text = pycompat.bytestr(store.fulltextcache[self._node])
2216 2220 else:
2217 2221 text = store.revision(self._node)
2218 2222 arraytext = bytearray(text)
2219 2223 store.fulltextcache[self._node] = arraytext
2220 2224 self._data = treemanifest(
2221 2225 self._manifestlog.nodeconstants, dir=self._dir, text=text
2222 2226 )
2223 2227
2224 2228 return self._data
2225 2229
2226 2230 def node(self):
2227 2231 return self._node
2228 2232
2229 2233 def copy(self):
2230 2234 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2231 2235 memmf._treemanifest = self.read().copy()
2232 2236 return memmf
2233 2237
2234 2238 @propertycache
2235 2239 def parents(self):
2236 2240 return self._storage().parents(self._node)
2237 2241
2238 2242 def readdelta(self, shallow=False):
2239 2243 """Returns a manifest containing just the entries that are present
2240 2244 in this manifest, but not in its p1 manifest. This is efficient to read
2241 2245 if the revlog delta is already p1.
2242 2246
2243 2247 If `shallow` is True, this will read the delta for this directory,
2244 2248 without recursively reading subdirectory manifests. Instead, any
2245 2249 subdirectory entry will be reported as it appears in the manifest, i.e.
2246 2250 the subdirectory will be reported among files and distinguished only by
2247 2251 its 't' flag.
2248 2252 """
2249 2253 store = self._storage()
2250 2254 if shallow:
2251 2255 r = store.rev(self._node)
2252 2256 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2253 2257 return manifestdict(store.nodeconstants.nodelen, d)
2254 2258 else:
2255 2259 # Need to perform a slow delta
2256 2260 r0 = store.deltaparent(store.rev(self._node))
2257 2261 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2258 2262 m1 = self.read()
2259 2263 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2260 2264 for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
2261 2265 if n1:
2262 2266 md[f] = n1
2263 2267 if fl1:
2264 2268 md.setflag(f, fl1)
2265 2269 return md
2266 2270
2267 2271 def readfast(self, shallow=False):
2268 2272 """Calls either readdelta or read, based on which would be less work.
2269 2273 readdelta is called if the delta is against the p1, and therefore can be
2270 2274 read quickly.
2271 2275
2272 2276 If `shallow` is True, it only returns the entries from this manifest,
2273 2277 and not any submanifests.
2274 2278 """
2275 2279 store = self._storage()
2276 2280 r = store.rev(self._node)
2277 2281 deltaparent = store.deltaparent(r)
2278 2282 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2279 2283 return self.readdelta(shallow=shallow)
2280 2284
2281 2285 if shallow:
2282 2286 return manifestdict(
2283 2287 store.nodeconstants.nodelen, store.revision(self._node)
2284 2288 )
2285 2289 else:
2286 2290 return self.read()
2287 2291
2288 2292 def find(self, key):
2289 2293 return self.read().find(key)
2290 2294
2291 2295
2292 2296 class excludeddir(treemanifest):
2293 2297 """Stand-in for a directory that is excluded from the repository.
2294 2298
2295 2299 With narrowing active on a repository that uses treemanifests,
2296 2300 some of the directory revlogs will be excluded from the resulting
2297 2301 clone. This is a huge storage win for clients, but means we need
2298 2302 some sort of pseudo-manifest to surface to internals so we can
2299 2303 detect a merge conflict outside the narrowspec. That's what this
2300 2304 class is: it stands in for a directory whose node is known, but
2301 2305 whose contents are unknown.
2302 2306 """
2303 2307
2304 2308 def __init__(self, nodeconstants, dir, node):
2305 2309 super(excludeddir, self).__init__(nodeconstants, dir)
2306 2310 self._node = node
2307 2311 # Add an empty file, which will be included by iterators and such,
2308 2312 # appearing as the directory itself (i.e. something like "dir/")
2309 2313 self._files[b''] = node
2310 2314 self._flags[b''] = b't'
2311 2315
2312 2316 # Manifests outside the narrowspec should never be modified, so avoid
2313 2317 # copying. This makes a noticeable difference when there are very many
2314 2318 # directories outside the narrowspec. Also, it makes sense for the copy to
2315 2319 # be of the same type as the original, which would not happen with the
2316 2320 # super type's copy().
2317 2321 def copy(self):
2318 2322 return self
2319 2323
2320 2324
2321 2325 class excludeddirmanifestctx(treemanifestctx):
2322 2326 """context wrapper for excludeddir - see that docstring for rationale"""
2323 2327
2324 2328 def __init__(self, nodeconstants, dir, node):
2325 2329 self.nodeconstants = nodeconstants
2326 2330 self._dir = dir
2327 2331 self._node = node
2328 2332
2329 2333 def read(self):
2330 2334 return excludeddir(self.nodeconstants, self._dir, self._node)
2331 2335
2332 2336 def readfast(self, shallow=False):
2333 2337 # special version of readfast since we don't have underlying storage
2334 2338 return self.read()
2335 2339
2336 2340 def write(self, *args):
2337 2341 raise error.ProgrammingError(
2338 2342 b'attempt to write manifest from excluded dir %s' % self._dir
2339 2343 )
2340 2344
2341 2345
2342 2346 class excludedmanifestrevlog(manifestrevlog):
2343 2347 """Stand-in for excluded treemanifest revlogs.
2344 2348
2345 2349 When narrowing is active on a treemanifest repository, we'll have
2346 2350 references to directories we can't see due to the revlog being
2347 2351 skipped. This class exists to conform to the manifestrevlog
2348 2352 interface for those directories and proactively prevent writes to
2349 2353 outside the narrowspec.
2350 2354 """
2351 2355
2352 2356 def __init__(self, nodeconstants, dir):
2353 2357 self.nodeconstants = nodeconstants
2354 2358 self._dir = dir
2355 2359
2356 2360 def __len__(self):
2357 2361 raise error.ProgrammingError(
2358 2362 b'attempt to get length of excluded dir %s' % self._dir
2359 2363 )
2360 2364
2361 2365 def rev(self, node):
2362 2366 raise error.ProgrammingError(
2363 2367 b'attempt to get rev from excluded dir %s' % self._dir
2364 2368 )
2365 2369
2366 2370 def linkrev(self, node):
2367 2371 raise error.ProgrammingError(
2368 2372 b'attempt to get linkrev from excluded dir %s' % self._dir
2369 2373 )
2370 2374
2371 2375 def node(self, rev):
2372 2376 raise error.ProgrammingError(
2373 2377 b'attempt to get node from excluded dir %s' % self._dir
2374 2378 )
2375 2379
2376 2380 def add(self, *args, **kwargs):
2377 2381 # We should never write entries in dirlogs outside the narrow clone.
2378 2382 # However, the method still gets called from writesubtree() in
2379 2383 # _addtree(), so we need to handle it. We should possibly make that
2380 2384 # avoid calling add() with a clean manifest (_dirty is always False
2381 2385 # in excludeddir instances).
2382 2386 pass
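# A minimal illustrative sketch of the excluded-directory stand-ins above
# (dir_path and node are placeholder arguments): reads hand back a
# placeholder tree, copy() is a no-op, revision lookups on the excluded
# revlog raise ProgrammingError, and add() deliberately does nothing.
from mercurial import error, manifest
from mercurial.node import sha1nodeconstants


def probe_excluded(dir_path, node):
    # dir_path: bytes directory path ending in b'/', node: 20-byte binary id
    ctx = manifest.excludeddirmanifestctx(sha1nodeconstants, dir_path, node)
    placeholder = ctx.readfast()              # same placeholder as ctx.read()
    assert placeholder.copy() is placeholder  # copy() is a deliberate no-op

    rl = manifest.excludedmanifestrevlog(sha1nodeconstants, dir_path)
    try:
        rl.rev(node)                          # any revision lookup is a bug
    except error.ProgrammingError:
        pass
    rl.add()                                  # writes are silently ignored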
@@ -1,3121 +1,3138 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 ALL_KINDS,
37 38 FLAG_GENERALDELTA,
38 39 FLAG_INLINE_DATA,
39 40 INDEX_HEADER,
40 41 REVLOGV0,
41 42 REVLOGV1,
42 43 REVLOGV1_FLAGS,
43 44 REVLOGV2,
44 45 REVLOGV2_FLAGS,
45 46 REVLOG_DEFAULT_FLAGS,
46 47 REVLOG_DEFAULT_FORMAT,
47 48 REVLOG_DEFAULT_VERSION,
48 49 )
49 50 from .revlogutils.flagutil import (
50 51 REVIDX_DEFAULT_FLAGS,
51 52 REVIDX_ELLIPSIS,
52 53 REVIDX_EXTSTORED,
53 54 REVIDX_FLAGS_ORDER,
54 55 REVIDX_HASCOPIESINFO,
55 56 REVIDX_ISCENSORED,
56 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 58 REVIDX_SIDEDATA,
58 59 )
59 60 from .thirdparty import attr
60 61 from . import (
61 62 ancestor,
62 63 dagop,
63 64 error,
64 65 mdiff,
65 66 policy,
66 67 pycompat,
67 68 templatefilters,
68 69 util,
69 70 )
70 71 from .interfaces import (
71 72 repository,
72 73 util as interfaceutil,
73 74 )
74 75 from .revlogutils import (
75 76 deltas as deltautil,
76 77 flagutil,
77 78 nodemap as nodemaputil,
78 79 revlogv0,
79 80 sidedata as sidedatautil,
80 81 )
81 82 from .utils import (
82 83 storageutil,
83 84 stringutil,
84 85 )
85 86
86 87 # blanked usage of all the names to prevent pyflakes constraints
87 88 # We need these names available in the module for extensions.
88 89 REVLOGV0
89 90 REVLOGV1
90 91 REVLOGV2
91 92 FLAG_INLINE_DATA
92 93 FLAG_GENERALDELTA
93 94 REVLOG_DEFAULT_FLAGS
94 95 REVLOG_DEFAULT_FORMAT
95 96 REVLOG_DEFAULT_VERSION
96 97 REVLOGV1_FLAGS
97 98 REVLOGV2_FLAGS
98 99 REVIDX_ISCENSORED
99 100 REVIDX_ELLIPSIS
100 101 REVIDX_SIDEDATA
101 102 REVIDX_HASCOPIESINFO
102 103 REVIDX_EXTSTORED
103 104 REVIDX_DEFAULT_FLAGS
104 105 REVIDX_FLAGS_ORDER
105 106 REVIDX_RAWTEXT_CHANGING_FLAGS
106 107
107 108 parsers = policy.importmod('parsers')
108 109 rustancestor = policy.importrust('ancestor')
109 110 rustdagop = policy.importrust('dagop')
110 111 rustrevlog = policy.importrust('revlog')
111 112
112 113 # Aliased for performance.
113 114 _zlibdecompress = zlib.decompress
114 115
115 116 # max size of revlog with inline data
116 117 _maxinline = 131072
117 118 _chunksize = 1048576
118 119
119 120 # Flag processors for REVIDX_ELLIPSIS.
120 121 def ellipsisreadprocessor(rl, text):
121 122 return text, False
122 123
123 124
124 125 def ellipsiswriteprocessor(rl, text):
125 126 return text, False
126 127
127 128
128 129 def ellipsisrawprocessor(rl, text):
129 130 return False
130 131
131 132
132 133 ellipsisprocessor = (
133 134 ellipsisreadprocessor,
134 135 ellipsiswriteprocessor,
135 136 ellipsisrawprocessor,
136 137 )
137 138
138 139
139 140 def offset_type(offset, type):
140 141 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 142 raise ValueError(b'unknown revlog index flags')
142 143 return int(int(offset) << 16 | type)
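
# A minimal sketch of the packing done by offset_type() above: the data
# offset lives in the high bits and the revision flags in the low 16 bits,
# which start() and flags() later undo with ">> 16" and "& 0xFFFF".
from mercurial import revlog as revlogmod

packed = revlogmod.offset_type(1024, 0)  # offset 1024, no flags
assert packed >> 16 == 1024              # what start() recovers
assert packed & 0xFFFF == 0              # what flags() recovers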
143 144
144 145
145 146 def _verify_revision(rl, skipflags, state, node):
146 147 """Verify the integrity of the given revlog ``node`` while providing a hook
147 148 point for extensions to influence the operation."""
148 149 if skipflags:
149 150 state[b'skipread'].add(node)
150 151 else:
151 152 # Side-effect: read content and verify hash.
152 153 rl.revision(node)
153 154
154 155
155 156 # True if a fast implementation for persistent-nodemap is available
156 157 #
157 158 # We also consider we have a "fast" implementation in "pure" python because
158 159 # people using pure don't really have performance consideration (and a
159 160 # wheelbarrow of other slowness source)
160 161 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 162 parsers, 'BaseIndexObject'
162 163 )
163 164
164 165
165 166 @attr.s(slots=True, frozen=True)
166 167 class _revisioninfo(object):
167 168 """Information about a revision that allows building its fulltext
168 169 node: expected hash of the revision
169 170 p1, p2: parent revs of the revision
170 171 btext: built text cache consisting of a one-element list
171 172 cachedelta: (baserev, uncompressed_delta) or None
172 173 flags: flags associated to the revision storage
173 174
174 175 One of btext[0] or cachedelta must be set.
175 176 """
176 177
177 178 node = attr.ib()
178 179 p1 = attr.ib()
179 180 p2 = attr.ib()
180 181 btext = attr.ib()
181 182 textlen = attr.ib()
182 183 cachedelta = attr.ib()
183 184 flags = attr.ib()
184 185
185 186
186 187 @interfaceutil.implementer(repository.irevisiondelta)
187 188 @attr.s(slots=True)
188 189 class revlogrevisiondelta(object):
189 190 node = attr.ib()
190 191 p1node = attr.ib()
191 192 p2node = attr.ib()
192 193 basenode = attr.ib()
193 194 flags = attr.ib()
194 195 baserevisionsize = attr.ib()
195 196 revision = attr.ib()
196 197 delta = attr.ib()
197 198 sidedata = attr.ib()
198 199 linknode = attr.ib(default=None)
199 200
200 201
201 202 @interfaceutil.implementer(repository.iverifyproblem)
202 203 @attr.s(frozen=True)
203 204 class revlogproblem(object):
204 205 warning = attr.ib(default=None)
205 206 error = attr.ib(default=None)
206 207 node = attr.ib(default=None)
207 208
208 209
209 210 def parse_index_v1(data, inline):
210 211 # call the C implementation to parse the index data
211 212 index, cache = parsers.parse_index2(data, inline)
212 213 return index, cache
213 214
214 215
215 216 def parse_index_v2(data, inline):
216 217 # call the C implementation to parse the index data
217 218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
218 219 return index, cache
219 220
220 221
221 222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
222 223
223 224 def parse_index_v1_nodemap(data, inline):
224 225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
225 226 return index, cache
226 227
227 228
228 229 else:
229 230 parse_index_v1_nodemap = None
230 231
231 232
232 233 def parse_index_v1_mixed(data, inline):
233 234 index, cache = parse_index_v1(data, inline)
234 235 return rustrevlog.MixedIndex(index), cache
235 236
236 237
237 238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
238 239 # signed integer)
239 240 _maxentrysize = 0x7FFFFFFF
240 241
241 242
242 243 class revlog(object):
243 244 """
244 245 the underlying revision storage object
245 246
246 247 A revlog consists of two parts, an index and the revision data.
247 248
248 249 The index is a file with a fixed record size containing
249 250 information on each revision, including its nodeid (hash), the
250 251 nodeids of its parents, the position and offset of its data within
251 252 the data file, and the revision it's based on. Finally, each entry
252 253 contains a linkrev entry that can serve as a pointer to external
253 254 data.
254 255
255 256 The revision data itself is a linear collection of data chunks.
256 257 Each chunk represents a revision and is usually represented as a
257 258 delta against the previous chunk. To bound lookup time, runs of
258 259 deltas are limited to about 2 times the length of the original
259 260 version data. This makes retrieval of a version proportional to
260 261 its size, or O(1) relative to the number of revisions.
261 262
262 263 Both pieces of the revlog are written to in an append-only
263 264 fashion, which means we never need to rewrite a file to insert or
264 265 remove data, and can use some simple techniques to avoid the need
265 266 for locking while reading.
266 267
267 268 If checkambig, indexfile is opened with checkambig=True at
268 269 writing, to avoid file stat ambiguity.
269 270
270 271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
271 272 index will be mmapped rather than read if it is larger than the
272 273 configured threshold.
273 274
274 275 If censorable is True, the revlog can have censored revisions.
275 276
276 277 If `upperboundcomp` is not None, this is the expected maximal gain from
277 278 compression for the data content.
278 279
279 280 `concurrencychecker` is an optional function that receives 3 arguments: a
280 281 file handle, a filename, and an expected position. It should check whether
281 282 the current position in the file handle is valid, and log/warn/fail (by
282 283 raising).
283 284 """
284 285
285 286 _flagserrorclass = error.RevlogError
286 287
287 288 def __init__(
288 289 self,
289 290 opener,
290 indexfile,
291 target,
292 indexfile=None,
291 293 datafile=None,
292 294 checkambig=False,
293 295 mmaplargeindex=False,
294 296 censorable=False,
295 297 upperboundcomp=None,
296 298 persistentnodemap=False,
297 299 concurrencychecker=None,
298 300 ):
299 301 """
300 302 create a revlog object
301 303
302 304 opener is a function that abstracts the file opening operation
303 305 and can be used to implement COW semantics or the like.
304 306
307 `target`: a (KIND, ID) tuple that identifies the content stored in
308 this revlog. It helps the rest of the code understand what the revlog
309 is about without having to resort to heuristics and index filename
310 analysis. Note that this must reliably be set by normal code, but
311 test, debug, or performance measurement code might not set it to an
312 accurate value.
305 313 """
306 314 self.upperboundcomp = upperboundcomp
307 315 self.indexfile = indexfile
308 316 self.datafile = datafile or (indexfile[:-2] + b".d")
309 317 self.nodemap_file = None
310 318 if persistentnodemap:
311 319 self.nodemap_file = nodemaputil.get_nodemap_file(
312 320 opener, self.indexfile
313 321 )
314 322
315 323 self.opener = opener
324 assert target[0] in ALL_KINDS
325 assert len(target) == 2
326 self.target = target
316 327 # When True, indexfile is opened with checkambig=True at writing, to
317 328 # avoid file stat ambiguity.
318 329 self._checkambig = checkambig
319 330 self._mmaplargeindex = mmaplargeindex
320 331 self._censorable = censorable
321 332 # 3-tuple of (node, rev, text) for a raw revision.
322 333 self._revisioncache = None
323 334 # Maps rev to chain base rev.
324 335 self._chainbasecache = util.lrucachedict(100)
325 336 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
326 337 self._chunkcache = (0, b'')
327 338 # How much data to read and cache into the raw revlog data cache.
328 339 self._chunkcachesize = 65536
329 340 self._maxchainlen = None
330 341 self._deltabothparents = True
331 342 self.index = None
332 343 self._nodemap_docket = None
333 344 # Mapping of partial identifiers to full nodes.
334 345 self._pcache = {}
335 346 # Mapping of revision integer to full node.
336 347 self._compengine = b'zlib'
337 348 self._compengineopts = {}
338 349 self._maxdeltachainspan = -1
339 350 self._withsparseread = False
340 351 self._sparserevlog = False
341 352 self._srdensitythreshold = 0.50
342 353 self._srmingapsize = 262144
343 354
344 355 # Make copy of flag processors so each revlog instance can support
345 356 # custom flags.
346 357 self._flagprocessors = dict(flagutil.flagprocessors)
347 358
348 359 # 2-tuple of file handles being used for active writing.
349 360 self._writinghandles = None
350 361
351 362 self._loadindex()
352 363
353 364 self._concurrencychecker = concurrencychecker
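
# A minimal usage sketch of the constructor above (the opener, the
# KIND_OTHER identifier and the index file name are placeholders): callers
# now state what the revlog is about with a (kind, id) pair drawn from
# ALL_KINDS.
from mercurial import revlog as revlogmod
from mercurial.revlogutils import constants as revlog_constants


def open_example_revlog(opener):
    # `opener` is whatever vfs-style callable (carrying an ``options`` dict)
    # the caller already holds; b'example.i' is a made-up index file name.
    return revlogmod.revlog(
        opener,
        target=(revlog_constants.KIND_OTHER, b'example'),
        indexfile=b'example.i',
    )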
354 365
355 366 def _loadindex(self):
356 367 mmapindexthreshold = None
357 368 opts = self.opener.options
358 369
359 370 if b'revlogv2' in opts:
360 371 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
361 372 elif b'revlogv1' in opts:
362 373 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
363 374 if b'generaldelta' in opts:
364 375 newversionflags |= FLAG_GENERALDELTA
365 376 elif b'revlogv0' in self.opener.options:
366 377 newversionflags = REVLOGV0
367 378 else:
368 379 newversionflags = REVLOG_DEFAULT_VERSION
369 380
370 381 if b'chunkcachesize' in opts:
371 382 self._chunkcachesize = opts[b'chunkcachesize']
372 383 if b'maxchainlen' in opts:
373 384 self._maxchainlen = opts[b'maxchainlen']
374 385 if b'deltabothparents' in opts:
375 386 self._deltabothparents = opts[b'deltabothparents']
376 387 self._lazydelta = bool(opts.get(b'lazydelta', True))
377 388 self._lazydeltabase = False
378 389 if self._lazydelta:
379 390 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
380 391 if b'compengine' in opts:
381 392 self._compengine = opts[b'compengine']
382 393 if b'zlib.level' in opts:
383 394 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
384 395 if b'zstd.level' in opts:
385 396 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
386 397 if b'maxdeltachainspan' in opts:
387 398 self._maxdeltachainspan = opts[b'maxdeltachainspan']
388 399 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
389 400 mmapindexthreshold = opts[b'mmapindexthreshold']
390 401 self.hassidedata = bool(opts.get(b'side-data', False))
391 402 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
392 403 withsparseread = bool(opts.get(b'with-sparse-read', False))
393 404 # sparse-revlog forces sparse-read
394 405 self._withsparseread = self._sparserevlog or withsparseread
395 406 if b'sparse-read-density-threshold' in opts:
396 407 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
397 408 if b'sparse-read-min-gap-size' in opts:
398 409 self._srmingapsize = opts[b'sparse-read-min-gap-size']
399 410 if opts.get(b'enableellipsis'):
400 411 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
401 412
402 413 # revlog v0 doesn't have flag processors
403 414 for flag, processor in pycompat.iteritems(
404 415 opts.get(b'flagprocessors', {})
405 416 ):
406 417 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
407 418
408 419 if self._chunkcachesize <= 0:
409 420 raise error.RevlogError(
410 421 _(b'revlog chunk cache size %r is not greater than 0')
411 422 % self._chunkcachesize
412 423 )
413 424 elif self._chunkcachesize & (self._chunkcachesize - 1):
414 425 raise error.RevlogError(
415 426 _(b'revlog chunk cache size %r is not a power of 2')
416 427 % self._chunkcachesize
417 428 )
418 429
419 430 indexdata = b''
420 431 self._initempty = True
421 432 try:
422 433 with self._indexfp() as f:
423 434 if (
424 435 mmapindexthreshold is not None
425 436 and self.opener.fstat(f).st_size >= mmapindexthreshold
426 437 ):
427 438 # TODO: should .close() to release resources without
428 439 # relying on Python GC
429 440 indexdata = util.buffer(util.mmapread(f))
430 441 else:
431 442 indexdata = f.read()
432 443 if len(indexdata) > 0:
433 444 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
434 445 self._initempty = False
435 446 else:
436 447 versionflags = newversionflags
437 448 except IOError as inst:
438 449 if inst.errno != errno.ENOENT:
439 450 raise
440 451
441 452 versionflags = newversionflags
442 453
443 454 self.version = versionflags
444 455
445 456 flags = versionflags & ~0xFFFF
446 457 fmt = versionflags & 0xFFFF
447 458
448 459 if fmt == REVLOGV0:
449 460 if flags:
450 461 raise error.RevlogError(
451 462 _(b'unknown flags (%#04x) in version %d revlog %s')
452 463 % (flags >> 16, fmt, self.indexfile)
453 464 )
454 465
455 466 self._inline = False
456 467 self._generaldelta = False
457 468
458 469 elif fmt == REVLOGV1:
459 470 if flags & ~REVLOGV1_FLAGS:
460 471 raise error.RevlogError(
461 472 _(b'unknown flags (%#04x) in version %d revlog %s')
462 473 % (flags >> 16, fmt, self.indexfile)
463 474 )
464 475
465 476 self._inline = versionflags & FLAG_INLINE_DATA
466 477 self._generaldelta = versionflags & FLAG_GENERALDELTA
467 478
468 479 elif fmt == REVLOGV2:
469 480 if flags & ~REVLOGV2_FLAGS:
470 481 raise error.RevlogError(
471 482 _(b'unknown flags (%#04x) in version %d revlog %s')
472 483 % (flags >> 16, fmt, self.indexfile)
473 484 )
474 485
475 486 # There is a bug in the transaction handling when going from an
476 487 # inline revlog to a separate index and data file. Turn it off until
477 488 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
478 489 # See issue6485
479 490 self._inline = False
480 491 # generaldelta implied by version 2 revlogs.
481 492 self._generaldelta = True
482 493
483 494 else:
484 495 raise error.RevlogError(
485 496 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
486 497 )
487 498
488 499 self.nodeconstants = sha1nodeconstants
489 500 self.nullid = self.nodeconstants.nullid
490 501
491 502 # sparse-revlog can't be on without general-delta (issue6056)
492 503 if not self._generaldelta:
493 504 self._sparserevlog = False
494 505
495 506 self._storedeltachains = True
496 507
497 508 devel_nodemap = (
498 509 self.nodemap_file
499 510 and opts.get(b'devel-force-nodemap', False)
500 511 and parse_index_v1_nodemap is not None
501 512 )
502 513
503 514 use_rust_index = False
504 515 if rustrevlog is not None:
505 516 if self.nodemap_file is not None:
506 517 use_rust_index = True
507 518 else:
508 519 use_rust_index = self.opener.options.get(b'rust.index')
509 520
510 521 self._parse_index = parse_index_v1
511 522 if self.version == REVLOGV0:
512 523 self._parse_index = revlogv0.parse_index_v0
513 524 elif fmt == REVLOGV2:
514 525 self._parse_index = parse_index_v2
515 526 elif devel_nodemap:
516 527 self._parse_index = parse_index_v1_nodemap
517 528 elif use_rust_index:
518 529 self._parse_index = parse_index_v1_mixed
519 530 try:
520 531 d = self._parse_index(indexdata, self._inline)
521 532 index, _chunkcache = d
522 533 use_nodemap = (
523 534 not self._inline
524 535 and self.nodemap_file is not None
525 536 and util.safehasattr(index, 'update_nodemap_data')
526 537 )
527 538 if use_nodemap:
528 539 nodemap_data = nodemaputil.persisted_data(self)
529 540 if nodemap_data is not None:
530 541 docket = nodemap_data[0]
531 542 if (
532 543 len(d[0]) > docket.tip_rev
533 544 and d[0][docket.tip_rev][7] == docket.tip_node
534 545 ):
535 546 # no changelog tampering
536 547 self._nodemap_docket = docket
537 548 index.update_nodemap_data(*nodemap_data)
538 549 except (ValueError, IndexError):
539 550 raise error.RevlogError(
540 551 _(b"index %s is corrupted") % self.indexfile
541 552 )
542 553 self.index, self._chunkcache = d
543 554 if not self._chunkcache:
544 555 self._chunkclear()
545 556 # revnum -> (chain-length, sum-delta-length)
546 557 self._chaininfocache = util.lrucachedict(500)
547 558 # revlog header -> revlog compressor
548 559 self._decompressors = {}
549 560
550 561 @util.propertycache
551 562 def _compressor(self):
552 563 engine = util.compengines[self._compengine]
553 564 return engine.revlogcompressor(self._compengineopts)
554 565
555 566 def _indexfp(self, mode=b'r'):
556 567 """file object for the revlog's index file"""
557 568 args = {'mode': mode}
558 569 if mode != b'r':
559 570 args['checkambig'] = self._checkambig
560 571 if mode == b'w':
561 572 args['atomictemp'] = True
562 573 return self.opener(self.indexfile, **args)
563 574
564 575 def _datafp(self, mode=b'r'):
565 576 """file object for the revlog's data file"""
566 577 return self.opener(self.datafile, mode=mode)
567 578
568 579 @contextlib.contextmanager
569 580 def _datareadfp(self, existingfp=None):
570 581 """file object suitable to read data"""
571 582 # Use explicit file handle, if given.
572 583 if existingfp is not None:
573 584 yield existingfp
574 585
575 586 # Use a file handle being actively used for writes, if available.
576 587 # There is some danger to doing this because reads will seek the
577 588 # file. However, _writeentry() performs a SEEK_END before all writes,
578 589 # so we should be safe.
579 590 elif self._writinghandles:
580 591 if self._inline:
581 592 yield self._writinghandles[0]
582 593 else:
583 594 yield self._writinghandles[1]
584 595
585 596 # Otherwise open a new file handle.
586 597 else:
587 598 if self._inline:
588 599 func = self._indexfp
589 600 else:
590 601 func = self._datafp
591 602 with func() as fp:
592 603 yield fp
593 604
594 605 def tiprev(self):
595 606 return len(self.index) - 1
596 607
597 608 def tip(self):
598 609 return self.node(self.tiprev())
599 610
600 611 def __contains__(self, rev):
601 612 return 0 <= rev < len(self)
602 613
603 614 def __len__(self):
604 615 return len(self.index)
605 616
606 617 def __iter__(self):
607 618 return iter(pycompat.xrange(len(self)))
608 619
609 620 def revs(self, start=0, stop=None):
610 621 """iterate over all rev in this revlog (from start to stop)"""
611 622 return storageutil.iterrevs(len(self), start=start, stop=stop)
612 623
613 624 @property
614 625 def nodemap(self):
615 626 msg = (
616 627 b"revlog.nodemap is deprecated, "
617 628 b"use revlog.index.[has_node|rev|get_rev]"
618 629 )
619 630 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
620 631 return self.index.nodemap
621 632
622 633 @property
623 634 def _nodecache(self):
624 635 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
625 636 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
626 637 return self.index.nodemap
627 638
628 639 def hasnode(self, node):
629 640 try:
630 641 self.rev(node)
631 642 return True
632 643 except KeyError:
633 644 return False
634 645
635 646 def candelta(self, baserev, rev):
636 647 """whether two revisions (baserev, rev) can be delta-ed or not"""
637 648 # Disable delta if either rev requires a content-changing flag
638 649 # processor (ex. LFS). This is because such flag processor can alter
639 650 # the rawtext content that the delta will be based on, and two clients
640 651 # could have a same revlog node with different flags (i.e. different
641 652 # rawtext contents) and the delta could be incompatible.
642 653 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
643 654 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
644 655 ):
645 656 return False
646 657 return True
647 658
648 659 def update_caches(self, transaction):
649 660 if self.nodemap_file is not None:
650 661 if transaction is None:
651 662 nodemaputil.update_persistent_nodemap(self)
652 663 else:
653 664 nodemaputil.setup_persistent_nodemap(transaction, self)
654 665
655 666 def clearcaches(self):
656 667 self._revisioncache = None
657 668 self._chainbasecache.clear()
658 669 self._chunkcache = (0, b'')
659 670 self._pcache = {}
660 671 self._nodemap_docket = None
661 672 self.index.clearcaches()
662 673 # The python code is the one responsible for validating the docket, we
663 674 # end up having to refresh it here.
664 675 use_nodemap = (
665 676 not self._inline
666 677 and self.nodemap_file is not None
667 678 and util.safehasattr(self.index, 'update_nodemap_data')
668 679 )
669 680 if use_nodemap:
670 681 nodemap_data = nodemaputil.persisted_data(self)
671 682 if nodemap_data is not None:
672 683 self._nodemap_docket = nodemap_data[0]
673 684 self.index.update_nodemap_data(*nodemap_data)
674 685
675 686 def rev(self, node):
676 687 try:
677 688 return self.index.rev(node)
678 689 except TypeError:
679 690 raise
680 691 except error.RevlogError:
681 692 # parsers.c radix tree lookup failed
682 693 if (
683 694 node == self.nodeconstants.wdirid
684 695 or node in self.nodeconstants.wdirfilenodeids
685 696 ):
686 697 raise error.WdirUnsupported
687 698 raise error.LookupError(node, self.indexfile, _(b'no node'))
688 699
689 700 # Accessors for index entries.
690 701
691 702 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
692 703 # are flags.
693 704 def start(self, rev):
694 705 return int(self.index[rev][0] >> 16)
695 706
696 707 def flags(self, rev):
697 708 return self.index[rev][0] & 0xFFFF
698 709
699 710 def length(self, rev):
700 711 return self.index[rev][1]
701 712
702 713 def sidedata_length(self, rev):
703 714 if self.version & 0xFFFF != REVLOGV2:
704 715 return 0
705 716 return self.index[rev][9]
706 717
707 718 def rawsize(self, rev):
708 719 """return the length of the uncompressed text for a given revision"""
709 720 l = self.index[rev][2]
710 721 if l >= 0:
711 722 return l
712 723
713 724 t = self.rawdata(rev)
714 725 return len(t)
715 726
716 727 def size(self, rev):
717 728 """length of non-raw text (processed by a "read" flag processor)"""
718 729 # fast path: if no "read" flag processor could change the content,
719 730 # size is rawsize. note: ELLIPSIS is known to not change the content.
720 731 flags = self.flags(rev)
721 732 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
722 733 return self.rawsize(rev)
723 734
724 735 return len(self.revision(rev, raw=False))
725 736
726 737 def chainbase(self, rev):
727 738 base = self._chainbasecache.get(rev)
728 739 if base is not None:
729 740 return base
730 741
731 742 index = self.index
732 743 iterrev = rev
733 744 base = index[iterrev][3]
734 745 while base != iterrev:
735 746 iterrev = base
736 747 base = index[iterrev][3]
737 748
738 749 self._chainbasecache[rev] = base
739 750 return base
740 751
741 752 def linkrev(self, rev):
742 753 return self.index[rev][4]
743 754
744 755 def parentrevs(self, rev):
745 756 try:
746 757 entry = self.index[rev]
747 758 except IndexError:
748 759 if rev == wdirrev:
749 760 raise error.WdirUnsupported
750 761 raise
751 762 if entry[5] == nullrev:
752 763 return entry[6], entry[5]
753 764 else:
754 765 return entry[5], entry[6]
755 766
756 767 # fast parentrevs(rev) where rev isn't filtered
757 768 _uncheckedparentrevs = parentrevs
758 769
759 770 def node(self, rev):
760 771 try:
761 772 return self.index[rev][7]
762 773 except IndexError:
763 774 if rev == wdirrev:
764 775 raise error.WdirUnsupported
765 776 raise
766 777
767 778 # Derived from index values.
768 779
769 780 def end(self, rev):
770 781 return self.start(rev) + self.length(rev)
771 782
772 783 def parents(self, node):
773 784 i = self.index
774 785 d = i[self.rev(node)]
775 786 # inline node() to avoid function call overhead
776 787 if d[5] == self.nullid:
777 788 return i[d[6]][7], i[d[5]][7]
778 789 else:
779 790 return i[d[5]][7], i[d[6]][7]
780 791
781 792 def chainlen(self, rev):
782 793 return self._chaininfo(rev)[0]
783 794
784 795 def _chaininfo(self, rev):
785 796 chaininfocache = self._chaininfocache
786 797 if rev in chaininfocache:
787 798 return chaininfocache[rev]
788 799 index = self.index
789 800 generaldelta = self._generaldelta
790 801 iterrev = rev
791 802 e = index[iterrev]
792 803 clen = 0
793 804 compresseddeltalen = 0
794 805 while iterrev != e[3]:
795 806 clen += 1
796 807 compresseddeltalen += e[1]
797 808 if generaldelta:
798 809 iterrev = e[3]
799 810 else:
800 811 iterrev -= 1
801 812 if iterrev in chaininfocache:
802 813 t = chaininfocache[iterrev]
803 814 clen += t[0]
804 815 compresseddeltalen += t[1]
805 816 break
806 817 e = index[iterrev]
807 818 else:
808 819 # Add text length of base since decompressing that also takes
809 820 # work. For cache hits the length is already included.
810 821 compresseddeltalen += e[1]
811 822 r = (clen, compresseddeltalen)
812 823 chaininfocache[rev] = r
813 824 return r
814 825
815 826 def _deltachain(self, rev, stoprev=None):
816 827 """Obtain the delta chain for a revision.
817 828
818 829 ``stoprev`` specifies a revision to stop at. If not specified, we
819 830 stop at the base of the chain.
820 831
821 832 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
822 833 revs in ascending order and ``stopped`` is a bool indicating whether
823 834 ``stoprev`` was hit.
824 835 """
825 836 # Try C implementation.
826 837 try:
827 838 return self.index.deltachain(rev, stoprev, self._generaldelta)
828 839 except AttributeError:
829 840 pass
830 841
831 842 chain = []
832 843
833 844 # Alias to prevent attribute lookup in tight loop.
834 845 index = self.index
835 846 generaldelta = self._generaldelta
836 847
837 848 iterrev = rev
838 849 e = index[iterrev]
839 850 while iterrev != e[3] and iterrev != stoprev:
840 851 chain.append(iterrev)
841 852 if generaldelta:
842 853 iterrev = e[3]
843 854 else:
844 855 iterrev -= 1
845 856 e = index[iterrev]
846 857
847 858 if iterrev == stoprev:
848 859 stopped = True
849 860 else:
850 861 chain.append(iterrev)
851 862 stopped = False
852 863
853 864 chain.reverse()
854 865 return chain, stopped
855 866
856 867 def ancestors(self, revs, stoprev=0, inclusive=False):
857 868 """Generate the ancestors of 'revs' in reverse revision order.
858 869 Does not generate revs lower than stoprev.
859 870
860 871 See the documentation for ancestor.lazyancestors for more details."""
861 872
862 873 # first, make sure start revisions aren't filtered
863 874 revs = list(revs)
864 875 checkrev = self.node
865 876 for r in revs:
866 877 checkrev(r)
867 878 # and we're sure ancestors aren't filtered as well
868 879
869 880 if rustancestor is not None:
870 881 lazyancestors = rustancestor.LazyAncestors
871 882 arg = self.index
872 883 else:
873 884 lazyancestors = ancestor.lazyancestors
874 885 arg = self._uncheckedparentrevs
875 886 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
876 887
877 888 def descendants(self, revs):
878 889 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
879 890
880 891 def findcommonmissing(self, common=None, heads=None):
881 892 """Return a tuple of the ancestors of common and the ancestors of heads
882 893 that are not ancestors of common. In revset terminology, we return the
883 894 tuple:
884 895
885 896 ::common, (::heads) - (::common)
886 897
887 898 The list is sorted by revision number, meaning it is
888 899 topologically sorted.
889 900
890 901 'heads' and 'common' are both lists of node IDs. If heads is
891 902 not supplied, uses all of the revlog's heads. If common is not
892 903 supplied, uses nullid."""
893 904 if common is None:
894 905 common = [self.nullid]
895 906 if heads is None:
896 907 heads = self.heads()
897 908
898 909 common = [self.rev(n) for n in common]
899 910 heads = [self.rev(n) for n in heads]
900 911
901 912 # we want the ancestors, but inclusive
902 913 class lazyset(object):
903 914 def __init__(self, lazyvalues):
904 915 self.addedvalues = set()
905 916 self.lazyvalues = lazyvalues
906 917
907 918 def __contains__(self, value):
908 919 return value in self.addedvalues or value in self.lazyvalues
909 920
910 921 def __iter__(self):
911 922 added = self.addedvalues
912 923 for r in added:
913 924 yield r
914 925 for r in self.lazyvalues:
915 926 if not r in added:
916 927 yield r
917 928
918 929 def add(self, value):
919 930 self.addedvalues.add(value)
920 931
921 932 def update(self, values):
922 933 self.addedvalues.update(values)
923 934
924 935 has = lazyset(self.ancestors(common))
925 936 has.add(nullrev)
926 937 has.update(common)
927 938
928 939 # take all ancestors from heads that aren't in has
929 940 missing = set()
930 941 visit = collections.deque(r for r in heads if r not in has)
931 942 while visit:
932 943 r = visit.popleft()
933 944 if r in missing:
934 945 continue
935 946 else:
936 947 missing.add(r)
937 948 for p in self.parentrevs(r):
938 949 if p not in has:
939 950 visit.append(p)
940 951 missing = list(missing)
941 952 missing.sort()
942 953 return has, [self.node(miss) for miss in missing]
943 954
944 955 def incrementalmissingrevs(self, common=None):
945 956 """Return an object that can be used to incrementally compute the
946 957 revision numbers of the ancestors of arbitrary sets that are not
947 958 ancestors of common. This is an ancestor.incrementalmissingancestors
948 959 object.
949 960
950 961 'common' is a list of revision numbers. If common is not supplied, uses
951 962 nullrev.
952 963 """
953 964 if common is None:
954 965 common = [nullrev]
955 966
956 967 if rustancestor is not None:
957 968 return rustancestor.MissingAncestors(self.index, common)
958 969 return ancestor.incrementalmissingancestors(self.parentrevs, common)
959 970
960 971 def findmissingrevs(self, common=None, heads=None):
961 972 """Return the revision numbers of the ancestors of heads that
962 973 are not ancestors of common.
963 974
964 975 More specifically, return a list of revision numbers corresponding to
965 976 nodes N such that every N satisfies the following constraints:
966 977
967 978 1. N is an ancestor of some node in 'heads'
968 979 2. N is not an ancestor of any node in 'common'
969 980
970 981 The list is sorted by revision number, meaning it is
971 982 topologically sorted.
972 983
973 984 'heads' and 'common' are both lists of revision numbers. If heads is
974 985 not supplied, uses all of the revlog's heads. If common is not
975 986 supplied, uses nullid."""
976 987 if common is None:
977 988 common = [nullrev]
978 989 if heads is None:
979 990 heads = self.headrevs()
980 991
981 992 inc = self.incrementalmissingrevs(common=common)
982 993 return inc.missingancestors(heads)
983 994
984 995 def findmissing(self, common=None, heads=None):
985 996 """Return the ancestors of heads that are not ancestors of common.
986 997
987 998 More specifically, return a list of nodes N such that every N
988 999 satisfies the following constraints:
989 1000
990 1001 1. N is an ancestor of some node in 'heads'
991 1002 2. N is not an ancestor of any node in 'common'
992 1003
993 1004 The list is sorted by revision number, meaning it is
994 1005 topologically sorted.
995 1006
996 1007 'heads' and 'common' are both lists of node IDs. If heads is
997 1008 not supplied, uses all of the revlog's heads. If common is not
998 1009 supplied, uses nullid."""
999 1010 if common is None:
1000 1011 common = [self.nullid]
1001 1012 if heads is None:
1002 1013 heads = self.heads()
1003 1014
1004 1015 common = [self.rev(n) for n in common]
1005 1016 heads = [self.rev(n) for n in heads]
1006 1017
1007 1018 inc = self.incrementalmissingrevs(common=common)
1008 1019 return [self.node(r) for r in inc.missingancestors(heads)]
1009 1020
1010 1021 def nodesbetween(self, roots=None, heads=None):
1011 1022 """Return a topological path from 'roots' to 'heads'.
1012 1023
1013 1024 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1014 1025 topologically sorted list of all nodes N that satisfy both of
1015 1026 these constraints:
1016 1027
1017 1028 1. N is a descendant of some node in 'roots'
1018 1029 2. N is an ancestor of some node in 'heads'
1019 1030
1020 1031 Every node is considered to be both a descendant and an ancestor
1021 1032 of itself, so every reachable node in 'roots' and 'heads' will be
1022 1033 included in 'nodes'.
1023 1034
1024 1035 'outroots' is the list of reachable nodes in 'roots', i.e., the
1025 1036 subset of 'roots' that is returned in 'nodes'. Likewise,
1026 1037 'outheads' is the subset of 'heads' that is also in 'nodes'.
1027 1038
1028 1039 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1029 1040 unspecified, uses nullid as the only root. If 'heads' is
1030 1041 unspecified, uses list of all of the revlog's heads."""
1031 1042 nonodes = ([], [], [])
1032 1043 if roots is not None:
1033 1044 roots = list(roots)
1034 1045 if not roots:
1035 1046 return nonodes
1036 1047 lowestrev = min([self.rev(n) for n in roots])
1037 1048 else:
1038 1049 roots = [self.nullid] # Everybody's a descendant of nullid
1039 1050 lowestrev = nullrev
1040 1051 if (lowestrev == nullrev) and (heads is None):
1041 1052 # We want _all_ the nodes!
1042 1053 return (
1043 1054 [self.node(r) for r in self],
1044 1055 [self.nullid],
1045 1056 list(self.heads()),
1046 1057 )
1047 1058 if heads is None:
1048 1059 # All nodes are ancestors, so the latest ancestor is the last
1049 1060 # node.
1050 1061 highestrev = len(self) - 1
1051 1062 # Set ancestors to None to signal that every node is an ancestor.
1052 1063 ancestors = None
1053 1064 # Set heads to an empty dictionary for later discovery of heads
1054 1065 heads = {}
1055 1066 else:
1056 1067 heads = list(heads)
1057 1068 if not heads:
1058 1069 return nonodes
1059 1070 ancestors = set()
1060 1071 # Turn heads into a dictionary so we can remove 'fake' heads.
1061 1072 # Also, later we will be using it to filter out the heads we can't
1062 1073 # find from roots.
1063 1074 heads = dict.fromkeys(heads, False)
1064 1075 # Start at the top and keep marking parents until we're done.
1065 1076 nodestotag = set(heads)
1066 1077 # Remember where the top was so we can use it as a limit later.
1067 1078 highestrev = max([self.rev(n) for n in nodestotag])
1068 1079 while nodestotag:
1069 1080 # grab a node to tag
1070 1081 n = nodestotag.pop()
1071 1082 # Never tag nullid
1072 1083 if n == self.nullid:
1073 1084 continue
1074 1085 # A node's revision number represents its place in a
1075 1086 # topologically sorted list of nodes.
1076 1087 r = self.rev(n)
1077 1088 if r >= lowestrev:
1078 1089 if n not in ancestors:
1079 1090 # If we are possibly a descendant of one of the roots
1080 1091 # and we haven't already been marked as an ancestor
1081 1092 ancestors.add(n) # Mark as ancestor
1082 1093 # Add non-nullid parents to list of nodes to tag.
1083 1094 nodestotag.update(
1084 1095 [p for p in self.parents(n) if p != self.nullid]
1085 1096 )
1086 1097 elif n in heads: # We've seen it before, is it a fake head?
1087 1098 # So it is, real heads should not be the ancestors of
1088 1099 # any other heads.
1089 1100 heads.pop(n)
1090 1101 if not ancestors:
1091 1102 return nonodes
1092 1103 # Now that we have our set of ancestors, we want to remove any
1093 1104 # roots that are not ancestors.
1094 1105
1095 1106 # If one of the roots was nullid, everything is included anyway.
1096 1107 if lowestrev > nullrev:
1097 1108 # But, since we weren't, let's recompute the lowest rev to not
1098 1109 # include roots that aren't ancestors.
1099 1110
1100 1111 # Filter out roots that aren't ancestors of heads
1101 1112 roots = [root for root in roots if root in ancestors]
1102 1113 # Recompute the lowest revision
1103 1114 if roots:
1104 1115 lowestrev = min([self.rev(root) for root in roots])
1105 1116 else:
1106 1117 # No more roots? Return empty list
1107 1118 return nonodes
1108 1119 else:
1109 1120 # We are descending from nullid, and don't need to care about
1110 1121 # any other roots.
1111 1122 lowestrev = nullrev
1112 1123 roots = [self.nullid]
1113 1124 # Transform our roots list into a set.
1114 1125 descendants = set(roots)
1115 1126 # Also, keep the original roots so we can filter out roots that aren't
1116 1127 # 'real' roots (i.e. are descended from other roots).
1117 1128 roots = descendants.copy()
1118 1129 # Our topologically sorted list of output nodes.
1119 1130 orderedout = []
1120 1131 # Don't start at nullid since we don't want nullid in our output list,
1121 1132 # and if nullid shows up in descendants, empty parents will look like
1122 1133 # they're descendants.
1123 1134 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1124 1135 n = self.node(r)
1125 1136 isdescendant = False
1126 1137 if lowestrev == nullrev: # Everybody is a descendant of nullid
1127 1138 isdescendant = True
1128 1139 elif n in descendants:
1129 1140 # n is already a descendant
1130 1141 isdescendant = True
1131 1142 # This check only needs to be done here because all the roots
1132 1143 # will start being marked as descendants before the loop.
1133 1144 if n in roots:
1134 1145 # If n was a root, check if it's a 'real' root.
1135 1146 p = tuple(self.parents(n))
1136 1147 # If any of its parents are descendants, it's not a root.
1137 1148 if (p[0] in descendants) or (p[1] in descendants):
1138 1149 roots.remove(n)
1139 1150 else:
1140 1151 p = tuple(self.parents(n))
1141 1152 # A node is a descendant if either of its parents are
1142 1153 # descendants. (We seeded the descendants list with the roots
1143 1154 # up there, remember?)
1144 1155 if (p[0] in descendants) or (p[1] in descendants):
1145 1156 descendants.add(n)
1146 1157 isdescendant = True
1147 1158 if isdescendant and ((ancestors is None) or (n in ancestors)):
1148 1159 # Only include nodes that are both descendants and ancestors.
1149 1160 orderedout.append(n)
1150 1161 if (ancestors is not None) and (n in heads):
1151 1162 # We're trying to figure out which heads are reachable
1152 1163 # from roots.
1153 1164 # Mark this head as having been reached
1154 1165 heads[n] = True
1155 1166 elif ancestors is None:
1156 1167 # Otherwise, we're trying to discover the heads.
1157 1168 # Assume this is a head because if it isn't, the next step
1158 1169 # will eventually remove it.
1159 1170 heads[n] = True
1160 1171 # But, obviously its parents aren't.
1161 1172 for p in self.parents(n):
1162 1173 heads.pop(p, None)
1163 1174 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1164 1175 roots = list(roots)
1165 1176 assert orderedout
1166 1177 assert roots
1167 1178 assert heads
1168 1179 return (orderedout, roots, heads)
1169 1180
1170 1181 def headrevs(self, revs=None):
1171 1182 if revs is None:
1172 1183 try:
1173 1184 return self.index.headrevs()
1174 1185 except AttributeError:
1175 1186 return self._headrevs()
1176 1187 if rustdagop is not None:
1177 1188 return rustdagop.headrevs(self.index, revs)
1178 1189 return dagop.headrevs(revs, self._uncheckedparentrevs)
1179 1190
1180 1191 def computephases(self, roots):
1181 1192 return self.index.computephasesmapsets(roots)
1182 1193
1183 1194 def _headrevs(self):
1184 1195 count = len(self)
1185 1196 if not count:
1186 1197 return [nullrev]
1187 1198 # we won't iter over filtered rev so nobody is a head at start
1188 1199 ishead = [0] * (count + 1)
1189 1200 index = self.index
1190 1201 for r in self:
1191 1202 ishead[r] = 1 # I may be a head
1192 1203 e = index[r]
1193 1204 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1194 1205 return [r for r, val in enumerate(ishead) if val]
1195 1206
1196 1207 def heads(self, start=None, stop=None):
1197 1208 """return the list of all nodes that have no children
1198 1209
1199 1210 if start is specified, only heads that are descendants of
1200 1211 start will be returned
1201 1212 if stop is specified, it will consider all the revs from stop
1202 1213 as if they had no children
1203 1214 """
1204 1215 if start is None and stop is None:
1205 1216 if not len(self):
1206 1217 return [self.nullid]
1207 1218 return [self.node(r) for r in self.headrevs()]
1208 1219
1209 1220 if start is None:
1210 1221 start = nullrev
1211 1222 else:
1212 1223 start = self.rev(start)
1213 1224
1214 1225 stoprevs = {self.rev(n) for n in stop or []}
1215 1226
1216 1227 revs = dagop.headrevssubset(
1217 1228 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1218 1229 )
1219 1230
1220 1231 return [self.node(rev) for rev in revs]
1221 1232
1222 1233 def children(self, node):
1223 1234 """find the children of a given node"""
1224 1235 c = []
1225 1236 p = self.rev(node)
1226 1237 for r in self.revs(start=p + 1):
1227 1238 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1228 1239 if prevs:
1229 1240 for pr in prevs:
1230 1241 if pr == p:
1231 1242 c.append(self.node(r))
1232 1243 elif p == nullrev:
1233 1244 c.append(self.node(r))
1234 1245 return c
1235 1246
1236 1247 def commonancestorsheads(self, a, b):
1237 1248 """calculate all the heads of the common ancestors of nodes a and b"""
1238 1249 a, b = self.rev(a), self.rev(b)
1239 1250 ancs = self._commonancestorsheads(a, b)
1240 1251 return pycompat.maplist(self.node, ancs)
1241 1252
1242 1253 def _commonancestorsheads(self, *revs):
1243 1254 """calculate all the heads of the common ancestors of revs"""
1244 1255 try:
1245 1256 ancs = self.index.commonancestorsheads(*revs)
1246 1257 except (AttributeError, OverflowError): # C implementation failed
1247 1258 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1248 1259 return ancs
1249 1260
1250 1261 def isancestor(self, a, b):
1251 1262 """return True if node a is an ancestor of node b
1252 1263
1253 1264 A revision is considered an ancestor of itself."""
1254 1265 a, b = self.rev(a), self.rev(b)
1255 1266 return self.isancestorrev(a, b)
1256 1267
1257 1268 def isancestorrev(self, a, b):
1258 1269 """return True if revision a is an ancestor of revision b
1259 1270
1260 1271 A revision is considered an ancestor of itself.
1261 1272
1262 1273 The implementation of this is trivial but the use of
1263 1274 reachableroots is not."""
1264 1275 if a == nullrev:
1265 1276 return True
1266 1277 elif a == b:
1267 1278 return True
1268 1279 elif a > b:
1269 1280 return False
1270 1281 return bool(self.reachableroots(a, [b], [a], includepath=False))
1271 1282
1272 1283 def reachableroots(self, minroot, heads, roots, includepath=False):
1273 1284 """return (heads(::(<roots> and <roots>::<heads>)))
1274 1285
1275 1286 If includepath is True, return (<roots>::<heads>)."""
1276 1287 try:
1277 1288 return self.index.reachableroots2(
1278 1289 minroot, heads, roots, includepath
1279 1290 )
1280 1291 except AttributeError:
1281 1292 return dagop._reachablerootspure(
1282 1293 self.parentrevs, minroot, roots, heads, includepath
1283 1294 )
1284 1295
1285 1296 def ancestor(self, a, b):
1286 1297 """calculate the "best" common ancestor of nodes a and b"""
1287 1298
1288 1299 a, b = self.rev(a), self.rev(b)
1289 1300 try:
1290 1301 ancs = self.index.ancestors(a, b)
1291 1302 except (AttributeError, OverflowError):
1292 1303 ancs = ancestor.ancestors(self.parentrevs, a, b)
1293 1304 if ancs:
1294 1305 # choose a consistent winner when there's a tie
1295 1306 return min(map(self.node, ancs))
1296 1307 return self.nullid
1297 1308
1298 1309 def _match(self, id):
1299 1310 if isinstance(id, int):
1300 1311 # rev
1301 1312 return self.node(id)
1302 1313 if len(id) == self.nodeconstants.nodelen:
1303 1314 # possibly a binary node
1304 1315 # odds of a binary node being all hex in ASCII are 1 in 10**25
1305 1316 try:
1306 1317 node = id
1307 1318 self.rev(node) # quick search the index
1308 1319 return node
1309 1320 except error.LookupError:
1310 1321 pass # may be partial hex id
1311 1322 try:
1312 1323 # str(rev)
1313 1324 rev = int(id)
1314 1325 if b"%d" % rev != id:
1315 1326 raise ValueError
1316 1327 if rev < 0:
1317 1328 rev = len(self) + rev
1318 1329 if rev < 0 or rev >= len(self):
1319 1330 raise ValueError
1320 1331 return self.node(rev)
1321 1332 except (ValueError, OverflowError):
1322 1333 pass
1323 1334 if len(id) == 2 * self.nodeconstants.nodelen:
1324 1335 try:
1325 1336 # a full hex nodeid?
1326 1337 node = bin(id)
1327 1338 self.rev(node)
1328 1339 return node
1329 1340 except (TypeError, error.LookupError):
1330 1341 pass
1331 1342
1332 1343 def _partialmatch(self, id):
1333 1344 # we don't care about wdirfilenodeids as they should always be full hashes
1334 1345 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1335 1346 try:
1336 1347 partial = self.index.partialmatch(id)
1337 1348 if partial and self.hasnode(partial):
1338 1349 if maybewdir:
1339 1350 # single 'ff...' match in radix tree, ambiguous with wdir
1340 1351 raise error.RevlogError
1341 1352 return partial
1342 1353 if maybewdir:
1343 1354 # no 'ff...' match in radix tree, wdir identified
1344 1355 raise error.WdirUnsupported
1345 1356 return None
1346 1357 except error.RevlogError:
1347 1358 # parsers.c radix tree lookup gave multiple matches
1348 1359 # fast path: for unfiltered changelog, radix tree is accurate
1349 1360 if not getattr(self, 'filteredrevs', None):
1350 1361 raise error.AmbiguousPrefixLookupError(
1351 1362 id, self.indexfile, _(b'ambiguous identifier')
1352 1363 )
1353 1364 # fall through to slow path that filters hidden revisions
1354 1365 except (AttributeError, ValueError):
1355 1366 # we are pure python, or key was too short to search radix tree
1356 1367 pass
1357 1368
1358 1369 if id in self._pcache:
1359 1370 return self._pcache[id]
1360 1371
1361 1372 if len(id) <= 40:
1362 1373 try:
1363 1374 # hex(node)[:...]
1364 1375 l = len(id) // 2 # grab an even number of digits
1365 1376 prefix = bin(id[: l * 2])
1366 1377 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1367 1378 nl = [
1368 1379 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1369 1380 ]
1370 1381 if self.nodeconstants.nullhex.startswith(id):
1371 1382 nl.append(self.nullid)
1372 1383 if len(nl) > 0:
1373 1384 if len(nl) == 1 and not maybewdir:
1374 1385 self._pcache[id] = nl[0]
1375 1386 return nl[0]
1376 1387 raise error.AmbiguousPrefixLookupError(
1377 1388 id, self.indexfile, _(b'ambiguous identifier')
1378 1389 )
1379 1390 if maybewdir:
1380 1391 raise error.WdirUnsupported
1381 1392 return None
1382 1393 except TypeError:
1383 1394 pass
1384 1395
1385 1396 def lookup(self, id):
1386 1397 """locate a node based on:
1387 1398 - revision number or str(revision number)
1388 1399 - nodeid or subset of hex nodeid
1389 1400 """
1390 1401 n = self._match(id)
1391 1402 if n is not None:
1392 1403 return n
1393 1404 n = self._partialmatch(id)
1394 1405 if n:
1395 1406 return n
1396 1407
1397 1408 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1398 1409
1399 1410 def shortest(self, node, minlength=1):
1400 1411 """Find the shortest unambiguous prefix that matches node."""
1401 1412
1402 1413 def isvalid(prefix):
1403 1414 try:
1404 1415 matchednode = self._partialmatch(prefix)
1405 1416 except error.AmbiguousPrefixLookupError:
1406 1417 return False
1407 1418 except error.WdirUnsupported:
1408 1419 # single 'ff...' match
1409 1420 return True
1410 1421 if matchednode is None:
1411 1422 raise error.LookupError(node, self.indexfile, _(b'no node'))
1412 1423 return True
1413 1424
1414 1425 def maybewdir(prefix):
1415 1426 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1416 1427
1417 1428 hexnode = hex(node)
1418 1429
1419 1430 def disambiguate(hexnode, minlength):
1420 1431 """Disambiguate against wdirid."""
1421 1432 for length in range(minlength, len(hexnode) + 1):
1422 1433 prefix = hexnode[:length]
1423 1434 if not maybewdir(prefix):
1424 1435 return prefix
1425 1436
1426 1437 if not getattr(self, 'filteredrevs', None):
1427 1438 try:
1428 1439 length = max(self.index.shortest(node), minlength)
1429 1440 return disambiguate(hexnode, length)
1430 1441 except error.RevlogError:
1431 1442 if node != self.nodeconstants.wdirid:
1432 1443 raise error.LookupError(node, self.indexfile, _(b'no node'))
1433 1444 except AttributeError:
1434 1445 # Fall through to pure code
1435 1446 pass
1436 1447
1437 1448 if node == self.nodeconstants.wdirid:
1438 1449 for length in range(minlength, len(hexnode) + 1):
1439 1450 prefix = hexnode[:length]
1440 1451 if isvalid(prefix):
1441 1452 return prefix
1442 1453
1443 1454 for length in range(minlength, len(hexnode) + 1):
1444 1455 prefix = hexnode[:length]
1445 1456 if isvalid(prefix):
1446 1457 return disambiguate(hexnode, length)
1447 1458
1448 1459 def cmp(self, node, text):
1449 1460 """compare text with a given file revision
1450 1461
1451 1462 returns True if text is different than what is stored.
1452 1463 """
1453 1464 p1, p2 = self.parents(node)
1454 1465 return storageutil.hashrevisionsha1(text, p1, p2) != node
1455 1466
1456 1467 def _cachesegment(self, offset, data):
1457 1468 """Add a segment to the revlog cache.
1458 1469
1459 1470 Accepts an absolute offset and the data that is at that location.
1460 1471 """
1461 1472 o, d = self._chunkcache
1462 1473 # try to add to existing cache
1463 1474 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1464 1475 self._chunkcache = o, d + data
1465 1476 else:
1466 1477 self._chunkcache = offset, data
1467 1478
1468 1479 def _readsegment(self, offset, length, df=None):
1469 1480 """Load a segment of raw data from the revlog.
1470 1481
1471 1482 Accepts an absolute offset, length to read, and an optional existing
1472 1483 file handle to read from.
1473 1484
1474 1485 If an existing file handle is passed, it will be seeked and the
1475 1486 original seek position will NOT be restored.
1476 1487
1477 1488 Returns a str or buffer of raw byte data.
1478 1489
1479 1490 Raises if the requested number of bytes could not be read.
1480 1491 """
1481 1492 # Cache data both forward and backward around the requested
1482 1493 # data, in a fixed size window. This helps speed up operations
1483 1494 # involving reading the revlog backwards.
1484 1495 cachesize = self._chunkcachesize
1485 1496 realoffset = offset & ~(cachesize - 1)
1486 1497 reallength = (
1487 1498 (offset + length + cachesize) & ~(cachesize - 1)
1488 1499 ) - realoffset
1489 1500 with self._datareadfp(df) as df:
1490 1501 df.seek(realoffset)
1491 1502 d = df.read(reallength)
1492 1503
1493 1504 self._cachesegment(realoffset, d)
1494 1505 if offset != realoffset or reallength != length:
1495 1506 startoffset = offset - realoffset
1496 1507 if len(d) - startoffset < length:
1497 1508 raise error.RevlogError(
1498 1509 _(
1499 1510 b'partial read of revlog %s; expected %d bytes from '
1500 1511 b'offset %d, got %d'
1501 1512 )
1502 1513 % (
1503 1514 self.indexfile if self._inline else self.datafile,
1504 1515 length,
1505 1516 realoffset,
1506 1517 len(d) - startoffset,
1507 1518 )
1508 1519 )
1509 1520
1510 1521 return util.buffer(d, startoffset, length)
1511 1522
1512 1523 if len(d) < length:
1513 1524 raise error.RevlogError(
1514 1525 _(
1515 1526 b'partial read of revlog %s; expected %d bytes from offset '
1516 1527 b'%d, got %d'
1517 1528 )
1518 1529 % (
1519 1530 self.indexfile if self._inline else self.datafile,
1520 1531 length,
1521 1532 offset,
1522 1533 len(d),
1523 1534 )
1524 1535 )
1525 1536
1526 1537 return d
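
# A minimal worked example of the window arithmetic in _readsegment() above,
# assuming the default 64 KiB chunk cache size: the requested range is
# widened to cache-aligned boundaries before reading.
cachesize = 65536
offset, length = 70000, 100
realoffset = offset & ~(cachesize - 1)
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
assert (realoffset, reallength) == (65536, 65536)
assert realoffset <= offset and offset + length <= realoffset + reallength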
1527 1538
1528 1539 def _getsegment(self, offset, length, df=None):
1529 1540 """Obtain a segment of raw data from the revlog.
1530 1541
1531 1542 Accepts an absolute offset, length of bytes to obtain, and an
1532 1543 optional file handle to the already-opened revlog. If the file
1533 1544 handle is used, its original seek position will not be preserved.
1534 1545
1535 1546 Requests for data may be returned from a cache.
1536 1547
1537 1548 Returns a str or a buffer instance of raw byte data.
1538 1549 """
1539 1550 o, d = self._chunkcache
1540 1551 l = len(d)
1541 1552
1542 1553 # is it in the cache?
1543 1554 cachestart = offset - o
1544 1555 cacheend = cachestart + length
1545 1556 if cachestart >= 0 and cacheend <= l:
1546 1557 if cachestart == 0 and cacheend == l:
1547 1558 return d # avoid a copy
1548 1559 return util.buffer(d, cachestart, cacheend - cachestart)
1549 1560
1550 1561 return self._readsegment(offset, length, df=df)
1551 1562
1552 1563 def _getsegmentforrevs(self, startrev, endrev, df=None):
1553 1564 """Obtain a segment of raw data corresponding to a range of revisions.
1554 1565
1555 1566 Accepts the start and end revisions and an optional already-open
1556 1567 file handle to be used for reading. If the file handle is read, its
1557 1568 seek position will not be preserved.
1558 1569
1559 1570 Requests for data may be satisfied by a cache.
1560 1571
1561 1572 Returns a 2-tuple of (offset, data) for the requested range of
1562 1573 revisions. Offset is the integer offset from the beginning of the
1563 1574 revlog and data is a str or buffer of the raw byte data.
1564 1575
1565 1576 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1566 1577 to determine where each revision's data begins and ends.
1567 1578 """
1568 1579 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1569 1580 # (functions are expensive).
1570 1581 index = self.index
1571 1582 istart = index[startrev]
1572 1583 start = int(istart[0] >> 16)
1573 1584 if startrev == endrev:
1574 1585 end = start + istart[1]
1575 1586 else:
1576 1587 iend = index[endrev]
1577 1588 end = int(iend[0] >> 16) + iend[1]
1578 1589
1579 1590 if self._inline:
1580 1591 start += (startrev + 1) * self.index.entry_size
1581 1592 end += (endrev + 1) * self.index.entry_size
1582 1593 length = end - start
1583 1594
1584 1595 return start, self._getsegment(start, length, df=df)
1585 1596
1586 1597 def _chunk(self, rev, df=None):
1587 1598 """Obtain a single decompressed chunk for a revision.
1588 1599
1589 1600 Accepts an integer revision and an optional already-open file handle
1590 1601 to be used for reading. If used, the seek position of the file will not
1591 1602 be preserved.
1592 1603
1593 1604 Returns a str holding uncompressed data for the requested revision.
1594 1605 """
1595 1606 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1596 1607
1597 1608 def _chunks(self, revs, df=None, targetsize=None):
1598 1609 """Obtain decompressed chunks for the specified revisions.
1599 1610
1600 1611 Accepts an iterable of numeric revisions that are assumed to be in
1601 1612 ascending order. Also accepts an optional already-open file handle
1602 1613 to be used for reading. If used, the seek position of the file will
1603 1614 not be preserved.
1604 1615
1605 1616 This function is similar to calling ``self._chunk()`` multiple times,
1606 1617 but is faster.
1607 1618
1608 1619 Returns a list with decompressed data for each requested revision.
1609 1620 """
1610 1621 if not revs:
1611 1622 return []
1612 1623 start = self.start
1613 1624 length = self.length
1614 1625 inline = self._inline
1615 1626 iosize = self.index.entry_size
1616 1627 buffer = util.buffer
1617 1628
1618 1629 l = []
1619 1630 ladd = l.append
1620 1631
1621 1632 if not self._withsparseread:
1622 1633 slicedchunks = (revs,)
1623 1634 else:
1624 1635 slicedchunks = deltautil.slicechunk(
1625 1636 self, revs, targetsize=targetsize
1626 1637 )
1627 1638
1628 1639 for revschunk in slicedchunks:
1629 1640 firstrev = revschunk[0]
1630 1641 # Skip trailing revisions with empty diff
1631 1642 for lastrev in revschunk[::-1]:
1632 1643 if length(lastrev) != 0:
1633 1644 break
1634 1645
1635 1646 try:
1636 1647 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1637 1648 except OverflowError:
1638 1649 # issue4215 - we can't cache a run of chunks greater than
1639 1650 # 2G on Windows
1640 1651 return [self._chunk(rev, df=df) for rev in revschunk]
1641 1652
1642 1653 decomp = self.decompress
1643 1654 for rev in revschunk:
1644 1655 chunkstart = start(rev)
1645 1656 if inline:
1646 1657 chunkstart += (rev + 1) * iosize
1647 1658 chunklength = length(rev)
1648 1659 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1649 1660
1650 1661 return l
1651 1662
1652 1663 def _chunkclear(self):
1653 1664 """Clear the raw chunk cache."""
1654 1665 self._chunkcache = (0, b'')
1655 1666
1656 1667 def deltaparent(self, rev):
1657 1668 """return deltaparent of the given revision"""
1658 1669 base = self.index[rev][3]
1659 1670 if base == rev:
1660 1671 return nullrev
1661 1672 elif self._generaldelta:
1662 1673 return base
1663 1674 else:
1664 1675 return rev - 1
1665 1676
1666 1677 def issnapshot(self, rev):
1667 1678 """tells whether rev is a snapshot"""
1668 1679 if not self._sparserevlog:
1669 1680 return self.deltaparent(rev) == nullrev
1670 1681 elif util.safehasattr(self.index, b'issnapshot'):
1671 1682 # directly assign the method to cache the testing and access
1672 1683 self.issnapshot = self.index.issnapshot
1673 1684 return self.issnapshot(rev)
1674 1685 if rev == nullrev:
1675 1686 return True
1676 1687 entry = self.index[rev]
1677 1688 base = entry[3]
1678 1689 if base == rev:
1679 1690 return True
1680 1691 if base == nullrev:
1681 1692 return True
1682 1693 p1 = entry[5]
1683 1694 p2 = entry[6]
1684 1695 if base == p1 or base == p2:
1685 1696 return False
1686 1697 return self.issnapshot(base)
1687 1698
1688 1699 def snapshotdepth(self, rev):
1689 1700 """number of snapshot in the chain before this one"""
1690 1701 if not self.issnapshot(rev):
1691 1702 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1692 1703 return len(self._deltachain(rev)[0]) - 1
1693 1704
1694 1705 def revdiff(self, rev1, rev2):
1695 1706 """return or calculate a delta between two revisions
1696 1707
1697 1708 The delta calculated is in binary form and is intended to be written to
1698 1709 revlog data directly. So this function needs raw revision data.
1699 1710 """
1700 1711 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1701 1712 return bytes(self._chunk(rev2))
1702 1713
1703 1714 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1704 1715
1705 1716 def _processflags(self, text, flags, operation, raw=False):
1706 1717 """deprecated entry point to access flag processors"""
1707 1718 msg = b'_processflag(...) use the specialized variant'
1708 1719 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1709 1720 if raw:
1710 1721 return text, flagutil.processflagsraw(self, text, flags)
1711 1722 elif operation == b'read':
1712 1723 return flagutil.processflagsread(self, text, flags)
1713 1724 else: # write operation
1714 1725 return flagutil.processflagswrite(self, text, flags)
1715 1726
1716 1727 def revision(self, nodeorrev, _df=None, raw=False):
1717 1728 """return an uncompressed revision of a given node or revision
1718 1729 number.
1719 1730
1720 1731 _df - an existing file handle to read from. (internal-only)
1721 1732 raw - an optional argument specifying if the revision data is to be
1722 1733 treated as raw data when applying flag transforms. 'raw' should be set
1723 1734 to True when generating changegroups or in debug commands.
1724 1735 """
1725 1736 if raw:
1726 1737 msg = (
1727 1738 b'revlog.revision(..., raw=True) is deprecated, '
1728 1739 b'use revlog.rawdata(...)'
1729 1740 )
1730 1741 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1731 1742 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1732 1743
1733 1744 def sidedata(self, nodeorrev, _df=None):
1734 1745 """a map of extra data related to the changeset but not part of the hash
1735 1746
1736 1747 This function currently returns a dictionary. However, a more advanced
1737 1748 mapping object will likely be used in the future for more
1738 1749 efficient/lazy code.
1739 1750 """
1740 1751 return self._revisiondata(nodeorrev, _df)[1]
1741 1752
1742 1753 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1743 1754 # deal with <nodeorrev> argument type
1744 1755 if isinstance(nodeorrev, int):
1745 1756 rev = nodeorrev
1746 1757 node = self.node(rev)
1747 1758 else:
1748 1759 node = nodeorrev
1749 1760 rev = None
1750 1761
1751 1762 # fast path the special `nullid` rev
1752 1763 if node == self.nullid:
1753 1764 return b"", {}
1754 1765
1755 1766 # ``rawtext`` is the text as stored inside the revlog. Might be the
1756 1767 # revision or might need to be processed to retrieve the revision.
1757 1768 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1758 1769
1759 1770 if self.version & 0xFFFF == REVLOGV2:
1760 1771 if rev is None:
1761 1772 rev = self.rev(node)
1762 1773 sidedata = self._sidedata(rev)
1763 1774 else:
1764 1775 sidedata = {}
1765 1776
1766 1777 if raw and validated:
1767 1778 # if we don't want to process the raw text and that raw
1768 1779 # text is cached, we can exit early.
1769 1780 return rawtext, sidedata
1770 1781 if rev is None:
1771 1782 rev = self.rev(node)
1772 1783 # the revlog's flag for this revision
1773 1784 # (usually alter its state or content)
1774 1785 flags = self.flags(rev)
1775 1786
1776 1787 if validated and flags == REVIDX_DEFAULT_FLAGS:
1777 1788 # no extra flags set, no flag processor runs, text = rawtext
1778 1789 return rawtext, sidedata
1779 1790
1780 1791 if raw:
1781 1792 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1782 1793 text = rawtext
1783 1794 else:
1784 1795 r = flagutil.processflagsread(self, rawtext, flags)
1785 1796 text, validatehash = r
1786 1797 if validatehash:
1787 1798 self.checkhash(text, node, rev=rev)
1788 1799 if not validated:
1789 1800 self._revisioncache = (node, rev, rawtext)
1790 1801
1791 1802 return text, sidedata
1792 1803
1793 1804 def _rawtext(self, node, rev, _df=None):
1794 1805 """return the possibly unvalidated rawtext for a revision
1795 1806
1796 1807 returns (rev, rawtext, validated)
1797 1808 """
1798 1809
1799 1810 # revision in the cache (could be useful to apply delta)
1800 1811 cachedrev = None
1801 1812 # An intermediate text to apply deltas to
1802 1813 basetext = None
1803 1814
1804 1815 # Check if we have the entry in cache
1805 1816 # The cache entry looks like (node, rev, rawtext)
1806 1817 if self._revisioncache:
1807 1818 if self._revisioncache[0] == node:
1808 1819 return (rev, self._revisioncache[2], True)
1809 1820 cachedrev = self._revisioncache[1]
1810 1821
1811 1822 if rev is None:
1812 1823 rev = self.rev(node)
1813 1824
1814 1825 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1815 1826 if stopped:
1816 1827 basetext = self._revisioncache[2]
1817 1828
1818 1829 # drop cache to save memory, the caller is expected to
1819 1830 # update self._revisioncache after validating the text
1820 1831 self._revisioncache = None
1821 1832
1822 1833 targetsize = None
1823 1834 rawsize = self.index[rev][2]
1824 1835 if 0 <= rawsize:
1825 1836 targetsize = 4 * rawsize
1826 1837
1827 1838 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1828 1839 if basetext is None:
1829 1840 basetext = bytes(bins[0])
1830 1841 bins = bins[1:]
1831 1842
1832 1843 rawtext = mdiff.patches(basetext, bins)
1833 1844 del basetext # let us have a chance to free memory early
1834 1845 return (rev, rawtext, False)
1835 1846
1836 1847 def _sidedata(self, rev):
1837 1848 """Return the sidedata for a given revision number."""
1838 1849 index_entry = self.index[rev]
1839 1850 sidedata_offset = index_entry[8]
1840 1851 sidedata_size = index_entry[9]
1841 1852
1842 1853 if self._inline:
1843 1854 sidedata_offset += self.index.entry_size * (1 + rev)
1844 1855 if sidedata_size == 0:
1845 1856 return {}
1846 1857
1847 1858 segment = self._getsegment(sidedata_offset, sidedata_size)
1848 1859 sidedata = sidedatautil.deserialize_sidedata(segment)
1849 1860 return sidedata
1850 1861
1851 1862 def rawdata(self, nodeorrev, _df=None):
1852 1863 """return an uncompressed raw data of a given node or revision number.
1853 1864
1854 1865 _df - an existing file handle to read from. (internal-only)
1855 1866 """
1856 1867 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1857 1868
1858 1869 def hash(self, text, p1, p2):
1859 1870 """Compute a node hash.
1860 1871
1861 1872 Available as a function so that subclasses can replace the hash
1862 1873 as needed.
1863 1874 """
1864 1875 return storageutil.hashrevisionsha1(text, p1, p2)
1865 1876
1866 1877 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1867 1878 """Check node hash integrity.
1868 1879
1869 1880 Available as a function so that subclasses can extend hash mismatch
1870 1881 behaviors as needed.
1871 1882 """
1872 1883 try:
1873 1884 if p1 is None and p2 is None:
1874 1885 p1, p2 = self.parents(node)
1875 1886 if node != self.hash(text, p1, p2):
1876 1887 # Clear the revision cache on hash failure. The revision cache
1877 1888 # only stores the raw revision and clearing the cache does have
1878 1889 # the side-effect that we won't have a cache hit when the raw
1879 1890 # revision data is accessed. But this case should be rare and
1880 1891 # it is extra work to teach the cache about the hash
1881 1892 # verification state.
1882 1893 if self._revisioncache and self._revisioncache[0] == node:
1883 1894 self._revisioncache = None
1884 1895
1885 1896 revornode = rev
1886 1897 if revornode is None:
1887 1898 revornode = templatefilters.short(hex(node))
1888 1899 raise error.RevlogError(
1889 1900 _(b"integrity check failed on %s:%s")
1890 1901 % (self.indexfile, pycompat.bytestr(revornode))
1891 1902 )
1892 1903 except error.RevlogError:
1893 1904 if self._censorable and storageutil.iscensoredtext(text):
1894 1905 raise error.CensoredNodeError(self.indexfile, node, text)
1895 1906 raise
1896 1907
1897 1908 def _enforceinlinesize(self, tr, fp=None):
1898 1909 """Check if the revlog is too big for inline and convert if so.
1899 1910
1900 1911 This should be called after revisions are added to the revlog. If the
1901 1912 revlog has grown too large to be an inline revlog, it will convert it
1902 1913 to use multiple index and data files.
1903 1914 """
1904 1915 tiprev = len(self) - 1
1905 1916 if (
1906 1917 not self._inline
1907 1918 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1908 1919 ):
1909 1920 return
1910 1921
1911 1922 troffset = tr.findoffset(self.indexfile)
1912 1923 if troffset is None:
1913 1924 raise error.RevlogError(
1914 1925 _(b"%s not found in the transaction") % self.indexfile
1915 1926 )
1916 1927 trindex = 0
1917 1928 tr.add(self.datafile, 0)
1918 1929
1919 1930 if fp:
1920 1931 fp.flush()
1921 1932 fp.close()
1922 1933 # We can't use the cached file handle after close(). So prevent
1923 1934 # its usage.
1924 1935 self._writinghandles = None
1925 1936
1926 1937 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1927 1938 for r in self:
1928 1939 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1929 1940 if troffset <= self.start(r):
1930 1941 trindex = r
1931 1942
1932 1943 with self._indexfp(b'w') as fp:
1933 1944 self.version &= ~FLAG_INLINE_DATA
1934 1945 self._inline = False
1935 1946 for i in self:
1936 1947 e = self.index.entry_binary(i)
1937 1948 if i == 0:
1938 1949 header = self.index.pack_header(self.version)
1939 1950 e = header + e
1940 1951 fp.write(e)
1941 1952
1942 1953 # the temp file replace the real index when we exit the context
1943 1954 # manager
1944 1955
1945 1956 tr.replace(self.indexfile, trindex * self.index.entry_size)
1946 1957 nodemaputil.setup_persistent_nodemap(tr, self)
1947 1958 self._chunkclear()
1948 1959
1949 1960 def _nodeduplicatecallback(self, transaction, node):
1950 1961 """called when trying to add a node already stored."""
1951 1962
1952 1963 def addrevision(
1953 1964 self,
1954 1965 text,
1955 1966 transaction,
1956 1967 link,
1957 1968 p1,
1958 1969 p2,
1959 1970 cachedelta=None,
1960 1971 node=None,
1961 1972 flags=REVIDX_DEFAULT_FLAGS,
1962 1973 deltacomputer=None,
1963 1974 sidedata=None,
1964 1975 ):
1965 1976 """add a revision to the log
1966 1977
1967 1978 text - the revision data to add
1968 1979 transaction - the transaction object used for rollback
1969 1980 link - the linkrev data to add
1970 1981 p1, p2 - the parent nodeids of the revision
1971 1982 cachedelta - an optional precomputed delta
1972 1983 node - nodeid of revision; typically node is not specified, and it is
1973 1984 computed by default as hash(text, p1, p2), however subclasses might
1974 1985 use different hashing method (and override checkhash() in such case)
1975 1986 flags - the known flags to set on the revision
1976 1987 deltacomputer - an optional deltacomputer instance shared between
1977 1988 multiple calls
1978 1989 """
1979 1990 if link == nullrev:
1980 1991 raise error.RevlogError(
1981 1992 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1982 1993 )
1983 1994
1984 1995 if sidedata is None:
1985 1996 sidedata = {}
1986 1997 elif not self.hassidedata:
1987 1998 raise error.ProgrammingError(
1988 1999 _(b"trying to add sidedata to a revlog who don't support them")
1989 2000 )
1990 2001
1991 2002 if flags:
1992 2003 node = node or self.hash(text, p1, p2)
1993 2004
1994 2005 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
1995 2006
1996 2007 # If the flag processor modifies the revision data, ignore any provided
1997 2008 # cachedelta.
1998 2009 if rawtext != text:
1999 2010 cachedelta = None
2000 2011
2001 2012 if len(rawtext) > _maxentrysize:
2002 2013 raise error.RevlogError(
2003 2014 _(
2004 2015 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2005 2016 )
2006 2017 % (self.indexfile, len(rawtext))
2007 2018 )
2008 2019
2009 2020 node = node or self.hash(rawtext, p1, p2)
2010 2021 rev = self.index.get_rev(node)
2011 2022 if rev is not None:
2012 2023 return rev
2013 2024
2014 2025 if validatehash:
2015 2026 self.checkhash(rawtext, node, p1=p1, p2=p2)
2016 2027
2017 2028 return self.addrawrevision(
2018 2029 rawtext,
2019 2030 transaction,
2020 2031 link,
2021 2032 p1,
2022 2033 p2,
2023 2034 node,
2024 2035 flags,
2025 2036 cachedelta=cachedelta,
2026 2037 deltacomputer=deltacomputer,
2027 2038 sidedata=sidedata,
2028 2039 )
2029 2040
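A hedged usage sketch of addrevision(); the open transaction ``tr``, the link revision and the parent nodes are assumptions, not values taken from this change:

    new_rev = rl.addrevision(
        b'new file content\n',   # text
        tr,                      # open transaction (assumed)
        linkrev,                 # changelog revision this entry links back to (assumed)
        p1_node,
        p2_node,
    )
    # the node is computed as hash(text, p1, p2) unless passed explicitly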
2030 2041 def addrawrevision(
2031 2042 self,
2032 2043 rawtext,
2033 2044 transaction,
2034 2045 link,
2035 2046 p1,
2036 2047 p2,
2037 2048 node,
2038 2049 flags,
2039 2050 cachedelta=None,
2040 2051 deltacomputer=None,
2041 2052 sidedata=None,
2042 2053 ):
2043 2054 """add a raw revision with known flags, node and parents
2044 2055 useful when reusing a revision not stored in this revlog (ex: received
2045 2056 over wire, or read from an external bundle).
2046 2057 """
2047 2058 dfh = None
2048 2059 if not self._inline:
2049 2060 dfh = self._datafp(b"a+")
2050 2061 ifh = self._indexfp(b"a+")
2051 2062 try:
2052 2063 return self._addrevision(
2053 2064 node,
2054 2065 rawtext,
2055 2066 transaction,
2056 2067 link,
2057 2068 p1,
2058 2069 p2,
2059 2070 flags,
2060 2071 cachedelta,
2061 2072 ifh,
2062 2073 dfh,
2063 2074 deltacomputer=deltacomputer,
2064 2075 sidedata=sidedata,
2065 2076 )
2066 2077 finally:
2067 2078 if dfh:
2068 2079 dfh.close()
2069 2080 ifh.close()
2070 2081
2071 2082 def compress(self, data):
2072 2083 """Generate a possibly-compressed representation of data."""
2073 2084 if not data:
2074 2085 return b'', data
2075 2086
2076 2087 compressed = self._compressor.compress(data)
2077 2088
2078 2089 if compressed:
2079 2090 # The revlog compressor added the header in the returned data.
2080 2091 return b'', compressed
2081 2092
2082 2093 if data[0:1] == b'\0':
2083 2094 return b'', data
2084 2095 return b'u', data
2085 2096
2086 2097 def decompress(self, data):
2087 2098 """Decompress a revlog chunk.
2088 2099
2089 2100 The chunk is expected to begin with a header identifying the
2090 2101 format type so it can be routed to an appropriate decompressor.
2091 2102 """
2092 2103 if not data:
2093 2104 return data
2094 2105
2095 2106 # Revlogs are read much more frequently than they are written and many
2096 2107 # chunks only take microseconds to decompress, so performance is
2097 2108 # important here.
2098 2109 #
2099 2110 # We can make a few assumptions about revlogs:
2100 2111 #
2101 2112 # 1) the majority of chunks will be compressed (as opposed to inline
2102 2113 # raw data).
2103 2114 # 2) decompressing *any* data will likely be at least 10x slower than
2104 2115 # returning raw inline data.
2105 2116 # 3) we want to prioritize common and officially supported compression
2106 2117 # engines
2107 2118 #
2108 2119 # It follows that we want to optimize for "decompress compressed data
2109 2120 # when encoded with common and officially supported compression engines"
2110 2121 # case over "raw data" and "data encoded by less common or non-official
2111 2122 # compression engines." That is why we have the inline lookup first
2112 2123 # followed by the compengines lookup.
2113 2124 #
2114 2125 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2115 2126 # compressed chunks. And this matters for changelog and manifest reads.
2116 2127 t = data[0:1]
2117 2128
2118 2129 if t == b'x':
2119 2130 try:
2120 2131 return _zlibdecompress(data)
2121 2132 except zlib.error as e:
2122 2133 raise error.RevlogError(
2123 2134 _(b'revlog decompress error: %s')
2124 2135 % stringutil.forcebytestr(e)
2125 2136 )
2126 2137 # '\0' is more common than 'u' so it goes first.
2127 2138 elif t == b'\0':
2128 2139 return data
2129 2140 elif t == b'u':
2130 2141 return util.buffer(data, 1)
2131 2142
2132 2143 try:
2133 2144 compressor = self._decompressors[t]
2134 2145 except KeyError:
2135 2146 try:
2136 2147 engine = util.compengines.forrevlogheader(t)
2137 2148 compressor = engine.revlogcompressor(self._compengineopts)
2138 2149 self._decompressors[t] = compressor
2139 2150 except KeyError:
2140 2151 raise error.RevlogError(
2141 2152 _(b'unknown compression type %s') % binascii.hexlify(t)
2142 2153 )
2143 2154
2144 2155 return compressor.decompress(data)
2145 2156
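A hedged illustration of the one-byte header dispatch in decompress() above; ``rl`` is an assumed, already-open revlog instance and the literals mirror the branches shown here:

    payload = b'\x00raw revlog data'                       # NUL header: stored verbatim
    assert rl.decompress(payload) == payload
    assert bytes(rl.decompress(b'u' + b'plain text')) == b'plain text'
    # b'x' chunks go to zlib; any other header byte is looked up among the
    # registered compression engines.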
2146 2157 def _addrevision(
2147 2158 self,
2148 2159 node,
2149 2160 rawtext,
2150 2161 transaction,
2151 2162 link,
2152 2163 p1,
2153 2164 p2,
2154 2165 flags,
2155 2166 cachedelta,
2156 2167 ifh,
2157 2168 dfh,
2158 2169 alwayscache=False,
2159 2170 deltacomputer=None,
2160 2171 sidedata=None,
2161 2172 ):
2162 2173 """internal function to add revisions to the log
2163 2174
2164 2175 see addrevision for argument descriptions.
2165 2176
2166 2177 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2167 2178
2168 2179 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2169 2180 be used.
2170 2181
2171 2182 invariants:
2172 2183 - rawtext is optional (can be None); if not set, cachedelta must be set.
2173 2184 if both are set, they must correspond to each other.
2174 2185 """
2175 2186 if node == self.nullid:
2176 2187 raise error.RevlogError(
2177 2188 _(b"%s: attempt to add null revision") % self.indexfile
2178 2189 )
2179 2190 if (
2180 2191 node == self.nodeconstants.wdirid
2181 2192 or node in self.nodeconstants.wdirfilenodeids
2182 2193 ):
2183 2194 raise error.RevlogError(
2184 2195 _(b"%s: attempt to add wdir revision") % self.indexfile
2185 2196 )
2186 2197
2187 2198 if self._inline:
2188 2199 fh = ifh
2189 2200 else:
2190 2201 fh = dfh
2191 2202
2192 2203 btext = [rawtext]
2193 2204
2194 2205 curr = len(self)
2195 2206 prev = curr - 1
2196 2207
2197 2208 offset = self._get_data_offset(prev)
2198 2209
2199 2210 if self._concurrencychecker:
2200 2211 if self._inline:
2201 2212 # offset is "as if" it were in the .d file, so we need to add on
2202 2213 # the size of the entry metadata.
2203 2214 self._concurrencychecker(
2204 2215 ifh, self.indexfile, offset + curr * self.index.entry_size
2205 2216 )
2206 2217 else:
2207 2218 # Entries in the .i are a consistent size.
2208 2219 self._concurrencychecker(
2209 2220 ifh, self.indexfile, curr * self.index.entry_size
2210 2221 )
2211 2222 self._concurrencychecker(dfh, self.datafile, offset)
2212 2223
2213 2224 p1r, p2r = self.rev(p1), self.rev(p2)
2214 2225
2215 2226 # full versions are inserted when the needed deltas
2216 2227 # become comparable to the uncompressed text
2217 2228 if rawtext is None:
2218 2229 # need rawtext size, before changed by flag processors, which is
2219 2230 # the non-raw size. use revlog explicitly to avoid filelog's extra
2220 2231 # logic that might remove metadata size.
2221 2232 textlen = mdiff.patchedsize(
2222 2233 revlog.size(self, cachedelta[0]), cachedelta[1]
2223 2234 )
2224 2235 else:
2225 2236 textlen = len(rawtext)
2226 2237
2227 2238 if deltacomputer is None:
2228 2239 deltacomputer = deltautil.deltacomputer(self)
2229 2240
2230 2241 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2231 2242
2232 2243 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2233 2244
2234 2245 if sidedata:
2235 2246 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2236 2247 sidedata_offset = offset + deltainfo.deltalen
2237 2248 else:
2238 2249 serialized_sidedata = b""
2239 2250 # Don't store the offset if the sidedata is empty, that way
2240 2251 # we can easily detect empty sidedata and they will be no different
2241 2252 # than ones we manually add.
2242 2253 sidedata_offset = 0
2243 2254
2244 2255 e = (
2245 2256 offset_type(offset, flags),
2246 2257 deltainfo.deltalen,
2247 2258 textlen,
2248 2259 deltainfo.base,
2249 2260 link,
2250 2261 p1r,
2251 2262 p2r,
2252 2263 node,
2253 2264 sidedata_offset,
2254 2265 len(serialized_sidedata),
2255 2266 )
2256 2267
2257 2268 if self.version & 0xFFFF != REVLOGV2:
2258 2269 e = e[:8]
2259 2270
2260 2271 self.index.append(e)
2261 2272 entry = self.index.entry_binary(curr)
2262 2273 if curr == 0:
2263 2274 header = self.index.pack_header(self.version)
2264 2275 entry = header + entry
2265 2276 self._writeentry(
2266 2277 transaction,
2267 2278 ifh,
2268 2279 dfh,
2269 2280 entry,
2270 2281 deltainfo.data,
2271 2282 link,
2272 2283 offset,
2273 2284 serialized_sidedata,
2274 2285 )
2275 2286
2276 2287 rawtext = btext[0]
2277 2288
2278 2289 if alwayscache and rawtext is None:
2279 2290 rawtext = deltacomputer.buildtext(revinfo, fh)
2280 2291
2281 2292 if type(rawtext) == bytes: # only accept immutable objects
2282 2293 self._revisioncache = (node, curr, rawtext)
2283 2294 self._chainbasecache[curr] = deltainfo.chainbase
2284 2295 return curr
2285 2296
2286 2297 def _get_data_offset(self, prev):
2287 2298 """Returns the current offset in the (in-transaction) data file.
2288 2299 Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
2289 2300 file to store that information: since sidedata can be rewritten to the
2290 2301 end of the data file within a transaction, you can have cases where, for
2291 2302 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2292 2303 to `n - 1`'s sidedata being written after `n`'s data.
2293 2304
2294 2305 TODO cache this in a docket file before getting out of experimental."""
2295 2306 if self.version & 0xFFFF != REVLOGV2:
2296 2307 return self.end(prev)
2297 2308
2298 2309 offset = 0
2299 2310 for rev, entry in enumerate(self.index):
2300 2311 sidedata_end = entry[8] + entry[9]
2301 2312 # Sidedata for a previous rev has potentially been written after
2302 2313 # this rev's end, so take the max.
2303 2314 offset = max(self.end(rev), offset, sidedata_end)
2304 2315 return offset
2305 2316
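A hedged toy illustration of why the v2 variant has to scan every entry: sidedata rewritten for an earlier revision can end after the last revision's data (all numbers below are made up):

    ends = [(1200, 0), (2000, 2600)]   # assumed (end of data, end of sidedata) per rev
    offset = 0
    for data_end, sidedata_end in ends:
        offset = max(offset, data_end, sidedata_end)
    assert offset == 2600   # the rewritten sidedata, not the last rev's data, wins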
2306 2317 def _writeentry(
2307 2318 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2308 2319 ):
2309 2320 # Files opened in a+ mode have inconsistent behavior on various
2310 2321 # platforms. Windows requires that a file positioning call be made
2311 2322 # when the file handle transitions between reads and writes. See
2312 2323 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2313 2324 # platforms, Python or the platform itself can be buggy. Some versions
2314 2325 # of Solaris have been observed to not append at the end of the file
2315 2326 # if the file was seeked to before the end. See issue4943 for more.
2316 2327 #
2317 2328 # We work around this issue by inserting a seek() before writing.
2318 2329 # Note: This is likely not necessary on Python 3. However, because
2319 2330 # the file handle is reused for reads and may be seeked there, we need
2320 2331 # to be careful before changing this.
2321 2332 ifh.seek(0, os.SEEK_END)
2322 2333 if dfh:
2323 2334 dfh.seek(0, os.SEEK_END)
2324 2335
2325 2336 curr = len(self) - 1
2326 2337 if not self._inline:
2327 2338 transaction.add(self.datafile, offset)
2328 2339 transaction.add(self.indexfile, curr * len(entry))
2329 2340 if data[0]:
2330 2341 dfh.write(data[0])
2331 2342 dfh.write(data[1])
2332 2343 if sidedata:
2333 2344 dfh.write(sidedata)
2334 2345 ifh.write(entry)
2335 2346 else:
2336 2347 offset += curr * self.index.entry_size
2337 2348 transaction.add(self.indexfile, offset)
2338 2349 ifh.write(entry)
2339 2350 ifh.write(data[0])
2340 2351 ifh.write(data[1])
2341 2352 if sidedata:
2342 2353 ifh.write(sidedata)
2343 2354 self._enforceinlinesize(transaction, ifh)
2344 2355 nodemaputil.setup_persistent_nodemap(transaction, self)
2345 2356
2346 2357 def addgroup(
2347 2358 self,
2348 2359 deltas,
2349 2360 linkmapper,
2350 2361 transaction,
2351 2362 alwayscache=False,
2352 2363 addrevisioncb=None,
2353 2364 duplicaterevisioncb=None,
2354 2365 ):
2355 2366 """
2356 2367 add a delta group
2357 2368
2358 2369 given a set of deltas, add them to the revision log. the
2359 2370 first delta is against its parent, which should be in our
2360 2371 log, the rest are against the previous delta.
2361 2372
2362 2373 If ``addrevisioncb`` is defined, it will be called with arguments of
2363 2374 this revlog and the node that was added.
2364 2375 """
2365 2376
2366 2377 if self._writinghandles:
2367 2378 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2368 2379
2369 2380 r = len(self)
2370 2381 end = 0
2371 2382 if r:
2372 2383 end = self.end(r - 1)
2373 2384 ifh = self._indexfp(b"a+")
2374 2385 isize = r * self.index.entry_size
2375 2386 if self._inline:
2376 2387 transaction.add(self.indexfile, end + isize)
2377 2388 dfh = None
2378 2389 else:
2379 2390 transaction.add(self.indexfile, isize)
2380 2391 transaction.add(self.datafile, end)
2381 2392 dfh = self._datafp(b"a+")
2382 2393
2383 2394 def flush():
2384 2395 if dfh:
2385 2396 dfh.flush()
2386 2397 ifh.flush()
2387 2398
2388 2399 self._writinghandles = (ifh, dfh)
2389 2400 empty = True
2390 2401
2391 2402 try:
2392 2403 deltacomputer = deltautil.deltacomputer(self)
2393 2404 # loop through our set of deltas
2394 2405 for data in deltas:
2395 2406 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2396 2407 link = linkmapper(linknode)
2397 2408 flags = flags or REVIDX_DEFAULT_FLAGS
2398 2409
2399 2410 rev = self.index.get_rev(node)
2400 2411 if rev is not None:
2401 2412 # this can happen if two branches make the same change
2402 2413 self._nodeduplicatecallback(transaction, rev)
2403 2414 if duplicaterevisioncb:
2404 2415 duplicaterevisioncb(self, rev)
2405 2416 empty = False
2406 2417 continue
2407 2418
2408 2419 for p in (p1, p2):
2409 2420 if not self.index.has_node(p):
2410 2421 raise error.LookupError(
2411 2422 p, self.indexfile, _(b'unknown parent')
2412 2423 )
2413 2424
2414 2425 if not self.index.has_node(deltabase):
2415 2426 raise error.LookupError(
2416 2427 deltabase, self.indexfile, _(b'unknown delta base')
2417 2428 )
2418 2429
2419 2430 baserev = self.rev(deltabase)
2420 2431
2421 2432 if baserev != nullrev and self.iscensored(baserev):
2422 2433 # if base is censored, delta must be full replacement in a
2423 2434 # single patch operation
2424 2435 hlen = struct.calcsize(b">lll")
2425 2436 oldlen = self.rawsize(baserev)
2426 2437 newlen = len(delta) - hlen
2427 2438 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2428 2439 raise error.CensoredBaseError(
2429 2440 self.indexfile, self.node(baserev)
2430 2441 )
2431 2442
2432 2443 if not flags and self._peek_iscensored(baserev, delta, flush):
2433 2444 flags |= REVIDX_ISCENSORED
2434 2445
2435 2446 # We assume consumers of addrevisioncb will want to retrieve
2436 2447 # the added revision, which will require a call to
2437 2448 # revision(). revision() will fast path if there is a cache
2438 2449 # hit. So, we tell _addrevision() to always cache in this case.
2439 2450 # We're only using addgroup() in the context of changegroup
2440 2451 # generation so the revision data can always be handled as raw
2441 2452 # by the flagprocessor.
2442 2453 rev = self._addrevision(
2443 2454 node,
2444 2455 None,
2445 2456 transaction,
2446 2457 link,
2447 2458 p1,
2448 2459 p2,
2449 2460 flags,
2450 2461 (baserev, delta),
2451 2462 ifh,
2452 2463 dfh,
2453 2464 alwayscache=alwayscache,
2454 2465 deltacomputer=deltacomputer,
2455 2466 sidedata=sidedata,
2456 2467 )
2457 2468
2458 2469 if addrevisioncb:
2459 2470 addrevisioncb(self, rev)
2460 2471 empty = False
2461 2472
2462 2473 if not dfh and not self._inline:
2463 2474 # addrevision switched from inline to conventional
2464 2475 # reopen the index
2465 2476 ifh.close()
2466 2477 dfh = self._datafp(b"a+")
2467 2478 ifh = self._indexfp(b"a+")
2468 2479 self._writinghandles = (ifh, dfh)
2469 2480 finally:
2470 2481 self._writinghandles = None
2471 2482
2472 2483 if dfh:
2473 2484 dfh.close()
2474 2485 ifh.close()
2475 2486 return not empty
2476 2487
2477 2488 def iscensored(self, rev):
2478 2489 """Check if a file revision is censored."""
2479 2490 if not self._censorable:
2480 2491 return False
2481 2492
2482 2493 return self.flags(rev) & REVIDX_ISCENSORED
2483 2494
2484 2495 def _peek_iscensored(self, baserev, delta, flush):
2485 2496 """Quickly check if a delta produces a censored revision."""
2486 2497 if not self._censorable:
2487 2498 return False
2488 2499
2489 2500 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2490 2501
2491 2502 def getstrippoint(self, minlink):
2492 2503 """find the minimum rev that must be stripped to strip the linkrev
2493 2504
2494 2505 Returns a tuple containing the minimum rev and a set of all revs that
2495 2506 have linkrevs that will be broken by this strip.
2496 2507 """
2497 2508 return storageutil.resolvestripinfo(
2498 2509 minlink,
2499 2510 len(self) - 1,
2500 2511 self.headrevs(),
2501 2512 self.linkrev,
2502 2513 self.parentrevs,
2503 2514 )
2504 2515
2505 2516 def strip(self, minlink, transaction):
2506 2517 """truncate the revlog on the first revision with a linkrev >= minlink
2507 2518
2508 2519 This function is called when we're stripping revision minlink and
2509 2520 its descendants from the repository.
2510 2521
2511 2522 We have to remove all revisions with linkrev >= minlink, because
2512 2523 the equivalent changelog revisions will be renumbered after the
2513 2524 strip.
2514 2525
2515 2526 So we truncate the revlog on the first of these revisions, and
2516 2527 trust that the caller has saved the revisions that shouldn't be
2517 2528 removed and that it'll re-add them after this truncation.
2518 2529 """
2519 2530 if len(self) == 0:
2520 2531 return
2521 2532
2522 2533 rev, _ = self.getstrippoint(minlink)
2523 2534 if rev == len(self):
2524 2535 return
2525 2536
2526 2537 # first truncate the files on disk
2527 2538 end = self.start(rev)
2528 2539 if not self._inline:
2529 2540 transaction.add(self.datafile, end)
2530 2541 end = rev * self.index.entry_size
2531 2542 else:
2532 2543 end += rev * self.index.entry_size
2533 2544
2534 2545 transaction.add(self.indexfile, end)
2535 2546
2536 2547 # then reset internal state in memory to forget those revisions
2537 2548 self._revisioncache = None
2538 2549 self._chaininfocache = util.lrucachedict(500)
2539 2550 self._chunkclear()
2540 2551
2541 2552 del self.index[rev:-1]
2542 2553
2543 2554 def checksize(self):
2544 2555 """Check size of index and data files
2545 2556
2546 2557 return a (dd, di) tuple.
2547 2558 - dd: extra bytes for the "data" file
2548 2559 - di: extra bytes for the "index" file
2549 2560
2550 2561 A healthy revlog will return (0, 0).
2551 2562 """
2552 2563 expected = 0
2553 2564 if len(self):
2554 2565 expected = max(0, self.end(len(self) - 1))
2555 2566
2556 2567 try:
2557 2568 with self._datafp() as f:
2558 2569 f.seek(0, io.SEEK_END)
2559 2570 actual = f.tell()
2560 2571 dd = actual - expected
2561 2572 except IOError as inst:
2562 2573 if inst.errno != errno.ENOENT:
2563 2574 raise
2564 2575 dd = 0
2565 2576
2566 2577 try:
2567 2578 f = self.opener(self.indexfile)
2568 2579 f.seek(0, io.SEEK_END)
2569 2580 actual = f.tell()
2570 2581 f.close()
2571 2582 s = self.index.entry_size
2572 2583 i = max(0, actual // s)
2573 2584 di = actual - (i * s)
2574 2585 if self._inline:
2575 2586 databytes = 0
2576 2587 for r in self:
2577 2588 databytes += max(0, self.length(r))
2578 2589 dd = 0
2579 2590 di = actual - len(self) * s - databytes
2580 2591 except IOError as inst:
2581 2592 if inst.errno != errno.ENOENT:
2582 2593 raise
2583 2594 di = 0
2584 2595
2585 2596 return (dd, di)
2586 2597
2587 2598 def files(self):
2588 2599 res = [self.indexfile]
2589 2600 if not self._inline:
2590 2601 res.append(self.datafile)
2591 2602 return res
2592 2603
2593 2604 def emitrevisions(
2594 2605 self,
2595 2606 nodes,
2596 2607 nodesorder=None,
2597 2608 revisiondata=False,
2598 2609 assumehaveparentrevisions=False,
2599 2610 deltamode=repository.CG_DELTAMODE_STD,
2600 2611 sidedata_helpers=None,
2601 2612 ):
2602 2613 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2603 2614 raise error.ProgrammingError(
2604 2615 b'unhandled value for nodesorder: %s' % nodesorder
2605 2616 )
2606 2617
2607 2618 if nodesorder is None and not self._generaldelta:
2608 2619 nodesorder = b'storage'
2609 2620
2610 2621 if (
2611 2622 not self._storedeltachains
2612 2623 and deltamode != repository.CG_DELTAMODE_PREV
2613 2624 ):
2614 2625 deltamode = repository.CG_DELTAMODE_FULL
2615 2626
2616 2627 return storageutil.emitrevisions(
2617 2628 self,
2618 2629 nodes,
2619 2630 nodesorder,
2620 2631 revlogrevisiondelta,
2621 2632 deltaparentfn=self.deltaparent,
2622 2633 candeltafn=self.candelta,
2623 2634 rawsizefn=self.rawsize,
2624 2635 revdifffn=self.revdiff,
2625 2636 flagsfn=self.flags,
2626 2637 deltamode=deltamode,
2627 2638 revisiondata=revisiondata,
2628 2639 assumehaveparentrevisions=assumehaveparentrevisions,
2629 2640 sidedata_helpers=sidedata_helpers,
2630 2641 )
2631 2642
2632 2643 DELTAREUSEALWAYS = b'always'
2633 2644 DELTAREUSESAMEREVS = b'samerevs'
2634 2645 DELTAREUSENEVER = b'never'
2635 2646
2636 2647 DELTAREUSEFULLADD = b'fulladd'
2637 2648
2638 2649 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2639 2650
2640 2651 def clone(
2641 2652 self,
2642 2653 tr,
2643 2654 destrevlog,
2644 2655 addrevisioncb=None,
2645 2656 deltareuse=DELTAREUSESAMEREVS,
2646 2657 forcedeltabothparents=None,
2647 2658 sidedatacompanion=None,
2648 2659 ):
2649 2660 """Copy this revlog to another, possibly with format changes.
2650 2661
2651 2662 The destination revlog will contain the same revisions and nodes.
2652 2663 However, it may not be bit-for-bit identical due to e.g. delta encoding
2653 2664 differences.
2654 2665
2655 2666 The ``deltareuse`` argument control how deltas from the existing revlog
2656 2667 are preserved in the destination revlog. The argument can have the
2657 2668 following values:
2658 2669
2659 2670 DELTAREUSEALWAYS
2660 2671 Deltas will always be reused (if possible), even if the destination
2661 2672 revlog would not select the same revisions for the delta. This is the
2662 2673 fastest mode of operation.
2663 2674 DELTAREUSESAMEREVS
2664 2675 Deltas will be reused if the destination revlog would pick the same
2665 2676 revisions for the delta. This mode strikes a balance between speed
2666 2677 and optimization.
2667 2678 DELTAREUSENEVER
2668 2679 Deltas will never be reused. This is the slowest mode of execution.
2669 2680 This mode can be used to recompute deltas (e.g. if the diff/delta
2670 2681 algorithm changes).
2671 2682 DELTAREUSEFULLADD
2672 2683 Revisions will be re-added as if they were new content. This is
2673 2684 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2674 2685 e.g. large file detection and handling.
2675 2686
2676 2687 Delta computation can be slow, so the choice of delta reuse policy can
2677 2688 significantly affect run time.
2678 2689
2679 2690 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2680 2691 two extremes. Deltas will be reused if they are appropriate. But if the
2681 2692 delta could choose a better revision, it will do so. This means if you
2682 2693 are converting a non-generaldelta revlog to a generaldelta revlog,
2683 2694 deltas will be recomputed if the delta's parent isn't a parent of the
2684 2695 revision.
2685 2696
2686 2697 In addition to the delta policy, the ``forcedeltabothparents``
2687 2698 argument controls whether to force computing deltas against both parents
2688 2699 for merges. When None, the destination revlog's existing setting is kept.
2689 2700
2690 2701 If not None, `sidedatacompanion` is a callable that accepts two
2691 2702 arguments:
2692 2703
2693 2704 (srcrevlog, rev)
2694 2705
2695 2706 and returns a quintet that controls changes to sidedata content from the
2696 2707 old revision to the new clone result:
2697 2708
2698 2709 (dropall, filterout, update, new_flags, dropped_flags)
2699 2710
2700 2711 * if `dropall` is True, all sidedata should be dropped
2701 2712 * `filterout` is a set of sidedata keys that should be dropped
2702 2713 * `update` is a mapping of additional/new key -> value
2703 2714 * new_flags is a bitfield of new flags that the revision should get
2704 2715 * dropped_flags is a bitfield of flags that the revision should no longer have
2705 2716 """
2706 2717 if deltareuse not in self.DELTAREUSEALL:
2707 2718 raise ValueError(
2708 2719 _(b'value for deltareuse invalid: %s') % deltareuse
2709 2720 )
2710 2721
2711 2722 if len(destrevlog):
2712 2723 raise ValueError(_(b'destination revlog is not empty'))
2713 2724
2714 2725 if getattr(self, 'filteredrevs', None):
2715 2726 raise ValueError(_(b'source revlog has filtered revisions'))
2716 2727 if getattr(destrevlog, 'filteredrevs', None):
2717 2728 raise ValueError(_(b'destination revlog has filtered revisions'))
2718 2729
2719 2730 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2720 2731 # if possible.
2721 2732 oldlazydelta = destrevlog._lazydelta
2722 2733 oldlazydeltabase = destrevlog._lazydeltabase
2723 2734 oldamd = destrevlog._deltabothparents
2724 2735
2725 2736 try:
2726 2737 if deltareuse == self.DELTAREUSEALWAYS:
2727 2738 destrevlog._lazydeltabase = True
2728 2739 destrevlog._lazydelta = True
2729 2740 elif deltareuse == self.DELTAREUSESAMEREVS:
2730 2741 destrevlog._lazydeltabase = False
2731 2742 destrevlog._lazydelta = True
2732 2743 elif deltareuse == self.DELTAREUSENEVER:
2733 2744 destrevlog._lazydeltabase = False
2734 2745 destrevlog._lazydelta = False
2735 2746
2736 2747 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2737 2748
2738 2749 self._clone(
2739 2750 tr,
2740 2751 destrevlog,
2741 2752 addrevisioncb,
2742 2753 deltareuse,
2743 2754 forcedeltabothparents,
2744 2755 sidedatacompanion,
2745 2756 )
2746 2757
2747 2758 finally:
2748 2759 destrevlog._lazydelta = oldlazydelta
2749 2760 destrevlog._lazydeltabase = oldlazydeltabase
2750 2761 destrevlog._deltabothparents = oldamd
2751 2762
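A hedged usage sketch of clone() with the default delta-reuse policy; ``tr``, ``src`` and ``dest`` are assumptions, and ``dest`` must be empty as required above:

    src.clone(
        tr,
        dest,
        deltareuse=src.DELTAREUSESAMEREVS,   # reuse deltas only when dest would pick the same base
        forcedeltabothparents=None,          # keep dest's existing merge-delta setting
    )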
2752 2763 def _clone(
2753 2764 self,
2754 2765 tr,
2755 2766 destrevlog,
2756 2767 addrevisioncb,
2757 2768 deltareuse,
2758 2769 forcedeltabothparents,
2759 2770 sidedatacompanion,
2760 2771 ):
2761 2772 """perform the core duty of `revlog.clone` after parameter processing"""
2762 2773 deltacomputer = deltautil.deltacomputer(destrevlog)
2763 2774 index = self.index
2764 2775 for rev in self:
2765 2776 entry = index[rev]
2766 2777
2767 2778 # Some classes override linkrev to take filtered revs into
2768 2779 # account. Use raw entry from index.
2769 2780 flags = entry[0] & 0xFFFF
2770 2781 linkrev = entry[4]
2771 2782 p1 = index[entry[5]][7]
2772 2783 p2 = index[entry[6]][7]
2773 2784 node = entry[7]
2774 2785
2775 2786 sidedataactions = (False, [], {}, 0, 0)
2776 2787 if sidedatacompanion is not None:
2777 2788 sidedataactions = sidedatacompanion(self, rev)
2778 2789
2779 2790 # (Possibly) reuse the delta from the revlog if allowed and
2780 2791 # the revlog chunk is a delta.
2781 2792 cachedelta = None
2782 2793 rawtext = None
2783 2794 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2784 2795 dropall = sidedataactions[0]
2785 2796 filterout = sidedataactions[1]
2786 2797 update = sidedataactions[2]
2787 2798 new_flags = sidedataactions[3]
2788 2799 dropped_flags = sidedataactions[4]
2789 2800 text, sidedata = self._revisiondata(rev)
2790 2801 if dropall:
2791 2802 sidedata = {}
2792 2803 for key in filterout:
2793 2804 sidedata.pop(key, None)
2794 2805 sidedata.update(update)
2795 2806 if not sidedata:
2796 2807 sidedata = None
2797 2808
2798 2809 flags |= new_flags
2799 2810 flags &= ~dropped_flags
2800 2811
2801 2812 destrevlog.addrevision(
2802 2813 text,
2803 2814 tr,
2804 2815 linkrev,
2805 2816 p1,
2806 2817 p2,
2807 2818 cachedelta=cachedelta,
2808 2819 node=node,
2809 2820 flags=flags,
2810 2821 deltacomputer=deltacomputer,
2811 2822 sidedata=sidedata,
2812 2823 )
2813 2824 else:
2814 2825 if destrevlog._lazydelta:
2815 2826 dp = self.deltaparent(rev)
2816 2827 if dp != nullrev:
2817 2828 cachedelta = (dp, bytes(self._chunk(rev)))
2818 2829
2819 2830 if not cachedelta:
2820 2831 rawtext = self.rawdata(rev)
2821 2832
2822 2833 ifh = destrevlog.opener(
2823 2834 destrevlog.indexfile, b'a+', checkambig=False
2824 2835 )
2825 2836 dfh = None
2826 2837 if not destrevlog._inline:
2827 2838 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2828 2839 try:
2829 2840 destrevlog._addrevision(
2830 2841 node,
2831 2842 rawtext,
2832 2843 tr,
2833 2844 linkrev,
2834 2845 p1,
2835 2846 p2,
2836 2847 flags,
2837 2848 cachedelta,
2838 2849 ifh,
2839 2850 dfh,
2840 2851 deltacomputer=deltacomputer,
2841 2852 )
2842 2853 finally:
2843 2854 if dfh:
2844 2855 dfh.close()
2845 2856 ifh.close()
2846 2857
2847 2858 if addrevisioncb:
2848 2859 addrevisioncb(self, rev, node)
2849 2860
2850 2861 def censorrevision(self, tr, censornode, tombstone=b''):
2851 2862 if (self.version & 0xFFFF) == REVLOGV0:
2852 2863 raise error.RevlogError(
2853 2864 _(b'cannot censor with version %d revlogs') % self.version
2854 2865 )
2855 2866
2856 2867 censorrev = self.rev(censornode)
2857 2868 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2858 2869
2859 2870 if len(tombstone) > self.rawsize(censorrev):
2860 2871 raise error.Abort(
2861 2872 _(b'censor tombstone must be no longer than censored data')
2862 2873 )
2863 2874
2864 2875 # Rewriting the revlog in place is hard. Our strategy for censoring is
2865 2876 # to create a new revlog, copy all revisions to it, then replace the
2866 2877 # revlogs on transaction close.
2867 2878
2868 2879 newindexfile = self.indexfile + b'.tmpcensored'
2869 2880 newdatafile = self.datafile + b'.tmpcensored'
2870 2881
2871 2882 # This is a bit dangerous. We could easily have a mismatch of state.
2872 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2883 newrl = revlog(
2884 self.opener,
2885 target=self.target,
2886 indexfile=newindexfile,
2887 datafile=newdatafile,
2888 censorable=True,
2889 )
2873 2890 newrl.version = self.version
2874 2891 newrl._generaldelta = self._generaldelta
2875 2892 newrl._parse_index = self._parse_index
2876 2893
2877 2894 for rev in self.revs():
2878 2895 node = self.node(rev)
2879 2896 p1, p2 = self.parents(node)
2880 2897
2881 2898 if rev == censorrev:
2882 2899 newrl.addrawrevision(
2883 2900 tombstone,
2884 2901 tr,
2885 2902 self.linkrev(censorrev),
2886 2903 p1,
2887 2904 p2,
2888 2905 censornode,
2889 2906 REVIDX_ISCENSORED,
2890 2907 )
2891 2908
2892 2909 if newrl.deltaparent(rev) != nullrev:
2893 2910 raise error.Abort(
2894 2911 _(
2895 2912 b'censored revision stored as delta; '
2896 2913 b'cannot censor'
2897 2914 ),
2898 2915 hint=_(
2899 2916 b'censoring of revlogs is not '
2900 2917 b'fully implemented; please report '
2901 2918 b'this bug'
2902 2919 ),
2903 2920 )
2904 2921 continue
2905 2922
2906 2923 if self.iscensored(rev):
2907 2924 if self.deltaparent(rev) != nullrev:
2908 2925 raise error.Abort(
2909 2926 _(
2910 2927 b'cannot censor due to censored '
2911 2928 b'revision having delta stored'
2912 2929 )
2913 2930 )
2914 2931 rawtext = self._chunk(rev)
2915 2932 else:
2916 2933 rawtext = self.rawdata(rev)
2917 2934
2918 2935 newrl.addrawrevision(
2919 2936 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2920 2937 )
2921 2938
2922 2939 tr.addbackup(self.indexfile, location=b'store')
2923 2940 if not self._inline:
2924 2941 tr.addbackup(self.datafile, location=b'store')
2925 2942
2926 2943 self.opener.rename(newrl.indexfile, self.indexfile)
2927 2944 if not self._inline:
2928 2945 self.opener.rename(newrl.datafile, self.datafile)
2929 2946
2930 2947 self.clearcaches()
2931 2948 self._loadindex()
2932 2949
2933 2950 def verifyintegrity(self, state):
2934 2951 """Verifies the integrity of the revlog.
2935 2952
2936 2953 Yields ``revlogproblem`` instances describing problems that are
2937 2954 found.
2938 2955 """
2939 2956 dd, di = self.checksize()
2940 2957 if dd:
2941 2958 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2942 2959 if di:
2943 2960 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2944 2961
2945 2962 version = self.version & 0xFFFF
2946 2963
2947 2964 # The verifier tells us what version revlog we should be.
2948 2965 if version != state[b'expectedversion']:
2949 2966 yield revlogproblem(
2950 2967 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2951 2968 % (self.indexfile, version, state[b'expectedversion'])
2952 2969 )
2953 2970
2954 2971 state[b'skipread'] = set()
2955 2972 state[b'safe_renamed'] = set()
2956 2973
2957 2974 for rev in self:
2958 2975 node = self.node(rev)
2959 2976
2960 2977 # Verify contents. 4 cases to care about:
2961 2978 #
2962 2979 # common: the most common case
2963 2980 # rename: with a rename
2964 2981 # meta: file content starts with b'\1\n', the metadata
2965 2982 # header defined in filelog.py, but without a rename
2966 2983 # ext: content stored externally
2967 2984 #
2968 2985 # More formally, their differences are shown below:
2969 2986 #
2970 2987 # | common | rename | meta | ext
2971 2988 # -------------------------------------------------------
2972 2989 # flags() | 0 | 0 | 0 | not 0
2973 2990 # renamed() | False | True | False | ?
2974 2991 # rawtext[0:2]=='\1\n'| False | True | True | ?
2975 2992 #
2976 2993 # "rawtext" means the raw text stored in revlog data, which
2977 2994 # could be retrieved by "rawdata(rev)". "text"
2978 2995 # mentioned below is "revision(rev)".
2979 2996 #
2980 2997 # There are 3 different lengths stored physically:
2981 2998 # 1. L1: rawsize, stored in revlog index
2982 2999 # 2. L2: len(rawtext), stored in revlog data
2983 3000 # 3. L3: len(text), stored in revlog data if flags==0, or
2984 3001 # possibly somewhere else if flags!=0
2985 3002 #
2986 3003 # L1 should be equal to L2. L3 could be different from them.
2987 3004 # "text" may or may not affect commit hash depending on flag
2988 3005 # processors (see flagutil.addflagprocessor).
2989 3006 #
2990 3007 # | common | rename | meta | ext
2991 3008 # -------------------------------------------------
2992 3009 # rawsize() | L1 | L1 | L1 | L1
2993 3010 # size() | L1 | L2-LM | L1(*) | L1 (?)
2994 3011 # len(rawtext) | L2 | L2 | L2 | L2
2995 3012 # len(text) | L2 | L2 | L2 | L3
2996 3013 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2997 3014 #
2998 3015 # LM: length of metadata, depending on rawtext
2999 3016 # (*): not ideal, see comment in filelog.size
3000 3017 # (?): could be "- len(meta)" if the resolved content has
3001 3018 # rename metadata
3002 3019 #
3003 3020 # Checks needed to be done:
3004 3021 # 1. length check: L1 == L2, in all cases.
3005 3022 # 2. hash check: depending on flag processor, we may need to
3006 3023 # use either "text" (external), or "rawtext" (in revlog).
3007 3024
3008 3025 try:
3009 3026 skipflags = state.get(b'skipflags', 0)
3010 3027 if skipflags:
3011 3028 skipflags &= self.flags(rev)
3012 3029
3013 3030 _verify_revision(self, skipflags, state, node)
3014 3031
3015 3032 l1 = self.rawsize(rev)
3016 3033 l2 = len(self.rawdata(node))
3017 3034
3018 3035 if l1 != l2:
3019 3036 yield revlogproblem(
3020 3037 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3021 3038 node=node,
3022 3039 )
3023 3040
3024 3041 except error.CensoredNodeError:
3025 3042 if state[b'erroroncensored']:
3026 3043 yield revlogproblem(
3027 3044 error=_(b'censored file data'), node=node
3028 3045 )
3029 3046 state[b'skipread'].add(node)
3030 3047 except Exception as e:
3031 3048 yield revlogproblem(
3032 3049 error=_(b'unpacking %s: %s')
3033 3050 % (short(node), stringutil.forcebytestr(e)),
3034 3051 node=node,
3035 3052 )
3036 3053 state[b'skipread'].add(node)
3037 3054
3038 3055 def storageinfo(
3039 3056 self,
3040 3057 exclusivefiles=False,
3041 3058 sharedfiles=False,
3042 3059 revisionscount=False,
3043 3060 trackedsize=False,
3044 3061 storedsize=False,
3045 3062 ):
3046 3063 d = {}
3047 3064
3048 3065 if exclusivefiles:
3049 3066 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3050 3067 if not self._inline:
3051 3068 d[b'exclusivefiles'].append((self.opener, self.datafile))
3052 3069
3053 3070 if sharedfiles:
3054 3071 d[b'sharedfiles'] = []
3055 3072
3056 3073 if revisionscount:
3057 3074 d[b'revisionscount'] = len(self)
3058 3075
3059 3076 if trackedsize:
3060 3077 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3061 3078
3062 3079 if storedsize:
3063 3080 d[b'storedsize'] = sum(
3064 3081 self.opener.stat(path).st_size for path in self.files()
3065 3082 )
3066 3083
3067 3084 return d
3068 3085
3069 3086 def rewrite_sidedata(self, helpers, startrev, endrev):
3070 3087 if self.version & 0xFFFF != REVLOGV2:
3071 3088 return
3072 3089 # inline are not yet supported because they suffer from an issue when
3073 3090 # rewriting them (since it's not an append-only operation).
3074 3091 # See issue6485.
3075 3092 assert not self._inline
3076 3093 if not helpers[1] and not helpers[2]:
3077 3094 # Nothing to generate or remove
3078 3095 return
3079 3096
3080 3097 new_entries = []
3081 3098 # append the new sidedata
3082 3099 with self._datafp(b'a+') as fp:
3083 3100 # Maybe this bug still exists, see revlog._writeentry
3084 3101 fp.seek(0, os.SEEK_END)
3085 3102 current_offset = fp.tell()
3086 3103 for rev in range(startrev, endrev + 1):
3087 3104 entry = self.index[rev]
3088 3105 new_sidedata = storageutil.run_sidedata_helpers(
3089 3106 store=self,
3090 3107 sidedata_helpers=helpers,
3091 3108 sidedata={},
3092 3109 rev=rev,
3093 3110 )
3094 3111
3095 3112 serialized_sidedata = sidedatautil.serialize_sidedata(
3096 3113 new_sidedata
3097 3114 )
3098 3115 if entry[8] != 0 or entry[9] != 0:
3099 3116 # rewriting entries that already have sidedata is not
3100 3117 # supported yet, because it introduces garbage data in the
3101 3118 # revlog.
3102 3119 msg = b"Rewriting existing sidedata is not supported yet"
3103 3120 raise error.Abort(msg)
3104 3121 entry = entry[:8]
3105 3122 entry += (current_offset, len(serialized_sidedata))
3106 3123
3107 3124 fp.write(serialized_sidedata)
3108 3125 new_entries.append(entry)
3109 3126 current_offset += len(serialized_sidedata)
3110 3127
3111 3128 # rewrite the new index entries
3112 3129 with self._indexfp(b'w+') as fp:
3113 3130 fp.seek(startrev * self.index.entry_size)
3114 3131 for i, entry in enumerate(new_entries):
3115 3132 rev = startrev + i
3116 3133 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3117 3134 packed = self.index.entry_binary(rev)
3118 3135 if rev == 0:
3119 3136 header = self.index.pack_header(self.version)
3120 3137 packed = header + packed
3121 3138 fp.write(packed)
@@ -1,107 +1,121 b''
1 1 # revlogdeltas.py - constants used for revlog logic
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import struct
13 13
14 14 from ..interfaces import repository
15 15
16 ### Internal utility constants
17
18 KIND_CHANGELOG = 1001 # over 256 to not be comparable with a bytes
19 KIND_MANIFESTLOG = 1002
20 KIND_FILELOG = 1003
21 KIND_OTHER = 1004
22
23 ALL_KINDS = {
24 KIND_CHANGELOG,
25 KIND_MANIFESTLOG,
26 KIND_FILELOG,
27 KIND_OTHER,
28 }
29
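Because the kinds are collected in ALL_KINDS, callers can cheaply reject an unexpected value up front. A minimal, purely illustrative guard (the helper name is invented) using the constants defined just above:

    def check_kind(kind):
        if kind not in ALL_KINDS:
            raise ValueError('unknown revlog kind: %r' % kind)

    check_kind(KIND_FILELOG)   # passes
    # check_kind(42)           # would raise ValueError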
16 30 ### main revlog header
17 31
18 32 INDEX_HEADER = struct.Struct(b">I")
19 33
20 34 ## revlog version
21 35 REVLOGV0 = 0
22 36 REVLOGV1 = 1
23 37 # Dummy value until file format is finalized.
24 38 REVLOGV2 = 0xDEAD
25 39
26 40 ## global revlog header flags
27 41 # Shared across v1 and v2.
28 42 FLAG_INLINE_DATA = 1 << 16
29 43 # Only used by v1, implied by v2.
30 44 FLAG_GENERALDELTA = 1 << 17
31 45 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
32 46 REVLOG_DEFAULT_FORMAT = REVLOGV1
33 47 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 48 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
35 49 REVLOGV2_FLAGS = FLAG_INLINE_DATA
36 50
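The low 16 bits of the header integer carry the format number and the high bits carry the feature flags, which is why code such as rewrite_sidedata tests version & 0xFFFF. A worked example using only the constants above:

    header = REVLOG_DEFAULT_VERSION    # REVLOGV1 combined with FLAG_INLINE_DATA
    assert header & 0xFFFF == REVLOGV1
    assert header & FLAG_INLINE_DATA
    assert not (header & FLAG_GENERALDELTA)
    assert len(INDEX_HEADER.pack(header)) == 4   # the 4-byte big-endian header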
37 51 ### individual entry
38 52
39 53 ## index v0:
40 54 # 4 bytes: offset
41 55 # 4 bytes: compressed length
42 56 # 4 bytes: base rev
43 57 # 4 bytes: link rev
44 58 # 20 bytes: parent 1 nodeid
45 59 # 20 bytes: parent 2 nodeid
46 60 # 20 bytes: nodeid
47 61 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
48 62
49 63 ## index v1
50 64 # 6 bytes: offset
51 65 # 2 bytes: flags
52 66 # 4 bytes: compressed length
53 67 # 4 bytes: uncompressed length
54 68 # 4 bytes: base rev
55 69 # 4 bytes: link rev
56 70 # 4 bytes: parent 1 rev
57 71 # 4 bytes: parent 2 rev
58 72 # 32 bytes: nodeid
59 73 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
60 74 assert INDEX_ENTRY_V1.size == 32 * 2
61 75
62 76 # 6 bytes: offset
63 77 # 2 bytes: flags
64 78 # 4 bytes: compressed length
65 79 # 4 bytes: uncompressed length
66 80 # 4 bytes: base rev
67 81 # 4 bytes: link rev
68 82 # 4 bytes: parent 1 rev
69 83 # 4 bytes: parent 2 rev
70 84 # 32 bytes: nodeid
71 85 # 8 bytes: sidedata offset
72 86 # 4 bytes: sidedata compressed length
73 87 # 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
74 88 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQi20x")
75 89 assert INDEX_ENTRY_V2.size == 32 * 3
76 90
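A round trip through INDEX_ENTRY_V1 with made-up values shows how the layout comments map onto the struct: the 6-byte offset and 2-byte flags share the leading 64-bit field (offset in the high bits), followed by six 32-bit integers, the nodeid, and 12 bytes of padding:

    offset, flags = 4096, 0
    entry = INDEX_ENTRY_V1.pack(
        (offset << 16) | flags,  # offset and flags packed into the first 8 bytes
        120,                     # compressed length
        200,                     # uncompressed length
        5,                       # base rev
        7,                       # link rev
        6,                       # parent 1 rev
        -1,                      # parent 2 rev (null)
        b'\x00' * 20,            # nodeid, zero-padded to 32 bytes on disk
    )
    assert len(entry) == INDEX_ENTRY_V1.size == 64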
77 91 # revlog index flags
78 92
79 93 # For historical reasons, revlog's internal flags were exposed via the
80 94 # wire protocol and are even exposed in parts of the storage APIs.
81 95
82 96 # revision has censor metadata, must be verified
83 97 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
84 98 # revision hash does not match data (narrowhg)
85 99 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
86 100 # revision data is stored externally
87 101 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
88 102 # revision data contains extra metadata not part of the official digest
89 103 REVIDX_SIDEDATA = repository.REVISION_FLAG_SIDEDATA
90 104 # revision changes files in a way that could affect copy tracing.
91 105 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
92 106 REVIDX_DEFAULT_FLAGS = 0
93 107 # stable order in which flags need to be processed and their processors applied
94 108 REVIDX_FLAGS_ORDER = [
95 109 REVIDX_ISCENSORED,
96 110 REVIDX_ELLIPSIS,
97 111 REVIDX_EXTSTORED,
98 112 REVIDX_SIDEDATA,
99 113 REVIDX_HASCOPIESINFO,
100 114 ]
101 115
102 116 # bitmask for flags that could cause rawdata content change
103 117 REVIDX_RAWTEXT_CHANGING_FLAGS = (
104 118 REVIDX_ISCENSORED | REVIDX_EXTSTORED | REVIDX_SIDEDATA
105 119 )
106 120
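Since each flag occupies a distinct bit, a revision's flag word can be masked directly and processed in REVIDX_FLAGS_ORDER. The flag combination below is hypothetical, but the ordering and mask behaviour follow from the definitions above:

    rev_flags = REVIDX_ISCENSORED | REVIDX_HASCOPIESINFO
    active = [f for f in REVIDX_FLAGS_ORDER if rev_flags & f]
    assert active == [REVIDX_ISCENSORED, REVIDX_HASCOPIESINFO]
    # censored revisions are among those whose rawtext may change:
    assert rev_flags & REVIDX_RAWTEXT_CHANGING_FLAGS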
107 121 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000