statprof: remove superfluous sys.version_info check...
Gregory Szorc
changeset r49794:37537a4d (branch: default)
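The commit message calls the Python-version guard in display_by_method() superfluous, presumably because Mercurial no longer runs on Python 2, so sys.version_info.major >= 3 always holds. A minimal sketch of the effect of the hunk below (the surrounding code is unchanged):

    # before
    if sys.version_info.major >= 3 and not isinstance(source, bytes):
        source = pycompat.bytestr(source)

    # after
    if not isinstance(source, bytes):
        source = pycompat.bytestr(source)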
@@ -1,1092 +1,1090 b''
1 1 ## statprof.py
2 2 ## Copyright (C) 2012 Bryan O'Sullivan <bos@serpentine.com>
3 3 ## Copyright (C) 2011 Alex Fraser <alex at phatcore dot com>
4 4 ## Copyright (C) 2004,2005 Andy Wingo <wingo at pobox dot com>
5 5 ## Copyright (C) 2001 Rob Browning <rlb at defaultvalue dot org>
6 6
7 7 ## This library is free software; you can redistribute it and/or
8 8 ## modify it under the terms of the GNU Lesser General Public
9 9 ## License as published by the Free Software Foundation; either
10 10 ## version 2.1 of the License, or (at your option) any later version.
11 11 ##
12 12 ## This library is distributed in the hope that it will be useful,
13 13 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
14 14 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 15 ## Lesser General Public License for more details.
16 16 ##
17 17 ## You should have received a copy of the GNU Lesser General Public
18 18 ## License along with this program; if not, contact:
19 19 ##
20 20 ## Free Software Foundation Voice: +1-617-542-5942
21 21 ## 59 Temple Place - Suite 330 Fax: +1-617-542-2652
22 22 ## Boston, MA 02111-1307, USA gnu@gnu.org
23 23
24 24 """
25 25 statprof is intended to be a fairly simple statistical profiler for
26 26 python. It was ported directly from a statistical profiler for guile,
27 27 also named statprof, available from guile-lib [0].
28 28
29 29 [0] http://wingolog.org/software/guile-lib/statprof/
30 30
31 31 To start profiling, call statprof.start():
32 32 >>> start()
33 33
34 34 Then run whatever it is that you want to profile, for example:
35 35 >>> import test.pystone; test.pystone.pystones()
36 36
37 37 Then stop the profiling and print out the results:
38 38 >>> stop()
39 39 >>> display()
40 40 % cumulative self
41 41 time seconds seconds name
42 42 26.72 1.40 0.37 pystone.py:79:Proc0
43 43 13.79 0.56 0.19 pystone.py:133:Proc1
44 44 13.79 0.19 0.19 pystone.py:208:Proc8
45 45 10.34 0.16 0.14 pystone.py:229:Func2
46 46 6.90 0.10 0.10 pystone.py:45:__init__
47 47 4.31 0.16 0.06 pystone.py:53:copy
48 48 ...
49 49
50 50 All of the numerical data is statistically approximate. In the
51 51 following column descriptions, and in all of statprof, "time" refers
52 52 to execution time (both user and system), not wall clock time.
53 53
54 54 % time
55 55 The percent of the time spent inside the procedure itself (not
56 56 counting children).
57 57
58 58 cumulative seconds
59 59 The total number of seconds spent in the procedure, including
60 60 children.
61 61
62 62 self seconds
63 63 The total number of seconds spent in the procedure itself (not
64 64 counting children).
65 65
66 66 name
67 67 The name of the procedure.
68 68
69 69 By default statprof keeps the data collected from previous runs. If you
70 70 want to clear the collected data, call reset():
71 71 >>> reset()
72 72
73 73 reset() can also be used to change the sampling frequency from the
74 74 default of 1000 Hz. For example, to tell statprof to sample 50 times a
75 75 second:
76 76 >>> reset(50)
77 77
78 78 This means that statprof will sample the call stack after every 1/50 of
79 79 a second of user + system time spent running on behalf of the python
80 80 process. When your process is idle (for example, blocking in a read(),
81 81 as is the case at the listener), the clock does not advance. For this
82 82 reason statprof is not currently suitable for profiling io-bound
83 83 operations.
84 84
85 85 The profiler uses the hash of the code object itself to identify the
86 86 procedures, so it won't confuse different procedures with the same name.
87 87 They will show up as two different rows in the output.
88 88
89 89 Right now the profiler is quite simplistic. It cannot provide
90 90 call-graphs or other higher level information. What you see in the
91 91 table is pretty much all there is. Patches are welcome :-)
92 92
93 93
94 94 Threading
95 95 ---------
96 96
97 97 Because signals only get delivered to the main thread in Python,
98 98 statprof only profiles the main thread. However because the time
99 99 reporting function uses per-process timers, the results can be
100 100 significantly off if other threads' work patterns are not similar to the
101 101 main thread's work patterns.
102 102 """
103 103 # no-check-code
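# Illustrative usage note: besides the start()/stop()/display() sequence shown
# in the docstring above, the module exports a profile() context manager
# (defined further down) that wraps the same calls:
#
#     from mercurial import statprof
#     with statprof.profile():
#         run_workload()   # hypothetical stand-in for the code being profiled
#
# display() runs automatically when the block exits.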
104 104
105 105 import collections
106 106 import contextlib
107 107 import getopt
108 108 import inspect
109 109 import json
110 110 import os
111 111 import signal
112 112 import sys
113 113 import threading
114 114 import time
115 115
116 116 from .pycompat import open
117 117 from . import (
118 118 encoding,
119 119 pycompat,
120 120 )
121 121
122 122 defaultdict = collections.defaultdict
123 123 contextmanager = contextlib.contextmanager
124 124
125 125 __all__ = [b'start', b'stop', b'reset', b'display', b'profile']
126 126
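# Frames whose "filename:function" label (see CodeSite.skipname()) appears in
# this set are treated as dispatch boilerplate and collapsed out of the
# hotpath display (see HotNode.add() in display_hotpath()).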
127 127 skips = {
128 128 "util.py:check",
129 129 "extensions.py:closure",
130 130 "color.py:colorcmd",
131 131 "dispatch.py:checkargs",
132 132 "dispatch.py:<lambda>",
133 133 "dispatch.py:_runcatch",
134 134 "dispatch.py:_dispatch",
135 135 "dispatch.py:_runcommand",
136 136 "pager.py:pagecmd",
137 137 "dispatch.py:run",
138 138 "dispatch.py:dispatch",
139 139 "dispatch.py:runcommand",
140 140 "hg.py:<module>",
141 141 "evolve.py:warnobserrors",
142 142 }
143 143
144 144 ###########################################################################
145 145 ## Utils
146 146
147 147
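# os.times() returns (user, system, children_user, children_system, elapsed),
# so clock() yields a (CPU time, wall-clock time) pair; ProfileState.timeidx
# later selects index 0 or 1 of that pair depending on whether 'cpu' or 'real'
# time is being tracked.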
148 148 def clock():
149 149 times = os.times()
150 150 return (times[0] + times[1], times[4])
151 151
152 152
153 153 ###########################################################################
154 154 ## Collection data structures
155 155
156 156
157 157 class ProfileState(object):
158 158 def __init__(self, frequency=None):
159 159 self.reset(frequency)
160 160 self.track = b'cpu'
161 161
162 162 def reset(self, frequency=None):
163 163 # total so far
164 164 self.accumulated_time = (0.0, 0.0)
165 165 # start_time when timer is active
166 166 self.last_start_time = None
167 167 # a float
168 168 if frequency:
169 169 self.sample_interval = 1.0 / frequency
170 170 elif not pycompat.hasattr(self, 'sample_interval'):
171 171 # default to 1000 Hz
172 172 self.sample_interval = 1.0 / 1000.0
173 173 else:
174 174 # leave the frequency as it was
175 175 pass
176 176 self.remaining_prof_time = None
177 177 # for user start/stop nesting
178 178 self.profile_level = 0
179 179
180 180 self.samples = []
181 181
182 182 def accumulate_time(self, stop_time):
183 183 increment = (
184 184 stop_time[0] - self.last_start_time[0],
185 185 stop_time[1] - self.last_start_time[1],
186 186 )
187 187 self.accumulated_time = (
188 188 self.accumulated_time[0] + increment[0],
189 189 self.accumulated_time[1] + increment[1],
190 190 )
191 191
192 192 def seconds_per_sample(self):
193 193 return self.accumulated_time[self.timeidx] / len(self.samples)
194 194
195 195 @property
196 196 def timeidx(self):
197 197 if self.track == b'real':
198 198 return 1
199 199 return 0
200 200
201 201
202 202 state = ProfileState()
203 203
204 204
205 205 class CodeSite(object):
206 206 cache = {}
207 207
208 208 __slots__ = ('path', 'lineno', 'function', 'source')
209 209
210 210 def __init__(self, path, lineno, function):
211 211 assert isinstance(path, bytes)
212 212 self.path = path
213 213 self.lineno = lineno
214 214 assert isinstance(function, bytes)
215 215 self.function = function
216 216 self.source = None
217 217
218 218 def __eq__(self, other):
219 219 try:
220 220 return self.lineno == other.lineno and self.path == other.path
221 221 except:
222 222 return False
223 223
224 224 def __hash__(self):
225 225 return hash((self.lineno, self.path))
226 226
227 227 @classmethod
228 228 def get(cls, path, lineno, function):
229 229 k = (path, lineno)
230 230 try:
231 231 return cls.cache[k]
232 232 except KeyError:
233 233 v = cls(path, lineno, function)
234 234 cls.cache[k] = v
235 235 return v
236 236
237 237 def getsource(self, length):
238 238 if self.source is None:
239 239 lineno = self.lineno - 1
240 240 try:
241 241 with open(self.path, b'rb') as fp:
242 242 for i, line in enumerate(fp):
243 243 if i == lineno:
244 244 self.source = line.strip()
245 245 break
246 246 except:
247 247 pass
248 248 if self.source is None:
249 249 self.source = b''
250 250
251 251 source = self.source
252 252 if len(source) > length:
253 253 source = source[: (length - 3)] + b"..."
254 254 return source
255 255
256 256 def filename(self):
257 257 return os.path.basename(self.path)
258 258
259 259 def skipname(self):
260 260 return '%s:%s' % (self.filename(), self.function)
261 261
262 262
263 263 class Sample(object):
264 264 __slots__ = ('stack', 'time')
265 265
266 266 def __init__(self, stack, time):
267 267 self.stack = stack
268 268 self.time = time
269 269
270 270 @classmethod
271 271 def from_frame(cls, frame, time):
272 272 stack = []
273 273
274 274 while frame:
275 275 stack.append(
276 276 CodeSite.get(
277 277 pycompat.sysbytes(frame.f_code.co_filename),
278 278 frame.f_lineno,
279 279 pycompat.sysbytes(frame.f_code.co_name),
280 280 )
281 281 )
282 282 frame = frame.f_back
283 283
284 284 return Sample(stack, time)
285 285
286 286
287 287 ###########################################################################
288 288 ## SIGPROF handler
289 289
290 290
291 291 def profile_signal_handler(signum, frame):
292 292 if state.profile_level > 0:
293 293 now = clock()
294 294 state.accumulate_time(now)
295 295
296 296 timestamp = state.accumulated_time[state.timeidx]
297 297 state.samples.append(Sample.from_frame(frame, timestamp))
298 298
299 299 signal.setitimer(signal.ITIMER_PROF, state.sample_interval, 0.0)
300 300 state.last_start_time = now
301 301
302 302
303 303 stopthread = threading.Event()
304 304
305 305
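# Thread-based sampling: samplerthread() is started by start(mechanism=b'thread')
# and, on each iteration, looks up the profiled thread's current frame via
# sys._current_frames()[tid], records a Sample, then sleeps for one sample
# interval, so the spacing between samples is only approximate.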
306 306 def samplerthread(tid):
307 307 while not stopthread.is_set():
308 308 now = clock()
309 309 state.accumulate_time(now)
310 310
311 311 frame = sys._current_frames()[tid]
312 312
313 313 timestamp = state.accumulated_time[state.timeidx]
314 314 state.samples.append(Sample.from_frame(frame, timestamp))
315 315
316 316 state.last_start_time = now
317 317 time.sleep(state.sample_interval)
318 318
319 319 stopthread.clear()
320 320
321 321
322 322 ###########################################################################
323 323 ## Profiling API
324 324
325 325
326 326 def is_active():
327 327 return state.profile_level > 0
328 328
329 329
330 330 lastmechanism = None
331 331
332 332
333 333 def start(mechanism=b'thread', track=b'cpu'):
334 334 '''Install the profiling signal handler, and start profiling.'''
335 335 state.track = track # note: nesting different mode won't work
336 336 state.profile_level += 1
337 337 if state.profile_level == 1:
338 338 state.last_start_time = clock()
339 339 rpt = state.remaining_prof_time
340 340 state.remaining_prof_time = None
341 341
342 342 global lastmechanism
343 343 lastmechanism = mechanism
344 344
345 345 if mechanism == b'signal':
346 346 signal.signal(signal.SIGPROF, profile_signal_handler)
347 347 signal.setitimer(
348 348 signal.ITIMER_PROF, rpt or state.sample_interval, 0.0
349 349 )
350 350 elif mechanism == b'thread':
351 351 frame = inspect.currentframe()
352 352 tid = [k for k, f in sys._current_frames().items() if f == frame][0]
353 353 state.thread = threading.Thread(
354 354 target=samplerthread, args=(tid,), name="samplerthread"
355 355 )
356 356 state.thread.start()
357 357
358 358
359 359 def stop():
360 360 '''Stop profiling, and uninstall the profiling signal handler.'''
361 361 state.profile_level -= 1
362 362 if state.profile_level == 0:
363 363 if lastmechanism == b'signal':
364 364 rpt = signal.setitimer(signal.ITIMER_PROF, 0.0, 0.0)
365 365 signal.signal(signal.SIGPROF, signal.SIG_IGN)
366 366 state.remaining_prof_time = rpt[0]
367 367 elif lastmechanism == b'thread':
368 368 stopthread.set()
369 369 state.thread.join()
370 370
371 371 state.accumulate_time(clock())
372 372 state.last_start_time = None
373 373 statprofpath = encoding.environ.get(b'STATPROF_DEST')
374 374 if statprofpath:
375 375 save_data(statprofpath)
376 376
377 377 return state
378 378
379 379
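# On-disk format shared by save_data() and load_data() (also written when the
# STATPROF_DEST environment variable is set, see stop()):
#   first line:       "<cpu seconds> <real seconds>"   (accumulated_time)
#   following lines:  "<time>\0<site>\0<site>...", one per sample, where each
#                     <site> is "path\x01lineno\x01function"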
380 380 def save_data(path):
381 381 with open(path, b'w+') as file:
382 382 file.write(b"%f %f\n" % state.accumulated_time)
383 383 for sample in state.samples:
384 384 time = sample.time
385 385 stack = sample.stack
386 386 sites = [
387 387 b'\1'.join([s.path, b'%d' % s.lineno, s.function])
388 388 for s in stack
389 389 ]
390 390 file.write(b"%d\0%s\n" % (time, b'\0'.join(sites)))
391 391
392 392
393 393 def load_data(path):
394 394 lines = open(path, b'rb').read().splitlines()
395 395
396 396 state.accumulated_time = [float(value) for value in lines[0].split()]
397 397 state.samples = []
398 398 for line in lines[1:]:
399 399 parts = line.split(b'\0')
400 400 time = float(parts[0])
401 401 rawsites = parts[1:]
402 402 sites = []
403 403 for rawsite in rawsites:
404 404 siteparts = rawsite.split(b'\1')
405 405 sites.append(
406 406 CodeSite.get(siteparts[0], int(siteparts[1]), siteparts[2])
407 407 )
408 408
409 409 state.samples.append(Sample(sites, time))
410 410
411 411
412 412 def reset(frequency=None):
413 413 """Clear out the state of the profiler. Do not call while the
414 414 profiler is running.
415 415
416 416 The optional frequency argument specifies the number of samples to
417 417 collect per second."""
418 418 assert state.profile_level == 0, b"Can't reset() while statprof is running"
419 419 CodeSite.cache.clear()
420 420 state.reset(frequency)
421 421
422 422
423 423 @contextmanager
424 424 def profile():
425 425 start()
426 426 try:
427 427 yield
428 428 finally:
429 429 stop()
430 430 display()
431 431
432 432
433 433 ###########################################################################
434 434 ## Reporting API
435 435
436 436
437 437 class SiteStats(object):
438 438 def __init__(self, site):
439 439 self.site = site
440 440 self.selfcount = 0
441 441 self.totalcount = 0
442 442
443 443 def addself(self):
444 444 self.selfcount += 1
445 445
446 446 def addtotal(self):
447 447 self.totalcount += 1
448 448
449 449 def selfpercent(self):
450 450 return self.selfcount / len(state.samples) * 100
451 451
452 452 def totalpercent(self):
453 453 return self.totalcount / len(state.samples) * 100
454 454
455 455 def selfseconds(self):
456 456 return self.selfcount * state.seconds_per_sample()
457 457
458 458 def totalseconds(self):
459 459 return self.totalcount * state.seconds_per_sample()
460 460
461 461 @classmethod
462 462 def buildstats(cls, samples):
463 463 stats = {}
464 464
465 465 for sample in samples:
466 466 for i, site in enumerate(sample.stack):
467 467 sitestat = stats.get(site)
468 468 if not sitestat:
469 469 sitestat = SiteStats(site)
470 470 stats[site] = sitestat
471 471
472 472 sitestat.addtotal()
473 473
474 474 if i == 0:
475 475 sitestat.addself()
476 476
477 477 return [s for s in stats.values()]
478 478
479 479
480 480 class DisplayFormats:
481 481 ByLine = 0
482 482 ByMethod = 1
483 483 AboutMethod = 2
484 484 Hotpath = 3
485 485 FlameGraph = 4
486 486 Json = 5
487 487 Chrome = 6
488 488
489 489
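# Illustrative calls (format defaults to 3, i.e. DisplayFormats.Hotpath):
#
#     display()                                        # hotpath view to stdout
#     display(format=DisplayFormats.ByMethod)          # per-function table
#     display(format=DisplayFormats.Hotpath, limit=0.10)  # hide cheaper frames
#
# Extra keyword arguments such as limit= or function= are forwarded to the
# per-format helpers below.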
490 490 def display(fp=None, format=3, data=None, **kwargs):
491 491 '''Print statistics, either to stdout or the given file object.'''
492 492 if data is None:
493 493 data = state
494 494
495 495 if fp is None:
496 496 from .utils import procutil
497 497
498 498 fp = procutil.stdout
499 499 if len(data.samples) == 0:
500 500 fp.write(b'No samples recorded.\n')
501 501 return
502 502
503 503 if format == DisplayFormats.ByLine:
504 504 display_by_line(data, fp)
505 505 elif format == DisplayFormats.ByMethod:
506 506 display_by_method(data, fp)
507 507 elif format == DisplayFormats.AboutMethod:
508 508 display_about_method(data, fp, **kwargs)
509 509 elif format == DisplayFormats.Hotpath:
510 510 display_hotpath(data, fp, **kwargs)
511 511 elif format == DisplayFormats.FlameGraph:
512 512 write_to_flame(data, fp, **kwargs)
513 513 elif format == DisplayFormats.Json:
514 514 write_to_json(data, fp)
515 515 elif format == DisplayFormats.Chrome:
516 516 write_to_chrome(data, fp, **kwargs)
517 517 else:
518 518 raise Exception("Invalid display format")
519 519
520 520 if format not in (DisplayFormats.Json, DisplayFormats.Chrome):
521 521 fp.write(b'---\n')
522 522 fp.write(b'Sample count: %d\n' % len(data.samples))
523 523 fp.write(b'Total time: %f seconds (%f wall)\n' % data.accumulated_time)
524 524
525 525
526 526 def display_by_line(data, fp):
527 527 """Print the profiler data with each sample line represented
528 528 as one row in a table. Sorted by self-time per line."""
529 529 stats = SiteStats.buildstats(data.samples)
530 530 stats.sort(reverse=True, key=lambda x: x.selfseconds())
531 531
532 532 fp.write(
533 533 b'%5.5s %10.10s %7.7s %-8.8s\n'
534 534 % (b'% ', b'cumulative', b'self', b'')
535 535 )
536 536 fp.write(
537 537 b'%5.5s %9.9s %8.8s %-8.8s\n'
538 538 % (b"time", b"seconds", b"seconds", b"name")
539 539 )
540 540
541 541 for stat in stats:
542 542 site = stat.site
543 543 sitelabel = b'%s:%d:%s' % (site.filename(), site.lineno, site.function)
544 544 fp.write(
545 545 b'%6.2f %9.2f %9.2f %s\n'
546 546 % (
547 547 stat.selfpercent(),
548 548 stat.totalseconds(),
549 549 stat.selfseconds(),
550 550 sitelabel,
551 551 )
552 552 )
553 553
554 554
555 555 def display_by_method(data, fp):
556 556 """Print the profiler data with each sample function represented
557 557 as one row in a table. Important lines within that function are
558 558 output as nested rows. Sorted by self-time per line."""
559 559 fp.write(
560 560 b'%5.5s %10.10s %7.7s %-8.8s\n'
561 561 % (b'% ', b'cumulative', b'self', b'')
562 562 )
563 563 fp.write(
564 564 b'%5.5s %9.9s %8.8s %-8.8s\n'
565 565 % (b"time", b"seconds", b"seconds", b"name")
566 566 )
567 567
568 568 stats = SiteStats.buildstats(data.samples)
569 569
570 570 grouped = defaultdict(list)
571 571 for stat in stats:
572 572 grouped[stat.site.filename() + b":" + stat.site.function].append(stat)
573 573
574 574 # compute sums for each function
575 575 functiondata = []
576 576 for fname, sitestats in grouped.items():
577 577 total_cum_sec = 0
578 578 total_self_sec = 0
579 579 total_percent = 0
580 580 for stat in sitestats:
581 581 total_cum_sec += stat.totalseconds()
582 582 total_self_sec += stat.selfseconds()
583 583 total_percent += stat.selfpercent()
584 584
585 585 functiondata.append(
586 586 (fname, total_cum_sec, total_self_sec, total_percent, sitestats)
587 587 )
588 588
589 589 # sort by total self sec
590 590 functiondata.sort(reverse=True, key=lambda x: x[2])
591 591
592 592 for function in functiondata:
593 593 if function[3] < 0.05:
594 594 continue
595 595 fp.write(
596 596 b'%6.2f %9.2f %9.2f %s\n'
597 597 % (
598 598 function[3], # total percent
599 599 function[1], # total cum sec
600 600 function[2], # total self sec
601 601 function[0],
602 602 )
603 603 ) # file:function
604 604
605 605 function[4].sort(reverse=True, key=lambda i: i.selfseconds())
606 606 for stat in function[4]:
607 607 # only show line numbers for significant locations (>1% time spent)
608 608 if stat.selfpercent() > 1:
609 609 source = stat.site.getsource(25)
610 if sys.version_info.major >= 3 and not isinstance(
611 source, bytes
612 ):
610 if not isinstance(source, bytes):
613 611 source = pycompat.bytestr(source)
614 612
615 613 stattuple = (
616 614 stat.selfpercent(),
617 615 stat.selfseconds(),
618 616 stat.site.lineno,
619 617 source,
620 618 )
621 619
622 620 fp.write(b'%33.0f%% %6.2f line %d: %s\n' % stattuple)
623 621
624 622
625 623 def display_about_method(data, fp, function=None, **kwargs):
626 624 if function is None:
627 625 raise Exception("Invalid function")
628 626
629 627 filename = None
630 628 if b':' in function:
631 629 filename, function = function.split(b':')
632 630
633 631 relevant_samples = 0
634 632 parents = {}
635 633 children = {}
636 634
637 635 for sample in data.samples:
638 636 for i, site in enumerate(sample.stack):
639 637 if site.function == function and (
640 638 not filename or site.filename() == filename
641 639 ):
642 640 relevant_samples += 1
643 641 if i != len(sample.stack) - 1:
644 642 parent = sample.stack[i + 1]
645 643 if parent in parents:
646 644 parents[parent] = parents[parent] + 1
647 645 else:
648 646 parents[parent] = 1
649 647
650 648 if site in children:
651 649 children[site] = children[site] + 1
652 650 else:
653 651 children[site] = 1
654 652
655 653 parents = [(parent, count) for parent, count in parents.items()]
656 654 parents.sort(reverse=True, key=lambda x: x[1])
657 655 for parent, count in parents:
658 656 fp.write(
659 657 b'%6.2f%% %s:%s line %s: %s\n'
660 658 % (
661 659 count / relevant_samples * 100,
662 660 pycompat.fsencode(parent.filename()),
663 661 pycompat.sysbytes(parent.function),
664 662 parent.lineno,
665 663 pycompat.sysbytes(parent.getsource(50)),
666 664 )
667 665 )
668 666
669 667 stats = SiteStats.buildstats(data.samples)
670 668 stats = [
671 669 s
672 670 for s in stats
673 671 if s.site.function == function
674 672 and (not filename or s.site.filename() == filename)
675 673 ]
676 674
677 675 total_cum_sec = 0
678 676 total_self_sec = 0
679 677 total_self_percent = 0
680 678 total_cum_percent = 0
681 679 for stat in stats:
682 680 total_cum_sec += stat.totalseconds()
683 681 total_self_sec += stat.selfseconds()
684 682 total_self_percent += stat.selfpercent()
685 683 total_cum_percent += stat.totalpercent()
686 684
687 685 fp.write(
688 686 b'\n %s:%s Total: %0.2fs (%0.2f%%) Self: %0.2fs (%0.2f%%)\n\n'
689 687 % (
690 688 pycompat.sysbytes(filename or b'___'),
691 689 pycompat.sysbytes(function),
692 690 total_cum_sec,
693 691 total_cum_percent,
694 692 total_self_sec,
695 693 total_self_percent,
696 694 )
697 695 )
698 696
699 697 children = [(child, count) for child, count in children.items()]
700 698 children.sort(reverse=True, key=lambda x: x[1])
701 699 for child, count in children:
702 700 fp.write(
703 701 b' %6.2f%% line %s: %s\n'
704 702 % (
705 703 count / relevant_samples * 100,
706 704 child.lineno,
707 705 pycompat.sysbytes(child.getsource(50)),
708 706 )
709 707 )
710 708
711 709
712 710 def display_hotpath(data, fp, limit=0.05, **kwargs):
713 711 class HotNode(object):
714 712 def __init__(self, site):
715 713 self.site = site
716 714 self.count = 0
717 715 self.children = {}
718 716
719 717 def add(self, stack, time):
720 718 self.count += time
721 719 site = stack[0]
722 720 child = self.children.get(site)
723 721 if not child:
724 722 child = HotNode(site)
725 723 self.children[site] = child
726 724
727 725 if len(stack) > 1:
728 726 i = 1
729 727 # Skip boilerplate parts of the stack
730 728 while i < len(stack) and stack[i].skipname() in skips:
731 729 i += 1
732 730 if i < len(stack):
733 731 child.add(stack[i:], time)
734 732 else:
735 733 # Normally this is done by the .add() calls
736 734 child.count += time
737 735
738 736 root = HotNode(None)
739 737 lasttime = data.samples[0].time
740 738 for sample in data.samples:
741 739 root.add(sample.stack[::-1], sample.time - lasttime)
742 740 lasttime = sample.time
743 741 showtime = kwargs.get('showtime', True)
744 742
745 743 def _write(node, depth, multiple_siblings):
746 744 site = node.site
747 745 visiblechildren = [
748 746 c for c in node.children.values() if c.count >= (limit * root.count)
749 747 ]
750 748 if site:
751 749 indent = depth * 2 - 1
752 750 filename = (site.filename() + b':').ljust(15)
753 751 function = site.function
754 752
755 753 # lots of string formatting
756 754 listpattern = (
757 755 b''.ljust(indent)
758 756 + (b'\\' if multiple_siblings else b'|')
759 757 + b' %4.1f%%'
760 758 + (b' %5.2fs' % node.count if showtime else b'')
761 759 + b' %s %s'
762 760 )
763 761 liststring = listpattern % (
764 762 node.count / root.count * 100,
765 763 filename,
766 764 function,
767 765 )
768 766 # 4 to account for the word 'line'
769 767 spacing_len = max(4, 55 - len(liststring))
770 768 prefix = b''
771 769 if spacing_len == 4:
772 770 prefix = b', '
773 771
774 772 codepattern = b'%s%s %d: %s%s'
775 773 codestring = codepattern % (
776 774 prefix,
777 775 b'line'.rjust(spacing_len),
778 776 site.lineno,
779 777 b''.ljust(max(0, 4 - len(str(site.lineno)))),
780 778 site.getsource(30),
781 779 )
782 780
783 781 finalstring = liststring + codestring
784 782 childrensamples = sum([c.count for c in node.children.values()])
785 783 # Make frames that performed more than 10% of the operation red
786 784 if node.count - childrensamples > (0.1 * root.count):
787 785 finalstring = b'\033[91m' + finalstring + b'\033[0m'
788 786 # Make frames that didn't actually perform work dark grey
789 787 elif node.count - childrensamples == 0:
790 788 finalstring = b'\033[90m' + finalstring + b'\033[0m'
791 789 fp.write(finalstring + b'\n')
792 790
793 791 newdepth = depth
794 792 if len(visiblechildren) > 1 or multiple_siblings:
795 793 newdepth += 1
796 794
797 795 visiblechildren.sort(reverse=True, key=lambda x: x.count)
798 796 for child in visiblechildren:
799 797 _write(child, newdepth, len(visiblechildren) > 1)
800 798
801 799 if root.count > 0:
802 800 _write(root, 0, False)
803 801
804 802
805 803 def write_to_flame(data, fp, scriptpath=None, outputfile=None, **kwargs):
806 804 if scriptpath is None:
807 805 scriptpath = encoding.environ[b'HOME'] + b'/flamegraph.pl'
808 806 if not os.path.exists(scriptpath):
809 807 fp.write(b'error: missing %s\n' % scriptpath)
810 808 fp.write(b'get it here: https://github.com/brendangregg/FlameGraph\n')
811 809 return
812 810
813 811 lines = {}
814 812 for sample in data.samples:
815 813 sites = [s.function for s in sample.stack]
816 814 sites.reverse()
817 815 line = b';'.join(sites)
818 816 if line in lines:
819 817 lines[line] = lines[line] + 1
820 818 else:
821 819 lines[line] = 1
822 820
823 821 fd, path = pycompat.mkstemp()
824 822
825 823 with open(path, b"w+") as file:
826 824 for line, count in lines.items():
827 825 file.write(b"%s %d\n" % (line, count))
828 826
829 827 if outputfile is None:
830 828 outputfile = b'~/flamegraph.svg'
831 829
832 830 os.system(b"perl ~/flamegraph.pl %s > %s" % (path, outputfile))
833 831 fp.write(b'Written to %s\n' % outputfile)
834 832
835 833
836 834 _pathcache = {}
837 835
838 836
839 837 def simplifypath(path):
840 838 """Attempt to make the path to a Python module easier to read by
841 839 removing whatever part of the Python search path it was found
842 840 on."""
843 841
844 842 if path in _pathcache:
845 843 return _pathcache[path]
846 844 hgpath = encoding.__file__.rsplit(os.sep, 2)[0]
847 845 for p in [hgpath] + sys.path:
848 846 prefix = p + os.sep
849 847 if path.startswith(prefix):
850 848 path = path[len(prefix) :]
851 849 break
852 850 _pathcache[path] = path
853 851 return path
854 852
855 853
856 854 def write_to_json(data, fp):
857 855 samples = []
858 856
859 857 for sample in data.samples:
860 858 stack = []
861 859
862 860 for frame in sample.stack:
863 861 stack.append(
864 862 (
865 863 pycompat.sysstr(frame.path),
866 864 frame.lineno,
867 865 pycompat.sysstr(frame.function),
868 866 )
869 867 )
870 868
871 869 samples.append((sample.time, stack))
872 870
873 871 data = json.dumps(samples)
874 872 if not isinstance(data, bytes):
875 873 data = data.encode('utf-8')
876 874
877 875 fp.write(data)
878 876
879 877
880 878 def write_to_chrome(data, fp, minthreshold=0.005, maxthreshold=0.999):
881 879 samples = []
882 880 laststack = collections.deque()
883 881 lastseen = collections.deque()
884 882
885 883 # The Chrome tracing format allows us to use a compact stack
886 884 # representation to save space. It's fiddly but worth it.
887 885 # We maintain a bijection between stack and ID.
888 886 stack2id = {}
889 887 id2stack = [] # will eventually be rendered
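# Illustrative shape of the compact representation (values made up):
#
#   id2stack[3] == {'category': 'dispatch.py:112',
#                   'name': 'dispatch.py:112 run', 'parent': 2}
#
# Each emitted trace event then points at its stack through the 'sf' field,
# so a full stack never has to be repeated for every sample.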
890 888
891 889 def stackid(stack):
892 890 if not stack:
893 891 return
894 892 if stack in stack2id:
895 893 return stack2id[stack]
896 894 parent = stackid(stack[1:])
897 895 myid = len(stack2id)
898 896 stack2id[stack] = myid
899 897 id2stack.append(dict(category=stack[0][0], name='%s %s' % stack[0]))
900 898 if parent is not None:
901 899 id2stack[-1].update(parent=parent)
902 900 return myid
903 901
904 902 # The sampling profiler can sample multiple times without
905 903 # advancing the clock, potentially causing the Chrome trace viewer
906 904 # to render single-pixel columns that we cannot zoom in on. We
907 905 # work around this by pretending that zero-duration samples are a
908 906 # millisecond in length.
909 907
910 908 clamp = 0.001
911 909
912 910 # We provide knobs that by default attempt to filter out stack
913 911 # frames that are too noisy:
914 912 #
915 913 # * A few take almost all execution time. These are usually boring
916 914 # setup functions, giving a stack that is deep but uninformative.
917 915 #
918 916 # * Numerous samples take almost no time, but introduce lots of
919 917 # noisy, oft-deep "spines" into a rendered profile.
920 918
921 919 blacklist = set()
922 920 totaltime = data.samples[-1].time - data.samples[0].time
923 921 minthreshold = totaltime * minthreshold
924 922 maxthreshold = max(totaltime * maxthreshold, clamp)
925 923
926 924 def poplast():
927 925 oldsid = stackid(tuple(laststack))
928 926 oldcat, oldfunc = laststack.popleft()
929 927 oldtime, oldidx = lastseen.popleft()
930 928 duration = sample.time - oldtime
931 929 if minthreshold <= duration <= maxthreshold:
932 930 # ensure no zero-duration events
933 931 sampletime = max(oldtime + clamp, sample.time)
934 932 samples.append(
935 933 dict(
936 934 ph='E',
937 935 name=oldfunc,
938 936 cat=oldcat,
939 937 sf=oldsid,
940 938 ts=sampletime * 1e6,
941 939 pid=0,
942 940 )
943 941 )
944 942 else:
945 943 blacklist.add(oldidx)
946 944
947 945 # Much fiddling to synthesize correctly(ish) nested begin/end
948 946 # events given only stack snapshots.
949 947
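# For each sample: frames shared with the previous stack are kept, frames that
# disappeared get an 'E' (end) event via poplast(), and frames that appeared
# get a 'B' (begin) event.  A frame whose duration falls outside
# [minthreshold, maxthreshold] has its begin event blacklisted (and no end
# event emitted), so it is filtered out of the final event list.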
950 948 for sample in data.samples:
951 949 stack = tuple(
952 950 (
953 951 (
954 952 '%s:%d'
955 953 % (simplifypath(pycompat.sysstr(frame.path)), frame.lineno),
956 954 pycompat.sysstr(frame.function),
957 955 )
958 956 for frame in sample.stack
959 957 )
960 958 )
961 959 qstack = collections.deque(stack)
962 960 if laststack == qstack:
963 961 continue
964 962 while laststack and qstack and laststack[-1] == qstack[-1]:
965 963 laststack.pop()
966 964 qstack.pop()
967 965 while laststack:
968 966 poplast()
969 967 for f in reversed(qstack):
970 968 lastseen.appendleft((sample.time, len(samples)))
971 969 laststack.appendleft(f)
972 970 path, name = f
973 971 sid = stackid(tuple(laststack))
974 972 samples.append(
975 973 dict(
976 974 ph='B',
977 975 name=name,
978 976 cat=path,
979 977 ts=sample.time * 1e6,
980 978 sf=sid,
981 979 pid=0,
982 980 )
983 981 )
984 982 laststack = collections.deque(stack)
985 983 while laststack:
986 984 poplast()
987 985 events = [
988 986 sample for idx, sample in enumerate(samples) if idx not in blacklist
989 987 ]
990 988 frames = collections.OrderedDict(
991 989 (str(k), v) for (k, v) in enumerate(id2stack)
992 990 )
993 991 data = json.dumps(dict(traceEvents=events, stackFrames=frames), indent=1)
994 992 if not isinstance(data, bytes):
995 993 data = data.encode('utf-8')
996 994 fp.write(data)
997 995 fp.write(b'\n')
998 996
999 997
1000 998 def printusage():
1001 999 print(
1002 1000 r"""
1003 1001 The statprof command line allows you to inspect the last profile's results in
1004 1002 the following forms:
1005 1003
1006 1004 usage:
1007 1005 hotpath [-l --limit percent]
1008 1006 Shows a graph of calls with the percent of time each takes.
1009 1007 Red calls take over 10%% of the total time themselves.
1010 1008 lines
1011 1009 Shows the actual sampled lines.
1012 1010 functions
1013 1011 Shows the samples grouped by function.
1014 1012 function [filename:]functionname
1015 1013 Shows the callers and callees of a particular function.
1016 1014 flame [-s --script-path] [-o --output-file path]
1017 1015 Writes out a flamegraph to output-file (defaults to ~/flamegraph.svg)
1018 1016 Requires that ~/flamegraph.pl exist.
1019 1017 (Specify alternate script path with --script-path.)"""
1020 1018 )
1021 1019
1022 1020
1023 1021 def main(argv=None):
1024 1022 if argv is None:
1025 1023 argv = sys.argv
1026 1024
1027 1025 if len(argv) == 1:
1028 1026 printusage()
1029 1027 return 0
1030 1028
1031 1029 displayargs = {}
1032 1030
1033 1031 optstart = 2
1034 1032 displayargs[b'function'] = None
1035 1033 if argv[1] == 'hotpath':
1036 1034 displayargs[b'format'] = DisplayFormats.Hotpath
1037 1035 elif argv[1] == 'lines':
1038 1036 displayargs[b'format'] = DisplayFormats.ByLine
1039 1037 elif argv[1] == 'functions':
1040 1038 displayargs[b'format'] = DisplayFormats.ByMethod
1041 1039 elif argv[1] == 'function':
1042 1040 displayargs[b'format'] = DisplayFormats.AboutMethod
1043 1041 displayargs[b'function'] = argv[2]
1044 1042 optstart = 3
1045 1043 elif argv[1] == 'flame':
1046 1044 displayargs[b'format'] = DisplayFormats.FlameGraph
1047 1045 else:
1048 1046 printusage()
1049 1047 return 0
1050 1048
1051 1049 # process options
1052 1050 try:
1053 1051 opts, args = pycompat.getoptb(
1054 1052 sys.argv[optstart:],
1055 1053 b"hl:f:o:p:",
1056 1054 [b"help", b"limit=", b"file=", b"output-file=", b"script-path="],
1057 1055 )
1058 1056 except getopt.error as msg:
1059 1057 print(msg)
1060 1058 printusage()
1061 1059 return 2
1062 1060
1063 1061 displayargs[b'limit'] = 0.05
1064 1062 path = None
1065 1063 for o, value in opts:
1066 1064 if o in ("-l", "--limit"):
1067 1065 displayargs[b'limit'] = float(value)
1068 1066 elif o in ("-f", "--file"):
1069 1067 path = value
1070 1068 elif o in ("-o", "--output-file"):
1071 1069 displayargs[b'outputfile'] = value
1072 1070 elif o in ("-p", "--script-path"):
1073 1071 displayargs[b'scriptpath'] = value
1074 1072 elif o in ("-h", "--help"):
1075 1073 printusage()
1076 1074 return 0
1077 1075 else:
1078 1076 assert False, "unhandled option %s" % o
1079 1077
1080 1078 if not path:
1081 1079 print('must specify --file to load')
1082 1080 return 1
1083 1081
1084 1082 load_data(path=path)
1085 1083
1086 1084 display(**pycompat.strkwargs(displayargs))
1087 1085
1088 1086 return 0
1089 1087
1090 1088
1091 1089 if __name__ == "__main__":
1092 1090 sys.exit(main())