##// END OF EJS Templates
statprof: also gather wall time...
Boris Feld -
r38278:af402c6b default
parent child Browse files
Show More
@@ -1,928 +1,936
1 1 #!/usr/bin/env python
2 2 ## statprof.py
3 3 ## Copyright (C) 2012 Bryan O'Sullivan <bos@serpentine.com>
4 4 ## Copyright (C) 2011 Alex Fraser <alex at phatcore dot com>
5 5 ## Copyright (C) 2004,2005 Andy Wingo <wingo at pobox dot com>
6 6 ## Copyright (C) 2001 Rob Browning <rlb at defaultvalue dot org>
7 7
8 8 ## This library is free software; you can redistribute it and/or
9 9 ## modify it under the terms of the GNU Lesser General Public
10 10 ## License as published by the Free Software Foundation; either
11 11 ## version 2.1 of the License, or (at your option) any later version.
12 12 ##
13 13 ## This library is distributed in the hope that it will be useful,
14 14 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
15 15 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 16 ## Lesser General Public License for more details.
17 17 ##
18 18 ## You should have received a copy of the GNU Lesser General Public
19 19 ## License along with this program; if not, contact:
20 20 ##
21 21 ## Free Software Foundation Voice: +1-617-542-5942
22 22 ## 59 Temple Place - Suite 330 Fax: +1-617-542-2652
23 23 ## Boston, MA 02111-1307, USA gnu@gnu.org
24 24
25 25 """
26 26 statprof is intended to be a fairly simple statistical profiler for
27 27 python. It was ported directly from a statistical profiler for guile,
28 28 also named statprof, available from guile-lib [0].
29 29
30 30 [0] http://wingolog.org/software/guile-lib/statprof/
31 31
32 32 To start profiling, call statprof.start():
33 33 >>> start()
34 34
35 35 Then run whatever it is that you want to profile, for example:
36 36 >>> import test.pystone; test.pystone.pystones()
37 37
38 38 Then stop the profiling and print out the results:
39 39 >>> stop()
40 40 >>> display()
41 41 % cumulative self
42 42 time seconds seconds name
43 43 26.72 1.40 0.37 pystone.py:79:Proc0
44 44 13.79 0.56 0.19 pystone.py:133:Proc1
45 45 13.79 0.19 0.19 pystone.py:208:Proc8
46 46 10.34 0.16 0.14 pystone.py:229:Func2
47 47 6.90 0.10 0.10 pystone.py:45:__init__
48 48 4.31 0.16 0.06 pystone.py:53:copy
49 49 ...
50 50
51 51 All of the numerical data is statistically approximate. In the
52 52 following column descriptions, and in all of statprof, "time" refers
53 53 to execution time (both user and system), not wall clock time.
54 54
55 55 % time
56 56 The percent of the time spent inside the procedure itself (not
57 57 counting children).
58 58
59 59 cumulative seconds
60 60 The total number of seconds spent in the procedure, including
61 61 children.
62 62
63 63 self seconds
64 64 The total number of seconds spent in the procedure itself (not
65 65 counting children).
66 66
67 67 name
68 68 The name of the procedure.
69 69
70 70 By default statprof keeps the data collected from previous runs. If you
71 71 want to clear the collected data, call reset():
72 72 >>> reset()
73 73
74 74 reset() can also be used to change the sampling frequency from the
75 75 default of 1000 Hz. For example, to tell statprof to sample 50 times a
76 76 second:
77 77 >>> reset(50)
78 78
79 79 This means that statprof will sample the call stack after every 1/50 of
80 80 a second of user + system time spent running on behalf of the python
81 81 process. When your process is idle (for example, blocking in a read(),
82 82 as is the case at the listener), the clock does not advance. For this
83 83 reason statprof is not currently not suitable for profiling io-bound
84 84 operations.
85 85
86 86 The profiler uses the hash of the code object itself to identify the
87 87 procedures, so it won't confuse different procedures with the same name.
88 88 They will show up as two different rows in the output.
89 89
90 90 Right now the profiler is quite simplistic. I cannot provide
91 91 call-graphs or other higher level information. What you see in the
92 92 table is pretty much all there is. Patches are welcome :-)
93 93
94 94
95 95 Threading
96 96 ---------
97 97
98 98 Because signals only get delivered to the main thread in Python,
99 99 statprof only profiles the main thread. However because the time
100 100 reporting function uses per-process timers, the results can be
101 101 significantly off if other threads' work patterns are not similar to the
102 102 main thread's work patterns.
103 103 """
104 104 # no-check-code
105 105 from __future__ import absolute_import, division, print_function
106 106
107 107 import collections
108 108 import contextlib
109 109 import getopt
110 110 import inspect
111 111 import json
112 112 import os
113 113 import signal
114 114 import sys
115 115 import threading
116 116 import time
117 117
118 118 from . import (
119 119 encoding,
120 120 pycompat,
121 121 )
122 122
123 123 defaultdict = collections.defaultdict
124 124 contextmanager = contextlib.contextmanager
125 125
126 126 __all__ = ['start', 'stop', 'reset', 'display', 'profile']
127 127
128 128 skips = {"util.py:check", "extensions.py:closure",
129 129 "color.py:colorcmd", "dispatch.py:checkargs",
130 130 "dispatch.py:<lambda>", "dispatch.py:_runcatch",
131 131 "dispatch.py:_dispatch", "dispatch.py:_runcommand",
132 132 "pager.py:pagecmd", "dispatch.py:run",
133 133 "dispatch.py:dispatch", "dispatch.py:runcommand",
134 134 "hg.py:<module>", "evolve.py:warnobserrors",
135 135 }
136 136
137 137 ###########################################################################
138 138 ## Utils
139 139
140 140 def clock():
141 141 times = os.times()
142 return times[0] + times[1]
142 return (times[0] + times[1], times[4])
143 143
144 144
145 145 ###########################################################################
146 146 ## Collection data structures
147 147
148 148 class ProfileState(object):
149 149 def __init__(self, frequency=None):
150 150 self.reset(frequency)
151 151
152 152 def reset(self, frequency=None):
153 153 # total so far
154 self.accumulated_time = 0.0
154 self.accumulated_time = (0.0, 0.0)
155 155 # start_time when timer is active
156 156 self.last_start_time = None
157 157 # a float
158 158 if frequency:
159 159 self.sample_interval = 1.0 / frequency
160 160 elif not hasattr(self, 'sample_interval'):
161 161 # default to 1000 Hz
162 162 self.sample_interval = 1.0 / 1000.0
163 163 else:
164 164 # leave the frequency as it was
165 165 pass
166 166 self.remaining_prof_time = None
167 167 # for user start/stop nesting
168 168 self.profile_level = 0
169 169
170 170 self.samples = []
171 171
172 172 def accumulate_time(self, stop_time):
173 self.accumulated_time += stop_time - self.last_start_time
173 increment = (
174 stop_time[0] - self.last_start_time[0],
175 stop_time[1] - self.last_start_time[1],
176 )
177 self.accumulated_time = (
178 self.accumulated_time[0] + increment[0],
179 self.accumulated_time[1] + increment[1],
180 )
174 181
175 182 def seconds_per_sample(self):
176 return self.accumulated_time / len(self.samples)
183 return self.accumulated_time[0] / len(self.samples)
177 184
178 185 state = ProfileState()
179 186
180 187
181 188 class CodeSite(object):
182 189 cache = {}
183 190
184 191 __slots__ = (u'path', u'lineno', u'function', u'source')
185 192
186 193 def __init__(self, path, lineno, function):
187 194 self.path = path
188 195 self.lineno = lineno
189 196 self.function = function
190 197 self.source = None
191 198
192 199 def __eq__(self, other):
193 200 try:
194 201 return (self.lineno == other.lineno and
195 202 self.path == other.path)
196 203 except:
197 204 return False
198 205
199 206 def __hash__(self):
200 207 return hash((self.lineno, self.path))
201 208
202 209 @classmethod
203 210 def get(cls, path, lineno, function):
204 211 k = (path, lineno)
205 212 try:
206 213 return cls.cache[k]
207 214 except KeyError:
208 215 v = cls(path, lineno, function)
209 216 cls.cache[k] = v
210 217 return v
211 218
212 219 def getsource(self, length):
213 220 if self.source is None:
214 221 lineno = self.lineno - 1
215 222 fp = None
216 223 try:
217 224 fp = open(self.path)
218 225 for i, line in enumerate(fp):
219 226 if i == lineno:
220 227 self.source = line.strip()
221 228 break
222 229 except:
223 230 pass
224 231 finally:
225 232 if fp:
226 233 fp.close()
227 234 if self.source is None:
228 235 self.source = ''
229 236
230 237 source = self.source
231 238 if len(source) > length:
232 239 source = source[:(length - 3)] + "..."
233 240 return source
234 241
235 242 def filename(self):
236 243 return os.path.basename(self.path)
237 244
238 245 class Sample(object):
239 246 __slots__ = (u'stack', u'time')
240 247
241 248 def __init__(self, stack, time):
242 249 self.stack = stack
243 250 self.time = time
244 251
245 252 @classmethod
246 253 def from_frame(cls, frame, time):
247 254 stack = []
248 255
249 256 while frame:
250 257 stack.append(CodeSite.get(frame.f_code.co_filename, frame.f_lineno,
251 258 frame.f_code.co_name))
252 259 frame = frame.f_back
253 260
254 261 return Sample(stack, time)
255 262
256 263 ###########################################################################
257 264 ## SIGPROF handler
258 265
259 266 def profile_signal_handler(signum, frame):
260 267 if state.profile_level > 0:
261 268 now = clock()
262 269 state.accumulate_time(now)
263 270
264 state.samples.append(Sample.from_frame(frame, state.accumulated_time))
271 state.samples.append(Sample.from_frame(frame, state.accumulated_time[0]))
265 272
266 273 signal.setitimer(signal.ITIMER_PROF,
267 274 state.sample_interval, 0.0)
268 275 state.last_start_time = now
269 276
270 277 stopthread = threading.Event()
271 278 def samplerthread(tid):
272 279 while not stopthread.is_set():
273 280 now = clock()
274 281 state.accumulate_time(now)
275 282
276 283 frame = sys._current_frames()[tid]
277 state.samples.append(Sample.from_frame(frame, state.accumulated_time))
284 state.samples.append(Sample.from_frame(frame, state.accumulated_time[0]))
278 285
279 286 state.last_start_time = now
280 287 time.sleep(state.sample_interval)
281 288
282 289 stopthread.clear()
283 290
284 291 ###########################################################################
285 292 ## Profiling API
286 293
287 294 def is_active():
288 295 return state.profile_level > 0
289 296
290 297 lastmechanism = None
291 298 def start(mechanism='thread'):
292 299 '''Install the profiling signal handler, and start profiling.'''
293 300 state.profile_level += 1
294 301 if state.profile_level == 1:
295 302 state.last_start_time = clock()
296 303 rpt = state.remaining_prof_time
297 304 state.remaining_prof_time = None
298 305
299 306 global lastmechanism
300 307 lastmechanism = mechanism
301 308
302 309 if mechanism == 'signal':
303 310 signal.signal(signal.SIGPROF, profile_signal_handler)
304 311 signal.setitimer(signal.ITIMER_PROF,
305 312 rpt or state.sample_interval, 0.0)
306 313 elif mechanism == 'thread':
307 314 frame = inspect.currentframe()
308 315 tid = [k for k, f in sys._current_frames().items() if f == frame][0]
309 316 state.thread = threading.Thread(target=samplerthread,
310 317 args=(tid,), name="samplerthread")
311 318 state.thread.start()
312 319
313 320 def stop():
314 321 '''Stop profiling, and uninstall the profiling signal handler.'''
315 322 state.profile_level -= 1
316 323 if state.profile_level == 0:
317 324 if lastmechanism == 'signal':
318 325 rpt = signal.setitimer(signal.ITIMER_PROF, 0.0, 0.0)
319 326 signal.signal(signal.SIGPROF, signal.SIG_IGN)
320 327 state.remaining_prof_time = rpt[0]
321 328 elif lastmechanism == 'thread':
322 329 stopthread.set()
323 330 state.thread.join()
324 331
325 332 state.accumulate_time(clock())
326 333 state.last_start_time = None
327 334 statprofpath = encoding.environ.get('STATPROF_DEST')
328 335 if statprofpath:
329 336 save_data(statprofpath)
330 337
331 338 return state
332 339
333 340 def save_data(path):
334 341 with open(path, 'w+') as file:
335 342 file.write(str(state.accumulated_time) + '\n')
336 343 for sample in state.samples:
337 344 time = str(sample.time)
338 345 stack = sample.stack
339 346 sites = ['\1'.join([s.path, str(s.lineno), s.function])
340 347 for s in stack]
341 348 file.write(time + '\0' + '\0'.join(sites) + '\n')
342 349
343 350 def load_data(path):
344 351 lines = open(path, 'r').read().splitlines()
345 352
346 353 state.accumulated_time = float(lines[0])
347 354 state.samples = []
348 355 for line in lines[1:]:
349 356 parts = line.split('\0')
350 357 time = float(parts[0])
351 358 rawsites = parts[1:]
352 359 sites = []
353 360 for rawsite in rawsites:
354 361 siteparts = rawsite.split('\1')
355 362 sites.append(CodeSite.get(siteparts[0], int(siteparts[1]),
356 363 siteparts[2]))
357 364
358 365 state.samples.append(Sample(sites, time))
359 366
360 367
361 368
362 369 def reset(frequency=None):
363 370 '''Clear out the state of the profiler. Do not call while the
364 371 profiler is running.
365 372
366 373 The optional frequency argument specifies the number of samples to
367 374 collect per second.'''
368 375 assert state.profile_level == 0, "Can't reset() while statprof is running"
369 376 CodeSite.cache.clear()
370 377 state.reset(frequency)
371 378
372 379
373 380 @contextmanager
374 381 def profile():
375 382 start()
376 383 try:
377 384 yield
378 385 finally:
379 386 stop()
380 387 display()
381 388
382 389
383 390 ###########################################################################
384 391 ## Reporting API
385 392
386 393 class SiteStats(object):
387 394 def __init__(self, site):
388 395 self.site = site
389 396 self.selfcount = 0
390 397 self.totalcount = 0
391 398
392 399 def addself(self):
393 400 self.selfcount += 1
394 401
395 402 def addtotal(self):
396 403 self.totalcount += 1
397 404
398 405 def selfpercent(self):
399 406 return self.selfcount / len(state.samples) * 100
400 407
401 408 def totalpercent(self):
402 409 return self.totalcount / len(state.samples) * 100
403 410
404 411 def selfseconds(self):
405 412 return self.selfcount * state.seconds_per_sample()
406 413
407 414 def totalseconds(self):
408 415 return self.totalcount * state.seconds_per_sample()
409 416
410 417 @classmethod
411 418 def buildstats(cls, samples):
412 419 stats = {}
413 420
414 421 for sample in samples:
415 422 for i, site in enumerate(sample.stack):
416 423 sitestat = stats.get(site)
417 424 if not sitestat:
418 425 sitestat = SiteStats(site)
419 426 stats[site] = sitestat
420 427
421 428 sitestat.addtotal()
422 429
423 430 if i == 0:
424 431 sitestat.addself()
425 432
426 433 return [s for s in stats.itervalues()]
427 434
428 435 class DisplayFormats:
429 436 ByLine = 0
430 437 ByMethod = 1
431 438 AboutMethod = 2
432 439 Hotpath = 3
433 440 FlameGraph = 4
434 441 Json = 5
435 442 Chrome = 6
436 443
437 444 def display(fp=None, format=3, data=None, **kwargs):
438 445 '''Print statistics, either to stdout or the given file object.'''
439 446 data = data or state
440 447
441 448 if fp is None:
442 449 import sys
443 450 fp = sys.stdout
444 451 if len(data.samples) == 0:
445 452 print('No samples recorded.', file=fp)
446 453 return
447 454
448 455 if format == DisplayFormats.ByLine:
449 456 display_by_line(data, fp)
450 457 elif format == DisplayFormats.ByMethod:
451 458 display_by_method(data, fp)
452 459 elif format == DisplayFormats.AboutMethod:
453 460 display_about_method(data, fp, **kwargs)
454 461 elif format == DisplayFormats.Hotpath:
455 462 display_hotpath(data, fp, **kwargs)
456 463 elif format == DisplayFormats.FlameGraph:
457 464 write_to_flame(data, fp, **kwargs)
458 465 elif format == DisplayFormats.Json:
459 466 write_to_json(data, fp)
460 467 elif format == DisplayFormats.Chrome:
461 468 write_to_chrome(data, fp, **kwargs)
462 469 else:
463 470 raise Exception("Invalid display format")
464 471
465 472 if format not in (DisplayFormats.Json, DisplayFormats.Chrome):
466 473 print('---', file=fp)
467 474 print('Sample count: %d' % len(data.samples), file=fp)
468 print('Total time: %f seconds' % data.accumulated_time, file=fp)
475 print('Total time: %f seconds (%f wall)' % data.accumulated_time,
476 file=fp)
469 477
470 478 def display_by_line(data, fp):
471 479 '''Print the profiler data with each sample line represented
472 480 as one row in a table. Sorted by self-time per line.'''
473 481 stats = SiteStats.buildstats(data.samples)
474 482 stats.sort(reverse=True, key=lambda x: x.selfseconds())
475 483
476 484 print('%5.5s %10.10s %7.7s %-8.8s' %
477 485 ('% ', 'cumulative', 'self', ''), file=fp)
478 486 print('%5.5s %9.9s %8.8s %-8.8s' %
479 487 ("time", "seconds", "seconds", "name"), file=fp)
480 488
481 489 for stat in stats:
482 490 site = stat.site
483 491 sitelabel = '%s:%d:%s' % (site.filename(), site.lineno, site.function)
484 492 print('%6.2f %9.2f %9.2f %s' % (stat.selfpercent(),
485 493 stat.totalseconds(),
486 494 stat.selfseconds(),
487 495 sitelabel),
488 496 file=fp)
489 497
490 498 def display_by_method(data, fp):
491 499 '''Print the profiler data with each sample function represented
492 500 as one row in a table. Important lines within that function are
493 501 output as nested rows. Sorted by self-time per line.'''
494 502 print('%5.5s %10.10s %7.7s %-8.8s' %
495 503 ('% ', 'cumulative', 'self', ''), file=fp)
496 504 print('%5.5s %9.9s %8.8s %-8.8s' %
497 505 ("time", "seconds", "seconds", "name"), file=fp)
498 506
499 507 stats = SiteStats.buildstats(data.samples)
500 508
501 509 grouped = defaultdict(list)
502 510 for stat in stats:
503 511 grouped[stat.site.filename() + ":" + stat.site.function].append(stat)
504 512
505 513 # compute sums for each function
506 514 functiondata = []
507 515 for fname, sitestats in grouped.iteritems():
508 516 total_cum_sec = 0
509 517 total_self_sec = 0
510 518 total_percent = 0
511 519 for stat in sitestats:
512 520 total_cum_sec += stat.totalseconds()
513 521 total_self_sec += stat.selfseconds()
514 522 total_percent += stat.selfpercent()
515 523
516 524 functiondata.append((fname,
517 525 total_cum_sec,
518 526 total_self_sec,
519 527 total_percent,
520 528 sitestats))
521 529
522 530 # sort by total self sec
523 531 functiondata.sort(reverse=True, key=lambda x: x[2])
524 532
525 533 for function in functiondata:
526 534 if function[3] < 0.05:
527 535 continue
528 536 print('%6.2f %9.2f %9.2f %s' % (function[3], # total percent
529 537 function[1], # total cum sec
530 538 function[2], # total self sec
531 539 function[0]), # file:function
532 540 file=fp)
533 541 function[4].sort(reverse=True, key=lambda i: i.selfseconds())
534 542 for stat in function[4]:
535 543 # only show line numbers for significant locations (>1% time spent)
536 544 if stat.selfpercent() > 1:
537 545 source = stat.site.getsource(25)
538 546 stattuple = (stat.selfpercent(), stat.selfseconds(),
539 547 stat.site.lineno, source)
540 548
541 549 print('%33.0f%% %6.2f line %s: %s' % (stattuple), file=fp)
542 550
543 551 def display_about_method(data, fp, function=None, **kwargs):
544 552 if function is None:
545 553 raise Exception("Invalid function")
546 554
547 555 filename = None
548 556 if ':' in function:
549 557 filename, function = function.split(':')
550 558
551 559 relevant_samples = 0
552 560 parents = {}
553 561 children = {}
554 562
555 563 for sample in data.samples:
556 564 for i, site in enumerate(sample.stack):
557 565 if site.function == function and (not filename
558 566 or site.filename() == filename):
559 567 relevant_samples += 1
560 568 if i != len(sample.stack) - 1:
561 569 parent = sample.stack[i + 1]
562 570 if parent in parents:
563 571 parents[parent] = parents[parent] + 1
564 572 else:
565 573 parents[parent] = 1
566 574
567 575 if site in children:
568 576 children[site] = children[site] + 1
569 577 else:
570 578 children[site] = 1
571 579
572 580 parents = [(parent, count) for parent, count in parents.iteritems()]
573 581 parents.sort(reverse=True, key=lambda x: x[1])
574 582 for parent, count in parents:
575 583 print('%6.2f%% %s:%s line %s: %s' %
576 584 (count / relevant_samples * 100, parent.filename(),
577 585 parent.function, parent.lineno, parent.getsource(50)), file=fp)
578 586
579 587 stats = SiteStats.buildstats(data.samples)
580 588 stats = [s for s in stats
581 589 if s.site.function == function and
582 590 (not filename or s.site.filename() == filename)]
583 591
584 592 total_cum_sec = 0
585 593 total_self_sec = 0
586 594 total_self_percent = 0
587 595 total_cum_percent = 0
588 596 for stat in stats:
589 597 total_cum_sec += stat.totalseconds()
590 598 total_self_sec += stat.selfseconds()
591 599 total_self_percent += stat.selfpercent()
592 600 total_cum_percent += stat.totalpercent()
593 601
594 602 print(
595 603 '\n %s:%s Total: %0.2fs (%0.2f%%) Self: %0.2fs (%0.2f%%)\n' %
596 604 (
597 605 filename or '___',
598 606 function,
599 607 total_cum_sec,
600 608 total_cum_percent,
601 609 total_self_sec,
602 610 total_self_percent
603 611 ), file=fp)
604 612
605 613 children = [(child, count) for child, count in children.iteritems()]
606 614 children.sort(reverse=True, key=lambda x: x[1])
607 615 for child, count in children:
608 616 print(' %6.2f%% line %s: %s' %
609 617 (count / relevant_samples * 100, child.lineno,
610 618 child.getsource(50)), file=fp)
611 619
612 620 def display_hotpath(data, fp, limit=0.05, **kwargs):
613 621 class HotNode(object):
614 622 def __init__(self, site):
615 623 self.site = site
616 624 self.count = 0
617 625 self.children = {}
618 626
619 627 def add(self, stack, time):
620 628 self.count += time
621 629 site = stack[0]
622 630 child = self.children.get(site)
623 631 if not child:
624 632 child = HotNode(site)
625 633 self.children[site] = child
626 634
627 635 if len(stack) > 1:
628 636 i = 1
629 637 # Skip boiler plate parts of the stack
630 638 while i < len(stack) and '%s:%s' % (stack[i].filename(), stack[i].function) in skips:
631 639 i += 1
632 640 if i < len(stack):
633 641 child.add(stack[i:], time)
634 642
635 643 root = HotNode(None)
636 644 lasttime = data.samples[0].time
637 645 for sample in data.samples:
638 646 root.add(sample.stack[::-1], sample.time - lasttime)
639 647 lasttime = sample.time
640 648
641 649 def _write(node, depth, multiple_siblings):
642 650 site = node.site
643 651 visiblechildren = [c for c in node.children.itervalues()
644 652 if c.count >= (limit * root.count)]
645 653 if site:
646 654 indent = depth * 2 - 1
647 655 filename = ''
648 656 function = ''
649 657 if len(node.children) > 0:
650 658 childsite = list(node.children.itervalues())[0].site
651 659 filename = (childsite.filename() + ':').ljust(15)
652 660 function = childsite.function
653 661
654 662 # lots of string formatting
655 663 listpattern = ''.ljust(indent) +\
656 664 ('\\' if multiple_siblings else '|') +\
657 665 ' %4.1f%% %s %s'
658 666 liststring = listpattern % (node.count / root.count * 100,
659 667 filename, function)
660 668 codepattern = '%' + str(55 - len(liststring)) + 's %s: %s'
661 669 codestring = codepattern % ('line', site.lineno, site.getsource(30))
662 670
663 671 finalstring = liststring + codestring
664 672 childrensamples = sum([c.count for c in node.children.itervalues()])
665 673 # Make frames that performed more than 10% of the operation red
666 674 if node.count - childrensamples > (0.1 * root.count):
667 675 finalstring = '\033[91m' + finalstring + '\033[0m'
668 676 # Make frames that didn't actually perform work dark grey
669 677 elif node.count - childrensamples == 0:
670 678 finalstring = '\033[90m' + finalstring + '\033[0m'
671 679 print(finalstring, file=fp)
672 680
673 681 newdepth = depth
674 682 if len(visiblechildren) > 1 or multiple_siblings:
675 683 newdepth += 1
676 684
677 685 visiblechildren.sort(reverse=True, key=lambda x: x.count)
678 686 for child in visiblechildren:
679 687 _write(child, newdepth, len(visiblechildren) > 1)
680 688
681 689 if root.count > 0:
682 690 _write(root, 0, False)
683 691
684 692 def write_to_flame(data, fp, scriptpath=None, outputfile=None, **kwargs):
685 693 if scriptpath is None:
686 694 scriptpath = encoding.environ['HOME'] + '/flamegraph.pl'
687 695 if not os.path.exists(scriptpath):
688 696 print("error: missing %s" % scriptpath, file=fp)
689 697 print("get it here: https://github.com/brendangregg/FlameGraph",
690 698 file=fp)
691 699 return
692 700
693 701 fd, path = pycompat.mkstemp()
694 702
695 703 file = open(path, "w+")
696 704
697 705 lines = {}
698 706 for sample in data.samples:
699 707 sites = [s.function for s in sample.stack]
700 708 sites.reverse()
701 709 line = ';'.join(sites)
702 710 if line in lines:
703 711 lines[line] = lines[line] + 1
704 712 else:
705 713 lines[line] = 1
706 714
707 715 for line, count in lines.iteritems():
708 716 file.write("%s %s\n" % (line, count))
709 717
710 718 file.close()
711 719
712 720 if outputfile is None:
713 721 outputfile = '~/flamegraph.svg'
714 722
715 723 os.system("perl ~/flamegraph.pl %s > %s" % (path, outputfile))
716 724 print("Written to %s" % outputfile, file=fp)
717 725
718 726 _pathcache = {}
719 727 def simplifypath(path):
720 728 '''Attempt to make the path to a Python module easier to read by
721 729 removing whatever part of the Python search path it was found
722 730 on.'''
723 731
724 732 if path in _pathcache:
725 733 return _pathcache[path]
726 734 hgpath = pycompat.fsencode(encoding.__file__).rsplit(os.sep, 2)[0]
727 735 for p in [hgpath] + sys.path:
728 736 prefix = p + os.sep
729 737 if path.startswith(prefix):
730 738 path = path[len(prefix):]
731 739 break
732 740 _pathcache[path] = path
733 741 return path
734 742
735 743 def write_to_json(data, fp):
736 744 samples = []
737 745
738 746 for sample in data.samples:
739 747 stack = []
740 748
741 749 for frame in sample.stack:
742 750 stack.append((frame.path, frame.lineno, frame.function))
743 751
744 752 samples.append((sample.time, stack))
745 753
746 754 print(json.dumps(samples), file=fp)
747 755
748 756 def write_to_chrome(data, fp, minthreshold=0.005, maxthreshold=0.999):
749 757 samples = []
750 758 laststack = collections.deque()
751 759 lastseen = collections.deque()
752 760
753 761 # The Chrome tracing format allows us to use a compact stack
754 762 # representation to save space. It's fiddly but worth it.
755 763 # We maintain a bijection between stack and ID.
756 764 stack2id = {}
757 765 id2stack = [] # will eventually be rendered
758 766
759 767 def stackid(stack):
760 768 if not stack:
761 769 return
762 770 if stack in stack2id:
763 771 return stack2id[stack]
764 772 parent = stackid(stack[1:])
765 773 myid = len(stack2id)
766 774 stack2id[stack] = myid
767 775 id2stack.append(dict(category=stack[0][0], name='%s %s' % stack[0]))
768 776 if parent is not None:
769 777 id2stack[-1].update(parent=parent)
770 778 return myid
771 779
772 780 def endswith(a, b):
773 781 return list(a)[-len(b):] == list(b)
774 782
775 783 # The sampling profiler can sample multiple times without
776 784 # advancing the clock, potentially causing the Chrome trace viewer
777 785 # to render single-pixel columns that we cannot zoom in on. We
778 786 # work around this by pretending that zero-duration samples are a
779 787 # millisecond in length.
780 788
781 789 clamp = 0.001
782 790
783 791 # We provide knobs that by default attempt to filter out stack
784 792 # frames that are too noisy:
785 793 #
786 794 # * A few take almost all execution time. These are usually boring
787 795 # setup functions, giving a stack that is deep but uninformative.
788 796 #
789 797 # * Numerous samples take almost no time, but introduce lots of
790 798 # noisy, oft-deep "spines" into a rendered profile.
791 799
792 800 blacklist = set()
793 801 totaltime = data.samples[-1].time - data.samples[0].time
794 802 minthreshold = totaltime * minthreshold
795 803 maxthreshold = max(totaltime * maxthreshold, clamp)
796 804
797 805 def poplast():
798 806 oldsid = stackid(tuple(laststack))
799 807 oldcat, oldfunc = laststack.popleft()
800 808 oldtime, oldidx = lastseen.popleft()
801 809 duration = sample.time - oldtime
802 810 if minthreshold <= duration <= maxthreshold:
803 811 # ensure no zero-duration events
804 812 sampletime = max(oldtime + clamp, sample.time)
805 813 samples.append(dict(ph='E', name=oldfunc, cat=oldcat, sf=oldsid,
806 814 ts=sampletime*1e6, pid=0))
807 815 else:
808 816 blacklist.add(oldidx)
809 817
810 818 # Much fiddling to synthesize correctly(ish) nested begin/end
811 819 # events given only stack snapshots.
812 820
813 821 for sample in data.samples:
814 822 tos = sample.stack[0]
815 823 name = tos.function
816 824 path = simplifypath(tos.path)
817 825 stack = tuple((('%s:%d' % (simplifypath(frame.path), frame.lineno),
818 826 frame.function) for frame in sample.stack))
819 827 qstack = collections.deque(stack)
820 828 if laststack == qstack:
821 829 continue
822 830 while laststack and qstack and laststack[-1] == qstack[-1]:
823 831 laststack.pop()
824 832 qstack.pop()
825 833 while laststack:
826 834 poplast()
827 835 for f in reversed(qstack):
828 836 lastseen.appendleft((sample.time, len(samples)))
829 837 laststack.appendleft(f)
830 838 path, name = f
831 839 sid = stackid(tuple(laststack))
832 840 samples.append(dict(ph='B', name=name, cat=path, ts=sample.time*1e6,
833 841 sf=sid, pid=0))
834 842 laststack = collections.deque(stack)
835 843 while laststack:
836 844 poplast()
837 845 events = [s[1] for s in enumerate(samples) if s[0] not in blacklist]
838 846 frames = collections.OrderedDict((str(k), v)
839 847 for (k,v) in enumerate(id2stack))
840 848 json.dump(dict(traceEvents=events, stackFrames=frames), fp, indent=1)
841 849 fp.write('\n')
842 850
843 851 def printusage():
844 852 print("""
845 853 The statprof command line allows you to inspect the last profile's results in
846 854 the following forms:
847 855
848 856 usage:
849 857 hotpath [-l --limit percent]
850 858 Shows a graph of calls with the percent of time each takes.
851 859 Red calls take over 10%% of the total time themselves.
852 860 lines
853 861 Shows the actual sampled lines.
854 862 functions
855 863 Shows the samples grouped by function.
856 864 function [filename:]functionname
857 865 Shows the callers and callees of a particular function.
858 866 flame [-s --script-path] [-o --output-file path]
859 867 Writes out a flamegraph to output-file (defaults to ~/flamegraph.svg)
860 868 Requires that ~/flamegraph.pl exist.
861 869 (Specify alternate script path with --script-path.)""")
862 870
863 871 def main(argv=None):
864 872 if argv is None:
865 873 argv = sys.argv
866 874
867 875 if len(argv) == 1:
868 876 printusage()
869 877 return 0
870 878
871 879 displayargs = {}
872 880
873 881 optstart = 2
874 882 displayargs['function'] = None
875 883 if argv[1] == 'hotpath':
876 884 displayargs['format'] = DisplayFormats.Hotpath
877 885 elif argv[1] == 'lines':
878 886 displayargs['format'] = DisplayFormats.ByLine
879 887 elif argv[1] == 'functions':
880 888 displayargs['format'] = DisplayFormats.ByMethod
881 889 elif argv[1] == 'function':
882 890 displayargs['format'] = DisplayFormats.AboutMethod
883 891 displayargs['function'] = argv[2]
884 892 optstart = 3
885 893 elif argv[1] == 'flame':
886 894 displayargs['format'] = DisplayFormats.FlameGraph
887 895 else:
888 896 printusage()
889 897 return 0
890 898
891 899 # process options
892 900 try:
893 901 opts, args = pycompat.getoptb(sys.argv[optstart:], "hl:f:o:p:",
894 902 ["help", "limit=", "file=", "output-file=", "script-path="])
895 903 except getopt.error as msg:
896 904 print(msg)
897 905 printusage()
898 906 return 2
899 907
900 908 displayargs['limit'] = 0.05
901 909 path = None
902 910 for o, value in opts:
903 911 if o in ("-l", "--limit"):
904 912 displayargs['limit'] = float(value)
905 913 elif o in ("-f", "--file"):
906 914 path = value
907 915 elif o in ("-o", "--output-file"):
908 916 displayargs['outputfile'] = value
909 917 elif o in ("-p", "--script-path"):
910 918 displayargs['scriptpath'] = value
911 919 elif o in ("-h", "help"):
912 920 printusage()
913 921 return 0
914 922 else:
915 923 assert False, "unhandled option %s" % o
916 924
917 925 if not path:
918 926 print('must specify --file to load')
919 927 return 1
920 928
921 929 load_data(path=path)
922 930
923 931 display(**pycompat.strkwargs(displayargs))
924 932
925 933 return 0
926 934
927 935 if __name__ == "__main__":
928 936 sys.exit(main())
General Comments 0
You need to be logged in to leave comments. Login now