##// END OF EJS Templates
py3: use raw strings for stack names...
Gregory Szorc -
r40232:2864f8d3 default
parent child Browse files
Show More
@@ -1,952 +1,961 b''
1 1 #!/usr/bin/env python
2 2 ## statprof.py
3 3 ## Copyright (C) 2012 Bryan O'Sullivan <bos@serpentine.com>
4 4 ## Copyright (C) 2011 Alex Fraser <alex at phatcore dot com>
5 5 ## Copyright (C) 2004,2005 Andy Wingo <wingo at pobox dot com>
6 6 ## Copyright (C) 2001 Rob Browning <rlb at defaultvalue dot org>
7 7
8 8 ## This library is free software; you can redistribute it and/or
9 9 ## modify it under the terms of the GNU Lesser General Public
10 10 ## License as published by the Free Software Foundation; either
11 11 ## version 2.1 of the License, or (at your option) any later version.
12 12 ##
13 13 ## This library is distributed in the hope that it will be useful,
14 14 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
15 15 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 16 ## Lesser General Public License for more details.
17 17 ##
18 18 ## You should have received a copy of the GNU Lesser General Public
19 19 ## License along with this program; if not, contact:
20 20 ##
21 21 ## Free Software Foundation Voice: +1-617-542-5942
22 22 ## 59 Temple Place - Suite 330 Fax: +1-617-542-2652
23 23 ## Boston, MA 02111-1307, USA gnu@gnu.org
24 24
25 25 """
26 26 statprof is intended to be a fairly simple statistical profiler for
27 27 python. It was ported directly from a statistical profiler for guile,
28 28 also named statprof, available from guile-lib [0].
29 29
30 30 [0] http://wingolog.org/software/guile-lib/statprof/
31 31
32 32 To start profiling, call statprof.start():
33 33 >>> start()
34 34
35 35 Then run whatever it is that you want to profile, for example:
36 36 >>> import test.pystone; test.pystone.pystones()
37 37
38 38 Then stop the profiling and print out the results:
39 39 >>> stop()
40 40 >>> display()
41 41 % cumulative self
42 42 time seconds seconds name
43 43 26.72 1.40 0.37 pystone.py:79:Proc0
44 44 13.79 0.56 0.19 pystone.py:133:Proc1
45 45 13.79 0.19 0.19 pystone.py:208:Proc8
46 46 10.34 0.16 0.14 pystone.py:229:Func2
47 47 6.90 0.10 0.10 pystone.py:45:__init__
48 48 4.31 0.16 0.06 pystone.py:53:copy
49 49 ...
50 50
51 51 All of the numerical data is statistically approximate. In the
52 52 following column descriptions, and in all of statprof, "time" refers
53 53 to execution time (both user and system), not wall clock time.
54 54
55 55 % time
56 56 The percent of the time spent inside the procedure itself (not
57 57 counting children).
58 58
59 59 cumulative seconds
60 60 The total number of seconds spent in the procedure, including
61 61 children.
62 62
63 63 self seconds
64 64 The total number of seconds spent in the procedure itself (not
65 65 counting children).
66 66
67 67 name
68 68 The name of the procedure.
69 69
70 70 By default statprof keeps the data collected from previous runs. If you
71 71 want to clear the collected data, call reset():
72 72 >>> reset()
73 73
74 74 reset() can also be used to change the sampling frequency from the
75 75 default of 1000 Hz. For example, to tell statprof to sample 50 times a
76 76 second:
77 77 >>> reset(50)
78 78
79 79 This means that statprof will sample the call stack after every 1/50 of
80 80 a second of user + system time spent running on behalf of the python
81 81 process. When your process is idle (for example, blocking in a read(),
82 82 as is the case at the listener), the clock does not advance. For this
83 83 reason statprof is not currently not suitable for profiling io-bound
84 84 operations.
85 85
86 86 The profiler uses the hash of the code object itself to identify the
87 87 procedures, so it won't confuse different procedures with the same name.
88 88 They will show up as two different rows in the output.
89 89
90 90 Right now the profiler is quite simplistic. I cannot provide
91 91 call-graphs or other higher level information. What you see in the
92 92 table is pretty much all there is. Patches are welcome :-)
93 93
94 94
95 95 Threading
96 96 ---------
97 97
98 98 Because signals only get delivered to the main thread in Python,
99 99 statprof only profiles the main thread. However because the time
100 100 reporting function uses per-process timers, the results can be
101 101 significantly off if other threads' work patterns are not similar to the
102 102 main thread's work patterns.
103 103 """
104 104 # no-check-code
105 105 from __future__ import absolute_import, division, print_function
106 106
107 107 import collections
108 108 import contextlib
109 109 import getopt
110 110 import inspect
111 111 import json
112 112 import os
113 113 import signal
114 114 import sys
115 115 import threading
116 116 import time
117 117
118 118 from . import (
119 119 encoding,
120 120 pycompat,
121 121 )
122 122
123 123 defaultdict = collections.defaultdict
124 124 contextmanager = contextlib.contextmanager
125 125
126 126 __all__ = ['start', 'stop', 'reset', 'display', 'profile']
127 127
128 skips = {"util.py:check", "extensions.py:closure",
129 "color.py:colorcmd", "dispatch.py:checkargs",
130 "dispatch.py:<lambda>", "dispatch.py:_runcatch",
131 "dispatch.py:_dispatch", "dispatch.py:_runcommand",
132 "pager.py:pagecmd", "dispatch.py:run",
133 "dispatch.py:dispatch", "dispatch.py:runcommand",
134 "hg.py:<module>", "evolve.py:warnobserrors",
128 skips = {
129 r"util.py:check",
130 r"extensions.py:closure",
131 r"color.py:colorcmd",
132 r"dispatch.py:checkargs",
133 r"dispatch.py:<lambda>",
134 r"dispatch.py:_runcatch",
135 r"dispatch.py:_dispatch",
136 r"dispatch.py:_runcommand",
137 r"pager.py:pagecmd",
138 r"dispatch.py:run",
139 r"dispatch.py:dispatch",
140 r"dispatch.py:runcommand",
141 r"hg.py:<module>",
142 r"evolve.py:warnobserrors",
135 143 }
136 144
137 145 ###########################################################################
138 146 ## Utils
139 147
140 148 def clock():
141 149 times = os.times()
142 150 return (times[0] + times[1], times[4])
143 151
144 152
145 153 ###########################################################################
146 154 ## Collection data structures
147 155
148 156 class ProfileState(object):
149 157 def __init__(self, frequency=None):
150 158 self.reset(frequency)
151 159 self.track = 'cpu'
152 160
153 161 def reset(self, frequency=None):
154 162 # total so far
155 163 self.accumulated_time = (0.0, 0.0)
156 164 # start_time when timer is active
157 165 self.last_start_time = None
158 166 # a float
159 167 if frequency:
160 168 self.sample_interval = 1.0 / frequency
161 169 elif not hasattr(self, 'sample_interval'):
162 170 # default to 1000 Hz
163 171 self.sample_interval = 1.0 / 1000.0
164 172 else:
165 173 # leave the frequency as it was
166 174 pass
167 175 self.remaining_prof_time = None
168 176 # for user start/stop nesting
169 177 self.profile_level = 0
170 178
171 179 self.samples = []
172 180
173 181 def accumulate_time(self, stop_time):
174 182 increment = (
175 183 stop_time[0] - self.last_start_time[0],
176 184 stop_time[1] - self.last_start_time[1],
177 185 )
178 186 self.accumulated_time = (
179 187 self.accumulated_time[0] + increment[0],
180 188 self.accumulated_time[1] + increment[1],
181 189 )
182 190
183 191 def seconds_per_sample(self):
184 192 return self.accumulated_time[self.timeidx] / len(self.samples)
185 193
186 194 @property
187 195 def timeidx(self):
188 196 if self.track == 'real':
189 197 return 1
190 198 return 0
191 199
192 200 state = ProfileState()
193 201
194 202
195 203 class CodeSite(object):
196 204 cache = {}
197 205
198 206 __slots__ = (u'path', u'lineno', u'function', u'source')
199 207
200 208 def __init__(self, path, lineno, function):
201 209 self.path = path
202 210 self.lineno = lineno
203 211 self.function = function
204 212 self.source = None
205 213
206 214 def __eq__(self, other):
207 215 try:
208 216 return (self.lineno == other.lineno and
209 217 self.path == other.path)
210 218 except:
211 219 return False
212 220
213 221 def __hash__(self):
214 222 return hash((self.lineno, self.path))
215 223
216 224 @classmethod
217 225 def get(cls, path, lineno, function):
218 226 k = (path, lineno)
219 227 try:
220 228 return cls.cache[k]
221 229 except KeyError:
222 230 v = cls(path, lineno, function)
223 231 cls.cache[k] = v
224 232 return v
225 233
226 234 def getsource(self, length):
227 235 if self.source is None:
228 236 lineno = self.lineno - 1
229 237 fp = None
230 238 try:
231 239 fp = open(self.path)
232 240 for i, line in enumerate(fp):
233 241 if i == lineno:
234 242 self.source = line.strip()
235 243 break
236 244 except:
237 245 pass
238 246 finally:
239 247 if fp:
240 248 fp.close()
241 249 if self.source is None:
242 250 self.source = ''
243 251
244 252 source = self.source
245 253 if len(source) > length:
246 254 source = source[:(length - 3)] + "..."
247 255 return source
248 256
249 257 def filename(self):
250 258 return os.path.basename(self.path)
251 259
252 260 class Sample(object):
253 261 __slots__ = (u'stack', u'time')
254 262
255 263 def __init__(self, stack, time):
256 264 self.stack = stack
257 265 self.time = time
258 266
259 267 @classmethod
260 268 def from_frame(cls, frame, time):
261 269 stack = []
262 270
263 271 while frame:
264 272 stack.append(CodeSite.get(frame.f_code.co_filename, frame.f_lineno,
265 273 frame.f_code.co_name))
266 274 frame = frame.f_back
267 275
268 276 return Sample(stack, time)
269 277
270 278 ###########################################################################
271 279 ## SIGPROF handler
272 280
273 281 def profile_signal_handler(signum, frame):
274 282 if state.profile_level > 0:
275 283 now = clock()
276 284 state.accumulate_time(now)
277 285
278 286 timestamp = state.accumulated_time[state.timeidx]
279 287 state.samples.append(Sample.from_frame(frame, timestamp))
280 288
281 289 signal.setitimer(signal.ITIMER_PROF,
282 290 state.sample_interval, 0.0)
283 291 state.last_start_time = now
284 292
285 293 stopthread = threading.Event()
286 294 def samplerthread(tid):
287 295 while not stopthread.is_set():
288 296 now = clock()
289 297 state.accumulate_time(now)
290 298
291 299 frame = sys._current_frames()[tid]
292 300
293 301 timestamp = state.accumulated_time[state.timeidx]
294 302 state.samples.append(Sample.from_frame(frame, timestamp))
295 303
296 304 state.last_start_time = now
297 305 time.sleep(state.sample_interval)
298 306
299 307 stopthread.clear()
300 308
301 309 ###########################################################################
302 310 ## Profiling API
303 311
304 312 def is_active():
305 313 return state.profile_level > 0
306 314
307 315 lastmechanism = None
308 316 def start(mechanism='thread', track='cpu'):
309 317 '''Install the profiling signal handler, and start profiling.'''
310 318 state.track = track # note: nesting different mode won't work
311 319 state.profile_level += 1
312 320 if state.profile_level == 1:
313 321 state.last_start_time = clock()
314 322 rpt = state.remaining_prof_time
315 323 state.remaining_prof_time = None
316 324
317 325 global lastmechanism
318 326 lastmechanism = mechanism
319 327
320 328 if mechanism == 'signal':
321 329 signal.signal(signal.SIGPROF, profile_signal_handler)
322 330 signal.setitimer(signal.ITIMER_PROF,
323 331 rpt or state.sample_interval, 0.0)
324 332 elif mechanism == 'thread':
325 333 frame = inspect.currentframe()
326 334 tid = [k for k, f in sys._current_frames().items() if f == frame][0]
327 335 state.thread = threading.Thread(target=samplerthread,
328 336 args=(tid,), name="samplerthread")
329 337 state.thread.start()
330 338
331 339 def stop():
332 340 '''Stop profiling, and uninstall the profiling signal handler.'''
333 341 state.profile_level -= 1
334 342 if state.profile_level == 0:
335 343 if lastmechanism == 'signal':
336 344 rpt = signal.setitimer(signal.ITIMER_PROF, 0.0, 0.0)
337 345 signal.signal(signal.SIGPROF, signal.SIG_IGN)
338 346 state.remaining_prof_time = rpt[0]
339 347 elif lastmechanism == 'thread':
340 348 stopthread.set()
341 349 state.thread.join()
342 350
343 351 state.accumulate_time(clock())
344 352 state.last_start_time = None
345 353 statprofpath = encoding.environ.get('STATPROF_DEST')
346 354 if statprofpath:
347 355 save_data(statprofpath)
348 356
349 357 return state
350 358
351 359 def save_data(path):
352 360 with open(path, 'w+') as file:
353 361 file.write("%f %f\n" % state.accumulated_time)
354 362 for sample in state.samples:
355 363 time = str(sample.time)
356 364 stack = sample.stack
357 365 sites = ['\1'.join([s.path, str(s.lineno), s.function])
358 366 for s in stack]
359 367 file.write("%s\0%s\n" % (time, '\0'.join(sites)))
360 368
361 369 def load_data(path):
362 370 lines = open(path, 'r').read().splitlines()
363 371
364 372 state.accumulated_time = [float(value) for value in lines[0].split()]
365 373 state.samples = []
366 374 for line in lines[1:]:
367 375 parts = line.split('\0')
368 376 time = float(parts[0])
369 377 rawsites = parts[1:]
370 378 sites = []
371 379 for rawsite in rawsites:
372 380 siteparts = rawsite.split('\1')
373 381 sites.append(CodeSite.get(siteparts[0], int(siteparts[1]),
374 382 siteparts[2]))
375 383
376 384 state.samples.append(Sample(sites, time))
377 385
378 386
379 387
380 388 def reset(frequency=None):
381 389 '''Clear out the state of the profiler. Do not call while the
382 390 profiler is running.
383 391
384 392 The optional frequency argument specifies the number of samples to
385 393 collect per second.'''
386 394 assert state.profile_level == 0, "Can't reset() while statprof is running"
387 395 CodeSite.cache.clear()
388 396 state.reset(frequency)
389 397
390 398
391 399 @contextmanager
392 400 def profile():
393 401 start()
394 402 try:
395 403 yield
396 404 finally:
397 405 stop()
398 406 display()
399 407
400 408
401 409 ###########################################################################
402 410 ## Reporting API
403 411
404 412 class SiteStats(object):
405 413 def __init__(self, site):
406 414 self.site = site
407 415 self.selfcount = 0
408 416 self.totalcount = 0
409 417
410 418 def addself(self):
411 419 self.selfcount += 1
412 420
413 421 def addtotal(self):
414 422 self.totalcount += 1
415 423
416 424 def selfpercent(self):
417 425 return self.selfcount / len(state.samples) * 100
418 426
419 427 def totalpercent(self):
420 428 return self.totalcount / len(state.samples) * 100
421 429
422 430 def selfseconds(self):
423 431 return self.selfcount * state.seconds_per_sample()
424 432
425 433 def totalseconds(self):
426 434 return self.totalcount * state.seconds_per_sample()
427 435
428 436 @classmethod
429 437 def buildstats(cls, samples):
430 438 stats = {}
431 439
432 440 for sample in samples:
433 441 for i, site in enumerate(sample.stack):
434 442 sitestat = stats.get(site)
435 443 if not sitestat:
436 444 sitestat = SiteStats(site)
437 445 stats[site] = sitestat
438 446
439 447 sitestat.addtotal()
440 448
441 449 if i == 0:
442 450 sitestat.addself()
443 451
444 452 return [s for s in stats.itervalues()]
445 453
446 454 class DisplayFormats:
447 455 ByLine = 0
448 456 ByMethod = 1
449 457 AboutMethod = 2
450 458 Hotpath = 3
451 459 FlameGraph = 4
452 460 Json = 5
453 461 Chrome = 6
454 462
455 463 def display(fp=None, format=3, data=None, **kwargs):
456 464 '''Print statistics, either to stdout or the given file object.'''
457 465 if data is None:
458 466 data = state
459 467
460 468 if fp is None:
461 469 import sys
462 470 fp = sys.stdout
463 471 if len(data.samples) == 0:
464 472 print('No samples recorded.', file=fp)
465 473 return
466 474
467 475 if format == DisplayFormats.ByLine:
468 476 display_by_line(data, fp)
469 477 elif format == DisplayFormats.ByMethod:
470 478 display_by_method(data, fp)
471 479 elif format == DisplayFormats.AboutMethod:
472 480 display_about_method(data, fp, **kwargs)
473 481 elif format == DisplayFormats.Hotpath:
474 482 display_hotpath(data, fp, **kwargs)
475 483 elif format == DisplayFormats.FlameGraph:
476 484 write_to_flame(data, fp, **kwargs)
477 485 elif format == DisplayFormats.Json:
478 486 write_to_json(data, fp)
479 487 elif format == DisplayFormats.Chrome:
480 488 write_to_chrome(data, fp, **kwargs)
481 489 else:
482 490 raise Exception("Invalid display format")
483 491
484 492 if format not in (DisplayFormats.Json, DisplayFormats.Chrome):
485 493 print('---', file=fp)
486 494 print('Sample count: %d' % len(data.samples), file=fp)
487 495 print('Total time: %f seconds (%f wall)' % data.accumulated_time,
488 496 file=fp)
489 497
490 498 def display_by_line(data, fp):
491 499 '''Print the profiler data with each sample line represented
492 500 as one row in a table. Sorted by self-time per line.'''
493 501 stats = SiteStats.buildstats(data.samples)
494 502 stats.sort(reverse=True, key=lambda x: x.selfseconds())
495 503
496 504 print('%5.5s %10.10s %7.7s %-8.8s' %
497 505 ('% ', 'cumulative', 'self', ''), file=fp)
498 506 print('%5.5s %9.9s %8.8s %-8.8s' %
499 507 ("time", "seconds", "seconds", "name"), file=fp)
500 508
501 509 for stat in stats:
502 510 site = stat.site
503 511 sitelabel = '%s:%d:%s' % (site.filename(), site.lineno, site.function)
504 512 print('%6.2f %9.2f %9.2f %s' % (stat.selfpercent(),
505 513 stat.totalseconds(),
506 514 stat.selfseconds(),
507 515 sitelabel),
508 516 file=fp)
509 517
510 518 def display_by_method(data, fp):
511 519 '''Print the profiler data with each sample function represented
512 520 as one row in a table. Important lines within that function are
513 521 output as nested rows. Sorted by self-time per line.'''
514 522 print('%5.5s %10.10s %7.7s %-8.8s' %
515 523 ('% ', 'cumulative', 'self', ''), file=fp)
516 524 print('%5.5s %9.9s %8.8s %-8.8s' %
517 525 ("time", "seconds", "seconds", "name"), file=fp)
518 526
519 527 stats = SiteStats.buildstats(data.samples)
520 528
521 529 grouped = defaultdict(list)
522 530 for stat in stats:
523 531 grouped[stat.site.filename() + ":" + stat.site.function].append(stat)
524 532
525 533 # compute sums for each function
526 534 functiondata = []
527 535 for fname, sitestats in grouped.iteritems():
528 536 total_cum_sec = 0
529 537 total_self_sec = 0
530 538 total_percent = 0
531 539 for stat in sitestats:
532 540 total_cum_sec += stat.totalseconds()
533 541 total_self_sec += stat.selfseconds()
534 542 total_percent += stat.selfpercent()
535 543
536 544 functiondata.append((fname,
537 545 total_cum_sec,
538 546 total_self_sec,
539 547 total_percent,
540 548 sitestats))
541 549
542 550 # sort by total self sec
543 551 functiondata.sort(reverse=True, key=lambda x: x[2])
544 552
545 553 for function in functiondata:
546 554 if function[3] < 0.05:
547 555 continue
548 556 print('%6.2f %9.2f %9.2f %s' % (function[3], # total percent
549 557 function[1], # total cum sec
550 558 function[2], # total self sec
551 559 function[0]), # file:function
552 560 file=fp)
553 561 function[4].sort(reverse=True, key=lambda i: i.selfseconds())
554 562 for stat in function[4]:
555 563 # only show line numbers for significant locations (>1% time spent)
556 564 if stat.selfpercent() > 1:
557 565 source = stat.site.getsource(25)
558 566 stattuple = (stat.selfpercent(), stat.selfseconds(),
559 567 stat.site.lineno, source)
560 568
561 569 print('%33.0f%% %6.2f line %s: %s' % (stattuple), file=fp)
562 570
563 571 def display_about_method(data, fp, function=None, **kwargs):
564 572 if function is None:
565 573 raise Exception("Invalid function")
566 574
567 575 filename = None
568 576 if ':' in function:
569 577 filename, function = function.split(':')
570 578
571 579 relevant_samples = 0
572 580 parents = {}
573 581 children = {}
574 582
575 583 for sample in data.samples:
576 584 for i, site in enumerate(sample.stack):
577 585 if site.function == function and (not filename
578 586 or site.filename() == filename):
579 587 relevant_samples += 1
580 588 if i != len(sample.stack) - 1:
581 589 parent = sample.stack[i + 1]
582 590 if parent in parents:
583 591 parents[parent] = parents[parent] + 1
584 592 else:
585 593 parents[parent] = 1
586 594
587 595 if site in children:
588 596 children[site] = children[site] + 1
589 597 else:
590 598 children[site] = 1
591 599
592 600 parents = [(parent, count) for parent, count in parents.iteritems()]
593 601 parents.sort(reverse=True, key=lambda x: x[1])
594 602 for parent, count in parents:
595 603 print('%6.2f%% %s:%s line %s: %s' %
596 604 (count / relevant_samples * 100, parent.filename(),
597 605 parent.function, parent.lineno, parent.getsource(50)), file=fp)
598 606
599 607 stats = SiteStats.buildstats(data.samples)
600 608 stats = [s for s in stats
601 609 if s.site.function == function and
602 610 (not filename or s.site.filename() == filename)]
603 611
604 612 total_cum_sec = 0
605 613 total_self_sec = 0
606 614 total_self_percent = 0
607 615 total_cum_percent = 0
608 616 for stat in stats:
609 617 total_cum_sec += stat.totalseconds()
610 618 total_self_sec += stat.selfseconds()
611 619 total_self_percent += stat.selfpercent()
612 620 total_cum_percent += stat.totalpercent()
613 621
614 622 print(
615 623 '\n %s:%s Total: %0.2fs (%0.2f%%) Self: %0.2fs (%0.2f%%)\n' %
616 624 (
617 625 filename or '___',
618 626 function,
619 627 total_cum_sec,
620 628 total_cum_percent,
621 629 total_self_sec,
622 630 total_self_percent
623 631 ), file=fp)
624 632
625 633 children = [(child, count) for child, count in children.iteritems()]
626 634 children.sort(reverse=True, key=lambda x: x[1])
627 635 for child, count in children:
628 636 print(' %6.2f%% line %s: %s' %
629 637 (count / relevant_samples * 100, child.lineno,
630 638 child.getsource(50)), file=fp)
631 639
632 640 def display_hotpath(data, fp, limit=0.05, **kwargs):
633 641 class HotNode(object):
634 642 def __init__(self, site):
635 643 self.site = site
636 644 self.count = 0
637 645 self.children = {}
638 646
639 647 def add(self, stack, time):
640 648 self.count += time
641 649 site = stack[0]
642 650 child = self.children.get(site)
643 651 if not child:
644 652 child = HotNode(site)
645 653 self.children[site] = child
646 654
647 655 if len(stack) > 1:
648 656 i = 1
649 657 # Skip boiler plate parts of the stack
650 while i < len(stack) and '%s:%s' % (stack[i].filename(), stack[i].function) in skips:
658 name = r'%s:%s' % (stack[i].filename(), stack[i].function)
659 while i < len(stack) and name in skips:
651 660 i += 1
652 661 if i < len(stack):
653 662 child.add(stack[i:], time)
654 663
655 664 root = HotNode(None)
656 665 lasttime = data.samples[0].time
657 666 for sample in data.samples:
658 667 root.add(sample.stack[::-1], sample.time - lasttime)
659 668 lasttime = sample.time
660 669
661 670 def _write(node, depth, multiple_siblings):
662 671 site = node.site
663 672 visiblechildren = [c for c in node.children.itervalues()
664 673 if c.count >= (limit * root.count)]
665 674 if site:
666 675 indent = depth * 2 - 1
667 676 filename = ''
668 677 function = ''
669 678 if len(node.children) > 0:
670 679 childsite = list(node.children.itervalues())[0].site
671 680 filename = (childsite.filename() + ':').ljust(15)
672 681 function = childsite.function
673 682
674 683 # lots of string formatting
675 684 listpattern = ''.ljust(indent) +\
676 685 ('\\' if multiple_siblings else '|') +\
677 686 ' %4.1f%% %s %s'
678 687 liststring = listpattern % (node.count / root.count * 100,
679 688 filename, function)
680 689 codepattern = '%' + str(55 - len(liststring)) + 's %s: %s'
681 690 codestring = codepattern % ('line', site.lineno, site.getsource(30))
682 691
683 692 finalstring = liststring + codestring
684 693 childrensamples = sum([c.count for c in node.children.itervalues()])
685 694 # Make frames that performed more than 10% of the operation red
686 695 if node.count - childrensamples > (0.1 * root.count):
687 696 finalstring = '\033[91m' + finalstring + '\033[0m'
688 697 # Make frames that didn't actually perform work dark grey
689 698 elif node.count - childrensamples == 0:
690 699 finalstring = '\033[90m' + finalstring + '\033[0m'
691 700 print(finalstring, file=fp)
692 701
693 702 newdepth = depth
694 703 if len(visiblechildren) > 1 or multiple_siblings:
695 704 newdepth += 1
696 705
697 706 visiblechildren.sort(reverse=True, key=lambda x: x.count)
698 707 for child in visiblechildren:
699 708 _write(child, newdepth, len(visiblechildren) > 1)
700 709
701 710 if root.count > 0:
702 711 _write(root, 0, False)
703 712
704 713 def write_to_flame(data, fp, scriptpath=None, outputfile=None, **kwargs):
705 714 if scriptpath is None:
706 715 scriptpath = encoding.environ['HOME'] + '/flamegraph.pl'
707 716 if not os.path.exists(scriptpath):
708 717 print("error: missing %s" % scriptpath, file=fp)
709 718 print("get it here: https://github.com/brendangregg/FlameGraph",
710 719 file=fp)
711 720 return
712 721
713 722 fd, path = pycompat.mkstemp()
714 723
715 724 file = open(path, "w+")
716 725
717 726 lines = {}
718 727 for sample in data.samples:
719 728 sites = [s.function for s in sample.stack]
720 729 sites.reverse()
721 730 line = ';'.join(sites)
722 731 if line in lines:
723 732 lines[line] = lines[line] + 1
724 733 else:
725 734 lines[line] = 1
726 735
727 736 for line, count in lines.iteritems():
728 737 file.write("%s %s\n" % (line, count))
729 738
730 739 file.close()
731 740
732 741 if outputfile is None:
733 742 outputfile = '~/flamegraph.svg'
734 743
735 744 os.system("perl ~/flamegraph.pl %s > %s" % (path, outputfile))
736 745 print("Written to %s" % outputfile, file=fp)
737 746
738 747 _pathcache = {}
739 748 def simplifypath(path):
740 749 '''Attempt to make the path to a Python module easier to read by
741 750 removing whatever part of the Python search path it was found
742 751 on.'''
743 752
744 753 if path in _pathcache:
745 754 return _pathcache[path]
746 755 hgpath = pycompat.fsencode(encoding.__file__).rsplit(os.sep, 2)[0]
747 756 for p in [hgpath] + sys.path:
748 757 prefix = p + os.sep
749 758 if path.startswith(prefix):
750 759 path = path[len(prefix):]
751 760 break
752 761 _pathcache[path] = path
753 762 return path
754 763
755 764 def write_to_json(data, fp):
756 765 samples = []
757 766
758 767 for sample in data.samples:
759 768 stack = []
760 769
761 770 for frame in sample.stack:
762 771 stack.append((frame.path, frame.lineno, frame.function))
763 772
764 773 samples.append((sample.time, stack))
765 774
766 775 data = json.dumps(samples)
767 776 if not isinstance(data, bytes):
768 777 data = data.encode('utf-8')
769 778
770 779 fp.write(data)
771 780
772 781 def write_to_chrome(data, fp, minthreshold=0.005, maxthreshold=0.999):
773 782 samples = []
774 783 laststack = collections.deque()
775 784 lastseen = collections.deque()
776 785
777 786 # The Chrome tracing format allows us to use a compact stack
778 787 # representation to save space. It's fiddly but worth it.
779 788 # We maintain a bijection between stack and ID.
780 789 stack2id = {}
781 790 id2stack = [] # will eventually be rendered
782 791
783 792 def stackid(stack):
784 793 if not stack:
785 794 return
786 795 if stack in stack2id:
787 796 return stack2id[stack]
788 797 parent = stackid(stack[1:])
789 798 myid = len(stack2id)
790 799 stack2id[stack] = myid
791 800 id2stack.append(dict(category=stack[0][0], name='%s %s' % stack[0]))
792 801 if parent is not None:
793 802 id2stack[-1].update(parent=parent)
794 803 return myid
795 804
796 805 def endswith(a, b):
797 806 return list(a)[-len(b):] == list(b)
798 807
799 808 # The sampling profiler can sample multiple times without
800 809 # advancing the clock, potentially causing the Chrome trace viewer
801 810 # to render single-pixel columns that we cannot zoom in on. We
802 811 # work around this by pretending that zero-duration samples are a
803 812 # millisecond in length.
804 813
805 814 clamp = 0.001
806 815
807 816 # We provide knobs that by default attempt to filter out stack
808 817 # frames that are too noisy:
809 818 #
810 819 # * A few take almost all execution time. These are usually boring
811 820 # setup functions, giving a stack that is deep but uninformative.
812 821 #
813 822 # * Numerous samples take almost no time, but introduce lots of
814 823 # noisy, oft-deep "spines" into a rendered profile.
815 824
816 825 blacklist = set()
817 826 totaltime = data.samples[-1].time - data.samples[0].time
818 827 minthreshold = totaltime * minthreshold
819 828 maxthreshold = max(totaltime * maxthreshold, clamp)
820 829
821 830 def poplast():
822 831 oldsid = stackid(tuple(laststack))
823 832 oldcat, oldfunc = laststack.popleft()
824 833 oldtime, oldidx = lastseen.popleft()
825 834 duration = sample.time - oldtime
826 835 if minthreshold <= duration <= maxthreshold:
827 836 # ensure no zero-duration events
828 837 sampletime = max(oldtime + clamp, sample.time)
829 838 samples.append(dict(ph='E', name=oldfunc, cat=oldcat, sf=oldsid,
830 839 ts=sampletime*1e6, pid=0))
831 840 else:
832 841 blacklist.add(oldidx)
833 842
834 843 # Much fiddling to synthesize correctly(ish) nested begin/end
835 844 # events given only stack snapshots.
836 845
837 846 for sample in data.samples:
838 847 tos = sample.stack[0]
839 848 name = tos.function
840 849 path = simplifypath(tos.path)
841 850 stack = tuple((('%s:%d' % (simplifypath(frame.path), frame.lineno),
842 851 frame.function) for frame in sample.stack))
843 852 qstack = collections.deque(stack)
844 853 if laststack == qstack:
845 854 continue
846 855 while laststack and qstack and laststack[-1] == qstack[-1]:
847 856 laststack.pop()
848 857 qstack.pop()
849 858 while laststack:
850 859 poplast()
851 860 for f in reversed(qstack):
852 861 lastseen.appendleft((sample.time, len(samples)))
853 862 laststack.appendleft(f)
854 863 path, name = f
855 864 sid = stackid(tuple(laststack))
856 865 samples.append(dict(ph='B', name=name, cat=path, ts=sample.time*1e6,
857 866 sf=sid, pid=0))
858 867 laststack = collections.deque(stack)
859 868 while laststack:
860 869 poplast()
861 870 events = [s[1] for s in enumerate(samples) if s[0] not in blacklist]
862 871 frames = collections.OrderedDict((str(k), v)
863 872 for (k,v) in enumerate(id2stack))
864 873 json.dump(dict(traceEvents=events, stackFrames=frames), fp, indent=1)
865 874 fp.write('\n')
866 875
867 876 def printusage():
868 877 print("""
869 878 The statprof command line allows you to inspect the last profile's results in
870 879 the following forms:
871 880
872 881 usage:
873 882 hotpath [-l --limit percent]
874 883 Shows a graph of calls with the percent of time each takes.
875 884 Red calls take over 10%% of the total time themselves.
876 885 lines
877 886 Shows the actual sampled lines.
878 887 functions
879 888 Shows the samples grouped by function.
880 889 function [filename:]functionname
881 890 Shows the callers and callees of a particular function.
882 891 flame [-s --script-path] [-o --output-file path]
883 892 Writes out a flamegraph to output-file (defaults to ~/flamegraph.svg)
884 893 Requires that ~/flamegraph.pl exist.
885 894 (Specify alternate script path with --script-path.)""")
886 895
887 896 def main(argv=None):
888 897 if argv is None:
889 898 argv = sys.argv
890 899
891 900 if len(argv) == 1:
892 901 printusage()
893 902 return 0
894 903
895 904 displayargs = {}
896 905
897 906 optstart = 2
898 907 displayargs['function'] = None
899 908 if argv[1] == 'hotpath':
900 909 displayargs['format'] = DisplayFormats.Hotpath
901 910 elif argv[1] == 'lines':
902 911 displayargs['format'] = DisplayFormats.ByLine
903 912 elif argv[1] == 'functions':
904 913 displayargs['format'] = DisplayFormats.ByMethod
905 914 elif argv[1] == 'function':
906 915 displayargs['format'] = DisplayFormats.AboutMethod
907 916 displayargs['function'] = argv[2]
908 917 optstart = 3
909 918 elif argv[1] == 'flame':
910 919 displayargs['format'] = DisplayFormats.FlameGraph
911 920 else:
912 921 printusage()
913 922 return 0
914 923
915 924 # process options
916 925 try:
917 926 opts, args = pycompat.getoptb(sys.argv[optstart:], "hl:f:o:p:",
918 927 ["help", "limit=", "file=", "output-file=", "script-path="])
919 928 except getopt.error as msg:
920 929 print(msg)
921 930 printusage()
922 931 return 2
923 932
924 933 displayargs['limit'] = 0.05
925 934 path = None
926 935 for o, value in opts:
927 936 if o in ("-l", "--limit"):
928 937 displayargs['limit'] = float(value)
929 938 elif o in ("-f", "--file"):
930 939 path = value
931 940 elif o in ("-o", "--output-file"):
932 941 displayargs['outputfile'] = value
933 942 elif o in ("-p", "--script-path"):
934 943 displayargs['scriptpath'] = value
935 944 elif o in ("-h", "help"):
936 945 printusage()
937 946 return 0
938 947 else:
939 948 assert False, "unhandled option %s" % o
940 949
941 950 if not path:
942 951 print('must specify --file to load')
943 952 return 1
944 953
945 954 load_data(path=path)
946 955
947 956 display(**pycompat.strkwargs(displayargs))
948 957
949 958 return 0
950 959
951 960 if __name__ == "__main__":
952 961 sys.exit(main())
General Comments 0
You need to be logged in to leave comments. Login now