##// END OF EJS Templates
statprof: clean up unicode/bytes a little...
Augie Fackler -
r40519:8664fdc1 default
parent child Browse files
Show More
@@ -1,970 +1,975
1 1 #!/usr/bin/env python
2 2 ## statprof.py
3 3 ## Copyright (C) 2012 Bryan O'Sullivan <bos@serpentine.com>
4 4 ## Copyright (C) 2011 Alex Fraser <alex at phatcore dot com>
5 5 ## Copyright (C) 2004,2005 Andy Wingo <wingo at pobox dot com>
6 6 ## Copyright (C) 2001 Rob Browning <rlb at defaultvalue dot org>
7 7
8 8 ## This library is free software; you can redistribute it and/or
9 9 ## modify it under the terms of the GNU Lesser General Public
10 10 ## License as published by the Free Software Foundation; either
11 11 ## version 2.1 of the License, or (at your option) any later version.
12 12 ##
13 13 ## This library is distributed in the hope that it will be useful,
14 14 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
15 15 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 16 ## Lesser General Public License for more details.
17 17 ##
18 18 ## You should have received a copy of the GNU Lesser General Public
19 19 ## License along with this program; if not, contact:
20 20 ##
21 21 ## Free Software Foundation Voice: +1-617-542-5942
22 22 ## 59 Temple Place - Suite 330 Fax: +1-617-542-2652
23 23 ## Boston, MA 02111-1307, USA gnu@gnu.org
24 24
25 25 """
26 26 statprof is intended to be a fairly simple statistical profiler for
27 27 python. It was ported directly from a statistical profiler for guile,
28 28 also named statprof, available from guile-lib [0].
29 29
30 30 [0] http://wingolog.org/software/guile-lib/statprof/
31 31
32 32 To start profiling, call statprof.start():
33 33 >>> start()
34 34
35 35 Then run whatever it is that you want to profile, for example:
36 36 >>> import test.pystone; test.pystone.pystones()
37 37
38 38 Then stop the profiling and print out the results:
39 39 >>> stop()
40 40 >>> display()
41 41 % cumulative self
42 42 time seconds seconds name
43 43 26.72 1.40 0.37 pystone.py:79:Proc0
44 44 13.79 0.56 0.19 pystone.py:133:Proc1
45 45 13.79 0.19 0.19 pystone.py:208:Proc8
46 46 10.34 0.16 0.14 pystone.py:229:Func2
47 47 6.90 0.10 0.10 pystone.py:45:__init__
48 48 4.31 0.16 0.06 pystone.py:53:copy
49 49 ...
50 50
51 51 All of the numerical data is statistically approximate. In the
52 52 following column descriptions, and in all of statprof, "time" refers
53 53 to execution time (both user and system), not wall clock time.
54 54
55 55 % time
56 56 The percent of the time spent inside the procedure itself (not
57 57 counting children).
58 58
59 59 cumulative seconds
60 60 The total number of seconds spent in the procedure, including
61 61 children.
62 62
63 63 self seconds
64 64 The total number of seconds spent in the procedure itself (not
65 65 counting children).
66 66
67 67 name
68 68 The name of the procedure.
69 69
70 70 By default statprof keeps the data collected from previous runs. If you
71 71 want to clear the collected data, call reset():
72 72 >>> reset()
73 73
74 74 reset() can also be used to change the sampling frequency from the
75 75 default of 1000 Hz. For example, to tell statprof to sample 50 times a
76 76 second:
77 77 >>> reset(50)
78 78
79 79 This means that statprof will sample the call stack after every 1/50 of
80 80 a second of user + system time spent running on behalf of the python
81 81 process. When your process is idle (for example, blocking in a read(),
82 82 as is the case at the listener), the clock does not advance. For this
83 83 reason statprof is not currently not suitable for profiling io-bound
84 84 operations.
85 85
86 86 The profiler uses the hash of the code object itself to identify the
87 87 procedures, so it won't confuse different procedures with the same name.
88 88 They will show up as two different rows in the output.
89 89
90 90 Right now the profiler is quite simplistic. I cannot provide
91 91 call-graphs or other higher level information. What you see in the
92 92 table is pretty much all there is. Patches are welcome :-)
93 93
94 94
95 95 Threading
96 96 ---------
97 97
98 98 Because signals only get delivered to the main thread in Python,
99 99 statprof only profiles the main thread. However because the time
100 100 reporting function uses per-process timers, the results can be
101 101 significantly off if other threads' work patterns are not similar to the
102 102 main thread's work patterns.
103 103 """
104 104 # no-check-code
105 105 from __future__ import absolute_import, division, print_function
106 106
107 107 import collections
108 108 import contextlib
109 109 import getopt
110 110 import inspect
111 111 import json
112 112 import os
113 113 import signal
114 114 import sys
115 115 import threading
116 116 import time
117 117
118 118 from . import (
119 119 encoding,
120 120 pycompat,
121 121 )
122 122
123 123 defaultdict = collections.defaultdict
124 124 contextmanager = contextlib.contextmanager
125 125
126 126 __all__ = ['start', 'stop', 'reset', 'display', 'profile']
127 127
128 128 skips = {
129 129 r"util.py:check",
130 130 r"extensions.py:closure",
131 131 r"color.py:colorcmd",
132 132 r"dispatch.py:checkargs",
133 133 r"dispatch.py:<lambda>",
134 134 r"dispatch.py:_runcatch",
135 135 r"dispatch.py:_dispatch",
136 136 r"dispatch.py:_runcommand",
137 137 r"pager.py:pagecmd",
138 138 r"dispatch.py:run",
139 139 r"dispatch.py:dispatch",
140 140 r"dispatch.py:runcommand",
141 141 r"hg.py:<module>",
142 142 r"evolve.py:warnobserrors",
143 143 }
144 144
145 145 ###########################################################################
146 146 ## Utils
147 147
148 148 def clock():
149 149 times = os.times()
150 150 return (times[0] + times[1], times[4])
151 151
152 152
153 153 ###########################################################################
154 154 ## Collection data structures
155 155
156 156 class ProfileState(object):
157 157 def __init__(self, frequency=None):
158 158 self.reset(frequency)
159 159 self.track = 'cpu'
160 160
161 161 def reset(self, frequency=None):
162 162 # total so far
163 163 self.accumulated_time = (0.0, 0.0)
164 164 # start_time when timer is active
165 165 self.last_start_time = None
166 166 # a float
167 167 if frequency:
168 168 self.sample_interval = 1.0 / frequency
169 169 elif not hasattr(self, 'sample_interval'):
170 170 # default to 1000 Hz
171 171 self.sample_interval = 1.0 / 1000.0
172 172 else:
173 173 # leave the frequency as it was
174 174 pass
175 175 self.remaining_prof_time = None
176 176 # for user start/stop nesting
177 177 self.profile_level = 0
178 178
179 179 self.samples = []
180 180
181 181 def accumulate_time(self, stop_time):
182 182 increment = (
183 183 stop_time[0] - self.last_start_time[0],
184 184 stop_time[1] - self.last_start_time[1],
185 185 )
186 186 self.accumulated_time = (
187 187 self.accumulated_time[0] + increment[0],
188 188 self.accumulated_time[1] + increment[1],
189 189 )
190 190
191 191 def seconds_per_sample(self):
192 192 return self.accumulated_time[self.timeidx] / len(self.samples)
193 193
194 194 @property
195 195 def timeidx(self):
196 196 if self.track == 'real':
197 197 return 1
198 198 return 0
199 199
200 200 state = ProfileState()
201 201
202 202
203 203 class CodeSite(object):
204 204 cache = {}
205 205
206 206 __slots__ = (u'path', u'lineno', u'function', u'source')
207 207
208 208 def __init__(self, path, lineno, function):
209 209 assert isinstance(path, bytes)
210 210 self.path = path
211 211 self.lineno = lineno
212 212 assert isinstance(function, bytes)
213 213 self.function = function
214 214 self.source = None
215 215
216 216 def __eq__(self, other):
217 217 try:
218 218 return (self.lineno == other.lineno and
219 219 self.path == other.path)
220 220 except:
221 221 return False
222 222
223 223 def __hash__(self):
224 224 return hash((self.lineno, self.path))
225 225
226 226 @classmethod
227 227 def get(cls, path, lineno, function):
228 228 k = (path, lineno)
229 229 try:
230 230 return cls.cache[k]
231 231 except KeyError:
232 232 v = cls(path, lineno, function)
233 233 cls.cache[k] = v
234 234 return v
235 235
236 236 def getsource(self, length):
237 237 if self.source is None:
238 238 lineno = self.lineno - 1
239 239 fp = None
240 240 try:
241 fp = open(self.path)
241 fp = open(self.path, 'rb')
242 242 for i, line in enumerate(fp):
243 243 if i == lineno:
244 244 self.source = line.strip()
245 245 break
246 246 except:
247 247 pass
248 248 finally:
249 249 if fp:
250 250 fp.close()
251 251 if self.source is None:
252 252 self.source = ''
253 253
254 254 source = self.source
255 255 if len(source) > length:
256 256 source = source[:(length - 3)] + "..."
257 257 return source
258 258
259 259 def filename(self):
260 260 return os.path.basename(self.path)
261 261
262 262 def skipname(self):
263 263 return r'%s:%s' % (self.filename(), self.function)
264 264
265 265 class Sample(object):
266 266 __slots__ = (u'stack', u'time')
267 267
268 268 def __init__(self, stack, time):
269 269 self.stack = stack
270 270 self.time = time
271 271
272 272 @classmethod
273 273 def from_frame(cls, frame, time):
274 274 stack = []
275 275
276 276 while frame:
277 stack.append(CodeSite.get(frame.f_code.co_filename, frame.f_lineno,
278 frame.f_code.co_name))
277 stack.append(CodeSite.get(
278 pycompat.sysbytes(frame.f_code.co_filename),
279 frame.f_lineno,
280 pycompat.sysbytes(frame.f_code.co_name)))
279 281 frame = frame.f_back
280 282
281 283 return Sample(stack, time)
282 284
283 285 ###########################################################################
284 286 ## SIGPROF handler
285 287
286 288 def profile_signal_handler(signum, frame):
287 289 if state.profile_level > 0:
288 290 now = clock()
289 291 state.accumulate_time(now)
290 292
291 293 timestamp = state.accumulated_time[state.timeidx]
292 294 state.samples.append(Sample.from_frame(frame, timestamp))
293 295
294 296 signal.setitimer(signal.ITIMER_PROF,
295 297 state.sample_interval, 0.0)
296 298 state.last_start_time = now
297 299
298 300 stopthread = threading.Event()
299 301 def samplerthread(tid):
300 302 while not stopthread.is_set():
301 303 now = clock()
302 304 state.accumulate_time(now)
303 305
304 306 frame = sys._current_frames()[tid]
305 307
306 308 timestamp = state.accumulated_time[state.timeidx]
307 309 state.samples.append(Sample.from_frame(frame, timestamp))
308 310
309 311 state.last_start_time = now
310 312 time.sleep(state.sample_interval)
311 313
312 314 stopthread.clear()
313 315
314 316 ###########################################################################
315 317 ## Profiling API
316 318
317 319 def is_active():
318 320 return state.profile_level > 0
319 321
320 322 lastmechanism = None
321 323 def start(mechanism='thread', track='cpu'):
322 324 '''Install the profiling signal handler, and start profiling.'''
323 325 state.track = track # note: nesting different mode won't work
324 326 state.profile_level += 1
325 327 if state.profile_level == 1:
326 328 state.last_start_time = clock()
327 329 rpt = state.remaining_prof_time
328 330 state.remaining_prof_time = None
329 331
330 332 global lastmechanism
331 333 lastmechanism = mechanism
332 334
333 335 if mechanism == 'signal':
334 336 signal.signal(signal.SIGPROF, profile_signal_handler)
335 337 signal.setitimer(signal.ITIMER_PROF,
336 338 rpt or state.sample_interval, 0.0)
337 339 elif mechanism == 'thread':
338 340 frame = inspect.currentframe()
339 341 tid = [k for k, f in sys._current_frames().items() if f == frame][0]
340 342 state.thread = threading.Thread(target=samplerthread,
341 343 args=(tid,), name="samplerthread")
342 344 state.thread.start()
343 345
344 346 def stop():
345 347 '''Stop profiling, and uninstall the profiling signal handler.'''
346 348 state.profile_level -= 1
347 349 if state.profile_level == 0:
348 350 if lastmechanism == 'signal':
349 351 rpt = signal.setitimer(signal.ITIMER_PROF, 0.0, 0.0)
350 352 signal.signal(signal.SIGPROF, signal.SIG_IGN)
351 353 state.remaining_prof_time = rpt[0]
352 354 elif lastmechanism == 'thread':
353 355 stopthread.set()
354 356 state.thread.join()
355 357
356 358 state.accumulate_time(clock())
357 359 state.last_start_time = None
358 360 statprofpath = encoding.environ.get('STATPROF_DEST')
359 361 if statprofpath:
360 362 save_data(statprofpath)
361 363
362 364 return state
363 365
364 366 def save_data(path):
365 367 with open(path, 'w+') as file:
366 368 file.write("%f %f\n" % state.accumulated_time)
367 369 for sample in state.samples:
368 370 time = sample.time
369 371 stack = sample.stack
370 372 sites = ['\1'.join([s.path, b'%d' % s.lineno, s.function])
371 373 for s in stack]
372 374 file.write("%d\0%s\n" % (time, '\0'.join(sites)))
373 375
374 376 def load_data(path):
375 lines = open(path, 'r').read().splitlines()
377 lines = open(path, 'rb').read().splitlines()
376 378
377 379 state.accumulated_time = [float(value) for value in lines[0].split()]
378 380 state.samples = []
379 381 for line in lines[1:]:
380 382 parts = line.split('\0')
381 383 time = float(parts[0])
382 384 rawsites = parts[1:]
383 385 sites = []
384 386 for rawsite in rawsites:
385 387 siteparts = rawsite.split('\1')
386 388 sites.append(CodeSite.get(siteparts[0], int(siteparts[1]),
387 389 siteparts[2]))
388 390
389 391 state.samples.append(Sample(sites, time))
390 392
391 393
392 394
393 395 def reset(frequency=None):
394 396 '''Clear out the state of the profiler. Do not call while the
395 397 profiler is running.
396 398
397 399 The optional frequency argument specifies the number of samples to
398 400 collect per second.'''
399 401 assert state.profile_level == 0, "Can't reset() while statprof is running"
400 402 CodeSite.cache.clear()
401 403 state.reset(frequency)
402 404
403 405
404 406 @contextmanager
405 407 def profile():
406 408 start()
407 409 try:
408 410 yield
409 411 finally:
410 412 stop()
411 413 display()
412 414
413 415
414 416 ###########################################################################
415 417 ## Reporting API
416 418
417 419 class SiteStats(object):
418 420 def __init__(self, site):
419 421 self.site = site
420 422 self.selfcount = 0
421 423 self.totalcount = 0
422 424
423 425 def addself(self):
424 426 self.selfcount += 1
425 427
426 428 def addtotal(self):
427 429 self.totalcount += 1
428 430
429 431 def selfpercent(self):
430 432 return self.selfcount / len(state.samples) * 100
431 433
432 434 def totalpercent(self):
433 435 return self.totalcount / len(state.samples) * 100
434 436
435 437 def selfseconds(self):
436 438 return self.selfcount * state.seconds_per_sample()
437 439
438 440 def totalseconds(self):
439 441 return self.totalcount * state.seconds_per_sample()
440 442
441 443 @classmethod
442 444 def buildstats(cls, samples):
443 445 stats = {}
444 446
445 447 for sample in samples:
446 448 for i, site in enumerate(sample.stack):
447 449 sitestat = stats.get(site)
448 450 if not sitestat:
449 451 sitestat = SiteStats(site)
450 452 stats[site] = sitestat
451 453
452 454 sitestat.addtotal()
453 455
454 456 if i == 0:
455 457 sitestat.addself()
456 458
457 459 return [s for s in stats.itervalues()]
458 460
459 461 class DisplayFormats:
460 462 ByLine = 0
461 463 ByMethod = 1
462 464 AboutMethod = 2
463 465 Hotpath = 3
464 466 FlameGraph = 4
465 467 Json = 5
466 468 Chrome = 6
467 469
468 470 def display(fp=None, format=3, data=None, **kwargs):
469 471 '''Print statistics, either to stdout or the given file object.'''
470 472 if data is None:
471 473 data = state
472 474
473 475 if fp is None:
474 476 import sys
475 477 fp = sys.stdout
476 478 if len(data.samples) == 0:
477 479 fp.write(b'No samples recorded.\n')
478 480 return
479 481
480 482 if format == DisplayFormats.ByLine:
481 483 display_by_line(data, fp)
482 484 elif format == DisplayFormats.ByMethod:
483 485 display_by_method(data, fp)
484 486 elif format == DisplayFormats.AboutMethod:
485 487 display_about_method(data, fp, **kwargs)
486 488 elif format == DisplayFormats.Hotpath:
487 489 display_hotpath(data, fp, **kwargs)
488 490 elif format == DisplayFormats.FlameGraph:
489 491 write_to_flame(data, fp, **kwargs)
490 492 elif format == DisplayFormats.Json:
491 493 write_to_json(data, fp)
492 494 elif format == DisplayFormats.Chrome:
493 495 write_to_chrome(data, fp, **kwargs)
494 496 else:
495 497 raise Exception("Invalid display format")
496 498
497 499 if format not in (DisplayFormats.Json, DisplayFormats.Chrome):
498 500 fp.write(b'---\n')
499 501 fp.write(b'Sample count: %d\n' % len(data.samples))
500 502 fp.write(b'Total time: %f seconds (%f wall)\n' % data.accumulated_time)
501 503
502 504 def display_by_line(data, fp):
503 505 '''Print the profiler data with each sample line represented
504 506 as one row in a table. Sorted by self-time per line.'''
505 507 stats = SiteStats.buildstats(data.samples)
506 508 stats.sort(reverse=True, key=lambda x: x.selfseconds())
507 509
508 510 fp.write(b'%5.5s %10.10s %7.7s %-8.8s\n' % (
509 511 b'% ', b'cumulative', b'self', b''))
510 512 fp.write(b'%5.5s %9.9s %8.8s %-8.8s\n' % (
511 513 b"time", b"seconds", b"seconds", b"name"))
512 514
513 515 for stat in stats:
514 516 site = stat.site
515 sitelabel = '%s:%d:%s' % (pycompat.fsencode(site.filename()),
517 sitelabel = '%s:%d:%s' % (site.filename(),
516 518 site.lineno,
517 pycompat.sysbytes(site.function))
519 site.function)
518 520 fp.write(b'%6.2f %9.2f %9.2f %s\n' % (
519 521 stat.selfpercent(), stat.totalseconds(),
520 522 stat.selfseconds(), sitelabel))
521 523
522 524 def display_by_method(data, fp):
523 525 '''Print the profiler data with each sample function represented
524 526 as one row in a table. Important lines within that function are
525 527 output as nested rows. Sorted by self-time per line.'''
526 528 fp.write(b'%5.5s %10.10s %7.7s %-8.8s\n' %
527 529 ('% ', 'cumulative', 'self', ''))
528 530 fp.write(b'%5.5s %9.9s %8.8s %-8.8s\n' %
529 531 ("time", "seconds", "seconds", "name"))
530 532
531 533 stats = SiteStats.buildstats(data.samples)
532 534
533 535 grouped = defaultdict(list)
534 536 for stat in stats:
535 grouped[stat.site.filename() + r":" + stat.site.function].append(stat)
537 grouped[stat.site.filename() + b":" + stat.site.function].append(stat)
536 538
537 539 # compute sums for each function
538 540 functiondata = []
539 541 for fname, sitestats in grouped.iteritems():
540 542 total_cum_sec = 0
541 543 total_self_sec = 0
542 544 total_percent = 0
543 545 for stat in sitestats:
544 546 total_cum_sec += stat.totalseconds()
545 547 total_self_sec += stat.selfseconds()
546 548 total_percent += stat.selfpercent()
547 549
548 550 functiondata.append((fname,
549 551 total_cum_sec,
550 552 total_self_sec,
551 553 total_percent,
552 554 sitestats))
553 555
554 556 # sort by total self sec
555 557 functiondata.sort(reverse=True, key=lambda x: x[2])
556 558
557 559 for function in functiondata:
558 560 if function[3] < 0.05:
559 561 continue
560 562 fp.write(b'%6.2f %9.2f %9.2f %s\n' % (
561 563 function[3], # total percent
562 564 function[1], # total cum sec
563 565 function[2], # total self sec
564 pycompat.sysbytes(function[0]))) # file:function
566 function[0])) # file:function
565 567
566 568 function[4].sort(reverse=True, key=lambda i: i.selfseconds())
567 569 for stat in function[4]:
568 570 # only show line numbers for significant locations (>1% time spent)
569 571 if stat.selfpercent() > 1:
570 572 source = stat.site.getsource(25)
571 573 if sys.version_info.major >= 3 and not isinstance(source, bytes):
572 574 source = pycompat.bytestr(source)
573 575
574 576 stattuple = (stat.selfpercent(), stat.selfseconds(),
575 577 stat.site.lineno, source)
576 578
577 579 fp.write(b'%33.0f%% %6.2f line %d: %s\n' % stattuple)
578 580
579 581 def display_about_method(data, fp, function=None, **kwargs):
580 582 if function is None:
581 583 raise Exception("Invalid function")
582 584
583 585 filename = None
584 586 if ':' in function:
585 587 filename, function = function.split(':')
586 588
587 589 relevant_samples = 0
588 590 parents = {}
589 591 children = {}
590 592
591 593 for sample in data.samples:
592 594 for i, site in enumerate(sample.stack):
593 595 if site.function == function and (not filename
594 596 or site.filename() == filename):
595 597 relevant_samples += 1
596 598 if i != len(sample.stack) - 1:
597 599 parent = sample.stack[i + 1]
598 600 if parent in parents:
599 601 parents[parent] = parents[parent] + 1
600 602 else:
601 603 parents[parent] = 1
602 604
603 605 if site in children:
604 606 children[site] = children[site] + 1
605 607 else:
606 608 children[site] = 1
607 609
608 610 parents = [(parent, count) for parent, count in parents.iteritems()]
609 611 parents.sort(reverse=True, key=lambda x: x[1])
610 612 for parent, count in parents:
611 613 fp.write(b'%6.2f%% %s:%s line %s: %s\n' %
612 614 (count / relevant_samples * 100,
613 615 pycompat.fsencode(parent.filename()),
614 616 pycompat.sysbytes(parent.function),
615 617 parent.lineno,
616 618 pycompat.sysbytes(parent.getsource(50))))
617 619
618 620 stats = SiteStats.buildstats(data.samples)
619 621 stats = [s for s in stats
620 622 if s.site.function == function and
621 623 (not filename or s.site.filename() == filename)]
622 624
623 625 total_cum_sec = 0
624 626 total_self_sec = 0
625 627 total_self_percent = 0
626 628 total_cum_percent = 0
627 629 for stat in stats:
628 630 total_cum_sec += stat.totalseconds()
629 631 total_self_sec += stat.selfseconds()
630 632 total_self_percent += stat.selfpercent()
631 633 total_cum_percent += stat.totalpercent()
632 634
633 635 fp.write(
634 636 b'\n %s:%s Total: %0.2fs (%0.2f%%) Self: %0.2fs (%0.2f%%)\n\n'
635 637 % (
636 638 pycompat.sysbytes(filename or '___'),
637 639 pycompat.sysbytes(function),
638 640 total_cum_sec,
639 641 total_cum_percent,
640 642 total_self_sec,
641 643 total_self_percent
642 644 ))
643 645
644 646 children = [(child, count) for child, count in children.iteritems()]
645 647 children.sort(reverse=True, key=lambda x: x[1])
646 648 for child, count in children:
647 649 fp.write(b' %6.2f%% line %s: %s\n' %
648 650 (count / relevant_samples * 100, child.lineno,
649 651 pycompat.sysbytes(child.getsource(50))))
650 652
651 653 def display_hotpath(data, fp, limit=0.05, **kwargs):
652 654 class HotNode(object):
653 655 def __init__(self, site):
654 656 self.site = site
655 657 self.count = 0
656 658 self.children = {}
657 659
658 660 def add(self, stack, time):
659 661 self.count += time
660 662 site = stack[0]
661 663 child = self.children.get(site)
662 664 if not child:
663 665 child = HotNode(site)
664 666 self.children[site] = child
665 667
666 668 if len(stack) > 1:
667 669 i = 1
668 670 # Skip boiler plate parts of the stack
669 671 while i < len(stack) and stack[i].skipname() in skips:
670 672 i += 1
671 673 if i < len(stack):
672 674 child.add(stack[i:], time)
673 675
674 676 root = HotNode(None)
675 677 lasttime = data.samples[0].time
676 678 for sample in data.samples:
677 679 root.add(sample.stack[::-1], sample.time - lasttime)
678 680 lasttime = sample.time
679 681
680 682 def _write(node, depth, multiple_siblings):
681 683 site = node.site
682 684 visiblechildren = [c for c in node.children.itervalues()
683 685 if c.count >= (limit * root.count)]
684 686 if site:
685 687 indent = depth * 2 - 1
686 688 filename = ''
687 689 function = ''
688 690 if len(node.children) > 0:
689 691 childsite = list(node.children.itervalues())[0].site
690 692 filename = (childsite.filename() + ':').ljust(15)
691 693 function = childsite.function
692 694
693 695 # lots of string formatting
694 696 listpattern = ''.ljust(indent) +\
695 697 ('\\' if multiple_siblings else '|') +\
696 698 ' %4.1f%% %s %s'
697 699 liststring = listpattern % (node.count / root.count * 100,
698 700 filename, function)
699 codepattern = '%' + str(55 - len(liststring)) + 's %s: %s'
701 codepattern = '%' + ('%d' % (55 - len(liststring))) + 's %d: %s'
700 702 codestring = codepattern % ('line', site.lineno, site.getsource(30))
701 703
702 704 finalstring = liststring + codestring
703 705 childrensamples = sum([c.count for c in node.children.itervalues()])
704 706 # Make frames that performed more than 10% of the operation red
705 707 if node.count - childrensamples > (0.1 * root.count):
706 708 finalstring = '\033[91m' + finalstring + '\033[0m'
707 709 # Make frames that didn't actually perform work dark grey
708 710 elif node.count - childrensamples == 0:
709 711 finalstring = '\033[90m' + finalstring + '\033[0m'
710 712 fp.write(finalstring + b'\n')
711 713
712 714 newdepth = depth
713 715 if len(visiblechildren) > 1 or multiple_siblings:
714 716 newdepth += 1
715 717
716 718 visiblechildren.sort(reverse=True, key=lambda x: x.count)
717 719 for child in visiblechildren:
718 720 _write(child, newdepth, len(visiblechildren) > 1)
719 721
720 722 if root.count > 0:
721 723 _write(root, 0, False)
722 724
723 725 def write_to_flame(data, fp, scriptpath=None, outputfile=None, **kwargs):
724 726 if scriptpath is None:
725 727 scriptpath = encoding.environ['HOME'] + '/flamegraph.pl'
726 728 if not os.path.exists(scriptpath):
727 729 fp.write(b'error: missing %s\n' % scriptpath)
728 730 fp.write(b'get it here: https://github.com/brendangregg/FlameGraph\n')
729 731 return
730 732
731 733 fd, path = pycompat.mkstemp()
732 734
733 735 file = open(path, "w+")
734 736
735 737 lines = {}
736 738 for sample in data.samples:
737 739 sites = [s.function for s in sample.stack]
738 740 sites.reverse()
739 741 line = ';'.join(sites)
740 742 if line in lines:
741 743 lines[line] = lines[line] + 1
742 744 else:
743 745 lines[line] = 1
744 746
745 747 for line, count in lines.iteritems():
746 748 file.write("%s %d\n" % (line, count))
747 749
748 750 file.close()
749 751
750 752 if outputfile is None:
751 753 outputfile = '~/flamegraph.svg'
752 754
753 755 os.system("perl ~/flamegraph.pl %s > %s" % (path, outputfile))
754 756 fp.write(b'Written to %s\n' % outputfile)
755 757
756 758 _pathcache = {}
757 759 def simplifypath(path):
758 760 '''Attempt to make the path to a Python module easier to read by
759 761 removing whatever part of the Python search path it was found
760 762 on.'''
761 763
762 764 if path in _pathcache:
763 765 return _pathcache[path]
764 766 hgpath = pycompat.fsencode(encoding.__file__).rsplit(os.sep, 2)[0]
765 767 for p in [hgpath] + sys.path:
766 768 prefix = p + os.sep
767 769 if path.startswith(prefix):
768 770 path = path[len(prefix):]
769 771 break
770 772 _pathcache[path] = path
771 773 return path
772 774
773 775 def write_to_json(data, fp):
774 776 samples = []
775 777
776 778 for sample in data.samples:
777 779 stack = []
778 780
779 781 for frame in sample.stack:
780 stack.append((frame.path, frame.lineno, frame.function))
782 stack.append(
783 (pycompat.sysstr(frame.path),
784 frame.lineno,
785 pycompat.sysstr(frame.function)))
781 786
782 787 samples.append((sample.time, stack))
783 788
784 789 data = json.dumps(samples)
785 790 if not isinstance(data, bytes):
786 791 data = data.encode('utf-8')
787 792
788 793 fp.write(data)
789 794
790 795 def write_to_chrome(data, fp, minthreshold=0.005, maxthreshold=0.999):
791 796 samples = []
792 797 laststack = collections.deque()
793 798 lastseen = collections.deque()
794 799
795 800 # The Chrome tracing format allows us to use a compact stack
796 801 # representation to save space. It's fiddly but worth it.
797 802 # We maintain a bijection between stack and ID.
798 803 stack2id = {}
799 804 id2stack = [] # will eventually be rendered
800 805
801 806 def stackid(stack):
802 807 if not stack:
803 808 return
804 809 if stack in stack2id:
805 810 return stack2id[stack]
806 811 parent = stackid(stack[1:])
807 812 myid = len(stack2id)
808 813 stack2id[stack] = myid
809 814 id2stack.append(dict(category=stack[0][0], name='%s %s' % stack[0]))
810 815 if parent is not None:
811 816 id2stack[-1].update(parent=parent)
812 817 return myid
813 818
814 819 def endswith(a, b):
815 820 return list(a)[-len(b):] == list(b)
816 821
817 822 # The sampling profiler can sample multiple times without
818 823 # advancing the clock, potentially causing the Chrome trace viewer
819 824 # to render single-pixel columns that we cannot zoom in on. We
820 825 # work around this by pretending that zero-duration samples are a
821 826 # millisecond in length.
822 827
823 828 clamp = 0.001
824 829
825 830 # We provide knobs that by default attempt to filter out stack
826 831 # frames that are too noisy:
827 832 #
828 833 # * A few take almost all execution time. These are usually boring
829 834 # setup functions, giving a stack that is deep but uninformative.
830 835 #
831 836 # * Numerous samples take almost no time, but introduce lots of
832 837 # noisy, oft-deep "spines" into a rendered profile.
833 838
834 839 blacklist = set()
835 840 totaltime = data.samples[-1].time - data.samples[0].time
836 841 minthreshold = totaltime * minthreshold
837 842 maxthreshold = max(totaltime * maxthreshold, clamp)
838 843
839 844 def poplast():
840 845 oldsid = stackid(tuple(laststack))
841 846 oldcat, oldfunc = laststack.popleft()
842 847 oldtime, oldidx = lastseen.popleft()
843 848 duration = sample.time - oldtime
844 849 if minthreshold <= duration <= maxthreshold:
845 850 # ensure no zero-duration events
846 851 sampletime = max(oldtime + clamp, sample.time)
847 852 samples.append(dict(ph='E', name=oldfunc, cat=oldcat, sf=oldsid,
848 853 ts=sampletime*1e6, pid=0))
849 854 else:
850 855 blacklist.add(oldidx)
851 856
852 857 # Much fiddling to synthesize correctly(ish) nested begin/end
853 858 # events given only stack snapshots.
854 859
855 860 for sample in data.samples:
856 861 tos = sample.stack[0]
857 862 name = tos.function
858 863 path = simplifypath(tos.path)
859 864 stack = tuple((('%s:%d' % (simplifypath(frame.path), frame.lineno),
860 865 frame.function) for frame in sample.stack))
861 866 qstack = collections.deque(stack)
862 867 if laststack == qstack:
863 868 continue
864 869 while laststack and qstack and laststack[-1] == qstack[-1]:
865 870 laststack.pop()
866 871 qstack.pop()
867 872 while laststack:
868 873 poplast()
869 874 for f in reversed(qstack):
870 875 lastseen.appendleft((sample.time, len(samples)))
871 876 laststack.appendleft(f)
872 877 path, name = f
873 878 sid = stackid(tuple(laststack))
874 879 samples.append(dict(ph='B', name=name, cat=path, ts=sample.time*1e6,
875 880 sf=sid, pid=0))
876 881 laststack = collections.deque(stack)
877 882 while laststack:
878 883 poplast()
879 884 events = [s[1] for s in enumerate(samples) if s[0] not in blacklist]
880 885 frames = collections.OrderedDict((str(k), v)
881 886 for (k,v) in enumerate(id2stack))
882 887 json.dump(dict(traceEvents=events, stackFrames=frames), fp, indent=1)
883 888 fp.write('\n')
884 889
885 890 def printusage():
886 891 print(r"""
887 892 The statprof command line allows you to inspect the last profile's results in
888 893 the following forms:
889 894
890 895 usage:
891 896 hotpath [-l --limit percent]
892 897 Shows a graph of calls with the percent of time each takes.
893 898 Red calls take over 10%% of the total time themselves.
894 899 lines
895 900 Shows the actual sampled lines.
896 901 functions
897 902 Shows the samples grouped by function.
898 903 function [filename:]functionname
899 904 Shows the callers and callees of a particular function.
900 905 flame [-s --script-path] [-o --output-file path]
901 906 Writes out a flamegraph to output-file (defaults to ~/flamegraph.svg)
902 907 Requires that ~/flamegraph.pl exist.
903 908 (Specify alternate script path with --script-path.)""")
904 909
905 910 def main(argv=None):
906 911 if argv is None:
907 912 argv = sys.argv
908 913
909 914 if len(argv) == 1:
910 915 printusage()
911 916 return 0
912 917
913 918 displayargs = {}
914 919
915 920 optstart = 2
916 921 displayargs['function'] = None
917 922 if argv[1] == r'hotpath':
918 923 displayargs['format'] = DisplayFormats.Hotpath
919 924 elif argv[1] == r'lines':
920 925 displayargs['format'] = DisplayFormats.ByLine
921 926 elif argv[1] == r'functions':
922 927 displayargs['format'] = DisplayFormats.ByMethod
923 928 elif argv[1] == r'function':
924 929 displayargs['format'] = DisplayFormats.AboutMethod
925 930 displayargs['function'] = argv[2]
926 931 optstart = 3
927 932 elif argv[1] == r'flame':
928 933 displayargs['format'] = DisplayFormats.FlameGraph
929 934 else:
930 935 printusage()
931 936 return 0
932 937
933 938 # process options
934 939 try:
935 940 opts, args = pycompat.getoptb(sys.argv[optstart:], "hl:f:o:p:",
936 941 ["help", "limit=", "file=", "output-file=", "script-path="])
937 942 except getopt.error as msg:
938 943 print(msg)
939 944 printusage()
940 945 return 2
941 946
942 947 displayargs['limit'] = 0.05
943 948 path = None
944 949 for o, value in opts:
945 950 if o in (r"-l", r"--limit"):
946 951 displayargs['limit'] = float(value)
947 952 elif o in (r"-f", r"--file"):
948 953 path = value
949 954 elif o in (r"-o", r"--output-file"):
950 955 displayargs['outputfile'] = value
951 956 elif o in (r"-p", r"--script-path"):
952 957 displayargs['scriptpath'] = value
953 958 elif o in (r"-h", r"help"):
954 959 printusage()
955 960 return 0
956 961 else:
957 962 assert False, "unhandled option %s" % o
958 963
959 964 if not path:
960 965 print(r'must specify --file to load')
961 966 return 1
962 967
963 968 load_data(path=path)
964 969
965 970 display(**pycompat.strkwargs(displayargs))
966 971
967 972 return 0
968 973
969 974 if __name__ == r"__main__":
970 975 sys.exit(main())
General Comments 0
You need to be logged in to leave comments. Login now