##// END OF EJS Templates
statprof: separate functions and "line", assume 4 digit line numbers...
Kyle Lippincott -
r46639:8b0a3ff5 default
parent child Browse files
Show More
@@ -1,1089 +1,1097 b''
1 1 ## statprof.py
2 2 ## Copyright (C) 2012 Bryan O'Sullivan <bos@serpentine.com>
3 3 ## Copyright (C) 2011 Alex Fraser <alex at phatcore dot com>
4 4 ## Copyright (C) 2004,2005 Andy Wingo <wingo at pobox dot com>
5 5 ## Copyright (C) 2001 Rob Browning <rlb at defaultvalue dot org>
6 6
7 7 ## This library is free software; you can redistribute it and/or
8 8 ## modify it under the terms of the GNU Lesser General Public
9 9 ## License as published by the Free Software Foundation; either
10 10 ## version 2.1 of the License, or (at your option) any later version.
11 11 ##
12 12 ## This library is distributed in the hope that it will be useful,
13 13 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
14 14 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 15 ## Lesser General Public License for more details.
16 16 ##
17 17 ## You should have received a copy of the GNU Lesser General Public
18 18 ## License along with this program; if not, contact:
19 19 ##
20 20 ## Free Software Foundation Voice: +1-617-542-5942
21 21 ## 59 Temple Place - Suite 330 Fax: +1-617-542-2652
22 22 ## Boston, MA 02111-1307, USA gnu@gnu.org
23 23
24 24 """
25 25 statprof is intended to be a fairly simple statistical profiler for
26 26 python. It was ported directly from a statistical profiler for guile,
27 27 also named statprof, available from guile-lib [0].
28 28
29 29 [0] http://wingolog.org/software/guile-lib/statprof/
30 30
31 31 To start profiling, call statprof.start():
32 32 >>> start()
33 33
34 34 Then run whatever it is that you want to profile, for example:
35 35 >>> import test.pystone; test.pystone.pystones()
36 36
37 37 Then stop the profiling and print out the results:
38 38 >>> stop()
39 39 >>> display()
40 40 % cumulative self
41 41 time seconds seconds name
42 42 26.72 1.40 0.37 pystone.py:79:Proc0
43 43 13.79 0.56 0.19 pystone.py:133:Proc1
44 44 13.79 0.19 0.19 pystone.py:208:Proc8
45 45 10.34 0.16 0.14 pystone.py:229:Func2
46 46 6.90 0.10 0.10 pystone.py:45:__init__
47 47 4.31 0.16 0.06 pystone.py:53:copy
48 48 ...
49 49
50 50 All of the numerical data is statistically approximate. In the
51 51 following column descriptions, and in all of statprof, "time" refers
52 52 to execution time (both user and system), not wall clock time.
53 53
54 54 % time
55 55 The percent of the time spent inside the procedure itself (not
56 56 counting children).
57 57
58 58 cumulative seconds
59 59 The total number of seconds spent in the procedure, including
60 60 children.
61 61
62 62 self seconds
63 63 The total number of seconds spent in the procedure itself (not
64 64 counting children).
65 65
66 66 name
67 67 The name of the procedure.
68 68
69 69 By default statprof keeps the data collected from previous runs. If you
70 70 want to clear the collected data, call reset():
71 71 >>> reset()
72 72
73 73 reset() can also be used to change the sampling frequency from the
74 74 default of 1000 Hz. For example, to tell statprof to sample 50 times a
75 75 second:
76 76 >>> reset(50)
77 77
78 78 This means that statprof will sample the call stack after every 1/50 of
79 79 a second of user + system time spent running on behalf of the python
80 80 process. When your process is idle (for example, blocking in a read(),
81 81 as is the case at the listener), the clock does not advance. For this
82 82 reason statprof is not currently not suitable for profiling io-bound
83 83 operations.
84 84
85 85 The profiler uses the hash of the code object itself to identify the
86 86 procedures, so it won't confuse different procedures with the same name.
87 87 They will show up as two different rows in the output.
88 88
89 89 Right now the profiler is quite simplistic. I cannot provide
90 90 call-graphs or other higher level information. What you see in the
91 91 table is pretty much all there is. Patches are welcome :-)
92 92
93 93
94 94 Threading
95 95 ---------
96 96
97 97 Because signals only get delivered to the main thread in Python,
98 98 statprof only profiles the main thread. However because the time
99 99 reporting function uses per-process timers, the results can be
100 100 significantly off if other threads' work patterns are not similar to the
101 101 main thread's work patterns.
102 102 """
103 103 # no-check-code
104 104 from __future__ import absolute_import, division, print_function
105 105
106 106 import collections
107 107 import contextlib
108 108 import getopt
109 109 import inspect
110 110 import json
111 111 import os
112 112 import signal
113 113 import sys
114 114 import threading
115 115 import time
116 116
117 117 from .pycompat import open
118 118 from . import (
119 119 encoding,
120 120 pycompat,
121 121 )
122 122
123 123 defaultdict = collections.defaultdict
124 124 contextmanager = contextlib.contextmanager
125 125
126 126 __all__ = [b'start', b'stop', b'reset', b'display', b'profile']
127 127
128 128 skips = {
129 129 "util.py:check",
130 130 "extensions.py:closure",
131 131 "color.py:colorcmd",
132 132 "dispatch.py:checkargs",
133 133 "dispatch.py:<lambda>",
134 134 "dispatch.py:_runcatch",
135 135 "dispatch.py:_dispatch",
136 136 "dispatch.py:_runcommand",
137 137 "pager.py:pagecmd",
138 138 "dispatch.py:run",
139 139 "dispatch.py:dispatch",
140 140 "dispatch.py:runcommand",
141 141 "hg.py:<module>",
142 142 "evolve.py:warnobserrors",
143 143 }
144 144
145 145 ###########################################################################
146 146 ## Utils
147 147
148 148
149 149 def clock():
150 150 times = os.times()
151 151 return (times[0] + times[1], times[4])
152 152
153 153
154 154 ###########################################################################
155 155 ## Collection data structures
156 156
157 157
158 158 class ProfileState(object):
159 159 def __init__(self, frequency=None):
160 160 self.reset(frequency)
161 161 self.track = b'cpu'
162 162
163 163 def reset(self, frequency=None):
164 164 # total so far
165 165 self.accumulated_time = (0.0, 0.0)
166 166 # start_time when timer is active
167 167 self.last_start_time = None
168 168 # a float
169 169 if frequency:
170 170 self.sample_interval = 1.0 / frequency
171 171 elif not pycompat.hasattr(self, 'sample_interval'):
172 172 # default to 1000 Hz
173 173 self.sample_interval = 1.0 / 1000.0
174 174 else:
175 175 # leave the frequency as it was
176 176 pass
177 177 self.remaining_prof_time = None
178 178 # for user start/stop nesting
179 179 self.profile_level = 0
180 180
181 181 self.samples = []
182 182
183 183 def accumulate_time(self, stop_time):
184 184 increment = (
185 185 stop_time[0] - self.last_start_time[0],
186 186 stop_time[1] - self.last_start_time[1],
187 187 )
188 188 self.accumulated_time = (
189 189 self.accumulated_time[0] + increment[0],
190 190 self.accumulated_time[1] + increment[1],
191 191 )
192 192
193 193 def seconds_per_sample(self):
194 194 return self.accumulated_time[self.timeidx] / len(self.samples)
195 195
196 196 @property
197 197 def timeidx(self):
198 198 if self.track == b'real':
199 199 return 1
200 200 return 0
201 201
202 202
203 203 state = ProfileState()
204 204
205 205
206 206 class CodeSite(object):
207 207 cache = {}
208 208
209 209 __slots__ = ('path', 'lineno', 'function', 'source')
210 210
211 211 def __init__(self, path, lineno, function):
212 212 assert isinstance(path, bytes)
213 213 self.path = path
214 214 self.lineno = lineno
215 215 assert isinstance(function, bytes)
216 216 self.function = function
217 217 self.source = None
218 218
219 219 def __eq__(self, other):
220 220 try:
221 221 return self.lineno == other.lineno and self.path == other.path
222 222 except:
223 223 return False
224 224
225 225 def __hash__(self):
226 226 return hash((self.lineno, self.path))
227 227
228 228 @classmethod
229 229 def get(cls, path, lineno, function):
230 230 k = (path, lineno)
231 231 try:
232 232 return cls.cache[k]
233 233 except KeyError:
234 234 v = cls(path, lineno, function)
235 235 cls.cache[k] = v
236 236 return v
237 237
238 238 def getsource(self, length):
239 239 if self.source is None:
240 240 lineno = self.lineno - 1
241 241 try:
242 242 with open(self.path, b'rb') as fp:
243 243 for i, line in enumerate(fp):
244 244 if i == lineno:
245 245 self.source = line.strip()
246 246 break
247 247 except:
248 248 pass
249 249 if self.source is None:
250 250 self.source = b''
251 251
252 252 source = self.source
253 253 if len(source) > length:
254 254 source = source[: (length - 3)] + b"..."
255 255 return source
256 256
257 257 def filename(self):
258 258 return os.path.basename(self.path)
259 259
260 260 def skipname(self):
261 261 return '%s:%s' % (self.filename(), self.function)
262 262
263 263
264 264 class Sample(object):
265 265 __slots__ = ('stack', 'time')
266 266
267 267 def __init__(self, stack, time):
268 268 self.stack = stack
269 269 self.time = time
270 270
271 271 @classmethod
272 272 def from_frame(cls, frame, time):
273 273 stack = []
274 274
275 275 while frame:
276 276 stack.append(
277 277 CodeSite.get(
278 278 pycompat.sysbytes(frame.f_code.co_filename),
279 279 frame.f_lineno,
280 280 pycompat.sysbytes(frame.f_code.co_name),
281 281 )
282 282 )
283 283 frame = frame.f_back
284 284
285 285 return Sample(stack, time)
286 286
287 287
288 288 ###########################################################################
289 289 ## SIGPROF handler
290 290
291 291
292 292 def profile_signal_handler(signum, frame):
293 293 if state.profile_level > 0:
294 294 now = clock()
295 295 state.accumulate_time(now)
296 296
297 297 timestamp = state.accumulated_time[state.timeidx]
298 298 state.samples.append(Sample.from_frame(frame, timestamp))
299 299
300 300 signal.setitimer(signal.ITIMER_PROF, state.sample_interval, 0.0)
301 301 state.last_start_time = now
302 302
303 303
304 304 stopthread = threading.Event()
305 305
306 306
307 307 def samplerthread(tid):
308 308 while not stopthread.is_set():
309 309 now = clock()
310 310 state.accumulate_time(now)
311 311
312 312 frame = sys._current_frames()[tid]
313 313
314 314 timestamp = state.accumulated_time[state.timeidx]
315 315 state.samples.append(Sample.from_frame(frame, timestamp))
316 316
317 317 state.last_start_time = now
318 318 time.sleep(state.sample_interval)
319 319
320 320 stopthread.clear()
321 321
322 322
323 323 ###########################################################################
324 324 ## Profiling API
325 325
326 326
327 327 def is_active():
328 328 return state.profile_level > 0
329 329
330 330
331 331 lastmechanism = None
332 332
333 333
334 334 def start(mechanism=b'thread', track=b'cpu'):
335 335 '''Install the profiling signal handler, and start profiling.'''
336 336 state.track = track # note: nesting different mode won't work
337 337 state.profile_level += 1
338 338 if state.profile_level == 1:
339 339 state.last_start_time = clock()
340 340 rpt = state.remaining_prof_time
341 341 state.remaining_prof_time = None
342 342
343 343 global lastmechanism
344 344 lastmechanism = mechanism
345 345
346 346 if mechanism == b'signal':
347 347 signal.signal(signal.SIGPROF, profile_signal_handler)
348 348 signal.setitimer(
349 349 signal.ITIMER_PROF, rpt or state.sample_interval, 0.0
350 350 )
351 351 elif mechanism == b'thread':
352 352 frame = inspect.currentframe()
353 353 tid = [k for k, f in sys._current_frames().items() if f == frame][0]
354 354 state.thread = threading.Thread(
355 355 target=samplerthread, args=(tid,), name="samplerthread"
356 356 )
357 357 state.thread.start()
358 358
359 359
360 360 def stop():
361 361 '''Stop profiling, and uninstall the profiling signal handler.'''
362 362 state.profile_level -= 1
363 363 if state.profile_level == 0:
364 364 if lastmechanism == b'signal':
365 365 rpt = signal.setitimer(signal.ITIMER_PROF, 0.0, 0.0)
366 366 signal.signal(signal.SIGPROF, signal.SIG_IGN)
367 367 state.remaining_prof_time = rpt[0]
368 368 elif lastmechanism == b'thread':
369 369 stopthread.set()
370 370 state.thread.join()
371 371
372 372 state.accumulate_time(clock())
373 373 state.last_start_time = None
374 374 statprofpath = encoding.environ.get(b'STATPROF_DEST')
375 375 if statprofpath:
376 376 save_data(statprofpath)
377 377
378 378 return state
379 379
380 380
381 381 def save_data(path):
382 382 with open(path, b'w+') as file:
383 383 file.write(b"%f %f\n" % state.accumulated_time)
384 384 for sample in state.samples:
385 385 time = sample.time
386 386 stack = sample.stack
387 387 sites = [
388 388 b'\1'.join([s.path, b'%d' % s.lineno, s.function])
389 389 for s in stack
390 390 ]
391 391 file.write(b"%d\0%s\n" % (time, b'\0'.join(sites)))
392 392
393 393
394 394 def load_data(path):
395 395 lines = open(path, b'rb').read().splitlines()
396 396
397 397 state.accumulated_time = [float(value) for value in lines[0].split()]
398 398 state.samples = []
399 399 for line in lines[1:]:
400 400 parts = line.split(b'\0')
401 401 time = float(parts[0])
402 402 rawsites = parts[1:]
403 403 sites = []
404 404 for rawsite in rawsites:
405 405 siteparts = rawsite.split(b'\1')
406 406 sites.append(
407 407 CodeSite.get(siteparts[0], int(siteparts[1]), siteparts[2])
408 408 )
409 409
410 410 state.samples.append(Sample(sites, time))
411 411
412 412
413 413 def reset(frequency=None):
414 414 """Clear out the state of the profiler. Do not call while the
415 415 profiler is running.
416 416
417 417 The optional frequency argument specifies the number of samples to
418 418 collect per second."""
419 419 assert state.profile_level == 0, b"Can't reset() while statprof is running"
420 420 CodeSite.cache.clear()
421 421 state.reset(frequency)
422 422
423 423
424 424 @contextmanager
425 425 def profile():
426 426 start()
427 427 try:
428 428 yield
429 429 finally:
430 430 stop()
431 431 display()
432 432
433 433
434 434 ###########################################################################
435 435 ## Reporting API
436 436
437 437
438 438 class SiteStats(object):
439 439 def __init__(self, site):
440 440 self.site = site
441 441 self.selfcount = 0
442 442 self.totalcount = 0
443 443
444 444 def addself(self):
445 445 self.selfcount += 1
446 446
447 447 def addtotal(self):
448 448 self.totalcount += 1
449 449
450 450 def selfpercent(self):
451 451 return self.selfcount / len(state.samples) * 100
452 452
453 453 def totalpercent(self):
454 454 return self.totalcount / len(state.samples) * 100
455 455
456 456 def selfseconds(self):
457 457 return self.selfcount * state.seconds_per_sample()
458 458
459 459 def totalseconds(self):
460 460 return self.totalcount * state.seconds_per_sample()
461 461
462 462 @classmethod
463 463 def buildstats(cls, samples):
464 464 stats = {}
465 465
466 466 for sample in samples:
467 467 for i, site in enumerate(sample.stack):
468 468 sitestat = stats.get(site)
469 469 if not sitestat:
470 470 sitestat = SiteStats(site)
471 471 stats[site] = sitestat
472 472
473 473 sitestat.addtotal()
474 474
475 475 if i == 0:
476 476 sitestat.addself()
477 477
478 478 return [s for s in pycompat.itervalues(stats)]
479 479
480 480
481 481 class DisplayFormats:
482 482 ByLine = 0
483 483 ByMethod = 1
484 484 AboutMethod = 2
485 485 Hotpath = 3
486 486 FlameGraph = 4
487 487 Json = 5
488 488 Chrome = 6
489 489
490 490
491 491 def display(fp=None, format=3, data=None, **kwargs):
492 492 '''Print statistics, either to stdout or the given file object.'''
493 493 if data is None:
494 494 data = state
495 495
496 496 if fp is None:
497 497 import sys
498 498
499 499 fp = sys.stdout
500 500 if len(data.samples) == 0:
501 501 fp.write(b'No samples recorded.\n')
502 502 return
503 503
504 504 if format == DisplayFormats.ByLine:
505 505 display_by_line(data, fp)
506 506 elif format == DisplayFormats.ByMethod:
507 507 display_by_method(data, fp)
508 508 elif format == DisplayFormats.AboutMethod:
509 509 display_about_method(data, fp, **kwargs)
510 510 elif format == DisplayFormats.Hotpath:
511 511 display_hotpath(data, fp, **kwargs)
512 512 elif format == DisplayFormats.FlameGraph:
513 513 write_to_flame(data, fp, **kwargs)
514 514 elif format == DisplayFormats.Json:
515 515 write_to_json(data, fp)
516 516 elif format == DisplayFormats.Chrome:
517 517 write_to_chrome(data, fp, **kwargs)
518 518 else:
519 519 raise Exception(b"Invalid display format")
520 520
521 521 if format not in (DisplayFormats.Json, DisplayFormats.Chrome):
522 522 fp.write(b'---\n')
523 523 fp.write(b'Sample count: %d\n' % len(data.samples))
524 524 fp.write(b'Total time: %f seconds (%f wall)\n' % data.accumulated_time)
525 525
526 526
527 527 def display_by_line(data, fp):
528 528 """Print the profiler data with each sample line represented
529 529 as one row in a table. Sorted by self-time per line."""
530 530 stats = SiteStats.buildstats(data.samples)
531 531 stats.sort(reverse=True, key=lambda x: x.selfseconds())
532 532
533 533 fp.write(
534 534 b'%5.5s %10.10s %7.7s %-8.8s\n'
535 535 % (b'% ', b'cumulative', b'self', b'')
536 536 )
537 537 fp.write(
538 538 b'%5.5s %9.9s %8.8s %-8.8s\n'
539 539 % (b"time", b"seconds", b"seconds", b"name")
540 540 )
541 541
542 542 for stat in stats:
543 543 site = stat.site
544 544 sitelabel = b'%s:%d:%s' % (site.filename(), site.lineno, site.function)
545 545 fp.write(
546 546 b'%6.2f %9.2f %9.2f %s\n'
547 547 % (
548 548 stat.selfpercent(),
549 549 stat.totalseconds(),
550 550 stat.selfseconds(),
551 551 sitelabel,
552 552 )
553 553 )
554 554
555 555
556 556 def display_by_method(data, fp):
557 557 """Print the profiler data with each sample function represented
558 558 as one row in a table. Important lines within that function are
559 559 output as nested rows. Sorted by self-time per line."""
560 560 fp.write(
561 561 b'%5.5s %10.10s %7.7s %-8.8s\n'
562 562 % (b'% ', b'cumulative', b'self', b'')
563 563 )
564 564 fp.write(
565 565 b'%5.5s %9.9s %8.8s %-8.8s\n'
566 566 % (b"time", b"seconds", b"seconds", b"name")
567 567 )
568 568
569 569 stats = SiteStats.buildstats(data.samples)
570 570
571 571 grouped = defaultdict(list)
572 572 for stat in stats:
573 573 grouped[stat.site.filename() + b":" + stat.site.function].append(stat)
574 574
575 575 # compute sums for each function
576 576 functiondata = []
577 577 for fname, sitestats in pycompat.iteritems(grouped):
578 578 total_cum_sec = 0
579 579 total_self_sec = 0
580 580 total_percent = 0
581 581 for stat in sitestats:
582 582 total_cum_sec += stat.totalseconds()
583 583 total_self_sec += stat.selfseconds()
584 584 total_percent += stat.selfpercent()
585 585
586 586 functiondata.append(
587 587 (fname, total_cum_sec, total_self_sec, total_percent, sitestats)
588 588 )
589 589
590 590 # sort by total self sec
591 591 functiondata.sort(reverse=True, key=lambda x: x[2])
592 592
593 593 for function in functiondata:
594 594 if function[3] < 0.05:
595 595 continue
596 596 fp.write(
597 597 b'%6.2f %9.2f %9.2f %s\n'
598 598 % (
599 599 function[3], # total percent
600 600 function[1], # total cum sec
601 601 function[2], # total self sec
602 602 function[0],
603 603 )
604 604 ) # file:function
605 605
606 606 function[4].sort(reverse=True, key=lambda i: i.selfseconds())
607 607 for stat in function[4]:
608 608 # only show line numbers for significant locations (>1% time spent)
609 609 if stat.selfpercent() > 1:
610 610 source = stat.site.getsource(25)
611 611 if sys.version_info.major >= 3 and not isinstance(
612 612 source, bytes
613 613 ):
614 614 source = pycompat.bytestr(source)
615 615
616 616 stattuple = (
617 617 stat.selfpercent(),
618 618 stat.selfseconds(),
619 619 stat.site.lineno,
620 620 source,
621 621 )
622 622
623 623 fp.write(b'%33.0f%% %6.2f line %d: %s\n' % stattuple)
624 624
625 625
626 626 def display_about_method(data, fp, function=None, **kwargs):
627 627 if function is None:
628 628 raise Exception(b"Invalid function")
629 629
630 630 filename = None
631 631 if b':' in function:
632 632 filename, function = function.split(b':')
633 633
634 634 relevant_samples = 0
635 635 parents = {}
636 636 children = {}
637 637
638 638 for sample in data.samples:
639 639 for i, site in enumerate(sample.stack):
640 640 if site.function == function and (
641 641 not filename or site.filename() == filename
642 642 ):
643 643 relevant_samples += 1
644 644 if i != len(sample.stack) - 1:
645 645 parent = sample.stack[i + 1]
646 646 if parent in parents:
647 647 parents[parent] = parents[parent] + 1
648 648 else:
649 649 parents[parent] = 1
650 650
651 651 if site in children:
652 652 children[site] = children[site] + 1
653 653 else:
654 654 children[site] = 1
655 655
656 656 parents = [(parent, count) for parent, count in pycompat.iteritems(parents)]
657 657 parents.sort(reverse=True, key=lambda x: x[1])
658 658 for parent, count in parents:
659 659 fp.write(
660 660 b'%6.2f%% %s:%s line %s: %s\n'
661 661 % (
662 662 count / relevant_samples * 100,
663 663 pycompat.fsencode(parent.filename()),
664 664 pycompat.sysbytes(parent.function),
665 665 parent.lineno,
666 666 pycompat.sysbytes(parent.getsource(50)),
667 667 )
668 668 )
669 669
670 670 stats = SiteStats.buildstats(data.samples)
671 671 stats = [
672 672 s
673 673 for s in stats
674 674 if s.site.function == function
675 675 and (not filename or s.site.filename() == filename)
676 676 ]
677 677
678 678 total_cum_sec = 0
679 679 total_self_sec = 0
680 680 total_self_percent = 0
681 681 total_cum_percent = 0
682 682 for stat in stats:
683 683 total_cum_sec += stat.totalseconds()
684 684 total_self_sec += stat.selfseconds()
685 685 total_self_percent += stat.selfpercent()
686 686 total_cum_percent += stat.totalpercent()
687 687
688 688 fp.write(
689 689 b'\n %s:%s Total: %0.2fs (%0.2f%%) Self: %0.2fs (%0.2f%%)\n\n'
690 690 % (
691 691 pycompat.sysbytes(filename or b'___'),
692 692 pycompat.sysbytes(function),
693 693 total_cum_sec,
694 694 total_cum_percent,
695 695 total_self_sec,
696 696 total_self_percent,
697 697 )
698 698 )
699 699
700 700 children = [(child, count) for child, count in pycompat.iteritems(children)]
701 701 children.sort(reverse=True, key=lambda x: x[1])
702 702 for child, count in children:
703 703 fp.write(
704 704 b' %6.2f%% line %s: %s\n'
705 705 % (
706 706 count / relevant_samples * 100,
707 707 child.lineno,
708 708 pycompat.sysbytes(child.getsource(50)),
709 709 )
710 710 )
711 711
712 712
713 713 def display_hotpath(data, fp, limit=0.05, **kwargs):
714 714 class HotNode(object):
715 715 def __init__(self, site):
716 716 self.site = site
717 717 self.count = 0
718 718 self.children = {}
719 719
720 720 def add(self, stack, time):
721 721 self.count += time
722 722 site = stack[0]
723 723 child = self.children.get(site)
724 724 if not child:
725 725 child = HotNode(site)
726 726 self.children[site] = child
727 727
728 728 if len(stack) > 1:
729 729 i = 1
730 730 # Skip boiler plate parts of the stack
731 731 while i < len(stack) and stack[i].skipname() in skips:
732 732 i += 1
733 733 if i < len(stack):
734 734 child.add(stack[i:], time)
735 735 else:
736 736 # Normally this is done by the .add() calls
737 737 child.count += time
738 738
739 739 root = HotNode(None)
740 740 lasttime = data.samples[0].time
741 741 for sample in data.samples:
742 742 root.add(sample.stack[::-1], sample.time - lasttime)
743 743 lasttime = sample.time
744 744 showtime = kwargs.get('showtime', True)
745 745
746 746 def _write(node, depth, multiple_siblings):
747 747 site = node.site
748 748 visiblechildren = [
749 749 c
750 750 for c in pycompat.itervalues(node.children)
751 751 if c.count >= (limit * root.count)
752 752 ]
753 753 if site:
754 754 indent = depth * 2 - 1
755 755 filename = (site.filename() + b':').ljust(15)
756 756 function = site.function
757 757
758 758 # lots of string formatting
759 759 listpattern = (
760 760 b''.ljust(indent)
761 761 + (b'\\' if multiple_siblings else b'|')
762 762 + b' %4.1f%%'
763 763 + (b' %5.2fs' % node.count if showtime else b'')
764 764 + b' %s %s'
765 765 )
766 766 liststring = listpattern % (
767 767 node.count / root.count * 100,
768 768 filename,
769 769 function,
770 770 )
771 codepattern = b'%' + (b'%d' % (55 - len(liststring))) + b's %d: %s'
771 # 4 to account for the word 'line'
772 spacing_len = max(4, 55 - len(liststring))
773 prefix = b''
774 if spacing_len == 4:
775 prefix = b', '
776
777 codepattern = b'%s%s %d: %s%s'
772 778 codestring = codepattern % (
773 b'line',
779 prefix,
780 b'line'.rjust(spacing_len),
774 781 site.lineno,
782 b''.ljust(max(0, 4 - len(str(site.lineno)))),
775 783 site.getsource(30),
776 784 )
777 785
778 786 finalstring = liststring + codestring
779 787 childrensamples = sum(
780 788 [c.count for c in pycompat.itervalues(node.children)]
781 789 )
782 790 # Make frames that performed more than 10% of the operation red
783 791 if node.count - childrensamples > (0.1 * root.count):
784 792 finalstring = b'\033[91m' + finalstring + b'\033[0m'
785 793 # Make frames that didn't actually perform work dark grey
786 794 elif node.count - childrensamples == 0:
787 795 finalstring = b'\033[90m' + finalstring + b'\033[0m'
788 796 fp.write(finalstring + b'\n')
789 797
790 798 newdepth = depth
791 799 if len(visiblechildren) > 1 or multiple_siblings:
792 800 newdepth += 1
793 801
794 802 visiblechildren.sort(reverse=True, key=lambda x: x.count)
795 803 for child in visiblechildren:
796 804 _write(child, newdepth, len(visiblechildren) > 1)
797 805
798 806 if root.count > 0:
799 807 _write(root, 0, False)
800 808
801 809
802 810 def write_to_flame(data, fp, scriptpath=None, outputfile=None, **kwargs):
803 811 if scriptpath is None:
804 812 scriptpath = encoding.environ[b'HOME'] + b'/flamegraph.pl'
805 813 if not os.path.exists(scriptpath):
806 814 fp.write(b'error: missing %s\n' % scriptpath)
807 815 fp.write(b'get it here: https://github.com/brendangregg/FlameGraph\n')
808 816 return
809 817
810 818 lines = {}
811 819 for sample in data.samples:
812 820 sites = [s.function for s in sample.stack]
813 821 sites.reverse()
814 822 line = b';'.join(sites)
815 823 if line in lines:
816 824 lines[line] = lines[line] + 1
817 825 else:
818 826 lines[line] = 1
819 827
820 828 fd, path = pycompat.mkstemp()
821 829
822 830 with open(path, b"w+") as file:
823 831 for line, count in pycompat.iteritems(lines):
824 832 file.write(b"%s %d\n" % (line, count))
825 833
826 834 if outputfile is None:
827 835 outputfile = b'~/flamegraph.svg'
828 836
829 837 os.system(b"perl ~/flamegraph.pl %s > %s" % (path, outputfile))
830 838 fp.write(b'Written to %s\n' % outputfile)
831 839
832 840
833 841 _pathcache = {}
834 842
835 843
836 844 def simplifypath(path):
837 845 """Attempt to make the path to a Python module easier to read by
838 846 removing whatever part of the Python search path it was found
839 847 on."""
840 848
841 849 if path in _pathcache:
842 850 return _pathcache[path]
843 851 hgpath = encoding.__file__.rsplit(os.sep, 2)[0]
844 852 for p in [hgpath] + sys.path:
845 853 prefix = p + os.sep
846 854 if path.startswith(prefix):
847 855 path = path[len(prefix) :]
848 856 break
849 857 _pathcache[path] = path
850 858 return path
851 859
852 860
853 861 def write_to_json(data, fp):
854 862 samples = []
855 863
856 864 for sample in data.samples:
857 865 stack = []
858 866
859 867 for frame in sample.stack:
860 868 stack.append(
861 869 (
862 870 pycompat.sysstr(frame.path),
863 871 frame.lineno,
864 872 pycompat.sysstr(frame.function),
865 873 )
866 874 )
867 875
868 876 samples.append((sample.time, stack))
869 877
870 878 data = json.dumps(samples)
871 879 if not isinstance(data, bytes):
872 880 data = data.encode('utf-8')
873 881
874 882 fp.write(data)
875 883
876 884
877 885 def write_to_chrome(data, fp, minthreshold=0.005, maxthreshold=0.999):
878 886 samples = []
879 887 laststack = collections.deque()
880 888 lastseen = collections.deque()
881 889
882 890 # The Chrome tracing format allows us to use a compact stack
883 891 # representation to save space. It's fiddly but worth it.
884 892 # We maintain a bijection between stack and ID.
885 893 stack2id = {}
886 894 id2stack = [] # will eventually be rendered
887 895
888 896 def stackid(stack):
889 897 if not stack:
890 898 return
891 899 if stack in stack2id:
892 900 return stack2id[stack]
893 901 parent = stackid(stack[1:])
894 902 myid = len(stack2id)
895 903 stack2id[stack] = myid
896 904 id2stack.append(dict(category=stack[0][0], name='%s %s' % stack[0]))
897 905 if parent is not None:
898 906 id2stack[-1].update(parent=parent)
899 907 return myid
900 908
901 909 # The sampling profiler can sample multiple times without
902 910 # advancing the clock, potentially causing the Chrome trace viewer
903 911 # to render single-pixel columns that we cannot zoom in on. We
904 912 # work around this by pretending that zero-duration samples are a
905 913 # millisecond in length.
906 914
907 915 clamp = 0.001
908 916
909 917 # We provide knobs that by default attempt to filter out stack
910 918 # frames that are too noisy:
911 919 #
912 920 # * A few take almost all execution time. These are usually boring
913 921 # setup functions, giving a stack that is deep but uninformative.
914 922 #
915 923 # * Numerous samples take almost no time, but introduce lots of
916 924 # noisy, oft-deep "spines" into a rendered profile.
917 925
918 926 blacklist = set()
919 927 totaltime = data.samples[-1].time - data.samples[0].time
920 928 minthreshold = totaltime * minthreshold
921 929 maxthreshold = max(totaltime * maxthreshold, clamp)
922 930
923 931 def poplast():
924 932 oldsid = stackid(tuple(laststack))
925 933 oldcat, oldfunc = laststack.popleft()
926 934 oldtime, oldidx = lastseen.popleft()
927 935 duration = sample.time - oldtime
928 936 if minthreshold <= duration <= maxthreshold:
929 937 # ensure no zero-duration events
930 938 sampletime = max(oldtime + clamp, sample.time)
931 939 samples.append(
932 940 dict(
933 941 ph='E',
934 942 name=oldfunc,
935 943 cat=oldcat,
936 944 sf=oldsid,
937 945 ts=sampletime * 1e6,
938 946 pid=0,
939 947 )
940 948 )
941 949 else:
942 950 blacklist.add(oldidx)
943 951
944 952 # Much fiddling to synthesize correctly(ish) nested begin/end
945 953 # events given only stack snapshots.
946 954
947 955 for sample in data.samples:
948 956 stack = tuple(
949 957 (
950 958 (
951 959 '%s:%d'
952 960 % (simplifypath(pycompat.sysstr(frame.path)), frame.lineno),
953 961 pycompat.sysstr(frame.function),
954 962 )
955 963 for frame in sample.stack
956 964 )
957 965 )
958 966 qstack = collections.deque(stack)
959 967 if laststack == qstack:
960 968 continue
961 969 while laststack and qstack and laststack[-1] == qstack[-1]:
962 970 laststack.pop()
963 971 qstack.pop()
964 972 while laststack:
965 973 poplast()
966 974 for f in reversed(qstack):
967 975 lastseen.appendleft((sample.time, len(samples)))
968 976 laststack.appendleft(f)
969 977 path, name = f
970 978 sid = stackid(tuple(laststack))
971 979 samples.append(
972 980 dict(
973 981 ph='B',
974 982 name=name,
975 983 cat=path,
976 984 ts=sample.time * 1e6,
977 985 sf=sid,
978 986 pid=0,
979 987 )
980 988 )
981 989 laststack = collections.deque(stack)
982 990 while laststack:
983 991 poplast()
984 992 events = [
985 993 sample for idx, sample in enumerate(samples) if idx not in blacklist
986 994 ]
987 995 frames = collections.OrderedDict(
988 996 (str(k), v) for (k, v) in enumerate(id2stack)
989 997 )
990 998 data = json.dumps(dict(traceEvents=events, stackFrames=frames), indent=1)
991 999 if not isinstance(data, bytes):
992 1000 data = data.encode('utf-8')
993 1001 fp.write(data)
994 1002 fp.write(b'\n')
995 1003
996 1004
997 1005 def printusage():
998 1006 print(
999 1007 r"""
1000 1008 The statprof command line allows you to inspect the last profile's results in
1001 1009 the following forms:
1002 1010
1003 1011 usage:
1004 1012 hotpath [-l --limit percent]
1005 1013 Shows a graph of calls with the percent of time each takes.
1006 1014 Red calls take over 10%% of the total time themselves.
1007 1015 lines
1008 1016 Shows the actual sampled lines.
1009 1017 functions
1010 1018 Shows the samples grouped by function.
1011 1019 function [filename:]functionname
1012 1020 Shows the callers and callees of a particular function.
1013 1021 flame [-s --script-path] [-o --output-file path]
1014 1022 Writes out a flamegraph to output-file (defaults to ~/flamegraph.svg)
1015 1023 Requires that ~/flamegraph.pl exist.
1016 1024 (Specify alternate script path with --script-path.)"""
1017 1025 )
1018 1026
1019 1027
1020 1028 def main(argv=None):
1021 1029 if argv is None:
1022 1030 argv = sys.argv
1023 1031
1024 1032 if len(argv) == 1:
1025 1033 printusage()
1026 1034 return 0
1027 1035
1028 1036 displayargs = {}
1029 1037
1030 1038 optstart = 2
1031 1039 displayargs[b'function'] = None
1032 1040 if argv[1] == 'hotpath':
1033 1041 displayargs[b'format'] = DisplayFormats.Hotpath
1034 1042 elif argv[1] == 'lines':
1035 1043 displayargs[b'format'] = DisplayFormats.ByLine
1036 1044 elif argv[1] == 'functions':
1037 1045 displayargs[b'format'] = DisplayFormats.ByMethod
1038 1046 elif argv[1] == 'function':
1039 1047 displayargs[b'format'] = DisplayFormats.AboutMethod
1040 1048 displayargs[b'function'] = argv[2]
1041 1049 optstart = 3
1042 1050 elif argv[1] == 'flame':
1043 1051 displayargs[b'format'] = DisplayFormats.FlameGraph
1044 1052 else:
1045 1053 printusage()
1046 1054 return 0
1047 1055
1048 1056 # process options
1049 1057 try:
1050 1058 opts, args = pycompat.getoptb(
1051 1059 sys.argv[optstart:],
1052 1060 b"hl:f:o:p:",
1053 1061 [b"help", b"limit=", b"file=", b"output-file=", b"script-path="],
1054 1062 )
1055 1063 except getopt.error as msg:
1056 1064 print(msg)
1057 1065 printusage()
1058 1066 return 2
1059 1067
1060 1068 displayargs[b'limit'] = 0.05
1061 1069 path = None
1062 1070 for o, value in opts:
1063 1071 if o in ("-l", "--limit"):
1064 1072 displayargs[b'limit'] = float(value)
1065 1073 elif o in ("-f", "--file"):
1066 1074 path = value
1067 1075 elif o in ("-o", "--output-file"):
1068 1076 displayargs[b'outputfile'] = value
1069 1077 elif o in ("-p", "--script-path"):
1070 1078 displayargs[b'scriptpath'] = value
1071 1079 elif o in ("-h", "help"):
1072 1080 printusage()
1073 1081 return 0
1074 1082 else:
1075 1083 assert False, b"unhandled option %s" % o
1076 1084
1077 1085 if not path:
1078 1086 print('must specify --file to load')
1079 1087 return 1
1080 1088
1081 1089 load_data(path=path)
1082 1090
1083 1091 display(**pycompat.strkwargs(displayargs))
1084 1092
1085 1093 return 0
1086 1094
1087 1095
1088 1096 if __name__ == "__main__":
1089 1097 sys.exit(main())
@@ -1,169 +1,169 b''
1 1 test --time
2 2
3 3 $ hg --time help -q help 2>&1 | grep time > /dev/null
4 4 $ hg init a
5 5 $ cd a
6 6
7 7 Function to check that statprof ran
8 8 $ statprofran () {
9 9 > egrep 'Sample count:|No samples recorded' > /dev/null
10 10 > }
11 11
12 12 test --profile
13 13
14 14 $ hg st --profile 2>&1 | statprofran
15 15
16 16 Abreviated version
17 17
18 18 $ hg st --prof 2>&1 | statprofran
19 19
20 20 In alias
21 21
22 22 $ hg --config "alias.profst=status --profile" profst 2>&1 | statprofran
23 23
24 24 #if lsprof
25 25
26 26 $ prof='hg --config profiling.type=ls --profile'
27 27
28 28 $ $prof st 2>../out
29 29 $ grep CallCount ../out > /dev/null || cat ../out
30 30
31 31 $ $prof --config profiling.output=../out st
32 32 $ grep CallCount ../out > /dev/null || cat ../out
33 33
34 34 $ $prof --config profiling.output=blackbox --config extensions.blackbox= st
35 35 $ grep CallCount .hg/blackbox.log > /dev/null || cat .hg/blackbox.log
36 36
37 37 $ $prof --config profiling.format=text st 2>../out
38 38 $ grep CallCount ../out > /dev/null || cat ../out
39 39
40 40 $ echo "[profiling]" >> $HGRCPATH
41 41 $ echo "format=kcachegrind" >> $HGRCPATH
42 42
43 43 $ $prof st 2>../out
44 44 $ grep 'events: Ticks' ../out > /dev/null || cat ../out
45 45
46 46 $ $prof --config profiling.output=../out st
47 47 $ grep 'events: Ticks' ../out > /dev/null || cat ../out
48 48
49 49 #endif
50 50
51 51 #if lsprof serve
52 52
53 53 Profiling of HTTP requests works
54 54
55 55 $ $prof --config profiling.format=text --config profiling.output=../profile.log serve -d -p $HGPORT --pid-file ../hg.pid -A ../access.log
56 56 $ cat ../hg.pid >> $DAEMON_PIDS
57 57 $ hg -q clone -U http://localhost:$HGPORT ../clone
58 58
59 59 A single profile is logged because file logging doesn't append
60 60 $ grep CallCount ../profile.log | wc -l
61 61 \s*1 (re)
62 62
63 63 #endif
64 64
65 65 Install an extension that can sleep and guarantee a profiler has time to run
66 66
67 $ cat >> sleepext.py << EOF
67 $ cat >> sleepext_with_a_long_filename.py << EOF
68 68 > import time
69 69 > from mercurial import registrar
70 70 > cmdtable = {}
71 71 > command = registrar.command(cmdtable)
72 72 > @command(b'sleep', [], b'hg sleep')
73 > def sleep(ui, *args, **kwargs):
73 > def sleep_for_at_least_one_stat_cycle(ui, *args, **kwargs):
74 74 > time.sleep(0.1)
75 75 > EOF
76 76
77 77 $ cat >> $HGRCPATH << EOF
78 78 > [extensions]
79 > sleep = `pwd`/sleepext.py
79 > sleep = `pwd`/sleepext_with_a_long_filename.py
80 80 > EOF
81 81
82 82 statistical profiler works
83 83
84 84 $ hg --profile sleep 2>../out
85 85 $ cat ../out | statprofran
86 86
87 87 Various statprof formatters work
88 88
89 89 $ hg --profile --config profiling.statformat=byline sleep 2>../out || cat ../out
90 90 $ grep -v _path_stat ../out | head -n 3
91 91 % cumulative self
92 92 time seconds seconds name
93 * sleepext.py:*:sleep (glob)
93 * sleepext_with_a_long_filename.py:*:sleep_for_at_least_one_stat_cycle (glob)
94 94 $ cat ../out | statprofran
95 95
96 96 $ hg --profile --config profiling.statformat=bymethod sleep 2>../out || cat ../out
97 97 $ head -n 1 ../out
98 98 % cumulative self
99 99 $ cat ../out | statprofran
100 100
101 101 $ hg --profile --config profiling.statformat=hotpath sleep 2>../out || cat ../out
102 102 $ cat ../out | statprofran
103 $ grep sleepext.py ../out
104 .* [0-9.]+% [0-9.]+s sleepext.py:\s*sleep line 7: time\.sleep.* (re)
103 $ grep sleepext_with_a_long_filename.py ../out
104 .* [0-9.]+% [0-9.]+s sleepext_with_a_long_filename.py:\s*sleep_for_at_least_one_stat_cycle, line 7: time\.sleep.* (re)
105 105
106 106 $ hg --profile --config profiling.statformat=json sleep 2>../out || cat ../out
107 107 $ cat ../out
108 108 \[\[-?\d+.* (re)
109 109
110 110 statprof can be used as a standalone module
111 111
112 112 $ "$PYTHON" -m mercurial.statprof hotpath
113 113 must specify --file to load
114 114 [1]
115 115
116 116 $ cd ..
117 117
118 118 #if no-chg
119 119 profiler extension could be loaded before other extensions
120 120
121 121 $ cat > fooprof.py <<EOF
122 122 > from __future__ import absolute_import
123 123 > import contextlib
124 124 > import sys
125 125 > @contextlib.contextmanager
126 126 > def profile(ui, fp):
127 127 > print('fooprof: start profile')
128 128 > sys.stdout.flush()
129 129 > yield
130 130 > print('fooprof: end profile')
131 131 > sys.stdout.flush()
132 132 > def extsetup(ui):
133 133 > ui.write(b'fooprof: loaded\n')
134 134 > EOF
135 135
136 136 $ cat > otherextension.py <<EOF
137 137 > from __future__ import absolute_import
138 138 > def extsetup(ui):
139 139 > ui.write(b'otherextension: loaded\n')
140 140 > EOF
141 141
142 142 $ hg init b
143 143 $ cd b
144 144 $ cat >> .hg/hgrc <<EOF
145 145 > [extensions]
146 146 > other = $TESTTMP/otherextension.py
147 147 > fooprof = $TESTTMP/fooprof.py
148 148 > EOF
149 149
150 150 $ hg root
151 151 otherextension: loaded
152 152 fooprof: loaded
153 153 $TESTTMP/b
154 154 $ HGPROF=fooprof hg root --profile
155 155 fooprof: loaded
156 156 fooprof: start profile
157 157 otherextension: loaded
158 158 $TESTTMP/b
159 159 fooprof: end profile
160 160
161 161 $ HGPROF=other hg root --profile 2>&1 | head -n 2
162 162 otherextension: loaded
163 163 unrecognized profiler 'other' - ignored
164 164
165 165 $ HGPROF=unknown hg root --profile 2>&1 | head -n 1
166 166 unrecognized profiler 'unknown' - ignored
167 167
168 168 $ cd ..
169 169 #endif
General Comments 0
You need to be logged in to leave comments. Login now