##// END OF EJS Templates
parsers: inline fields of dirstate values in C version...
parsers: inline fields of dirstate values in C version Previously, while unpacking the dirstate we'd create 3-4 new CPython objects for most dirstate values: - the state is a single character string, which is pooled by CPython - the mode is a new object if it isn't 0 due to being in the lookup set - the size is a new object if it is greater than 255 - the mtime is a new object if it isn't -1 due to being in the lookup set - the tuple to contain them all In some cases such as regular hg status, we actually look at all the objects. In other cases like hg add, hg status for a subdirectory, or hg status with the third-party hgwatchman enabled, we look at almost none of the objects. This patch eliminates most object creation in these cases by defining a custom C struct that is exposed to Python with an interface similar to a tuple. Only when tuple elements are actually requested are the respective objects created. The gains, where they're expected, are significant. The following tests are run against a working copy with over 270,000 files. parse_dirstate becomes significantly faster: $ hg perfdirstate before: wall 0.186437 comb 0.180000 user 0.160000 sys 0.020000 (best of 35) after: wall 0.093158 comb 0.100000 user 0.090000 sys 0.010000 (best of 95) and as a result, several commands benefit: $ time hg status # with hgwatchman enabled before: 0.42s user 0.14s system 99% cpu 0.563 total after: 0.34s user 0.12s system 99% cpu 0.471 total $ time hg add new-file before: 0.85s user 0.18s system 99% cpu 1.033 total after: 0.76s user 0.17s system 99% cpu 0.931 total There is a slight regression in regular status performance, but this is fixed in an upcoming patch.

File last commit:

r20034:1e5b38a9 default
r21809:e250b830 default
Show More
worker.py
158 lines | 4.4 KiB | text/x-python | PythonLexer
Bryan O'Sullivan
worker: count the number of CPUs...
r18635 # worker.py - master-slave parallelism support
#
# Copyright 2013 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Bryan O'Sullivan
worker: estimate whether it's worth running a task in parallel...
r18636 from i18n import _
Augie Fackler
cleanup: move stdlib imports to their own import statement...
r20034 import errno, os, signal, sys, threading
import util
Bryan O'Sullivan
worker: count the number of CPUs...
r18635
def countcpus():
'''try to count the number of CPUs on the system'''
# posix
try:
n = int(os.sysconf('SC_NPROCESSORS_ONLN'))
if n > 0:
return n
except (AttributeError, ValueError):
pass
# windows
try:
n = int(os.environ['NUMBER_OF_PROCESSORS'])
if n > 0:
return n
except (KeyError, ValueError):
pass
return 1
Bryan O'Sullivan
worker: estimate whether it's worth running a task in parallel...
r18636
def _numworkers(ui):
s = ui.config('worker', 'numcpus')
if s:
try:
n = int(s)
if n >= 1:
return n
except ValueError:
raise util.Abort(_('number of cpus must be an integer'))
return min(max(countcpus(), 4), 32)
if os.name == 'posix':
_startupcost = 0.01
else:
_startupcost = 1e30
def worthwhile(ui, costperop, nops):
'''try to determine whether the benefit of multiple processes can
outweigh the cost of starting them'''
linear = costperop * nops
workers = _numworkers(ui)
benefit = linear - (_startupcost * workers + linear / workers)
return benefit >= 0.15
Bryan O'Sullivan
worker: partition a list (of tasks) into equal-sized chunks
r18637
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638 def worker(ui, costperarg, func, staticargs, args):
'''run a function, possibly in parallel in multiple worker
processes.
returns a progress iterator
costperarg - cost of a single task
func - function to run
staticargs - arguments to pass to every invocation of the function
args - arguments to split into chunks, to pass to individual
workers
'''
if worthwhile(ui, costperarg, len(args)):
return _platformworker(ui, func, staticargs, args)
return func(*staticargs + (args,))
def _posixworker(ui, func, staticargs, args):
rfd, wfd = os.pipe()
workers = _numworkers(ui)
Bryan O'Sullivan
worker: fix a race in SIGINT handling...
r18708 oldhandler = signal.getsignal(signal.SIGINT)
signal.signal(signal.SIGINT, signal.SIG_IGN)
Bryan O'Sullivan
worker: handle worker failures more aggressively...
r18709 pids, problem = [], [0]
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638 for pargs in partition(args, workers):
pid = os.fork()
if pid == 0:
Bryan O'Sullivan
worker: fix a race in SIGINT handling...
r18708 signal.signal(signal.SIGINT, oldhandler)
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638 try:
os.close(rfd)
for i, item in func(*(staticargs + (pargs,))):
os.write(wfd, '%d %s\n' % (i, item))
os._exit(0)
except KeyboardInterrupt:
os._exit(255)
Matt Mackall
worker: properly report errors from worker processes (issue3982)
r19408 # other exceptions are allowed to propagate, we rely
# on lock.py's pid checks to avoid release callbacks
Bryan O'Sullivan
worker: handle worker failures more aggressively...
r18709 pids.append(pid)
pids.reverse()
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638 os.close(wfd)
fp = os.fdopen(rfd, 'rb', 0)
Bryan O'Sullivan
worker: handle worker failures more aggressively...
r18709 def killworkers():
# if one worker bails, there's no good reason to wait for the rest
for p in pids:
try:
os.kill(p, signal.SIGTERM)
except OSError, err:
if err.errno != errno.ESRCH:
raise
def waitforworkers():
for _ in pids:
st = _exitstatus(os.wait()[1])
Matt Mackall
worker: check problem state correctly (issue3982)...
r19406 if st and not problem[0]:
Bryan O'Sullivan
worker: handle worker failures more aggressively...
r18709 problem[0] = st
killworkers()
t = threading.Thread(target=waitforworkers)
t.start()
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638 def cleanup():
signal.signal(signal.SIGINT, oldhandler)
Bryan O'Sullivan
worker: handle worker failures more aggressively...
r18709 t.join()
status = problem[0]
if status:
if status < 0:
os.kill(os.getpid(), -status)
sys.exit(status)
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638 try:
for line in fp:
l = line.split(' ', 1)
yield int(l[0]), l[1][:-1]
except: # re-raises
Bryan O'Sullivan
worker: handle worker failures more aggressively...
r18709 killworkers()
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638 cleanup()
raise
cleanup()
Bryan O'Sullivan
worker: on error, exit similarly to the first failing worker...
r18707 def _posixexitstatus(code):
'''convert a posix exit status into the same form returned by
os.spawnv
returns None if the process was stopped instead of exiting'''
if os.WIFEXITED(code):
return os.WEXITSTATUS(code)
elif os.WIFSIGNALED(code):
return -os.WTERMSIG(code)
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638 if os.name != 'nt':
_platformworker = _posixworker
Bryan O'Sullivan
worker: on error, exit similarly to the first failing worker...
r18707 _exitstatus = _posixexitstatus
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638
Bryan O'Sullivan
worker: partition a list (of tasks) into equal-sized chunks
r18637 def partition(lst, nslices):
'''partition a list into N slices of equal size'''
n = len(lst)
chunk, slop = n / nslices, n % nslices
end = 0
for i in xrange(nslices):
start = end
end = start + chunk
if slop:
end += 1
slop -= 1
yield lst[start:end]