##// END OF EJS Templates
streamclone: support for producing and consuming stream clone bundles...
streamclone: support for producing and consuming stream clone bundles Up to this point, stream clones only existed as a dynamically generated data format produced and consumed during streaming clones. In order to support this efficient cloning format with the clone bundles feature, we need a more formal, on disk representation of the streaming clone data. This patch introduces a new "bundle" type for streaming clones. Unlike existing bundles, it does not contain changegroup data. It does, however, share the same concepts like the 4 byte header which identifies the type of data that follows and the 2 byte abbreviation for compression types (of which only "UN" is currently supported). The new bundle format is essentially the existing stream clone version 1 data format with some headers at the beginning. Content negotiation at stream clone request time checked for repository format/requirements compatibility before initiating a stream clone. We can't do active content negotiation when using clone bundles. So, we put this set of requirements inside the payload so consumers have a built-in mechanism for checking compatibility before reading and applying lots of data. Of course, we will also advertise this requirements set in clone bundles. But that's for another patch. We currently don't have a mechanism to produce and consume this new bundle format. This will be implemented in upcoming patches. It's worth noting that if a legacy client attempts to `hg unbundle` a stream clone bundle (with the "HGS1" header), it will abort with: "unknown bundle version S1," which seems appropriate.

File last commit:

r26587:56b2bcea default
r26755:bb0b955d default
Show More
worker.py
165 lines | 4.4 KiB | text/x-python | PythonLexer
Bryan O'Sullivan
worker: count the number of CPUs...
r18635 # worker.py - master-slave parallelism support
#
# Copyright 2013 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Gregory Szorc
worker: use absolute_import
r25992 from __future__ import absolute_import
import errno
import os
import signal
import sys
import threading
from .i18n import _
Pierre-Yves David
error: get Abort from 'error' instead of 'util'...
r26587 from . import error
Bryan O'Sullivan
worker: count the number of CPUs...
r18635
def countcpus():
'''try to count the number of CPUs on the system'''
Gregory Szorc
worker: restore old countcpus code (issue4869)...
r26568
# posix
Bryan O'Sullivan
worker: count the number of CPUs...
r18635 try:
Gregory Szorc
worker: restore old countcpus code (issue4869)...
r26568 n = int(os.sysconf('SC_NPROCESSORS_ONLN'))
if n > 0:
return n
except (AttributeError, ValueError):
pass
# windows
try:
n = int(os.environ['NUMBER_OF_PROCESSORS'])
if n > 0:
return n
except (KeyError, ValueError):
pass
return 1
Bryan O'Sullivan
worker: estimate whether it's worth running a task in parallel...
r18636
def _numworkers(ui):
s = ui.config('worker', 'numcpus')
if s:
try:
n = int(s)
if n >= 1:
return n
except ValueError:
Pierre-Yves David
error: get Abort from 'error' instead of 'util'...
r26587 raise error.Abort(_('number of cpus must be an integer'))
Bryan O'Sullivan
worker: estimate whether it's worth running a task in parallel...
r18636 return min(max(countcpus(), 4), 32)
if os.name == 'posix':
_startupcost = 0.01
else:
_startupcost = 1e30
def worthwhile(ui, costperop, nops):
'''try to determine whether the benefit of multiple processes can
outweigh the cost of starting them'''
linear = costperop * nops
workers = _numworkers(ui)
benefit = linear - (_startupcost * workers + linear / workers)
return benefit >= 0.15
Bryan O'Sullivan
worker: partition a list (of tasks) into equal-sized chunks
r18637
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638 def worker(ui, costperarg, func, staticargs, args):
'''run a function, possibly in parallel in multiple worker
processes.
returns a progress iterator
costperarg - cost of a single task
func - function to run
staticargs - arguments to pass to every invocation of the function
args - arguments to split into chunks, to pass to individual
workers
'''
if worthwhile(ui, costperarg, len(args)):
return _platformworker(ui, func, staticargs, args)
return func(*staticargs + (args,))
def _posixworker(ui, func, staticargs, args):
rfd, wfd = os.pipe()
workers = _numworkers(ui)
Bryan O'Sullivan
worker: fix a race in SIGINT handling...
r18708 oldhandler = signal.getsignal(signal.SIGINT)
signal.signal(signal.SIGINT, signal.SIG_IGN)
Bryan O'Sullivan
worker: handle worker failures more aggressively...
r18709 pids, problem = [], [0]
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638 for pargs in partition(args, workers):
pid = os.fork()
if pid == 0:
Bryan O'Sullivan
worker: fix a race in SIGINT handling...
r18708 signal.signal(signal.SIGINT, oldhandler)
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638 try:
os.close(rfd)
for i, item in func(*(staticargs + (pargs,))):
os.write(wfd, '%d %s\n' % (i, item))
os._exit(0)
except KeyboardInterrupt:
os._exit(255)
Matt Mackall
worker: properly report errors from worker processes (issue3982)
r19408 # other exceptions are allowed to propagate, we rely
# on lock.py's pid checks to avoid release callbacks
Bryan O'Sullivan
worker: handle worker failures more aggressively...
r18709 pids.append(pid)
pids.reverse()
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638 os.close(wfd)
fp = os.fdopen(rfd, 'rb', 0)
Bryan O'Sullivan
worker: handle worker failures more aggressively...
r18709 def killworkers():
# if one worker bails, there's no good reason to wait for the rest
for p in pids:
try:
os.kill(p, signal.SIGTERM)
Gregory Szorc
global: mass rewrite to use modern exception syntax...
r25660 except OSError as err:
Bryan O'Sullivan
worker: handle worker failures more aggressively...
r18709 if err.errno != errno.ESRCH:
raise
def waitforworkers():
Mads Kiilerich
cleanup: avoid _ for local unused tmp variables - that is reserved for i18n...
r22199 for _pid in pids:
Bryan O'Sullivan
worker: handle worker failures more aggressively...
r18709 st = _exitstatus(os.wait()[1])
Matt Mackall
worker: check problem state correctly (issue3982)...
r19406 if st and not problem[0]:
Bryan O'Sullivan
worker: handle worker failures more aggressively...
r18709 problem[0] = st
killworkers()
t = threading.Thread(target=waitforworkers)
t.start()
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638 def cleanup():
signal.signal(signal.SIGINT, oldhandler)
Bryan O'Sullivan
worker: handle worker failures more aggressively...
r18709 t.join()
status = problem[0]
if status:
if status < 0:
os.kill(os.getpid(), -status)
sys.exit(status)
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638 try:
for line in fp:
l = line.split(' ', 1)
yield int(l[0]), l[1][:-1]
except: # re-raises
Bryan O'Sullivan
worker: handle worker failures more aggressively...
r18709 killworkers()
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638 cleanup()
raise
cleanup()
Bryan O'Sullivan
worker: on error, exit similarly to the first failing worker...
r18707 def _posixexitstatus(code):
'''convert a posix exit status into the same form returned by
os.spawnv
returns None if the process was stopped instead of exiting'''
if os.WIFEXITED(code):
return os.WEXITSTATUS(code)
elif os.WIFSIGNALED(code):
return -os.WTERMSIG(code)
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638 if os.name != 'nt':
_platformworker = _posixworker
Bryan O'Sullivan
worker: on error, exit similarly to the first failing worker...
r18707 _exitstatus = _posixexitstatus
Bryan O'Sullivan
worker: allow a function to be run in multiple worker processes...
r18638
Bryan O'Sullivan
worker: partition a list (of tasks) into equal-sized chunks
r18637 def partition(lst, nslices):
'''partition a list into N slices of equal size'''
n = len(lst)
chunk, slop = n / nslices, n % nslices
end = 0
for i in xrange(nslices):
start = end
end = start + chunk
if slop:
end += 1
slop -= 1
yield lst[start:end]