##// END OF EJS Templates
configs: moved all gunicorn config to python file
configs: moved all gunicorn config to python file

File last commit:

r1143:7071fe53 default
r1143:7071fe53 default
Show More
gunicorn_config.py
506 lines | 16.2 KiB | text/x-python | PythonLexer
/ configs / gunicorn_config.py
config: added example gunicorn configuration
r476 """
gunicorn: updated gunicorn config based on release changes
r808 Gunicorn config extension and hooks. This config file adds some extra settings and memory management.
Gunicorn configuration should be managed by .ini files entries of RhodeCode or VCSServer
config: added example gunicorn configuration
r476 """
configs: moved most of configuration back to .ini files instead of gunicorn file
r801 import gc
import os
import sys
gunicorn: updated gunicorn config to add memory monitoring
r761 import math
config: added example gunicorn configuration
r476 import time
import threading
import traceback
gunicorn: updated gunicorn config to add memory monitoring
r761 import random
configs: fixed IP extraction in gunicorn
r1025 import socket
configs: moved all gunicorn config to python file
r1143 import dataclasses
config: added example gunicorn configuration
r476 from gunicorn.glogging import Logger
gunicorn: updated gunicorn config to add memory monitoring
r761 def get_workers():
import multiprocessing
return multiprocessing.cpu_count() * 2 + 1
configs: moved all gunicorn config to python file
r1143
bind = "127.0.0.1:10010"
# Error logging output for gunicorn (-) is stdout
config: added example gunicorn configuration
r476 errorlog = '-'
configs: moved all gunicorn config to python file
r1143
# Access logging output for gunicorn (-) is stdout
config: added example gunicorn configuration
r476 accesslog = '-'
# SERVER MECHANICS
# None == system temp dir
# worker_tmp_dir is recommended to be set to some tmpfs
worker_tmp_dir = None
tmp_upload_dir = None
configs: moved all gunicorn config to python file
r1143 # use re-use port logic
config: updated configs to still not comment out logging configuration....
r1024 #reuse_port = True
config: added example gunicorn configuration
r476 # Custom log format
core: re-implemented the way how configuration can be made...
r1021 #access_log_format = (
# '%(t)s %(p)s INFO [GNCRN] %(h)-15s rqt:%(L)s %(s)s %(b)-6s "%(m)s:%(U)s %(q)s" usr:%(u)s "%(f)s" "%(a)s"')
config: added example gunicorn configuration
r476
configs: added changed require to track logging with loki or logstash
r1019 # loki format for easier parsing in grafana
core: re-implemented the way how configuration can be made...
r1021 access_log_format = (
'time="%(t)s" pid=%(p)s level="INFO" type="[GNCRN]" ip="%(h)-15s" rqt="%(L)s" response_code="%(s)s" response_bytes="%(b)-6s" uri="%(m)s:%(U)s %(q)s" user=":%(u)s" user_agent="%(a)s"')
configs: added changed require to track logging with loki or logstash
r1019
configs: moved all gunicorn config to python file
r1143
# Sets the number of process workers. More workers means more concurrent connections
# RhodeCode can handle at the same time. Each additional worker also it increases
# memory usage as each has it's own set of caches.
# Recommended value is (2 * NUMBER_OF_CPUS + 1), eg 2CPU = 5 workers, but no more
# than 8-10 unless for huge deployments .e.g 700-1000 users.
# `instance_id = *` must be set in the [app:main] section below (which is the default)
# when using more than 1 worker.
workers = 6
# self adjust workers based on CPU count, to use maximum of CPU and not overquota the resources
gunicorn: updated gunicorn config to add memory monitoring
r761 # workers = get_workers()
configs: moved all gunicorn config to python file
r1143 # Gunicorn access log level
loglevel = 'info'
# Process name visible in process list
proc_name = 'rhodecode_vcsserver'
# Type of worker class, one of `sync`, `gevent`
# currently `sync` is the only option allowed.
worker_class = 'sync'
# The maximum number of simultaneous clients. Valid only for gevent
worker_connections = 10
# Max number of requests that worker will handle before being gracefully restarted.
# Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
max_requests = 2000
max_requests_jitter = 30
# The maximum number of pending connections.
# Exceeding this number results in the client getting an error when attempting to connect.
backlog = 64
# Amount of time a worker can spend with handling a request before it
# gets killed and restarted. By default set to 21600 (6hrs)
# Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
timeout = 21600
# The maximum size of HTTP request line in bytes.
# 0 for unlimited
limit_request_line = 0
# Limit the number of HTTP headers fields in a request.
# By default this value is 100 and can't be larger than 32768.
limit_request_fields = 32768
# Limit the allowed size of an HTTP request header field.
# Value is a positive number or 0.
# Setting it to 0 will allow unlimited header field sizes.
limit_request_field_size = 0
# Timeout for graceful workers restart.
# After receiving a restart signal, workers have this much time to finish
# serving requests. Workers still alive after the timeout (starting from the
# receipt of the restart signal) are force killed.
# Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
graceful_timeout = 21600
# The number of seconds to wait for requests on a Keep-Alive connection.
# Generally set in the 1-5 seconds range.
keepalive = 2
# Maximum memory usage that each worker can use before it will receive a
# graceful restart signal 0 = memory monitoring is disabled
# Examples: 268435456 (256MB), 536870912 (512MB)
# 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
memory_max_usage = 0
# How often in seconds to check for memory usage for each gunicorn worker
memory_usage_check_interval = 60
# Threshold value for which we don't recycle worker if GarbageCollection
# frees up enough resources. Before each restart we try to run GC on worker
# in case we get enough free memory after that, restart will not happen.
memory_usage_recovery_threshold = 0.8
@dataclasses.dataclass
class MemoryCheckConfig:
max_usage: int
check_interval: int
recovery_threshold: float
config: added example gunicorn configuration
r476
gunicorn: updated gunicorn config to add memory monitoring
r761 def _get_process_rss(pid=None):
try:
import psutil
if pid:
proc = psutil.Process(pid)
else:
proc = psutil.Process()
return proc.memory_info().rss
except Exception:
return None
configs: moved most of configuration back to .ini files instead of gunicorn file
r801 def _get_config(ini_path):
py3: compat and code fixes
r1042 import configparser
configs: moved most of configuration back to .ini files instead of gunicorn file
r801
try:
config: updated gunicorn config
r807 config = configparser.RawConfigParser()
configs: moved most of configuration back to .ini files instead of gunicorn file
r801 config.read(ini_path)
return config
except Exception:
return None
configs: moved all gunicorn config to python file
r1143 def get_memory_usage_params(config=None):
# memory spec defaults
_memory_max_usage = memory_max_usage
_memory_usage_check_interval = memory_usage_check_interval
_memory_usage_recovery_threshold = memory_usage_recovery_threshold
if config:
ini_path = os.path.abspath(config)
conf = _get_config(ini_path)
section = 'server:main'
if conf and conf.has_section(section):
if conf.has_option(section, 'memory_max_usage'):
_memory_max_usage = conf.getint(section, 'memory_max_usage')
if conf.has_option(section, 'memory_usage_check_interval'):
_memory_usage_check_interval = conf.getint(section, 'memory_usage_check_interval')
if conf.has_option(section, 'memory_usage_recovery_threshold'):
_memory_usage_recovery_threshold = conf.getfloat(section, 'memory_usage_recovery_threshold')
_memory_max_usage = int(os.environ.get('RC_GUNICORN_MEMORY_MAX_USAGE', '')
or _memory_max_usage)
_memory_usage_check_interval = int(os.environ.get('RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL', '')
or _memory_usage_check_interval)
_memory_usage_recovery_threshold = float(os.environ.get('RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD', '')
or _memory_usage_recovery_threshold)
return MemoryCheckConfig(_memory_max_usage, _memory_usage_check_interval, _memory_usage_recovery_threshold)
def _time_with_offset(check_interval):
return time.time() - random.randint(0, check_interval/2.0)
config: added example gunicorn configuration
r476
def pre_fork(server, worker):
pass
gunicorn: updated gunicorn config to add memory monitoring
r761 def post_fork(server, worker):
configs: moved most of configuration back to .ini files instead of gunicorn file
r801
configs: moved all gunicorn config to python file
r1143 memory_conf = get_memory_usage_params()
_memory_max_usage = memory_conf.max_usage
_memory_usage_check_interval = memory_conf.check_interval
_memory_usage_recovery_threshold = memory_conf.recovery_threshold
configs: moved most of configuration back to .ini files instead of gunicorn file
r801
core: re-implemented the way how configuration can be made...
r1021 worker._memory_max_usage = int(os.environ.get('RC_GUNICORN_MEMORY_MAX_USAGE', '')
or _memory_max_usage)
worker._memory_usage_check_interval = int(os.environ.get('RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL', '')
or _memory_usage_check_interval)
worker._memory_usage_recovery_threshold = float(os.environ.get('RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD', '')
or _memory_usage_recovery_threshold)
configs: moved most of configuration back to .ini files instead of gunicorn file
r801
gunicorn: updated gunicorn config to add memory monitoring
r761 # register memory last check time, with some random offset so we don't recycle all
# at once
configs: moved most of configuration back to .ini files instead of gunicorn file
r801 worker._last_memory_check_time = _time_with_offset(_memory_usage_check_interval)
if _memory_max_usage:
configs: moved all gunicorn config to python file
r1143 server.log.info("pid=[%-10s] WORKER spawned with max memory set at %s", worker.pid,
configs: moved most of configuration back to .ini files instead of gunicorn file
r801 _format_data_size(_memory_max_usage))
else:
configs: moved all gunicorn config to python file
r1143 server.log.info("pid=[%-10s] WORKER spawned", worker.pid)
gunicorn: updated gunicorn config to add memory monitoring
r761
config: added example gunicorn configuration
r476 def pre_exec(server):
server.log.info("Forked child, re-executing.")
def on_starting(server):
gunicorn: updated gunicorn config to add memory monitoring
r761 server_lbl = '{} {}'.format(server.proc_name, server.address)
server.log.info("Server %s is starting.", server_lbl)
config: added example gunicorn configuration
r476
def when_ready(server):
gunicorn: updated gunicorn config to add memory monitoring
r761 server.log.info("Server %s is ready. Spawning workers", server)
config: added example gunicorn configuration
r476
def on_reload(server):
pass
gunicorn: updated gunicorn config to add memory monitoring
r761 def _format_data_size(size, unit="B", precision=1, binary=True):
"""Format a number using SI units (kilo, mega, etc.).
``size``: The number as a float or int.
``unit``: The unit name in plural form. Examples: "bytes", "B".
``precision``: How many digits to the right of the decimal point. Default
is 1. 0 suppresses the decimal point.
``binary``: If false, use base-10 decimal prefixes (kilo = K = 1000).
If true, use base-2 binary prefixes (kibi = Ki = 1024).
``full_name``: If false (default), use the prefix abbreviation ("k" or
"Ki"). If true, use the full prefix ("kilo" or "kibi"). If false,
use abbreviation ("k" or "Ki").
"""
if not binary:
base = 1000
multiples = ('', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y')
else:
base = 1024
multiples = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi')
sign = ""
if size > 0:
m = int(math.log(size, base))
elif size < 0:
sign = "-"
size = -size
m = int(math.log(size, base))
else:
m = 0
if m > 8:
m = 8
if m == 0:
precision = '%.0f'
else:
precision = '%%.%df' % precision
size = precision % (size / math.pow(base, m))
return '%s%s %s%s' % (sign, size.strip(), multiples[m], unit)
def _check_memory_usage(worker):
configs: moved all gunicorn config to python file
r1143 _memory_max_usage = worker._memory_max_usage
if not _memory_max_usage:
configs: moved most of configuration back to .ini files instead of gunicorn file
r801 return
gunicorn: updated gunicorn config to add memory monitoring
r761
configs: moved all gunicorn config to python file
r1143 _memory_usage_check_interval = worker._memory_usage_check_interval
_memory_usage_recovery_threshold = memory_max_usage * worker._memory_usage_recovery_threshold
gunicorn: updated gunicorn config to add memory monitoring
r761
elapsed = time.time() - worker._last_memory_check_time
configs: moved all gunicorn config to python file
r1143 if elapsed > _memory_usage_check_interval:
gunicorn: updated gunicorn config to add memory monitoring
r761 mem_usage = _get_process_rss()
configs: moved all gunicorn config to python file
r1143 if mem_usage and mem_usage > _memory_max_usage:
gunicorn: updated gunicorn config to add memory monitoring
r761 worker.log.info(
"memory usage %s > %s, forcing gc",
configs: moved all gunicorn config to python file
r1143 _format_data_size(mem_usage), _format_data_size(_memory_max_usage))
gunicorn: updated gunicorn config to add memory monitoring
r761 # Try to clean it up by forcing a full collection.
gc.collect()
mem_usage = _get_process_rss()
configs: moved all gunicorn config to python file
r1143 if mem_usage > _memory_usage_recovery_threshold:
gunicorn: updated gunicorn config to add memory monitoring
r761 # Didn't clean up enough, we'll have to terminate.
worker.log.warning(
"memory usage %s > %s after gc, quitting",
configs: moved all gunicorn config to python file
r1143 _format_data_size(mem_usage), _format_data_size(_memory_max_usage))
gunicorn: updated gunicorn config to add memory monitoring
r761 # This will cause worker to auto-restart itself
worker.alive = False
worker._last_memory_check_time = time.time()
config: added example gunicorn configuration
r476 def worker_int(worker):
configs: moved all gunicorn config to python file
r1143 worker.log.info("pid=[%-10s] worker received INT or QUIT signal", worker.pid)
config: added example gunicorn configuration
r476
# get traceback info, on worker crash
configs: moved all gunicorn config to python file
r1143 def get_thread_id(t_id):
id2name = dict([(th.ident, th.name) for th in threading.enumerate()])
return id2name.get(t_id, "unknown_thread_id")
config: added example gunicorn configuration
r476 code = []
configs: moved all gunicorn config to python file
r1143 for thread_id, stack in sys._current_frames().items(): # noqa
config: added example gunicorn configuration
r476 code.append(
configs: moved all gunicorn config to python file
r1143 "\n# Thread: %s(%d)" % (get_thread_id(thread_id), thread_id))
config: added example gunicorn configuration
r476 for fname, lineno, name, line in traceback.extract_stack(stack):
code.append('File: "%s", line %d, in %s' % (fname, lineno, name))
if line:
code.append(" %s" % (line.strip()))
worker.log.debug("\n".join(code))
def worker_abort(worker):
configs: moved all gunicorn config to python file
r1143 worker.log.info("pid=[%-10s] worker received SIGABRT signal", worker.pid)
config: added example gunicorn configuration
r476
def worker_exit(server, worker):
configs: moved all gunicorn config to python file
r1143 worker.log.info("pid=[%-10s] worker exit", worker.pid)
config: added example gunicorn configuration
r476
def child_exit(server, worker):
configs: moved all gunicorn config to python file
r1143 worker.log.info("pid=[%-10s] worker child exit", worker.pid)
config: added example gunicorn configuration
r476
def pre_request(worker, req):
worker.start_time = time.time()
worker.log.debug(
"GNCRN PRE WORKER [cnt:%s]: %s %s", worker.nr, req.method, req.path)
def post_request(worker, req, environ, resp):
total_time = time.time() - worker.start_time
gunicorn: updated gunicorn config based on release changes
r808 # Gunicorn sometimes has problems with reading the status_code
status_code = getattr(resp, 'status_code', '')
config: added example gunicorn configuration
r476 worker.log.debug(
core: added more accurate time measurements
r737 "GNCRN POST WORKER [cnt:%s]: %s %s resp: %s, Load Time: %.4fs",
gunicorn: updated gunicorn config based on release changes
r808 worker.nr, req.method, req.path, status_code, total_time)
gunicorn: updated gunicorn config to add memory monitoring
r761 _check_memory_usage(worker)
config: added example gunicorn configuration
r476
configs: fixed IP extraction in gunicorn
r1025 def _filter_proxy(ip):
"""
Passed in IP addresses in HEADERS can be in a special format of multiple
ips. Those comma separated IPs are passed from various proxies in the
chain of request processing. The left-most being the original client.
We only care about the first IP which came from the org. client.
:param ip: ip string from headers
"""
if ',' in ip:
_ips = ip.split(',')
_first_ip = _ips[0].strip()
return _first_ip
return ip
def _filter_port(ip):
"""
Removes a port from ip, there are 4 main cases to handle here.
- ipv4 eg. 127.0.0.1
- ipv6 eg. ::1
- ipv4+port eg. 127.0.0.1:8080
- ipv6+port eg. [::1]:8080
:param ip:
"""
def is_ipv6(ip_addr):
if hasattr(socket, 'inet_pton'):
try:
socket.inet_pton(socket.AF_INET6, ip_addr)
except socket.error:
return False
else:
return False
return True
if ':' not in ip: # must be ipv4 pure ip
return ip
if '[' in ip and ']' in ip: # ipv6 with port
return ip.split(']')[0][1:].lower()
# must be ipv6 or ipv4 with port
if is_ipv6(ip):
return ip
else:
ip, _port = ip.split(':')[:2] # means ipv4+port
return ip
def get_ip_addr(environ):
proxy_key = 'HTTP_X_REAL_IP'
proxy_key2 = 'HTTP_X_FORWARDED_FOR'
def_key = 'REMOTE_ADDR'
configs: moved all gunicorn config to python file
r1143
def _filters(x):
return _filter_port(_filter_proxy(x))
configs: fixed IP extraction in gunicorn
r1025
ip = environ.get(proxy_key)
if ip:
return _filters(ip)
ip = environ.get(proxy_key2)
if ip:
return _filters(ip)
ip = environ.get(def_key, '0.0.0.0')
return _filters(ip)
config: added example gunicorn configuration
r476 class RhodeCodeLogger(Logger):
"""
Custom Logger that allows some customization that gunicorn doesn't allow
"""
datefmt = r"%Y-%m-%d %H:%M:%S"
def __init__(self, cfg):
Logger.__init__(self, cfg)
def now(self):
""" return date in RhodeCode Log format """
now = time.time()
gunicorn: fixed python3 compat
r1056 msecs = int((now - int(now)) * 1000)
config: added example gunicorn configuration
r476 return time.strftime(self.datefmt, time.localtime(now)) + '.{0:03d}'.format(msecs)
configs: fixed IP extraction in gunicorn
r1025 def atoms(self, resp, req, environ, request_time):
""" Gets atoms for log formatting.
"""
status = resp.status
if isinstance(status, str):
status = status.split(None, 1)[0]
atoms = {
'h': get_ip_addr(environ),
'l': '-',
'u': self._get_user(environ) or '-',
't': self.now(),
'r': "%s %s %s" % (environ['REQUEST_METHOD'],
environ['RAW_URI'],
environ["SERVER_PROTOCOL"]),
's': status,
'm': environ.get('REQUEST_METHOD'),
'U': environ.get('PATH_INFO'),
'q': environ.get('QUERY_STRING'),
'H': environ.get('SERVER_PROTOCOL'),
'b': getattr(resp, 'sent', None) is not None and str(resp.sent) or '-',
'B': getattr(resp, 'sent', None),
'f': environ.get('HTTP_REFERER', '-'),
'a': environ.get('HTTP_USER_AGENT', '-'),
'T': request_time.seconds,
'D': (request_time.seconds * 1000000) + request_time.microseconds,
'M': (request_time.seconds * 1000) + int(request_time.microseconds/1000),
'L': "%d.%06d" % (request_time.seconds, request_time.microseconds),
'p': "<%s>" % os.getpid()
}
# add request headers
if hasattr(req, 'headers'):
req_headers = req.headers
else:
req_headers = req
if hasattr(req_headers, "items"):
req_headers = req_headers.items()
atoms.update({"{%s}i" % k.lower(): v for k, v in req_headers})
resp_headers = resp.headers
if hasattr(resp_headers, "items"):
resp_headers = resp_headers.items()
# add response headers
atoms.update({"{%s}o" % k.lower(): v for k, v in resp_headers})
# add environ variables
environ_variables = environ.items()
atoms.update({"{%s}e" % k.lower(): v for k, v in environ_variables})
return atoms
config: added example gunicorn configuration
r476
configs: moved all gunicorn config to python file
r1143
config: added example gunicorn configuration
r476 logger_class = RhodeCodeLogger