##// END OF EJS Templates
chore(deps): bumped orjson==3.9.13
chore(deps): bumped orjson==3.9.13

File last commit:

r1170:e15a1319 default
r1202:e43f9322 default
Show More
gunicorn_config.py
517 lines | 16.2 KiB | text/x-python | PythonLexer
/ configs / gunicorn_config.py
config: added example gunicorn configuration
r476 """
gunicorn: updated gunicorn config based on release changes
r808 Gunicorn config extension and hooks. This config file adds some extra settings and memory management.
Gunicorn configuration should be managed by .ini files entries of RhodeCode or VCSServer
config: added example gunicorn configuration
r476 """
switch to ruff from black
r1170 import contextlib
import dataclasses
configs: moved most of configuration back to .ini files instead of gunicorn file
r801 import gc
gunicorn: updated gunicorn config to add memory monitoring
r761 import math
switch to ruff from black
r1170 import os
gunicorn: updated gunicorn config to add memory monitoring
r761 import random
configs: fixed IP extraction in gunicorn
r1025 import socket
switch to ruff from black
r1170 import sys
import threading
import time
import traceback
config: added example gunicorn configuration
r476 from gunicorn.glogging import Logger
gunicorn: updated gunicorn config to add memory monitoring
def get_workers():
    """Return a worker count derived from the CPU count: ``2 * CPUs + 1``."""
    import multiprocessing

    cpu_total = multiprocessing.cpu_count()
    return cpu_total * 2 + 1
configs: moved all gunicorn config to python file
r1143
bind = "127.0.0.1:10010"
# Error logging output for gunicorn (-) is stdout
switch to ruff from black
r1170 errorlog = "-"
configs: moved all gunicorn config to python file
r1143
# Access logging output for gunicorn (-) is stdout
switch to ruff from black
r1170 accesslog = "-"
config: added example gunicorn configuration
r476
# SERVER MECHANICS
# None == system temp dir
# worker_tmp_dir is recommended to be set to some tmpfs
worker_tmp_dir = None
tmp_upload_dir = None
configs: moved all gunicorn config to python file
r1143 # use re-use port logic
switch to ruff from black
r1170 # reuse_port = True
config: updated configs to still not comment out logging configuration....
r1024
config: added example gunicorn configuration
r476 # Custom log format
switch to ruff from black
r1170 # access_log_format = (
core: re-implemented the way how configuration can be made...
r1021 # '%(t)s %(p)s INFO [GNCRN] %(h)-15s rqt:%(L)s %(s)s %(b)-6s "%(m)s:%(U)s %(q)s" usr:%(u)s "%(f)s" "%(a)s"')
config: added example gunicorn configuration
r476
configs: added changed require to track logging with loki or logstash
r1019 # loki format for easier parsing in grafana
switch to ruff from black
r1170 access_log_format = 'time="%(t)s" pid=%(p)s level="INFO" type="[GNCRN]" ip="%(h)-15s" rqt="%(L)s" response_code="%(s)s" response_bytes="%(b)-6s" uri="%(m)s:%(U)s %(q)s" user=":%(u)s" user_agent="%(a)s"'
configs: added changed require to track logging with loki or logstash
r1019
configs: moved all gunicorn config to python file
r1143
# Sets the number of process workers. More workers means more concurrent connections
# RhodeCode can handle at the same time. Each additional worker also increases
# memory usage, as each has its own set of caches.
# Recommended value is (2 * NUMBER_OF_CPUS + 1), e.g. 2 CPUs = 5 workers, but no more
# than 8-10 unless for huge deployments, e.g. 700-1000 users.
# `instance_id = *` must be set in the [app:main] section below (which is the default)
# when using more than 1 worker.
workers = 6
# Self-adjust the worker count based on the CPU count, to use as much CPU as possible without over-committing resources.
gunicorn: updated gunicorn config to add memory monitoring
r761 # workers = get_workers()
configs: moved all gunicorn config to python file
r1143 # Gunicorn access log level
switch to ruff from black
r1170 loglevel = "info"
configs: moved all gunicorn config to python file
r1143
updated configs
r1155 # Process name visible in a process list
switch to ruff from black
r1170 proc_name = "rhodecode_vcsserver"
configs: moved all gunicorn config to python file
r1143
# Type of worker class, one of `sync`, `gevent`
# currently `sync` is the only option allowed.
switch to ruff from black
r1170 worker_class = "sync"
configs: moved all gunicorn config to python file
r1143
# The maximum number of simultaneous clients. Valid only for gevent
worker_connections = 10
# Max number of requests that worker will handle before being gracefully restarted.
# Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
max_requests = 2000
config: improve gunicorn configs
r1164 max_requests_jitter = int(max_requests * 0.2) # 20% of max_requests
configs: moved all gunicorn config to python file
r1143
# The maximum number of pending connections.
# Exceeding this number results in the client getting an error when attempting to connect.
backlog = 64
config: improve gunicorn configs
r1164 # The Amount of time a worker can spend with handling a request before it
# gets killed and restarted. By default, set to 21600 (6hrs)
configs: moved all gunicorn config to python file
r1143 # Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
timeout = 21600
# The maximum size of HTTP request line in bytes.
# 0 for unlimited
limit_request_line = 0
# Limit the number of HTTP headers fields in a request.
# By default this value is 100 and can't be larger than 32768.
limit_request_fields = 32768
# Limit the allowed size of an HTTP request header field.
# Value is a positive number or 0.
# Setting it to 0 will allow unlimited header field sizes.
limit_request_field_size = 0
# Timeout for graceful workers restart.
# After receiving a restart signal, workers have this much time to finish
# serving requests. Workers still alive after the timeout (starting from the
# receipt of the restart signal) are force killed.
# Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
graceful_timeout = 21600
# The number of seconds to wait for requests on a Keep-Alive connection.
# Generally set in the 1-5 seconds range.
keepalive = 2
# Maximum memory usage that each worker can use before it will receive a
# graceful restart signal. 0 = memory monitoring is disabled.
# Examples: 268435456 (256MB), 536870912 (512MB)
# 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
gunicorn: print config on startup
r1150 # Dynamic formula 1024 * 1024 * 256 == 256MBs
configs: moved all gunicorn config to python file
r1143 memory_max_usage = 0
# How often in seconds to check for memory usage for each gunicorn worker
memory_usage_check_interval = 60
# Threshold value for which we don't recycle worker if GarbageCollection
gunicorn: print config on startup
r1150 # frees up enough resources. Before each restart, we try to run GC on worker
config: improve gunicorn configs
r1164 # in case we get enough free memory after that; restart will not happen.
configs: moved all gunicorn config to python file
r1143 memory_usage_recovery_threshold = 0.8
@dataclasses.dataclass
class MemoryCheckConfig:
    """Bundle of the three settings that drive per-worker memory monitoring."""

    # memory limit per worker; a falsy value disables monitoring in the callers
    max_usage: int
    # seconds between two memory checks
    check_interval: int
    # fraction applied to the limit when deciding whether GC freed enough memory
    recovery_threshold: float
config: added example gunicorn configuration
r476
gunicorn: updated gunicorn config to add memory monitoring
def _get_process_rss(pid=None):
    """
    Return the resident set size (RSS) reported by psutil for a process.

    :param pid: process id to inspect; a falsy value means the current process
    :return: the ``rss`` value from ``memory_info()``, or None when anything
        fails (psutil missing, process lookup error, ...)
    """
    try:
        import psutil

        target = psutil.Process(pid) if pid else psutil.Process()
        return target.memory_info().rss
    except Exception:
        # best-effort helper: every failure is reported as "unknown"
        return None
gunicorn: updated gunicorn config to add memory monitoring
r761
configs: moved most of configuration back to .ini files instead of gunicorn file
def _get_config(ini_path):
    """
    Parse an .ini file with ``configparser.RawConfigParser``.

    :param ini_path: path of the .ini file to read
    :return: the populated parser, or None when creating or reading it raised
    """
    import configparser

    try:
        parser = configparser.RawConfigParser()
        parser.read(ini_path)
    except Exception:
        # unreadable/malformed file: signal "no config" instead of crashing
        return None
    return parser
configs: moved most of configuration back to .ini files instead of gunicorn file
r801
configs: moved all gunicorn config to python file
def get_memory_usage_params(config=None):
    """
    Resolve the memory monitoring settings.

    Values start from the module-level defaults, are overridden by options of
    the ``server:main`` section when an .ini path is given, and finally by the
    ``RC_GUNICORN_MEMORY_*`` environment variables when those are non-empty.

    :param config: optional path to an .ini file
    :return: a ``MemoryCheckConfig`` with the resolved values
    """
    # memory spec defaults
    max_usage = memory_max_usage
    check_interval = memory_usage_check_interval
    recovery_threshold = memory_usage_recovery_threshold

    if config:
        parsed = _get_config(os.path.abspath(config))
        section = "server:main"
        if parsed and parsed.has_section(section):
            if parsed.has_option(section, "memory_max_usage"):
                max_usage = parsed.getint(section, "memory_max_usage")

            if parsed.has_option(section, "memory_usage_check_interval"):
                check_interval = parsed.getint(section, "memory_usage_check_interval")

            if parsed.has_option(section, "memory_usage_recovery_threshold"):
                recovery_threshold = parsed.getfloat(section, "memory_usage_recovery_threshold")

    # an empty or unset environment variable falls back to the value found so far
    env = os.environ
    max_usage = int(env.get("RC_GUNICORN_MEMORY_MAX_USAGE", "") or max_usage)
    check_interval = int(env.get("RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL", "") or check_interval)
    recovery_threshold = float(env.get("RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD", "") or recovery_threshold)

    return MemoryCheckConfig(max_usage, check_interval, recovery_threshold)
def _time_with_offset(check_interval):
    """
    Return the current time moved back by a random number of whole seconds.

    The offset is drawn from ``[0, check_interval / 2]`` (rounded down).

    :param check_interval: interval in seconds between memory checks
    :return: ``time.time()`` minus the random offset
    """
    # randint() needs integer bounds: passing ``check_interval / 2.0`` fails
    # for odd intervals and raises TypeError for any float on Python 3.12+
    max_offset = int(check_interval / 2)
    return time.time() - random.randint(0, max_offset)
config: added example gunicorn configuration
r476
def pre_fork(server, worker):
    """Hook that intentionally does nothing."""
    return None
gunicorn: updated gunicorn config to add memory monitoring
def post_fork(server, worker):
    """
    Attach the memory monitoring settings to a freshly spawned worker.

    Stores the limit, the check interval and the recovery threshold on the
    worker, registers a randomized "last check" time and logs the spawn.
    """
    params = get_memory_usage_params()
    max_usage = params.max_usage
    check_interval = params.check_interval
    recovery_threshold = params.recovery_threshold

    env = os.environ
    worker._memory_max_usage = int(env.get("RC_GUNICORN_MEMORY_MAX_USAGE", "") or max_usage)
    worker._memory_usage_check_interval = int(
        env.get("RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL", "") or check_interval
    )
    worker._memory_usage_recovery_threshold = float(
        env.get("RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD", "") or recovery_threshold
    )

    # register memory last check time, with some random offset so we don't recycle all
    # at once
    worker._last_memory_check_time = _time_with_offset(check_interval)

    if not max_usage:
        server.log.info("pid=[%-10s] WORKER spawned", worker.pid)
        return

    server.log.info(
        "pid=[%-10s] WORKER spawned with max memory set at %s", worker.pid, _format_data_size(max_usage)
    )
gunicorn: updated gunicorn config to add memory monitoring
r761
config: added example gunicorn configuration
def pre_exec(server):
    """Log that a forked child is about to be re-executed."""
    log = server.log
    log.info("Forked child, re-executing.")
def on_starting(server):
    """Log the server label, its configuration and the memory monitoring settings."""
    log = server.log
    server_lbl = f"{server.proc_name} {server.address}"
    log.info("Server %s is starting.", server_lbl)
    log.info("Config:")
    log.info(f"\n{server.cfg}")
    log.info(get_memory_usage_params())
config: added example gunicorn configuration
r476
def when_ready(server):
    """Log that the server is ready and about to spawn workers."""
    log = server.log
    log.info("Server %s is ready. Spawning workers", server)
config: added example gunicorn configuration
r476
def on_reload(server):
    """Hook that intentionally does nothing."""
    return None
gunicorn: updated gunicorn config to add memory monitoring
def _format_data_size(size, unit="B", precision=1, binary=True):
    """Format a number using unit prefixes (kilo, mega, etc.).

    ``size``: The number as a float or int.

    ``unit``: The unit name appended after the prefix. Examples: "bytes", "B".

    ``precision``: How many digits to the right of the decimal point. Default
    is 1. Values below one ``base`` are always printed without decimals.

    ``binary``: If false, use base-10 decimal prefixes (kilo = k = 1000).
    If true (default), use base-2 binary prefixes (kibi = Ki = 1024).

    Returns a string such as ``"1.5 MiB"`` or ``"-2.0 kB"``.
    """
    if binary:
        base = 1024
        multiples = ("", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi")
    else:
        base = 1000
        multiples = ("", "k", "M", "G", "T", "P", "E", "Z", "Y")

    sign = ""
    if size < 0:
        sign = "-"
        size = -size

    # Find the prefix by comparing against exact integer powers of ``base``.
    # Unlike int(math.log(size, base)) this cannot be thrown off by float
    # rounding at exact powers, and it never yields a negative index for
    # sizes between 0 and 1 (which used to select the largest prefix).
    m = 0
    threshold = base
    largest = len(multiples) - 1
    while m < largest and size >= threshold:
        threshold *= base
        m += 1

    number_format = "%.0f" if m == 0 else "%%.%df" % precision
    formatted = number_format % (size / base**m)
    return "%s%s %s%s" % (sign, formatted.strip(), multiples[m], unit)
gunicorn: updated gunicorn config to add memory monitoring
r761
def _check_memory_usage(worker):
    """
    Check the memory usage of a worker and flag it for restart when too high.

    Does nothing when the worker has no memory limit or when the check
    interval has not elapsed yet. Otherwise the process RSS is compared with
    the limit; when it is exceeded a full garbage collection is forced, and
    if the usage is still above ``limit * recovery_threshold`` the worker is
    marked as not alive.

    :param worker: worker carrying the ``_memory_*`` attributes set in post_fork
    """
    _memory_max_usage = worker._memory_max_usage
    if not _memory_max_usage:
        return

    elapsed = time.time() - worker._last_memory_check_time
    if elapsed <= worker._memory_usage_check_interval:
        return

    # The threshold must be relative to this worker's limit. Using the
    # module-level default (0 unless edited) made the threshold 0, so every
    # worker that crossed the limit was restarted no matter what GC freed.
    recovery_limit = _memory_max_usage * worker._memory_usage_recovery_threshold

    mem_usage = _get_process_rss()
    if mem_usage and mem_usage > _memory_max_usage:
        worker.log.info(
            "memory usage %s > %s, forcing gc", _format_data_size(mem_usage), _format_data_size(_memory_max_usage)
        )
        # Try to clean it up by forcing a full collection.
        gc.collect()
        mem_usage = _get_process_rss()
        # the RSS lookup is best-effort and may return None; skip in that case
        if mem_usage and mem_usage > recovery_limit:
            # Didn't clean up enough, we'll have to terminate.
            worker.log.warning(
                "memory usage %s > %s after gc, quitting",
                _format_data_size(mem_usage),
                _format_data_size(recovery_limit),
            )
            # This will cause worker to auto-restart itself
            worker.alive = False
    worker._last_memory_check_time = time.time()
config: added example gunicorn configuration
def worker_int(worker):
    """
    Log that the worker received INT or QUIT, then dump all thread stacks.

    The stack dump is emitted as one multi-line message at debug level.
    """
    worker.log.info("pid=[%-10s] worker received INT or QUIT signal", worker.pid)

    # get traceback info, when a worker crashes
    def get_thread_id(t_id):
        names_by_ident = {th.ident: th.name for th in threading.enumerate()}
        return names_by_ident.get(t_id, "unknown_thread_id")

    report = []
    for thread_id, stack in sys._current_frames().items():  # noqa
        report.append("\n# Thread: %s(%d)" % (get_thread_id(thread_id), thread_id))
        for frame in traceback.extract_stack(stack):
            fname, lineno, name, line = frame
            report.append('File: "%s", line %d, in %s' % (fname, lineno, name))
            if line:
                report.append("  %s" % (line.strip()))
    worker.log.debug("\n".join(report))
def worker_abort(worker):
    """Log that the worker received a SIGABRT signal."""
    log = worker.log
    log.info("pid=[%-10s] worker received SIGABRT signal", worker.pid)
config: added example gunicorn configuration
r476
def worker_exit(server, worker):
    """Log, through the worker's logger, that the worker exited."""
    log = worker.log
    log.info("pid=[%-10s] worker exit", worker.pid)
config: added example gunicorn configuration
r476
def child_exit(server, worker):
    """Log, through the worker's logger, that the worker child exited."""
    log = worker.log
    log.info("pid=[%-10s] worker child exit", worker.pid)
config: added example gunicorn configuration
r476
def pre_request(worker, req):
    """Record the request start time on the worker and log the request."""
    worker.start_time = time.time()
    log_args = (worker.nr, req.method, req.path)
    worker.log.debug("GNCRN PRE WORKER [cnt:%s]: %s %s", *log_args)
config: added example gunicorn configuration
r476
def post_request(worker, req, environ, resp):
    """
    Log the finished request with its duration, then run the memory check.

    The duration is measured from ``worker.start_time`` set in pre_request.
    """
    elapsed = time.time() - worker.start_time
    # Gunicorn sometimes has problems with reading the status_code
    response_status = getattr(resp, "status_code", "")
    log_args = (worker.nr, req.method, req.path, response_status, elapsed)
    worker.log.debug("GNCRN POST WORKER [cnt:%s]: %s %s resp: %s, Load Time: %.4fs", *log_args)
    _check_memory_usage(worker)
config: added example gunicorn configuration
r476
configs: fixed IP extraction in gunicorn
def _filter_proxy(ip):
    """
    Reduce a comma separated list of IPs, as added to HEADERS by the proxies
    in the request chain, to its left-most entry, which is the original client.

    A value without a comma is returned untouched.

    :param ip: ip string from headers
    """
    if "," not in ip:
        return ip
    leftmost = ip.split(",")[0]
    return leftmost.strip()
def _filter_port(ip):
    """
    Strip the port from an ip string. Four shapes are handled:

    - ipv4 eg. 127.0.0.1
    - ipv6 eg. ::1
    - ipv4+port eg. 127.0.0.1:8080
    - ipv6+port eg. [::1]:8080

    :param ip: ip string, optionally carrying a port
    """

    def _parses_as_ipv6(candidate):
        if not hasattr(socket, "inet_pton"):
            return False
        try:
            socket.inet_pton(socket.AF_INET6, candidate)
        except OSError:  # socket.error is an alias of OSError
            return False
        return True

    if ":" not in ip:  # must be ipv4 pure ip
        return ip

    if "[" in ip and "]" in ip:  # ipv6 with port
        bracketed = ip.partition("]")[0]
        return bracketed[1:].lower()

    # must be ipv6 or ipv4 with port
    if _parses_as_ipv6(ip):
        return ip

    # means ipv4+port
    return ip.split(":")[0]
def get_ip_addr(environ):
    """
    Extract the client IP from a WSGI-style environ mapping.

    ``HTTP_X_REAL_IP`` is checked first, then ``HTTP_X_FORWARDED_FOR``; when
    neither holds a non-empty value ``REMOTE_ADDR`` is used, defaulting to
    "0.0.0.0". The chosen value goes through the proxy and port filters.
    """

    def _filters(x):
        return _filter_port(_filter_proxy(x))

    for header_key in ("HTTP_X_REAL_IP", "HTTP_X_FORWARDED_FOR"):
        candidate = environ.get(header_key)
        if candidate:
            return _filters(candidate)

    return _filters(environ.get("REMOTE_ADDR", "0.0.0.0"))
config: added example gunicorn configuration
class RhodeCodeLogger(Logger):
    """
    Logger subclass with a custom timestamp format and custom access-log atoms.
    """

    datefmt = r"%Y-%m-%d %H:%M:%S"

    def __init__(self, cfg):
        Logger.__init__(self, cfg)

    def now(self):
        """Return the current local time as ``datefmt`` plus milliseconds."""
        timestamp = time.time()
        millis = int((timestamp - int(timestamp)) * 1000)
        formatted = time.strftime(self.datefmt, time.localtime(timestamp))
        return formatted + f".{millis:03d}"

    def atoms(self, resp, req, environ, request_time):
        """
        Build the mapping of atoms available to the access log format.

        Besides the fixed single-letter atoms, every request header is exposed
        as ``{name}i``, every response header as ``{name}o`` and every environ
        entry as ``{name}e`` (names lower-cased).
        """
        status = resp.status
        if isinstance(status, str):
            # keep only the leading token of the status string
            status = status.split(None, 1)[0]

        sent = getattr(resp, "sent", None)
        seconds = request_time.seconds
        micros = request_time.microseconds

        atoms = {
            "h": get_ip_addr(environ),
            "l": "-",
            "u": self._get_user(environ) or "-",
            "t": self.now(),
            "r": "%s %s %s" % (environ["REQUEST_METHOD"], environ["RAW_URI"], environ["SERVER_PROTOCOL"]),
            "s": status,
            "m": environ.get("REQUEST_METHOD"),
            "U": environ.get("PATH_INFO"),
            "q": environ.get("QUERY_STRING"),
            "H": environ.get("SERVER_PROTOCOL"),
            "b": str(resp.sent) if sent is not None else "-",
            "B": sent,
            "f": environ.get("HTTP_REFERER", "-"),
            "a": environ.get("HTTP_USER_AGENT", "-"),
            "T": seconds,
            "D": (seconds * 1000000) + micros,
            "M": (seconds * 1000) + int(micros / 1000),
            "L": "%d.%06d" % (seconds, micros),
            "p": "<%s>" % os.getpid(),
        }

        # add request headers
        req_headers = req.headers if hasattr(req, "headers") else req
        if hasattr(req_headers, "items"):
            req_headers = req_headers.items()
        atoms.update({"{%s}i" % key.lower(): value for key, value in req_headers})

        # add response headers
        resp_headers = resp.headers
        if hasattr(resp_headers, "items"):
            resp_headers = resp_headers.items()
        atoms.update({"{%s}o" % key.lower(): value for key, value in resp_headers})

        # add environ variables
        atoms.update({"{%s}e" % key.lower(): value for key, value in environ.items()})

        return atoms
config: added example gunicorn configuration
r476
configs: moved all gunicorn config to python file
r1143
config: added example gunicorn configuration
r476 logger_class = RhodeCodeLogger