# HG changeset patch # User RhodeCode Admin # Date 2023-08-05 21:26:16 # Node ID d0f694ba3b24993f084c9d3ba86635f458e68b16 # Parent e080003367e801aec8e8959ae069e0b56d4bfd5b configs: moved gunicorn configs to python files diff --git a/configs/development.ini b/configs/development.ini --- a/configs/development.ini +++ b/configs/development.ini @@ -27,9 +27,10 @@ debug = true #smtp_use_ssl = true [server:main] -; COMMON HOST/IP CONFIG +; COMMON HOST/IP CONFIG, This applies mostly to develop setup, +; Host port for gunicorn are controlled by gunicorn_conf.py host = 127.0.0.1 -port = 5000 +port = 10020 ; ################################################## ; WAITRESS WSGI SERVER - Recommended for Development @@ -53,85 +54,11 @@ asyncore_use_poll = true ; GUNICORN APPLICATION SERVER ; ########################### -; run with gunicorn --log-config rhodecode.ini --paste rhodecode.ini +; run with gunicorn --paste rhodecode.ini --config gunicorn_conf.py ; Module to use, this setting shouldn't be changed #use = egg:gunicorn#main -; Sets the number of process workers. More workers means more concurrent connections -; RhodeCode can handle at the same time. Each additional worker also it increases -; memory usage as each has it's own set of caches. -; Recommended value is (2 * NUMBER_OF_CPUS + 1), eg 2CPU = 5 workers, but no more -; than 8-10 unless for really big deployments .e.g 700-1000 users. -; `instance_id = *` must be set in the [app:main] section below (which is the default) -; when using more than 1 worker. -#workers = 2 - -; Gunicorn access log level -#loglevel = info - -; Process name visible in process list -#proc_name = rhodecode - -; Type of worker class, one of `sync`, `gevent` -; Recommended type is `gevent` -#worker_class = gevent - -; The maximum number of simultaneous clients per worker. 
Valid only for gevent -#worker_connections = 10 - -; The maximum number of pending connections worker will queue to handle -#backlog = 64 - -; Max number of requests that worker will handle before being gracefully restarted. -; Prevents memory leaks, jitter adds variability so not all workers are restarted at once. -#max_requests = 1000 -#max_requests_jitter = 30 - -; Amount of time a worker can spend with handling a request before it -; gets killed and restarted. By default set to 21600 (6hrs) -; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h) -#timeout = 21600 - -; The maximum size of HTTP request line in bytes. -; 0 for unlimited -#limit_request_line = 0 - -; Limit the number of HTTP headers fields in a request. -; By default this value is 100 and can't be larger than 32768. -#limit_request_fields = 32768 - -; Limit the allowed size of an HTTP request header field. -; Value is a positive number or 0. -; Setting it to 0 will allow unlimited header field sizes. -#limit_request_field_size = 0 - -; Timeout for graceful workers restart. -; After receiving a restart signal, workers have this much time to finish -; serving requests. Workers still alive after the timeout (starting from the -; receipt of the restart signal) are force killed. -; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h) -#graceful_timeout = 3600 - -# The number of seconds to wait for requests on a Keep-Alive connection. -# Generally set in the 1-5 seconds range. -#keepalive = 2 - -; Maximum memory usage that each worker can use before it will receive a -; graceful restart signal 0 = memory monitoring is disabled -; Examples: 268435456 (256MB), 536870912 (512MB) -; 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB) -#memory_max_usage = 0 - -; How often in seconds to check for memory usage for each gunicorn worker -#memory_usage_check_interval = 60 - -; Threshold value for which we don't recycle worker if GarbageCollection -; frees up enough resources. 
Before each restart we try to run GC on worker -; in case we get enough free memory after that, restart will not happen. -#memory_usage_recovery_threshold = 0.8 - - ; Prefix middleware for RhodeCode. ; recommended when using proxy setup. ; allows to set RhodeCode under a prefix in server. @@ -625,6 +552,9 @@ sqlalchemy.db1.pool_recycle = 3600 ; the number of connections to keep open inside the connection pool. ; 0 indicates no limit +; the general calculus with gevent is: +; if your system allows 500 concurrent greenlets (max_connections) that all do database access, +; then increase pool size + max overflow so that they add up to 500. #sqlalchemy.db1.pool_size = 5 ; The number of connections to allow in connection pool "overflow", that is diff --git a/configs/gunicorn_config.py b/configs/gunicorn_config.py --- a/configs/gunicorn_config.py +++ b/configs/gunicorn_config.py @@ -12,6 +12,7 @@ import threading import traceback import random import socket +import dataclasses from gunicorn.glogging import Logger @@ -19,8 +20,14 @@ def get_workers(): import multiprocessing return multiprocessing.cpu_count() * 2 + 1 -# GLOBAL + +bind = "127.0.0.1:10020" + + +# Error logging output for gunicorn (-) is stdout errorlog = '-' + +# Access logging output for gunicorn (-) is stdout accesslog = '-' @@ -30,6 +37,7 @@ accesslog = '-' worker_tmp_dir = None tmp_upload_dir = None +# use re-use port logic #reuse_port = True # Custom log format @@ -40,9 +48,91 @@ tmp_upload_dir = None access_log_format = ( 'time="%(t)s" pid=%(p)s level="INFO" type="[GNCRN]" ip="%(h)-15s" rqt="%(L)s" response_code="%(s)s" response_bytes="%(b)-6s" uri="%(m)s:%(U)s %(q)s" user=":%(u)s" user_agent="%(a)s"') -# self adjust workers based on CPU count + +# Sets the number of process workers. More workers means more concurrent connections +# RhodeCode can handle at the same time. Each additional worker also it increases +# memory usage as each has it's own set of caches. 
+# Recommended value is (2 * NUMBER_OF_CPUS + 1), e.g. 2CPU = 5 workers, but no more
+# than 8-10 unless for huge deployments e.g. 700-1000 users.
+# `instance_id = *` must be set in the [app:main] section below (which is the default)
+# when using more than 1 worker.
+workers = 4
+
+# self adjust workers based on CPU count, to use the maximum of CPUs and not over-allocate resources
 # workers = get_workers()
+
+# Gunicorn access log level
+loglevel = 'info'
+
+# Process name visible in process list
+proc_name = 'rhodecode_enterprise'
+
+# Type of worker class, one of `sync`, `gevent`
+# currently `gevent` is the only option allowed.
+worker_class = 'gevent'
+
+# The maximum number of simultaneous clients. Valid only for gevent
+worker_connections = 10
+
+# Max number of requests that worker will handle before being gracefully restarted.
+# Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
+max_requests = 2000
+max_requests_jitter = 30
+
+# The maximum number of pending connections.
+# Exceeding this number results in the client getting an error when attempting to connect.
+backlog = 64
+
+# Amount of time a worker can spend handling a request before it
+# gets killed and restarted. By default set to 21600 (6hrs)
+# Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
+timeout = 21600
+
+# The maximum size of HTTP request line in bytes.
+# 0 for unlimited
+limit_request_line = 0
+
+# Limit the number of HTTP header fields in a request.
+# By default this value is 100 and can't be larger than 32768.
+limit_request_fields = 32768
+
+# Limit the allowed size of an HTTP request header field.
+# Value is a positive number or 0.
+# Setting it to 0 will allow unlimited header field sizes.
+limit_request_field_size = 0
+
+# Timeout for graceful worker restart.
+# After receiving a restart signal, workers have this much time to finish
+# serving requests. 
Workers still alive after the timeout (starting from the +# receipt of the restart signal) are force killed. +# Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h) +graceful_timeout = 21600 + +# The number of seconds to wait for requests on a Keep-Alive connection. +# Generally set in the 1-5 seconds range. +keepalive = 2 + +# Maximum memory usage that each worker can use before it will receive a +# graceful restart signal 0 = memory monitoring is disabled +# Examples: 268435456 (256MB), 536870912 (512MB) +# 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB) +memory_max_usage = 0 + +# How often in seconds to check for memory usage for each gunicorn worker +memory_usage_check_interval = 60 + +# Threshold value for which we don't recycle worker if GarbageCollection +# frees up enough resources. Before each restart we try to run GC on worker +# in case we get enough free memory after that, restart will not happen. +memory_usage_recovery_threshold = 0.8 + + +@dataclasses.dataclass +class MemoryCheckConfig: + max_usage: int + check_interval: int + recovery_threshold: float + def _get_process_rss(pid=None): try: @@ -67,8 +157,40 @@ def _get_config(ini_path): return None -def _time_with_offset(memory_usage_check_interval): - return time.time() - random.randint(0, memory_usage_check_interval/2.0) +def get_memory_usage_params(config=None): + # memory spec defaults + _memory_max_usage = memory_max_usage + _memory_usage_check_interval = memory_usage_check_interval + _memory_usage_recovery_threshold = memory_usage_recovery_threshold + + if config: + ini_path = os.path.abspath(config) + conf = _get_config(ini_path) + + section = 'server:main' + if conf and conf.has_section(section): + + if conf.has_option(section, 'memory_max_usage'): + _memory_max_usage = conf.getint(section, 'memory_max_usage') + + if conf.has_option(section, 'memory_usage_check_interval'): + _memory_usage_check_interval = conf.getint(section, 'memory_usage_check_interval') + + if 
conf.has_option(section, 'memory_usage_recovery_threshold'): + _memory_usage_recovery_threshold = conf.getfloat(section, 'memory_usage_recovery_threshold') + + _memory_max_usage = int(os.environ.get('RC_GUNICORN_MEMORY_MAX_USAGE', '') + or _memory_max_usage) + _memory_usage_check_interval = int(os.environ.get('RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL', '') + or _memory_usage_check_interval) + _memory_usage_recovery_threshold = float(os.environ.get('RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD', '') + or _memory_usage_recovery_threshold) + + return MemoryCheckConfig(_memory_max_usage, _memory_usage_check_interval, _memory_usage_recovery_threshold) + + +def _time_with_offset(check_interval): + return time.time() - random.randint(0, check_interval/2.0) def pre_fork(server, worker): @@ -77,25 +199,10 @@ def pre_fork(server, worker): def post_fork(server, worker): - # memory spec defaults - _memory_max_usage = 0 - _memory_usage_check_interval = 60 - _memory_usage_recovery_threshold = 0.8 - - ini_path = os.path.abspath(server.cfg.paste) - conf = _get_config(ini_path) - - section = 'server:main' - if conf and conf.has_section(section): - - if conf.has_option(section, 'memory_max_usage'): - _memory_max_usage = conf.getint(section, 'memory_max_usage') - - if conf.has_option(section, 'memory_usage_check_interval'): - _memory_usage_check_interval = conf.getint(section, 'memory_usage_check_interval') - - if conf.has_option(section, 'memory_usage_recovery_threshold'): - _memory_usage_recovery_threshold = conf.getfloat(section, 'memory_usage_recovery_threshold') + memory_conf = get_memory_usage_params() + _memory_max_usage = memory_conf.max_usage + _memory_usage_check_interval = memory_conf.check_interval + _memory_usage_recovery_threshold = memory_conf.recovery_threshold worker._memory_max_usage = int(os.environ.get('RC_GUNICORN_MEMORY_MAX_USAGE', '') or _memory_max_usage) @@ -109,10 +216,10 @@ def post_fork(server, worker): worker._last_memory_check_time = 
_time_with_offset(_memory_usage_check_interval)
 
     if _memory_max_usage:
-        server.log.info("[%-10s] WORKER spawned with max memory set at %s", worker.pid,
+        server.log.info("pid=[%-10s] WORKER spawned with max memory set at %s", worker.pid,
                         _format_data_size(_memory_max_usage))
     else:
-        server.log.info("[%-10s] WORKER spawned", worker.pid)
+        server.log.info("pid=[%-10s] WORKER spawned", worker.pid)
 
 
 def pre_exec(server):
@@ -181,42 +288,45 @@ def _format_data_size(size, unit="B", pr
 
 
 def _check_memory_usage(worker):
-    memory_max_usage = worker._memory_max_usage
-    if not memory_max_usage:
+    _memory_max_usage = worker._memory_max_usage
+    if not _memory_max_usage:
         return
 
-    memory_usage_check_interval = worker._memory_usage_check_interval
-    memory_usage_recovery_threshold = memory_max_usage * worker._memory_usage_recovery_threshold
+    _memory_usage_check_interval = worker._memory_usage_check_interval
+    _memory_usage_recovery_threshold = _memory_max_usage * worker._memory_usage_recovery_threshold
 
     elapsed = time.time() - worker._last_memory_check_time
-    if elapsed > memory_usage_check_interval:
+    if elapsed > _memory_usage_check_interval:
 
         mem_usage = _get_process_rss()
-        if mem_usage and mem_usage > memory_max_usage:
+        if mem_usage and mem_usage > _memory_max_usage:
             worker.log.info(
                 "memory usage %s > %s, forcing gc",
-                _format_data_size(mem_usage), _format_data_size(memory_max_usage))
+                _format_data_size(mem_usage), _format_data_size(_memory_max_usage))
             # Try to clean it up by forcing a full collection.
             gc.collect()
             mem_usage = _get_process_rss()
-            if mem_usage > memory_usage_recovery_threshold:
+            if mem_usage > _memory_usage_recovery_threshold:
                 # Didn't clean up enough, we'll have to terminate. 
worker.log.warning( "memory usage %s > %s after gc, quitting", - _format_data_size(mem_usage), _format_data_size(memory_max_usage)) + _format_data_size(mem_usage), _format_data_size(_memory_max_usage)) # This will cause worker to auto-restart itself worker.alive = False worker._last_memory_check_time = time.time() def worker_int(worker): - worker.log.info("[%-10s] worker received INT or QUIT signal", worker.pid) + worker.log.info("pid=[%-10s] worker received INT or QUIT signal", worker.pid) # get traceback info, on worker crash - id2name = dict([(th.ident, th.name) for th in threading.enumerate()]) + def get_thread_id(t_id): + id2name = dict([(th.ident, th.name) for th in threading.enumerate()]) + return id2name.get(t_id, "unknown_thread_id") + code = [] for thread_id, stack in sys._current_frames().items(): code.append( - "\n# Thread: %s(%d)" % (id2name.get(thread_id, ""), thread_id)) + "\n# Thread: %s(%d)" % (get_thread_id(thread_id), thread_id)) for fname, lineno, name, line in traceback.extract_stack(stack): code.append('File: "%s", line %d, in %s' % (fname, lineno, name)) if line: @@ -225,15 +335,15 @@ def worker_int(worker): def worker_abort(worker): - worker.log.info("[%-10s] worker received SIGABRT signal", worker.pid) + worker.log.info("pid=[%-10s] worker received SIGABRT signal", worker.pid) def worker_exit(server, worker): - worker.log.info("[%-10s] worker exit", worker.pid) + worker.log.info("pid=[%-10s] worker exit", worker.pid) def child_exit(server, worker): - worker.log.info("[%-10s] worker child exit", worker.pid) + worker.log.info("pid=[%-10s] worker child exit", worker.pid) def pre_request(worker, req): @@ -306,7 +416,9 @@ def get_ip_addr(environ): proxy_key = 'HTTP_X_REAL_IP' proxy_key2 = 'HTTP_X_FORWARDED_FOR' def_key = 'REMOTE_ADDR' - _filters = lambda x: _filter_port(_filter_proxy(x)) + + def _filters(x): + return _filter_port(_filter_proxy(x)) ip = environ.get(proxy_key) if ip: @@ -390,4 +502,5 @@ class RhodeCodeLogger(Logger): return atoms 
+ logger_class = RhodeCodeLogger diff --git a/configs/production.ini b/configs/production.ini --- a/configs/production.ini +++ b/configs/production.ini @@ -27,94 +27,21 @@ debug = false #smtp_use_ssl = true [server:main] -; COMMON HOST/IP CONFIG +; COMMON HOST/IP CONFIG, This applies mostly to develop setup, +; Host port for gunicorn are controlled by gunicorn_conf.py host = 127.0.0.1 -port = 5000 +port = 10020 ; ########################### ; GUNICORN APPLICATION SERVER ; ########################### -; run with gunicorn --paste rhodecode.ini +; run with gunicorn --paste rhodecode.ini --config gunicorn_conf.py ; Module to use, this setting shouldn't be changed use = egg:gunicorn#main -; Sets the number of process workers. More workers means more concurrent connections -; RhodeCode can handle at the same time. Each additional worker also it increases -; memory usage as each has it's own set of caches. -; Recommended value is (2 * NUMBER_OF_CPUS + 1), eg 2CPU = 5 workers, but no more -; than 8-10 unless for really big deployments .e.g 700-1000 users. -; `instance_id = *` must be set in the [app:main] section below (which is the default) -; when using more than 1 worker. -workers = 2 - -; Gunicorn access log level -loglevel = info - -; Process name visible in process list -proc_name = rhodecode - -; Type of worker class, one of `sync`, `gevent` -; Recommended type is `gevent` -worker_class = gevent - -; The maximum number of simultaneous clients per worker. Valid only for gevent -worker_connections = 10 - -; The maximum number of pending connections worker will queue to handle -backlog = 64 - -; Max number of requests that worker will handle before being gracefully restarted. -; Prevents memory leaks, jitter adds variability so not all workers are restarted at once. -max_requests = 1000 -max_requests_jitter = 30 - -; Amount of time a worker can spend with handling a request before it -; gets killed and restarted. 
By default set to 21600 (6hrs) -; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h) -timeout = 21600 - -; The maximum size of HTTP request line in bytes. -; 0 for unlimited -limit_request_line = 0 - -; Limit the number of HTTP headers fields in a request. -; By default this value is 100 and can't be larger than 32768. -limit_request_fields = 32768 - -; Limit the allowed size of an HTTP request header field. -; Value is a positive number or 0. -; Setting it to 0 will allow unlimited header field sizes. -limit_request_field_size = 0 - -; Timeout for graceful workers restart. -; After receiving a restart signal, workers have this much time to finish -; serving requests. Workers still alive after the timeout (starting from the -; receipt of the restart signal) are force killed. -; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h) -graceful_timeout = 3600 - -# The number of seconds to wait for requests on a Keep-Alive connection. -# Generally set in the 1-5 seconds range. -keepalive = 2 - -; Maximum memory usage that each worker can use before it will receive a -; graceful restart signal 0 = memory monitoring is disabled -; Examples: 268435456 (256MB), 536870912 (512MB) -; 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB) -memory_max_usage = 0 - -; How often in seconds to check for memory usage for each gunicorn worker -memory_usage_check_interval = 60 - -; Threshold value for which we don't recycle worker if GarbageCollection -; frees up enough resources. Before each restart we try to run GC on worker -; in case we get enough free memory after that, restart will not happen. -memory_usage_recovery_threshold = 0.8 - - ; Prefix middleware for RhodeCode. ; recommended when using proxy setup. ; allows to set RhodeCode under a prefix in server.