##// END OF EJS Templates
configs: improve gunicor config on worker_recycle logic
super-admin -
r5174:60fffd9c default
parent child Browse files
Show More
@@ -1,510 +1,510 b''
1 """
1 """
2 Gunicorn config extension and hooks. This config file adds some extra settings and memory management.
2 Gunicorn config extension and hooks. This config file adds some extra settings and memory management.
3 Gunicorn configuration should be managed by .ini files entries of RhodeCode or VCSServer
3 Gunicorn configuration should be managed by .ini files entries of RhodeCode or VCSServer
4 """
4 """
5
5
6 import gc
6 import gc
7 import os
7 import os
8 import sys
8 import sys
9 import math
9 import math
10 import time
10 import time
11 import threading
11 import threading
12 import traceback
12 import traceback
13 import random
13 import random
14 import socket
14 import socket
15 import dataclasses
15 import dataclasses
16 from gunicorn.glogging import Logger
16 from gunicorn.glogging import Logger
17
17
18
18
def get_workers():
    """Return the recommended gunicorn worker count: (2 * CPU cores) + 1."""
    import multiprocessing
    core_count = multiprocessing.cpu_count()
    return 2 * core_count + 1
22
22
23
23
bind = "127.0.0.1:10020"


# Error logging output for gunicorn (-) is stdout
errorlog = '-'

# Access logging output for gunicorn (-) is stdout
accesslog = '-'


# SERVER MECHANICS
# None == system temp dir
# worker_tmp_dir is recommended to be set to some tmpfs
worker_tmp_dir = None
tmp_upload_dir = None

# use re-use port logic
#reuse_port = True

# Custom log format
#access_log_format = (
#    '%(t)s %(p)s INFO [GNCRN] %(h)-15s rqt:%(L)s %(s)s %(b)-6s "%(m)s:%(U)s %(q)s" usr:%(u)s "%(f)s" "%(a)s"')

# loki format for easier parsing in grafana
access_log_format = (
    'time="%(t)s" pid=%(p)s level="INFO" type="[GNCRN]" ip="%(h)-15s" rqt="%(L)s" response_code="%(s)s" response_bytes="%(b)-6s" uri="%(m)s:%(U)s %(q)s" user=":%(u)s" user_agent="%(a)s"')


# Sets the number of process workers. More workers means more concurrent connections
# RhodeCode can handle at the same time. Each additional worker also it increases
# memory usage as each has it's own set of caches.
# Recommended value is (2 * NUMBER_OF_CPUS + 1), eg 2CPU = 5 workers, but no more
# than 8-10 unless for huge deployments .e.g 700-1000 users.
# `instance_id = *` must be set in the [app:main] section below (which is the default)
# when using more than 1 worker.
workers = 4

# self adjust workers based on CPU count, to use maximum of CPU and not overquota the resources
# workers = get_workers()

# Gunicorn access log level
loglevel = 'info'

# Process name visible in a process list
proc_name = 'rhodecode_enterprise'

# Type of worker class, one of `sync`, `gevent`
# currently `sync` is the only option allowed.
worker_class = 'gevent'

# The maximum number of simultaneous clients. Valid only for gevent
worker_connections = 10

# Max number of requests that worker will handle before being gracefully restarted.
# Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
max_requests = 2000
max_requests_jitter = int(max_requests * 0.2)  # 20% of max_requests

# The maximum number of pending connections.
# Exceeding this number results in the client getting an error when attempting to connect.
backlog = 64

# The Amount of time a worker can spend with handling a request before it
# gets killed and restarted. By default, set to 21600 (6hrs)
# Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
timeout = 21600

# The maximum size of HTTP request line in bytes.
# 0 for unlimited
limit_request_line = 0

# Limit the number of HTTP headers fields in a request.
# By default this value is 100 and can't be larger than 32768.
limit_request_fields = 32768

# Limit the allowed size of an HTTP request header field.
# Value is a positive number or 0.
# Setting it to 0 will allow unlimited header field sizes.
limit_request_field_size = 0

# Timeout for graceful workers restart.
# After receiving a restart signal, workers have this much time to finish
# serving requests. Workers still alive after the timeout (starting from the
# receipt of the restart signal) are force killed.
# Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
graceful_timeout = 21600

# The number of seconds to wait for requests on a Keep-Alive connection.
# Generally set in the 1-5 seconds range.
keepalive = 2

# Maximum memory usage that each worker can use before it will receive a
# graceful restart signal 0 = memory monitoring is disabled
# Examples: 268435456 (256MB), 536870912 (512MB)
# 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
# Dynamic formula 1024 * 1024 * 256 == 256MBs
memory_max_usage = 0

# How often in seconds to check for memory usage for each gunicorn worker
memory_usage_check_interval = 60

# Threshold value for which we don't recycle worker if GarbageCollection
# frees up enough resources. Before each restart, we try to run GC on worker
# in case we get enough free memory after that; restart will not happen.
memory_usage_recovery_threshold = 0.8
130
130
@dataclasses.dataclass
class MemoryCheckConfig:
    """Bundle of the three settings driving worker memory-based recycling."""
    # max RSS in bytes a worker may use before recycling (0 disables checks)
    max_usage: int
    # seconds between RSS checks for a worker
    check_interval: int
    # fraction of max_usage below which a post-GC worker is NOT recycled
    recovery_threshold: float
136
136
137
137
138 def _get_process_rss(pid=None):
138 def _get_process_rss(pid=None):
139 try:
139 try:
140 import psutil
140 import psutil
141 if pid:
141 if pid:
142 proc = psutil.Process(pid)
142 proc = psutil.Process(pid)
143 else:
143 else:
144 proc = psutil.Process()
144 proc = psutil.Process()
145 return proc.memory_info().rss
145 return proc.memory_info().rss
146 except Exception:
146 except Exception:
147 return None
147 return None
148
148
149
149
150 def _get_config(ini_path):
150 def _get_config(ini_path):
151 import configparser
151 import configparser
152
152
153 try:
153 try:
154 config = configparser.RawConfigParser()
154 config = configparser.RawConfigParser()
155 config.read(ini_path)
155 config.read(ini_path)
156 return config
156 return config
157 except Exception:
157 except Exception:
158 return None
158 return None
159
159
160
160
def get_memory_usage_params(config=None):
    """Resolve the memory-recycle settings into a MemoryCheckConfig.

    Precedence, lowest to highest: module-level defaults, the
    [server:main] section of *config* (an .ini file path), then the
    RC_GUNICORN_* environment variables.
    """
    # start from the module-level defaults
    max_usage = memory_max_usage
    check_interval = memory_usage_check_interval
    recovery_threshold = memory_usage_recovery_threshold

    if config:
        conf = _get_config(os.path.abspath(config))
        section = 'server:main'
        if conf and conf.has_section(section):
            if conf.has_option(section, 'memory_max_usage'):
                max_usage = conf.getint(section, 'memory_max_usage')
            if conf.has_option(section, 'memory_usage_check_interval'):
                check_interval = conf.getint(section, 'memory_usage_check_interval')
            if conf.has_option(section, 'memory_usage_recovery_threshold'):
                recovery_threshold = conf.getfloat(section, 'memory_usage_recovery_threshold')

    # environment variables win over both defaults and ini values;
    # an empty/unset variable falls through to the current value
    max_usage = int(os.environ.get('RC_GUNICORN_MEMORY_MAX_USAGE', '')
                    or max_usage)
    check_interval = int(os.environ.get('RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL', '')
                         or check_interval)
    recovery_threshold = float(os.environ.get('RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD', '')
                               or recovery_threshold)

    return MemoryCheckConfig(max_usage, check_interval, recovery_threshold)
191
191
192
192
193 def _time_with_offset(check_interval):
193 def _time_with_offset(check_interval):
194 return time.time() - random.randint(0, check_interval/2.0)
194 return time.time() - random.randint(0, check_interval/2.0)
195
195
196
196
def pre_fork(server, worker):
    # Gunicorn server hook: called just before a worker is forked. No-op.
    pass
199
199
200
200
def post_fork(server, worker):
    """Gunicorn hook run right after a worker has been forked.

    Stores the resolved memory-recycle settings on the worker object (so
    _check_memory_usage can read them per request) and seeds the "last
    memory check" timestamp with a random offset, so workers don't all get
    recycled at the same moment.
    """

    # get_memory_usage_params() already applies the RC_GUNICORN_* env
    # overrides on top of defaults/ini values, so re-reading the environment
    # here (as the previous revision did) was redundant.
    memory_conf = get_memory_usage_params()
    _memory_max_usage = memory_conf.max_usage
    _memory_usage_check_interval = memory_conf.check_interval

    worker._memory_max_usage = _memory_max_usage
    worker._memory_usage_check_interval = _memory_usage_check_interval
    worker._memory_usage_recovery_threshold = memory_conf.recovery_threshold

    # register memory last check time, with some random offset so we don't
    # recycle all workers at once
    worker._last_memory_check_time = _time_with_offset(_memory_usage_check_interval)

    if _memory_max_usage:
        server.log.info("pid=[%-10s] WORKER spawned with max memory set at %s", worker.pid,
                        _format_data_size(_memory_max_usage))
    else:
        server.log.info("pid=[%-10s] WORKER spawned", worker.pid)
224
224
225
225
def pre_exec(server):
    # Gunicorn hook: called just before a new master process is exec'd
    # (e.g. during a binary upgrade triggered via SIGUSR2).
    server.log.info("Forked child, re-executing.")
228
228
229
229
def on_starting(server):
    """Gunicorn hook run when the arbiter starts: log the server label,
    the effective gunicorn config and the resolved memory-check params."""
    label = '{} {}'.format(server.proc_name, server.address)
    server.log.info("Server %s is starting.", label)
    server.log.info('Config:')
    server.log.info(f"\n{server.cfg}")
    server.log.info(get_memory_usage_params())
236
236
237
237
def when_ready(server):
    # Gunicorn hook: master finished booting and is about to spawn workers.
    server.log.info("Server %s is ready. Spawning workers", server)
240
240
241
241
def on_reload(server):
    # Gunicorn hook: called on configuration reload (SIGHUP). No-op.
    pass
244
244
245
245
246 def _format_data_size(size, unit="B", precision=1, binary=True):
246 def _format_data_size(size, unit="B", precision=1, binary=True):
247 """Format a number using SI units (kilo, mega, etc.).
247 """Format a number using SI units (kilo, mega, etc.).
248
248
249 ``size``: The number as a float or int.
249 ``size``: The number as a float or int.
250
250
251 ``unit``: The unit name in plural form. Examples: "bytes", "B".
251 ``unit``: The unit name in plural form. Examples: "bytes", "B".
252
252
253 ``precision``: How many digits to the right of the decimal point. Default
253 ``precision``: How many digits to the right of the decimal point. Default
254 is 1. 0 suppresses the decimal point.
254 is 1. 0 suppresses the decimal point.
255
255
256 ``binary``: If false, use base-10 decimal prefixes (kilo = K = 1000).
256 ``binary``: If false, use base-10 decimal prefixes (kilo = K = 1000).
257 If true, use base-2 binary prefixes (kibi = Ki = 1024).
257 If true, use base-2 binary prefixes (kibi = Ki = 1024).
258
258
259 ``full_name``: If false (default), use the prefix abbreviation ("k" or
259 ``full_name``: If false (default), use the prefix abbreviation ("k" or
260 "Ki"). If true, use the full prefix ("kilo" or "kibi"). If false,
260 "Ki"). If true, use the full prefix ("kilo" or "kibi"). If false,
261 use abbreviation ("k" or "Ki").
261 use abbreviation ("k" or "Ki").
262
262
263 """
263 """
264
264
265 if not binary:
265 if not binary:
266 base = 1000
266 base = 1000
267 multiples = ('', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y')
267 multiples = ('', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y')
268 else:
268 else:
269 base = 1024
269 base = 1024
270 multiples = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi')
270 multiples = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi')
271
271
272 sign = ""
272 sign = ""
273 if size > 0:
273 if size > 0:
274 m = int(math.log(size, base))
274 m = int(math.log(size, base))
275 elif size < 0:
275 elif size < 0:
276 sign = "-"
276 sign = "-"
277 size = -size
277 size = -size
278 m = int(math.log(size, base))
278 m = int(math.log(size, base))
279 else:
279 else:
280 m = 0
280 m = 0
281 if m > 8:
281 if m > 8:
282 m = 8
282 m = 8
283
283
284 if m == 0:
284 if m == 0:
285 precision = '%.0f'
285 precision = '%.0f'
286 else:
286 else:
287 precision = '%%.%df' % precision
287 precision = '%%.%df' % precision
288
288
289 size = precision % (size / math.pow(base, m))
289 size = precision % (size / math.pow(base, m))
290
290
291 return '%s%s %s%s' % (sign, size.strip(), multiples[m], unit)
291 return '%s%s %s%s' % (sign, size.strip(), multiples[m], unit)
292
292
293
293
def _check_memory_usage(worker):
    """Recycle *worker* when its RSS memory exceeds the configured maximum.

    Runs at most once per check interval. When the limit is exceeded we
    first force a full garbage collection; only if memory stays above the
    recovery threshold is the worker marked dead (gunicorn then restarts it).
    """
    _memory_max_usage = worker._memory_max_usage
    if not _memory_max_usage:
        # memory monitoring disabled for this worker
        return

    _memory_usage_check_interval = worker._memory_usage_check_interval
    # BUG FIX: derive the threshold from the *worker's* configured limit,
    # not the module-level `memory_max_usage` default (0), which made the
    # post-GC comparison recycle the worker unconditionally.
    _memory_usage_recovery_threshold = _memory_max_usage * worker._memory_usage_recovery_threshold

    elapsed = time.time() - worker._last_memory_check_time
    if elapsed > _memory_usage_check_interval:
        mem_usage = _get_process_rss()
        if mem_usage and mem_usage > _memory_max_usage:
            worker.log.info(
                "memory usage %s > %s, forcing gc",
                _format_data_size(mem_usage), _format_data_size(_memory_max_usage))
            # Try to clean it up by forcing a full collection.
            gc.collect()
            mem_usage = _get_process_rss()
            # guard the None case: _get_process_rss() returns None when
            # psutil fails, which previously crashed the `>` comparison
            if mem_usage and mem_usage > _memory_usage_recovery_threshold:
                # Didn't clean up enough, we'll have to terminate.
                worker.log.warning(
                    "memory usage %s > %s after gc, quitting",
                    _format_data_size(mem_usage), _format_data_size(_memory_max_usage))
                # This will cause worker to auto-restart itself
                worker.alive = False
        worker._last_memory_check_time = time.time()
320
320
321
321
def worker_int(worker):
    """Gunicorn hook for INT/QUIT signals: log the signal, then dump a
    stack trace of every live thread to help debug worker crashes."""
    worker.log.info("pid=[%-10s] worker received INT or QUIT signal", worker.pid)

    # map thread idents to readable names for the traceback dump
    id2name = {th.ident: th.name for th in threading.enumerate()}

    dump_lines = []
    for thread_id, stack in sys._current_frames().items():  # noqa
        thread_name = id2name.get(thread_id, "unknown_thread_id")
        dump_lines.append(
            "\n# Thread: %s(%d)" % (thread_name, thread_id))
        for fname, lineno, name, line in traceback.extract_stack(stack):
            dump_lines.append('File: "%s", line %d, in %s' % (fname, lineno, name))
            if line:
                dump_lines.append(" %s" % (line.strip()))
    worker.log.debug("\n".join(dump_lines))
339
339
340
340
def worker_abort(worker):
    # Gunicorn hook: worker received SIGABRT (e.g. force-killed after the
    # graceful timeout expired).
    worker.log.info("pid=[%-10s] worker received SIGABRT signal", worker.pid)
343
343
344
344
def worker_exit(server, worker):
    # Gunicorn hook: called in the worker process just before it exits.
    worker.log.info("pid=[%-10s] worker exit", worker.pid)
347
347
348
348
def child_exit(server, worker):
    # Gunicorn hook: called in the master process after a worker exited.
    worker.log.info("pid=[%-10s] worker child exit", worker.pid)
351
351
352
352
def pre_request(worker, req):
    # Stamp the request start time on the worker; post_request() uses it to
    # compute the total handling time for the log line.
    worker.start_time = time.time()
    worker.log.debug(
        "GNCRN PRE WORKER [cnt:%s]: %s %s", worker.nr, req.method, req.path)
357
357
358
358
def post_request(worker, req, environ, resp):
    """Gunicorn hook run after each request: log status/timing, then run
    the per-worker memory check (which may schedule a recycle)."""
    elapsed = time.time() - worker.start_time
    # Gunicorn sometimes has problems with reading the status_code
    status_code = getattr(resp, 'status_code', '')
    worker.log.debug(
        "GNCRN POST WORKER [cnt:%s]: %s %s resp: %s, Load Time: %.4fs",
        worker.nr, req.method, req.path, status_code, elapsed)
    _check_memory_usage(worker)
367
367
368
368
369 def _filter_proxy(ip):
369 def _filter_proxy(ip):
370 """
370 """
371 Passed in IP addresses in HEADERS can be in a special format of multiple
371 Passed in IP addresses in HEADERS can be in a special format of multiple
372 ips. Those comma separated IPs are passed from various proxies in the
372 ips. Those comma separated IPs are passed from various proxies in the
373 chain of request processing. The left-most being the original client.
373 chain of request processing. The left-most being the original client.
374 We only care about the first IP which came from the org. client.
374 We only care about the first IP which came from the org. client.
375
375
376 :param ip: ip string from headers
376 :param ip: ip string from headers
377 """
377 """
378 if ',' in ip:
378 if ',' in ip:
379 _ips = ip.split(',')
379 _ips = ip.split(',')
380 _first_ip = _ips[0].strip()
380 _first_ip = _ips[0].strip()
381 return _first_ip
381 return _first_ip
382 return ip
382 return ip
383
383
384
384
385 def _filter_port(ip):
385 def _filter_port(ip):
386 """
386 """
387 Removes a port from ip, there are 4 main cases to handle here.
387 Removes a port from ip, there are 4 main cases to handle here.
388 - ipv4 eg. 127.0.0.1
388 - ipv4 eg. 127.0.0.1
389 - ipv6 eg. ::1
389 - ipv6 eg. ::1
390 - ipv4+port eg. 127.0.0.1:8080
390 - ipv4+port eg. 127.0.0.1:8080
391 - ipv6+port eg. [::1]:8080
391 - ipv6+port eg. [::1]:8080
392
392
393 :param ip:
393 :param ip:
394 """
394 """
395 def is_ipv6(ip_addr):
395 def is_ipv6(ip_addr):
396 if hasattr(socket, 'inet_pton'):
396 if hasattr(socket, 'inet_pton'):
397 try:
397 try:
398 socket.inet_pton(socket.AF_INET6, ip_addr)
398 socket.inet_pton(socket.AF_INET6, ip_addr)
399 except socket.error:
399 except socket.error:
400 return False
400 return False
401 else:
401 else:
402 return False
402 return False
403 return True
403 return True
404
404
405 if ':' not in ip: # must be ipv4 pure ip
405 if ':' not in ip: # must be ipv4 pure ip
406 return ip
406 return ip
407
407
408 if '[' in ip and ']' in ip: # ipv6 with port
408 if '[' in ip and ']' in ip: # ipv6 with port
409 return ip.split(']')[0][1:].lower()
409 return ip.split(']')[0][1:].lower()
410
410
411 # must be ipv6 or ipv4 with port
411 # must be ipv6 or ipv4 with port
412 if is_ipv6(ip):
412 if is_ipv6(ip):
413 return ip
413 return ip
414 else:
414 else:
415 ip, _port = ip.split(':')[:2] # means ipv4+port
415 ip, _port = ip.split(':')[:2] # means ipv4+port
416 return ip
416 return ip
417
417
418
418
def get_ip_addr(environ):
    """Extract the client IP from WSGI *environ*, preferring proxy headers
    (X-Real-IP first, then X-Forwarded-For) over REMOTE_ADDR; the value is
    reduced to the first proxied hop and stripped of any port."""

    def _clean(raw_ip):
        return _filter_port(_filter_proxy(raw_ip))

    for header in ('HTTP_X_REAL_IP', 'HTTP_X_FORWARDED_FOR'):
        candidate = environ.get(header)
        if candidate:
            return _clean(candidate)

    return _clean(environ.get('REMOTE_ADDR', '0.0.0.0'))
437
437
438
438
class RhodeCodeLogger(Logger):
    """
    Custom Logger that allows some customization that gunicorn doesn't allow
    """

    # RhodeCode log timestamp format; now() appends milliseconds to it
    datefmt = r"%Y-%m-%d %H:%M:%S"

    def __init__(self, cfg):
        Logger.__init__(self, cfg)

    def now(self):
        """ return date in RhodeCode Log format """
        now = time.time()
        # fractional part of the timestamp expressed as whole milliseconds
        msecs = int((now - int(now)) * 1000)
        return time.strftime(self.datefmt, time.localtime(now)) + '.{0:03d}'.format(msecs)

    def atoms(self, resp, req, environ, request_time):
        """ Gets atoms for log formatting.
        """
        status = resp.status
        if isinstance(status, str):
            # e.g. "200 OK" -> "200"
            status = status.split(None, 1)[0]
        atoms = {
            'h': get_ip_addr(environ),  # client IP (proxy-header aware)
            'l': '-',
            'u': self._get_user(environ) or '-',
            't': self.now(),
            'r': "%s %s %s" % (environ['REQUEST_METHOD'],
                               environ['RAW_URI'],
                               environ["SERVER_PROTOCOL"]),
            's': status,
            'm': environ.get('REQUEST_METHOD'),
            'U': environ.get('PATH_INFO'),
            'q': environ.get('QUERY_STRING'),
            'H': environ.get('SERVER_PROTOCOL'),
            # bytes sent as a string, '-' when nothing was sent
            'b': getattr(resp, 'sent', None) is not None and str(resp.sent) or '-',
            'B': getattr(resp, 'sent', None),
            'f': environ.get('HTTP_REFERER', '-'),
            'a': environ.get('HTTP_USER_AGENT', '-'),
            'T': request_time.seconds,
            # request duration in microseconds / milliseconds / "s.us" string
            'D': (request_time.seconds * 1000000) + request_time.microseconds,
            'M': (request_time.seconds * 1000) + int(request_time.microseconds/1000),
            'L': "%d.%06d" % (request_time.seconds, request_time.microseconds),
            'p': "<%s>" % os.getpid()
        }

        # add request headers
        if hasattr(req, 'headers'):
            req_headers = req.headers
        else:
            req_headers = req

        if hasattr(req_headers, "items"):
            req_headers = req_headers.items()

        atoms.update({"{%s}i" % k.lower(): v for k, v in req_headers})

        resp_headers = resp.headers
        if hasattr(resp_headers, "items"):
            resp_headers = resp_headers.items()

        # add response headers
        atoms.update({"{%s}o" % k.lower(): v for k, v in resp_headers})

        # add environ variables
        environ_variables = environ.items()
        atoms.update({"{%s}e" % k.lower(): v for k, v in environ_variables})

        return atoms
508
508
509
509
# Tell gunicorn to use the customized logger class defined above
logger_class = RhodeCodeLogger
General Comments 0
You need to be logged in to leave comments. Login now