# HG changeset patch # User RhodeCode Admin # Date 2022-12-21 13:55:03 # Node ID e4b422d50460998bdd30f38e93eac000a450660c # Parent 252be6a783d21cbf58aff1b5a325d65521258710 configs: fixed IP extraction in gunicorn diff --git a/configs/gunicorn_config.py b/configs/gunicorn_config.py --- a/configs/gunicorn_config.py +++ b/configs/gunicorn_config.py @@ -11,6 +11,7 @@ import time import threading import traceback import random +import socket from gunicorn.glogging import Logger @@ -254,6 +255,74 @@ def post_request(worker, req, environ, r _check_memory_usage(worker) +def _filter_proxy(ip): + """ + Passed in IP addresses in HEADERS can be in a special format of multiple + ips. Those comma separated IPs are passed from various proxies in the + chain of request processing. The left-most being the original client. + We only care about the first IP which came from the org. client. + + :param ip: ip string from headers + """ + if ',' in ip: + _ips = ip.split(',') + _first_ip = _ips[0].strip() + return _first_ip + return ip + + +def _filter_port(ip): + """ + Removes a port from ip, there are 4 main cases to handle here. + - ipv4 eg. 127.0.0.1 + - ipv6 eg. ::1 + - ipv4+port eg. 127.0.0.1:8080 + - ipv6+port eg. [::1]:8080 + + :param ip: + """ + def is_ipv6(ip_addr): + if hasattr(socket, 'inet_pton'): + try: + socket.inet_pton(socket.AF_INET6, ip_addr) + except socket.error: + return False + else: + return False + return True + + if ':' not in ip: # must be ipv4 pure ip + return ip + + if '[' in ip and ']' in ip: # ipv6 with port + return ip.split(']')[0][1:].lower() + + # must be ipv6 or ipv4 with port + if is_ipv6(ip): + return ip + else: + ip, _port = ip.split(':')[:2] # means ipv4+port + return ip + + +def get_ip_addr(environ): + proxy_key = 'HTTP_X_REAL_IP' + proxy_key2 = 'HTTP_X_FORWARDED_FOR' + def_key = 'REMOTE_ADDR' + _filters = lambda x: _filter_port(_filter_proxy(x)) + + ip = environ.get(proxy_key) + if ip: + return _filters(ip) + + ip = environ.get(proxy_key2) + if ip: + return _filters(ip) + + ip = environ.get(def_key, '0.0.0.0') + return _filters(ip) + + class RhodeCodeLogger(Logger): """ Custom Logger that allows some customization that gunicorn doesn't allow @@ -270,5 +339,58 @@ class RhodeCodeLogger(Logger): msecs = int((now - long(now)) * 1000) return time.strftime(self.datefmt, time.localtime(now)) + '.{0:03d}'.format(msecs) + def atoms(self, resp, req, environ, request_time): + """ Gets atoms for log formatting. + """ + status = resp.status + if isinstance(status, str): + status = status.split(None, 1)[0] + atoms = { + 'h': get_ip_addr(environ), + 'l': '-', + 'u': self._get_user(environ) or '-', + 't': self.now(), + 'r': "%s %s %s" % (environ['REQUEST_METHOD'], + environ['RAW_URI'], + environ["SERVER_PROTOCOL"]), + 's': status, + 'm': environ.get('REQUEST_METHOD'), + 'U': environ.get('PATH_INFO'), + 'q': environ.get('QUERY_STRING'), + 'H': environ.get('SERVER_PROTOCOL'), + 'b': getattr(resp, 'sent', None) is not None and str(resp.sent) or '-', + 'B': getattr(resp, 'sent', None), + 'f': environ.get('HTTP_REFERER', '-'), + 'a': environ.get('HTTP_USER_AGENT', '-'), + 'T': request_time.seconds, + 'D': (request_time.seconds * 1000000) + request_time.microseconds, + 'M': (request_time.seconds * 1000) + int(request_time.microseconds/1000), + 'L': "%d.%06d" % (request_time.seconds, request_time.microseconds), + 'p': "<%s>" % os.getpid() + } + + # add request headers + if hasattr(req, 'headers'): + req_headers = req.headers + else: + req_headers = req + + if hasattr(req_headers, "items"): + req_headers = req_headers.items() + + atoms.update({"{%s}i" % k.lower(): v for k, v in req_headers}) + + resp_headers = resp.headers + if hasattr(resp_headers, "items"): + resp_headers = resp_headers.items() + + # add response headers + atoms.update({"{%s}o" % k.lower(): v for k, v in resp_headers}) + + # add environ variables + environ_variables = environ.items() + atoms.update({"{%s}e" % k.lower(): v for k, v in environ_variables}) + + return atoms logger_class = RhodeCodeLogger