appenlight Commit - r175:765d965d · RhodeCode Free Hosting

reformat: black

ergo -

r175:765d965d

parent child

backend/src/appenlight/celery/tasks.py

0 +5 -7

             # -*- coding: utf-8 -*-
             # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
             #
             # Licensed under the Apache License, Version 2.0 (the "License");
             # you may not use this file except in compliance with the License.
             # You may obtain a copy of the License at
             #
             #   http://www.apache.org/licenses/LICENSE-2.0
             #
             # Unless required by applicable law or agreed to in writing, software
             # distributed under the License is distributed on an "AS IS" BASIS,
             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
             # See the License for the specific language governing permissions and
             # limitations under the License.
             import bisect
             import collections
             import math
             from datetime import datetime, timedelta
             import sqlalchemy as sa
             import elasticsearch.exceptions
             import elasticsearch.helpers
             from celery.utils.log import get_task_logger
             from zope.sqlalchemy import mark_changed
             from pyramid.threadlocal import get_current_request, get_current_registry
             from ziggurat_foundations.models.services.resource import ResourceService
             from appenlight.celery import celery
             from appenlight.models.report_group import ReportGroup
             from appenlight.models import DBSession, Datastores
             from appenlight.models.report import Report
             from appenlight.models.log import Log
             from appenlight.models.metric import Metric
             from appenlight.models.event import Event
             from appenlight.models.services.application import ApplicationService
             from appenlight.models.services.event import EventService
             from appenlight.models.services.log import LogService
             from appenlight.models.services.report import ReportService
             from appenlight.models.services.report_group import ReportGroupService
             from appenlight.models.services.user import UserService
             from appenlight.models.tag import Tag
             from appenlight.lib import print_traceback
             from appenlight.lib.utils import parse_proto, in_batches
             from appenlight.lib.ext_json import json
             from appenlight.lib.redis_keys import REDIS_KEYS
             from appenlight.lib.enums import ReportType
             # Task logger obtained from celery.utils.log; used by the tasks below.
             log = get_task_logger(__name__)
             # Ascending occurrence thresholds consulted by pick_sample():
             # 100..900 in steps of 100, 1000..9000 in steps of 1000 and
             # 10000..95000 in steps of 5000.
             sample_boundries = (
                 list(range(100, 1000, 100))
                 + list(range(1000, 10000, 1000))
                 + list(range(10000, 100000, 5000))
             )
             def pick_sample(total_occurences, report_type=None):
                 """Tell whether ``total_occurences`` falls on the current sampling step.

                 The step is 1.0 until the count passes the first entry of
                 ``sample_boundries``. After that it is the highest boundary strictly
                 below the count, divided by 10.0 for ``ReportType.not_found`` and by
                 100.0 for every other report type.

                 :param total_occurences: occurrence count to test
                 :param report_type: report type used to choose the divisor
                 :return: ``True`` when the count is an exact multiple of the step
                 """
                 idx = bisect.bisect_left(sample_boundries, total_occurences)
                 if idx <= 0:
                     # below (or exactly on) the first boundary: every count matches
                     step = 1.0
                 else:
                     divisor = 10.0 if report_type == ReportType.not_found else 100.0
                     step = sample_boundries[idx - 1] / divisor
                 return total_occurences % step == 0
             @celery.task(queue="default", default_retry_delay=1, max_retries=2)
             def test_exception_task():
                 """Emit one error and one warning log record, then fail on purpose.

                 :raises Exception: always, with the message "Celery exception test"
                 """
                 message = "test celery log"
                 for emit in (log.error, log.warning):
                     emit(message, extra={"location": "celery"})
                 raise Exception("Celery exception test")
             @celery.task(queue="default", default_retry_delay=1, max_retries=2)
             def test_retry_exception_task():
                 """Sleep briefly, log twice, fail, then pass the failure to ``retry``.

                 When ``CELERY_EAGER_PROPAGATES_EXCEPTIONS`` is truthy in the Celery
                 configuration the exception is re-raised instead of calling ``retry``.
                 """
                 try:
                     import time

                     time.sleep(1.3)
                     message = "test retry celery log"
                     for emit in (log.error, log.warning):
                         emit(message, extra={"location": "celery"})
                     raise Exception("Celery exception test")
                 except Exception as failure:
                     propagate = celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]
                     if not propagate:
                         test_retry_exception_task.retry(exc=failure)
                     else:
                         raise
             @celery.task(queue="reports", default_retry_delay=600, max_retries=144)
             def add_reports(resource_id, request_params, dataset, **kwargs):
                 """Store a batch of reports, update their groups, counters and ES documents.

                 For every item of ``dataset`` a ``Report`` is built and attached to the
                 matching ``ReportGroup`` (looked up since the first day of the current
                 month, or newly created). ``pick_sample`` decides whether the report
                 itself is persisted; a stat row is generated and the group aggregates
                 are updated either way. Afterwards the Redis counters are incremented
                 and the collected documents are passed to the ES helper tasks, which are
                 called directly here rather than through ``.delay``.

                 :param resource_id: id of the application the reports belong to
                 :param request_params: mapping; only ``protocol_version`` is read here
                 :param dataset: iterable of report payload dicts (``len()`` is taken)
                 :return: ``True`` on success
                 :raises Exception: re-raised when ``CELERY_EAGER_PROPAGATES_EXCEPTIONS``
                     is set; otherwise the task is handed to ``add_reports.retry``
                 """
                 proto_version = parse_proto(request_params.get("protocol_version", ""))
                 # minute-resolution timestamp used to format the Redis counter keys
                 current_time = datetime.utcnow().replace(second=0, microsecond=0)
                 try:
                     # ES docs are collected here, keyed by partition id, for a single insert
                     es_report_docs = {}
                     es_report_group_docs = {}
                     resource = ApplicationService.by_id(resource_id)
                     # NOTE(review): `tags` is extended below but never read in this
                     # function - confirm whether it is still needed.
                     tags = []
                     es_slow_calls_docs = {}
                     es_reports_stats_rows = {}
                     for report_data in dataset:
                         # build report details for later
                         added_details = 0
                         report = Report()
                         report.set_data(report_data, resource, proto_version)
                         report._skip_ft_index = True
                         # find latest group in this months partition
                         report_group = ReportGroupService.by_hash_and_resource(
                             report.resource_id,
                             report.grouping_hash,
                             since_when=datetime.utcnow().date().replace(day=1),
                         )
                         occurences = report_data.get("occurences", 1)
                         if not report_group:
                             # total reports will be +1 moment later
                             report_group = ReportGroup(
                                 grouping_hash=report.grouping_hash,
                                 occurences=0,
                                 total_reports=0,
                                 last_report=0,
                                 priority=report.priority,
                                 error=report.error,
                                 first_timestamp=report.start_time,
                             )
                             report_group._skip_ft_index = True
                             report_group.report_type = report.report_type
                         report.report_group_time = report_group.first_timestamp
                         # sampling decision is based on the group's count before this report
                         add_sample = pick_sample(
                             report_group.occurences, report_type=report_group.report_type
                         )
                         if add_sample:
                             resource.report_groups.append(report_group)
                             report_group.reports.append(report)
                             added_details += 1
                             DBSession.flush()
                             if report.partition_id not in es_report_docs:
                                 es_report_docs[report.partition_id] = []
                             es_report_docs[report.partition_id].append(report.es_doc())
                             tags.extend(list(report.tags.items()))
                             slow_calls = report.add_slow_calls(report_data, report_group)
                             DBSession.flush()
                             for s_call in slow_calls:
                                 if s_call.partition_id not in es_slow_calls_docs:
                                     es_slow_calls_docs[s_call.partition_id] = []
                                 es_slow_calls_docs[s_call.partition_id].append(s_call.es_doc())
                                 # try generating new stat rows if needed
                         else:
                             # required for postprocessing to not fail later
                             report.report_group = report_group
                         stat_row = ReportService.generate_stat_rows(report, resource, report_group)
                         if stat_row.partition_id not in es_reports_stats_rows:
                             es_reports_stats_rows[stat_row.partition_id] = []
                         es_reports_stats_rows[stat_row.partition_id].append(stat_row.es_doc())
                         # see if we should mark 10th occurence of report
                         # (a notification flag is set when the running total crosses a
                         # multiple of 10 / 100)
                         # NOTE(review): thresholds use report.occurences while the group
                         # update below uses the payload's `occurences` - confirm they match.
                         last_occurences_10 = int(math.floor(report_group.occurences / 10))
                         curr_occurences_10 = int(
                             math.floor((report_group.occurences + report.occurences) / 10)
                         )
                         last_occurences_100 = int(math.floor(report_group.occurences / 100))
                         curr_occurences_100 = int(
                             math.floor((report_group.occurences + report.occurences) / 100)
                         )
                         notify_occurences_10 = last_occurences_10 != curr_occurences_10
                         notify_occurences_100 = last_occurences_100 != curr_occurences_100
                         # The right-hand sides below use the class-level ReportGroup
                         # attributes, presumably so the arithmetic is rendered as SQL
                         # when flushed - TODO confirm.
                         report_group.occurences = ReportGroup.occurences + occurences
                         report_group.last_timestamp = report.start_time
                         report_group.summed_duration = ReportGroup.summed_duration + report.duration
                         summed_duration = ReportGroup.summed_duration + report.duration
                         summed_occurences = ReportGroup.occurences + occurences
                         report_group.average_duration = summed_duration / summed_occurences
                         report_group.run_postprocessing(report)
                         if added_details:
                             report_group.total_reports = ReportGroup.total_reports + 1
                             report_group.last_report = report.id
                         report_group.set_notification_info(
                             notify_10=notify_occurences_10, notify_100=notify_occurences_100
                         )
                         DBSession.flush()
                         report_group.get_report().notify_channel(report_group)
                         if report_group.partition_id not in es_report_group_docs:
                             es_report_group_docs[report_group.partition_id] = []
                         es_report_group_docs[report_group.partition_id].append(
                             report_group.es_doc()
                         )
                         action = "REPORT"
                         log_msg = "%s: %s %s, client: %s, proto: %s" % (
                             action,
                             report_data.get("http_status", "unknown"),
                             str(resource),
                             report_data.get("client"),
                             proto_version,
                         )
                         log.info(log_msg)
                     total_reports = len(dataset)
                     # queue all counter updates and send them with a single execute()
                     redis_pipeline = Datastores.redis.pipeline(transaction=False)
                     key = REDIS_KEYS["counters"]["reports_per_minute"].format(current_time)
                     redis_pipeline.incr(key, total_reports)
                     redis_pipeline.expire(key, 3600 * 24)
                     key = REDIS_KEYS["counters"]["events_per_minute_per_user"].format(
                         resource.owner_user_id, current_time
                     )
                     redis_pipeline.incr(key, total_reports)
                     redis_pipeline.expire(key, 3600)
                     key = REDIS_KEYS["counters"]["reports_per_hour_per_app"].format(
                         resource_id, current_time.replace(minute=0)
                     )
                     redis_pipeline.incr(key, total_reports)
                     redis_pipeline.expire(key, 3600 * 24 * 7)
                     redis_pipeline.sadd(
                         REDIS_KEYS["apps_that_got_new_data_per_hour"].format(
                             current_time.replace(minute=0)
                         ),
                         resource_id,
                     )
                     redis_pipeline.execute()
                     # the ES helper tasks are called directly, not via .delay()
                     add_reports_es(es_report_group_docs, es_report_docs)
                     add_reports_slow_calls_es(es_slow_calls_docs)
                     add_reports_stats_rows_es(es_reports_stats_rows)
                     return True
                 except Exception as exc:
                     print_traceback(log)
                     if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
                         raise
                     add_reports.retry(exc=exc)
             @celery.task(queue="es", default_retry_delay=600, max_retries=144)
             def add_reports_es(report_group_docs, report_docs):
                 """Bulk-index report group documents and report documents.

                 Every document is updated in place with ``_index`` (the mapping key)
                 and ``_type`` ("report") before its list is passed to
                 ``elasticsearch.helpers.bulk``. Group documents are sent first.

                 :param report_group_docs: mapping of index name -> list of document dicts
                 :param report_docs: mapping of index name -> list of document dicts
                 """
                 for docs_by_index in (report_group_docs, report_docs):
                     for index_name, docs in docs_by_index.items():
                         bulk_meta = {"_index": index_name, "_type": "report"}
                         # plain loop: update() is called only for its side effect
                         for doc in docs:
                             doc.update(bulk_meta)
                         elasticsearch.helpers.bulk(Datastores.es, docs)
             @celery.task(queue="es", default_retry_delay=600, max_retries=144)
             def add_reports_slow_calls_es(es_docs):
                 """Bulk-index slow call documents.

                 Every document is updated in place with ``_index`` (the mapping key)
                 and ``_type`` ("log") before its list is passed to
                 ``elasticsearch.helpers.bulk``.

                 :param es_docs: mapping of index name -> list of document dicts
                 """
                 for index_name, docs in es_docs.items():
                     bulk_meta = {"_index": index_name, "_type": "log"}
                     # plain loop: update() is called only for its side effect
                     for doc in docs:
                         doc.update(bulk_meta)
                     elasticsearch.helpers.bulk(Datastores.es, docs)
             @celery.task(queue="es", default_retry_delay=600, max_retries=144)
             def add_reports_stats_rows_es(es_docs):
                 """Bulk-index report stat row documents.

                 Every document is updated in place with ``_index`` (the mapping key)
                 and ``_type`` ("report") before its list is passed to
                 ``elasticsearch.helpers.bulk``.

                 :param es_docs: mapping of index name -> list of document dicts
                 """
                 for index_name, docs in es_docs.items():
                     bulk_meta = {"_index": index_name, "_type": "report"}
                     # plain loop: update() is called only for its side effect
                     for doc in docs:
                         doc.update(bulk_meta)
                     elasticsearch.helpers.bulk(Datastores.es, docs)
             @celery.task(queue="logs", default_retry_delay=600, max_retries=144)
             def add_logs(resource_id, request_params, dataset, **kwargs):
                 """Store a batch of log entries, de-duplicate keyed rows and update counters.

                 Each entry becomes a ``Log`` row attached to the resource. Entries with
                 a ``primary_key`` trigger a second pass that looks up all stored rows
                 for the same (primary_key, namespace) pair, keeps only the newest row
                 in the database and replaces the matching ES documents. Afterwards the
                 Redis counters are incremented and ``add_logs_es`` is called directly.

                 :param resource_id: id of the application the logs belong to
                 :param request_params: mapping; only ``protocol_version`` is read here
                 :param dataset: sequence of log entry dicts with at least the keys
                     ``primary_key`` and ``namespace``
                 :return: ``True`` on success
                 :raises Exception: re-raised when ``CELERY_EAGER_PROPAGATES_EXCEPTIONS``
                     is set; otherwise the task is handed to ``add_logs.retry``
                 """
                 # used only in the log message below, so it is kept as received
                 proto_version = request_params.get("protocol_version")
                 # minute-resolution timestamp used to format the Redis counter keys
                 current_time = datetime.utcnow().replace(second=0, microsecond=0)
                 try:
                     es_docs = collections.defaultdict(list)
                     resource = ApplicationService.by_id_cached()(resource_id)
                     resource = DBSession.merge(resource, load=False)
                     ns_pairs = []
                     for entry in dataset:
                         # gather pk and ns so we can remove older versions of row later
                         if entry["primary_key"] is not None:
                             ns_pairs.append({"pk": entry["primary_key"], "ns": entry["namespace"]})
                         log_entry = Log()
                         log_entry.set_data(entry, resource=resource)
                         log_entry._skip_ft_index = True
                         resource.logs.append(log_entry)
                         DBSession.flush()
                         # insert non pk rows first
                         if entry["primary_key"] is None:
                             es_docs[log_entry.partition_id].append(log_entry.es_doc())
                     # 2nd pass to delete all log entries from db for same pk/ns pair
                     if ns_pairs:
                         ids_to_delete = []
                         # NOTE(review): this rebinds es_docs, so documents queued above
                         # for entries without a primary key are dropped when the batch
                         # also contains keyed entries - confirm this is intended.
                         es_docs = collections.defaultdict(list)
                         es_docs_to_delete = collections.defaultdict(list)
                         found_pkey_logs = LogService.query_by_primary_key_and_namespace(
                             list_of_pairs=ns_pairs
                         )
                         # group the stored rows by (primary_key, namespace)
                         log_dict = {}
                         for log_entry in found_pkey_logs:
                             log_key = (log_entry.primary_key, log_entry.namespace)
                             if log_key not in log_dict:
                                 log_dict[log_key] = []
                             log_dict[log_key].append(log_entry)
                         for ns, entry_list in log_dict.items():
                             entry_list = sorted(entry_list, key=lambda x: x.timestamp)
                             # newest row needs to be indexed in es
                             log_entry = entry_list[-1]
                             # delete everything from pg and ES, leave the last row in pg
                             for e in entry_list[:-1]:
                                 ids_to_delete.append(e.log_id)
                                 es_docs_to_delete[e.partition_id].append(e.delete_hash)
                             # the newest row's hash is removed from ES too; its document
                             # is queued again on the next line
                             es_docs_to_delete[log_entry.partition_id].append(log_entry.delete_hash)
                             es_docs[log_entry.partition_id].append(log_entry.es_doc())
                         if ids_to_delete:
                             query = DBSession.query(Log).filter(Log.log_id.in_(ids_to_delete))
                             query.delete(synchronize_session=False)
                         if es_docs_to_delete:
                             # batch this to avoid problems with default ES bulk limits
                             for es_index in es_docs_to_delete.keys():
                                 for batch in in_batches(es_docs_to_delete[es_index], 20):
                                     query = {"query": {"terms": {"delete_hash": batch}}}
                                     try:
                                         Datastores.es.delete_by_query(
-                                            index=es_index, doc_type="log",
+                                            index=es_index,
-                                            body=query, conflicts="proceed"
+                                            doc_type="log",
+                                            body=query,
+                                            conflicts="proceed",
                                         )
                                     except elasticsearch.exceptions.NotFoundError as exc:
                                         # a missing index is logged and skipped
                                         msg = "skipping index {}".format(es_index)
                                         log.info(msg)
                     total_logs = len(dataset)
                     log_msg = "LOG_NEW: %s, entries: %s, proto:%s" % (
                         str(resource),
                         total_logs,
                         proto_version,
                     )
                     log.info(log_msg)
                     # mark_changed(session)
                     # queue all counter updates and send them with a single execute()
                     redis_pipeline = Datastores.redis.pipeline(transaction=False)
                     key = REDIS_KEYS["counters"]["logs_per_minute"].format(current_time)
                     redis_pipeline.incr(key, total_logs)
                     redis_pipeline.expire(key, 3600 * 24)
                     key = REDIS_KEYS["counters"]["events_per_minute_per_user"].format(
                         resource.owner_user_id, current_time
                     )
                     redis_pipeline.incr(key, total_logs)
                     redis_pipeline.expire(key, 3600)
                     key = REDIS_KEYS["counters"]["logs_per_hour_per_app"].format(
                         resource_id, current_time.replace(minute=0)
                     )
                     redis_pipeline.incr(key, total_logs)
                     redis_pipeline.expire(key, 3600 * 24 * 7)
                     redis_pipeline.sadd(
                         REDIS_KEYS["apps_that_got_new_data_per_hour"].format(
                             current_time.replace(minute=0)
                         ),
                         resource_id,
                     )
                     redis_pipeline.execute()
                     # called directly, not via .delay()
                     add_logs_es(es_docs)
                     return True
                 except Exception as exc:
                     print_traceback(log)
                     if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
                         raise
                     add_logs.retry(exc=exc)
             @celery.task(queue="es", default_retry_delay=600, max_retries=144)
             def add_logs_es(es_docs):
                 """Bulk-index log documents.

                 Every document is updated in place with ``_index`` (the mapping key)
                 and ``_type`` ("log") before its list is passed to
                 ``elasticsearch.helpers.bulk``.

                 :param es_docs: mapping of index name -> list of document dicts
                 """
                 for index_name, docs in es_docs.items():
                     bulk_meta = {"_index": index_name, "_type": "log"}
                     # plain loop: update() is called only for its side effect
                     for doc in docs:
                         doc.update(bulk_meta)
                     elasticsearch.helpers.bulk(Datastores.es, docs)
             @celery.task(queue="metrics", default_retry_delay=600, max_retries=144)
             def add_metrics(resource_id, request_params, dataset, proto_version):
                 """Persist a batch of metrics, bump the Redis counters and index the batch.

                 :param resource_id: id of the application the metrics belong to
                 :param request_params: not read by this task
                 :param dataset: sequence of metric dicts with the keys ``tags``,
                     ``server_name``, ``timestamp`` and ``namespace``
                 :param proto_version: only used in the log message
                 :return: ``True`` on success
                 :raises Exception: re-raised when ``CELERY_EAGER_PROPAGATES_EXCEPTIONS``
                     is set; otherwise the task is handed to ``add_metrics.retry``
                 """
                 current_time = datetime.utcnow().replace(second=0, microsecond=0)
                 try:
                     cached_app = ApplicationService.by_id_cached()(resource_id)
                     resource = DBSession.merge(cached_app, load=False)
                     es_docs = []
                     rows = []
                     for item in dataset:
                         item_tags = dict(item["tags"])
                         # the tag value wins over the top-level server_name;
                         # an empty name is stored as "unknown"
                         server_name = item_tags.get("server_name", item["server_name"]).lower()
                         item_tags["server_name"] = server_name or "unknown"
                         row = Metric(
                             timestamp=item["timestamp"],
                             resource_id=resource.resource_id,
                             namespace=item["namespace"],
                             tags=item_tags,
                         )
                         rows.append(row)
                         es_docs.append(row.es_doc())
                     session = DBSession()
                     session.bulk_save_objects(rows)
                     session.flush()
                     log.info(
                         "%s: %s, metrics: %s, proto:%s"
                         % ("METRICS", str(resource), len(dataset), proto_version)
                     )
                     mark_changed(session)
                     # queue all counter updates and send them with a single execute()
                     pipe = Datastores.redis.pipeline(transaction=False)
                     stored = len(rows)
                     current_hour = current_time.replace(minute=0)
                     minute_key = REDIS_KEYS["counters"]["metrics_per_minute"].format(current_time)
                     pipe.incr(minute_key, stored)
                     pipe.expire(minute_key, 3600 * 24)
                     user_key = REDIS_KEYS["counters"]["events_per_minute_per_user"].format(
                         resource.owner_user_id, current_time
                     )
                     pipe.incr(user_key, stored)
                     pipe.expire(user_key, 3600)
                     app_key = REDIS_KEYS["counters"]["metrics_per_hour_per_app"].format(
                         resource_id, current_hour
                     )
                     pipe.incr(app_key, stored)
                     pipe.expire(app_key, 3600 * 24 * 7)
                     active_apps_key = REDIS_KEYS["apps_that_got_new_data_per_hour"].format(
                         current_hour
                     )
                     pipe.sadd(active_apps_key, resource_id)
                     pipe.execute()
                     # called directly, not via .delay()
                     add_metrics_es(es_docs)
                     return True
                 except Exception as failure:
                     print_traceback(log)
                     if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
                         raise
                     add_metrics.retry(exc=failure)
             @celery.task(queue="es", default_retry_delay=600, max_retries=144)
             def add_metrics_es(es_docs):
                 """Index metric documents one at a time into per-day indices.

                 The index name is ``rcae_m_`` followed by the document's ``timestamp``
                 formatted as ``%Y_%m_%d``; the document type is "log".

                 :param es_docs: iterable of document dicts holding a ``timestamp``
                 """
                 for metric_doc in es_docs:
                     day_suffix = metric_doc["timestamp"].strftime("%Y_%m_%d")
                     index_name = "rcae_m_%s" % day_suffix
                     Datastores.es.index(index_name, "log", metric_doc)
             @celery.task(queue="default", default_retry_delay=5, max_retries=2)
             def check_user_report_notifications(resource_id):
                 """Notify users about report groups queued for notification in Redis.

                 Reads and clears the per-application sets of error and slow report
                 group ids together with their occurrence counters. It then runs the
                 group alert check, sends a report notification to every user holding
                 the "view" permission, and marks the groups as notified.

                 :param resource_id: id of the application to process; nothing is done
                     when no such application exists
                 :raises Exception: any failure is logged with a traceback and re-raised
                 """
                 try:
                     request = get_current_request()
                     application = ApplicationService.by_id(resource_id)
                     if not application:
                         return
                     error_key = REDIS_KEYS["reports_to_notify_per_type_per_app"].format(
                         ReportType.error, resource_id
                     )
                     slow_key = REDIS_KEYS["reports_to_notify_per_type_per_app"].format(
                         ReportType.slow, resource_id
                     )
                     error_group_ids = Datastores.redis.smembers(error_key)
                     slow_group_ids = Datastores.redis.smembers(slow_key)
                     Datastores.redis.delete(error_key)
                     Datastores.redis.delete(slow_key)
                     err_gids = [int(g_id) for g_id in error_group_ids]
                     slow_gids = [int(g_id) for g_id in slow_group_ids]
                     group_ids = err_gids + slow_gids
                     occurence_dict = {}
                     for g_id in group_ids:
                         key = REDIS_KEYS["counters"]["report_group_occurences"].format(g_id)
                         val = Datastores.redis.get(key)
                         Datastores.redis.delete(key)
                         # a missing or empty counter counts as one occurrence
                         occurence_dict[g_id] = int(val) if val else 1
                     report_groups = ReportGroupService.by_ids(group_ids)
                     # Query.options() returns a new query; the result has to be kept,
                     # otherwise the joinedload is never applied.
                     report_groups = report_groups.options(
                         sa.orm.joinedload(ReportGroup.last_report_ref)
                     )
                     ApplicationService.check_for_groups_alert(
                         application,
                         "alert",
                         report_groups=report_groups,
                         occurence_dict=occurence_dict,
                     )
                     users = set(
                         [p.user for p in ResourceService.users_for_perm(application, "view")]
                     )
                     report_groups = report_groups.all()
                     for user in users:
                         UserService.report_notify(
                             user,
                             request,
                             application,
                             report_groups=report_groups,
                             occurence_dict=occurence_dict,
                         )
                     for group in report_groups:
                         # marks report_groups as notified
                         if not group.notified:
                             group.notified = True
                 except Exception:
                     print_traceback(log)
                     raise
             @celery.task(queue="default", default_retry_delay=5, max_retries=2)
             def check_alerts(resource_id):
                 """Run the group alert check for report groups queued for alerting in Redis.

                 Reads and clears the per-application alerting sets of error and slow
                 report group ids together with their occurrence counters. It then calls
                 ``ApplicationService.check_for_groups_alert`` with the time this task
                 started as ``since_when``.

                 :param resource_id: id of the application to process; nothing is done
                     when no such application exists
                 :raises Exception: any failure is logged with a traceback and re-raised
                 """
                 since_when = datetime.utcnow()
                 try:
                     application = ApplicationService.by_id(resource_id)
                     if not application:
                         return
                     error_key = REDIS_KEYS["reports_to_notify_per_type_per_app_alerting"].format(
                         ReportType.error, resource_id
                     )
                     slow_key = REDIS_KEYS["reports_to_notify_per_type_per_app_alerting"].format(
                         ReportType.slow, resource_id
                     )
                     error_group_ids = Datastores.redis.smembers(error_key)
                     slow_group_ids = Datastores.redis.smembers(slow_key)
                     Datastores.redis.delete(error_key)
                     Datastores.redis.delete(slow_key)
                     err_gids = [int(g_id) for g_id in error_group_ids]
                     slow_gids = [int(g_id) for g_id in slow_group_ids]
                     group_ids = err_gids + slow_gids
                     occurence_dict = {}
                     for g_id in group_ids:
                         key = REDIS_KEYS["counters"]["report_group_occurences_alerting"].format(
                             g_id
                         )
                         val = Datastores.redis.get(key)
                         Datastores.redis.delete(key)
                         # a missing or empty counter counts as one occurrence
                         occurence_dict[g_id] = int(val) if val else 1
                     report_groups = ReportGroupService.by_ids(group_ids)
                     # Query.options() returns a new query; the result has to be kept,
                     # otherwise the joinedload is never applied.
                     report_groups = report_groups.options(
                         sa.orm.joinedload(ReportGroup.last_report_ref)
                     )
                     ApplicationService.check_for_groups_alert(
                         application,
                         "alert",
                         report_groups=report_groups,
                         occurence_dict=occurence_dict,
                         since_when=since_when,
                     )
                 except Exception:
                     print_traceback(log)
                     raise
             @celery.task(queue="default", default_retry_delay=1, max_retries=2)
             def close_alerts():
                 """Ask active report alert events older than five minutes to validate or close.

                 Covers the "error_report_alert" and "slow_report_alert" event types in
                 the "active" status. Each event receives a ``since_when`` one minute
                 before the time this task started.

                 :raises Exception: any failure is logged with a traceback and re-raised
                 """
                 log.warning("Checking alerts")
                 started_at = datetime.utcnow()
                 try:
                     alert_types = [
                         Event.types[type_name]
                         for type_name in ("error_report_alert", "slow_report_alert")
                     ]
                     active_statuses = [Event.statuses["active"]]
                     # only events older than 5 minutes are considered
                     stale_events = EventService.by_type_and_status(
                         alert_types,
                         active_statuses,
                         older_than=(started_at - timedelta(minutes=5)),
                     )
                     close_cutoff = started_at - timedelta(minutes=1)
                     for stale_event in stale_events:
                         # each event decides whether it can be closed
                         stale_event.validate_or_close(since_when=close_cutoff)
                 except Exception:
                     print_traceback(log)
                     raise
             @celery.task(queue="default", default_retry_delay=600, max_retries=144)
             def update_tag_counter(tag_name, tag_value, count):
                 """Add ``count`` to ``times_seen`` of the matching tags and refresh their timestamp.

                 Tags are matched on ``name`` and on the JSON-serialised ``tag_value``,
                 compared as text.

                 :param tag_name: tag name to match
                 :param tag_value: tag value; serialised with ``json.dumps`` for the match
                 :param count: amount added to ``times_seen``
                 :return: ``True`` on success
                 :raises Exception: re-raised when ``CELERY_EAGER_PROPAGATES_EXCEPTIONS``
                     is set; otherwise the task is handed to ``update_tag_counter.retry``
                 """
                 try:
                     matching_tags = DBSession.query(Tag).filter(Tag.name == tag_name)
                     stored_value = sa.cast(Tag.value, sa.types.TEXT)
                     wanted_value = sa.cast(json.dumps(tag_value), sa.types.TEXT)
                     matching_tags = matching_tags.filter(stored_value == wanted_value)
                     new_values = {
                         "times_seen": Tag.times_seen + count,
                         "last_timestamp": datetime.utcnow(),
                     }
                     matching_tags.update(new_values, synchronize_session=False)
                     mark_changed(DBSession())
                     return True
                 except Exception as failure:
                     print_traceback(log)
                     if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
                         raise
                     update_tag_counter.retry(exc=failure)
             @celery.task(queue="default")
             def update_tag_counters():
                 """
                 Drains the seen-tag list from Redis and schedules one
                 ``update_tag_counter`` task per distinct tag, passing how many
                 times that tag appeared in the list.
                 """
                 list_key = REDIS_KEYS["seen_tag_list"]
                 seen_tags = Datastores.redis.lrange(list_key, 0, -1)
                 Datastores.redis.delete(list_key)
                 for encoded_tag, times_seen in collections.Counter(seen_tags).items():
                     # each list item is a JSON array: [name, value, ...]
                     decoded = json.loads(encoded_tag)
                     update_tag_counter.delay(decoded[0], decoded[1], times_seen)
             @celery.task(queue="default")
             def daily_digest():
                 """
                 Send a digest with the top 50 trending error reports of each app.

                 Applications are taken from the redis set of apps that had reports
                 (the set is cleared afterwards); the digest covers the last 8 hours
                 and is sent to every user holding the "view" permission on the app.

                 NOTE(review): ``notifications_reports`` reads and deletes the same
                 redis key - confirm the two tasks are not meant to use separate sets.
                 """
                 request = get_current_request()
                 apps_key = REDIS_KEYS["apps_that_had_reports"]
                 app_ids = Datastores.redis.smembers(apps_key)
                 Datastores.redis.delete(apps_key)
                 since_when = datetime.utcnow() - timedelta(hours=8)
                 log.warning("Generating daily digests")
                 for raw_id in app_ids:
                     resource_id = raw_id.decode("utf8")
                     # upper bound is "now" truncated to the full minute
                     end_date = datetime.utcnow().replace(microsecond=0, second=0)
                     filter_settings = {
                         "resource": [resource_id],
                         "tags": [{"name": "type", "value": ["error"], "op": None}],
                         "type": "error",
                         "start_date": since_when,
                         "end_date": end_date,
                     }
                     reports = ReportGroupService.get_trending(
                         request, filter_settings=filter_settings, limit=50
                     )
                     application = ApplicationService.by_id(resource_id)
                     if not application:
                         continue
                     # a set so every user gets a single digest per application
                     recipients = {
                         perm.user
                         for perm in ResourceService.users_for_perm(application, "view")
                     }
                     for recipient in recipients:
                         recipient.send_digest(
                             request, application, reports=reports, since_when=since_when
                         )
             @celery.task(queue="default")
             def notifications_reports():
                 """
                 Queue a user-notification check for every app that had reports.

                 Reads the redis set of application ids, clears it, and issues one
                 ``check_user_report_notifications`` task per id.
                 """
                 pending_key = REDIS_KEYS["apps_that_had_reports"]
                 pending_apps = Datastores.redis.smembers(pending_key)
                 Datastores.redis.delete(pending_key)
                 for raw_app_id in pending_apps:
                     log.warning("Notify for app: %s" % raw_app_id)
                     # redis returns bytes, the task expects a text id
                     check_user_report_notifications.delay(raw_app_id.decode("utf8"))
             @celery.task(queue="default")
             def alerting_reports():
                 """
                 Queue an alert check for every app flagged for report alerting.

                 Reads the redis set of application ids, clears it, and issues one
                 ``check_alerts`` task per id; that task decides which applications
                 should have new alerts opened.
                 """
                 pending_key = REDIS_KEYS["apps_that_had_reports_alerting"]
                 pending_apps = Datastores.redis.smembers(pending_key)
                 Datastores.redis.delete(pending_key)
                 for raw_app_id in pending_apps:
                     log.warning("Notify for app: %s" % raw_app_id)
                     # redis returns bytes, the task expects a text id
                     check_alerts.delay(raw_app_id.decode("utf8"))
             # NOTE(review): Celery documents `soft_time_limit` and `time_limit` as task
             # options - confirm that `hard_time_limit` is actually honored here.
             @celery.task(
                 queue="default", soft_time_limit=3600 * 4, hard_time_limit=3600 * 4, max_retries=144
             )
             def logs_cleanup(resource_id, filter_settings):
                 """
                 Delete the logs of one resource from the database and Elasticsearch.

                 :param resource_id: id of the resource whose logs are removed
                 :param filter_settings: dict with a "namespace" key; when it holds a
                     non-empty list, only logs of its first namespace are removed
                 """
                 request = get_current_request()
                 request.tm.begin()
                 # NOTE(review): the lines prefixed with "-"/"+" below are diff markers
                 # kept from the commit view; the "+" line is the reformatted query.
-                es_query = {
+                es_query = {"query": {"bool": {"filter": [{"term": {"resource_id": resource_id}}]}}}
-                    "query": {
-                        "bool": {"filter": [{"term": {"resource_id": resource_id}}]}
                 query = DBSession.query(Log).filter(Log.resource_id == resource_id)
                 if filter_settings["namespace"]:
                     # narrow both the SQL delete and the ES query to one namespace
                     query = query.filter(Log.namespace == filter_settings["namespace"][0])
                     es_query["query"]["bool"]["filter"].append(
                         {"term": {"namespace": filter_settings["namespace"][0]}}
                     )
                 query.delete(synchronize_session=False)
                 request.tm.commit()
                 # the ES documents are removed only after the database commit
                 Datastores.es.delete_by_query(
                     index="rcae_l_*", doc_type="log", body=es_query, conflicts="proceed"
                 )

backend/src/appenlight/lib/utils/__init__.py

0 +3 -1

             # -*- coding: utf-8 -*-
             # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
             #
             # Licensed under the Apache License, Version 2.0 (the "License");
             # you may not use this file except in compliance with the License.
             # You may obtain a copy of the License at
             #
             #   http://www.apache.org/licenses/LICENSE-2.0
             #
             # Unless required by applicable law or agreed to in writing, software
             # distributed under the License is distributed on an "AS IS" BASIS,
             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
             # See the License for the specific language governing permissions and
             # limitations under the License.
             """
             Utility functions.
             """
             import logging
             import requests
             import hashlib
             import json
             import copy
             import uuid
             import appenlight.lib.helpers as h
             from collections import namedtuple
             from datetime import timedelta, datetime, date
             from dogpile.cache.api import NO_VALUE
             from appenlight.models import Datastores
             from appenlight.validators import LogSearchSchema, TagListSchema, accepted_search_params
             from itsdangerous import TimestampSigner
             from ziggurat_foundations.permissions import ALL_PERMISSIONS
             from ziggurat_foundations.models.services.user import UserService
             from dateutil.relativedelta import relativedelta
             from dateutil.rrule import rrule, MONTHLY, DAILY
             # module-level logger used by the helpers below
             log = logging.getLogger(__name__)
             # (start_interval, value) pair yielded by the gap generator; value is None
             # for steps that had no matching item
             Stat = namedtuple("Stat", "start_interval value")
             def default_extractor(item):
                 """
                 Return the ``start_interval`` of ``item``.

                 :param item - object exposing ``start_interval`` as an attribute, or a
                 mapping holding it under the "start_interval" key
                 """
                 if not hasattr(item, "start_interval"):
                     return item["start_interval"]
                 return item.start_interval
             # fast gap generator
             def gap_gen_default(start, step, itemiterator, end_time=None, iv_extractor=None):
                 """ Yield one ``Stat`` per time step, filling gaps with ``None`` values.

                 Steps start at ``start`` (moved back to a minute that is a multiple of
                 the step size) and advance by ``step``; a step that has an item with
                 the same start interval yields that item, all other steps yield None.
                 Items whose interval lies before the current step are skipped.

                 :param start - datetime - what time should we start generating our values
                 :param step - timedelta - stepsize
                 :param itemiterator - iterable - items to match against generated steps,
                 expected in ascending interval order
                 :param end_time - datetime - after the items are exhausted keep yielding
                 empty steps while the step is < end_time
                 :param iv_extractor - extracts the step (interval start) from an item
                 """
                 extract_interval = iv_extractor or default_extractor
                 step_minutes = step.total_seconds() / 60.0
                 cursor = start
                 # walk back minute by minute until the minute aligns with the step size
                 while cursor.minute % step_minutes != 0:
                     cursor = cursor.replace(minute=cursor.minute - 1)
                 for item in itemiterator:
                     item_interval = extract_interval(item)
                     # emit empty steps until we catch up with the item
                     while cursor < item_interval:
                         yield Stat(cursor, None)
                         cursor = cursor + step
                     if cursor == item_interval:
                         yield Stat(item_interval, item)
                         cursor = cursor + step
                 if not end_time:
                     return
                 # pad the tail of the range with empty steps
                 while cursor < end_time:
                     yield Stat(cursor, None)
                     cursor = cursor + step
             class DateTimeEncoder(json.JSONEncoder):
                 """ Simple date/datetime to ISO 8601 encoder for json serialization"""

                 def default(self, obj):
                     """
                     Serialize ``date`` and ``datetime`` objects with ``isoformat()``.

                     :param obj: object the standard encoder could not serialize
                     :return: ISO formatted string for date/datetime instances
                     :raises TypeError: (from the base class) for any other type
                     """
                     # datetime is a subclass of date, so a single check covers both
                     if isinstance(obj, date):
                         return obj.isoformat()
                     return super().default(obj)
             def channelstream_request(
                 secret, endpoint, payload, throw_exceptions=False, servers=None
             ):
                 """
                 POST ``payload`` as JSON to ``endpoint`` on every channelstream server.

                 :param secret: secret used to sign the endpoint (sent in the
                     "x-channelstream-secret" header)
                 :param endpoint: endpoint path, appended to each server address
                 :param payload: JSON-serializable data (date/datetime are supported)
                 :param throw_exceptions: re-raise request/decoding errors instead of
                     recording an empty response for the failing server
                 :param servers: list of dicts with "secret" and "server" keys
                 :return: list with one decoded JSON response per server; ``{}`` for
                     servers whose request failed
                 """
                 responses = []
                 signer = TimestampSigner(secret)
                 sig_for_server = signer.sign(endpoint)
                 # NOTE(review): every entry must carry a "secret", but only the global
                 # ``secret`` is used for signing - confirm whether per-server secrets
                 # were meant to be used here.
                 targets = [(s["secret"], s["server"]) for s in servers or []]
                 secret_headers = {
                     "x-channelstream-secret": sig_for_server,
                     "x-channelstream-endpoint": endpoint,
                     "Content-Type": "application/json",
                 }
                 for _server_secret, server in targets:
                     url = "%s%s" % (server, endpoint)
                     # serialized outside of the try block so payload errors always surface
                     body = json.dumps(payload, cls=DateTimeEncoder)
                     try:
                         # NOTE(review): verify=False turns off TLS certificate checks
                         response = requests.post(
                             url, data=body, headers=secret_headers, verify=False, timeout=2
                         ).json()
                     except (requests.exceptions.RequestException, ValueError) as exc:
                         # ValueError covers a response body that is not valid JSON
                         if throw_exceptions:
                             raise
                         log.warning("channelstream request to %s failed: %s", url, exc)
                         response = {}
                     responses.append(response)
                 return responses
             def add_cors_headers(response):
                 """
                 Add permissive CORS headers (any origin, GET/POST/OPTIONS) to ``response``.

                 Access-Control-Allow-Credentials is not sent.

                 :param response: object whose ``headers`` provide ``add(name, value)``
                 """
                 allowed_request_headers = (
                     "Content-Type, Depth, User-Agent, X-File-Size, X-Requested-With, "
                     "If-Modified-Since, X-File-Name, Cache-Control, Pragma, Origin, "
                     "Connection, Referer, Cookie"
                 )
                 cors_headers = (
                     ("Access-Control-Allow-Origin", "*"),
                     ("XDomainRequestAllowed", "1"),
                     ("Access-Control-Allow-Methods", "GET, POST, OPTIONS"),
                     ("Access-Control-Allow-Headers", allowed_request_headers),
                     ("Access-Control-Max-Age", "86400"),
                 )
                 for header_name, header_value in cors_headers:
                     response.headers.add(header_name, header_value)
             from sqlalchemy.sql import compiler
             from psycopg2.extensions import adapt as sqlescape
             # or use the appropriate escape function from your db driver
             def compile_query(query):
                 """
                 Render ``query`` as an SQL string with its parameters inlined.

                 The statement is compiled for the dialect of the session's bind and
                 every parameter is escaped with the driver's ``adapt`` function.
                 NOTE(review): presumably meant for debugging/logging output - confirm
                 before executing the returned string.

                 :param query: query bound to a session
                 :return: SQL text with escaped parameter values
                 """
                 dialect = query.session.bind.dialect
                 compiled = compiler.SQLCompiler(dialect, query.statement)
                 compiled.compile()
                 encoding = dialect.encoding

                 def _escaped(value):
                     # text parameters are encoded before being handed to the driver
                     if isinstance(value, str):
                         value = value.encode(encoding)
                     return sqlescape(value)

                 escaped_params = {
                     name: _escaped(value) for name, value in compiled.params.items()
                 }
                 return (compiled.string.encode(encoding) % escaped_params).decode(encoding)
             def convert_es_type(input_data):
                 """
                 Convert a value to the representation stored in ES.

                 Currently every value is simply turned into its ``str`` form; other
                 types might need dedicated conversions later.
                 """
                 as_text = str(input_data)
                 return as_text
             ProtoVersion = namedtuple("ProtoVersion", ["major", "minor", "patch"])


             def parse_proto(input_data):
                 """
                 Parse a dotted protocol version string into a ``ProtoVersion``.

                 Missing components are filled with zeros ("0.5" -> 0.5.0). Input that
                 cannot be parsed is logged and reported as version 99.99.99.

                 :param input_data: version string such as "0.5" or "1.2.3"
                 :return: ProtoVersion(major, minor, patch)
                 """
                 try:
                     numbers = [int(chunk) for chunk in input_data.split(".")]
                     # pad to three components
                     numbers.extend([0] * (3 - len(numbers)))
                     return ProtoVersion(*numbers)
                 except Exception as e:
                     log.info("Unknown protocol version: %s" % e)
                 return ProtoVersion(99, 99, 99)
             def es_index_name_limiter(
                 start_date=None, end_date=None, months_in_past=6, ixtypes=None
             ):
                 """
                 Return the names of existing ES indices that cover a date range.

                 This function limits the search to 6 months by default so we don't have to
                 query 300 elasticsearch indices for 20 years of historical data for example

                 :param start_date: (datetime) range start; defaults to ``months_in_past``
                     months before ``end_date``
                 :param end_date: (datetime) range end; defaults to utcnow() when no start
                     is given, otherwise to ``months_in_past`` months after the start
                 :param months_in_past: (int) size of the default range in months
                 :param ixtypes: (list) index kinds to include - "reports", "metrics",
                     "logs", "uptime", "slow_calls"; unknown kinds are ignored, defaults
                     to reports, metrics and logs
                 :return: list of index names - for every kind the monthly indices
                     (at most 36) followed by the daily ones (at most 366)
                 """
                 index_patterns = {
                     "reports": "rcae_r_%s",
                     "logs": "rcae_l_%s",
                     "metrics": "rcae_m_%s",
                     "uptime": "rcae_uptime_ce_%s",
                     "slow_calls": "rcae_sc_%s",
                 }
                 # should be cached later; a set keeps the membership tests below cheap
                 possible_names = set(Datastores.es.indices.get_alias("*"))
                 if not ixtypes:
                     ixtypes = ["reports", "metrics", "logs"]
                 es_index_types = [index_patterns[t] for t in ixtypes if t in index_patterns]
                 if start_date:
                     start_date = copy.copy(start_date)
                 else:
                     if not end_date:
                         end_date = datetime.utcnow()
                     start_date = end_date + relativedelta(months=months_in_past * -1)
                 if not end_date:
                     end_date = start_date + relativedelta(months=months_in_past)
                 first_day = start_date.date()
                 last_day = end_date.date()
                 # the date suffixes do not depend on the index kind - compute them once
                 month_suffixes = [
                     d.strftime("%Y_%m")
                     for d in rrule(
                         MONTHLY, dtstart=first_day.replace(day=1), until=last_day, count=36
                     )
                 ]
                 day_suffixes = [
                     d.strftime("%Y_%m_%d")
                     for d in rrule(DAILY, dtstart=first_day, until=last_day, count=366)
                 ]
                 index_names = []
                 for ix_type in es_index_types:
                     # monthly indices first, then the daily ones
                     for suffix in month_suffixes + day_suffixes:
                         ix_name = ix_type % suffix
                         if ix_name in possible_names:
                             index_names.append(ix_name)
                 return index_names
             def build_filter_settings_from_query_dict(
                 request, params=None, override_app_ids=None, resource_permissions=None
             ):
                 """
                 Build normalized ES search settings from query params.

                 The application list is restricted to the applications the request
                 user has the given permissions on; every param that is not a known
                 search param is moved into the "tags" list as an "eq" tag filter.

                 :param request: request whose ``user`` determines accessible apps
                 :param params (dictionary) - query params; a deep copy is deserialized
                 :param override_app_ids - list of application id's to use instead of
                 applications user normally has access to
                 :param resource_permissions - permission names required on an
                 application, defaults to ["view"]
                 :return: deserialized filter settings including a "tags" entry
                 """
                 params = copy.deepcopy(params)
                 resource_permissions = resource_permissions or ["view"]
                 applications = []
                 if request.user:
                     applications = UserService.resources_with_perms(
                         request.user, resource_permissions, resource_types=["application"]
                     )
                 # CRITICAL - this ensures our resultset is limited to only the ones
                 # user has view permissions
                 all_possible_app_ids = {app.resource_id for app in applications}
                 # if override is preset we force permission for app to be present
                 # this allows users to see dashboards and applications they would
                 # normally not be able to
                 if override_app_ids:
                     all_possible_app_ids = set(override_app_ids)
                 schema = LogSearchSchema().bind(resources=all_possible_app_ids)
                 tag_schema = TagListSchema()
                 filter_settings = schema.deserialize(params)
                 # every key that is not a regular search param is treated as a tag
                 tag_keys = [
                     key
                     for key, _ in list(filter_settings.items())
                     if key not in accepted_search_params
                 ]
                 tag_list = [
                     {"name": key, "value": filter_settings.pop(key), "op": "eq"}
                     for key in tag_keys
                 ]
                 filter_settings["tags"] = tag_schema.deserialize(tag_list)
                 return filter_settings
             def gen_uuid():
                 """Return a new random (version 4) UUID in its canonical string form."""
                 random_id = uuid.uuid4()
                 return str(random_id)
             def gen_uuid4_sha_hex():
                 """Return the SHA-1 hex digest (40 chars) of a new random UUID's bytes."""
                 digest = hashlib.sha1()
                 digest.update(uuid.uuid4().bytes)
                 return digest.hexdigest()
             def permission_tuple_to_dict(data):
                 """
                 Flatten a permission tuple into a plain dictionary.

                 User, resource and group details are filled in only when the related
                 object is present; the ALL_PERMISSIONS marker is reported as the
                 "__all_permissions__" string.

                 :param data: object with perm_name, owner, type, user, resource and
                     group attributes
                 :return: dict with user/permission/resource/group information
                 """
                 perm_name = data.perm_name
                 owner = data.owner
                 perm_type = data.type
                 user = data.user
                 if perm_name == ALL_PERMISSIONS:
                     perm_name = "__all_permissions__"
                 resource = data.resource
                 group = data.group
                 return {
                     "user_name": user.user_name if user else None,
                     "perm_name": perm_name,
                     "owner": owner,
                     "type": perm_type,
                     "resource_name": resource.resource_name if resource else None,
                     "resource_type": resource.resource_type if resource else None,
                     "resource_id": resource.resource_id if resource else None,
                     "group_name": group.group_name if group else None,
                     "group_id": group.id if group else None,
                 }
             def get_cached_buckets(
                 request,
                 stats_since,
                 end_time,
                 fn,
                 cache_key,
                 gap_gen=None,
                 db_session=None,
                 step_interval=None,
                 iv_extractor=None,
                 rerange=False,
                 *args,
                 **kwargs
             ):
                 """ Takes "fn" that should return some data and loads the data divided
                 into step-sized buckets. For steps of one hour or more, buckets are
                 read from / written to the 30 day redis cache region and only the most
                 recent ranges are always computed on the fly; for smaller steps
                 everything is computed with a single "fn" call and nothing is cached.
                 :param request: (request) request object
                 :param stats_since: (datetime) start date of buckets range
                 :param end_time: (datetime) end date of buckets range - utcnow()
                 (truncated to the minute) if None
                 :param fn: (callable) callable to use to populate buckets, it is
                 called as:
                     fn(request, since_when, until, step_interval, gap_gen,
                        bucket_count=..., *args, **kwargs)
                 :param cache_key: (string) cache key that will be used to build bucket
                 caches
                 :param gap_gen: (callable) gap generator - should return step intervals
                 to use with our `fn` callable
                 :param db_session: (Session) sqlalchemy session - not used by this
                 function's body
                 :param step_interval: (timedelta) optional step interval if we want to
                 override the default determined from total start/end time delta
                 :param iv_extractor: (callable) used to get step intervals from data
                 returned by `fn` callable
                 :param rerange: (bool) handy if we want to change ranges from hours to
                 days when cached data is missing - will shorten execution time if `fn`
                 callable supports that and we are working with multiple rows - like metrics
                 :param args: extra positional arguments passed to `fn`
                 :param kwargs: extra keyword arguments passed to `fn`
                 :return: dict mapping interval start to the bucket returned by `fn`
                 """
                 if not end_time:
                     end_time = datetime.utcnow().replace(second=0, microsecond=0)
                 delta = end_time - stats_since
                 # default step selection (see below): under "12h" -> 1 min steps,
                 # under "3d" -> 5 min steps, above "2w" -> 1 day steps, else 1 hour
                 if not gap_gen:
                     gap_gen = gap_gen_default
                 if not iv_extractor:
                     iv_extractor = default_extractor
                 # do not use custom interval if total time range with new iv would exceed
                 # end time
                 if not step_interval or stats_since + step_interval >= end_time:
                     if delta < h.time_deltas.get("12h")["delta"]:
                         step_interval = timedelta(seconds=60)
                     elif delta < h.time_deltas.get("3d")["delta"]:
                         step_interval = timedelta(seconds=60 * 5)
                     elif delta > h.time_deltas.get("2w")["delta"]:
                         step_interval = timedelta(days=1)
                     else:
                         step_interval = timedelta(minutes=60)
                 if step_interval >= timedelta(minutes=60):
                     log.info(
                         "cached_buckets:{}: adjusting start time "
                         "for hourly or daily intervals".format(cache_key)
                     )
                     # snap the start to midnight (seconds are left untouched)
                     stats_since = stats_since.replace(hour=0, minute=0)
                 # run the gap generator over an empty iterator to get every step start
                 # between stats_since and end_time
                 ranges = [
                     i.start_interval
                     for i in list(gap_gen(stats_since, step_interval, [], end_time=end_time))
                 ]
                 buckets = {}
                 storage_key = "buckets:" + cache_key + "{}|{}"
                 # this means we basically cache per hour in 3-14 day intervals but i think
                 # its fine at this point - will be faster than db access anyways
                 if len(ranges) >= 1:
                     last_ranges = [ranges[-1]]
                 else:
                     last_ranges = []
                 if step_interval >= timedelta(minutes=60):
                     for r in ranges:
                         k = storage_key.format(step_interval.total_seconds(), r)
                         value = request.registry.cache_regions.redis_day_30.get(k)
                         # last buckets are never loaded from cache
                         # (the final range and anything within 6h of end_time)
                         is_last_result = r >= end_time - timedelta(hours=6) or r in last_ranges
                         if value is not NO_VALUE and not is_last_result:
                             log.info(
                                 "cached_buckets:{}: "
                                 "loading range {} from cache".format(cache_key, r)
                             )
                             buckets[r] = value
                         else:
                             log.info(
                                 "cached_buckets:{}: "
                                 "loading range {} from storage".format(cache_key, r)
                             )
                             range_size = step_interval
                             # with rerange, a missing hourly bucket triggers loading of the
                             # whole day starting at midnight in a single call
                             if (
                                 step_interval == timedelta(minutes=60)
                                 and not is_last_result
                                 and rerange
                             ):
                                 range_size = timedelta(days=1)
                                 r = r.replace(hour=0, minute=0)
                                 log.info(
                                     "cached_buckets:{}: "
                                     "loading collapsed "
                                     "range {} {}".format(cache_key, r, r + range_size)
                                 )
                             bucket_data = fn(
                                 request,
                                 r,
                                 r + range_size,
                                 step_interval,
                                 gap_gen,
                                 bucket_count=len(ranges),
                                 *args,
                                 **kwargs
                             )
                             # every returned bucket is stored in the result and written
                             # to the cache under its own interval start
                             for b in bucket_data:
                                 b_iv = iv_extractor(b)
                                 buckets[b_iv] = b
                                 k2 = storage_key.format(step_interval.total_seconds(), b_iv)
                                 request.registry.cache_regions.redis_day_30.set(k2, b)
                     log.info("cached_buckets:{}: saving cache".format(cache_key))
                 else:
                     # bucket count is 1 for short time ranges <= 24h from now
                     # (no caching on this path)
                     bucket_data = fn(
                         request,
                         stats_since,
                         end_time,
                         step_interval,
                         gap_gen,
                         bucket_count=1,
                         *args,
                         **kwargs
                     )
                     for b in bucket_data:
                         buckets[iv_extractor(b)] = b
                 return buckets
             def get_cached_split_data(
                 request, stats_since, end_time, fn, cache_key, db_session=None, *args, **kwargs
             ):
                 """ Load data with "fn" split into an "older" and a "recent" part.

                 When the range starts more than 24h before ``end_time``, the part up to
                 midnight of the end day is taken from the 7 day redis cache region
                 (or computed and stored there when missing) and only the remainder is
                 computed on the fly; the caller can then reduce both parts into a
                 single value.

                 :param request: (request) request object
                 :param stats_since: (datetime) start of the range
                 :param end_time: (datetime) end of the range - utcnow() (truncated to
                 the minute) if None
                 :param fn: (callable) called as
                     fn(request, since_when, until, db_session=db_session, *args, **kwargs)
                 :param cache_key: (string) key used to build the cache entry name
                 :param db_session: (Session) passed through to `fn`
                 :return: tuple (older_data, todays_data); older_data is None when
                 nothing was cached or computed for the older part
                 """
                 if not end_time:
                     end_time = datetime.utcnow().replace(second=0, microsecond=0)
                 # the delta is taken before the start gets snapped to midnight below
                 delta = end_time - stats_since
                 if delta >= timedelta(minutes=60):
                     log.info(
                         "cached_split_data:{}: adjusting start time "
                         "for hourly or daily intervals".format(cache_key)
                     )
                     stats_since = stats_since.replace(hour=0, minute=0)
                 old_end_time = end_time.replace(hour=0, minute=0)
                 key_template = "buckets_split_data:" + cache_key + ":{}|{}"
                 final_storage_key = key_template.format(delta.total_seconds(), old_end_time)
                 cache_region = request.registry.cache_regions.redis_day_7
                 cdata = cache_region.get(final_storage_key)
                 older_data = None
                 if cdata:
                     log.info("cached_split_data:{}: found old " "bucket data".format(cache_key))
                     older_data = cdata
                 if stats_since < end_time - h.time_deltas.get("24h")["delta"]:
                     # the recent part starts at midnight of the end day
                     recent_stats_since = old_end_time
                     if not cdata:
                         log.info(
                             "cached_split_data:{}: didn't find the "
                             "start bucket in cache so load older data".format(cache_key)
                         )
                         older_data = fn(
                             request,
                             stats_since,
                             recent_stats_since,
                             db_session=db_session,
                             *args,
                             **kwargs
                         )
                         cache_region.set(final_storage_key, older_data)
                 else:
                     recent_stats_since = stats_since
                 log.info(
                     "cached_split_data:{}: loading fresh "
                     "data bucksts from last 24h ".format(cache_key)
                 )
                 todays_data = fn(
                     request, recent_stats_since, end_time, db_session=db_session, *args, **kwargs
                 )
                 return older_data, todays_data
             def in_batches(seq, size):
                 """
                 Split a sequence into consecutive batches of at most ``size`` items.

                 :param seq (sequence) - must support ``len()`` and slicing
                 :param size integer - batch size
                 :return: generator of slices of ``seq``; the last one may be shorter
                 """
                 batch_starts = range(0, len(seq), size)
                 return (seq[start : start + size] for start in batch_starts)
             def get_es_info(cache_regions, es_conn):
                 """
                 Return server info of the Elasticsearch connection.

                 The lookup goes through the ``memory_min_10`` cache region, so
                 ``es_conn.info()`` is not called on every invocation.

                 :param cache_regions: object exposing the ``memory_min_10`` cache region
                 :param es_conn: Elasticsearch connection providing ``info()``
                 :return: dict with "raw_info" (the info() response) and "version"
                     (the version number string split on ".")
                 """
                 @cache_regions.memory_min_10.cache_on_arguments()
                 def get_es_info_cached():
                     returned_info = {"raw_info": es_conn.info()}
                     # NOTE(review): the "-"/"+" prefixed lines are diff markers kept from
                     # the commit view; the "+" lines are the reformatted statement.
-                    returned_info["version"] = returned_info["raw_info"]["version"]["number"].split('.')
+                    returned_info["version"] = returned_info["raw_info"]["version"]["number"].split(
+                        "."
+                    )
                     return returned_info
                 return get_es_info_cached()

backend/src/appenlight/models/report.py

0 +6 -6

             # -*- coding: utf-8 -*-
             # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
             #
             # Licensed under the Apache License, Version 2.0 (the "License");
             # you may not use this file except in compliance with the License.
             # You may obtain a copy of the License at
             #
             #   http://www.apache.org/licenses/LICENSE-2.0
             #
             # Unless required by applicable law or agreed to in writing, software
             # distributed under the License is distributed on an "AS IS" BASIS,
             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
             # See the License for the specific language governing permissions and
             # limitations under the License.
             from datetime import datetime, timedelta
             import math
             import uuid
             import hashlib
             import copy
             import urllib.parse
             import logging
             import sqlalchemy as sa
             from appenlight.models import Base, Datastores
             from appenlight.lib.utils.date_utils import convert_date
             from appenlight.lib.utils import convert_es_type
             from appenlight.models.slow_call import SlowCall
             from appenlight.lib.utils import channelstream_request
             from appenlight.lib.enums import ReportType, Language
             from pyramid.threadlocal import get_current_registry, get_current_request
             from sqlalchemy.dialects.postgresql import JSON
             from ziggurat_foundations.models.base import BaseModel
             log = logging.getLogger(__name__)

             # operator sets shared by several searchable report fields
             _NUMERIC_OPS = ("eq", "ne", "ge", "le")
             _TEXT_OPS = ("eq", "ne", "startswith", "endswith", "contains")

             # searchable report field -> value type and the comparison operators
             # allowed for it
             REPORT_TYPE_MATRIX = {
                 "http_status": {"type": "int", "ops": _NUMERIC_OPS},
                 "group:priority": {"type": "int", "ops": _NUMERIC_OPS},
                 "duration": {"type": "float", "ops": ("ge", "le")},
                 "url_domain": {"type": "unicode", "ops": _TEXT_OPS},
                 "url_path": {"type": "unicode", "ops": _TEXT_OPS},
                 "error": {"type": "unicode", "ops": _TEXT_OPS},
                 "tags:server_name": {"type": "unicode", "ops": _TEXT_OPS},
                 "traceback": {"type": "unicode", "ops": ("contains",)},
                 "group:occurences": {"type": "int", "ops": _NUMERIC_OPS},
             }
             class Report(Base, BaseModel):
                 """
                 ORM model for a single report row stored in the ``reports`` table
                 """

                 __tablename__ = "reports"
                 __table_args__ = {"implicit_returning": False}
                 id = sa.Column(sa.Integer, nullable=False, primary_key=True)
                 # report group this report belongs to (cascades on update/delete)
                 group_id = sa.Column(
                     sa.BigInteger,
                     sa.ForeignKey("reports_groups.id", ondelete="cascade", onupdate="cascade"),
                 )
                 resource_id = sa.Column(sa.Integer(), nullable=False, index=True)
                 report_type = sa.Column(sa.Integer(), nullable=False, index=True)
                 error = sa.Column(sa.UnicodeText(), index=True)
                 # NOTE(review): the ``{}`` defaults used on the JSON columns are single
                 # dict objects, not factories - confirm nothing mutates them in place
                 extra = sa.Column(JSON(), default={})
                 request = sa.Column(JSON(), nullable=False, default={})
                 ip = sa.Column(sa.String(39), index=True, default="")
                 username = sa.Column(sa.Unicode(255), default="")
                 user_agent = sa.Column(sa.Unicode(255), default="")
                 url = sa.Column(sa.UnicodeText(), index=True)
                 request_id = sa.Column(sa.Text())
                 request_stats = sa.Column(JSON(), nullable=False, default={})
                 # NOTE(review): nullable=False is combined with default=None here -
                 # confirm a traceback value is always assigned before insert
                 traceback = sa.Column(JSON(), nullable=False, default=None)
                 traceback_hash = sa.Column(sa.Text())
                 start_time = sa.Column(
                     sa.DateTime(), default=datetime.utcnow, server_default=sa.func.now()
                 )
                 end_time = sa.Column(sa.DateTime())
                 duration = sa.Column(sa.Float, default=0)
                 http_status = sa.Column(sa.Integer, index=True)
                 url_domain = sa.Column(sa.Unicode(100), index=True)
                 url_path = sa.Column(sa.Unicode(255), index=True)
                 tags = sa.Column(JSON(), nullable=False, default={})
                 language = sa.Column(sa.Integer(), default=0)
                 # this is used to determine partition for the report
                 report_group_time = sa.Column(
                     sa.DateTime(), default=datetime.utcnow, server_default=sa.func.now()
                 )
                 # log rows sharing this report's request_id; rows whose request_id is
                 # NULL or an empty string are excluded by the join condition
                 logs = sa.orm.relationship(
                     "Log",
                     lazy="dynamic",
                     passive_deletes=True,
                     passive_updates=True,
                     primaryjoin="and_(Report.request_id==Log.request_id, "
                     "Log.request_id != None, Log.request_id != '')",
                     foreign_keys="[Log.request_id]",
                 )
                 # slow calls recorded for this report, ordered by their timestamp;
                 # each SlowCall gets a ``detail`` backref pointing at the report
                 slow_calls = sa.orm.relationship(
                     "SlowCall",
                     backref="detail",
                     cascade="all, delete-orphan",
                     passive_deletes=True,
                     passive_updates=True,
                     order_by="SlowCall.timestamp",
                 )
                 def set_data(self, data, resource, protocol_version=None):
                     """
                     Populates this report from an incoming payload dict

                     :param data: report payload; the keys ``http_status``, ``priority``,
                         ``error``, ``server``, ``view_name``, ``traceback``,
                         ``occurences``, ``username``, ``ip``, ``url`` and ``user_agent``
                         are read with ``data[...]`` and therefore must be present
                     :param resource: object providing ``default_grouping`` and
                         ``resource_id``
                     :param protocol_version: forwarded to ``generate_grouping_hash``
                     """
                     self.http_status = data["http_status"]
                     # NOTE(review): ``priority`` (and ``occurences`` below) are not
                     # declared as columns in this class, so they look like plain
                     # instance attributes - confirm who consumes them
                     self.priority = data["priority"]
                     self.error = data["error"]
                     report_language = data.get("language", "").lower()
                     # unknown language names fall back to Language.unknown
                     self.language = getattr(Language, report_language, Language.unknown)
                     # we need temp holder here to decide later
                     # if we want to commit the tags if report is marked for creation
                     self.tags = {"server_name": data["server"], "view_name": data["view_name"]}
                     if data.get("tags"):
                         # tags arrive as (name, value) pairs
                         for tag_tuple in data["tags"]:
                             self.tags[tag_tuple[0]] = tag_tuple[1]
                     # first traceback assignment: used for the hash below; it is
                     # assigned again further down with a ``frameinfo`` fallback
                     self.traceback = data["traceback"]
                     stripped_traceback = self.stripped_traceback()
                     tb_repr = repr(stripped_traceback).encode("utf8")
                     self.traceback_hash = hashlib.sha1(tb_repr).hexdigest()
                     url_info = urllib.parse.urlsplit(data.get("url", ""), allow_fragments=False)
                     # NOTE(review): the slices (128 / 2048) are longer than the column
                     # sizes declared on the model (Unicode(100) / Unicode(255)) -
                     # confirm against the actual database schema
                     self.url_domain = url_info.netloc[:128]
                     self.url_path = url_info.path[:2048]
                     self.occurences = data["occurences"]
                     if self.error:
                         self.report_type = ReportType.error
                     else:
                         self.report_type = ReportType.slow
                     # but if its status 404 its 404 type
                     if self.http_status in [404, "404"] or self.error == "404 Not Found":
                         self.report_type = ReportType.not_found
                         self.error = ""
                     # an explicit group string from the payload takes precedence over
                     # the resource's default grouping
                     self.generate_grouping_hash(
                         data.get("appenlight.group_string", data.get("group_string")),
                         resource.default_grouping,
                         protocol_version,
                     )
                     # details
                     if data["http_status"] in [404, "404"]:
                         # for 404s the payload is replaced by a trimmed dict, so every
                         # ``data.get(...)`` below sees only these four keys
                         data = {
                             "username": data["username"],
                             "ip": data["ip"],
                             "url": data["url"],
                             "user_agent": data["user_agent"],
                         }
                         # NOTE(review): ``data`` was just rebound to the trimmed dict,
                         # which has no referer keys, so this condition is never true
                         if data.get("HTTP_REFERER") or data.get("http_referer"):
                             data["HTTP_REFERER"] = data.get("HTTP_REFERER", "") or data.get(
                                 "http_referer", ""
                             )
                     self.resource_id = resource.resource_id
                     self.username = data["username"]
                     self.user_agent = data["user_agent"]
                     self.ip = data["ip"]
                     self.extra = {}
                     if data.get("extra"):
                         # extra values arrive as (name, value) pairs
                         for extra_tuple in data["extra"]:
                             self.extra[extra_tuple[0]] = extra_tuple[1]
                     self.url = data["url"]
                     # dashes are stripped from a supplied request id; a random uuid4
                     # string is used when none (or an empty one) was supplied
                     self.request_id = data.get("request_id", "").replace("-", "") or str(
                         uuid.uuid4()
                     )
                     request_data = data.get("request", {})
                     self.request = request_data
                     self.request_stats = data.get("request_stats") or {}
                     # second traceback assignment: falls back to ``frameinfo``
                     traceback = data.get("traceback")
                     if not traceback:
                         traceback = data.get("frameinfo")
                     self.traceback = traceback
                     start_date = convert_date(data.get("start_time"))
                     # only move start_time forward, never backwards
                     if not self.start_time or self.start_time < start_date:
                         self.start_time = start_date
                     self.end_time = convert_date(data.get("end_time"), False)
                     self.duration = 0
                     if self.start_time and self.end_time:
                         d = self.end_time - self.start_time
                         self.duration = d.total_seconds()
                     # update tags with other vars
                     if self.username:
                         self.tags["user_name"] = self.username
                     self.tags["report_language"] = Language.key_from_value(self.language)
                 def add_slow_calls(self, data, report_group):
                     """
                     Builds SlowCall instances from ``data["slow_calls"]`` and attaches
                     them to this report

                     :param data: report payload; the ``slow_calls`` key is optional
                     :param report_group: forwarded to ``SlowCall.set_data``
                     :return: list with the newly created SlowCall instances
                     """
                     slow_calls = []
                     # ``or []`` also covers a payload that carries ``slow_calls: None``
                     for call in data.get("slow_calls") or []:
                         sc_inst = SlowCall()
                         sc_inst.set_data(
                             call, resource_id=self.resource_id, report_group=report_group
                         )
                         slow_calls.append(sc_inst)
                     # attach once, after the loop - extending inside the loop would
                     # re-add every previously built call on each iteration
                     self.slow_calls.extend(slow_calls)
                     return slow_calls
                 def get_dict(self, request, details=False, exclude_keys=None, include_keys=None):
                     """
                     Serializes the report, plus a summary of its report group, to a dict

                     :param request: request used for url generation and ES lookups
                     :param details: when true, also adds affected users, visible
                         integrations, comments, next/previous report ids and the
                         nested slow call tree
                     :param exclude_keys: top level keys to drop from the result
                     :param include_keys: when non-empty, only these top level keys
                         are kept
                     :return: dict; the ``group`` key is never filtered out
                     """
                     from appenlight.models.services.report_group import ReportGroupService

                     result = super(Report, self).get_dict()
                     result["req_stats"] = self.req_stats()
                     group = self.report_group
                     result["group"] = {
                         "id": group.id,
                         "total_reports": group.total_reports,
                         "last_report": group.last_report,
                         "priority": group.priority,
                         "occurences": group.occurences,
                         "last_timestamp": group.last_timestamp,
                         "first_timestamp": group.first_timestamp,
                         "public": group.public,
                         "fixed": group.fixed,
                         "read": group.read,
                         "average_duration": group.average_duration,
                     }
                     result["resource_name"] = group.application.resource_name
                     result["report_type"] = self.report_type
                     if result["http_status"] == 404 and not result["error"]:
                         result["error"] = "404 Not Found"

                     if details:
                         result["affected_users_count"] = ReportGroupService.affected_users_count(
                             group
                         )
                         top_users = []
                         for user in ReportGroupService.top_affected_users(group):
                             top_users.append({"username": user.username, "count": user.count})
                         result["top_affected_users"] = top_users

                         # only integrations flagged as front_visible are exposed
                         visible_integrations = [
                             {
                                 "name": integration.integration_name,
                                 "action": integration.integration_action,
                             }
                             for integration in group.application.integrations
                             if integration.front_visible
                         ]
                         result["application"] = {"integrations": visible_integrations}
                         result["comments"] = [comment.get_dict() for comment in group.comments]

                         next_in_group = self.get_next_in_group(request)
                         previous_in_group = self.get_previous_in_group(request)
                         result["group"]["next_report"] = next_in_group or None
                         result["group"]["previous_report"] = previous_in_group or None

                         # slow call ordering
                         def find_parent(row, data):
                             # walk backwards and return the first call whose time
                             # span strictly encloses ``row``
                             for candidate in reversed(data):
                                 try:
                                     encloses = (
                                         row["timestamp"] > candidate["timestamp"]
                                         and row["end_time"] < candidate["end_time"]
                                     )
                                 except TypeError as e:
                                     log.warning("reports_view.find_parent: %s" % e)
                                     continue
                                 if encloses:
                                     return candidate
                             return None

                         top_level_calls = []
                         pending = [call.get_dict() for call in self.slow_calls]
                         # consume the list from its end, nesting each call under
                         # its parent when one is found
                         while pending:
                             current = pending[-1]
                             parent = find_parent(current, pending)
                             if parent:
                                 parent["children"].append(current)
                             else:
                                 # no parent at all? keep it as a top level call
                                 top_level_calls.append(current)
                             del pending[-1]
                         result["slow_calls"] = top_level_calls

                     result["front_url"] = self.get_public_url(request)

                     excluded = exclude_keys or []
                     included = include_keys or []
                     for key in list(result):
                         if key == "group":
                             continue
                         if key in excluded or (include_keys and key not in included):
                             del result[key]
                     return result
                 def get_previous_in_group(self, request):
                     """
                     Looks up a report of the same group that has a lower report id

                     The search runs against this report's partition index through
                     ``request.es_conn``.

                     :param request: request object exposing ``es_conn``
                     :return: ``report_id`` of the first hit or None when nothing matched
                     """
                     query = {
                         "size": 1,
                         "query": {
                             "bool": {
                                 "filter": [
                                     {"term": {"group_id": self.group_id}},
                                     {"range": {"report_id": {"lt": self.id}}},
                                 ]
                             }
                         },
                         # NOTE(review): hits are ordered by _doc, not by report_id -
                         # confirm this yields the intended neighbouring report
                         "sort": [{"_doc": {"order": "desc"}}],
                     }
                     result = request.es_conn.search(
                         body=query, index=self.partition_id, doc_type="report"
                     )
                     # decide on the returned hits instead of hits.total: newer
                     # Elasticsearch versions report the total as an object, which is
                     # truthy even when there are no matches
                     hits = result["hits"]["hits"]
                     if hits:
                         return hits[0]["_source"]["report_id"]
                     return None
                 def get_next_in_group(self, request):
                     """
                     Looks up a report of the same group that has a higher report id

                     The search runs against this report's partition index through
                     ``request.es_conn``.

                     :param request: request object exposing ``es_conn``
                     :return: ``report_id`` of the first hit or None when nothing matched
                     """
                     query = {
                         "size": 1,
                         "query": {
                             "bool": {
                                 "filter": [
                                     {"term": {"group_id": self.group_id}},
                                     {"range": {"report_id": {"gt": self.id}}},
                                 ]
                             }
                         },
                         # NOTE(review): hits are ordered by _doc, not by report_id -
                         # confirm this yields the intended neighbouring report
                         "sort": [{"_doc": {"order": "asc"}}],
                     }
                     result = request.es_conn.search(
                         body=query, index=self.partition_id, doc_type="report"
                     )
                     # decide on the returned hits instead of hits.total: newer
                     # Elasticsearch versions report the total as an object, which is
                     # truthy even when there are no matches
                     hits = result["hits"]["hits"]
                     if hits:
                         return hits[0]["_source"]["report_id"]
                     return None
                 def get_public_url(self, request=None, report_group=None, _app_url=None):
                     """
                     Builds the front-end url under which this report can be viewed

                     :param request: request used for url generation; the current
                         thread-local request is used when omitted
                     :param report_group: when given, its id is used in the url instead
                         of this report's ``group_id``
                     :param _app_url: forwarded to ``request.route_url``
                     """
                     if not request:
                         request = get_current_request()
                     base_url = request.route_url("/", _app_url=_app_url)
                     group_id = report_group.id if report_group else self.group_id
                     return (base_url + "ui/report/%s/%s") % (group_id, self.id)
                 def req_stats(self):
                     """
                     Returns a copy of ``request_stats`` extended with percentages

                     Every entry whose name contains none of "calls", "main" or
                     "percentages" is subtracted from the ``main`` value and gets a
                     floored percentage of the original ``main`` time.

                     :return: stats dict, or None when there is no ``main`` time
                     """
                     stats = self.request_stats.copy()
                     percentages = {"main": 100.0}
                     stats["percentages"] = percentages
                     total = stats.get("main", 0.0)
                     if not total:
                         return None
                     for name, call_time in stats.items():
                         if "calls" in name or "main" in name or "percentages" in name:
                             continue
                         stats["main"] -= call_time
                         share = math.floor((call_time / total * 100.0))
                         percentages[name] = share
                         percentages["main"] -= share
                     # the parts may add up to more than the measured total
                     if percentages["main"] < 0.0:
                         percentages["main"] = 0.0
                         stats["main"] = 0.0
                     return stats
                 def generate_grouping_hash(
                     self, hash_string=None, default_grouping=None, protocol_version=None
                 ):
                     """
                     Computes the SHA1 hash used to group reports together, stores it
                     in ``self.grouping_hash`` and returns it

                     :param hash_string: explicit grouping string; when empty one is
                         derived from the report according to ``default_grouping``
                     :param default_grouping: "url_traceback", "traceback_server" or
                         anything else for error + location grouping
                     :param protocol_version: not used by this method
                     """
                     if not hash_string:
                         location = self.tags.get("view_name") or self.url_path
                         server_name = self.tags.get("server_name") or ""
                         if default_grouping == "url_traceback":
                             if self.language == Language.javascript:
                                 # javascript reports are grouped without the location
                                 hash_string = "%s_%s" % (self.traceback_hash, self.error)
                             else:
                                 hash_string = "%s_%s_%s" % (
                                     self.traceback_hash,
                                     location,
                                     self.error,
                                 )
                         elif default_grouping == "traceback_server":
                             # NOTE(review): javascript reports get exactly the same
                             # string as every other language in this mode
                             hash_string = "%s_%s" % (self.traceback_hash, server_name)
                         else:
                             hash_string = "%s_%s" % (self.error, location)
                     # the first day of the current UTC month is part of the hash input
                     month_start = datetime.utcnow().date().replace(day=1)
                     keyed_string = "{}_{}".format(month_start, hash_string)
                     digest = hashlib.sha1(keyed_string.encode("utf8")).hexdigest()
                     self.grouping_hash = digest
                     return digest
                 def stripped_traceback(self):
                     """
                     Returns a deep copy of the traceback with the ``vars`` entry
                     removed from every row; ``self.traceback`` itself is not modified
                     """
                     clean = copy.deepcopy(self.traceback)
                     if not isinstance(clean, list):
                         # nothing to strip from non-list tracebacks
                         return clean
                     for frame in clean:
                         frame.pop("vars", None)
                     return clean
                 def notify_channel(self, report_group):
                     """
                     Sends a notification about this report through
                     ``channelstream_request``; only reports of the error type are sent

                     :param report_group: group whose summary fields are included in
                         the message
                     """
                     settings = get_current_registry().settings
                     log.info("notify channelstream")
                     if self.report_type != ReportType.error:
                         return
                     channel_name = "app_%s" % self.resource_id
                     group_summary = {
                         "priority": report_group.priority,
                         "first_timestamp": report_group.first_timestamp,
                         "last_timestamp": report_group.last_timestamp,
                         "average_duration": report_group.average_duration,
                         "occurences": report_group.occurences,
                     }
                     report_summary = {
                         "group": group_summary,
                         "report_id": self.id,
                         "group_id": self.group_id,
                         "resource_id": self.resource_id,
                         "http_status": self.http_status,
                         "url_domain": self.url_domain,
                         "url_path": self.url_path,
                         "error": self.error or "",
                         "server": self.tags.get("server_name"),
                         "view_name": self.tags.get("view_name"),
                         "front_url": self.get_public_url(),
                     }
                     payload = {
                         "type": "message",
                         "user": "__system__",
                         "channel": channel_name,
                         "message": {
                             "topic": "front_dashboard.new_topic",
                             "report": report_summary,
                         },
                     }
                     channelstream_request(
                         settings["cometd.secret"],
                         "/message",
                         [payload],
                         servers=[settings["cometd_servers"]],
                     )
                 def es_doc(self):
                     """
                     Returns the Elasticsearch document that represents this report

                     Tags are stored as ``{"values": ..., "numeric_values": ...}``
                     entries keyed by the tag name with dots replaced by underscores;
                     ``tag_list`` holds those names. The document is joined to and
                     routed by its report group id.
                     """
                     tags = {}
                     tag_list = []
                     for name, value in self.tags.items():
                         name = name.replace(".", "_")
                         tag_list.append(name)
                         # booleans are ints in python but are not numeric tag values
                         is_numeric = isinstance(value, (int, float)) and not isinstance(
                             value, bool
                         )
                         tags[name] = {
                             "values": convert_es_type(value),
                             "numeric_values": value if is_numeric else None,
                         }
                     if "user_name" not in self.tags and self.username:
                         # same key names as the entries built in the loop above
                         # NOTE(review): user_name is not added to tag_list here -
                         # confirm whether that is intended
                         tags["user_name"] = {
                             "values": [self.username],
                             "numeric_values": None,
                         }
                     return {
                         "_id": str(self.id),
                         "report_id": str(self.id),
                         "resource_id": self.resource_id,
                         "http_status": self.http_status or "",
                         "start_time": self.start_time,
                         "end_time": self.end_time,
                         "url_domain": self.url_domain if self.url_domain else "",
                         "url_path": self.url_path if self.url_path else "",
                         "duration": self.duration,
                         "error": self.error if self.error else "",
                         "report_type": self.report_type,
                         "request_id": self.request_id,
                         "ip": self.ip,
                         "group_id": str(self.group_id),
                         "type": "report",
                         "join_field": {"name": "report", "parent": str(self.group_id)},
                         "tags": tags,
                         "tag_list": tag_list,
                         "_routing": str(self.group_id),
                     }
                 @property
                 def partition_id(self):
                     return "rcae_r_%s" % self.report_group_time.strftime("%Y_%m")
                 def partition_range(self):
                     """
                     Returns the date range of this report's partition

                     :return: tuple of (first day of the month of
                         ``report_group_time``, first day of the following month)
                     """
                     month_start = self.report_group_time.date().replace(day=1)
                     # 40 days after the 1st always falls into the following month
                     next_month_start = (month_start + timedelta(days=40)).replace(day=1)
                     return month_start, next_month_start
             def after_insert(mapper, connection, target):
                 """
                 Indexes a newly inserted report in Elasticsearch, unless the instance
                 carries a ``_skip_ft_index`` attribute
                 """
                 if hasattr(target, "_skip_ft_index"):
                     return
                 document = target.es_doc()
                 # the id is handed to index() separately, so drop it from the body
                 document.pop("_id", None)
                 Datastores.es.index(
                     target.partition_id,
                     "report",
                     document,
                     parent=target.group_id,
                     id=target.id,
                 )
             def after_update(mapper, connection, target):
                 """
                 Re-indexes an updated report in Elasticsearch, unless the instance
                 carries a ``_skip_ft_index`` attribute
                 """
                 if hasattr(target, "_skip_ft_index"):
                     return
                 document = target.es_doc()
                 # the id is handed to index() separately, so drop it from the body
                 document.pop("_id", None)
                 Datastores.es.index(
                     target.partition_id,
                     "report",
                     document,
                     parent=target.group_id,
                     id=target.id,
                 )
             def after_delete(mapper, connection, target):
                 """
                 Removes the documents of a deleted report from its Elasticsearch
                 partition, unless the instance carries a ``_skip_ft_index`` attribute
                 """
                 if not hasattr(target, "_skip_ft_index"):
                     query = {"query": {"term": {"report_id": target.id}}}
                     Datastores.es.delete_by_query(
                         index=target.partition_id,
                         doc_type="report",
                         body=query,
                         conflicts="proceed",
                     )
             # register the hooks above so that inserts, updates and deletes of Report
             # rows are mirrored to Elasticsearch
             sa.event.listen(Report, "after_insert", after_insert)
             sa.event.listen(Report, "after_update", after_update)
             sa.event.listen(Report, "after_delete", after_delete)

backend/src/appenlight/models/report_group.py

0 +1 -3

             # -*- coding: utf-8 -*-
             # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
             #
             # Licensed under the Apache License, Version 2.0 (the "License");
             # you may not use this file except in compliance with the License.
             # You may obtain a copy of the License at
             #
             #   http://www.apache.org/licenses/LICENSE-2.0
             #
             # Unless required by applicable law or agreed to in writing, software
             # distributed under the License is distributed on an "AS IS" BASIS,
             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
             # See the License for the specific language governing permissions and
             # limitations under the License.
             import logging
             import sqlalchemy as sa
             from datetime import datetime, timedelta
             from pyramid.threadlocal import get_current_request
             from sqlalchemy.dialects.postgresql import JSON
             from ziggurat_foundations.models.base import BaseModel
             from appenlight.models import Base, get_db_session, Datastores
             from appenlight.lib.enums import ReportType
             from appenlight.lib.rule import Rule
             from appenlight.lib.redis_keys import REDIS_KEYS
             from appenlight.models.report import REPORT_TYPE_MATRIX
             log = logging.getLogger(__name__)
             class ReportGroup(Base, BaseModel):
                 """
                 ORM model for a group of reports stored in the ``reports_groups`` table
                 """

                 __tablename__ = "reports_groups"
                 __table_args__ = {"implicit_returning": False}
                 id = sa.Column(sa.BigInteger(), nullable=False, primary_key=True)
                 # application the group belongs to (cascades on update/delete)
                 resource_id = sa.Column(
                     sa.Integer(),
                     sa.ForeignKey(
                         "applications.resource_id", onupdate="CASCADE", ondelete="CASCADE"
                     ),
                     nullable=False,
                     index=True,
                 )
                 priority = sa.Column(
                     sa.Integer, nullable=False, index=True, default=5, server_default="5"
                 )
                 first_timestamp = sa.Column(
                     sa.DateTime(), default=datetime.utcnow, server_default=sa.func.now()
                 )
                 last_timestamp = sa.Column(
                     sa.DateTime(), default=datetime.utcnow, server_default=sa.func.now()
                 )
                 error = sa.Column(sa.UnicodeText(), index=True)
                 grouping_hash = sa.Column(sa.String(40), default="")
                 # pkeys of postprocess actions that were already applied to the group
                 triggered_postprocesses_ids = sa.Column(JSON(), nullable=False, default=list)
                 report_type = sa.Column(sa.Integer, default=1)
                 total_reports = sa.Column(sa.Integer, default=1)
                 # id of the report returned by get_report() when no id is given
                 last_report = sa.Column(sa.Integer)
                 occurences = sa.Column(sa.Integer, default=1)
                 average_duration = sa.Column(sa.Float, default=0)
                 summed_duration = sa.Column(sa.Float, default=0)
                 read = sa.Column(sa.Boolean(), index=True, default=False)
                 fixed = sa.Column(sa.Boolean(), index=True, default=False)
                 notified = sa.Column(sa.Boolean(), index=True, default=False)
                 public = sa.Column(sa.Boolean(), index=True, default=False)
                 # all reports of the group; each Report gets a ``report_group`` backref
                 reports = sa.orm.relationship(
                     "Report",
                     lazy="dynamic",
                     backref="report_group",
                     cascade="all, delete-orphan",
                     passive_deletes=True,
                     passive_updates=True,
                 )
                 # comments ordered by comment_id; note the backref on the comment is
                 # named ``report`` although it points at the group
                 comments = sa.orm.relationship(
                     "ReportComment",
                     lazy="dynamic",
                     backref="report",
                     cascade="all, delete-orphan",
                     passive_deletes=True,
                     passive_updates=True,
                     order_by="ReportComment.comment_id",
                 )
                 # users linked to the group through the ``reports_assignments`` table
                 assigned_users = sa.orm.relationship(
                     "User",
                     backref=sa.orm.backref(
                         "assigned_reports_relation",
                         lazy="dynamic",
                         order_by=sa.desc(sa.text("reports_groups.id")),
                     ),
                     passive_deletes=True,
                     passive_updates=True,
                     secondary="reports_assignments",
                     order_by="User.user_name",
                 )
                 stats = sa.orm.relationship(
                     "ReportStat",
                     lazy="dynamic",
                     backref="report",
                     passive_deletes=True,
                     passive_updates=True,
                 )
                 # single Report whose id equals ``last_report``
                 last_report_ref = sa.orm.relationship(
                     "Report",
                     uselist=False,
                     primaryjoin="ReportGroup.last_report " "== Report.id",
                     foreign_keys="Report.id",
                     cascade="all, delete-orphan",
                     passive_deletes=True,
                     passive_updates=True,
                 )
                 def __repr__(self):
                     return "<ReportGroup id:{}>".format(self.id)
                 def get_report(self, report_id=None, public=False):
                     """
                     Returns the report with the given id from this group, or the report
                     referenced by ``last_report`` when no id is passed

                     :param report_id: id of the wanted report (optional)
                     :param public: not used by this method
                     """
                     from .report import Report

                     if report_id:
                         return self.reports.filter(Report.id == report_id).first()
                     return self.last_report_ref
                 def get_public_url(self, request, _app_url=None):
                     """
                     Builds the front-end url of this report group

                     :param request: request used for url generation
                     :param _app_url: forwarded to ``request.route_url``
                     """
                     base_url = request.route_url("/", _app_url=_app_url)
                     template = base_url + "ui/report/%s"
                     return template % self.id
                 def run_postprocessing(self, report):
                     """
                     Alters report group priority based on postprocessing configuration

                     Every action of the application's ``postprocess_conf`` whose rule
                     matches the report is applied at most once per group; applied
                     action pkeys are recorded in ``triggered_postprocesses_ids``.

                     :param report: report whose dict form is matched against the rules
                     """
                     request = get_current_request()
                     get_db_session(None, self).flush()
                     for action in self.application.postprocess_conf:
                         get_db_session(None, self).flush()
                         rule_obj = Rule(action.rule, REPORT_TYPE_MATRIX)
                         # rebuilt on every iteration
                         # NOTE(review): presumably because an earlier action may have
                         # changed group values that end up in the dict - confirm
                         report_dict = report.get_dict(request)
                         # if was not processed yet
                         if (
                             rule_obj.match(report_dict)
                             and action.pkey not in self.triggered_postprocesses_ids
                         ):
                             action.postprocess(self)
                             # this way sqla can track mutation of list
                             self.triggered_postprocesses_ids = self.triggered_postprocesses_ids + [
                                 action.pkey
                             ]
                     get_db_session(None, self).flush()
                     # do not go out of bounds
                     # NOTE(review): the clamp to 1..10 runs after the last flush of
                     # this method, so the clamped value is only written by a later
                     # flush - confirm callers flush/commit afterwards
                     if self.priority < 1:
                         self.priority = 1
                     if self.priority > 10:
                         self.priority = 10
                 def get_dict(self, request):
                     """
                     Return the group as a dict enriched with data from its latest report.

                     :param request: request forwarded to the report's ``get_public_url``
                     :return: the base ``get_dict()`` result plus ``server_name``,
                         ``view_name``, ``resource_name``, ``report_type``, ``url_path``
                         and ``front_url``; ``triggered_postprocesses_ids`` is removed
                     """
                     instance_dict = super(ReportGroup, self).get_dict()
                     # fetch the latest report once instead of re-entering get_report()
                     # (and its function-level import) for every field
                     latest_report = self.get_report()
                     report_tags = latest_report.tags
                     instance_dict["server_name"] = report_tags.get("server_name")
                     instance_dict["view_name"] = report_tags.get("view_name")
                     instance_dict["resource_name"] = self.application.resource_name
                     instance_dict["report_type"] = latest_report.report_type
                     instance_dict["url_path"] = latest_report.url_path
                     instance_dict["front_url"] = latest_report.get_public_url(request)
                     del instance_dict["triggered_postprocesses_ids"]
                     return instance_dict
                 def es_doc(self):
                     """
                     Return the Elasticsearch document describing this report group.

                     Ids are serialised as strings; ``join_field`` names the document
                     as the ``report_group`` side of the index join.
                     """
                     return {
                         "_id": str(self.id),
                         "group_id": str(self.id),
                         "resource_id": self.resource_id,
                         "error": self.error,
                         "fixed": self.fixed,
                         "public": self.public,
                         "read": self.read,
                         "priority": self.priority,
                         "occurences": self.occurences,
                         "average_duration": self.average_duration,
                         "summed_duration": self.summed_duration,
                         "first_timestamp": self.first_timestamp,
                         "last_timestamp": self.last_timestamp,
                         "type": "report_group",
                         "join_field": {"name": "report_group"},
                     }
                 def set_notification_info(self, notify_10=False, notify_100=False):
                     """
                     Queue the redis counter and set updates read by the notification
                     job, then send them in one pipeline.

                     ``notify_10`` / ``notify_100`` additionally set the 10th / 100th
                     occurrence markers for this group.
                     """
                     one_day = 3600 * 24
                     counters = REDIS_KEYS["counters"]
                     minute_now = datetime.utcnow().replace(second=0, microsecond=0)
                     # global app counter
                     per_type_key = counters["reports_per_type"].format(
                         self.report_type, minute_now
                     )
                     pipe = Datastores.redis.pipeline()
                     pipe.incr(per_type_key)
                     pipe.expire(per_type_key, one_day)
                     # detailed app notification for alerts and notifications
                     for set_name in (
                         "apps_that_had_reports",
                         "apps_that_had_reports_alerting",
                     ):
                         pipe.sadd(REDIS_KEYS[set_name], self.resource_id)
                     # only notify for exceptions here
                     if self.report_type == ReportType.error:
                         for set_name in (
                             "apps_that_had_reports",
                             "apps_that_had_error_reports_alerting",
                         ):
                             pipe.sadd(REDIS_KEYS[set_name], self.resource_id)
                     for counter_name in (
                         "report_group_occurences",
                         "report_group_occurences_alerting",
                     ):
                         counter_key = counters[counter_name].format(self.id)
                         pipe.incr(counter_key)
                         pipe.expire(counter_key, one_day)
                     milestones = (
                         (notify_10, "report_group_occurences_10th"),
                         (notify_100, "report_group_occurences_100th"),
                     )
                     for wanted, counter_name in milestones:
                         if wanted:
                             marker_key = counters[counter_name].format(self.id)
                             pipe.setex(marker_key, one_day, 1)
                     for set_name in (
                         "reports_to_notify_per_type_per_app",
                         "reports_to_notify_per_type_per_app_alerting",
                     ):
                         notify_key = REDIS_KEYS[set_name].format(
                             self.report_type, self.resource_id
                         )
                         pipe.sadd(notify_key, self.id)
                         pipe.expire(notify_key, one_day)
                     pipe.execute()
                 @property
                 def partition_id(self):
                     """Name of the monthly index partition derived from ``first_timestamp``."""
                     month_suffix = self.first_timestamp.strftime("%Y_%m")
                     return "rcae_r_%s" % month_suffix
                 def partition_range(self):
                     """
                     Return ``(first day of the month of first_timestamp, first day
                     of the following month)`` as dates.
                     """
                     month_start = self.first_timestamp.date().replace(day=1)
                     # 40 days past the 1st always lands inside the next month
                     next_month_start = (month_start + timedelta(days=40)).replace(day=1)
                     return month_start, next_month_start
             def after_insert(mapper, connection, target):
                 """
                 Index a newly inserted object in Elasticsearch unless it carries a
                 ``_skip_ft_index`` attribute.
                 """
                 if hasattr(target, "_skip_ft_index"):
                     return
                 document = target.es_doc()
                 # the id is passed separately to the index call
                 document.pop("_id", None)
                 Datastores.es.index(target.partition_id, "report", document, id=target.id)
             def after_update(mapper, connection, target):
                 """
                 Re-index an updated object in Elasticsearch unless it carries a
                 ``_skip_ft_index`` attribute.
                 """
                 if hasattr(target, "_skip_ft_index"):
                     return
                 document = target.es_doc()
                 # the id is passed separately to the index call
                 document.pop("_id", None)
                 Datastores.es.index(target.partition_id, "report", document, id=target.id)
             def after_delete(mapper, connection, target):
                 """
                 Delete, from the object's partition index, the documents whose
                 ``group_id`` equals the deleted object's id.
                 """
                 by_group = {"query": {"term": {"group_id": target.id}}}
                 # delete by query; version conflicts do not abort the request
                 Datastores.es.delete_by_query(
                     index=target.partition_id,
                     doc_type="report",
                     body=by_group,
                     conflicts="proceed",
                 )
             # Hook the handlers defined above into ReportGroup's mapper events so
             # inserts, updates and deletes are mirrored to Elasticsearch.
             sa.event.listen(ReportGroup, "after_insert", after_insert)
             sa.event.listen(ReportGroup, "after_update", after_update)
             sa.event.listen(ReportGroup, "after_delete", after_delete)

backend/src/appenlight/models/services/log.py

0 +1 -5

             # -*- coding: utf-8 -*-
             # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
             #
             # Licensed under the Apache License, Version 2.0 (the "License");
             # you may not use this file except in compliance with the License.
             # You may obtain a copy of the License at
             #
             #   http://www.apache.org/licenses/LICENSE-2.0
             #
             # Unless required by applicable law or agreed to in writing, software
             # distributed under the License is distributed on an "AS IS" BASIS,
             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
             # See the License for the specific language governing permissions and
             # limitations under the License.
             import paginate
             import logging
             import sqlalchemy as sa
             from appenlight.models.log import Log
             from appenlight.models import get_db_session, Datastores
             from appenlight.models.services.base import BaseService
             from appenlight.lib.utils import es_index_name_limiter
             log = logging.getLogger(__name__)
             class LogService(BaseService):
                 """Lookup helpers for log entries: SQL queries plus Elasticsearch search."""

                 @classmethod
                 def get_logs(cls, resource_ids=None, filter_settings=None, db_session=None):
                     """
                     Build a SQL query for logs belonging to ``resource_ids``.

                     :param resource_ids: resource ids the logs must belong to
                     :param filter_settings: optional dict; recognised keys are
                         ``start_date``, ``end_date``, ``log_level``, ``request_id``
                         and ``namespace``
                     :param db_session: optional session, resolved via ``get_db_session``
                     :return: ``[]`` when no ids are given, otherwise a query ordered
                         by timestamp descending
                     """
                     # ensure we always have id's passed
                     if not resource_ids:
                         return []
                     # the default is None - treat it as "no filters" instead of crashing
                     filter_settings = filter_settings or {}
                     db_session = get_db_session(db_session)
                     q = db_session.query(Log)
                     q = q.filter(Log.resource_id.in_(resource_ids))
                     if filter_settings.get("start_date"):
                         q = q.filter(Log.timestamp >= filter_settings.get("start_date"))
                     if filter_settings.get("end_date"):
                         q = q.filter(Log.timestamp <= filter_settings.get("end_date"))
                     if filter_settings.get("log_level"):
                         q = q.filter(Log.log_level == filter_settings.get("log_level").upper())
                     if filter_settings.get("request_id"):
                         request_id = filter_settings.get("request_id", "")
                         # dashes are stripped from the id before comparing
                         q = q.filter(Log.request_id == request_id.replace("-", ""))
                     if filter_settings.get("namespace"):
                         q = q.filter(Log.namespace == filter_settings.get("namespace"))
                     q = q.order_by(sa.desc(Log.timestamp))
                     return q

                 @classmethod
                 def es_query_builder(cls, app_ids, filter_settings):
                     """
                     Build the Elasticsearch query body for a log search.

                     :param app_ids: iterable of resource ids to restrict the search to
                     :param filter_settings: optional dict; recognised keys are ``tags``,
                         ``start_date``, ``end_date``, ``level``, ``namespace``,
                         ``request_id`` and ``message``
                     :return: dict with a ``query.bool`` section (``filter`` list and,
                         when a message is given, a ``must`` match clause)
                     """
                     if not filter_settings:
                         filter_settings = {}
                     query = {
                         "query": {"bool": {"filter": [{"terms": {"resource_id": list(app_ids)}}]}}
                     }
                     start_date = filter_settings.get("start_date")
                     end_date = filter_settings.get("end_date")
                     filter_part = query["query"]["bool"]["filter"]
                     for tag in filter_settings.get("tags", []):
                         tag_values = [v.lower() for v in tag["value"]]
                         # dots in the tag name are replaced to form the field name
                         key = "tags.%s.values" % tag["name"].replace(".", "_")
                         filter_part.append({"terms": {key: tag_values}})
                     date_range = {"range": {"timestamp": {}}}
                     if start_date:
                         date_range["range"]["timestamp"]["gte"] = start_date
                     if end_date:
                         date_range["range"]["timestamp"]["lte"] = end_date
                     if start_date or end_date:
                         filter_part.append(date_range)
                     levels = filter_settings.get("level")
                     if levels:
                         filter_part.append({"terms": {"log_level": levels}})
                     namespaces = filter_settings.get("namespace")
                     if namespaces:
                         filter_part.append({"terms": {"namespace": namespaces}})
                     request_ids = filter_settings.get("request_id")
                     if request_ids:
                         filter_part.append({"terms": {"request_id": request_ids}})
                     messages = filter_settings.get("message")
                     if messages:
                         query["query"]["bool"]["must"] = {
                             "match": {"message": {"query": " ".join(messages), "operator": "and"}}
                         }
                     return query

                 @classmethod
                 def get_time_series_aggregate(cls, app_ids=None, filter_settings=None):
                     """
                     Run an hourly ``date_histogram`` aggregation over matching logs.

                     :return: ``{}`` when no app ids are given, ``[]`` when no index
                         covers the requested dates, otherwise the raw ES response
                     """
                     if not app_ids:
                         return {}
                     # the default is None - treat it as "no filters" instead of crashing
                     filter_settings = filter_settings or {}
                     es_query = cls.es_query_builder(app_ids, filter_settings)
                     es_query["aggs"] = {
                         "events_over_time": {
                             "date_histogram": {
                                 "field": "timestamp",
                                 "interval": "1h",
                                 "min_doc_count": 0,
                                 "extended_bounds": {
                                     "max": filter_settings.get("end_date"),
                                     "min": filter_settings.get("start_date"),
                                 },
                             }
                         }
                     }
                     log.debug(es_query)
                     index_names = es_index_name_limiter(
                         filter_settings.get("start_date"),
                         filter_settings.get("end_date"),
                         ixtypes=["logs"],
                     )
                     if not index_names:
                         return []
                     return Datastores.es.search(
                         body=es_query, index=index_names, doc_type="log", size=0
                     )

                 @classmethod
                 def get_search_iterator(
                     cls,
                     app_ids=None,
                     page=1,
                     items_per_page=50,
                     order_by=None,
                     filter_settings=None,
                     limit=None,
                 ):
                     """
                     Fetch one page of matching log hits from Elasticsearch, newest first.

                     ``order_by`` and ``limit`` are accepted but not used.

                     :return: ``(hits, count)`` where ``hits`` is the ``hits`` section of
                         the ES response (``{}`` when nothing can be searched) and
                         ``count`` is the total capped at 5000
                     """
                     if not app_ids:
                         return {}, 0
                     # the default is None - treat it as "no filters" instead of crashing
                     filter_settings = filter_settings or {}
                     es_query = cls.es_query_builder(app_ids, filter_settings)
                     es_query.update({"sort": [{"timestamp": {"order": "desc"}}]})
                     log.debug(es_query)
                     es_from = (page - 1) * items_per_page
                     index_names = es_index_name_limiter(
                         filter_settings.get("start_date"),
                         filter_settings.get("end_date"),
                         ixtypes=["logs"],
                     )
                     if not index_names:
                         return {}, 0
                     results = Datastores.es.search(
                         body=es_query,
                         index=index_names,
                         doc_type="log",
                         size=items_per_page,
                         from_=es_from,
                     )
                     # never report more than 5000 items to the paginator
                     count = min(results["hits"]["total"], 5000)
                     return results["hits"], count

                 @classmethod
                 def get_paginator_by_app_ids(
                     cls,
                     app_ids=None,
                     page=1,
                     item_count=None,
                     items_per_page=50,
                     order_by=None,
                     filter_settings=None,
                     exclude_columns=None,
                     db_session=None,
                 ):
                     """
                     Return a ``paginate.Page`` for the search whose ``sa_items``
                     attribute holds the matching ``Log`` rows in ES hit order.

                     The passed ``item_count`` is replaced by the count reported by
                     the search; ``exclude_columns`` is accepted but not used.
                     """
                     if not filter_settings:
                         filter_settings = {}
                     results, item_count = cls.get_search_iterator(
                         app_ids, page, items_per_page, order_by, filter_settings
                     )
                     paginator = paginate.Page(
                         [], item_count=item_count, items_per_page=items_per_page, **filter_settings
                     )
                     ordered_ids = tuple(
                         item["_source"]["log_id"] for item in results.get("hits", [])
                     )
                     sorted_instance_list = []
                     if ordered_ids:
                         db_session = get_db_session(db_session)
                         query = db_session.query(Log)
                         query = query.filter(Log.log_id.in_(ordered_ids))
                         query = query.order_by(sa.desc("timestamp"))
                         # group rows by stringified id once, then emit them in ES hit
                         # order - same result as the nested scan without the O(n*m) cost
                         rows_by_id = {}
                         for item in query.all():
                             rows_by_id.setdefault(str(item.log_id), []).append(item)
                         for i_id in ordered_ids:
                             sorted_instance_list.extend(rows_by_id.get(str(i_id), ()))
                     paginator.sa_items = sorted_instance_list
                     return paginator

                 @classmethod
                 def query_by_primary_key_and_namespace(cls, list_of_pairs, db_session=None):
                     """
                     Build a query for logs matching any ``{"pk": ..., "ns": ...}`` pair,
                     ordered by timestamp and then log id, both ascending.
                     """
                     db_session = get_db_session(db_session)
                     list_of_conditions = [
                         sa.and_(Log.primary_key == pair["pk"], Log.namespace == pair["ns"])
                         for pair in list_of_pairs
                     ]
                     query = db_session.query(Log)
                     query = query.filter(sa.or_(*list_of_conditions))
                     query = query.order_by(sa.asc(Log.timestamp), sa.asc(Log.log_id))
                     return query

backend/src/appenlight/models/services/report_group.py

0 +15 -15

             # -*- coding: utf-8 -*-
             # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
             #
             # Licensed under the Apache License, Version 2.0 (the "License");
             # you may not use this file except in compliance with the License.
             # You may obtain a copy of the License at
             #
             #   http://www.apache.org/licenses/LICENSE-2.0
             #
             # Unless required by applicable law or agreed to in writing, software
             # distributed under the License is distributed on an "AS IS" BASIS,
             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
             # See the License for the specific language governing permissions and
             # limitations under the License.
             import logging
             import paginate
             import sqlalchemy as sa
             import appenlight.lib.helpers as h
             from datetime import datetime
             from appenlight.models import get_db_session, Datastores
             from appenlight.models.report import Report
             from appenlight.models.report_group import ReportGroup
             from appenlight.models.report_comment import ReportComment
             from appenlight.models.user import User
             from appenlight.models.services.base import BaseService
             from appenlight.lib.enums import ReportType
             from appenlight.lib.utils import es_index_name_limiter
             log = logging.getLogger(__name__)
             class ReportGroupService(BaseService):
                 @classmethod
                 def get_trending(cls, request, filter_settings, limit=15, db_session=None):
                     """
                     Returns report groups trending for specific time interval

                     :param request: accepted but not used by this method
                     :param filter_settings: dict with ``start_date``, ``end_date``,
                         ``resource`` (only its first element is queried) and
                         optional ``tags``
                     :param limit: maximum number of group buckets requested from ES
                     :param db_session: optional session, resolved via ``get_db_session``
                     :return: list of ``(count, ReportGroup)`` tuples, highest count
                         first; ``[]`` when there is nothing to search or no bucket
                     """
                     db_session = get_db_session(db_session)
                     # NOTE(review): tag names and values are used verbatim here, while
                     # get_search_iterator replaces dots in names and lower-cases
                     # values - confirm which form the index expects
                     tags = [
                         {"terms": {"tags.{}.values".format(tag["name"]): tag["value"]}}
                         for tag in filter_settings.get("tags") or []
                     ]
                     index_names = es_index_name_limiter(
                         start_date=filter_settings["start_date"],
                         end_date=filter_settings["end_date"],
                         ixtypes=["reports"],
                     )
                     if not index_names or not filter_settings["resource"]:
                         return []
                     es_query = {
                         "aggs": {
                             "parent_agg": {
                                 "aggs": {
                                     "groups": {
                                         "aggs": {
                                             "sub_agg": {
                                                 "value_count": {
                                                     "field": "tags.group_id.values.keyword"
                                                 }
                                             }
                                         },
                                         "filter": {"exists": {"field": "tags.group_id.values"}},
                                     }
                                 },
                                 "terms": {"field": "tags.group_id.values.keyword", "size": limit},
                             }
                         },
                         "query": {
                             "bool": {
                                 "filter": [
                                     {"terms": {"resource_id": [filter_settings["resource"][0]]}},
                                     {
                                         "range": {
                                             "timestamp": {
                                                 "gte": filter_settings["start_date"],
                                                 "lte": filter_settings["end_date"],
                                             }
                                         }
                                     },
                                 ]
                             }
                         },
                     }
                     if tags:
                         es_query["query"]["bool"]["filter"].extend(tags)
                     result = Datastores.es.search(
                         body=es_query, index=index_names, doc_type="report", size=0
                     )
                     # group id -> occurrence count (a missing count is treated as 0)
                     report_groups_d = {
                         int(bucket["key"]): bucket["groups"]["sub_agg"]["value"] or 0
                         for bucket in result["aggregations"]["parent_agg"]["buckets"]
                     }
                     if not report_groups_d:
                         # nothing trending - skip the SQL round trip with an empty IN ()
                         return []
                     query = db_session.query(ReportGroup)
                     query = query.filter(ReportGroup.id.in_(list(report_groups_d.keys())))
                     query = query.options(sa.orm.joinedload(ReportGroup.last_report_ref))
                     results = [(report_groups_d[group.id], group) for group in query]
                     return sorted(results, reverse=True, key=lambda x: x[0])
                 @classmethod
                 def get_search_iterator(
                     cls,
                     app_ids=None,
                     page=1,
                     items_per_page=50,
                     order_by=None,
                     filter_settings=None,
                     limit=None,
                 ):
                     """
                     Search Elasticsearch for reports of ``app_ids`` and aggregate them
                     per report group, newest group first.

                     ``page``, ``items_per_page``, ``order_by`` and ``limit`` are
                     accepted but not used by this method.

                     :param app_ids: resource ids to search in
                     :param filter_settings: optional dict; recognised keys are
                         ``start_date``, ``end_date``, ``tags``, ``priority``,
                         ``min_occurences``, ``min_duration``, ``max_duration``,
                         ``request_id``, ``http_status``, ``report_type``, ``message``,
                         ``error``, ``url_domain``, ``url_path`` and ``report_status``
                     :return: ``{}`` when no app ids are given, ``[]`` when no index
                         covers the requested dates, otherwise the ``aggregations``
                         section of the ES response
                     """
                     if not app_ids:
                         return {}
                     if not filter_settings:
                         filter_settings = {}

                     def group_filter(parent_query):
                         # clause matched against the parent report_group document
                         return {
                             "has_parent": {
                                 "parent_type": "report_group",
                                 "query": parent_query,
                             }
                         }

                     query = {
                         "size": 0,
                         "query": {
                             "bool": {
                                 "must": [],
                                 "should": [],
                                 "filter": [{"terms": {"resource_id": list(app_ids)}}],
                             }
                         },
                         "aggs": {
                             "top_groups": {
                                 "terms": {
                                     "size": 5000,
                                     "field": "join_field#report_group",
                                     "order": {"newest": "desc"},
                                 },
                                 "aggs": {
                                     "top_reports_hits": {
                                         "top_hits": {"size": 1, "sort": {"start_time": "desc"}}
                                     },
                                     "newest": {"max": {"field": "start_time"}},
                                 },
                             }
                         },
                     }
                     filter_part = query["query"]["bool"]["filter"]
                     must_part = query["query"]["bool"]["must"]

                     start_date = filter_settings.get("start_date")
                     end_date = filter_settings.get("end_date")
                     date_range = {"range": {"start_time": {}}}
                     if start_date:
                         date_range["range"]["start_time"]["gte"] = start_date
                     if end_date:
                         date_range["range"]["start_time"]["lte"] = end_date
                     if start_date or end_date:
                         filter_part.append(date_range)

                     for tag in filter_settings.get("tags", []):
                         tag_values = [v.lower() for v in tag["value"]]
                         key = "tags.%s.values" % tag["name"].replace(".", "_")
                         filter_part.append({"terms": {key: tag_values}})

                     priorities = filter_settings.get("priority")
                     if priorities:
                         filter_part.append(group_filter({"terms": {"priority": priorities}}))

                     min_occurences = filter_settings.get("min_occurences")
                     if min_occurences:
                         filter_part.append(
                             group_filter({"range": {"occurences": {"gte": min_occurences[0]}}})
                         )

                     request_ids = filter_settings.get("request_id")
                     if request_ids:
                         filter_part.append({"terms": {"request_id": request_ids}})

                     min_duration = filter_settings.get("min_duration")
                     max_duration = filter_settings.get("max_duration")
                     duration_range = {"range": {"average_duration": {}}}
                     if min_duration:
                         duration_range["range"]["average_duration"]["gte"] = min_duration[0]
                     if max_duration:
                         duration_range["range"]["average_duration"]["lte"] = max_duration[0]
                     if min_duration or max_duration:
                         filter_part.append(group_filter(duration_range))

                     http_status = filter_settings.get("http_status")
                     report_type = filter_settings.get("report_type", [ReportType.error])
                     # set error report type if http status is not found
                     # and we are dealing with slow reports
                     if not http_status or ReportType.slow in report_type:
                         filter_part.append({"terms": {"report_type": report_type}})
                     if http_status:
                         filter_part.append({"terms": {"http_status": http_status}})

                     # free-text fields are matched on their space-joined words
                     for field in ("message", "error"):
                         words = filter_settings.get(field)
                         if words:
                             must_part.append({"match": {field: " ".join(words)}})
                     for field in ("url_domain", "url_path"):
                         values = filter_settings.get(field)
                         if values:
                             must_part.append({"terms": {field: values}})

                     # report status name -> term required on the parent group;
                     # unknown status names are ignored
                     status_terms = {
                         "never_reviewed": {"read": False},
                         "reviewed": {"read": True},
                         "public": {"public": True},
                         "fixed": {"fixed": True},
                     }
                     for status in filter_settings.get("report_status") or ():
                         term = status_terms.get(status)
                         if term is not None:
                             filter_part.append(group_filter({"term": dict(term)}))

                     index_names = es_index_name_limiter(
                         start_date, end_date, ixtypes=["reports"]
                     )
                     if not index_names:
                         return []
                     results = Datastores.es.search(
                         body=query,
                         index=index_names,
                         doc_type=["report", "report_group"],
                         size=0,
                     )
                     return results["aggregations"]
                 @classmethod
                 def get_paginator_by_app_ids(
                     cls,
                     app_ids=None,
                     page=1,
                     item_count=None,
                     items_per_page=50,
                     order_by=None,
                     filter_settings=None,
                     exclude_columns=None,
                     db_session=None,
                 ):
                     """
                     Builds a paginator of reports for the given applications

                     Report ids come from ``cls.get_search_iterator``; the matching
                     ``Report`` rows (with their report group eagerly joined) are then
                     loaded from the database and attached as ``paginator.sa_items``.

                     :param app_ids: forwarded to ``get_search_iterator``
                     :param page: forwarded to ``get_search_iterator``
                     :param item_count: not used in this method body
                     :param items_per_page: page size for the search and the paginator
                     :param order_by: forwarded to ``get_search_iterator``
                     :param filter_settings: dict of filters; also expanded into keyword
                         arguments of ``paginate.Page``; falsy values become ``{}``
                     :param exclude_columns: not used in this method body
                     :param db_session: optional session, resolved via ``get_db_session``
                     :return: ``paginate.Page`` with an extra ``sa_items`` list attribute
                     """
                     if not filter_settings:
                         filter_settings = {}
                     results = cls.get_search_iterator(
                         app_ids, page, items_per_page, order_by, filter_settings
                     )
                     ordered_ids = []
                     if results:
                         # one id per bucket: the report_id stored in the bucket's first top hit
                         for item in results["top_groups"]["buckets"]:
-                            pg_id = item["top_reports_hits"]["hits"]["hits"][0]["_source"]["report_id"]
+                            pg_id = item["top_reports_hits"]["hits"]["hits"][0]["_source"][
+                                "report_id"
+                            ]
                             ordered_ids.append(pg_id)
                     log.info(filter_settings)
                     paginator = paginate.Page(
                         ordered_ids, items_per_page=items_per_page, **filter_settings
                     )
                     # stays empty when the current page holds no ids, so no DB query is issued
                     sa_items = ()
                     if paginator.items:
                         db_session = get_db_session(db_session)
                         # latest report detail
                         query = db_session.query(Report)
                         query = query.options(sa.orm.joinedload(Report.report_group))
                         query = query.filter(Report.id.in_(paginator.items))
                         if filter_settings.get("order_col"):
                             order_col = filter_settings.get("order_col")
                             # only "dsc" selects descending order; anything else sorts ascending
                             if filter_settings.get("order_dir") == "dsc":
                                 sort_on = "desc"
                             else:
                                 sort_on = "asc"
                             # "when" is remapped to the last_timestamp attribute of ReportGroup
                             if order_col == "when":
                                 order_col = "last_timestamp"
                             query = query.order_by(
                                 getattr(sa, sort_on)(getattr(ReportGroup, order_col))
                             )
                         sa_items = query.all()
                     # re-sort the loaded rows so they follow the order of ordered_ids;
                     # ids are compared as strings and each report is added at most once
                     # NOTE(review): presumably report_id in the search hit is a string - confirm
                     sorted_instance_list = []
                     for i_id in ordered_ids:
                         for report in sa_items:
                             if str(report.id) == i_id and report not in sorted_instance_list:
                                 sorted_instance_list.append(report)
                     paginator.sa_items = sorted_instance_list
                     return paginator
                 @classmethod
                 def by_app_ids(cls, app_ids=None, order_by=True, db_session=None):
                     """
                     Builds a query over report groups, optionally limited to applications

                     :param app_ids: resource ids to restrict to; a falsy value means no
                         restriction is applied
                     :param order_by: when truthy, rows are ordered by descending group id
                     :param db_session: optional session, resolved via ``get_db_session``
                     :return: un-executed query of ``ReportGroup`` rows
                     """
                     session = get_db_session(db_session)
                     groups = session.query(ReportGroup)
                     if app_ids:
                         groups = groups.filter(ReportGroup.resource_id.in_(app_ids))
                     if not order_by:
                         return groups
                     return groups.order_by(sa.desc(ReportGroup.id))
                 @classmethod
                 def by_id(cls, group_id, app_ids=None, db_session=None):
                     """
                     Fetches a single report group by its id

                     :param group_id: group id; converted with ``int()`` before comparison
                     :param app_ids: optional resource ids the group must belong to
                     :param db_session: optional session, resolved via ``get_db_session``
                     :return: first matching ``ReportGroup`` or ``None``
                     """
                     session = get_db_session(db_session)
                     lookup = session.query(ReportGroup)
                     lookup = lookup.filter(ReportGroup.id == int(group_id))
                     if not app_ids:
                         return lookup.first()
                     return lookup.filter(ReportGroup.resource_id.in_(app_ids)).first()
                 @classmethod
                 def by_ids(cls, group_ids=None, db_session=None):
                     """
                     Builds a query of report groups with the given ids

                     :param group_ids: collection (or selectable) of group ids; ``None``,
                         the declared default, is treated as an empty collection so the
                         query matches nothing instead of failing inside ``in_()``
                     :param db_session: optional session, resolved via ``get_db_session``
                     :return: un-executed query of ``ReportGroup`` rows
                     """
                     db_session = get_db_session(db_session)
                     # identity check on purpose: SQL selectables do not define truthiness
                     if group_ids is None:
                         group_ids = []
                     query = db_session.query(ReportGroup)
                     return query.filter(ReportGroup.id.in_(group_ids))
                 @classmethod
                 def by_hash_and_resource(
                     cls, resource_id, grouping_hash, since_when=None, db_session=None
                 ):
                     """
                     Fetches an unfixed report group by resource and grouping hash

                     :param resource_id: resource the group must belong to
                     :param grouping_hash: grouping hash the group must carry
                     :param since_when: optional lower bound for ``first_timestamp``
                     :param db_session: optional session, resolved via ``get_db_session``
                     :return: first matching ``ReportGroup`` or ``None``
                     """
                     session = get_db_session(db_session)
                     candidates = (
                         session.query(ReportGroup)
                         .filter(ReportGroup.resource_id == resource_id)
                         .filter(ReportGroup.grouping_hash == grouping_hash)
                         # builds a SQL comparison, so "== False" is intentional here
                         .filter(ReportGroup.fixed == False)  # noqa: E712
                     )
                     if since_when:
                         candidates = candidates.filter(
                             ReportGroup.first_timestamp >= since_when
                         )
                     return candidates.first()
                 @classmethod
                 def users_commenting(cls, report_group, exclude_user_id=None, db_session=None):
                     """
                     Builds a query of distinct users who commented on a report group

                     :param report_group: group whose comments are inspected; also handed
                         to ``get_db_session`` as its second argument
                     :param exclude_user_id: optional user id to leave out of the result
                     :param db_session: optional session; forwarded to ``get_db_session``
                         so an explicitly passed session is no longer ignored
                     :return: un-executed query of ``User`` rows
                     """
                     # pass the caller's session through rather than a hard-coded None;
                     # with the default (None) the resolution is the same as before
                     db_session = get_db_session(db_session, report_group)
                     query = db_session.query(User).distinct()
                     query = query.filter(User.id == ReportComment.owner_id)
                     query = query.filter(ReportComment.group_id == report_group.id)
                     if exclude_user_id:
                         query = query.filter(ReportComment.owner_id != exclude_user_id)
                     return query
                 @classmethod
                 def affected_users_count(cls, report_group, db_session=None):
                     """
                     Counts the username groups found among a group's reports

                     Reports with an empty or NULL username are left out; the remaining
                     rows are grouped by username and the grouped rows are counted.

                     :param report_group: group whose reports are inspected
                     :param db_session: optional session, resolved via ``get_db_session``
                     :return: result of ``count()`` on the grouped query
                     """
                     session = get_db_session(db_session)
                     per_user = (
                         session.query(sa.func.count(Report.username))
                         .filter(Report.group_id == report_group.id)
                         .filter(Report.username != "")
                         # builds a SQL comparison, so "!= None" is intentional here
                         .filter(Report.username != None)  # noqa: E711
                         .group_by(Report.username)
                     )
                     return per_user.count()
                 @classmethod
                 def top_affected_users(cls, report_group, db_session=None):
                     """
                     Builds a query of usernames with the most reports in a group

                     Rows are ``(username, count)`` pairs ordered by descending count and
                     limited to 50; empty and NULL usernames are left out.

                     :param report_group: group whose reports are inspected
                     :param db_session: optional session, resolved via ``get_db_session``
                     :return: un-executed query
                     """
                     session = get_db_session(db_session)
                     occurrences = sa.func.count(Report.username).label("count")
                     return (
                         session.query(Report.username, occurrences)
                         .filter(Report.group_id == report_group.id)
                         # builds a SQL comparison, so "!= None" is intentional here
                         .filter(Report.username != None)  # noqa: E711
                         .filter(Report.username != "")
                         .group_by(Report.username)
                         .order_by(sa.desc(occurrences))
                         .limit(50)
                     )
                 @classmethod
                 def get_report_stats(cls, request, filter_settings):
                     """
                     Gets report dashboard graphs

                     Returns information for BAR charts with occurrences/interval
                     information: one point per date-histogram bucket, holding the
                     document counts for the "error", "not_found" and "slow" type tags.

                     :param request: not used in this method body
                     :param filter_settings: dict; reads "start_date", "end_date",
                         "resource" (first element only) and the optional "group_id"
                     :return: list of dicts with keys "x", "report", "not_found" and
                         "slow_report"; an empty list when no index names are found
                     """
                     delta = filter_settings["end_date"] - filter_settings["start_date"]
                     # histogram resolution depends on the requested span, using the
                     # h.time_deltas entries keyed "12h", "3d" and "2w" as thresholds
                     if delta < h.time_deltas.get("12h")["delta"]:
                         interval = "1m"
                     elif delta <= h.time_deltas.get("3d")["delta"]:
                         interval = "5m"
                     elif delta >= h.time_deltas.get("2w")["delta"]:
                         interval = "24h"
                     else:
                         interval = "1h"
                     group_id = filter_settings.get("group_id")
                     es_query = {
                         "aggs": {
                             "parent_agg": {
                                 "aggs": {
                                     "types": {
                                         "aggs": {
-                                            "sub_agg": {"terms": {"field": "tags.type.values.keyword"}}
+                                            "sub_agg": {
+                                                "terms": {"field": "tags.type.values.keyword"}
+                                            }
                                         },
                                         "filter": {
                                             "bool": {
-                                            "filter": [{"exists": {"field": "tags.type.values"}}]
+                                                "filter": [
+                                                    {"exists": {"field": "tags.type.values"}}
+                                                ]
                                             }
                                         },
                                     }
                                 },
                                 "date_histogram": {
                                     "extended_bounds": {
                                         "max": filter_settings["end_date"],
                                         "min": filter_settings["start_date"],
                                     },
                                     "field": "timestamp",
                                     "interval": interval,
                                     "min_doc_count": 0,
                                 },
                             }
                         },
                         "query": {
                             "bool": {
                                 "filter": [
+                                    {"terms": {"resource_id": [filter_settings["resource"][0]]}},
-                                        "terms": {
-                                            "resource_id": [filter_settings["resource"][0]]
-                                    },
                                     {
                                         "range": {
                                             "timestamp": {
                                                 "gte": filter_settings["start_date"],
                                                 "lte": filter_settings["end_date"],
                                             }
                                         }
                                     },
                                 ]
                             }
                         },
                     }
                     if group_id:
                         # the group restriction is appended to the "types" sub-aggregation
                         # filter, not to the top-level query
                         parent_agg = es_query["aggs"]["parent_agg"]
                         filters = parent_agg["aggs"]["types"]["filter"]["bool"]["filter"]
                         filters.append({"terms": {"tags.group_id.values": [group_id]}})
                     index_names = es_index_name_limiter(
                         start_date=filter_settings["start_date"],
                         end_date=filter_settings["end_date"],
                         ixtypes=["reports"],
                     )
                     if not index_names:
                         return []
                     result = Datastores.es.search(
                         body=es_query, index=index_names, doc_type="log", size=0
                     )
                     series = []
                     for bucket in result["aggregations"]["parent_agg"]["buckets"]:
                         # bucket key is divided by 1000 before conversion
                         # NOTE(review): presumably epoch milliseconds - confirm
                         point = {
                             "x": datetime.utcfromtimestamp(int(bucket["key"]) / 1000),
                             "report": 0,
                             "not_found": 0,
                             "slow_report": 0,
                         }
                         # counters keep their 0 default unless a matching type bucket exists
                         for subbucket in bucket["types"]["sub_agg"]["buckets"]:
                             if subbucket["key"] == "slow":
                                 point["slow_report"] = subbucket["doc_count"]
                             elif subbucket["key"] == "error":
                                 point["report"] = subbucket["doc_count"]
                             elif subbucket["key"] == "not_found":
                                 point["not_found"] = subbucket["doc_count"]
                         series.append(point)
                     return series

backend/src/appenlight/models/services/report_stat.py

0 +3 -1

             # -*- coding: utf-8 -*-
             # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
             #
             # Licensed under the Apache License, Version 2.0 (the "License");
             # you may not use this file except in compliance with the License.
             # You may obtain a copy of the License at
             #
             #   http://www.apache.org/licenses/LICENSE-2.0
             #
             # Unless required by applicable law or agreed to in writing, software
             # distributed under the License is distributed on an "AS IS" BASIS,
             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
             # See the License for the specific language governing permissions and
             # limitations under the License.
             from appenlight.models import Datastores
             from appenlight.models.services.base import BaseService
             from appenlight.lib.enums import ReportType
             from appenlight.lib.utils import es_index_name_limiter
             class ReportStatService(BaseService):
                 @classmethod
                 def count_by_type(cls, report_type, resource_id, since_when):
                     """
                     Counts group id tag values for one report type of a resource

                     Runs a ``value_count`` aggregation on ``tags.group_id.values.keyword``
                     over documents matching the resource, the report type and a
                     timestamp greater than or equal to ``since_when``.

                     :param report_type: value passed through ``ReportType.key_from_value``
                         before being used in the query
                     :param resource_id: resource the documents must belong to
                     :param since_when: lower timestamp bound; also used to pick index names
                     :return: the aggregation's "value", or 0 when no index names are found
                     """
                     report_type = ReportType.key_from_value(report_type)
                     index_names = es_index_name_limiter(start_date=since_when, ixtypes=["reports"])
                     es_query = {
                         "aggs": {
                             "reports": {
                                 "aggs": {
-                                    "sub_agg": {"value_count": {"field": "tags.group_id.values.keyword"}}
+                                    "sub_agg": {
+                                        "value_count": {"field": "tags.group_id.values.keyword"}
+                                    }
                                 },
                                 "filter": {
                                     "bool": {
                                         "filter": [
                                             {"terms": {"resource_id": [resource_id]}},
                                             {"exists": {"field": "tags.group_id.values"}},
                                         ]
                                     }
                                 },
                             }
                         },
                         "query": {
                             "bool": {
                                 "filter": [
                                     {"terms": {"resource_id": [resource_id]}},
                                     {"terms": {"tags.type.values": [report_type]}},
                                     {"range": {"timestamp": {"gte": since_when}}},
                                 ]
                             }
                         },
                     }
                     # the search is skipped entirely when the limiter yields no index names
                     if index_names:
                         result = Datastores.es.search(
                             body=es_query, index=index_names, doc_type="log", size=0
                         )
                         return result["aggregations"]["reports"]["sub_agg"]["value"]
                     else:
                         return 0

backend/src/appenlight/models/services/request_metric.py

0 +33 -22

             # -*- coding: utf-8 -*-
             # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
             #
             # Licensed under the Apache License, Version 2.0 (the "License");
             # you may not use this file except in compliance with the License.
             # You may obtain a copy of the License at
             #
             #   http://www.apache.org/licenses/LICENSE-2.0
             #
             # Unless required by applicable law or agreed to in writing, software
             # distributed under the License is distributed on an "AS IS" BASIS,
             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
             # See the License for the specific language governing permissions and
             # limitations under the License.
             from datetime import datetime
             import appenlight.lib.helpers as h
             from appenlight.models import get_db_session, Datastores
             from appenlight.models.services.base import BaseService
             from appenlight.lib.enums import ReportType
             from appenlight.lib.utils import es_index_name_limiter
             try:
                 from ae_uptime_ce.models.services.uptime_metric import UptimeMetricService
             except ImportError:
                 UptimeMetricService = None
             def check_key(key, stats, uptime, total_seconds):
                 """
                 Ensures ``stats`` holds an entry for ``key``, adding a zeroed one if missing

                 An existing entry is left untouched. A new entry carries the key as its
                 "name", the given uptime, ``total_seconds`` converted to minutes and
                 zero for every other counter.

                 :param key: name of the entry
                 :param stats: dict that is updated in place
                 :param uptime: value stored under "uptime"
                 :param total_seconds: seconds, stored as minutes under "total_minutes"
                 :return: None
                 """
                 if key in stats:
                     return
                 # field order is kept exactly as listed; every field starts at 0
                 entry = dict.fromkeys(
                     (
                         "name",
                         "requests",
                         "errors",
                         "tolerated_requests",
                         "frustrating_requests",
                         "satisfying_requests",
                         "total_minutes",
                         "uptime",
                         "apdex",
                         "rpm",
                         "response_time",
                         "avg_response_time",
                     ),
                     0,
                 )
                 entry["name"] = key
                 entry["total_minutes"] = total_seconds / 60.0
                 entry["uptime"] = uptime
                 stats[key] = entry
             class RequestMetricService(BaseService):
                 @classmethod
                 def get_metrics_stats(cls, request, filter_settings, db_session=None):
                     """
                     Gets per-interval sums of request metric values

                     Runs a date histogram over "timestamp" and, per bucket, sums the
                     ``tags.<key>.numeric_values`` field for each of custom, main, nosql,
                     remote, requests, sql and tmpl.

                     :param request: not used in this method body
                     :param filter_settings: dict; reads "start_date", "end_date" and
                         "resource" (first element only); its "namespace" key is
                         overwritten in place
                     :param db_session: not used in this method body
                     :return: list of dicts with key "x" plus one value per metric key;
                         an empty list when no index names are found
                     """
                     delta = filter_settings["end_date"] - filter_settings["start_date"]
                     # histogram resolution depends on the requested span, using the
                     # h.time_deltas entries keyed "12h", "3d" and "2w" as thresholds
                     if delta < h.time_deltas.get("12h")["delta"]:
                         interval = "1m"
                     elif delta <= h.time_deltas.get("3d")["delta"]:
                         interval = "5m"
                     elif delta >= h.time_deltas.get("2w")["delta"]:
                         interval = "24h"
                     else:
                         interval = "1h"
                     # side effect: the caller's filter_settings dict is modified here
                     filter_settings["namespace"] = ["appenlight.request_metric"]
                     es_query = {
                         "aggs": {
                             "parent_agg": {
                                 "aggs": {
                                     "custom": {
                                         "aggs": {
                                             "sub_agg": {
                                                 "sum": {"field": "tags.custom.numeric_values"}
                                             }
                                         },
                                         "filter": {
                                             "exists": {"field": "tags.custom.numeric_values"}
                                         },
                                     },
                                     "main": {
                                         "aggs": {
                                             "sub_agg": {
                                                 "sum": {"field": "tags.main.numeric_values"}
                                             }
                                         },
                                         "filter": {"exists": {"field": "tags.main.numeric_values"}},
                                     },
                                     "nosql": {
                                         "aggs": {
                                             "sub_agg": {
                                                 "sum": {"field": "tags.nosql.numeric_values"}
                                             }
                                         },
                                         "filter": {
                                             "exists": {"field": "tags.nosql.numeric_values"}
                                         },
                                     },
                                     "remote": {
                                         "aggs": {
                                             "sub_agg": {
                                                 "sum": {"field": "tags.remote.numeric_values"}
                                             }
                                         },
                                         "filter": {
                                             "exists": {"field": "tags.remote.numeric_values"}
                                         },
                                     },
                                     "requests": {
                                         "aggs": {
                                             "sub_agg": {
                                                 "sum": {"field": "tags.requests.numeric_values"}
                                             }
                                         },
                                         "filter": {
                                             "exists": {"field": "tags.requests.numeric_values"}
                                         },
                                     },
                                     "sql": {
                                         "aggs": {
                                             "sub_agg": {"sum": {"field": "tags.sql.numeric_values"}}
                                         },
                                         "filter": {"exists": {"field": "tags.sql.numeric_values"}},
                                     },
                                     "tmpl": {
                                         "aggs": {
                                             "sub_agg": {
                                                 "sum": {"field": "tags.tmpl.numeric_values"}
                                             }
                                         },
                                         "filter": {"exists": {"field": "tags.tmpl.numeric_values"}},
                                     },
                                 },
                                 "date_histogram": {
                                     "extended_bounds": {
                                         "max": filter_settings["end_date"],
                                         "min": filter_settings["start_date"],
                                     },
                                     "field": "timestamp",
                                     "interval": interval,
                                     "min_doc_count": 0,
                                 },
                             }
                         },
                         "query": {
                             "bool": {
                                 "filter": [
+                                    {"terms": {"resource_id": [filter_settings["resource"][0]]}},
-                                        "terms": {
-                                            "resource_id": [filter_settings["resource"][0]]
-                                    },
                                     {
                                         "range": {
                                             "timestamp": {
                                                 "gte": filter_settings["start_date"],
                                                 "lte": filter_settings["end_date"],
                                             }
                                         }
                                     },
                                     {"terms": {"namespace": ["appenlight.request_metric"]}},
                                 ]
                             }
                         },
                     }
                     index_names = es_index_name_limiter(
                         start_date=filter_settings["start_date"],
                         end_date=filter_settings["end_date"],
                         ixtypes=["metrics"],
                     )
                     if not index_names:
                         return []
                     result = Datastores.es.search(
                         body=es_query, index=index_names, doc_type="log", size=0
                     )
                     plot_data = []
                     for item in result["aggregations"]["parent_agg"]["buckets"]:
                         # bucket key is divided by 1000 before conversion
                         # NOTE(review): presumably epoch milliseconds - confirm
                         x_time = datetime.utcfromtimestamp(int(item["key"]) / 1000)
                         point = {"x": x_time}
                         for key in ["custom", "main", "nosql", "remote", "requests", "sql", "tmpl"]:
                             value = item[key]["sub_agg"]["value"]
                             # falsy sums (including None) are reported as 0
                             point[key] = round(value, 3) if value else 0
                         plot_data.append(point)
                     return plot_data
                 @classmethod
                 def get_requests_breakdown(cls, request, filter_settings, db_session=None):
                     db_session = get_db_session(db_session)
                     # fetch total time of all requests in this time range
                     index_names = es_index_name_limiter(
                         start_date=filter_settings["start_date"],
                         end_date=filter_settings["end_date"],
                         ixtypes=["metrics"],
                     )
                     if index_names and filter_settings["resource"]:
                         es_query = {
                             "aggs": {
                                 "main": {
                                     "aggs": {
                                         "sub_agg": {"sum": {"field": "tags.main.numeric_values"}}
                                     },
                                     "filter": {"exists": {"field": "tags.main.numeric_values"}},
                                 }
                             },
                             "query": {
                                 "bool": {
                                     "filter": [
                                         {
                                             "terms": {
                                                 "resource_id": [filter_settings["resource"][0]]
                                             }
                                         },
                                         {
                                             "range": {
                                                 "timestamp": {
                                                     "gte": filter_settings["start_date"],
                                                     "lte": filter_settings["end_date"],
                                                 }
                                             }
                                         },
                                         {"terms": {"namespace": ["appenlight.request_metric"]}},
                                     ]
                                 }
                             },
                         }
                         result = Datastores.es.search(
                             body=es_query, index=index_names, doc_type="log", size=0
                         )
                         total_time_spent = result["aggregations"]["main"]["sub_agg"]["value"]
                     else:
                         total_time_spent = 0
                     script_text = "doc['tags.main.numeric_values'].value / {}".format(
                         total_time_spent
                     )
                     if total_time_spent == 0:
-                        script_text = '0'
+                        script_text = "0"
                     if index_names and filter_settings["resource"]:
                         es_query = {
                             "aggs": {
                                 "parent_agg": {
                                     "aggs": {
                                         "main": {
                                             "aggs": {
                                                 "sub_agg": {
                                                     "sum": {"field": "tags.main.numeric_values"}
                                                 }
                                             },
                                             "filter": {
                                                 "exists": {"field": "tags.main.numeric_values"}
                                             },
                                         },
                                         "percentage": {
-                                            "aggs": {
+                                            "aggs": {"sub_agg": {"sum": {"script": script_text}}},
-                                                "sub_agg": {
-                                                    "sum": {
-                                                        "script": script_text,
-                                            },
                                             "filter": {
                                                 "exists": {"field": "tags.main.numeric_values"}
                                             },
                                         },
                                         "requests": {
                                             "aggs": {
                                                 "sub_agg": {
                                                     "sum": {"field": "tags.requests.numeric_values"}
                                                 }
                                             },
                                             "filter": {
                                                 "exists": {"field": "tags.requests.numeric_values"}
                                             },
                                         },
                                     },
                                     "terms": {
                                         "field": "tags.view_name.values.keyword",
                                         "order": {"percentage>sub_agg": "desc"},
                                         "size": 15,
                                     },
                                 }
                             },
                             "query": {
                                 "bool": {
                                     "filter": [
                                         {
                                             "terms": {
                                                 "resource_id": [filter_settings["resource"][0]]
                                             }
                                         },
                                         {
                                             "range": {
                                                 "timestamp": {
                                                     "gte": filter_settings["start_date"],
                                                     "lte": filter_settings["end_date"],
                                                 }
                                             }
                                         },
                                     ]
                                 }
                             },
                         }
                         result = Datastores.es.search(
                             body=es_query, index=index_names, doc_type="log", size=0
                         )
                         series = result["aggregations"]["parent_agg"]["buckets"]
                     else:
                         series = []
                     and_part = [
                         {"term": {"resource_id": filter_settings["resource"][0]}},
                         {"terms": {"tags.view_name.values": [row["key"] for row in series]}},
                         {"term": {"report_type": str(ReportType.slow)}},
                     ]
                     query = {
                         "aggs": {
                             "top_reports": {
-                                "terms": {"field": "tags.view_name.values.keyword", "size": len(series)},
+                                "terms": {
+                                    "field": "tags.view_name.values.keyword",
+                                    "size": len(series),
+                                },
                                 "aggs": {
                                     "top_calls_hits": {
                                         "top_hits": {"sort": {"start_time": "desc"}, "size": 5}
                                     }
                                 },
                             }
                         },
                         "query": {"bool": {"filter": and_part}},
                     }
                     details = {}
                     index_names = es_index_name_limiter(ixtypes=["reports"])
                     if index_names and series:
                         result = Datastores.es.search(
                             body=query, doc_type="report", size=0, index=index_names
                         )
                         for bucket in result["aggregations"]["top_reports"]["buckets"]:
                             details[bucket["key"]] = []
                             for hit in bucket["top_calls_hits"]["hits"]["hits"]:
                                 details[bucket["key"]].append(
                                     {
                                         "report_id": hit["_source"]["request_metric_id"],
                                         "group_id": hit["_source"]["group_id"],
                                     }
                                 )
                     results = []
                     for row in series:
                         result = {
                             "key": row["key"],
                             "main": row["main"]["sub_agg"]["value"],
                             "requests": row["requests"]["sub_agg"]["value"],
                         }
                         # es can return 'infinity'
                         try:
                             result["percentage"] = float(row["percentage"]["sub_agg"]["value"])
                         except ValueError:
                             result["percentage"] = 0
                         result["latest_details"] = details.get(row["key"]) or []
                         results.append(result)
                     return results
                 @classmethod
                 def get_apdex_stats(cls, request, filter_settings, threshold=1, db_session=None):
                     """
                     Returns information and calculates APDEX score per server for dashboard
                     server information (upper right stats boxes)

                     Two Elasticsearch aggregations, both bucketed by
                     ``tags.server_name.values.keyword``, are merged into one dict per
                     server: request metrics (namespace ``appenlight.request_metric``)
                     and error counts (namespace ``appenlight.error``).

                     :param request: not used by this method
                     :param filter_settings: mapping read for ``resource`` (only its first
                         entry is queried), ``start_date`` and ``end_date``
                     :param threshold: accepted for interface compatibility but not used;
                         the response time bounds sent to Elasticsearch are the fixed
                         strings "1" and "4"
                     :param db_session: handed to ``get_db_session``
                     :return: list of per-server stat dicts sorted by their ``name`` key
                         (the entries are created by ``check_key``, defined elsewhere);
                         an empty list when ``filter_settings["resource"]`` is empty
                     """
                     # Apdex t = (Satisfied Count + Tolerated Count / 2) / Total Samples
                     db_session = get_db_session(db_session)
                     if not filter_settings["resource"]:
                         # Nothing to query; also avoids indexing an empty resource list
                         # for the uptime lookup below.
                         return []

                     resource_id = filter_settings["resource"][0]
                     since_when = filter_settings["start_date"]
                     until = filter_settings["end_date"]

                     def summed(field, conditions=()):
                         # Sum of ``field`` over documents that have it and that also
                         # satisfy every clause in ``conditions``.
                         exists = {"exists": {"field": field}}
                         if conditions:
                             narrowed = {"bool": {"filter": list(conditions) + [exists]}}
                         else:
                             narrowed = exists
                         return {
                             "aggs": {"sub_agg": {"sum": {"field": field}}},
                             "filter": narrowed,
                         }

                     def per_server(namespace, sub_aggs):
                         # Query envelope shared by both searches: restrict to the
                         # resource, time window and namespace, bucket by server name.
                         return {
                             "aggs": {
                                 "parent_agg": {
                                     "aggs": sub_aggs,
                                     "terms": {
                                         "field": "tags.server_name.values.keyword",
                                         "size": 999999,
                                     },
                                 }
                             },
                             "query": {
                                 "bool": {
                                     "filter": [
                                         {"terms": {"resource_id": [resource_id]}},
                                         {
                                             "range": {
                                                 "timestamp": {
                                                     "gte": since_when,
                                                     "lte": until,
                                                 }
                                             }
                                         },
                                         {"terms": {"namespace": [namespace]}},
                                     ]
                                 }
                             },
                         }

                     index_names = es_index_name_limiter(
                         start_date=since_when, end_date=until, ixtypes=["metrics"]
                     )
                     requests_series = []
                     if index_names:
                         requests_field = "tags.requests.numeric_values"
                         main_field = "tags.main.numeric_values"
                         es_query = per_server(
                             "appenlight.request_metric",
                             {
                                 "frustrating": summed(
                                     requests_field, [{"range": {main_field: {"gte": "4"}}}]
                                 ),
                                 "main": summed(main_field),
                                 "requests": summed(requests_field),
                                 "tolerated": summed(
                                     requests_field,
                                     [
                                         {"range": {main_field: {"gte": "1"}}},
                                         {"range": {main_field: {"lt": "4"}}},
                                     ],
                                 ),
                             },
                         )
                         result = Datastores.es.search(
                             body=es_query, index=index_names, doc_type="log", size=0
                         )
                         for bucket in result["aggregations"]["parent_agg"]["buckets"]:
                             requests_series.append(
                                 {
                                     "frustrating": bucket["frustrating"]["sub_agg"]["value"],
                                     "main": bucket["main"]["sub_agg"]["value"],
                                     "requests": bucket["requests"]["sub_agg"]["value"],
                                     "tolerated": bucket["tolerated"]["sub_agg"]["value"],
                                     "key": bucket["key"],
                                 }
                             )

                     # total errors
                     index_names = es_index_name_limiter(
                         start_date=since_when, end_date=until, ixtypes=["reports"]
                     )
                     report_series = []
                     if index_names:
                         report_type = ReportType.key_from_value(ReportType.error)
                         es_query = per_server(
                             "appenlight.error",
                             {
                                 "errors": summed(
                                     "tags.occurences.numeric_values",
                                     [{"terms": {"tags.type.values": [report_type]}}],
                                 )
                             },
                         )
                         result = Datastores.es.search(
                             body=es_query, index=index_names, doc_type="log", size=0
                         )
                         for bucket in result["aggregations"]["parent_agg"]["buckets"]:
                             report_series.append(
                                 {
                                     "key": bucket["key"],
                                     "errors": bucket["errors"]["sub_agg"]["value"],
                                 }
                             )

                     stats = {}
                     if UptimeMetricService is not None:
                         uptime = UptimeMetricService.get_uptime_by_app(
                             resource_id, since_when=since_when, until=until
                         )
                     else:
                         uptime = 0
                     total_seconds = (until - since_when).total_seconds()

                     # check_key (defined elsewhere) is called before every write so the
                     # per-server entry exists; presumably it also seeds the defaults
                     # read below ("errors", "total_minutes", "name") - TODO confirm.
                     for stat in requests_series:
                         check_key(stat["key"], stats, uptime, total_seconds)
                         stats[stat["key"]]["requests"] = int(stat["requests"])
                         stats[stat["key"]]["response_time"] = stat["main"]
                         stats[stat["key"]]["tolerated_requests"] = stat["tolerated"]
                         stats[stat["key"]]["frustrating_requests"] = stat["frustrating"]
                     for server in report_series:
                         check_key(server["key"], stats, uptime, total_seconds)
                         stats[server["key"]]["errors"] = server["errors"]

                     server_stats = list(stats.values())
                     for stat in server_stats:
                         stat["satisfying_requests"] = (
                             stat["requests"]
                             - stat["errors"]
                             - stat["frustrating_requests"]
                             - stat["tolerated_requests"]
                         )
                         # The subtraction can go negative; clamp to zero.
                         if stat["satisfying_requests"] < 0:
                             stat["satisfying_requests"] = 0
                         if stat["requests"]:
                             stat["avg_response_time"] = round(
                                 stat["response_time"] / stat["requests"], 3
                             )
                             qual_requests = (
                                 stat["satisfying_requests"] + stat["tolerated_requests"] / 2.0
                             )
                             stat["apdex"] = round((qual_requests / stat["requests"]) * 100, 2)
                             stat["rpm"] = round(stat["requests"] / stat["total_minutes"], 2)
                     return sorted(server_stats, key=lambda x: x["name"])

backend/src/appenlight/models/services/slow_call.py

0 +5 -6

             # -*- coding: utf-8 -*-
             # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
             #
             # Licensed under the Apache License, Version 2.0 (the "License");
             # you may not use this file except in compliance with the License.
             # You may obtain a copy of the License at
             #
             #   http://www.apache.org/licenses/LICENSE-2.0
             #
             # Unless required by applicable law or agreed to in writing, software
             # distributed under the License is distributed on an "AS IS" BASIS,
             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
             # See the License for the specific language governing permissions and
             # limitations under the License.
             from appenlight.models import get_db_session, Datastores
             from appenlight.models.report import Report
             from appenlight.models.services.base import BaseService
             from appenlight.lib.utils import es_index_name_limiter
             class SlowCallService(BaseService):
                 """Queries over slow call documents indexed in Elasticsearch."""

                 @classmethod
                 def get_time_consuming_calls(cls, request, filter_settings, db_session=None):
                     """
                     Return the statements with the largest summed duration.

                     A first aggregation picks up to 15 statement hashes ordered by
                     total ``tags.duration.numeric_values``; a second one fetches the
                     5 most recent documents for each of those hashes. Report ids found
                     in those documents are then resolved to report groups through the
                     ``Report`` table.

                     :param request: not used by this method
                     :param filter_settings: mapping read for ``resource`` (only its first
                         entry is queried), ``start_date`` and ``end_date``
                     :param db_session: handed to ``get_db_session``
                     :return: list of dicts with ``occurences``, ``total_duration``,
                         ``statement``, ``statement_type``, ``statement_subtype``,
                         ``statement_hash`` and ``latest_details`` keys, in the order of
                         the first aggregation; an empty list when there are no matching
                         indices, no resource or no buckets
                     """
                     db_session = get_db_session(db_session)
                     # get slow calls from older partitions too
                     index_names = es_index_name_limiter(
                         start_date=filter_settings["start_date"],
                         end_date=filter_settings["end_date"],
                         ixtypes=["slow_calls"],
                     )
                     if not (index_names and filter_settings["resource"]):
                         return []

                     resource_filter = {
                         "terms": {"resource_id": [filter_settings["resource"][0]]}
                     }
                     time_filter = {
                         "range": {
                             "timestamp": {
                                 "gte": filter_settings["start_date"],
                                 "lte": filter_settings["end_date"],
                             }
                         }
                     }

                     # get longest time taking hashes
                     es_query = {
                         "aggs": {
                             "parent_agg": {
                                 "aggs": {
                                     "duration": {
                                         "aggs": {
                                             "sub_agg": {
                                                 "sum": {"field": "tags.duration.numeric_values"}
                                             }
                                         },
                                         "filter": {
                                             "exists": {"field": "tags.duration.numeric_values"}
                                         },
                                     },
                                     "total": {
                                         "aggs": {
                                             "sub_agg": {
                                                 "value_count": {
                                                     "field": "tags.statement_hash.values.keyword"
                                                 }
                                             }
                                         },
                                         "filter": {
                                             "exists": {"field": "tags.statement_hash.values"}
                                         },
                                     },
                                 },
                                 "terms": {
                                     "field": "tags.statement_hash.values.keyword",
                                     "order": {"duration>sub_agg": "desc"},
                                     "size": 15,
                                 },
                             }
                         },
                         "query": {"bool": {"filter": [resource_filter, time_filter]}},
                     }
                     result = Datastores.es.search(
                         body=es_query, index=index_names, doc_type="log", size=0
                     )
                     results = result["aggregations"]["parent_agg"]["buckets"]
                     if not results:
                         # No hashes to look up: the follow-up query could not match
                         # anything, so skip the second round-trip.
                         return []
                     hashes = [bucket["key"] for bucket in results]

                     # get queries associated with hashes
                     calls_query = {
                         "aggs": {
                             "top_calls": {
                                 "terms": {
                                     "field": "tags.statement_hash.values.keyword",
                                     "size": 15,
                                 },
                                 "aggs": {
                                     "top_calls_hits": {
                                         "top_hits": {"sort": {"timestamp": "desc"}, "size": 5}
                                     }
                                 },
                             }
                         },
                         "query": {
                             "bool": {
                                 "filter": [
                                     resource_filter,
                                     {"terms": {"tags.statement_hash.values": hashes}},
                                     time_filter,
                                 ]
                             }
                         },
                     }
                     calls = Datastores.es.search(
                         body=calls_query, index=index_names, doc_type="log", size=0
                     )

                     call_results = {}
                     report_ids = []
                     for bucket in calls["aggregations"]["top_calls"]["buckets"]:
                         hits = bucket["top_calls_hits"]["hits"]["hits"]
                         call_results[bucket["key"]] = [hit["_source"] for hit in hits]
                         report_ids.extend(
                             hit["_source"]["tags"]["report_id"]["values"] for hit in hits
                         )

                     if report_ids:
                         r_query = db_session.query(Report.group_id, Report.id)
                         r_query = r_query.filter(Report.id.in_(report_ids))
                         r_query = r_query.filter(Report.start_time >= filter_settings["start_date"])
                     else:
                         r_query = []
                     # report id -> report group id
                     reports_reversed = {report.id: report.group_id for report in r_query}

                     final_results = []
                     for item in results:
                         latest_calls = call_results.get(item["key"])
                         if not latest_calls:
                             # hash missing from the second query, or a bucket without hits
                             continue
                         newest = latest_calls[0]
                         row = {
                             "occurences": item["total"]["sub_agg"]["value"],
                             "total_duration": round(item["duration"]["sub_agg"]["value"]),
                             "statement": newest["message"],
                             "statement_type": newest["tags"]["type"]["values"],
                             "statement_subtype": newest["tags"]["subtype"]["values"],
                             "statement_hash": item["key"],
                             "latest_details": [],
                         }
                         # NOTE(review): the previous literal was " remote" (leading
                         # space), which looks like a typo; this assumes stored type
                         # values carry no leading space - confirm against the indexer.
                         if row["statement_type"] in ("tmpl", "remote"):
                             params = (
                                 newest["tags"]["parameters"]["values"]
                                 if "parameters" in newest["tags"]
                                 else ""
                             )
                             row["statement"] = "{} ({})".format(newest["message"], params)
                         for call in latest_calls:
                             report_id = call["tags"]["report_id"]["values"]
                             group_id = reports_reversed.get(report_id)
                             if group_id:
                                 row["latest_details"].append(
                                     {"group_id": group_id, "report_id": report_id}
                                 )
                         final_results.append(row)
                     return final_results

backend/src/appenlight/scripts/reindex_elasticsearch.py

0 +33 -46

             # -*- coding: utf-8 -*-
             # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
             #
             # Licensed under the Apache License, Version 2.0 (the "License");
             # you may not use this file except in compliance with the License.
             # You may obtain a copy of the License at
             #
             #   http://www.apache.org/licenses/LICENSE-2.0
             #
             # Unless required by applicable law or agreed to in writing, software
             # distributed under the License is distributed on an "AS IS" BASIS,
             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
             # See the License for the specific language governing permissions and
             # limitations under the License.
             import argparse
             import datetime
             import logging
             import copy
             import sqlalchemy as sa
             import elasticsearch.exceptions
             import elasticsearch.helpers
             from collections import defaultdict
             from pyramid.paster import setup_logging
             from pyramid.paster import bootstrap
             from appenlight.models import DBSession, Datastores, metadata
             from appenlight.lib import get_callable
             from appenlight.models.report_group import ReportGroup
             from appenlight.models.report import Report
             from appenlight.models.report_stat import ReportStat
             from appenlight.models.log import Log
             from appenlight.models.slow_call import SlowCall
             from appenlight.models.metric import Metric
             log = logging.getLogger(__name__)
             # Partition table name prefixes, each starting out with its own empty list.
             # NOTE(review): presumably filled with detected tables by code outside
             # this section - confirm against the reindex routines.
             tables = {
                 prefix: []
                 for prefix in (
                     "slow_calls_p_",
                     "reports_stats_p_",
                     "reports_p_",
                     "reports_groups_p_",
                     "logs_p_",
                     "metrics_p_",
                 )
             }
             def detect_tables(table_prefix):
                 """
                 Reflect every database table whose name starts with ``table_prefix``.

                 Table names are read from ``pg_tables`` (names matching ``pg_%`` or
                 ``sql_%`` are excluded by the query) in ascending name order, and each
                 match is loaded as an autoloaded ``sa.Table`` bound to ``metadata``.

                 :param table_prefix: prefix the table name has to start with
                 :return: list of reflected ``sa.Table`` objects, in query order
                 """
                 db_tables_query = """
                 SELECT tablename FROM pg_tables WHERE tablename NOT LIKE 'pg_%' AND
                 tablename NOT LIKE 'sql_%' ORDER BY tablename ASC;"""
                 rows = DBSession.execute(db_tables_query).fetchall()
                 return [
                     sa.Table(
                         row.tablename,
                         metadata,
                         autoload=True,
                         autoload_with=DBSession.bind.engine,
                     )
                     for row in rows
                     if row.tablename.startswith(table_prefix)
                 ]
             def main():
                 """
                 Command line entry point
                 Recreates Elasticsearch indexes and reindexes the selected parts of
                 the database into Elasticsearch
                 """
                 # Two parsers are needed: the first one only has to yield the ini file
                 # so pyramid can be bootstrapped, the second one is built afterwards
                 # because plugins can contribute additional reindex types.
                 bootstrap_parser = argparse.ArgumentParser(
                     description="Reindex AppEnlight data", add_help=False
                 )
                 bootstrap_parser.add_argument(
                     "-c", "--config", required=True, help="Configuration ini file of application"
                 )
                 bootstrap_parser.add_argument("-h", "--help", help="Show help", nargs="?")
                 bootstrap_parser.add_argument(
                     "-t", "--types", nargs="+", help="Which parts of database should get reindexed"
                 )
                 config_uri = bootstrap_parser.parse_args().config
                 setup_logging(config_uri)
                 log.setLevel(logging.INFO)
                 env = bootstrap(config_uri)
                 # type name -> dotted path of the callable that performs the reindex
                 choices = {
                     "reports": "appenlight.scripts.reindex_elasticsearch:reindex_reports",
                     "logs": "appenlight.scripts.reindex_elasticsearch:reindex_logs",
                     "metrics": "appenlight.scripts.reindex_elasticsearch:reindex_metrics",
                     "slow_calls": "appenlight.scripts.reindex_elasticsearch:reindex_slow_calls",
                     "template": "appenlight.scripts.reindex_elasticsearch:update_template",
                 }
                 # plugins that declare a "fulltext_indexer" add (or override) a type
                 plugins = env["registry"].appenlight_plugins
                 choices.update(
                     {
                         name: plugin["fulltext_indexer"]
                         for name, plugin in plugins.items()
                         if plugin.get("fulltext_indexer")
                     }
                 )
                 parser = argparse.ArgumentParser(description="Reindex AppEnlight data")
                 parser.add_argument(
                     "-t",
                     "--types",
                     nargs="*",
                     choices=["all", *choices],
                     default=[],
                     help="Which parts of database should get reindexed",
                 )
                 parser.add_argument(
                     "-c", "--config", required=True, help="Configuration ini file of application"
                 )
                 selected_types = parser.parse_args().types
                 if "all" in selected_types:
                     selected_types = list(choices)
                 print("Selected types to reindex: {}".format(selected_types))
                 log.info("settings {}".format(selected_types))
                 # the template is always handled before any data gets reindexed
                 if "template" in selected_types:
                     get_callable(choices["template"])()
                     selected_types.remove("template")
                 for type_name in selected_types:
                     get_callable(choices[type_name])()
             def update_template():
                 """
                 Replaces the Elasticsearch index templates used by AppEnlight
                 Deletes the reports, logs, slow calls and metrics templates (a missing
                 template is only logged) and then puts new definitions for the
                 reports, logs, slow calls, metrics and uptime metrics templates
                 """
                 # drop the existing templates; NotFoundError is logged and ignored
                 try:
                     Datastores.es.indices.delete_template("rcae_reports")
                 except elasticsearch.exceptions.NotFoundError as e:
                     log.error(e)
                 try:
                     Datastores.es.indices.delete_template("rcae_logs")
                 except elasticsearch.exceptions.NotFoundError as e:
                     log.error(e)
                 try:
                     Datastores.es.indices.delete_template("rcae_slow_calls")
                 except elasticsearch.exceptions.NotFoundError as e:
                     log.error(e)
                 try:
                     Datastores.es.indices.delete_template("rcae_metrics")
                 except elasticsearch.exceptions.NotFoundError as e:
                     log.error(e)
                 log.info("updating elasticsearch template")
                 # dynamic template for every field whose path matches "tags.*"
                 tag_templates = [
                     {
                         "values": {
                             "path_match": "tags.*",
                             "mapping": {
                                 "type": "object",
                                 "properties": {
-                                    "values": {"type": "text", "analyzer": "tag_value",
+                                    "values": {
+                                        "type": "text",
+                                        "analyzer": "tag_value",
                                         "fields": {
-                                                   "keyword": {
+                                            "keyword": {"type": "keyword", "ignore_above": 256}
-                                                       "type": "keyword",
+                                        },
-                                                       "ignore_above": 256
+                                    },
-                                               }},
                                     "numeric_values": {"type": "float"},
                                 },
                             },
                         }
                     }
                 ]
                 # custom analyzers referenced by name ("url_path", "tag_value") in the
                 # mappings below; every template includes them in its settings
                 shared_analysis = {
                     "analyzer": {
                         "url_path": {
                             "type": "custom",
                             "char_filter": [],
                             "tokenizer": "path_hierarchy",
                             "filter": [],
                         },
                         "tag_value": {
                             "type": "custom",
                             "char_filter": [],
                             "tokenizer": "keyword",
                             "filter": ["lowercase"],
                         },
                     }
                 }
                 # base mapping; deep-copied below for logs, slow calls and metrics,
                 # each copy re-keying "pg_id" to its own id field
                 shared_log_mapping = {
                     "_all": {"enabled": False},
                     "dynamic_templates": tag_templates,
                     "properties": {
                         "pg_id": {"type": "keyword", "index": True},
                         "delete_hash": {"type": "keyword", "index": True},
                         "resource_id": {"type": "integer"},
                         "timestamp": {"type": "date"},
                         "permanent": {"type": "boolean"},
                         "request_id": {"type": "keyword", "index": True},
                         "log_level": {"type": "text", "analyzer": "simple"},
                         "message": {"type": "text", "analyzer": "simple"},
                         "namespace": {
                             "type": "text",
                             "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
                         },
                         "tags": {"type": "object"},
-                        "tag_list": {"type": "text", "analyzer": "tag_value",
+                        "tag_list": {
-                                     "fields": {
+                            "type": "text",
-                                         "keyword": {
+                            "analyzer": "tag_value",
-                                             "type": "keyword",
+                            "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
-                                             "ignore_above": 256
+                        },
-                                     }},
                     },
                 }
                 # template for indices matching "rcae_r_*"; a single "report" mapping
                 # holds report group, report and report stat fields, related through
                 # the "join_field" join
                 report_schema = {
                     "template": "rcae_r_*",
                     "settings": {
                         "index": {
                             "refresh_interval": "5s",
-                            "translog": {"sync_interval": "5s", "durability": "async"}
+                            "translog": {"sync_interval": "5s", "durability": "async"},
                         },
                         "number_of_shards": 5,
                         "analysis": shared_analysis,
                     },
                     "mappings": {
                         "report": {
                             "_all": {"enabled": False},
                             "dynamic_templates": tag_templates,
                             "properties": {
                                 "type": {"type": "keyword", "index": True},
                                 # report group
                                 "group_id": {"type": "keyword", "index": True},
                                 "resource_id": {"type": "integer"},
                                 "priority": {"type": "integer"},
                                 "error": {"type": "text", "analyzer": "simple"},
                                 "read": {"type": "boolean"},
                                 "occurences": {"type": "integer"},
                                 "fixed": {"type": "boolean"},
                                 "first_timestamp": {"type": "date"},
                                 "last_timestamp": {"type": "date"},
                                 "average_duration": {"type": "float"},
                                 "summed_duration": {"type": "float"},
                                 "public": {"type": "boolean"},
                                 # report
                                 "report_id": {"type": "keyword", "index": True},
                                 "http_status": {"type": "integer"},
                                 "ip": {"type": "keyword", "index": True},
                                 "url_domain": {"type": "text", "analyzer": "simple"},
                                 "url_path": {"type": "text", "analyzer": "url_path"},
                                 "report_type": {"type": "integer"},
                                 "start_time": {"type": "date"},
                                 "request_id": {"type": "keyword", "index": True},
                                 "end_time": {"type": "date"},
                                 "duration": {"type": "float"},
                                 "tags": {"type": "object"},
-                                "tag_list": {"type": "text", "analyzer": "tag_value",
+                                "tag_list": {
-                                             "fields": {
+                                    "type": "text",
-                                                 "keyword": {
+                                    "analyzer": "tag_value",
-                                                     "type": "keyword",
+                                    "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
-                                                     "ignore_above": 256
+                                },
-                                             }},
                                 "extra": {"type": "object"},
                                 # report stats
                                 "report_stat_id": {"type": "keyword", "index": True},
                                 "timestamp": {"type": "date"},
                                 "permanent": {"type": "boolean"},
                                 "log_level": {"type": "text", "analyzer": "simple"},
                                 "message": {"type": "text", "analyzer": "simple"},
                                 "namespace": {
                                     "type": "text",
                                     "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
                                 },
                                 "join_field": {
                                     "type": "join",
-                                    "relations": {
+                                    "relations": {"report_group": ["report", "report_stat"]},
-                                        "report_group": ["report", "report_stat"]
+                                },
                             },
                         }
+                    },
                 }
                 Datastores.es.indices.put_template("rcae_reports", body=report_schema)
                 # logs: copy of the shared mapping with "pg_id" re-keyed as "log_id"
                 logs_mapping = copy.deepcopy(shared_log_mapping)
                 logs_mapping["properties"]["log_id"] = logs_mapping["properties"]["pg_id"]
                 del logs_mapping["properties"]["pg_id"]
                 log_template = {
                     "template": "rcae_l_*",
                     "settings": {
                         "index": {
                             "refresh_interval": "5s",
                             "translog": {"sync_interval": "5s", "durability": "async"},
                         },
                         "number_of_shards": 5,
                         "analysis": shared_analysis,
                     },
-                    "mappings": {
+                    "mappings": {"log": logs_mapping},
-                        "log": logs_mapping,
-                    },
                 }
                 Datastores.es.indices.put_template("rcae_logs", body=log_template)
                 # slow calls: copy of the shared mapping with "pg_id" re-keyed as
                 # "slow_call_id"
                 slow_call_mapping = copy.deepcopy(shared_log_mapping)
-                slow_call_mapping["properties"]["slow_call_id"] = slow_call_mapping["properties"]["pg_id"]
+                slow_call_mapping["properties"]["slow_call_id"] = slow_call_mapping["properties"][
+                    "pg_id"
+                ]
                 del slow_call_mapping["properties"]["pg_id"]
                 slow_call_template = {
                     "template": "rcae_sc_*",
                     "settings": {
                         "index": {
                             "refresh_interval": "5s",
                             "translog": {"sync_interval": "5s", "durability": "async"},
                         },
                         "number_of_shards": 5,
                         "analysis": shared_analysis,
                     },
-                    "mappings": {
+                    "mappings": {"log": slow_call_mapping},
-                        "log": slow_call_mapping,
-                    },
                 }
                 Datastores.es.indices.put_template("rcae_slow_calls", body=slow_call_template)
                 # metrics: copy of the shared mapping with "pg_id" re-keyed as
                 # "metric_id"
                 metric_mapping = copy.deepcopy(shared_log_mapping)
                 metric_mapping["properties"]["metric_id"] = metric_mapping["properties"]["pg_id"]
                 del metric_mapping["properties"]["pg_id"]
                 metrics_template = {
                     "template": "rcae_m_*",
                     "settings": {
                         "index": {
                             "refresh_interval": "5s",
                             "translog": {"sync_interval": "5s", "durability": "async"},
                         },
                         "number_of_shards": 5,
                         "analysis": shared_analysis,
                     },
-                    "mappings": {
+                    "mappings": {"log": metric_mapping},
-                        "log": metric_mapping,
-                    },
                 }
                 Datastores.es.indices.put_template("rcae_metrics", body=metrics_template)
                 # NOTE(review): uptime_metric_mapping (with "uptime_id") is built here,
                 # but the uptime template below is given shared_log_mapping (still keyed
                 # by "pg_id") - confirm whether uptime_metric_mapping was intended.
                 uptime_metric_mapping = copy.deepcopy(shared_log_mapping)
-                uptime_metric_mapping["properties"]["uptime_id"] = uptime_metric_mapping["properties"]["pg_id"]
+                uptime_metric_mapping["properties"]["uptime_id"] = uptime_metric_mapping[
+                    "properties"
+                ]["pg_id"]
                 del uptime_metric_mapping["properties"]["pg_id"]
                 uptime_metrics_template = {
                     "template": "rcae_uptime_ce_*",
                     "settings": {
                         "index": {
                             "refresh_interval": "5s",
                             "translog": {"sync_interval": "5s", "durability": "async"},
                         },
                         "number_of_shards": 5,
                         "analysis": shared_analysis,
                     },
-                    "mappings": {
+                    "mappings": {"log": shared_log_mapping},
-                        "log": shared_log_mapping,
-                    },
                 }
-                Datastores.es.indices.put_template("rcae_uptime_metrics", body=uptime_metrics_template)
+                Datastores.es.indices.put_template(
+                    "rcae_uptime_metrics", body=uptime_metrics_template
+                )
             def reindex_reports():
                 """
                 Drops all ``rcae_r_*`` indices and reindexes report groups, reports
                 and report stats from their partition tables into Elasticsearch
                 Every document is sent with the ``report`` doc type to the index named
                 by the ``partition_id`` of the item it was built from
                 """
                 reports_groups_tables = detect_tables("reports_groups_p_")
                 try:
                     # the pattern previously carried a stray backtick ("`rcae_r_*"),
                     # which does not name the rcae_r_* report indices
                     Datastores.es.indices.delete("rcae_r_*")
                 except elasticsearch.exceptions.NotFoundError as e:
                     log.error(e)
                 log.info("reindexing report groups")
                 i = 0
                 task_start = datetime.datetime.now()
                 for partition_table in reports_groups_tables:
                     # stream_results is requested so rows are fetched in chunks below
                     conn = DBSession.connection().execution_options(stream_results=True)
                     result = conn.execute(partition_table.select())
                     while True:
                         chunk = result.fetchmany(2000)
                         if not chunk:
                             break
                         # documents of this chunk grouped by target index name
                         es_docs = defaultdict(list)
                         for row in chunk:
                             i += 1
                             item = ReportGroup(**dict(list(row.items())))
                             d_range = item.partition_id
                             es_docs[d_range].append(item.es_doc())
                         if es_docs:
                             name = partition_table.name
                             log.info("round {}, {}".format(i, name))
                             for k, v in es_docs.items():
                                 to_update = {"_index": k, "_type": "report"}
                                 # plain loop with its own name: no throwaway list and
                                 # no reuse of the running counter name ``i``
                                 for doc in v:
                                     doc.update(to_update)
                                 elasticsearch.helpers.bulk(Datastores.es, v)
                 log.info("total docs {} {}".format(i, datetime.datetime.now() - task_start))
                 i = 0
                 log.info("reindexing reports")
                 task_start = datetime.datetime.now()
                 reports_tables = detect_tables("reports_p_")
                 for partition_table in reports_tables:
                     conn = DBSession.connection().execution_options(stream_results=True)
                     result = conn.execute(partition_table.select())
                     while True:
                         chunk = result.fetchmany(2000)
                         if not chunk:
                             break
                         es_docs = defaultdict(list)
                         for row in chunk:
                             i += 1
                             item = Report(**dict(list(row.items())))
                             d_range = item.partition_id
                             es_docs[d_range].append(item.es_doc())
                         if es_docs:
                             name = partition_table.name
                             log.info("round {}, {}".format(i, name))
                             for k, v in es_docs.items():
                                 to_update = {"_index": k, "_type": "report"}
                                 for doc in v:
                                     doc.update(to_update)
                                 elasticsearch.helpers.bulk(Datastores.es, v)
                 log.info("total docs {} {}".format(i, datetime.datetime.now() - task_start))
                 log.info("reindexing reports stats")
                 i = 0
                 task_start = datetime.datetime.now()
                 reports_stats_tables = detect_tables("reports_stats_p_")
                 for partition_table in reports_stats_tables:
                     conn = DBSession.connection().execution_options(stream_results=True)
                     result = conn.execute(partition_table.select())
                     while True:
                         chunk = result.fetchmany(2000)
                         if not chunk:
                             break
                         es_docs = defaultdict(list)
                         for row in chunk:
                             rd = dict(list(row.items()))
                             # remove legacy columns
                             # TODO: remove the column later
                             rd.pop("size", None)
                             item = ReportStat(**rd)
                             i += 1
                             d_range = item.partition_id
                             es_docs[d_range].append(item.es_doc())
                         if es_docs:
                             name = partition_table.name
                             log.info("round  {}, {}".format(i, name))
                             for k, v in es_docs.items():
                                 to_update = {"_index": k, "_type": "report"}
                                 for doc in v:
                                     doc.update(to_update)
                                 elasticsearch.helpers.bulk(Datastores.es, v)
                 log.info("total docs {} {}".format(i, datetime.datetime.now() - task_start))
             def reindex_logs():
                 """
                 Drops all ``rcae_l_*`` indices and reindexes every table returned by
                 ``detect_tables("logs_p_")`` into Elasticsearch with the ``log`` doc type
                 """
                 try:
                     Datastores.es.indices.delete("rcae_l_*")
                 except elasticsearch.exceptions.NotFoundError as exc:
                     log.error(exc)
                 # logs
                 log.info("reindexing logs")
                 total = 0
                 started_at = datetime.datetime.now()
                 for partition_table in detect_tables("logs_p_"):
                     conn = DBSession.connection().execution_options(stream_results=True)
                     result = conn.execute(partition_table.select())
                     # process the table in chunks of 2000 rows until it is exhausted
                     chunk = result.fetchmany(2000)
                     while chunk:
                         # documents of this chunk grouped by target index name
                         docs_by_index = defaultdict(list)
                         for row in chunk:
                             total += 1
                             entry = Log(**dict(row.items()))
                             docs_by_index[entry.partition_id].append(entry.es_doc())
                         if docs_by_index:
                             log.info("round  {}, {}".format(total, partition_table.name))
                             for index_name, docs in docs_by_index.items():
                                 bulk_meta = {"_index": index_name, "_type": "log"}
                                 for doc in docs:
                                     doc.update(bulk_meta)
                                 elasticsearch.helpers.bulk(Datastores.es, docs)
                         chunk = result.fetchmany(2000)
                 log.info("total docs {} {}".format(total, datetime.datetime.now() - started_at))
             def reindex_metrics():
                 """
                 Drops all ``rcae_m_*`` indices and reindexes every table returned by
                 ``detect_tables("metrics_p_")`` into Elasticsearch with the ``log`` doc
                 type
                 """
                 try:
                     Datastores.es.indices.delete("rcae_m_*")
                 except elasticsearch.exceptions.NotFoundError as exc:
                     log.error(exc)
                 log.info("reindexing applications metrics")
                 total = 0
                 started_at = datetime.datetime.now()
                 for partition_table in detect_tables("metrics_p_"):
                     conn = DBSession.connection().execution_options(stream_results=True)
                     result = conn.execute(partition_table.select())
                     # process the table in chunks of 2000 rows until it is exhausted
                     chunk = result.fetchmany(2000)
                     while chunk:
                         # documents of this chunk grouped by target index name
                         docs_by_index = defaultdict(list)
                         for row in chunk:
                             total += 1
                             metric = Metric(**dict(row.items()))
                             docs_by_index[metric.partition_id].append(metric.es_doc())
                         if docs_by_index:
                             log.info("round  {}, {}".format(total, partition_table.name))
                             for index_name, docs in docs_by_index.items():
                                 bulk_meta = {"_index": index_name, "_type": "log"}
                                 for doc in docs:
                                     doc.update(bulk_meta)
                                 elasticsearch.helpers.bulk(Datastores.es, docs)
                         chunk = result.fetchmany(2000)
                 log.info("total docs {} {}".format(total, datetime.datetime.now() - started_at))
             def reindex_slow_calls():
                 """
                 Drops all ``rcae_sc_*`` indices and reindexes every table returned by
                 ``detect_tables("slow_calls_p_")`` into Elasticsearch with the ``log``
                 doc type
                 """
                 try:
                     Datastores.es.indices.delete("rcae_sc_*")
                 except elasticsearch.exceptions.NotFoundError as exc:
                     log.error(exc)
                 log.info("reindexing slow calls")
                 total = 0
                 started_at = datetime.datetime.now()
                 for partition_table in detect_tables("slow_calls_p_"):
                     conn = DBSession.connection().execution_options(stream_results=True)
                     result = conn.execute(partition_table.select())
                     # process the table in chunks of 2000 rows until it is exhausted
                     chunk = result.fetchmany(2000)
                     while chunk:
                         # documents of this chunk grouped by target index name
                         docs_by_index = defaultdict(list)
                         for row in chunk:
                             total += 1
                             slow_call = SlowCall(**dict(row.items()))
                             docs_by_index[slow_call.partition_id].append(slow_call.es_doc())
                         if docs_by_index:
                             log.info("round  {}, {}".format(total, partition_table.name))
                             for index_name, docs in docs_by_index.items():
                                 bulk_meta = {"_index": index_name, "_type": "log"}
                                 for doc in docs:
                                     doc.update(bulk_meta)
                                 elasticsearch.helpers.bulk(Datastores.es, docs)
                         chunk = result.fetchmany(2000)
                 log.info("total docs {} {}".format(total, datetime.datetime.now() - started_at))
             # Run the reindex command only when the file is executed as a script.
             if __name__ == "__main__":
                 main()

backend/src/appenlight/views/logs.py

0 +3 -15

             # -*- coding: utf-8 -*-
             # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
             #
             # Licensed under the Apache License, Version 2.0 (the "License");
             # you may not use this file except in compliance with the License.
             # You may obtain a copy of the License at
             #
             #   http://www.apache.org/licenses/LICENSE-2.0
             #
             # Unless required by applicable law or agreed to in writing, software
             # distributed under the License is distributed on an "AS IS" BASIS,
             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
             # See the License for the specific language governing permissions and
             # limitations under the License.
             import logging
             from datetime import datetime, timedelta
             from pyramid.view import view_config
             from pyramid.httpexceptions import HTTPUnprocessableEntity
             from appenlight.models import Datastores, Log
             from appenlight.models.services.log import LogService
             from appenlight.lib.utils import (
                 build_filter_settings_from_query_dict,
                 es_index_name_limiter,
             )
             from appenlight.lib.helpers import gen_pagination_headers
             from appenlight.celery.tasks import logs_cleanup
             # Module-level logger for the logs views.
             log = logging.getLogger(__name__)
             # Key template with a single %s placeholder.
             # NOTE(review): not referenced in the visible part of this module - confirm
             # where it is used.
             section_filters_key = "appenlight:logs:filter:%s"
             @view_config(route_name="logs_no_id", renderer="json", permission="authenticated")
             def fetch_logs(request):
                 """
                 Returns one page of log entries as a list of dicts and adds the
                 pagination headers to the response
                 """
                 query_params = request.GET.mixed()
                 filter_settings = build_filter_settings_from_query_dict(request, query_params)
                 logs_paginator = LogService.get_paginator_by_app_ids(
                     app_ids=filter_settings["resource"],
                     page=filter_settings["page"],
                     filter_settings=filter_settings,
                 )
                 pagination_headers = gen_pagination_headers(request, logs_paginator)
                 request.response.headers.update(pagination_headers)
                 return [entry.get_dict() for entry in logs_paginator.sa_items]
             @view_config(
                 route_name="section_view",
                 match_param=["section=logs_section", "view=fetch_series"],
                 renderer="json",
                 permission="authenticated",
             )
             def logs_fetch_series(request):
                 """
                 Returns the data series for the logs graph
                 The result is a list of ``{"x": datetime, "logs": count}`` dicts built
                 from the ``events_over_time`` aggregation over a 7 day window, cached
                 through the ``redis_sec_30`` cache region
                 """
                 filter_settings = build_filter_settings_from_query_dict(
                     request, request.GET.mixed()
                 )
                 # a one-item page is fetched only to find the timestamp that anchors
                 # the window
                 paginator = LogService.get_paginator_by_app_ids(
                     app_ids=filter_settings["resource"],
                     page=1,
                     filter_settings=filter_settings,
                     items_per_page=1,
                 )
                 now = datetime.utcnow().replace(microsecond=0, second=0)
                 delta = timedelta(days=7)
                 if paginator.sa_items:
                     anchor = paginator.sa_items[-1].timestamp.replace(microsecond=0, second=0)
                 else:
                     anchor = now
                 # the window starts 7 days before the anchor and spans 7 days
                 filter_settings["start_date"] = anchor - delta
                 filter_settings["end_date"] = filter_settings["start_date"] + delta
                 @request.registry.cache_regions.redis_sec_30.cache_on_arguments("logs_graphs")
                 def cached(apps, search_params, delta, now):
                     # the arguments are not read here - they are only passed to the
                     # cache decorator; the data comes from the enclosing filter_settings
                     data = LogService.get_time_series_aggregate(
                         filter_settings["resource"], filter_settings
                     )
                     if data:
                         buckets = data["aggregations"]["events_over_time"]["buckets"]
                         return [
                             {
                                 "x": datetime.utcfromtimestamp(bucket["key"] / 1000),
                                 "logs": bucket["doc_count"],
                             }
                             for bucket in buckets
                         ]
                     return []
                 return cached(filter_settings, request.GET.mixed(), delta, now)
             @view_config(
                 route_name="logs_no_id",
                 renderer="json",
                 request_method="DELETE",
                 permission="authenticated",
             )
             def logs_mass_delete(request):
                 """
                 Schedules the ``logs_cleanup`` task for the logs of one application
                 that match the query filters and flashes a notice to the session
                 Raises HTTPUnprocessableEntity when ``resource`` is missing or cannot
                 be read as a single integer, when the filter settings contain no
                 resource, or when the resolved resource differs from the requested one
                 """
                 params = request.GET.mixed()
                 if "resource" not in params:
                     raise HTTPUnprocessableEntity()
                 # this might be '' and then colander will not validate the schema
                 if not params.get("namespace"):
                     params.pop("namespace", None)
                 filter_settings = build_filter_settings_from_query_dict(
                     request, params, resource_permissions=["update_reports"]
                 )
                 resources = list(filter_settings["resource"])
                 if not resources:
                     # no resource came back from the filter settings - answer with the
                     # same validation error instead of failing on the index below
                     raise HTTPUnprocessableEntity()
                 resource_id = resources[0]
                 # filter settings returns list of all of users applications
                 # if app is not matching - normally we would not care as its used for search
                 # but here user playing with params would possibly wipe out their whole data
                 try:
                     resource_matches = int(resource_id) == int(params["resource"])
                 except (TypeError, ValueError):
                     # a repeated ``resource`` parameter arrives as a list from mixed()
                     # and a non-numeric value cannot be converted
                     raise HTTPUnprocessableEntity()
                 if not resource_matches:
                     raise HTTPUnprocessableEntity()
                 logs_cleanup.delay(resource_id, filter_settings)
                 msg = (
                     "Log cleanup process started - it may take a while for "
                     "everything to get removed"
                 )
                 request.session.flash(msg)
                 return {}
             @view_config(
                 route_name="section_view",
                 match_param=("view=common_tags", "section=logs_section"),
                 renderer="json",
                 permission="authenticated",
             )
             def common_tags(request):
                 """
                 Returns tag and namespace values found in the filtered documents
                 Runs two Elasticsearch terms aggregations (``tag_list.keyword`` and
                 ``namespace.keyword``, up to 50 buckets each) over the same filter and
                 returns the bucket keys as ``{"tags": [...], "namespaces": [...]}``
                 """
                 config = request.GET.mixed()
                 filter_settings = build_filter_settings_from_query_dict(request, config)
                 resources = list(filter_settings["resource"])
                 # base query: restrict to the resources from the filter settings
                 query = {
-                    "query": {
+                    "query": {"bool": {"filter": [{"terms": {"resource_id": list(resources)}}]}}
-                        "bool": {
-                            "filter": [{"terms": {"resource_id": list(resources)}}]
                 }
                 start_date = filter_settings.get("start_date")
                 end_date = filter_settings.get("end_date")
                 # further filters are appended to the same list the query holds
                 filter_part = query["query"]["bool"]["filter"]
                 # the timestamp range is added only when at least one bound is set
                 date_range = {"range": {"timestamp": {}}}
                 if start_date:
                     date_range["range"]["timestamp"]["gte"] = start_date
                 if end_date:
                     date_range["range"]["timestamp"]["lte"] = end_date
                 if start_date or end_date:
                     filter_part.append(date_range)
                 levels = filter_settings.get("level")
                 if levels:
                     filter_part.append({"terms": {"log_level": levels}})
                 namespaces = filter_settings.get("namespace")
                 if namespaces:
                     filter_part.append({"terms": {"namespace": namespaces}})
                 query["aggs"] = {"sub_agg": {"terms": {"field": "tag_list.keyword", "size": 50}}}
                 # tags
                 # index names come from the "datasource" query parameter ("logs" when absent)
                 index_names = es_index_name_limiter(ixtypes=[config.get("datasource", "logs")])
                 # size=0: only the aggregation result is read from the response
                 result = Datastores.es.search(body=query, index=index_names, doc_type="log", size=0)
                 tag_buckets = result["aggregations"]["sub_agg"].get("buckets", [])
                 # namespaces
                 # same query, with the aggregation swapped to the namespace field
                 query["aggs"] = {"sub_agg": {"terms": {"field": "namespace.keyword", "size": 50}}}
                 result = Datastores.es.search(body=query, index=index_names, doc_type="log", size=0)
                 namespaces_buckets = result["aggregations"]["sub_agg"].get("buckets", [])
                 return {
                     "tags": [item["key"] for item in tag_buckets],
                     "namespaces": [item["key"] for item in namespaces_buckets],
                 }
             @view_config(
                 route_name="section_view",
                 match_param=("view=common_values", "section=logs_section"),
                 renderer="json",
                 permission="authenticated",
             )
             def common_values(request):
                 """Return the most frequent values recorded for one tag.

                 The tag name is taken from the first value of the first entry in
                 the ``tags`` filter setting.  Documents are restricted to the
                 resources from the filter settings and, when given, to the
                 selected namespaces; a terms aggregation (up to 50 buckets) is
                 run on ``tags.<tag_name>.values``.

                 :param request: current request; ``request.GET`` supplies the
                     filters and an optional ``datasource`` (default ``"logs"``)
                 :returns: dict with a ``values`` list of bucket keys
                 """
                 config = request.GET.mixed()
                 # ``datasource`` only selects the index, so it is removed before
                 # the remaining parameters are turned into filter settings
                 datasource = config.pop("datasource", "logs")
                 filter_settings = build_filter_settings_from_query_dict(request, config)
                 resources = list(filter_settings["resource"])
                 # NOTE(review): raises if no tag filter was supplied - confirm that
                 # callers always send one
                 tag_name = filter_settings["tags"][0]["value"][0]
                 and_part = [{"terms": {"resource_id": list(resources)}}]
                 # ``.get`` matches the lookup used by ``common_tags`` and tolerates
                 # filter settings without a namespace entry
                 namespaces = filter_settings.get("namespace")
                 if namespaces:
                     and_part.append({"terms": {"namespace": namespaces}})
                 query = {"query": {"bool": {"filter": and_part}}}
                 query["aggs"] = {
                     "sub_agg": {"terms": {"field": "tags.{}.values".format(tag_name), "size": 50}}
                 }
                 index_names = es_index_name_limiter(ixtypes=[datasource])
                 # size=0: only the aggregation section of the response is read
                 result = Datastores.es.search(body=query, index=index_names, doc_type="log", size=0)
                 values_buckets = result["aggregations"]["sub_agg"].get("buckets", [])
                 return {"values": [item["key"] for item in values_buckets]}

General Comments 4

vaingmuny

|

Auto status change to "Under Review"

vaingmuny

|

Auto status change to "Under Review"

Write
Preview

You need to be logged in to leave comments. Log in now

		Auto status change to \|new_status\|...
		rgrtg
		Hi
		Auto status change to \|new_status\|...

	Site-wide shortcuts
/	Use quick search box
g h	Go to home page
g g	Go to my private gists page
g G	Go to my public gists page
g 0-9	Go to bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Go to summary page
g c	Go to changelog page
g f	Go to files page
g F	Go to files page with file search activated
g p	Go to pull requests page
g o	Go to repository settings
g O	Go to repository access permissions settings
t s	Toggle sidebar on some pages