Auto status change to "Under Review"
@@ -1,707 +1,705 @@
|
1 | 1 | # -*- coding: utf-8 -*- |
|
2 | 2 | |
|
3 | 3 | # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors |
|
4 | 4 | # |
|
5 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 | 6 | # you may not use this file except in compliance with the License. |
|
7 | 7 | # You may obtain a copy of the License at |
|
8 | 8 | # |
|
9 | 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
|
10 | 10 | # |
|
11 | 11 | # Unless required by applicable law or agreed to in writing, software |
|
12 | 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
|
13 | 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 | 14 | # See the License for the specific language governing permissions and |
|
15 | 15 | # limitations under the License. |
|
16 | 16 | |
|
17 | 17 | import bisect |
|
18 | 18 | import collections |
|
19 | 19 | import math |
|
20 | 20 | from datetime import datetime, timedelta |
|
21 | 21 | |
|
22 | 22 | import sqlalchemy as sa |
|
23 | 23 | import elasticsearch.exceptions |
|
24 | 24 | import elasticsearch.helpers |
|
25 | 25 | |
|
26 | 26 | from celery.utils.log import get_task_logger |
|
27 | 27 | from zope.sqlalchemy import mark_changed |
|
28 | 28 | from pyramid.threadlocal import get_current_request, get_current_registry |
|
29 | 29 | from ziggurat_foundations.models.services.resource import ResourceService |
|
30 | 30 | |
|
31 | 31 | from appenlight.celery import celery |
|
32 | 32 | from appenlight.models.report_group import ReportGroup |
|
33 | 33 | from appenlight.models import DBSession, Datastores |
|
34 | 34 | from appenlight.models.report import Report |
|
35 | 35 | from appenlight.models.log import Log |
|
36 | 36 | from appenlight.models.metric import Metric |
|
37 | 37 | from appenlight.models.event import Event |
|
38 | 38 | |
|
39 | 39 | from appenlight.models.services.application import ApplicationService |
|
40 | 40 | from appenlight.models.services.event import EventService |
|
41 | 41 | from appenlight.models.services.log import LogService |
|
42 | 42 | from appenlight.models.services.report import ReportService |
|
43 | 43 | from appenlight.models.services.report_group import ReportGroupService |
|
44 | 44 | from appenlight.models.services.user import UserService |
|
45 | 45 | from appenlight.models.tag import Tag |
|
46 | 46 | from appenlight.lib import print_traceback |
|
47 | 47 | from appenlight.lib.utils import parse_proto, in_batches |
|
48 | 48 | from appenlight.lib.ext_json import json |
|
49 | 49 | from appenlight.lib.redis_keys import REDIS_KEYS |
|
50 | 50 | from appenlight.lib.enums import ReportType |
|
51 | 51 | |
|
52 | 52 | log = get_task_logger(__name__) |
|
53 | 53 | |
|
54 | 54 | sample_boundries = ( |
|
55 | 55 | list(range(100, 1000, 100)) |
|
56 | 56 | + list(range(1000, 10000, 1000)) |
|
57 | 57 | + list(range(10000, 100000, 5000)) |
|
58 | 58 | ) |
|
59 | 59 | |
|
60 | 60 | |
|
61 | 61 | def pick_sample(total_occurences, report_type=None): |
|
62 | 62 | every = 1.0 |
|
63 | 63 | position = bisect.bisect_left(sample_boundries, total_occurences) |
|
64 | 64 | if position > 0: |
|
65 | 65 | if report_type == ReportType.not_found: |
|
66 | 66 | divide = 10.0 |
|
67 | 67 | else: |
|
68 | 68 | divide = 100.0 |
|
69 | 69 | every = sample_boundries[position - 1] / divide |
|
70 | 70 | return total_occurences % every == 0 |
|
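The sampling curve above keeps every report while a group is young and stores progressively fewer as its occurrence count crosses each boundary. A minimal self-contained sketch of the error-report branch (`divide=100.0`), copied rather than imported:

```python
import bisect

sample_boundries = (
    list(range(100, 1000, 100))
    + list(range(1000, 10000, 1000))
    + list(range(10000, 100000, 5000))
)

def pick_sample(total_occurences, divide=100.0):
    # keep the report only when the count lands on the current sampling step
    every = 1.0
    position = bisect.bisect_left(sample_boundries, total_occurences)
    if position > 0:
        every = sample_boundries[position - 1] / divide
    return total_occurences % every == 0

print(pick_sample(42), pick_sample(5000), pick_sample(5001))  # True True False
```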
71 | 71 | |
|
72 | 72 | |
|
73 | 73 | @celery.task(queue="default", default_retry_delay=1, max_retries=2) |
|
74 | 74 | def test_exception_task(): |
|
75 | 75 | log.error("test celery log", extra={"location": "celery"}) |
|
76 | 76 | log.warning("test celery log", extra={"location": "celery"}) |
|
77 | 77 | raise Exception("Celery exception test") |
|
78 | 78 | |
|
79 | 79 | |
|
80 | 80 | @celery.task(queue="default", default_retry_delay=1, max_retries=2) |
|
81 | 81 | def test_retry_exception_task(): |
|
82 | 82 | try: |
|
83 | 83 | import time |
|
84 | 84 | |
|
85 | 85 | time.sleep(1.3) |
|
86 | 86 | log.error("test retry celery log", extra={"location": "celery"}) |
|
87 | 87 | log.warning("test retry celery log", extra={"location": "celery"}) |
|
88 | 88 | raise Exception("Celery exception test") |
|
89 | 89 | except Exception as exc: |
|
90 | 90 | if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]: |
|
91 | 91 | raise |
|
92 | 92 | test_retry_exception_task.retry(exc=exc) |
|
93 | 93 | |
|
94 | 94 | |
|
95 | 95 | @celery.task(queue="reports", default_retry_delay=600, max_retries=144) |
|
96 | 96 | def add_reports(resource_id, request_params, dataset, **kwargs): |
|
97 | 97 | proto_version = parse_proto(request_params.get("protocol_version", "")) |
|
98 | 98 | current_time = datetime.utcnow().replace(second=0, microsecond=0) |
|
99 | 99 | try: |
|
100 | 100 | # we will store ES docs here for a single bulk insert |
|
101 | 101 | es_report_docs = {} |
|
102 | 102 | es_report_group_docs = {} |
|
103 | 103 | resource = ApplicationService.by_id(resource_id) |
|
104 | 104 | |
|
105 | 105 | tags = [] |
|
106 | 106 | es_slow_calls_docs = {} |
|
107 | 107 | es_reports_stats_rows = {} |
|
108 | 108 | for report_data in dataset: |
|
109 | 109 | # build report details for later |
|
110 | 110 | added_details = 0 |
|
111 | 111 | report = Report() |
|
112 | 112 | report.set_data(report_data, resource, proto_version) |
|
113 | 113 | report._skip_ft_index = True |
|
114 | 114 | |
|
115 | 115 | # find the latest group in this month's partition |
|
116 | 116 | report_group = ReportGroupService.by_hash_and_resource( |
|
117 | 117 | report.resource_id, |
|
118 | 118 | report.grouping_hash, |
|
119 | 119 | since_when=datetime.utcnow().date().replace(day=1), |
|
120 | 120 | ) |
|
121 | 121 | occurences = report_data.get("occurences", 1) |
|
122 | 122 | if not report_group: |
|
123 | 123 | # total_reports will be incremented a moment later |
|
124 | 124 | report_group = ReportGroup( |
|
125 | 125 | grouping_hash=report.grouping_hash, |
|
126 | 126 | occurences=0, |
|
127 | 127 | total_reports=0, |
|
128 | 128 | last_report=0, |
|
129 | 129 | priority=report.priority, |
|
130 | 130 | error=report.error, |
|
131 | 131 | first_timestamp=report.start_time, |
|
132 | 132 | ) |
|
133 | 133 | report_group._skip_ft_index = True |
|
134 | 134 | report_group.report_type = report.report_type |
|
135 | 135 | report.report_group_time = report_group.first_timestamp |
|
136 | 136 | add_sample = pick_sample( |
|
137 | 137 | report_group.occurences, report_type=report_group.report_type |
|
138 | 138 | ) |
|
139 | 139 | if add_sample: |
|
140 | 140 | resource.report_groups.append(report_group) |
|
141 | 141 | report_group.reports.append(report) |
|
142 | 142 | added_details += 1 |
|
143 | 143 | DBSession.flush() |
|
144 | 144 | if report.partition_id not in es_report_docs: |
|
145 | 145 | es_report_docs[report.partition_id] = [] |
|
146 | 146 | es_report_docs[report.partition_id].append(report.es_doc()) |
|
147 | 147 | tags.extend(list(report.tags.items())) |
|
148 | 148 | slow_calls = report.add_slow_calls(report_data, report_group) |
|
149 | 149 | DBSession.flush() |
|
150 | 150 | for s_call in slow_calls: |
|
151 | 151 | if s_call.partition_id not in es_slow_calls_docs: |
|
152 | 152 | es_slow_calls_docs[s_call.partition_id] = [] |
|
153 | 153 | es_slow_calls_docs[s_call.partition_id].append(s_call.es_doc()) |
|
154 | 154 | # try generating new stat rows if needed |
|
155 | 155 | else: |
|
156 | 156 | # required for postprocessing to not fail later |
|
157 | 157 | report.report_group = report_group |
|
158 | 158 | |
|
159 | 159 | stat_row = ReportService.generate_stat_rows(report, resource, report_group) |
|
160 | 160 | if stat_row.partition_id not in es_reports_stats_rows: |
|
161 | 161 | es_reports_stats_rows[stat_row.partition_id] = [] |
|
162 | 162 | es_reports_stats_rows[stat_row.partition_id].append(stat_row.es_doc()) |
|
163 | 163 | |
|
164 | 164 | # see if we should mark the 10th occurrence of the report |
|
165 | 165 | last_occurences_10 = int(math.floor(report_group.occurences / 10)) |
|
166 | 166 | curr_occurences_10 = int( |
|
167 | 167 | math.floor((report_group.occurences + report.occurences) / 10) |
|
168 | 168 | ) |
|
169 | 169 | last_occurences_100 = int(math.floor(report_group.occurences / 100)) |
|
170 | 170 | curr_occurences_100 = int( |
|
171 | 171 | math.floor((report_group.occurences + report.occurences) / 100) |
|
172 | 172 | ) |
|
173 | 173 | notify_occurences_10 = last_occurences_10 != curr_occurences_10 |
|
174 | 174 | notify_occurences_100 = last_occurences_100 != curr_occurences_100 |
|
175 | 175 | report_group.occurences = ReportGroup.occurences + occurences |
|
176 | 176 | report_group.last_timestamp = report.start_time |
|
177 | 177 | report_group.summed_duration = ReportGroup.summed_duration + report.duration |
|
178 | 178 | summed_duration = ReportGroup.summed_duration + report.duration |
|
179 | 179 | summed_occurences = ReportGroup.occurences + occurences |
|
180 | 180 | report_group.average_duration = summed_duration / summed_occurences |
|
181 | 181 | report_group.run_postprocessing(report) |
|
182 | 182 | if added_details: |
|
183 | 183 | report_group.total_reports = ReportGroup.total_reports + 1 |
|
184 | 184 | report_group.last_report = report.id |
|
185 | 185 | report_group.set_notification_info( |
|
186 | 186 | notify_10=notify_occurences_10, notify_100=notify_occurences_100 |
|
187 | 187 | ) |
|
188 | 188 | DBSession.flush() |
|
189 | 189 | report_group.get_report().notify_channel(report_group) |
|
190 | 190 | if report_group.partition_id not in es_report_group_docs: |
|
191 | 191 | es_report_group_docs[report_group.partition_id] = [] |
|
192 | 192 | es_report_group_docs[report_group.partition_id].append( |
|
193 | 193 | report_group.es_doc() |
|
194 | 194 | ) |
|
195 | 195 | |
|
196 | 196 | action = "REPORT" |
|
197 | 197 | log_msg = "%s: %s %s, client: %s, proto: %s" % ( |
|
198 | 198 | action, |
|
199 | 199 | report_data.get("http_status", "unknown"), |
|
200 | 200 | str(resource), |
|
201 | 201 | report_data.get("client"), |
|
202 | 202 | proto_version, |
|
203 | 203 | ) |
|
204 | 204 | log.info(log_msg) |
|
205 | 205 | total_reports = len(dataset) |
|
206 | 206 | redis_pipeline = Datastores.redis.pipeline(transaction=False) |
|
207 | 207 | key = REDIS_KEYS["counters"]["reports_per_minute"].format(current_time) |
|
208 | 208 | redis_pipeline.incr(key, total_reports) |
|
209 | 209 | redis_pipeline.expire(key, 3600 * 24) |
|
210 | 210 | key = REDIS_KEYS["counters"]["events_per_minute_per_user"].format( |
|
211 | 211 | resource.owner_user_id, current_time |
|
212 | 212 | ) |
|
213 | 213 | redis_pipeline.incr(key, total_reports) |
|
214 | 214 | redis_pipeline.expire(key, 3600) |
|
215 | 215 | key = REDIS_KEYS["counters"]["reports_per_hour_per_app"].format( |
|
216 | 216 | resource_id, current_time.replace(minute=0) |
|
217 | 217 | ) |
|
218 | 218 | redis_pipeline.incr(key, total_reports) |
|
219 | 219 | redis_pipeline.expire(key, 3600 * 24 * 7) |
|
220 | 220 | redis_pipeline.sadd( |
|
221 | 221 | REDIS_KEYS["apps_that_got_new_data_per_hour"].format( |
|
222 | 222 | current_time.replace(minute=0) |
|
223 | 223 | ), |
|
224 | 224 | resource_id, |
|
225 | 225 | ) |
|
226 | 226 | redis_pipeline.execute() |
|
227 | 227 | |
|
228 | 228 | add_reports_es(es_report_group_docs, es_report_docs) |
|
229 | 229 | add_reports_slow_calls_es(es_slow_calls_docs) |
|
230 | 230 | add_reports_stats_rows_es(es_reports_stats_rows) |
|
231 | 231 | return True |
|
232 | 232 | except Exception as exc: |
|
233 | 233 | print_traceback(log) |
|
234 | 234 | if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]: |
|
235 | 235 | raise |
|
236 | 236 | add_reports.retry(exc=exc) |
|
237 | 237 | |
|
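The redis section at the end of `add_reports` follows a single pattern: bump a per-minute counter and give it a TTL, with everything queued on one pipeline. A sketch of that pattern with an illustrative key layout (the real key formats come from `REDIS_KEYS`):

```python
import redis
from datetime import datetime

r = redis.StrictRedis()
minute = datetime.utcnow().replace(second=0, microsecond=0)
pipe = r.pipeline(transaction=False)
key = "counters:reports_per_minute:%s" % minute  # hypothetical key format
pipe.incr(key, 10)           # add this batch's report count
pipe.expire(key, 3600 * 24)  # counters self-expire after a day
pipe.execute()               # one round trip for all queued commands
```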
238 | 238 | |
|
239 | 239 | @celery.task(queue="es", default_retry_delay=600, max_retries=144) |
|
240 | 240 | def add_reports_es(report_group_docs, report_docs): |
|
241 | 241 | for k, v in report_group_docs.items(): |
|
242 | 242 | to_update = {"_index": k, "_type": "report"} |
|
243 | 243 | [i.update(to_update) for i in v] |
|
244 | 244 | elasticsearch.helpers.bulk(Datastores.es, v) |
|
245 | 245 | for k, v in report_docs.items(): |
|
246 | 246 | to_update = {"_index": k, "_type": "report"} |
|
247 | 247 | [i.update(to_update) for i in v] |
|
248 | 248 | elasticsearch.helpers.bulk(Datastores.es, v) |
|
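The bulk helper used here expects the action metadata (`_index`, `_type`, optionally `_id`) merged into each source document, which is exactly what the `to_update` dict does. A sketch with a made-up index name and doc body:

```python
import elasticsearch
import elasticsearch.helpers

es = elasticsearch.Elasticsearch()  # assumes a reachable cluster
docs = [{"_id": "abc", "error": "boom", "resource_id": 1}]
for doc in docs:
    doc.update({"_index": "rcae_r_2017_05", "_type": "report"})
elasticsearch.helpers.bulk(es, docs)
```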
249 | 249 | |
|
250 | 250 | |
|
251 | 251 | @celery.task(queue="es", default_retry_delay=600, max_retries=144) |
|
252 | 252 | def add_reports_slow_calls_es(es_docs): |
|
253 | 253 | for k, v in es_docs.items(): |
|
254 | 254 | to_update = {"_index": k, "_type": "log"} |
|
255 | 255 | [i.update(to_update) for i in v] |
|
256 | 256 | elasticsearch.helpers.bulk(Datastores.es, v) |
|
257 | 257 | |
|
258 | 258 | |
|
259 | 259 | @celery.task(queue="es", default_retry_delay=600, max_retries=144) |
|
260 | 260 | def add_reports_stats_rows_es(es_docs): |
|
261 | 261 | for k, v in es_docs.items(): |
|
262 | 262 | to_update = {"_index": k, "_type": "report"} |
|
263 | 263 | [i.update(to_update) for i in v] |
|
264 | 264 | elasticsearch.helpers.bulk(Datastores.es, v) |
|
265 | 265 | |
|
266 | 266 | |
|
267 | 267 | @celery.task(queue="logs", default_retry_delay=600, max_retries=144) |
|
268 | 268 | def add_logs(resource_id, request_params, dataset, **kwargs): |
|
269 | 269 | proto_version = request_params.get("protocol_version") |
|
270 | 270 | current_time = datetime.utcnow().replace(second=0, microsecond=0) |
|
271 | 271 | |
|
272 | 272 | try: |
|
273 | 273 | es_docs = collections.defaultdict(list) |
|
274 | 274 | resource = ApplicationService.by_id_cached()(resource_id) |
|
275 | 275 | resource = DBSession.merge(resource, load=False) |
|
276 | 276 | ns_pairs = [] |
|
277 | 277 | for entry in dataset: |
|
278 | 278 | # gather pk and ns so we can remove older versions of row later |
|
279 | 279 | if entry["primary_key"] is not None: |
|
280 | 280 | ns_pairs.append({"pk": entry["primary_key"], "ns": entry["namespace"]}) |
|
281 | 281 | log_entry = Log() |
|
282 | 282 | log_entry.set_data(entry, resource=resource) |
|
283 | 283 | log_entry._skip_ft_index = True |
|
284 | 284 | resource.logs.append(log_entry) |
|
285 | 285 | DBSession.flush() |
|
286 | 286 | # insert non-pk rows first |
|
287 | 287 | if entry["primary_key"] is None: |
|
288 | 288 | es_docs[log_entry.partition_id].append(log_entry.es_doc()) |
|
289 | 289 | |
|
290 | 290 | # 2nd pass to delete all log entries from db for same pk/ns pair |
|
291 | 291 | if ns_pairs: |
|
292 | 292 | ids_to_delete = [] |
|
293 | 293 | es_docs = collections.defaultdict(list) |
|
294 | 294 | es_docs_to_delete = collections.defaultdict(list) |
|
295 | 295 | found_pkey_logs = LogService.query_by_primary_key_and_namespace( |
|
296 | 296 | list_of_pairs=ns_pairs |
|
297 | 297 | ) |
|
298 | 298 | log_dict = {} |
|
299 | 299 | for log_entry in found_pkey_logs: |
|
300 | 300 | log_key = (log_entry.primary_key, log_entry.namespace) |
|
301 | 301 | if log_key not in log_dict: |
|
302 | 302 | log_dict[log_key] = [] |
|
303 | 303 | log_dict[log_key].append(log_entry) |
|
304 | 304 | |
|
305 | 305 | for ns, entry_list in log_dict.items(): |
|
306 | 306 | entry_list = sorted(entry_list, key=lambda x: x.timestamp) |
|
307 | 307 | # newest row needs to be indexed in es |
|
308 | 308 | log_entry = entry_list[-1] |
|
309 | 309 | # delete everything from pg and ES, leave the last row in pg |
|
310 | 310 | for e in entry_list[:-1]: |
|
311 | 311 | ids_to_delete.append(e.log_id) |
|
312 | 312 | es_docs_to_delete[e.partition_id].append(e.delete_hash) |
|
313 | 313 | |
|
314 | 314 | es_docs_to_delete[log_entry.partition_id].append(log_entry.delete_hash) |
|
315 | 315 | |
|
316 | 316 | es_docs[log_entry.partition_id].append(log_entry.es_doc()) |
|
317 | 317 | |
|
318 | 318 | if ids_to_delete: |
|
319 | 319 | query = DBSession.query(Log).filter(Log.log_id.in_(ids_to_delete)) |
|
320 | 320 | query.delete(synchronize_session=False) |
|
321 | 321 | if es_docs_to_delete: |
|
322 | 322 | # batch this to avoid problems with default ES bulk limits |
|
323 | 323 | for es_index in es_docs_to_delete.keys(): |
|
324 | 324 | for batch in in_batches(es_docs_to_delete[es_index], 20): |
|
325 | 325 | query = {"query": {"terms": {"delete_hash": batch}}} |
|
326 | 326 | |
|
327 | 327 | try: |
|
328 | 328 | Datastores.es.delete_by_query( |
|
329 | index=es_index, | |
|
330 | | |
|
329 | index=es_index, | |
|
330 | doc_type="log", | |
|
331 | body=query, | |
|
332 | conflicts="proceed", | |
|
331 | 333 | ) |
|
332 | 334 | except elasticsearch.exceptions.NotFoundError as exc: |
|
333 | 335 | msg = "skipping index {}".format(es_index) |
|
334 | 336 | log.info(msg) |
|
335 | 337 | |
|
336 | 338 | total_logs = len(dataset) |
|
337 | 339 | |
|
338 | 340 | log_msg = "LOG_NEW: %s, entries: %s, proto:%s" % ( |
|
339 | 341 | str(resource), |
|
340 | 342 | total_logs, |
|
341 | 343 | proto_version, |
|
342 | 344 | ) |
|
343 | 345 | log.info(log_msg) |
|
344 | 346 | # mark_changed(session) |
|
345 | 347 | redis_pipeline = Datastores.redis.pipeline(transaction=False) |
|
346 | 348 | key = REDIS_KEYS["counters"]["logs_per_minute"].format(current_time) |
|
347 | 349 | redis_pipeline.incr(key, total_logs) |
|
348 | 350 | redis_pipeline.expire(key, 3600 * 24) |
|
349 | 351 | key = REDIS_KEYS["counters"]["events_per_minute_per_user"].format( |
|
350 | 352 | resource.owner_user_id, current_time |
|
351 | 353 | ) |
|
352 | 354 | redis_pipeline.incr(key, total_logs) |
|
353 | 355 | redis_pipeline.expire(key, 3600) |
|
354 | 356 | key = REDIS_KEYS["counters"]["logs_per_hour_per_app"].format( |
|
355 | 357 | resource_id, current_time.replace(minute=0) |
|
356 | 358 | ) |
|
357 | 359 | redis_pipeline.incr(key, total_logs) |
|
358 | 360 | redis_pipeline.expire(key, 3600 * 24 * 7) |
|
359 | 361 | redis_pipeline.sadd( |
|
360 | 362 | REDIS_KEYS["apps_that_got_new_data_per_hour"].format( |
|
361 | 363 | current_time.replace(minute=0) |
|
362 | 364 | ), |
|
363 | 365 | resource_id, |
|
364 | 366 | ) |
|
365 | 367 | redis_pipeline.execute() |
|
366 | 368 | add_logs_es(es_docs) |
|
367 | 369 | return True |
|
368 | 370 | except Exception as exc: |
|
369 | 371 | print_traceback(log) |
|
370 | 372 | if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]: |
|
371 | 373 | raise |
|
372 | 374 | add_logs.retry(exc=exc) |
|
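The second pass in `add_logs` reduces rows sharing a `(primary_key, namespace)` pair down to the newest one; everything older is queued for deletion in both postgres and ES. A toy illustration of that reduction with made-up rows:

```python
import collections
from datetime import datetime

Row = collections.namedtuple("Row", "log_id primary_key namespace timestamp")
rows = [
    Row(1, "job-1", "worker", datetime(2017, 5, 1, 10, 0)),
    Row(2, "job-1", "worker", datetime(2017, 5, 1, 11, 0)),
    Row(3, "job-2", "worker", datetime(2017, 5, 1, 10, 30)),
]
grouped = collections.defaultdict(list)
for row in rows:
    grouped[(row.primary_key, row.namespace)].append(row)

ids_to_delete = []
for entry_list in grouped.values():
    entry_list.sort(key=lambda x: x.timestamp)
    ids_to_delete.extend(e.log_id for e in entry_list[:-1])  # all but the newest
print(ids_to_delete)  # [1]
```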
373 | 375 | |
|
374 | 376 | |
|
375 | 377 | @celery.task(queue="es", default_retry_delay=600, max_retries=144) |
|
376 | 378 | def add_logs_es(es_docs): |
|
377 | 379 | for k, v in es_docs.items(): |
|
378 | 380 | to_update = {"_index": k, "_type": "log"} |
|
379 | 381 | [i.update(to_update) for i in v] |
|
380 | 382 | elasticsearch.helpers.bulk(Datastores.es, v) |
|
381 | 383 | |
|
382 | 384 | |
|
383 | 385 | @celery.task(queue="metrics", default_retry_delay=600, max_retries=144) |
|
384 | 386 | def add_metrics(resource_id, request_params, dataset, proto_version): |
|
385 | 387 | current_time = datetime.utcnow().replace(second=0, microsecond=0) |
|
386 | 388 | try: |
|
387 | 389 | resource = ApplicationService.by_id_cached()(resource_id) |
|
388 | 390 | resource = DBSession.merge(resource, load=False) |
|
389 | 391 | es_docs = [] |
|
390 | 392 | rows = [] |
|
391 | 393 | for metric in dataset: |
|
392 | 394 | tags = dict(metric["tags"]) |
|
393 | 395 | server_n = tags.get("server_name", metric["server_name"]).lower() |
|
394 | 396 | tags["server_name"] = server_n or "unknown" |
|
395 | 397 | new_metric = Metric( |
|
396 | 398 | timestamp=metric["timestamp"], |
|
397 | 399 | resource_id=resource.resource_id, |
|
398 | 400 | namespace=metric["namespace"], |
|
399 | 401 | tags=tags, |
|
400 | 402 | ) |
|
401 | 403 | rows.append(new_metric) |
|
402 | 404 | es_docs.append(new_metric.es_doc()) |
|
403 | 405 | session = DBSession() |
|
404 | 406 | session.bulk_save_objects(rows) |
|
405 | 407 | session.flush() |
|
406 | 408 | |
|
407 | 409 | action = "METRICS" |
|
408 | 410 | metrics_msg = "%s: %s, metrics: %s, proto:%s" % ( |
|
409 | 411 | action, |
|
410 | 412 | str(resource), |
|
411 | 413 | len(dataset), |
|
412 | 414 | proto_version, |
|
413 | 415 | ) |
|
414 | 416 | log.info(metrics_msg) |
|
415 | 417 | |
|
416 | 418 | mark_changed(session) |
|
417 | 419 | redis_pipeline = Datastores.redis.pipeline(transaction=False) |
|
418 | 420 | key = REDIS_KEYS["counters"]["metrics_per_minute"].format(current_time) |
|
419 | 421 | redis_pipeline.incr(key, len(rows)) |
|
420 | 422 | redis_pipeline.expire(key, 3600 * 24) |
|
421 | 423 | key = REDIS_KEYS["counters"]["events_per_minute_per_user"].format( |
|
422 | 424 | resource.owner_user_id, current_time |
|
423 | 425 | ) |
|
424 | 426 | redis_pipeline.incr(key, len(rows)) |
|
425 | 427 | redis_pipeline.expire(key, 3600) |
|
426 | 428 | key = REDIS_KEYS["counters"]["metrics_per_hour_per_app"].format( |
|
427 | 429 | resource_id, current_time.replace(minute=0) |
|
428 | 430 | ) |
|
429 | 431 | redis_pipeline.incr(key, len(rows)) |
|
430 | 432 | redis_pipeline.expire(key, 3600 * 24 * 7) |
|
431 | 433 | redis_pipeline.sadd( |
|
432 | 434 | REDIS_KEYS["apps_that_got_new_data_per_hour"].format( |
|
433 | 435 | current_time.replace(minute=0) |
|
434 | 436 | ), |
|
435 | 437 | resource_id, |
|
436 | 438 | ) |
|
437 | 439 | redis_pipeline.execute() |
|
438 | 440 | add_metrics_es(es_docs) |
|
439 | 441 | return True |
|
440 | 442 | except Exception as exc: |
|
441 | 443 | print_traceback(log) |
|
442 | 444 | if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]: |
|
443 | 445 | raise |
|
444 | 446 | add_metrics.retry(exc=exc) |
|
445 | 447 | |
|
446 | 448 | |
|
447 | 449 | @celery.task(queue="es", default_retry_delay=600, max_retries=144) |
|
448 | 450 | def add_metrics_es(es_docs): |
|
449 | 451 | for doc in es_docs: |
|
450 | 452 | partition = "rcae_m_%s" % doc["timestamp"].strftime("%Y_%m_%d") |
|
451 | 453 | Datastores.es.index(partition, "log", doc) |
|
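Metric documents go into daily partitions whose names are derived from the document timestamp, exactly as in the two lines above:

```python
from datetime import datetime

doc = {"timestamp": datetime(2017, 5, 1, 12, 30)}
partition = "rcae_m_%s" % doc["timestamp"].strftime("%Y_%m_%d")
print(partition)  # rcae_m_2017_05_01
```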
452 | 454 | |
|
453 | 455 | |
|
454 | 456 | @celery.task(queue="default", default_retry_delay=5, max_retries=2) |
|
455 | 457 | def check_user_report_notifications(resource_id): |
|
456 | 458 | since_when = datetime.utcnow() |
|
457 | 459 | try: |
|
458 | 460 | request = get_current_request() |
|
459 | 461 | application = ApplicationService.by_id(resource_id) |
|
460 | 462 | if not application: |
|
461 | 463 | return |
|
462 | 464 | error_key = REDIS_KEYS["reports_to_notify_per_type_per_app"].format( |
|
463 | 465 | ReportType.error, resource_id |
|
464 | 466 | ) |
|
465 | 467 | slow_key = REDIS_KEYS["reports_to_notify_per_type_per_app"].format( |
|
466 | 468 | ReportType.slow, resource_id |
|
467 | 469 | ) |
|
468 | 470 | error_group_ids = Datastores.redis.smembers(error_key) |
|
469 | 471 | slow_group_ids = Datastores.redis.smembers(slow_key) |
|
470 | 472 | Datastores.redis.delete(error_key) |
|
471 | 473 | Datastores.redis.delete(slow_key) |
|
472 | 474 | err_gids = [int(g_id) for g_id in error_group_ids] |
|
473 | 475 | slow_gids = [int(g_id) for g_id in list(slow_group_ids)] |
|
474 | 476 | group_ids = err_gids + slow_gids |
|
475 | 477 | occurence_dict = {} |
|
476 | 478 | for g_id in group_ids: |
|
477 | 479 | key = REDIS_KEYS["counters"]["report_group_occurences"].format(g_id) |
|
478 | 480 | val = Datastores.redis.get(key) |
|
479 | 481 | Datastores.redis.delete(key) |
|
480 | 482 | if val: |
|
481 | 483 | occurence_dict[g_id] = int(val) |
|
482 | 484 | else: |
|
483 | 485 | occurence_dict[g_id] = 1 |
|
484 | 486 | report_groups = ReportGroupService.by_ids(group_ids) |
|
485 | 487 | report_groups.options(sa.orm.joinedload(ReportGroup.last_report_ref)) |
|
486 | 488 | |
|
487 | 489 | ApplicationService.check_for_groups_alert( |
|
488 | 490 | application, |
|
489 | 491 | "alert", |
|
490 | 492 | report_groups=report_groups, |
|
491 | 493 | occurence_dict=occurence_dict, |
|
492 | 494 | ) |
|
493 | 495 | users = set( |
|
494 | 496 | [p.user for p in ResourceService.users_for_perm(application, "view")] |
|
495 | 497 | ) |
|
496 | 498 | report_groups = report_groups.all() |
|
497 | 499 | for user in users: |
|
498 | 500 | UserService.report_notify( |
|
499 | 501 | user, |
|
500 | 502 | request, |
|
501 | 503 | application, |
|
502 | 504 | report_groups=report_groups, |
|
503 | 505 | occurence_dict=occurence_dict, |
|
504 | 506 | ) |
|
505 | 507 | for group in report_groups: |
|
506 | 508 | # marks report_groups as notified |
|
507 | 509 | if not group.notified: |
|
508 | 510 | group.notified = True |
|
509 | 511 | except Exception as exc: |
|
510 | 512 | print_traceback(log) |
|
511 | 513 | raise |
|
512 | 514 | |
|
513 | 515 | |
|
514 | 516 | @celery.task(queue="default", default_retry_delay=5, max_retries=2) |
|
515 | 517 | def check_alerts(resource_id): |
|
516 | 518 | since_when = datetime.utcnow() |
|
517 | 519 | try: |
|
518 | 520 | request = get_current_request() |
|
519 | 521 | application = ApplicationService.by_id(resource_id) |
|
520 | 522 | if not application: |
|
521 | 523 | return |
|
522 | 524 | error_key = REDIS_KEYS["reports_to_notify_per_type_per_app_alerting"].format( |
|
523 | 525 | ReportType.error, resource_id |
|
524 | 526 | ) |
|
525 | 527 | slow_key = REDIS_KEYS["reports_to_notify_per_type_per_app_alerting"].format( |
|
526 | 528 | ReportType.slow, resource_id |
|
527 | 529 | ) |
|
528 | 530 | error_group_ids = Datastores.redis.smembers(error_key) |
|
529 | 531 | slow_group_ids = Datastores.redis.smembers(slow_key) |
|
530 | 532 | Datastores.redis.delete(error_key) |
|
531 | 533 | Datastores.redis.delete(slow_key) |
|
532 | 534 | err_gids = [int(g_id) for g_id in error_group_ids] |
|
533 | 535 | slow_gids = [int(g_id) for g_id in list(slow_group_ids)] |
|
534 | 536 | group_ids = err_gids + slow_gids |
|
535 | 537 | occurence_dict = {} |
|
536 | 538 | for g_id in group_ids: |
|
537 | 539 | key = REDIS_KEYS["counters"]["report_group_occurences_alerting"].format( |
|
538 | 540 | g_id |
|
539 | 541 | ) |
|
540 | 542 | val = Datastores.redis.get(key) |
|
541 | 543 | Datastores.redis.delete(key) |
|
542 | 544 | if val: |
|
543 | 545 | occurence_dict[g_id] = int(val) |
|
544 | 546 | else: |
|
545 | 547 | occurence_dict[g_id] = 1 |
|
546 | 548 | report_groups = ReportGroupService.by_ids(group_ids) |
|
547 | 549 | report_groups.options(sa.orm.joinedload(ReportGroup.last_report_ref)) |
|
548 | 550 | |
|
549 | 551 | ApplicationService.check_for_groups_alert( |
|
550 | 552 | application, |
|
551 | 553 | "alert", |
|
552 | 554 | report_groups=report_groups, |
|
553 | 555 | occurence_dict=occurence_dict, |
|
554 | 556 | since_when=since_when, |
|
555 | 557 | ) |
|
556 | 558 | except Exception as exc: |
|
557 | 559 | print_traceback(log) |
|
558 | 560 | raise |
|
559 | 561 | |
|
560 | 562 | |
|
561 | 563 | @celery.task(queue="default", default_retry_delay=1, max_retries=2) |
|
562 | 564 | def close_alerts(): |
|
563 | 565 | log.warning("Checking alerts") |
|
564 | 566 | since_when = datetime.utcnow() |
|
565 | 567 | try: |
|
566 | 568 | event_types = [ |
|
567 | 569 | Event.types["error_report_alert"], |
|
568 | 570 | Event.types["slow_report_alert"], |
|
569 | 571 | ] |
|
570 | 572 | statuses = [Event.statuses["active"]] |
|
571 | 573 | # get events older than 5 min |
|
572 | 574 | events = EventService.by_type_and_status( |
|
573 | 575 | event_types, statuses, older_than=(since_when - timedelta(minutes=5)) |
|
574 | 576 | ) |
|
575 | 577 | for event in events: |
|
576 | 578 | # see if we can close them |
|
577 | 579 | event.validate_or_close(since_when=(since_when - timedelta(minutes=1))) |
|
578 | 580 | except Exception as exc: |
|
579 | 581 | print_traceback(log) |
|
580 | 582 | raise |
|
581 | 583 | |
|
582 | 584 | |
|
583 | 585 | @celery.task(queue="default", default_retry_delay=600, max_retries=144) |
|
584 | 586 | def update_tag_counter(tag_name, tag_value, count): |
|
585 | 587 | try: |
|
586 | 588 | query = ( |
|
587 | 589 | DBSession.query(Tag) |
|
588 | 590 | .filter(Tag.name == tag_name) |
|
589 | 591 | .filter( |
|
590 | 592 | sa.cast(Tag.value, sa.types.TEXT) |
|
591 | 593 | == sa.cast(json.dumps(tag_value), sa.types.TEXT) |
|
592 | 594 | ) |
|
593 | 595 | ) |
|
594 | 596 | query.update( |
|
595 | 597 | {"times_seen": Tag.times_seen + count, "last_timestamp": datetime.utcnow()}, |
|
596 | 598 | synchronize_session=False, |
|
597 | 599 | ) |
|
598 | 600 | session = DBSession() |
|
599 | 601 | mark_changed(session) |
|
600 | 602 | return True |
|
601 | 603 | except Exception as exc: |
|
602 | 604 | print_traceback(log) |
|
603 | 605 | if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]: |
|
604 | 606 | raise |
|
605 | 607 | update_tag_counter.retry(exc=exc) |
|
606 | 608 | |
|
607 | 609 | |
|
608 | 610 | @celery.task(queue="default") |
|
609 | 611 | def update_tag_counters(): |
|
610 | 612 | """ |
|
611 | 613 | Sets task to update counters for application tags |
|
612 | 614 | """ |
|
613 | 615 | tags = Datastores.redis.lrange(REDIS_KEYS["seen_tag_list"], 0, -1) |
|
614 | 616 | Datastores.redis.delete(REDIS_KEYS["seen_tag_list"]) |
|
615 | 617 | c = collections.Counter(tags) |
|
616 | 618 | for t_json, count in c.items(): |
|
617 | 619 | tag_info = json.loads(t_json) |
|
618 | 620 | update_tag_counter.delay(tag_info[0], tag_info[1], count) |
|
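Tags arrive on the redis list as JSON-encoded `[name, value]` pairs, so identical pairs collapse into one counter entry and spawn one `update_tag_counter` task each. Illustration with made-up values:

```python
import collections
import json

tags = ['["browser", "firefox"]', '["browser", "firefox"]', '["os", "linux"]']
for t_json, count in collections.Counter(tags).items():
    name, value = json.loads(t_json)
    print(name, value, count)  # browser firefox 2 / os linux 1
```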
619 | 621 | |
|
620 | 622 | |
|
621 | 623 | @celery.task(queue="default") |
|
622 | 624 | def daily_digest(): |
|
623 | 625 | """ |
|
624 | 626 | Sends daily digest with top 50 error reports |
|
625 | 627 | """ |
|
626 | 628 | request = get_current_request() |
|
627 | 629 | apps = Datastores.redis.smembers(REDIS_KEYS["apps_that_had_reports"]) |
|
628 | 630 | Datastores.redis.delete(REDIS_KEYS["apps_that_had_reports"]) |
|
629 | 631 | since_when = datetime.utcnow() - timedelta(hours=8) |
|
630 | 632 | log.warning("Generating daily digests") |
|
631 | 633 | for resource_id in apps: |
|
632 | 634 | resource_id = resource_id.decode("utf8") |
|
633 | 635 | end_date = datetime.utcnow().replace(microsecond=0, second=0) |
|
634 | 636 | filter_settings = { |
|
635 | 637 | "resource": [resource_id], |
|
636 | 638 | "tags": [{"name": "type", "value": ["error"], "op": None}], |
|
637 | 639 | "type": "error", |
|
638 | 640 | "start_date": since_when, |
|
639 | 641 | "end_date": end_date, |
|
640 | 642 | } |
|
641 | 643 | |
|
642 | 644 | reports = ReportGroupService.get_trending( |
|
643 | 645 | request, filter_settings=filter_settings, limit=50 |
|
644 | 646 | ) |
|
645 | 647 | |
|
646 | 648 | application = ApplicationService.by_id(resource_id) |
|
647 | 649 | if application: |
|
648 | 650 | users = set( |
|
649 | 651 | [p.user for p in ResourceService.users_for_perm(application, "view")] |
|
650 | 652 | ) |
|
651 | 653 | for user in users: |
|
652 | 654 | user.send_digest( |
|
653 | 655 | request, application, reports=reports, since_when=since_when |
|
654 | 656 | ) |
|
655 | 657 | |
|
656 | 658 | |
|
657 | 659 | @celery.task(queue="default") |
|
658 | 660 | def notifications_reports(): |
|
659 | 661 | """ |
|
660 | 662 | Loop that checks redis for info and then issues new celery tasks to |
|
661 | 663 | send notifications |
|
662 | 664 | """ |
|
663 | 665 | apps = Datastores.redis.smembers(REDIS_KEYS["apps_that_had_reports"]) |
|
664 | 666 | Datastores.redis.delete(REDIS_KEYS["apps_that_had_reports"]) |
|
665 | 667 | for app in apps: |
|
666 | 668 | log.warning("Notify for app: %s" % app) |
|
667 | 669 | check_user_report_notifications.delay(app.decode("utf8")) |
|
668 | 670 | |
|
669 | 671 | |
|
670 | 672 | @celery.task(queue="default") |
|
671 | 673 | def alerting_reports(): |
|
672 | 674 | """ |
|
673 | 675 | Loop that checks redis for info and then issues new tasks to celery to |
|
674 | 676 | perform the following: |
|
675 | 677 | - which applications should have new alerts opened |
|
676 | 678 | """ |
|
677 | 679 | |
|
678 | 680 | apps = Datastores.redis.smembers(REDIS_KEYS["apps_that_had_reports_alerting"]) |
|
679 | 681 | Datastores.redis.delete(REDIS_KEYS["apps_that_had_reports_alerting"]) |
|
680 | 682 | for app in apps: |
|
681 | 683 | log.warning("Notify for app: %s" % app) |
|
682 | 684 | check_alerts.delay(app.decode("utf8")) |
|
683 | 685 | |
|
684 | 686 | |
|
685 | 687 | @celery.task( |
|
686 | 688 | queue="default", soft_time_limit=3600 * 4, hard_time_limit=3600 * 4, max_retries=144 |
|
687 | 689 | ) |
|
688 | 690 | def logs_cleanup(resource_id, filter_settings): |
|
689 | 691 | request = get_current_request() |
|
690 | 692 | request.tm.begin() |
|
691 | es_query = { | |
|
692 | "query": { | |
|
693 | "bool": {"filter": [{"term": {"resource_id": resource_id}}]} | |
|
694 | } | |
|
695 | } | |
|
693 | es_query = {"query": {"bool": {"filter": [{"term": {"resource_id": resource_id}}]}}} | |
|
696 | 694 | |
|
697 | 695 | query = DBSession.query(Log).filter(Log.resource_id == resource_id) |
|
698 | 696 | if filter_settings["namespace"]: |
|
699 | 697 | query = query.filter(Log.namespace == filter_settings["namespace"][0]) |
|
700 | 698 | es_query["query"]["bool"]["filter"].append( |
|
701 | 699 | {"term": {"namespace": filter_settings["namespace"][0]}} |
|
702 | 700 | ) |
|
703 | 701 | query.delete(synchronize_session=False) |
|
704 | 702 | request.tm.commit() |
|
705 | 703 | Datastores.es.delete_by_query( |
|
706 | 704 | index="rcae_l_*", |
|
707 | 705 | ) |
@@ -1,558 +1,560 @@
|
1 | 1 | # -*- coding: utf-8 -*- |
|
2 | 2 | |
|
3 | 3 | # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors |
|
4 | 4 | # |
|
5 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 | 6 | # you may not use this file except in compliance with the License. |
|
7 | 7 | # You may obtain a copy of the License at |
|
8 | 8 | # |
|
9 | 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
|
10 | 10 | # |
|
11 | 11 | # Unless required by applicable law or agreed to in writing, software |
|
12 | 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
|
13 | 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 | 14 | # See the License for the specific language governing permissions and |
|
15 | 15 | # limitations under the License. |
|
16 | 16 | |
|
17 | 17 | """ |
|
18 | 18 | Utility functions. |
|
19 | 19 | """ |
|
20 | 20 | import logging |
|
21 | 21 | import requests |
|
22 | 22 | import hashlib |
|
23 | 23 | import json |
|
24 | 24 | import copy |
|
25 | 25 | import uuid |
|
26 | 26 | import appenlight.lib.helpers as h |
|
27 | 27 | from collections import namedtuple |
|
28 | 28 | from datetime import timedelta, datetime, date |
|
29 | 29 | from dogpile.cache.api import NO_VALUE |
|
30 | 30 | from appenlight.models import Datastores |
|
31 | 31 | from appenlight.validators import LogSearchSchema, TagListSchema, accepted_search_params |
|
32 | 32 | from itsdangerous import TimestampSigner |
|
33 | 33 | from ziggurat_foundations.permissions import ALL_PERMISSIONS |
|
34 | 34 | from ziggurat_foundations.models.services.user import UserService |
|
35 | 35 | from dateutil.relativedelta import relativedelta |
|
36 | 36 | from dateutil.rrule import rrule, MONTHLY, DAILY |
|
37 | 37 | |
|
38 | 38 | log = logging.getLogger(__name__) |
|
39 | 39 | |
|
40 | 40 | |
|
41 | 41 | Stat = namedtuple("Stat", "start_interval value") |
|
42 | 42 | |
|
43 | 43 | |
|
44 | 44 | def default_extractor(item): |
|
45 | 45 | """ |
|
46 | 46 | :param item - item to extract date from |
|
47 | 47 | """ |
|
48 | 48 | if hasattr(item, "start_interval"): |
|
49 | 49 | return item.start_interval |
|
50 | 50 | return item["start_interval"] |
|
51 | 51 | |
|
52 | 52 | |
|
53 | 53 | # fast gap generator |
|
54 | 54 | def gap_gen_default(start, step, itemiterator, end_time=None, iv_extractor=None): |
|
55 | 55 | """ generates a list of time/value items based on step and itemiterator |
|
56 | 56 | if there are entries missing from iterator time/None will be returned |
|
57 | 57 | instead |
|
58 | 58 | :param start - datetime - what time should we start generating our values |
|
59 | 59 | :param step - timedelta - stepsize |
|
60 | 60 | :param itemiterator - iterable - we will check this iterable for values |
|
61 | 61 | corresponding to generated steps |
|
62 | 62 | :param end_time - datetime - when last step is >= end_time stop iterating |
|
63 | 63 | :param iv_extractor - extracts current step from iterable items |
|
64 | 64 | """ |
|
65 | 65 | |
|
66 | 66 | if not iv_extractor: |
|
67 | 67 | iv_extractor = default_extractor |
|
68 | 68 | |
|
69 | 69 | next_step = start |
|
70 | 70 | minutes = step.total_seconds() / 60.0 |
|
71 | 71 | while next_step.minute % minutes != 0: |
|
72 | 72 | next_step = next_step.replace(minute=next_step.minute - 1) |
|
73 | 73 | for item in itemiterator: |
|
74 | 74 | item_start_interval = iv_extractor(item) |
|
75 | 75 | # do we have a match for current time step in our data? |
|
76 | 76 | # no gen a new tuple with 0 values |
|
77 | 77 | while next_step < item_start_interval: |
|
78 | 78 | yield Stat(next_step, None) |
|
79 | 79 | next_step = next_step + step |
|
80 | 80 | if next_step == item_start_interval: |
|
81 | 81 | yield Stat(item_start_interval, item) |
|
82 | 82 | next_step = next_step + step |
|
83 | 83 | if end_time: |
|
84 | 84 | while next_step < end_time: |
|
85 | 85 | yield Stat(next_step, None) |
|
86 | 86 | next_step = next_step + step |
|
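Usage sketch for the gap generator: items exist for 12:00 and 12:10, and the generator fills the missing 5-minute steps with `None` values (module path assumed):

```python
from datetime import datetime, timedelta
from appenlight.lib.utils import gap_gen_default  # module path assumed

items = [{"start_interval": datetime(2017, 5, 1, 12, 0)},
         {"start_interval": datetime(2017, 5, 1, 12, 10)}]
gen = gap_gen_default(datetime(2017, 5, 1, 12, 0), timedelta(minutes=5),
                      items, end_time=datetime(2017, 5, 1, 12, 20))
for stat in gen:
    print(stat.start_interval.strftime("%H:%M"), stat.value is not None)
# 12:00 True / 12:05 False / 12:10 True / 12:15 False
```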
87 | 87 | |
|
88 | 88 | |
|
89 | 89 | class DateTimeEncoder(json.JSONEncoder): |
|
90 | 90 | """ Simple datetime to ISO encoder for json serialization""" |
|
91 | 91 | |
|
92 | 92 | def default(self, obj): |
|
93 | 93 | if isinstance(obj, date): |
|
94 | 94 | return obj.isoformat() |
|
95 | 95 | if isinstance(obj, datetime): |
|
96 | 96 | return obj.isoformat() |
|
97 | 97 | return json.JSONEncoder.default(self, obj) |
|
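`json.dumps` raises `TypeError` on datetimes by default; the encoder above serializes them as ISO 8601 strings instead (module path assumed):

```python
import json
from datetime import datetime
from appenlight.lib.utils import DateTimeEncoder  # module path assumed

payload = {"ts": datetime(2017, 5, 1, 12, 0)}
print(json.dumps(payload, cls=DateTimeEncoder))  # {"ts": "2017-05-01T12:00:00"}
```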
98 | 98 | |
|
99 | 99 | |
|
100 | 100 | def channelstream_request( |
|
101 | 101 | secret, endpoint, payload, throw_exceptions=False, servers=None |
|
102 | 102 | ): |
|
103 | 103 | responses = [] |
|
104 | 104 | if not servers: |
|
105 | 105 | servers = [] |
|
106 | 106 | |
|
107 | 107 | signer = TimestampSigner(secret) |
|
108 | 108 | sig_for_server = signer.sign(endpoint) |
|
109 | 109 | for secret, server in [(s["secret"], s["server"]) for s in servers]: |
|
110 | 110 | response = {} |
|
111 | 111 | secret_headers = { |
|
112 | 112 | "x-channelstream-secret": sig_for_server, |
|
113 | 113 | "x-channelstream-endpoint": endpoint, |
|
114 | 114 | "Content-Type": "application/json", |
|
115 | 115 | } |
|
116 | 116 | url = "%s%s" % (server, endpoint) |
|
117 | 117 | try: |
|
118 | 118 | response = requests.post( |
|
119 | 119 | url, |
|
120 | 120 | data=json.dumps(payload, cls=DateTimeEncoder), |
|
121 | 121 | headers=secret_headers, |
|
122 | 122 | verify=False, |
|
123 | 123 | timeout=2, |
|
124 | 124 | ).json() |
|
125 | 125 | except requests.exceptions.RequestException as e: |
|
126 | 126 | if throw_exceptions: |
|
127 | 127 | raise |
|
128 | 128 | responses.append(response) |
|
129 | 129 | return responses |
|
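The `x-channelstream-secret` header carries a timestamped signature of the endpoint path. A sketch of the verification the receiving server could perform with the same shared secret (the `max_age` value is illustrative):

```python
from itsdangerous import TimestampSigner, SignatureExpired, BadSignature

signer = TimestampSigner("shared-secret")
sig_for_server = signer.sign("/message")  # what the client sends in the header
try:
    # reject signatures older than 60 seconds or signed with a different secret
    endpoint = signer.unsign(sig_for_server, max_age=60)
except (SignatureExpired, BadSignature):
    endpoint = None
```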
130 | 130 | |
|
131 | 131 | |
|
132 | 132 | def add_cors_headers(response): |
|
133 | 133 | # allow CORS |
|
134 | 134 | response.headers.add("Access-Control-Allow-Origin", "*") |
|
135 | 135 | response.headers.add("XDomainRequestAllowed", "1") |
|
136 | 136 | response.headers.add("Access-Control-Allow-Methods", "GET, POST, OPTIONS") |
|
137 | 137 | # response.headers.add('Access-Control-Allow-Credentials', 'true') |
|
138 | 138 | response.headers.add( |
|
139 | 139 | "Access-Control-Allow-Headers", |
|
140 | 140 | "Content-Type, Depth, User-Agent, X-File-Size, X-Requested-With, If-Modified-Since, X-File-Name, Cache-Control, Pragma, Origin, Connection, Referer, Cookie", |
|
141 | 141 | ) |
|
142 | 142 | response.headers.add("Access-Control-Max-Age", "86400") |
|
143 | 143 | |
|
144 | 144 | |
|
145 | 145 | from sqlalchemy.sql import compiler |
|
146 | 146 | from psycopg2.extensions import adapt as sqlescape |
|
147 | 147 | |
|
148 | 148 | |
|
149 | 149 | # or use the appropriate escape function from your db driver |
|
150 | 150 | |
|
151 | 151 | |
|
152 | 152 | def compile_query(query): |
|
153 | 153 | dialect = query.session.bind.dialect |
|
154 | 154 | statement = query.statement |
|
155 | 155 | comp = compiler.SQLCompiler(dialect, statement) |
|
156 | 156 | comp.compile() |
|
157 | 157 | enc = dialect.encoding |
|
158 | 158 | params = {} |
|
159 | 159 | for k, v in comp.params.items(): |
|
160 | 160 | if isinstance(v, str): |
|
161 | 161 | v = v.encode(enc) |
|
162 | 162 | params[k] = sqlescape(v) |
|
163 | 163 | return (comp.string.encode(enc) % params).decode(enc) |
|
164 | 164 | |
|
165 | 165 | |
|
166 | 166 | def convert_es_type(input_data): |
|
167 | 167 | """ |
|
168 | 168 | This might need to convert some text or other types to corresponding ES types |
|
169 | 169 | """ |
|
170 | 170 | return str(input_data) |
|
171 | 171 | |
|
172 | 172 | |
|
173 | 173 | ProtoVersion = namedtuple("ProtoVersion", ["major", "minor", "patch"]) |
|
174 | 174 | |
|
175 | 175 | |
|
176 | 176 | def parse_proto(input_data): |
|
177 | 177 | try: |
|
178 | 178 | parts = [int(x) for x in input_data.split(".")] |
|
179 | 179 | while len(parts) < 3: |
|
180 | 180 | parts.append(0) |
|
181 | 181 | return ProtoVersion(*parts) |
|
182 | 182 | except Exception as e: |
|
183 | 183 | log.info("Unknown protocol version: %s" % e) |
|
184 | 184 | return ProtoVersion(99, 99, 99) |
|
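Missing version components default to zero, and anything unparsable maps to the sentinel `99.99.99` (module path assumed):

```python
from appenlight.lib.utils import parse_proto  # module path assumed

print(parse_proto("0.5"))    # ProtoVersion(major=0, minor=5, patch=0)
print(parse_proto("1.2.3"))  # ProtoVersion(major=1, minor=2, patch=3)
print(parse_proto("bogus"))  # ProtoVersion(major=99, minor=99, patch=99)
```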
185 | 185 | |
|
186 | 186 | |
|
187 | 187 | def es_index_name_limiter( |
|
188 | 188 | start_date=None, end_date=None, months_in_past=6, ixtypes=None |
|
189 | 189 | ): |
|
190 | 190 | """ |
|
191 | 191 | This function limits the search to 6 months by default so we don't have to |
|
192 | 192 | query 300 elasticsearch indices for 20 years of historical data for example |
|
193 | 193 | """ |
|
194 | 194 | |
|
195 | 195 | # should be cached later |
|
196 | 196 | def get_possible_names(): |
|
197 | 197 | return list(Datastores.es.indices.get_alias("*")) |
|
198 | 198 | |
|
199 | 199 | possible_names = get_possible_names() |
|
200 | 200 | es_index_types = [] |
|
201 | 201 | if not ixtypes: |
|
202 | 202 | ixtypes = ["reports", "metrics", "logs"] |
|
203 | 203 | for t in ixtypes: |
|
204 | 204 | if t == "reports": |
|
205 | 205 | es_index_types.append("rcae_r_%s") |
|
206 | 206 | elif t == "logs": |
|
207 | 207 | es_index_types.append("rcae_l_%s") |
|
208 | 208 | elif t == "metrics": |
|
209 | 209 | es_index_types.append("rcae_m_%s") |
|
210 | 210 | elif t == "uptime": |
|
211 | 211 | es_index_types.append("rcae_uptime_ce_%s") |
|
212 | 212 | elif t == "slow_calls": |
|
213 | 213 | es_index_types.append("rcae_sc_%s") |
|
214 | 214 | |
|
215 | 215 | if start_date: |
|
216 | 216 | start_date = copy.copy(start_date) |
|
217 | 217 | else: |
|
218 | 218 | if not end_date: |
|
219 | 219 | end_date = datetime.utcnow() |
|
220 | 220 | start_date = end_date + relativedelta(months=months_in_past * -1) |
|
221 | 221 | |
|
222 | 222 | if not end_date: |
|
223 | 223 | end_date = start_date + relativedelta(months=months_in_past) |
|
224 | 224 | |
|
225 | 225 | index_dates = list( |
|
226 | 226 | rrule( |
|
227 | 227 | MONTHLY, |
|
228 | 228 | dtstart=start_date.date().replace(day=1), |
|
229 | 229 | until=end_date.date(), |
|
230 | 230 | count=36, |
|
231 | 231 | ) |
|
232 | 232 | ) |
|
233 | 233 | index_names = [] |
|
234 | 234 | for ix_type in es_index_types: |
|
235 | 235 | to_extend = [ |
|
236 | 236 | ix_type % d.strftime("%Y_%m") |
|
237 | 237 | for d in index_dates |
|
238 | 238 | if ix_type % d.strftime("%Y_%m") in possible_names |
|
239 | 239 | ] |
|
240 | 240 | index_names.extend(to_extend) |
|
241 | 241 | for day in list( |
|
242 | 242 | rrule(DAILY, dtstart=start_date.date(), until=end_date.date(), count=366) |
|
243 | 243 | ): |
|
244 | 244 | ix_name = ix_type % day.strftime("%Y_%m_%d") |
|
245 | 245 | if ix_name in possible_names: |
|
246 | 246 | index_names.append(ix_name) |
|
247 | 247 | return index_names |
|
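A sketch of how the monthly candidate names are produced before being filtered against the indices that actually exist in the cluster (the dates are made up):

```python
from datetime import datetime
from dateutil.rrule import rrule, MONTHLY

start, end = datetime(2017, 3, 15), datetime(2017, 5, 10)
names = ["rcae_r_%s" % d.strftime("%Y_%m")
         for d in rrule(MONTHLY, dtstart=start.date().replace(day=1),
                        until=end.date())]
print(names)  # ['rcae_r_2017_03', 'rcae_r_2017_04', 'rcae_r_2017_05']
```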
248 | 248 | |
|
249 | 249 | |
|
250 | 250 | def build_filter_settings_from_query_dict( |
|
251 | 251 | request, params=None, override_app_ids=None, resource_permissions=None |
|
252 | 252 | ): |
|
253 | 253 | """ |
|
254 | 254 | Builds list of normalized search terms for ES from query params |
|
255 | 255 | ensuring application list is restricted to only applications user |
|
256 | 256 | has access to |
|
257 | 257 | |
|
258 | 258 | :param params (dictionary) |
|
259 | 259 | :param override_app_ids - list of application id's to use instead of |
|
260 | 260 | applications user normally has access to |
|
261 | 261 | """ |
|
262 | 262 | params = copy.deepcopy(params) |
|
263 | 263 | applications = [] |
|
264 | 264 | if not resource_permissions: |
|
265 | 265 | resource_permissions = ["view"] |
|
266 | 266 | |
|
267 | 267 | if request.user: |
|
268 | 268 | applications = UserService.resources_with_perms( |
|
269 | 269 | request.user, resource_permissions, resource_types=["application"] |
|
270 | 270 | ) |
|
271 | 271 | |
|
272 | 272 | # CRITICAL - this ensures our resultset is limited to only the ones |
|
273 | 273 | # user has view permissions |
|
274 | 274 | all_possible_app_ids = set([app.resource_id for app in applications]) |
|
275 | 275 | |
|
276 | 276 | # if override is preset we force permission for app to be present |
|
277 | 277 | # this allows users to see dashboards and applications they would |
|
278 | 278 | # normally not be able to |
|
279 | 279 | |
|
280 | 280 | if override_app_ids: |
|
281 | 281 | all_possible_app_ids = set(override_app_ids) |
|
282 | 282 | |
|
283 | 283 | schema = LogSearchSchema().bind(resources=all_possible_app_ids) |
|
284 | 284 | tag_schema = TagListSchema() |
|
285 | 285 | filter_settings = schema.deserialize(params) |
|
286 | 286 | tag_list = [] |
|
287 | 287 | for k, v in list(filter_settings.items()): |
|
288 | 288 | if k in accepted_search_params: |
|
289 | 289 | continue |
|
290 | 290 | tag_list.append({"name": k, "value": v, "op": "eq"}) |
|
291 | 291 | # remove the key from filter_settings |
|
292 | 292 | filter_settings.pop(k, None) |
|
293 | 293 | tags = tag_schema.deserialize(tag_list) |
|
294 | 294 | filter_settings["tags"] = tags |
|
295 | 295 | return filter_settings |
|
296 | 296 | |
|
297 | 297 | |
|
298 | 298 | def gen_uuid(): |
|
299 | 299 | return str(uuid.uuid4()) |
|
300 | 300 | |
|
301 | 301 | |
|
302 | 302 | def gen_uuid4_sha_hex(): |
|
303 | 303 | return hashlib.sha1(uuid.uuid4().bytes).hexdigest() |
|
304 | 304 | |
|
305 | 305 | |
|
306 | 306 | def permission_tuple_to_dict(data): |
|
307 | 307 | out = { |
|
308 | 308 | "user_name": None, |
|
309 | 309 | "perm_name": data.perm_name, |
|
310 | 310 | "owner": data.owner, |
|
311 | 311 | "type": data.type, |
|
312 | 312 | "resource_name": None, |
|
313 | 313 | "resource_type": None, |
|
314 | 314 | "resource_id": None, |
|
315 | 315 | "group_name": None, |
|
316 | 316 | "group_id": None, |
|
317 | 317 | } |
|
318 | 318 | if data.user: |
|
319 | 319 | out["user_name"] = data.user.user_name |
|
320 | 320 | if data.perm_name == ALL_PERMISSIONS: |
|
321 | 321 | out["perm_name"] = "__all_permissions__" |
|
322 | 322 | if data.resource: |
|
323 | 323 | out["resource_name"] = data.resource.resource_name |
|
324 | 324 | out["resource_type"] = data.resource.resource_type |
|
325 | 325 | out["resource_id"] = data.resource.resource_id |
|
326 | 326 | if data.group: |
|
327 | 327 | out["group_name"] = data.group.group_name |
|
328 | 328 | out["group_id"] = data.group.id |
|
329 | 329 | return out |
|
330 | 330 | |
|
331 | 331 | |
|
332 | 332 | def get_cached_buckets( |
|
333 | 333 | request, |
|
334 | 334 | stats_since, |
|
335 | 335 | end_time, |
|
336 | 336 | fn, |
|
337 | 337 | cache_key, |
|
338 | 338 | gap_gen=None, |
|
339 | 339 | db_session=None, |
|
340 | 340 | step_interval=None, |
|
341 | 341 | iv_extractor=None, |
|
342 | 342 | rerange=False, |
|
343 | 343 | *args, |
|
344 | 344 | **kwargs |
|
345 | 345 | ): |
|
346 | 346 | """ Takes "fn" that should return some data and tries to load the data |
|
347 | 347 | dividing it into daily buckets - if the stats_since and end time give a |
|
348 | 348 | delta bigger than 24 hours, then only today's data is computed on the fly |
|
349 | 349 | |
|
350 | 350 | :param request: (request) request object |
|
351 | 351 | :param stats_since: (datetime) start date of buckets range |
|
352 | 352 | :param end_time: (datetime) end date of buckets range - utcnow() if None |
|
353 | 353 | :param fn: (callable) callable to use to populate buckets should have |
|
354 | 354 | following signature: |
|
355 | 355 | def get_data(request, since_when, until, *args, **kwargs): |
|
356 | 356 | |
|
357 | 357 | :param cache_key: (string) cache key that will be used to build bucket |
|
358 | 358 | caches |
|
359 | 359 | :param gap_gen: (callable) gap generator - should return step intervals |
|
360 | 360 | to use with our `fn` callable |
|
361 | 361 | :param db_session: (Session) sqlalchemy session |
|
362 | 362 | :param step_interval: (timedelta) optional step interval if we want to |
|
363 | 363 | override the default determined from total start/end time delta |
|
364 | 364 | :param iv_extractor: (callable) used to get step intervals from data |
|
365 | 365 | returned by `fn` callable |
|
366 | 366 | :param rerange: (bool) handy if we want to change ranges from hours to |
|
367 | 367 | days when cached data is missing - will shorten execution time if `fn` |
|
368 | 368 | callable supports that and we are working with multiple rows - like metrics |
|
369 | 369 | :param args: |
|
370 | 370 | :param kwargs: |
|
371 | 371 | |
|
372 | 372 | :return: iterable |
|
373 | 373 | """ |
|
374 | 374 | if not end_time: |
|
375 | 375 | end_time = datetime.utcnow().replace(second=0, microsecond=0) |
|
376 | 376 | delta = end_time - stats_since |
|
377 | 377 | # if smaller than 3 days we want to group by 5min else by 1h, |
|
378 | 378 | # for 60 min group by min |
|
379 | 379 | if not gap_gen: |
|
380 | 380 | gap_gen = gap_gen_default |
|
381 | 381 | if not iv_extractor: |
|
382 | 382 | iv_extractor = default_extractor |
|
383 | 383 | |
|
384 | 384 | # do not use custom interval if total time range with new iv would exceed |
|
385 | 385 | # end time |
|
386 | 386 | if not step_interval or stats_since + step_interval >= end_time: |
|
387 | 387 | if delta < h.time_deltas.get("12h")["delta"]: |
|
388 | 388 | step_interval = timedelta(seconds=60) |
|
389 | 389 | elif delta < h.time_deltas.get("3d")["delta"]: |
|
390 | 390 | step_interval = timedelta(seconds=60 * 5) |
|
391 | 391 | elif delta > h.time_deltas.get("2w")["delta"]: |
|
392 | 392 | step_interval = timedelta(days=1) |
|
393 | 393 | else: |
|
394 | 394 | step_interval = timedelta(minutes=60) |
|
395 | 395 | |
|
396 | 396 | if step_interval >= timedelta(minutes=60): |
|
397 | 397 | log.info( |
|
398 | 398 | "cached_buckets:{}: adjusting start time " |
|
399 | 399 | "for hourly or daily intervals".format(cache_key) |
|
400 | 400 | ) |
|
401 | 401 | stats_since = stats_since.replace(hour=0, minute=0) |
|
402 | 402 | |
|
403 | 403 | ranges = [ |
|
404 | 404 | i.start_interval |
|
405 | 405 | for i in list(gap_gen(stats_since, step_interval, [], end_time=end_time)) |
|
406 | 406 | ] |
|
407 | 407 | buckets = {} |
|
408 | 408 | storage_key = "buckets:" + cache_key + "{}|{}" |
|
409 | 409 | # this means we basically cache per hour in 3-14 day intervals, but I think |
|
410 | 410 | # it's fine at this point - will be faster than db access anyway |
|
411 | 411 | |
|
412 | 412 | if len(ranges) >= 1: |
|
413 | 413 | last_ranges = [ranges[-1]] |
|
414 | 414 | else: |
|
415 | 415 | last_ranges = [] |
|
416 | 416 | if step_interval >= timedelta(minutes=60): |
|
417 | 417 | for r in ranges: |
|
418 | 418 | k = storage_key.format(step_interval.total_seconds(), r) |
|
419 | 419 | value = request.registry.cache_regions.redis_day_30.get(k) |
|
420 | 420 | # last buckets are never loaded from cache |
|
421 | 421 | is_last_result = r >= end_time - timedelta(hours=6) or r in last_ranges |
|
422 | 422 | if value is not NO_VALUE and not is_last_result: |
|
423 | 423 | log.info( |
|
424 | 424 | "cached_buckets:{}: " |
|
425 | 425 | "loading range {} from cache".format(cache_key, r) |
|
426 | 426 | ) |
|
427 | 427 | buckets[r] = value |
|
428 | 428 | else: |
|
429 | 429 | log.info( |
|
430 | 430 | "cached_buckets:{}: " |
|
431 | 431 | "loading range {} from storage".format(cache_key, r) |
|
432 | 432 | ) |
|
433 | 433 | range_size = step_interval |
|
434 | 434 | if ( |
|
435 | 435 | step_interval == timedelta(minutes=60) |
|
436 | 436 | and not is_last_result |
|
437 | 437 | and rerange |
|
438 | 438 | ): |
|
439 | 439 | range_size = timedelta(days=1) |
|
440 | 440 | r = r.replace(hour=0, minute=0) |
|
441 | 441 | log.info( |
|
442 | 442 | "cached_buckets:{}: " |
|
443 | 443 | "loading collapsed " |
|
444 | 444 | "range {} {}".format(cache_key, r, r + range_size) |
|
445 | 445 | ) |
|
446 | 446 | bucket_data = fn( |
|
447 | 447 | request, |
|
448 | 448 | r, |
|
449 | 449 | r + range_size, |
|
450 | 450 | step_interval, |
|
451 | 451 | gap_gen, |
|
452 | 452 | bucket_count=len(ranges), |
|
453 | 453 | *args, |
|
454 | 454 | **kwargs |
|
455 | 455 | ) |
|
456 | 456 | for b in bucket_data: |
|
457 | 457 | b_iv = iv_extractor(b) |
|
458 | 458 | buckets[b_iv] = b |
|
459 | 459 | k2 = storage_key.format(step_interval.total_seconds(), b_iv) |
|
460 | 460 | request.registry.cache_regions.redis_day_30.set(k2, b) |
|
461 | 461 | log.info("cached_buckets:{}: saving cache".format(cache_key)) |
|
462 | 462 | else: |
|
463 | 463 | # bucket count is 1 for short time ranges <= 24h from now |
|
464 | 464 | bucket_data = fn( |
|
465 | 465 | request, |
|
466 | 466 | stats_since, |
|
467 | 467 | end_time, |
|
468 | 468 | step_interval, |
|
469 | 469 | gap_gen, |
|
470 | 470 | bucket_count=1, |
|
471 | 471 | *args, |
|
472 | 472 | **kwargs |
|
473 | 473 | ) |
|
474 | 474 | for b in bucket_data: |
|
475 | 475 | buckets[iv_extractor(b)] = b |
|
476 | 476 | return buckets |
|
477 | 477 | |
|
478 | 478 | |
|
479 | 479 | def get_cached_split_data( |
|
480 | 480 | request, stats_since, end_time, fn, cache_key, db_session=None, *args, **kwargs |
|
481 | 481 | ): |
|
482 | 482 | """ Takes "fn" that should return some data and tries to load the data |
|
483 | 483 | dividing it into 2 buckets - cached "since_from" bucket and "today" |
|
484 | 484 | bucket - then the data can be reduced into single value |
|
485 | 485 | |
|
486 | 486 | Data is cached if the stats_since and end time give a delta bigger |
|
487 | 487 | than 24 hours - then only the last 24h is computed on the fly |
|
488 | 488 | """ |
|
489 | 489 | if not end_time: |
|
490 | 490 | end_time = datetime.utcnow().replace(second=0, microsecond=0) |
|
491 | 491 | delta = end_time - stats_since |
|
492 | 492 | |
|
493 | 493 | if delta >= timedelta(minutes=60): |
|
494 | 494 | log.info( |
|
495 | 495 | "cached_split_data:{}: adjusting start time " |
|
496 | 496 | "for hourly or daily intervals".format(cache_key) |
|
497 | 497 | ) |
|
498 | 498 | stats_since = stats_since.replace(hour=0, minute=0) |
|
499 | 499 | |
|
500 | 500 | storage_key = "buckets_split_data:" + cache_key + ":{}|{}" |
|
501 | 501 | old_end_time = end_time.replace(hour=0, minute=0) |
|
502 | 502 | |
|
503 | 503 | final_storage_key = storage_key.format(delta.total_seconds(), old_end_time) |
|
504 | 504 | older_data = None |
|
505 | 505 | |
|
506 | 506 | cdata = request.registry.cache_regions.redis_day_7.get(final_storage_key) |
|
507 | 507 | |
|
508 | 508 | if cdata: |
|
509 | 509 | log.info("cached_split_data:{}: found old " "bucket data".format(cache_key)) |
|
510 | 510 | older_data = cdata |
|
511 | 511 | |
|
512 | 512 | if stats_since < end_time - h.time_deltas.get("24h")["delta"] and not cdata: |
|
513 | 513 | log.info( |
|
514 | 514 | "cached_split_data:{}: didn't find the " |
|
515 | 515 | "start bucket in cache so load older data".format(cache_key) |
|
516 | 516 | ) |
|
517 | 517 | recent_stats_since = old_end_time |
|
518 | 518 | older_data = fn( |
|
519 | 519 | request, |
|
520 | 520 | stats_since, |
|
521 | 521 | recent_stats_since, |
|
522 | 522 | db_session=db_session, |
|
523 | 523 | *args, |
|
524 | 524 | **kwargs |
|
525 | 525 | ) |
|
526 | 526 | request.registry.cache_regions.redis_day_7.set(final_storage_key, older_data) |
|
527 | 527 | elif stats_since < end_time - h.time_deltas.get("24h")["delta"]: |
|
528 | 528 | recent_stats_since = old_end_time |
|
529 | 529 | else: |
|
530 | 530 | recent_stats_since = stats_since |
|
531 | 531 | |
|
532 | 532 | log.info( |
|
533 | 533 | "cached_split_data:{}: loading fresh " |
|
534 | 534 | "data bucksts from last 24h ".format(cache_key) |
|
535 | 535 | ) |
|
536 | 536 | todays_data = fn( |
|
537 | 537 | request, recent_stats_since, end_time, db_session=db_session, *args, **kwargs |
|
538 | 538 | ) |
|
539 | 539 | return older_data, todays_data |
|
540 | 540 | |
|
541 | 541 | |
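The caller is expected to reduce the two returned buckets into one value. A hedged usage sketch, where count_rows and the surrounding request/date variables are hypothetical:

    def count_rows(request, since, until, db_session=None):
        # hypothetical "fn": count events between `since` and `until`
        return 0

    # older comes from the redis_day_7 cache when available; today is
    # always computed fresh for at most the last 24h window
    older, today = get_cached_split_data(
        request, stats_since, end_time, count_rows, "events_count"
    )
    total = (older or 0) + (today or 0)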
|
542 | 542 | def in_batches(seq, size): |
|
543 | 543 | """ |
|
544 | 544 | Splits a sliceable sequence into batches of the specified size 

545 | 545 | :param seq: sequence supporting slicing (list, tuple) 

546 | 546 | :param size: int batch size 
|
547 | 547 | """ |
|
548 | 548 | return (seq[pos : pos + size] for pos in range(0, len(seq), size)) |
|
549 | 549 | |
|
550 | 550 | |
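For example:

    >>> list(in_batches(list(range(7)), 3))
    [[0, 1, 2], [3, 4, 5], [6]]

Note that the generator slices, so seq must be an actual sequence rather than an arbitrary iterator.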
|
551 | 551 | def get_es_info(cache_regions, es_conn): |
|
552 | 552 | @cache_regions.memory_min_10.cache_on_arguments() |
|
553 | 553 | def get_es_info_cached(): |
|
554 | 554 | returned_info = {"raw_info": es_conn.info()} |
|
555 | returned_info["version"] = returned_info["raw_info"]["version"]["number"].split(".") | 

555 | returned_info["version"] = returned_info["raw_info"]["version"]["number"].split( | |
|
556 | "." | |
|
557 | ) | |
|
556 | 558 | return returned_info |
|
557 | 559 | |
|
558 | 560 | return get_es_info_cached() |
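Because the result is memoized (the memory_min_10 region suggests a ten-minute window), callers can check the cluster version cheaply. A sketch of how the split version list might be consumed, with an illustrative threshold:

    info = get_es_info(cache_regions, es_conn)
    # info["version"] is a list of strings after the split, e.g. ["6", "8", "2"]
    if int(info["version"][0]) < 6:
        log.warning("Elasticsearch major version is older than expected")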
@@ -1,534 +1,534 b'' | |||
|
1 | 1 | # -*- coding: utf-8 -*- |
|
2 | 2 | |
|
3 | 3 | # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors |
|
4 | 4 | # |
|
5 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 | 6 | # you may not use this file except in compliance with the License. |
|
7 | 7 | # You may obtain a copy of the License at |
|
8 | 8 | # |
|
9 | 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
|
10 | 10 | # |
|
11 | 11 | # Unless required by applicable law or agreed to in writing, software |
|
12 | 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
|
13 | 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 | 14 | # See the License for the specific language governing permissions and |
|
15 | 15 | # limitations under the License. |
|
16 | 16 | |
|
17 | 17 | from datetime import datetime, timedelta |
|
18 | 18 | import math |
|
19 | 19 | import uuid |
|
20 | 20 | import hashlib |
|
21 | 21 | import copy |
|
22 | 22 | import urllib.parse |
|
23 | 23 | import logging |
|
24 | 24 | import sqlalchemy as sa |
|
25 | 25 | |
|
26 | 26 | from appenlight.models import Base, Datastores |
|
27 | 27 | from appenlight.lib.utils.date_utils import convert_date |
|
28 | 28 | from appenlight.lib.utils import convert_es_type |
|
29 | 29 | from appenlight.models.slow_call import SlowCall |
|
30 | 30 | from appenlight.lib.utils import channelstream_request |
|
31 | 31 | from appenlight.lib.enums import ReportType, Language |
|
32 | 32 | from pyramid.threadlocal import get_current_registry, get_current_request |
|
33 | 33 | from sqlalchemy.dialects.postgresql import JSON |
|
34 | 34 | from ziggurat_foundations.models.base import BaseModel |
|
35 | 35 | |
|
36 | 36 | log = logging.getLogger(__name__) |
|
37 | 37 | |
|
38 | 38 | REPORT_TYPE_MATRIX = { |
|
39 | 39 | "http_status": {"type": "int", "ops": ("eq", "ne", "ge", "le")}, |
|
40 | 40 | "group:priority": {"type": "int", "ops": ("eq", "ne", "ge", "le")}, |
|
41 | 41 | "duration": {"type": "float", "ops": ("ge", "le")}, |
|
42 | 42 | "url_domain": { |
|
43 | 43 | "type": "unicode", |
|
44 | 44 | "ops": ("eq", "ne", "startswith", "endswith", "contains"), |
|
45 | 45 | }, |
|
46 | 46 | "url_path": { |
|
47 | 47 | "type": "unicode", |
|
48 | 48 | "ops": ("eq", "ne", "startswith", "endswith", "contains"), |
|
49 | 49 | }, |
|
50 | 50 | "error": { |
|
51 | 51 | "type": "unicode", |
|
52 | 52 | "ops": ("eq", "ne", "startswith", "endswith", "contains"), |
|
53 | 53 | }, |
|
54 | 54 | "tags:server_name": { |
|
55 | 55 | "type": "unicode", |
|
56 | 56 | "ops": ("eq", "ne", "startswith", "endswith", "contains"), |
|
57 | 57 | }, |
|
58 | 58 | "traceback": {"type": "unicode", "ops": ("contains",)}, |
|
59 | 59 | "group:occurences": {"type": "int", "ops": ("eq", "ne", "ge", "le")}, |
|
60 | 60 | } |
|
61 | 61 | |
|
62 | 62 | |
|
63 | 63 | class Report(Base, BaseModel): |
|
64 | 64 | __tablename__ = "reports" |
|
65 | 65 | __table_args__ = {"implicit_returning": False} |
|
66 | 66 | |
|
67 | 67 | id = sa.Column(sa.Integer, nullable=False, primary_key=True) |
|
68 | 68 | group_id = sa.Column( |
|
69 | 69 | sa.BigInteger, |
|
70 | 70 | sa.ForeignKey("reports_groups.id", ondelete="cascade", onupdate="cascade"), |
|
71 | 71 | ) |
|
72 | 72 | resource_id = sa.Column(sa.Integer(), nullable=False, index=True) |
|
73 | 73 | report_type = sa.Column(sa.Integer(), nullable=False, index=True) |
|
74 | 74 | error = sa.Column(sa.UnicodeText(), index=True) |
|
75 | 75 | extra = sa.Column(JSON(), default={}) |
|
76 | 76 | request = sa.Column(JSON(), nullable=False, default={}) |
|
77 | 77 | ip = sa.Column(sa.String(39), index=True, default="") |
|
78 | 78 | username = sa.Column(sa.Unicode(255), default="") |
|
79 | 79 | user_agent = sa.Column(sa.Unicode(255), default="") |
|
80 | 80 | url = sa.Column(sa.UnicodeText(), index=True) |
|
81 | 81 | request_id = sa.Column(sa.Text()) |
|
82 | 82 | request_stats = sa.Column(JSON(), nullable=False, default={}) |
|
83 | 83 | traceback = sa.Column(JSON(), nullable=False, default=None) |
|
84 | 84 | traceback_hash = sa.Column(sa.Text()) |
|
85 | 85 | start_time = sa.Column( |
|
86 | 86 | sa.DateTime(), default=datetime.utcnow, server_default=sa.func.now() |
|
87 | 87 | ) |
|
88 | 88 | end_time = sa.Column(sa.DateTime()) |
|
89 | 89 | duration = sa.Column(sa.Float, default=0) |
|
90 | 90 | http_status = sa.Column(sa.Integer, index=True) |
|
91 | 91 | url_domain = sa.Column(sa.Unicode(100), index=True) |
|
92 | 92 | url_path = sa.Column(sa.Unicode(255), index=True) |
|
93 | 93 | tags = sa.Column(JSON(), nullable=False, default={}) |
|
94 | 94 | language = sa.Column(sa.Integer(), default=0) |
|
95 | 95 | # this is used to determine partition for the report |
|
96 | 96 | report_group_time = sa.Column( |
|
97 | 97 | sa.DateTime(), default=datetime.utcnow, server_default=sa.func.now() |
|
98 | 98 | ) |
|
99 | 99 | |
|
100 | 100 | logs = sa.orm.relationship( |
|
101 | 101 | "Log", |
|
102 | 102 | lazy="dynamic", |
|
103 | 103 | passive_deletes=True, |
|
104 | 104 | passive_updates=True, |
|
105 | 105 | primaryjoin="and_(Report.request_id==Log.request_id, " |
|
106 | 106 | "Log.request_id != None, Log.request_id != '')", |
|
107 | 107 | foreign_keys="[Log.request_id]", |
|
108 | 108 | ) |
|
109 | 109 | |
|
110 | 110 | slow_calls = sa.orm.relationship( |
|
111 | 111 | "SlowCall", |
|
112 | 112 | backref="detail", |
|
113 | 113 | cascade="all, delete-orphan", |
|
114 | 114 | passive_deletes=True, |
|
115 | 115 | passive_updates=True, |
|
116 | 116 | order_by="SlowCall.timestamp", |
|
117 | 117 | ) |
|
118 | 118 | |
|
119 | 119 | def set_data(self, data, resource, protocol_version=None): |
|
120 | 120 | self.http_status = data["http_status"] |
|
121 | 121 | self.priority = data["priority"] |
|
122 | 122 | self.error = data["error"] |
|
123 | 123 | report_language = data.get("language", "").lower() |
|
124 | 124 | self.language = getattr(Language, report_language, Language.unknown) |
|
125 | 125 | # we need temp holder here to decide later |
|
126 | 126 | # if we want to commit the tags if the report is marked for creation 
|
127 | 127 | self.tags = {"server_name": data["server"], "view_name": data["view_name"]} |
|
128 | 128 | if data.get("tags"): |
|
129 | 129 | for tag_tuple in data["tags"]: |
|
130 | 130 | self.tags[tag_tuple[0]] = tag_tuple[1] |
|
131 | 131 | self.traceback = data["traceback"] |
|
132 | 132 | stripped_traceback = self.stripped_traceback() |
|
133 | 133 | tb_repr = repr(stripped_traceback).encode("utf8") |
|
134 | 134 | self.traceback_hash = hashlib.sha1(tb_repr).hexdigest() |
|
135 | 135 | url_info = urllib.parse.urlsplit(data.get("url", ""), allow_fragments=False) |
|
136 | 136 | self.url_domain = url_info.netloc[:128] |
|
137 | 137 | self.url_path = url_info.path[:2048] |
|
138 | 138 | self.occurences = data["occurences"] |
|
139 | 139 | if self.error: |
|
140 | 140 | self.report_type = ReportType.error |
|
141 | 141 | else: |
|
142 | 142 | self.report_type = ReportType.slow |
|
143 | 143 | |
|
144 | 144 | # but if its status 404 its 404 type |
|
145 | 145 | if self.http_status in [404, "404"] or self.error == "404 Not Found": |
|
146 | 146 | self.report_type = ReportType.not_found |
|
147 | 147 | self.error = "" |
|
148 | 148 | |
|
149 | 149 | self.generate_grouping_hash( |
|
150 | 150 | data.get("appenlight.group_string", data.get("group_string")), |
|
151 | 151 | resource.default_grouping, |
|
152 | 152 | protocol_version, |
|
153 | 153 | ) |
|
154 | 154 | |
|
155 | 155 | # details |
|
156 | 156 | if data["http_status"] in [404, "404"]: |
|
157 | 157 | data = { |
|
158 | 158 | "username": data["username"], |
|
159 | 159 | "ip": data["ip"], |
|
160 | 160 | "url": data["url"], |
|
161 | 161 | "user_agent": data["user_agent"], |
|
162 | 162 | } |
|
163 | 163 | if data.get("HTTP_REFERER") or data.get("http_referer"): |
|
164 | 164 | data["HTTP_REFERER"] = data.get("HTTP_REFERER", "") or data.get( |
|
165 | 165 | "http_referer", "" |
|
166 | 166 | ) |
|
167 | 167 | |
|
168 | 168 | self.resource_id = resource.resource_id |
|
169 | 169 | self.username = data["username"] |
|
170 | 170 | self.user_agent = data["user_agent"] |
|
171 | 171 | self.ip = data["ip"] |
|
172 | 172 | self.extra = {} |
|
173 | 173 | if data.get("extra"): |
|
174 | 174 | for extra_tuple in data["extra"]: |
|
175 | 175 | self.extra[extra_tuple[0]] = extra_tuple[1] |
|
176 | 176 | |
|
177 | 177 | self.url = data["url"] |
|
178 | 178 | self.request_id = data.get("request_id", "").replace("-", "") or str( |
|
179 | 179 | uuid.uuid4() |
|
180 | 180 | ) |
|
181 | 181 | request_data = data.get("request", {}) |
|
182 | 182 | |
|
183 | 183 | self.request = request_data |
|
184 | 184 | self.request_stats = data.get("request_stats") or {} |
|
185 | 185 | traceback = data.get("traceback") |
|
186 | 186 | if not traceback: |
|
187 | 187 | traceback = data.get("frameinfo") |
|
188 | 188 | self.traceback = traceback |
|
189 | 189 | start_date = convert_date(data.get("start_time")) |
|
190 | 190 | if not self.start_time or self.start_time < start_date: |
|
191 | 191 | self.start_time = start_date |
|
192 | 192 | |
|
193 | 193 | self.end_time = convert_date(data.get("end_time"), False) |
|
194 | 194 | self.duration = 0 |
|
195 | 195 | |
|
196 | 196 | if self.start_time and self.end_time: |
|
197 | 197 | d = self.end_time - self.start_time |
|
198 | 198 | self.duration = d.total_seconds() |
|
199 | 199 | |
|
200 | 200 | # update tags with other vars |
|
201 | 201 | if self.username: |
|
202 | 202 | self.tags["user_name"] = self.username |
|
203 | 203 | self.tags["report_language"] = Language.key_from_value(self.language) |
|
204 | 204 | |
|
205 | 205 | def add_slow_calls(self, data, report_group): |
|
206 | 206 | slow_calls = [] |
|
207 | 207 | for call in data.get("slow_calls", []): |
|
208 | 208 | sc_inst = SlowCall() |
|
209 | 209 | sc_inst.set_data( |
|
210 | 210 | call, resource_id=self.resource_id, report_group=report_group |
|
211 | 211 | ) |
|
212 | 212 | slow_calls.append(sc_inst) |
|
213 | 213 | self.slow_calls.extend(slow_calls) |
|
214 | 214 | return slow_calls |
|
215 | 215 | |
|
216 | 216 | def get_dict(self, request, details=False, exclude_keys=None, include_keys=None): |
|
217 | 217 | from appenlight.models.services.report_group import ReportGroupService |
|
218 | 218 | |
|
219 | 219 | instance_dict = super(Report, self).get_dict() |
|
220 | 220 | instance_dict["req_stats"] = self.req_stats() |
|
221 | 221 | instance_dict["group"] = {} |
|
222 | 222 | instance_dict["group"]["id"] = self.report_group.id |
|
223 | 223 | instance_dict["group"]["total_reports"] = self.report_group.total_reports |
|
224 | 224 | instance_dict["group"]["last_report"] = self.report_group.last_report |
|
225 | 225 | instance_dict["group"]["priority"] = self.report_group.priority |
|
226 | 226 | instance_dict["group"]["occurences"] = self.report_group.occurences |
|
227 | 227 | instance_dict["group"]["last_timestamp"] = self.report_group.last_timestamp |
|
228 | 228 | instance_dict["group"]["first_timestamp"] = self.report_group.first_timestamp |
|
229 | 229 | instance_dict["group"]["public"] = self.report_group.public |
|
230 | 230 | instance_dict["group"]["fixed"] = self.report_group.fixed |
|
231 | 231 | instance_dict["group"]["read"] = self.report_group.read |
|
232 | 232 | instance_dict["group"]["average_duration"] = self.report_group.average_duration |
|
233 | 233 | |
|
234 | 234 | instance_dict["resource_name"] = self.report_group.application.resource_name |
|
235 | 235 | instance_dict["report_type"] = self.report_type |
|
236 | 236 | |
|
237 | 237 | if instance_dict["http_status"] == 404 and not instance_dict["error"]: |
|
238 | 238 | instance_dict["error"] = "404 Not Found" |
|
239 | 239 | |
|
240 | 240 | if details: |
|
241 | 241 | instance_dict[ |
|
242 | 242 | "affected_users_count" |
|
243 | 243 | ] = ReportGroupService.affected_users_count(self.report_group) |
|
244 | 244 | instance_dict["top_affected_users"] = [ |
|
245 | 245 | {"username": u.username, "count": u.count} |
|
246 | 246 | for u in ReportGroupService.top_affected_users(self.report_group) |
|
247 | 247 | ] |
|
248 | 248 | instance_dict["application"] = {"integrations": []} |
|
249 | 249 | for integration in self.report_group.application.integrations: |
|
250 | 250 | if integration.front_visible: |
|
251 | 251 | instance_dict["application"]["integrations"].append( |
|
252 | 252 | { |
|
253 | 253 | "name": integration.integration_name, |
|
254 | 254 | "action": integration.integration_action, |
|
255 | 255 | } |
|
256 | 256 | ) |
|
257 | 257 | instance_dict["comments"] = [ |
|
258 | 258 | c.get_dict() for c in self.report_group.comments |
|
259 | 259 | ] |
|
260 | 260 | |
|
261 | 261 | instance_dict["group"]["next_report"] = None |
|
262 | 262 | instance_dict["group"]["previous_report"] = None |
|
263 | 263 | next_in_group = self.get_next_in_group(request) |
|
264 | 264 | previous_in_group = self.get_previous_in_group(request) |
|
265 | 265 | if next_in_group: |
|
266 | 266 | instance_dict["group"]["next_report"] = next_in_group |
|
267 | 267 | if previous_in_group: |
|
268 | 268 | instance_dict["group"]["previous_report"] = previous_in_group |
|
269 | 269 | |
|
270 | 270 | # slow call ordering |
|
271 | 271 | def find_parent(row, data): |
|
272 | 272 | for r in reversed(data): |
|
273 | 273 | try: |
|
274 | 274 | if ( |
|
275 | 275 | row["timestamp"] > r["timestamp"] |
|
276 | 276 | and row["end_time"] < r["end_time"] |
|
277 | 277 | ): |
|
278 | 278 | return r |
|
279 | 279 | except TypeError as e: |
|
280 | 280 | log.warning("reports_view.find_parent: %s" % e) |
|
281 | 281 | return None |
|
282 | 282 | |
|
283 | 283 | new_calls = [] |
|
284 | 284 | calls = [c.get_dict() for c in self.slow_calls] |
|
285 | 285 | while calls: |
|
286 | 286 | # start from end |
|
287 | 287 | for x in range(len(calls) - 1, -1, -1): |
|
288 | 288 | parent = find_parent(calls[x], calls) |
|
289 | 289 | if parent: |
|
290 | 290 | parent["children"].append(calls[x]) |
|
291 | 291 | else: |
|
292 | 292 | # no parent at all? append to new calls anyways |
|
293 | 293 | new_calls.append(calls[x]) |
|
294 | 294 | # print 'append', calls[x] |
|
295 | 295 | del calls[x] |
|
296 | 296 | break |
|
297 | 297 | instance_dict["slow_calls"] = new_calls |
|
298 | 298 | |
|
299 | 299 | instance_dict["front_url"] = self.get_public_url(request) |
|
300 | 300 | |
|
301 | 301 | exclude_keys_list = exclude_keys or [] |
|
302 | 302 | include_keys_list = include_keys or [] |
|
303 | 303 | for k in list(instance_dict.keys()): |
|
304 | 304 | if k == "group": |
|
305 | 305 | continue |
|
306 | 306 | if k in exclude_keys_list or (k not in include_keys_list and include_keys): |
|
307 | 307 | del instance_dict[k] |
|
308 | 308 | return instance_dict |
|
309 | 309 | |
|
310 | 310 | def get_previous_in_group(self, request): |
|
311 | 311 | query = { |
|
312 | 312 | "size": 1, |
|
313 | 313 | "query": { |
|
314 | 314 | "bool": { |
|
315 | 315 | "filter": [ |
|
316 | 316 | {"term": {"group_id": self.group_id}}, |
|
317 | 317 | {"range": {"report_id": {"lt": self.id}}}, |
|
318 | 318 | ] |
|
319 | 319 | } |
|
320 | 320 | }, |
|
321 | 321 | "sort": [{"_doc": {"order": "desc"}}], |
|
322 | 322 | } |
|
323 | 323 | result = request.es_conn.search( |
|
324 | 324 | body=query, index=self.partition_id, doc_type="report" |
|
325 | 325 | ) |
|
326 | 326 | if result["hits"]["total"]: |
|
327 | 327 | return result["hits"]["hits"][0]["_source"]["report_id"] |
|
328 | 328 | |
|
329 | 329 | def get_next_in_group(self, request): |
|
330 | 330 | query = { |
|
331 | 331 | "size": 1, |
|
332 | 332 | "query": { |
|
333 | 333 | "bool": { |
|
334 | 334 | "filter": [ |
|
335 | 335 | {"term": {"group_id": self.group_id}}, |
|
336 | 336 | {"range": {"report_id": {"gt": self.id}}}, |
|
337 | 337 | ] |
|
338 | 338 | } |
|
339 | 339 | }, |
|
340 | 340 | "sort": [{"_doc": {"order": "asc"}}], |
|
341 | 341 | } |
|
342 | 342 | result = request.es_conn.search( |
|
343 | 343 | body=query, index=self.partition_id, doc_type="report" |
|
344 | 344 | ) |
|
345 | 345 | if result["hits"]["total"]: |
|
346 | 346 | return result["hits"]["hits"][0]["_source"]["report_id"] |
|
347 | 347 | |
|
348 | 348 | def get_public_url(self, request=None, report_group=None, _app_url=None): |
|
349 | 349 | """ |
|
350 | 350 | Returns url that user can use to visit specific report |
|
351 | 351 | """ |
|
352 | 352 | if not request: |
|
353 | 353 | request = get_current_request() |
|
354 | 354 | url = request.route_url("/", _app_url=_app_url) |
|
355 | 355 | if report_group: |
|
356 | 356 | return (url + "ui/report/%s/%s") % (report_group.id, self.id) |
|
357 | 357 | return (url + "ui/report/%s/%s") % (self.group_id, self.id) |
|
358 | 358 | |
|
359 | 359 | def req_stats(self): |
|
360 | 360 | stats = self.request_stats.copy() |
|
361 | 361 | stats["percentages"] = {} |
|
362 | 362 | stats["percentages"]["main"] = 100.0 |
|
363 | 363 | main = stats.get("main", 0.0) |
|
364 | 364 | if not main: |
|
365 | 365 | return None |
|
366 | 366 | for name, call_time in stats.items(): |
|
367 | 367 | if "calls" not in name and "main" not in name and "percentages" not in name: |
|
368 | 368 | stats["main"] -= call_time |
|
369 | 369 | stats["percentages"][name] = math.floor((call_time / main * 100.0)) |
|
370 | 370 | stats["percentages"]["main"] -= stats["percentages"][name] |
|
371 | 371 | if stats["percentages"]["main"] < 0.0: |
|
372 | 372 | stats["percentages"]["main"] = 0.0 |
|
373 | 373 | stats["main"] = 0.0 |
|
374 | 374 | return stats |
|
375 | 375 | |
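A worked example of the percentage math above, for a hypothetical request_stats payload:

    # request_stats = {"main": 1.0, "sql": 0.25, "sql_calls": 3, "tmpl": 0.1}
    # keys containing "calls" are skipped, so req_stats() yields:
    #   stats["percentages"] == {"main": 65.0, "sql": 25, "tmpl": 10}
    #   stats["main"] == 0.65   # time not attributed to any subsystem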
|
376 | 376 | def generate_grouping_hash( |
|
377 | 377 | self, hash_string=None, default_grouping=None, protocol_version=None |
|
378 | 378 | ): |
|
379 | 379 | """ |
|
380 | 380 | Generates SHA1 hash that will be used to group reports together |
|
381 | 381 | """ |
|
382 | 382 | if not hash_string: |
|
383 | 383 | location = self.tags.get("view_name") or self.url_path |
|
384 | 384 | server_name = self.tags.get("server_name") or "" |
|
385 | 385 | if default_grouping == "url_traceback": |
|
386 | 386 | hash_string = "%s_%s_%s" % (self.traceback_hash, location, self.error) |
|
387 | 387 | if self.language == Language.javascript: |
|
388 | 388 | hash_string = "%s_%s" % (self.traceback_hash, self.error) |
|
389 | 389 | |
|
390 | 390 | elif default_grouping == "traceback_server": |
|
391 | 391 | hash_string = "%s_%s" % (self.traceback_hash, server_name) |
|
392 | 392 | if self.language == Language.javascript: |
|
393 | 393 | hash_string = "%s_%s" % (self.traceback_hash, server_name) |
|
394 | 394 | else: |
|
395 | 395 | hash_string = "%s_%s" % (self.error, location) |
|
396 | 396 | month = datetime.utcnow().date().replace(day=1) |
|
397 | 397 | hash_string = "{}_{}".format(month, hash_string) |
|
398 | 398 | binary_string = hash_string.encode("utf8") |
|
399 | 399 | self.grouping_hash = hashlib.sha1(binary_string).hexdigest() |
|
400 | 400 | return self.grouping_hash |
|
401 | 401 | |
|
402 | 402 | def stripped_traceback(self): |
|
403 | 403 | """ |
|
404 | 404 | Traceback without local vars |
|
405 | 405 | """ |
|
406 | 406 | stripped_traceback = copy.deepcopy(self.traceback) |
|
407 | 407 | |
|
408 | 408 | if isinstance(stripped_traceback, list): |
|
409 | 409 | for row in stripped_traceback: |
|
410 | 410 | row.pop("vars", None) |
|
411 | 411 | return stripped_traceback |
|
412 | 412 | |
|
413 | 413 | def notify_channel(self, report_group): |
|
414 | 414 | """ |
|
415 | 415 | Sends notification to websocket channel |
|
416 | 416 | """ |
|
417 | 417 | settings = get_current_registry().settings |
|
418 | 418 | log.info("notify channelstream") |
|
419 | 419 | if self.report_type != ReportType.error: |
|
420 | 420 | return |
|
421 | 421 | payload = { |
|
422 | 422 | "type": "message", |
|
423 | 423 | "user": "__system__", |
|
424 | 424 | "channel": "app_%s" % self.resource_id, |
|
425 | 425 | "message": { |
|
426 | 426 | "topic": "front_dashboard.new_topic", |
|
427 | 427 | "report": { |
|
428 | 428 | "group": { |
|
429 | 429 | "priority": report_group.priority, |
|
430 | 430 | "first_timestamp": report_group.first_timestamp, |
|
431 | 431 | "last_timestamp": report_group.last_timestamp, |
|
432 | 432 | "average_duration": report_group.average_duration, |
|
433 | 433 | "occurences": report_group.occurences, |
|
434 | 434 | }, |
|
435 | 435 | "report_id": self.id, |
|
436 | 436 | "group_id": self.group_id, |
|
437 | 437 | "resource_id": self.resource_id, |
|
438 | 438 | "http_status": self.http_status, |
|
439 | 439 | "url_domain": self.url_domain, |
|
440 | 440 | "url_path": self.url_path, |
|
441 | 441 | "error": self.error or "", |
|
442 | 442 | "server": self.tags.get("server_name"), |
|
443 | 443 | "view_name": self.tags.get("view_name"), |
|
444 | 444 | "front_url": self.get_public_url(), |
|
445 | 445 | }, |
|
446 | 446 | }, |
|
447 | 447 | } |
|
448 | 448 | channelstream_request( |
|
449 | 449 | settings["cometd.secret"], |
|
450 | 450 | "/message", |
|
451 | 451 | [payload], |
|
452 | 452 | servers=[settings["cometd_servers"]], |
|
453 | 453 | ) |
|
454 | 454 | |
|
455 | 455 | def es_doc(self): |
|
456 | 456 | tags = {} |
|
457 | 457 | tag_list = [] |
|
458 | 458 | for name, value in self.tags.items(): |
|
459 | 459 | name = name.replace(".", "_") |
|
460 | 460 | tag_list.append(name) |
|
461 | 461 | tags[name] = { |
|
462 | 462 | "values": convert_es_type(value), |
|
463 | 463 | "numeric_values": value |
|
464 | 464 | if (isinstance(value, (int, float)) and not isinstance(value, bool)) |
|
465 | 465 | else None, |
|
466 | 466 | } |
|
467 | 467 | |
|
468 | 468 | if "user_name" not in self.tags and self.username: |
|
469 | 469 | tags["user_name"] = {"values": [self.username], "numeric_values": None} 
|
470 | 470 | return { |
|
471 | 471 | "_id": str(self.id), |
|
472 | 472 | "report_id": str(self.id), |
|
473 | 473 | "resource_id": self.resource_id, |
|
474 | 474 | "http_status": self.http_status or "", |
|
475 | 475 | "start_time": self.start_time, |
|
476 | 476 | "end_time": self.end_time, |
|
477 | 477 | "url_domain": self.url_domain if self.url_domain else "", |
|
478 | 478 | "url_path": self.url_path if self.url_path else "", |
|
479 | 479 | "duration": self.duration, |
|
480 | 480 | "error": self.error if self.error else "", |
|
481 | 481 | "report_type": self.report_type, |
|
482 | 482 | "request_id": self.request_id, |
|
483 | 483 | "ip": self.ip, |
|
484 | 484 | "group_id": str(self.group_id), |
|
485 | 485 | "type": "report", |
|
486 | "join_field": { | |
|
487 | "name": "report", | |
|
488 | "parent": str(self.group_id) | |
|
489 | }, | |
|
486 | "join_field": {"name": "report", "parent": str(self.group_id)}, | |
|
490 | 487 | "tags": tags, |
|
491 | 488 | "tag_list": tag_list, |
|
492 | "_routing": str(self.group_id) | |
|
489 | "_routing": str(self.group_id), | |
|
493 | 490 | } |
|
494 | 491 | |
|
495 | 492 | @property |
|
496 | 493 | def partition_id(self): |
|
497 | 494 | return "rcae_r_%s" % self.report_group_time.strftime("%Y_%m") |
|
498 | 495 | |
|
499 | 496 | def partition_range(self): |
|
500 | 497 | start_date = self.report_group_time.date().replace(day=1) |
|
501 | 498 | end_date = start_date + timedelta(days=40) |
|
502 | 499 | end_date = end_date.replace(day=1) |
|
503 | 500 | return start_date, end_date |
|
504 | 501 | |
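Partitions are monthly indices derived from report_group_time, for example:

    # report_group_time = 2017-06-15  ->  partition_id == "rcae_r_2017_06"
    # partition_range() -> (date(2017, 6, 1), date(2017, 7, 1))
    # (adding 40 days and snapping back to day 1 always reaches the next month)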
|
505 | 502 | |
|
506 | 503 | def after_insert(mapper, connection, target): |
|
507 | 504 | if not hasattr(target, "_skip_ft_index"): |
|
508 | 505 | data = target.es_doc() |
|
509 | 506 | data.pop("_id", None) |
|
510 | 507 | Datastores.es.index( |
|
511 | 508 | target.partition_id, "report", data, parent=target.group_id, id=target.id |
|
512 | 509 | ) |
|
513 | 510 | |
|
514 | 511 | |
|
515 | 512 | def after_update(mapper, connection, target): |
|
516 | 513 | if not hasattr(target, "_skip_ft_index"): |
|
517 | 514 | data = target.es_doc() |
|
518 | 515 | data.pop("_id", None) |
|
519 | 516 | Datastores.es.index( |
|
520 | 517 | target.partition_id, "report", data, parent=target.group_id, id=target.id |
|
521 | 518 | ) |
|
522 | 519 | |
|
523 | 520 | |
|
524 | 521 | def after_delete(mapper, connection, target): |
|
525 | 522 | if not hasattr(target, "_skip_ft_index"): |
|
526 | 523 | query = {"query": {"term": {"report_id": target.id}}} |
|
527 | 524 | Datastores.es.delete_by_query( |
|
528 | index=target.partition_id, doc_type="report", body=query, conflicts="proceed" | |
|
525 | index=target.partition_id, | |
|
526 | doc_type="report", | |
|
527 | body=query, | |
|
528 | conflicts="proceed", | |
|
529 | 529 | ) |
|
530 | 530 | |
|
531 | 531 | |
|
532 | 532 | sa.event.listen(Report, "after_insert", after_insert) |
|
533 | 533 | sa.event.listen(Report, "after_update", after_update) |
|
534 | 534 | sa.event.listen(Report, "after_delete", after_delete) |
@@ -1,285 +1,283 b'' | |||
|
1 | 1 | # -*- coding: utf-8 -*- |
|
2 | 2 | |
|
3 | 3 | # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors |
|
4 | 4 | # |
|
5 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 | 6 | # you may not use this file except in compliance with the License. |
|
7 | 7 | # You may obtain a copy of the License at |
|
8 | 8 | # |
|
9 | 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
|
10 | 10 | # |
|
11 | 11 | # Unless required by applicable law or agreed to in writing, software |
|
12 | 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
|
13 | 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 | 14 | # See the License for the specific language governing permissions and |
|
15 | 15 | # limitations under the License. |
|
16 | 16 | |
|
17 | 17 | import logging |
|
18 | 18 | import sqlalchemy as sa |
|
19 | 19 | |
|
20 | 20 | from datetime import datetime, timedelta |
|
21 | 21 | |
|
22 | 22 | from pyramid.threadlocal import get_current_request |
|
23 | 23 | from sqlalchemy.dialects.postgresql import JSON |
|
24 | 24 | from ziggurat_foundations.models.base import BaseModel |
|
25 | 25 | |
|
26 | 26 | from appenlight.models import Base, get_db_session, Datastores |
|
27 | 27 | from appenlight.lib.enums import ReportType |
|
28 | 28 | from appenlight.lib.rule import Rule |
|
29 | 29 | from appenlight.lib.redis_keys import REDIS_KEYS |
|
30 | 30 | from appenlight.models.report import REPORT_TYPE_MATRIX |
|
31 | 31 | |
|
32 | 32 | log = logging.getLogger(__name__) |
|
33 | 33 | |
|
34 | 34 | |
|
35 | 35 | class ReportGroup(Base, BaseModel): |
|
36 | 36 | __tablename__ = "reports_groups" |
|
37 | 37 | __table_args__ = {"implicit_returning": False} |
|
38 | 38 | |
|
39 | 39 | id = sa.Column(sa.BigInteger(), nullable=False, primary_key=True) |
|
40 | 40 | resource_id = sa.Column( |
|
41 | 41 | sa.Integer(), |
|
42 | 42 | sa.ForeignKey( |
|
43 | 43 | "applications.resource_id", onupdate="CASCADE", ondelete="CASCADE" |
|
44 | 44 | ), |
|
45 | 45 | nullable=False, |
|
46 | 46 | index=True, |
|
47 | 47 | ) |
|
48 | 48 | priority = sa.Column( |
|
49 | 49 | sa.Integer, nullable=False, index=True, default=5, server_default="5" |
|
50 | 50 | ) |
|
51 | 51 | first_timestamp = sa.Column( |
|
52 | 52 | sa.DateTime(), default=datetime.utcnow, server_default=sa.func.now() |
|
53 | 53 | ) |
|
54 | 54 | last_timestamp = sa.Column( |
|
55 | 55 | sa.DateTime(), default=datetime.utcnow, server_default=sa.func.now() |
|
56 | 56 | ) |
|
57 | 57 | error = sa.Column(sa.UnicodeText(), index=True) |
|
58 | 58 | grouping_hash = sa.Column(sa.String(40), default="") |
|
59 | 59 | triggered_postprocesses_ids = sa.Column(JSON(), nullable=False, default=list) |
|
60 | 60 | report_type = sa.Column(sa.Integer, default=1) |
|
61 | 61 | total_reports = sa.Column(sa.Integer, default=1) |
|
62 | 62 | last_report = sa.Column(sa.Integer) |
|
63 | 63 | occurences = sa.Column(sa.Integer, default=1) |
|
64 | 64 | average_duration = sa.Column(sa.Float, default=0) |
|
65 | 65 | summed_duration = sa.Column(sa.Float, default=0) |
|
66 | 66 | read = sa.Column(sa.Boolean(), index=True, default=False) |
|
67 | 67 | fixed = sa.Column(sa.Boolean(), index=True, default=False) |
|
68 | 68 | notified = sa.Column(sa.Boolean(), index=True, default=False) |
|
69 | 69 | public = sa.Column(sa.Boolean(), index=True, default=False) |
|
70 | 70 | |
|
71 | 71 | reports = sa.orm.relationship( |
|
72 | 72 | "Report", |
|
73 | 73 | lazy="dynamic", |
|
74 | 74 | backref="report_group", |
|
75 | 75 | cascade="all, delete-orphan", |
|
76 | 76 | passive_deletes=True, |
|
77 | 77 | passive_updates=True, |
|
78 | 78 | ) |
|
79 | 79 | |
|
80 | 80 | comments = sa.orm.relationship( |
|
81 | 81 | "ReportComment", |
|
82 | 82 | lazy="dynamic", |
|
83 | 83 | backref="report", |
|
84 | 84 | cascade="all, delete-orphan", |
|
85 | 85 | passive_deletes=True, |
|
86 | 86 | passive_updates=True, |
|
87 | 87 | order_by="ReportComment.comment_id", |
|
88 | 88 | ) |
|
89 | 89 | |
|
90 | 90 | assigned_users = sa.orm.relationship( |
|
91 | 91 | "User", |
|
92 | 92 | backref=sa.orm.backref( |
|
93 | 93 | "assigned_reports_relation", |
|
94 | 94 | lazy="dynamic", |
|
95 | 95 | order_by=sa.desc(sa.text("reports_groups.id")), |
|
96 | 96 | ), |
|
97 | 97 | passive_deletes=True, |
|
98 | 98 | passive_updates=True, |
|
99 | 99 | secondary="reports_assignments", |
|
100 | 100 | order_by="User.user_name", |
|
101 | 101 | ) |
|
102 | 102 | |
|
103 | 103 | stats = sa.orm.relationship( |
|
104 | 104 | "ReportStat", |
|
105 | 105 | lazy="dynamic", |
|
106 | 106 | backref="report", |
|
107 | 107 | passive_deletes=True, |
|
108 | 108 | passive_updates=True, |
|
109 | 109 | ) |
|
110 | 110 | |
|
111 | 111 | last_report_ref = sa.orm.relationship( |
|
112 | 112 | "Report", |
|
113 | 113 | uselist=False, |
|
114 | 114 | primaryjoin="ReportGroup.last_report " "== Report.id", |
|
115 | 115 | foreign_keys="Report.id", |
|
116 | 116 | cascade="all, delete-orphan", |
|
117 | 117 | passive_deletes=True, |
|
118 | 118 | passive_updates=True, |
|
119 | 119 | ) |
|
120 | 120 | |
|
121 | 121 | def __repr__(self): |
|
122 | 122 | return "<ReportGroup id:{}>".format(self.id) |
|
123 | 123 | |
|
124 | 124 | def get_report(self, report_id=None, public=False): |
|
125 | 125 | """ |
|
126 | 126 | Gets report with specific id or latest report if id was not specified |
|
127 | 127 | """ |
|
128 | 128 | from .report import Report |
|
129 | 129 | |
|
130 | 130 | if not report_id: |
|
131 | 131 | return self.last_report_ref |
|
132 | 132 | else: |
|
133 | 133 | return self.reports.filter(Report.id == report_id).first() |
|
134 | 134 | |
|
135 | 135 | def get_public_url(self, request, _app_url=None): |
|
136 | 136 | url = request.route_url("/", _app_url=_app_url) |
|
137 | 137 | return (url + "ui/report/%s") % self.id |
|
138 | 138 | |
|
139 | 139 | def run_postprocessing(self, report): |
|
140 | 140 | """ |
|
141 | 141 | Alters report group priority based on postprocessing configuration |
|
142 | 142 | """ |
|
143 | 143 | request = get_current_request() |
|
144 | 144 | get_db_session(None, self).flush() |
|
145 | 145 | for action in self.application.postprocess_conf: |
|
146 | 146 | get_db_session(None, self).flush() |
|
147 | 147 | rule_obj = Rule(action.rule, REPORT_TYPE_MATRIX) |
|
148 | 148 | report_dict = report.get_dict(request) |
|
149 | 149 | # if was not processed yet |
|
150 | 150 | if ( |
|
151 | 151 | rule_obj.match(report_dict) |
|
152 | 152 | and action.pkey not in self.triggered_postprocesses_ids |
|
153 | 153 | ): |
|
154 | 154 | action.postprocess(self) |
|
155 | 155 | # this way sqla can track mutation of list |
|
156 | 156 | self.triggered_postprocesses_ids = self.triggered_postprocesses_ids + [ |
|
157 | 157 | action.pkey |
|
158 | 158 | ] |
|
159 | 159 | |
|
160 | 160 | get_db_session(None, self).flush() |
|
161 | 161 | # do not go out of bounds |
|
162 | 162 | if self.priority < 1: |
|
163 | 163 | self.priority = 1 |
|
164 | 164 | if self.priority > 10: |
|
165 | 165 | self.priority = 10 |
|
166 | 166 | |
|
167 | 167 | def get_dict(self, request): |
|
168 | 168 | instance_dict = super(ReportGroup, self).get_dict() |
|
169 | 169 | instance_dict["server_name"] = self.get_report().tags.get("server_name") |
|
170 | 170 | instance_dict["view_name"] = self.get_report().tags.get("view_name") |
|
171 | 171 | instance_dict["resource_name"] = self.application.resource_name |
|
172 | 172 | instance_dict["report_type"] = self.get_report().report_type |
|
173 | 173 | instance_dict["url_path"] = self.get_report().url_path |
|
174 | 174 | instance_dict["front_url"] = self.get_report().get_public_url(request) |
|
175 | 175 | del instance_dict["triggered_postprocesses_ids"] |
|
176 | 176 | return instance_dict |
|
177 | 177 | |
|
178 | 178 | def es_doc(self): |
|
179 | 179 | return { |
|
180 | 180 | "_id": str(self.id), |
|
181 | 181 | "group_id": str(self.id), |
|
182 | 182 | "resource_id": self.resource_id, |
|
183 | 183 | "error": self.error, |
|
184 | 184 | "fixed": self.fixed, |
|
185 | 185 | "public": self.public, |
|
186 | 186 | "read": self.read, |
|
187 | 187 | "priority": self.priority, |
|
188 | 188 | "occurences": self.occurences, |
|
189 | 189 | "average_duration": self.average_duration, |
|
190 | 190 | "summed_duration": self.summed_duration, |
|
191 | 191 | "first_timestamp": self.first_timestamp, |
|
192 | 192 | "last_timestamp": self.last_timestamp, |
|
193 | 193 | "type": "report_group", |
|
194 | "join_field": { | |
|
195 | "name": "report_group" | |
|
196 | }, | |
|
194 | "join_field": {"name": "report_group"}, | |
|
197 | 195 | } |
|
198 | 196 | |
|
199 | 197 | def set_notification_info(self, notify_10=False, notify_100=False): |
|
200 | 198 | """ |
|
201 | 199 | Update redis notification maps for notification job |
|
202 | 200 | """ |
|
203 | 201 | current_time = datetime.utcnow().replace(second=0, microsecond=0) |
|
204 | 202 | # global app counter |
|
205 | 203 | key = REDIS_KEYS["counters"]["reports_per_type"].format( |
|
206 | 204 | self.report_type, current_time |
|
207 | 205 | ) |
|
208 | 206 | redis_pipeline = Datastores.redis.pipeline() |
|
209 | 207 | redis_pipeline.incr(key) |
|
210 | 208 | redis_pipeline.expire(key, 3600 * 24) |
|
211 | 209 | # detailed app notification for alerts and notifications |
|
212 | 210 | redis_pipeline.sadd(REDIS_KEYS["apps_that_had_reports"], self.resource_id) |
|
213 | 211 | redis_pipeline.sadd( |
|
214 | 212 | REDIS_KEYS["apps_that_had_reports_alerting"], self.resource_id |
|
215 | 213 | ) |
|
216 | 214 | # only notify for exceptions here |
|
217 | 215 | if self.report_type == ReportType.error: |
|
218 | 216 | redis_pipeline.sadd(REDIS_KEYS["apps_that_had_reports"], self.resource_id) |
|
219 | 217 | redis_pipeline.sadd( |
|
220 | 218 | REDIS_KEYS["apps_that_had_error_reports_alerting"], self.resource_id |
|
221 | 219 | ) |
|
222 | 220 | key = REDIS_KEYS["counters"]["report_group_occurences"].format(self.id) |
|
223 | 221 | redis_pipeline.incr(key) |
|
224 | 222 | redis_pipeline.expire(key, 3600 * 24) |
|
225 | 223 | key = REDIS_KEYS["counters"]["report_group_occurences_alerting"].format(self.id) |
|
226 | 224 | redis_pipeline.incr(key) |
|
227 | 225 | redis_pipeline.expire(key, 3600 * 24) |
|
228 | 226 | |
|
229 | 227 | if notify_10: |
|
230 | 228 | key = REDIS_KEYS["counters"]["report_group_occurences_10th"].format(self.id) |
|
231 | 229 | redis_pipeline.setex(key, 3600 * 24, 1) |
|
232 | 230 | if notify_100: |
|
233 | 231 | key = REDIS_KEYS["counters"]["report_group_occurences_100th"].format( |
|
234 | 232 | self.id |
|
235 | 233 | ) |
|
236 | 234 | redis_pipeline.setex(key, 3600 * 24, 1) |
|
237 | 235 | |
|
238 | 236 | key = REDIS_KEYS["reports_to_notify_per_type_per_app"].format( |
|
239 | 237 | self.report_type, self.resource_id |
|
240 | 238 | ) |
|
241 | 239 | redis_pipeline.sadd(key, self.id) |
|
242 | 240 | redis_pipeline.expire(key, 3600 * 24) |
|
243 | 241 | key = REDIS_KEYS["reports_to_notify_per_type_per_app_alerting"].format( |
|
244 | 242 | self.report_type, self.resource_id |
|
245 | 243 | ) |
|
246 | 244 | redis_pipeline.sadd(key, self.id) |
|
247 | 245 | redis_pipeline.expire(key, 3600 * 24) |
|
248 | 246 | redis_pipeline.execute() |
|
249 | 247 | |
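All the counter updates above go through one Redis pipeline, so a single report insert costs one network round trip regardless of how many keys it touches. A minimal standalone sketch, with an illustrative key layout:

    import redis

    r = redis.StrictRedis()
    pipe = r.pipeline()
    key = "reports_per_type:1:2017-06-15T10:00"  # hypothetical counter key
    pipe.incr(key)
    pipe.expire(key, 3600 * 24)  # counters self-expire after a day
    pipe.execute()  # all queued commands sent in one round trip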
|
250 | 248 | @property |
|
251 | 249 | def partition_id(self): |
|
252 | 250 | return "rcae_r_%s" % self.first_timestamp.strftime("%Y_%m") |
|
253 | 251 | |
|
254 | 252 | def partition_range(self): |
|
255 | 253 | start_date = self.first_timestamp.date().replace(day=1) |
|
256 | 254 | end_date = start_date + timedelta(days=40) |
|
257 | 255 | end_date = end_date.replace(day=1) |
|
258 | 256 | return start_date, end_date |
|
259 | 257 | |
|
260 | 258 | |
|
261 | 259 | def after_insert(mapper, connection, target): |
|
262 | 260 | if not hasattr(target, "_skip_ft_index"): |
|
263 | 261 | data = target.es_doc() |
|
264 | 262 | data.pop("_id", None) |
|
265 | 263 | Datastores.es.index(target.partition_id, "report", data, id=target.id) |
|
266 | 264 | |
|
267 | 265 | |
|
268 | 266 | def after_update(mapper, connection, target): |
|
269 | 267 | if not hasattr(target, "_skip_ft_index"): |
|
270 | 268 | data = target.es_doc() |
|
271 | 269 | data.pop("_id", None) |
|
272 | 270 | Datastores.es.index(target.partition_id, "report", data, id=target.id) |
|
273 | 271 | |
|
274 | 272 | |
|
275 | 273 | def after_delete(mapper, connection, target): |
|
276 | 274 | query = {"query": {"term": {"group_id": target.id}}} |
|
277 | 275 | # delete by query |
|
278 | 276 | Datastores.es.delete_by_query( |
|
279 | 277 | index=target.partition_id, doc_type="report", body=query, conflicts="proceed" |
|
280 | 278 | ) |
|
281 | 279 | |
|
282 | 280 | |
|
283 | 281 | sa.event.listen(ReportGroup, "after_insert", after_insert) |
|
284 | 282 | sa.event.listen(ReportGroup, "after_update", after_update) |
|
285 | 283 | sa.event.listen(ReportGroup, "after_delete", after_delete) |
@@ -1,222 +1,218 b'' | |||
|
1 | 1 | # -*- coding: utf-8 -*- |
|
2 | 2 | |
|
3 | 3 | # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors |
|
4 | 4 | # |
|
5 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 | 6 | # you may not use this file except in compliance with the License. |
|
7 | 7 | # You may obtain a copy of the License at |
|
8 | 8 | # |
|
9 | 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
|
10 | 10 | # |
|
11 | 11 | # Unless required by applicable law or agreed to in writing, software |
|
12 | 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
|
13 | 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 | 14 | # See the License for the specific language governing permissions and |
|
15 | 15 | # limitations under the License. |
|
16 | 16 | |
|
17 | 17 | import paginate |
|
18 | 18 | import logging |
|
19 | 19 | import sqlalchemy as sa |
|
20 | 20 | |
|
21 | 21 | from appenlight.models.log import Log |
|
22 | 22 | from appenlight.models import get_db_session, Datastores |
|
23 | 23 | from appenlight.models.services.base import BaseService |
|
24 | 24 | from appenlight.lib.utils import es_index_name_limiter |
|
25 | 25 | |
|
26 | 26 | log = logging.getLogger(__name__) |
|
27 | 27 | |
|
28 | 28 | |
|
29 | 29 | class LogService(BaseService): |
|
30 | 30 | @classmethod |
|
31 | 31 | def get_logs(cls, resource_ids=None, filter_settings=None, db_session=None): |
|
32 | 32 | # ensure we always have id's passed |
|
33 | 33 | if not resource_ids: |
|
34 | 34 | # raise Exception('No App ID passed') |
|
35 | 35 | return [] |
|
36 | 36 | db_session = get_db_session(db_session) |
|
37 | 37 | q = db_session.query(Log) |
|
38 | 38 | q = q.filter(Log.resource_id.in_(resource_ids)) |
|
39 | 39 | if filter_settings.get("start_date"): |
|
40 | 40 | q = q.filter(Log.timestamp >= filter_settings.get("start_date")) |
|
41 | 41 | if filter_settings.get("end_date"): |
|
42 | 42 | q = q.filter(Log.timestamp <= filter_settings.get("end_date")) |
|
43 | 43 | if filter_settings.get("log_level"): |
|
44 | 44 | q = q.filter(Log.log_level == filter_settings.get("log_level").upper()) |
|
45 | 45 | if filter_settings.get("request_id"): |
|
46 | 46 | request_id = filter_settings.get("request_id", "") |
|
47 | 47 | q = q.filter(Log.request_id == request_id.replace("-", "")) |
|
48 | 48 | if filter_settings.get("namespace"): |
|
49 | 49 | q = q.filter(Log.namespace == filter_settings.get("namespace")) |
|
50 | 50 | q = q.order_by(sa.desc(Log.timestamp)) |
|
51 | 51 | return q |
|
52 | 52 | |
|
53 | 53 | @classmethod |
|
54 | 54 | def es_query_builder(cls, app_ids, filter_settings): |
|
55 | 55 | if not filter_settings: |
|
56 | 56 | filter_settings = {} |
|
57 | 57 | |
|
58 | 58 | query = { |
|
59 | "query": { | |
|
60 | "bool": { | |
|
61 | "filter": [{"terms": {"resource_id": list(app_ids)}}] | |
|
62 | } | |
|
63 | } | |
|
59 | "query": {"bool": {"filter": [{"terms": {"resource_id": list(app_ids)}}]}} | |
|
64 | 60 | } |
|
65 | 61 | |
|
66 | 62 | start_date = filter_settings.get("start_date") |
|
67 | 63 | end_date = filter_settings.get("end_date") |
|
68 | 64 | filter_part = query["query"]["bool"]["filter"] |
|
69 | 65 | |
|
70 | 66 | for tag in filter_settings.get("tags", []): |
|
71 | 67 | tag_values = [v.lower() for v in tag["value"]] |
|
72 | 68 | key = "tags.%s.values" % tag["name"].replace(".", "_") |
|
73 | 69 | filter_part.append({"terms": {key: tag_values}}) |
|
74 | 70 | |
|
75 | 71 | date_range = {"range": {"timestamp": {}}} |
|
76 | 72 | if start_date: |
|
77 | 73 | date_range["range"]["timestamp"]["gte"] = start_date |
|
78 | 74 | if end_date: |
|
79 | 75 | date_range["range"]["timestamp"]["lte"] = end_date |
|
80 | 76 | if start_date or end_date: |
|
81 | 77 | filter_part.append(date_range) |
|
82 | 78 | |
|
83 | 79 | levels = filter_settings.get("level") |
|
84 | 80 | if levels: |
|
85 | 81 | filter_part.append({"terms": {"log_level": levels}}) |
|
86 | 82 | namespaces = filter_settings.get("namespace") |
|
87 | 83 | if namespaces: |
|
88 | 84 | filter_part.append({"terms": {"namespace": namespaces}}) |
|
89 | 85 | |
|
90 | 86 | request_ids = filter_settings.get("request_id") |
|
91 | 87 | if request_ids: |
|
92 | 88 | filter_part.append({"terms": {"request_id": request_ids}}) |
|
93 | 89 | |
|
94 | 90 | messages = filter_settings.get("message") |
|
95 | 91 | if messages: |
|
96 | 92 | query["query"]["bool"]["must"] = { |
|
97 | 93 | "match": {"message": {"query": " ".join(messages), "operator": "and"}} |
|
98 | 94 | } |
|
99 | 95 | return query |
|
100 | 96 | |
|
101 | 97 | @classmethod |
|
102 | 98 | def get_time_series_aggregate(cls, app_ids=None, filter_settings=None): |
|
103 | 99 | if not app_ids: |
|
104 | 100 | return {} |
|
105 | 101 | es_query = cls.es_query_builder(app_ids, filter_settings) |
|
106 | 102 | es_query["aggs"] = { |
|
107 | 103 | "events_over_time": { |
|
108 | 104 | "date_histogram": { |
|
109 | 105 | "field": "timestamp", |
|
110 | 106 | "interval": "1h", |
|
111 | 107 | "min_doc_count": 0, |
|
112 | 108 | "extended_bounds": { |
|
113 | 109 | "max": filter_settings.get("end_date"), |
|
114 | 110 | "min": filter_settings.get("start_date"), |
|
115 | 111 | }, |
|
116 | 112 | } |
|
117 | 113 | } |
|
118 | 114 | } |
|
119 | 115 | log.debug(es_query) |
|
120 | 116 | index_names = es_index_name_limiter( |
|
121 | 117 | filter_settings.get("start_date"), |
|
122 | 118 | filter_settings.get("end_date"), |
|
123 | 119 | ixtypes=["logs"], |
|
124 | 120 | ) |
|
125 | 121 | if index_names: |
|
126 | 122 | results = Datastores.es.search( |
|
127 | 123 | body=es_query, index=index_names, doc_type="log", size=0 |
|
128 | 124 | ) |
|
129 | 125 | else: |
|
130 | 126 | results = [] |
|
131 | 127 | return results |
|
132 | 128 | |
|
133 | 129 | @classmethod |
|
134 | 130 | def get_search_iterator( |
|
135 | 131 | cls, 

136 | 132 | app_ids=None, 

137 | 133 | page=1, 

138 | 134 | items_per_page=50, 

139 | 135 | order_by=None, 

140 | 136 | filter_settings=None, 

141 | 137 | limit=None, 

142 | 138 | ): 
|
143 | 139 | if not app_ids: |
|
144 | 140 | return {}, 0 |
|
145 | 141 | |
|
146 | 142 | es_query = cls.es_query_builder(app_ids, filter_settings) |
|
147 | 143 | sort_query = {"sort": [{"timestamp": {"order": "desc"}}]} |
|
148 | 144 | es_query.update(sort_query) |
|
149 | 145 | log.debug(es_query) |
|
150 | 146 | es_from = (page - 1) * items_per_page |
|
151 | 147 | index_names = es_index_name_limiter( |
|
152 | 148 | filter_settings.get("start_date"), |
|
153 | 149 | filter_settings.get("end_date"), |
|
154 | 150 | ixtypes=["logs"], |
|
155 | 151 | ) |
|
156 | 152 | if not index_names: |
|
157 | 153 | return {}, 0 |
|
158 | 154 | |
|
159 | 155 | results = Datastores.es.search( |
|
160 | 156 | body=es_query, |
|
161 | 157 | index=index_names, |
|
162 | 158 | doc_type="log", |
|
163 | 159 | size=items_per_page, |
|
164 | 160 | from_=es_from, |
|
165 | 161 | ) |
|
166 | 162 | if results["hits"]["total"] > 5000: |
|
167 | 163 | count = 5000 |
|
168 | 164 | else: |
|
169 | 165 | count = results["hits"]["total"] |
|
170 | 166 | return results["hits"], count |
|
171 | 167 | |
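Pagination is plain from/size, and the reported count is clamped to 5000 because Elasticsearch rejects from + size beyond index.max_result_window (10000 by default):

    # page=3, items_per_page=50  ->  from_=100, size=50
    # results["hits"]["total"] == 123456  ->  count == 5000 (UI paging ceiling)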
|
172 | 168 | @classmethod |
|
173 | 169 | def get_paginator_by_app_ids( |
|
174 | 170 | cls, 

175 | 171 | app_ids=None, 

176 | 172 | page=1, 

177 | 173 | item_count=None, 

178 | 174 | items_per_page=50, 

179 | 175 | order_by=None, 

180 | 176 | filter_settings=None, 

181 | 177 | exclude_columns=None, 

182 | 178 | db_session=None, 
|
|
183 | 179 | ): |
|
184 | 180 | if not filter_settings: |
|
185 | 181 | filter_settings = {} |
|
186 | 182 | results, item_count = cls.get_search_iterator( |
|
187 | 183 | app_ids, page, items_per_page, order_by, filter_settings |
|
188 | 184 | ) |
|
189 | 185 | paginator = paginate.Page( |
|
190 | 186 | [], item_count=item_count, items_per_page=items_per_page, **filter_settings |
|
191 | 187 | ) |
|
192 | 188 | ordered_ids = tuple( |
|
193 | 189 | item["_source"]["log_id"] for item in results.get("hits", []) |
|
194 | 190 | ) |
|
195 | 191 | |
|
196 | 192 | sorted_instance_list = [] |
|
197 | 193 | if ordered_ids: |
|
198 | 194 | db_session = get_db_session(db_session) |
|
199 | 195 | query = db_session.query(Log) |
|
200 | 196 | query = query.filter(Log.log_id.in_(ordered_ids)) |
|
201 | 197 | query = query.order_by(sa.desc("timestamp")) |
|
202 | 198 | sa_items = query.all() |
|
203 | 199 | # resort by score |
|
204 | 200 | for i_id in ordered_ids: |
|
205 | 201 | for item in sa_items: |
|
206 | 202 | if str(item.log_id) == str(i_id): |
|
207 | 203 | sorted_instance_list.append(item) |
|
208 | 204 | paginator.sa_items = sorted_instance_list |
|
209 | 205 | return paginator |
|
210 | 206 | |
|
211 | 207 | @classmethod |
|
212 | 208 | def query_by_primary_key_and_namespace(cls, list_of_pairs, db_session=None): |
|
213 | 209 | db_session = get_db_session(db_session) |
|
214 | 210 | list_of_conditions = [] |
|
215 | 211 | query = db_session.query(Log) |
|
216 | 212 | for pair in list_of_pairs: |
|
217 | 213 | list_of_conditions.append( |
|
218 | 214 | sa.and_(Log.primary_key == pair["pk"], Log.namespace == pair["ns"]) |
|
219 | 215 | ) |
|
220 | 216 | query = query.filter(sa.or_(*list_of_conditions)) |
|
221 | 217 | query = query.order_by(sa.asc(Log.timestamp), sa.asc(Log.log_id)) |
|
222 | 218 | return query |
@@ -1,521 +1,521 b'' | |||
|
1 | 1 | # -*- coding: utf-8 -*- |
|
2 | 2 | |
|
3 | 3 | # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors |
|
4 | 4 | # |
|
5 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 | 6 | # you may not use this file except in compliance with the License. |
|
7 | 7 | # You may obtain a copy of the License at |
|
8 | 8 | # |
|
9 | 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
|
10 | 10 | # |
|
11 | 11 | # Unless required by applicable law or agreed to in writing, software |
|
12 | 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
|
13 | 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 | 14 | # See the License for the specific language governing permissions and |
|
15 | 15 | # limitations under the License. |
|
16 | 16 | |
|
17 | 17 | import logging |
|
18 | 18 | import paginate |
|
19 | 19 | import sqlalchemy as sa |
|
20 | 20 | import appenlight.lib.helpers as h |
|
21 | 21 | |
|
22 | 22 | from datetime import datetime |
|
23 | 23 | |
|
24 | 24 | from appenlight.models import get_db_session, Datastores |
|
25 | 25 | from appenlight.models.report import Report |
|
26 | 26 | from appenlight.models.report_group import ReportGroup |
|
27 | 27 | from appenlight.models.report_comment import ReportComment |
|
28 | 28 | from appenlight.models.user import User |
|
29 | 29 | from appenlight.models.services.base import BaseService |
|
30 | 30 | from appenlight.lib.enums import ReportType |
|
31 | 31 | from appenlight.lib.utils import es_index_name_limiter |
|
32 | 32 | |
|
33 | 33 | log = logging.getLogger(__name__) |
|
34 | 34 | |
|
35 | 35 | |
|
36 | 36 | class ReportGroupService(BaseService): |
|
37 | 37 | @classmethod |
|
38 | 38 | def get_trending(cls, request, filter_settings, limit=15, db_session=None): |
|
39 | 39 | """ |
|
40 | 40 | Returns report groups trending for specific time interval |
|
41 | 41 | """ |
|
42 | 42 | db_session = get_db_session(db_session) |
|
43 | 43 | |
|
44 | 44 | tags = [] |
|
45 | 45 | if filter_settings.get("tags"): |
|
46 | 46 | for tag in filter_settings["tags"]: |
|
47 | 47 | tags.append( |
|
48 | 48 | {"terms": {"tags.{}.values".format(tag["name"]): tag["value"]}} |
|
49 | 49 | ) |
|
50 | 50 | |
|
51 | 51 | index_names = es_index_name_limiter( |
|
52 | 52 | start_date=filter_settings["start_date"], |
|
53 | 53 | end_date=filter_settings["end_date"], |
|
54 | 54 | ixtypes=["reports"], |
|
55 | 55 | ) |
|
56 | 56 | |
|
57 | 57 | if not index_names or not filter_settings["resource"]: |
|
58 | 58 | return [] |
|
59 | 59 | |
|
60 | 60 | es_query = { |
|
61 | 61 | "aggs": { |
|
62 | 62 | "parent_agg": { |
|
63 | 63 | "aggs": { |
|
64 | 64 | "groups": { |
|
65 | 65 | "aggs": { |
|
66 | 66 | "sub_agg": { |
|
67 | "value_count": {"field": "tags.group_id.values.keyword"} | 

67 | "value_count": { | |
|
68 | "field": "tags.group_id.values.keyword" | |
|
69 | } | |
|
68 | 70 | } |
|
69 | 71 | }, |
|
70 | 72 | "filter": {"exists": {"field": "tags.group_id.values"}}, |
|
71 | 73 | } |
|
72 | 74 | }, |
|
73 | 75 | "terms": {"field": "tags.group_id.values.keyword", "size": limit}, |
|
74 | 76 | } |
|
75 | 77 | }, |
|
76 | 78 | "query": { |
|
77 | 79 | "bool": { |
|
78 | 80 | "filter": [ |
|
79 | { | |
|
80 | "terms": { | |
|
81 | "resource_id": [filter_settings["resource"][0]] | |
|
82 | } | |
|
83 | }, | |
|
81 | {"terms": {"resource_id": [filter_settings["resource"][0]]}}, | |
|
84 | 82 | { |
|
85 | 83 | "range": { |
|
86 | 84 | "timestamp": { |
|
87 | 85 | "gte": filter_settings["start_date"], |
|
88 | 86 | "lte": filter_settings["end_date"], |
|
89 | 87 | } |
|
90 | 88 | } |
|
91 | 89 | }, |
|
92 | 90 | ] |
|
93 | 91 | } |
|
94 | 92 | }, |
|
95 | 93 | } |
|
96 | 94 | if tags: |
|
97 | 95 | es_query["query"]["bool"]["filter"].extend(tags) |
|
98 | 96 | |
|
99 | 97 | result = Datastores.es.search( |
|
100 | 98 | body=es_query, index=index_names, doc_type="report", size=0 |
|
101 | 99 | ) |
|
102 | 100 | series = [] |
|
103 | 101 | for bucket in result["aggregations"]["parent_agg"]["buckets"]: |
|
104 | 102 | series.append( |
|
105 | 103 | {"key": bucket["key"], "groups": bucket["groups"]["sub_agg"]["value"]} |
|
106 | 104 | ) |
|
107 | 105 | |
|
108 | 106 | report_groups_d = {} |
|
109 | 107 | for g in series: |
|
110 | 108 | report_groups_d[int(g["key"])] = g["groups"] or 0 |
|
111 | 109 | |
|
112 | 110 | query = db_session.query(ReportGroup) |
|
113 | 111 | query = query.filter(ReportGroup.id.in_(list(report_groups_d.keys()))) |
|
114 | 112 | query = query.options(sa.orm.joinedload(ReportGroup.last_report_ref)) |
|
115 | 113 | results = [(report_groups_d[group.id], group) for group in query] |
|
116 | 114 | return sorted(results, reverse=True, key=lambda x: x[0]) |
|
117 | 115 | |
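The aggregation counts reports per group directly in Elasticsearch; only the winning group ids are then hydrated from Postgres and sorted by their counts:

    # ES buckets  -> series == [{"key": "12", "groups": 40}, {"key": "7", "groups": 3}]
    # report_groups_d == {12: 40, 7: 3}
    # result == [(40, <ReportGroup id:12>), (3, <ReportGroup id:7>)]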
|
118 | 116 | @classmethod |
|
119 | 117 | def get_search_iterator( |
|
120 | 118 | cls, |
|
121 | 119 | app_ids=None, |
|
122 | 120 | page=1, |
|
123 | 121 | items_per_page=50, |
|
124 | 122 | order_by=None, |
|
125 | 123 | filter_settings=None, |
|
126 | 124 | limit=None, |
|
127 | 125 | ): |
|
128 | 126 | if not app_ids: |
|
129 | 127 | return {} |
|
130 | 128 | if not filter_settings: |
|
131 | 129 | filter_settings = {} |
|
132 | 130 | |
|
133 | 131 | query = { |
|
134 | 132 | "size": 0, |
|
135 | 133 | "query": { |
|
136 | 134 | "bool": { |
|
137 | 135 | "must": [], |
|
138 | 136 | "should": [], |
|
139 | "filter": [{"terms": {"resource_id": list(app_ids)}}] | |
|
137 | "filter": [{"terms": {"resource_id": list(app_ids)}}], | |
|
140 | 138 | } |
|
141 | 139 | }, |
|
142 | 140 | "aggs": { |
|
143 | 141 | "top_groups": { |
|
144 | 142 | "terms": { |
|
145 | 143 | "size": 5000, |
|
146 | 144 | "field": "join_field#report_group", |
|
147 | 145 | "order": {"newest": "desc"}, |
|
148 | 146 | }, |
|
149 | 147 | "aggs": { |
|
150 | 148 | "top_reports_hits": { |
|
151 | 149 | "top_hits": {"size": 1, "sort": {"start_time": "desc"}} |
|
152 | 150 | }, |
|
153 | 151 | "newest": {"max": {"field": "start_time"}}, |
|
154 | 152 | }, |
|
155 | 153 | } |
|
156 | 154 | }, |
|
157 | 155 | } |
|
158 | 156 | |
|
159 | 157 | start_date = filter_settings.get("start_date") |
|
160 | 158 | end_date = filter_settings.get("end_date") |
|
161 | 159 | filter_part = query["query"]["bool"]["filter"] |
|
162 | 160 | date_range = {"range": {"start_time": {}}} |
|
163 | 161 | if start_date: |
|
164 | 162 | date_range["range"]["start_time"]["gte"] = start_date |
|
165 | 163 | if end_date: |
|
166 | 164 | date_range["range"]["start_time"]["lte"] = end_date |
|
167 | 165 | if start_date or end_date: |
|
168 | 166 | filter_part.append(date_range) |
|
169 | 167 | |
|
170 | 168 | priorities = filter_settings.get("priority") |
|
171 | 169 | |
|
172 | 170 | for tag in filter_settings.get("tags", []): |
|
173 | 171 | tag_values = [v.lower() for v in tag["value"]] |
|
174 | 172 | key = "tags.%s.values" % tag["name"].replace(".", "_") |
|
175 | 173 | filter_part.append({"terms": {key: tag_values}}) |
|
176 | 174 | |
|
177 | 175 | if priorities: |
|
178 | 176 | filter_part.append( |
|
179 | 177 | { |
|
180 | 178 | "has_parent": { |
|
181 | 179 | "parent_type": "report_group", |
|
182 | 180 | "query": {"terms": {"priority": priorities}}, |
|
183 | 181 | } |
|
184 | 182 | } |
|
185 | 183 | ) |
|
186 | 184 | |
|
187 | 185 | min_occurences = filter_settings.get("min_occurences") |
|
188 | 186 | if min_occurences: |
|
189 | 187 | filter_part.append( |
|
190 | 188 | { |
|
191 | 189 | "has_parent": { |
|
192 | 190 | "parent_type": "report_group", |
|
193 | 191 | "query": {"range": {"occurences": {"gte": min_occurences[0]}}}, |
|
194 | 192 | } |
|
195 | 193 | } |
|
196 | 194 | ) |
|
197 | 195 | |
|
198 | 196 | min_duration = filter_settings.get("min_duration") |
|
199 | 197 | max_duration = filter_settings.get("max_duration") |
|
200 | 198 | |
|
201 | 199 | request_ids = filter_settings.get("request_id") |
|
202 | 200 | if request_ids: |
|
203 | 201 | filter_part.append({"terms": {"request_id": request_ids}}) |
|
204 | 202 | |
|
205 | 203 | duration_range = {"range": {"average_duration": {}}} |
|
206 | 204 | if min_duration: |
|
207 | 205 | duration_range["range"]["average_duration"]["gte"] = min_duration[0] |
|
208 | 206 | if max_duration: |
|
209 | 207 | duration_range["range"]["average_duration"]["lte"] = max_duration[0] |
|
210 | 208 | if min_duration or max_duration: |
|
211 | 209 | filter_part.append( |
|
212 | 210 | {"has_parent": {"parent_type": "report_group", "query": duration_range}} |
|
213 | 211 | ) |
|
214 | 212 | |
|
215 | 213 | http_status = filter_settings.get("http_status") |
|
216 | 214 | report_type = filter_settings.get("report_type", [ReportType.error]) |
|
217 | 215 | # set error report type if http status is not found |
|
218 | 216 | # and we are dealing with slow reports |
|
219 | 217 | if not http_status or ReportType.slow in report_type: |
|
220 | 218 | filter_part.append({"terms": {"report_type": report_type}}) |
|
221 | 219 | if http_status: |
|
222 | 220 | filter_part.append({"terms": {"http_status": http_status}}) |
|
223 | 221 | |
|
224 | 222 | messages = filter_settings.get("message") |
|
225 | 223 | if messages: |
|
226 | 224 | condition = {"match": {"message": " ".join(messages)}} |
|
227 | 225 | query["query"]["bool"]["must"].append(condition) |
|
228 | 226 | errors = filter_settings.get("error") |
|
229 | 227 | if errors: |
|
230 | 228 | condition = {"match": {"error": " ".join(errors)}} |
|
231 | 229 | query["query"]["bool"]["must"].append(condition) |
|
232 | 230 | url_domains = filter_settings.get("url_domain") |
|
233 | 231 | if url_domains: |
|
234 | 232 | condition = {"terms": {"url_domain": url_domains}} |
|
235 | 233 | query["query"]["bool"]["must"].append(condition) |
|
236 | 234 | url_paths = filter_settings.get("url_path") |
|
237 | 235 | if url_paths: |
|
238 | 236 | condition = {"terms": {"url_path": url_paths}} |
|
239 | 237 | query["query"]["bool"]["must"].append(condition) |
|
240 | 238 | |
|
241 | 239 | if filter_settings.get("report_status"): |
|
242 | 240 | for status in filter_settings.get("report_status"): |
|
243 | 241 | if status == "never_reviewed": |
|
244 | 242 | filter_part.append( |
|
245 | 243 | { |
|
246 | 244 | "has_parent": { |
|
247 | 245 | "parent_type": "report_group", |
|
248 | 246 | "query": {"term": {"read": False}}, |
|
249 | 247 | } |
|
250 | 248 | } |
|
251 | 249 | ) |
|
252 | 250 | elif status == "reviewed": |
|
253 | 251 | filter_part.append( |
|
254 | 252 | { |
|
255 | 253 | "has_parent": { |
|
256 | 254 | "parent_type": "report_group", |
|
257 | 255 | "query": {"term": {"read": True}}, |
|
258 | 256 | } |
|
259 | 257 | } |
|
260 | 258 | ) |
|
261 | 259 | elif status == "public": |
|
262 | 260 | filter_part.append( |
|
263 | 261 | { |
|
264 | 262 | "has_parent": { |
|
265 | 263 | "parent_type": "report_group", |
|
266 | 264 | "query": {"term": {"public": True}}, |
|
267 | 265 | } |
|
268 | 266 | } |
|
269 | 267 | ) |
|
270 | 268 | elif status == "fixed": |
|
271 | 269 | filter_part.append( |
|
272 | 270 | { |
|
273 | 271 | "has_parent": { |
|
274 | 272 | "parent_type": "report_group", |
|
275 | 273 | "query": {"term": {"fixed": True}}, |
|
276 | 274 | } |
|
277 | 275 | } |
|
278 | 276 | ) |
|
279 | 277 | |
|
280 | 278 | # logging.getLogger('pyelasticsearch').setLevel(logging.DEBUG) |
|
281 | 279 | index_names = es_index_name_limiter( |
|
282 | 280 | filter_settings.get("start_date"), |
|
283 | 281 | filter_settings.get("end_date"), |
|
284 | 282 | ixtypes=["reports"], |
|
285 | 283 | ) |
|
286 | 284 | if index_names: |
|
287 | 285 | results = Datastores.es.search( |
|
288 | 286 | body=query, |
|
289 | 287 | index=index_names, |
|
290 | 288 | doc_type=["report", "report_group"], |
|
291 | 289 | size=0, |
|
292 | 290 | ) |
|
293 | 291 | else: |
|
294 | 292 | return [] |
|
295 | 293 | return results["aggregations"] |
|
296 | 294 | |
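Editor's note: every optional key in filter_settings above appends one more clause to the same query["query"]["bool"]["filter"] list. A minimal, runnable sketch of that assembly pattern (the settings values are made up):

```python
# Sketch of the bool-filter assembly used above, with hypothetical inputs.
def build_query(app_ids, filter_settings):
    query = {
        "query": {
            "bool": {
                "must": [],
                "should": [],
                "filter": [{"terms": {"resource_id": list(app_ids)}}],
            }
        }
    }
    filter_part = query["query"]["bool"]["filter"]
    date_range = {"range": {"start_time": {}}}
    if filter_settings.get("start_date"):
        date_range["range"]["start_time"]["gte"] = filter_settings["start_date"]
    if filter_settings.get("end_date"):
        date_range["range"]["start_time"]["lte"] = filter_settings["end_date"]
    if date_range["range"]["start_time"]:
        filter_part.append(date_range)
    return query

print(build_query([1, 2], {"start_date": "2017-01-01T00:00:00"}))
```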
|
297 | 295 | @classmethod |
|
298 | 296 | def get_paginator_by_app_ids( |
|
299 | 297 | cls, |
|
300 | 298 | app_ids=None, |
|
301 | 299 | page=1, |
|
302 | 300 | item_count=None, |
|
303 | 301 | items_per_page=50, |
|
304 | 302 | order_by=None, |
|
305 | 303 | filter_settings=None, |
|
306 | 304 | exclude_columns=None, |
|
307 | 305 | db_session=None, |
|
308 | 306 | ): |
|
309 | 307 | if not filter_settings: |
|
310 | 308 | filter_settings = {} |
|
311 | 309 | results = cls.get_search_iterator( |
|
312 | 310 | app_ids, page, items_per_page, order_by, filter_settings |
|
313 | 311 | ) |
|
314 | 312 | |
|
315 | 313 | ordered_ids = [] |
|
316 | 314 | if results: |
|
317 | 315 | for item in results["top_groups"]["buckets"]: |
|
318 | pg_id = item["top_reports_hits"]["hits"]["hits"][0]["_source"]["report_id"] | 

316 | pg_id = item["top_reports_hits"]["hits"]["hits"][0]["_source"][ | |
|
317 | "report_id" | |
|
318 | ] | |
|
319 | 319 | ordered_ids.append(pg_id) |
|
320 | 320 | log.info(filter_settings) |
|
321 | 321 | paginator = paginate.Page( |
|
322 | 322 | ordered_ids, items_per_page=items_per_page, **filter_settings |
|
323 | 323 | ) |
|
324 | 324 | sa_items = () |
|
325 | 325 | if paginator.items: |
|
326 | 326 | db_session = get_db_session(db_session) |
|
327 | 327 | # latest report detail |
|
328 | 328 | query = db_session.query(Report) |
|
329 | 329 | query = query.options(sa.orm.joinedload(Report.report_group)) |
|
330 | 330 | query = query.filter(Report.id.in_(paginator.items)) |
|
331 | 331 | if filter_settings.get("order_col"): |
|
332 | 332 | order_col = filter_settings.get("order_col") |
|
333 | 333 | if filter_settings.get("order_dir") == "dsc": |
|
334 | 334 | sort_on = "desc" |
|
335 | 335 | else: |
|
336 | 336 | sort_on = "asc" |
|
337 | 337 | if order_col == "when": |
|
338 | 338 | order_col = "last_timestamp" |
|
339 | 339 | query = query.order_by( |
|
340 | 340 | getattr(sa, sort_on)(getattr(ReportGroup, order_col)) |
|
341 | 341 | ) |
|
342 | 342 | sa_items = query.all() |
|
343 | 343 | sorted_instance_list = [] |
|
344 | 344 | for i_id in ordered_ids: |
|
345 | 345 | for report in sa_items: |
|
346 | 346 | if str(report.id) == i_id and report not in sorted_instance_list: |
|
347 | 347 | sorted_instance_list.append(report) |
|
348 | 348 | paginator.sa_items = sorted_instance_list |
|
349 | 349 | return paginator |
|
350 | 350 | |
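The nested loop above that builds sorted_instance_list re-sorts SQL rows into the relevance order ES returned; a hedged equivalent using a dict index (illustrative, not the code under review):

```python
from collections import namedtuple

Row = namedtuple("Row", "id")  # stand-in for a SQLAlchemy Report row

def order_rows_like_ids(ordered_ids, rows):
    # index the rows once, then walk the ES-ranked id list
    by_id = {str(row.id): row for row in rows}
    return [by_id[i] for i in ordered_ids if i in by_id]

assert order_rows_like_ids(["2", "1"], [Row(1), Row(2)]) == [Row(2), Row(1)]
```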
|
351 | 351 | @classmethod |
|
352 | 352 | def by_app_ids(cls, app_ids=None, order_by=True, db_session=None): |
|
353 | 353 | db_session = get_db_session(db_session) |
|
354 | 354 | q = db_session.query(ReportGroup) |
|
355 | 355 | if app_ids: |
|
356 | 356 | q = q.filter(ReportGroup.resource_id.in_(app_ids)) |
|
357 | 357 | if order_by: |
|
358 | 358 | q = q.order_by(sa.desc(ReportGroup.id)) |
|
359 | 359 | return q |
|
360 | 360 | |
|
361 | 361 | @classmethod |
|
362 | 362 | def by_id(cls, group_id, app_ids=None, db_session=None): |
|
363 | 363 | db_session = get_db_session(db_session) |
|
364 | 364 | q = db_session.query(ReportGroup).filter(ReportGroup.id == int(group_id)) |
|
365 | 365 | if app_ids: |
|
366 | 366 | q = q.filter(ReportGroup.resource_id.in_(app_ids)) |
|
367 | 367 | return q.first() |
|
368 | 368 | |
|
369 | 369 | @classmethod |
|
370 | 370 | def by_ids(cls, group_ids=None, db_session=None): |
|
371 | 371 | db_session = get_db_session(db_session) |
|
372 | 372 | query = db_session.query(ReportGroup) |
|
373 | 373 | query = query.filter(ReportGroup.id.in_(group_ids)) |
|
374 | 374 | return query |
|
375 | 375 | |
|
376 | 376 | @classmethod |
|
377 | 377 | def by_hash_and_resource( |
|
378 | 378 | cls, resource_id, grouping_hash, since_when=None, db_session=None |
|
379 | 379 | ): |
|
380 | 380 | db_session = get_db_session(db_session) |
|
381 | 381 | q = db_session.query(ReportGroup) |
|
382 | 382 | q = q.filter(ReportGroup.resource_id == resource_id) |
|
383 | 383 | q = q.filter(ReportGroup.grouping_hash == grouping_hash) |
|
384 | 384 | q = q.filter(ReportGroup.fixed == False) |
|
385 | 385 | if since_when: |
|
386 | 386 | q = q.filter(ReportGroup.first_timestamp >= since_when) |
|
387 | 387 | return q.first() |
|
388 | 388 | |
|
389 | 389 | @classmethod |
|
390 | 390 | def users_commenting(cls, report_group, exclude_user_id=None, db_session=None): |
|
391 | 391 | db_session = get_db_session(None, report_group) |
|
392 | 392 | query = db_session.query(User).distinct() |
|
393 | 393 | query = query.filter(User.id == ReportComment.owner_id) |
|
394 | 394 | query = query.filter(ReportComment.group_id == report_group.id) |
|
395 | 395 | if exclude_user_id: |
|
396 | 396 | query = query.filter(ReportComment.owner_id != exclude_user_id) |
|
397 | 397 | return query |
|
398 | 398 | |
|
399 | 399 | @classmethod |
|
400 | 400 | def affected_users_count(cls, report_group, db_session=None): |
|
401 | 401 | db_session = get_db_session(db_session) |
|
402 | 402 | query = db_session.query(sa.func.count(Report.username)) |
|
403 | 403 | query = query.filter(Report.group_id == report_group.id) |
|
404 | 404 | query = query.filter(Report.username != "") |
|
405 | 405 | query = query.filter(Report.username != None) |
|
406 | 406 | query = query.group_by(Report.username) |
|
407 | 407 | return query.count() |
|
408 | 408 | |
|
409 | 409 | @classmethod |
|
410 | 410 | def top_affected_users(cls, report_group, db_session=None): |
|
411 | 411 | db_session = get_db_session(db_session) |
|
412 | 412 | count_label = sa.func.count(Report.username).label("count") |
|
413 | 413 | query = db_session.query(Report.username, count_label) |
|
414 | 414 | query = query.filter(Report.group_id == report_group.id) |
|
415 | 415 | query = query.filter(Report.username != None) |
|
416 | 416 | query = query.filter(Report.username != "") |
|
417 | 417 | query = query.group_by(Report.username) |
|
418 | 418 | query = query.order_by(sa.desc(count_label)) |
|
419 | 419 | query = query.limit(50) |
|
420 | 420 | return query |
|
421 | 421 | |
|
422 | 422 | @classmethod |
|
423 | 423 | def get_report_stats(cls, request, filter_settings): |
|
424 | 424 | """ |
|
425 | 425 | Gets report dashboard graphs. 

426 | 426 | Returns information for bar charts with occurrences-per-interval data; 

427 | 427 | the detailed variant returns time intervals, the non-detailed variant 

428 | 428 | returns the total sum. 
|
429 | 429 | """ |
|
430 | 430 | delta = filter_settings["end_date"] - filter_settings["start_date"] |
|
431 | 431 | if delta < h.time_deltas.get("12h")["delta"]: |
|
432 | 432 | interval = "1m" |
|
433 | 433 | elif delta <= h.time_deltas.get("3d")["delta"]: |
|
434 | 434 | interval = "5m" |
|
435 | 435 | elif delta >= h.time_deltas.get("2w")["delta"]: |
|
436 | 436 | interval = "24h" |
|
437 | 437 | else: |
|
438 | 438 | interval = "1h" |
|
439 | 439 | |
|
440 | 440 | group_id = filter_settings.get("group_id") |
|
441 | 441 | |
|
442 | 442 | es_query = { |
|
443 | 443 | "aggs": { |
|
444 | 444 | "parent_agg": { |
|
445 | 445 | "aggs": { |
|
446 | 446 | "types": { |
|
447 | 447 | "aggs": { |
|
448 | "sub_agg": {"terms": {"field": "tags.type.values.keyword"}} | 

448 | "sub_agg": { | |
|
449 | "terms": {"field": "tags.type.values.keyword"} | |
|
450 | } | |
|
449 | 451 | }, |
|
450 | 452 | "filter": { |
|
451 | 453 | "bool": { |
|
452 | "filter": [{"exists": {"field": "tags.type.values"}}] | 

454 | "filter": [ | |
|
455 | {"exists": {"field": "tags.type.values"}} | |
|
456 | ] | |
|
453 | 457 | } |
|
454 | 458 | }, |
|
455 | 459 | } |
|
456 | 460 | }, |
|
457 | 461 | "date_histogram": { |
|
458 | 462 | "extended_bounds": { |
|
459 | 463 | "max": filter_settings["end_date"], |
|
460 | 464 | "min": filter_settings["start_date"], |
|
461 | 465 | }, |
|
462 | 466 | "field": "timestamp", |
|
463 | 467 | "interval": interval, |
|
464 | 468 | "min_doc_count": 0, |
|
465 | 469 | }, |
|
466 | 470 | } |
|
467 | 471 | }, |
|
468 | 472 | "query": { |
|
469 | 473 | "bool": { |
|
470 | 474 | "filter": [ |
|
471 | { | |
|
472 | "terms": { | |
|
473 | "resource_id": [filter_settings["resource"][0]] | |
|
474 | } | |
|
475 | }, | |
|
475 | {"terms": {"resource_id": [filter_settings["resource"][0]]}}, | |
|
476 | 476 | { |
|
477 | 477 | "range": { |
|
478 | 478 | "timestamp": { |
|
479 | 479 | "gte": filter_settings["start_date"], |
|
480 | 480 | "lte": filter_settings["end_date"], |
|
481 | 481 | } |
|
482 | 482 | } |
|
483 | 483 | }, |
|
484 | 484 | ] |
|
485 | 485 | } |
|
486 | 486 | }, |
|
487 | 487 | } |
|
488 | 488 | if group_id: |
|
489 | 489 | parent_agg = es_query["aggs"]["parent_agg"] |
|
490 | 490 | filters = parent_agg["aggs"]["types"]["filter"]["bool"]["filter"] |
|
491 | 491 | filters.append({"terms": {"tags.group_id.values": [group_id]}}) |
|
492 | 492 | |
|
493 | 493 | index_names = es_index_name_limiter( |
|
494 | 494 | start_date=filter_settings["start_date"], |
|
495 | 495 | end_date=filter_settings["end_date"], |
|
496 | 496 | ixtypes=["reports"], |
|
497 | 497 | ) |
|
498 | 498 | |
|
499 | 499 | if not index_names: |
|
500 | 500 | return [] |
|
501 | 501 | |
|
502 | 502 | result = Datastores.es.search( |
|
503 | 503 | body=es_query, index=index_names, doc_type="log", size=0 |
|
504 | 504 | ) |
|
505 | 505 | series = [] |
|
506 | 506 | for bucket in result["aggregations"]["parent_agg"]["buckets"]: |
|
507 | 507 | point = { |
|
508 | 508 | "x": datetime.utcfromtimestamp(int(bucket["key"]) / 1000), |
|
509 | 509 | "report": 0, |
|
510 | 510 | "not_found": 0, |
|
511 | 511 | "slow_report": 0, |
|
512 | 512 | } |
|
513 | 513 | for subbucket in bucket["types"]["sub_agg"]["buckets"]: |
|
514 | 514 | if subbucket["key"] == "slow": |
|
515 | 515 | point["slow_report"] = subbucket["doc_count"] |
|
516 | 516 | elif subbucket["key"] == "error": |
|
517 | 517 | point["report"] = subbucket["doc_count"] |
|
518 | 518 | elif subbucket["key"] == "not_found": |
|
519 | 519 | point["not_found"] = subbucket["doc_count"] |
|
520 | 520 | series.append(point) |
|
521 | 521 | return series |
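The if/elif ladder at the top of get_report_stats maps the queried time span to a histogram bucket size. A standalone restatement, assuming h.time_deltas maps "12h"/"3d"/"2w" to the obvious timedelta values:

```python
from datetime import timedelta

def pick_interval(delta):
    # fine-grained buckets for short ranges, coarse ones for long ranges
    if delta < timedelta(hours=12):
        return "1m"
    elif delta <= timedelta(days=3):
        return "5m"
    elif delta >= timedelta(weeks=2):
        return "24h"
    return "1h"

assert pick_interval(timedelta(days=1)) == "5m"
```
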
@@ -1,63 +1,65 b'' | |||
|
1 | 1 | # -*- coding: utf-8 -*- |
|
2 | 2 | |
|
3 | 3 | # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors |
|
4 | 4 | # |
|
5 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 | 6 | # you may not use this file except in compliance with the License. |
|
7 | 7 | # You may obtain a copy of the License at |
|
8 | 8 | # |
|
9 | 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
|
10 | 10 | # |
|
11 | 11 | # Unless required by applicable law or agreed to in writing, software |
|
12 | 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
|
13 | 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 | 14 | # See the License for the specific language governing permissions and |
|
15 | 15 | # limitations under the License. |
|
16 | 16 | |
|
17 | 17 | from appenlight.models import Datastores |
|
18 | 18 | from appenlight.models.services.base import BaseService |
|
19 | 19 | from appenlight.lib.enums import ReportType |
|
20 | 20 | from appenlight.lib.utils import es_index_name_limiter |
|
21 | 21 | |
|
22 | 22 | |
|
23 | 23 | class ReportStatService(BaseService): |
|
24 | 24 | @classmethod |
|
25 | 25 | def count_by_type(cls, report_type, resource_id, since_when): |
|
26 | 26 | report_type = ReportType.key_from_value(report_type) |
|
27 | 27 | |
|
28 | 28 | index_names = es_index_name_limiter(start_date=since_when, ixtypes=["reports"]) |
|
29 | 29 | |
|
30 | 30 | es_query = { |
|
31 | 31 | "aggs": { |
|
32 | 32 | "reports": { |
|
33 | 33 | "aggs": { |
|
34 | "sub_agg": {"value_count": {"field": "tags.group_id.values.keyword"}} | |
|
34 | "sub_agg": { | |
|
35 | "value_count": {"field": "tags.group_id.values.keyword"} | |
|
36 | } | |
|
35 | 37 | }, |
|
36 | 38 | "filter": { |
|
37 | 39 | "bool": { |
|
38 | 40 | "filter": [ |
|
39 | 41 | {"terms": {"resource_id": [resource_id]}}, |
|
40 | 42 | {"exists": {"field": "tags.group_id.values"}}, |
|
41 | 43 | ] |
|
42 | 44 | } |
|
43 | 45 | }, |
|
44 | 46 | } |
|
45 | 47 | }, |
|
46 | 48 | "query": { |
|
47 | 49 | "bool": { |
|
48 | 50 | "filter": [ |
|
49 | 51 | {"terms": {"resource_id": [resource_id]}}, |
|
50 | 52 | {"terms": {"tags.type.values": [report_type]}}, |
|
51 | 53 | {"range": {"timestamp": {"gte": since_when}}}, |
|
52 | 54 | ] |
|
53 | 55 | } |
|
54 | 56 | }, |
|
55 | 57 | } |
|
56 | 58 | |
|
57 | 59 | if index_names: |
|
58 | 60 | result = Datastores.es.search( |
|
59 | 61 | body=es_query, index=index_names, doc_type="log", size=0 |
|
60 | 62 | ) |
|
61 | 63 | return result["aggregations"]["reports"]["sub_agg"]["value"] |
|
62 | 64 | else: |
|
63 | 65 | return 0 |
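A hypothetical call site for count_by_type (the resource id is a placeholder): it answers "how many reports of this type were recorded for app X since a given time".

```python
from datetime import datetime, timedelta

since = datetime.utcnow() - timedelta(hours=1)
error_count = ReportStatService.count_by_type(
    ReportType.error, resource_id=1, since_when=since
)
```
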
@@ -1,612 +1,623 b'' | |||
|
1 | 1 | # -*- coding: utf-8 -*- |
|
2 | 2 | |
|
3 | 3 | # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors |
|
4 | 4 | # |
|
5 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 | 6 | # you may not use this file except in compliance with the License. |
|
7 | 7 | # You may obtain a copy of the License at |
|
8 | 8 | # |
|
9 | 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
|
10 | 10 | # |
|
11 | 11 | # Unless required by applicable law or agreed to in writing, software |
|
12 | 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
|
13 | 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 | 14 | # See the License for the specific language governing permissions and |
|
15 | 15 | # limitations under the License. |
|
16 | 16 | |
|
17 | 17 | from datetime import datetime |
|
18 | 18 | |
|
19 | 19 | import appenlight.lib.helpers as h |
|
20 | 20 | from appenlight.models import get_db_session, Datastores |
|
21 | 21 | from appenlight.models.services.base import BaseService |
|
22 | 22 | from appenlight.lib.enums import ReportType |
|
23 | 23 | from appenlight.lib.utils import es_index_name_limiter |
|
24 | 24 | |
|
25 | 25 | try: |
|
26 | 26 | from ae_uptime_ce.models.services.uptime_metric import UptimeMetricService |
|
27 | 27 | except ImportError: |
|
28 | 28 | UptimeMetricService = None |
|
29 | 29 | |
|
30 | 30 | |
|
31 | 31 | def check_key(key, stats, uptime, total_seconds): |
|
32 | 32 | if key not in stats: |
|
33 | 33 | stats[key] = { |
|
34 | 34 | "name": key, |
|
35 | 35 | "requests": 0, |
|
36 | 36 | "errors": 0, |
|
37 | 37 | "tolerated_requests": 0, |
|
38 | 38 | "frustrating_requests": 0, |
|
39 | 39 | "satisfying_requests": 0, |
|
40 | 40 | "total_minutes": total_seconds / 60.0, |
|
41 | 41 | "uptime": uptime, |
|
42 | 42 | "apdex": 0, |
|
43 | 43 | "rpm": 0, |
|
44 | 44 | "response_time": 0, |
|
45 | 45 | "avg_response_time": 0, |
|
46 | 46 | } |
|
47 | 47 | |
|
48 | 48 | |
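check_key is an initialize-if-missing helper: once it runs, callers can mutate the per-server stats dict without guarding every access. A small usage sketch with made-up numbers:

```python
stats = {}
check_key("web01", stats, uptime=99.9, total_seconds=3600.0)
stats["web01"]["requests"] += 150  # safe: the defaults exist now
assert stats["web01"]["total_minutes"] == 60.0
```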
|
49 | 49 | class RequestMetricService(BaseService): |
|
50 | 50 | @classmethod |
|
51 | 51 | def get_metrics_stats(cls, request, filter_settings, db_session=None): |
|
52 | 52 | delta = filter_settings["end_date"] - filter_settings["start_date"] |
|
53 | 53 | if delta < h.time_deltas.get("12h")["delta"]: |
|
54 | 54 | interval = "1m" |
|
55 | 55 | elif delta <= h.time_deltas.get("3d")["delta"]: |
|
56 | 56 | interval = "5m" |
|
57 | 57 | elif delta >= h.time_deltas.get("2w")["delta"]: |
|
58 | 58 | interval = "24h" |
|
59 | 59 | else: |
|
60 | 60 | interval = "1h" |
|
61 | 61 | |
|
62 | 62 | filter_settings["namespace"] = ["appenlight.request_metric"] |
|
63 | 63 | |
|
64 | 64 | es_query = { |
|
65 | 65 | "aggs": { |
|
66 | 66 | "parent_agg": { |
|
67 | 67 | "aggs": { |
|
68 | 68 | "custom": { |
|
69 | 69 | "aggs": { |
|
70 | 70 | "sub_agg": { |
|
71 | 71 | "sum": {"field": "tags.custom.numeric_values"} |
|
72 | 72 | } |
|
73 | 73 | }, |
|
74 | 74 | "filter": { |
|
75 | 75 | "exists": {"field": "tags.custom.numeric_values"} |
|
76 | 76 | }, |
|
77 | 77 | }, |
|
78 | 78 | "main": { |
|
79 | 79 | "aggs": { |
|
80 | 80 | "sub_agg": { |
|
81 | 81 | "sum": {"field": "tags.main.numeric_values"} |
|
82 | 82 | } |
|
83 | 83 | }, |
|
84 | 84 | "filter": {"exists": {"field": "tags.main.numeric_values"}}, |
|
85 | 85 | }, |
|
86 | 86 | "nosql": { |
|
87 | 87 | "aggs": { |
|
88 | 88 | "sub_agg": { |
|
89 | 89 | "sum": {"field": "tags.nosql.numeric_values"} |
|
90 | 90 | } |
|
91 | 91 | }, |
|
92 | 92 | "filter": { |
|
93 | 93 | "exists": {"field": "tags.nosql.numeric_values"} |
|
94 | 94 | }, |
|
95 | 95 | }, |
|
96 | 96 | "remote": { |
|
97 | 97 | "aggs": { |
|
98 | 98 | "sub_agg": { |
|
99 | 99 | "sum": {"field": "tags.remote.numeric_values"} |
|
100 | 100 | } |
|
101 | 101 | }, |
|
102 | 102 | "filter": { |
|
103 | 103 | "exists": {"field": "tags.remote.numeric_values"} |
|
104 | 104 | }, |
|
105 | 105 | }, |
|
106 | 106 | "requests": { |
|
107 | 107 | "aggs": { |
|
108 | 108 | "sub_agg": { |
|
109 | 109 | "sum": {"field": "tags.requests.numeric_values"} |
|
110 | 110 | } |
|
111 | 111 | }, |
|
112 | 112 | "filter": { |
|
113 | 113 | "exists": {"field": "tags.requests.numeric_values"} |
|
114 | 114 | }, |
|
115 | 115 | }, |
|
116 | 116 | "sql": { |
|
117 | 117 | "aggs": { |
|
118 | 118 | "sub_agg": {"sum": {"field": "tags.sql.numeric_values"}} |
|
119 | 119 | }, |
|
120 | 120 | "filter": {"exists": {"field": "tags.sql.numeric_values"}}, |
|
121 | 121 | }, |
|
122 | 122 | "tmpl": { |
|
123 | 123 | "aggs": { |
|
124 | 124 | "sub_agg": { |
|
125 | 125 | "sum": {"field": "tags.tmpl.numeric_values"} |
|
126 | 126 | } |
|
127 | 127 | }, |
|
128 | 128 | "filter": {"exists": {"field": "tags.tmpl.numeric_values"}}, |
|
129 | 129 | }, |
|
130 | 130 | }, |
|
131 | 131 | "date_histogram": { |
|
132 | 132 | "extended_bounds": { |
|
133 | 133 | "max": filter_settings["end_date"], |
|
134 | 134 | "min": filter_settings["start_date"], |
|
135 | 135 | }, |
|
136 | 136 | "field": "timestamp", |
|
137 | 137 | "interval": interval, |
|
138 | 138 | "min_doc_count": 0, |
|
139 | 139 | }, |
|
140 | 140 | } |
|
141 | 141 | }, |
|
142 | 142 | "query": { |
|
143 | 143 | "bool": { |
|
144 | 144 | "filter": [ |
|
145 | { | |
|
146 | "terms": { | |
|
147 | "resource_id": [filter_settings["resource"][0]] | |
|
148 | } | |
|
149 | }, | |
|
145 | {"terms": {"resource_id": [filter_settings["resource"][0]]}}, | |
|
150 | 146 | { |
|
151 | 147 | "range": { |
|
152 | 148 | "timestamp": { |
|
153 | 149 | "gte": filter_settings["start_date"], |
|
154 | 150 | "lte": filter_settings["end_date"], |
|
155 | 151 | } |
|
156 | 152 | } |
|
157 | 153 | }, |
|
158 | 154 | {"terms": {"namespace": ["appenlight.request_metric"]}}, |
|
159 | 155 | ] |
|
160 | 156 | } |
|
161 | 157 | }, |
|
162 | 158 | } |
|
163 | 159 | |
|
164 | 160 | index_names = es_index_name_limiter( |
|
165 | 161 | start_date=filter_settings["start_date"], |
|
166 | 162 | end_date=filter_settings["end_date"], |
|
167 | 163 | ixtypes=["metrics"], |
|
168 | 164 | ) |
|
169 | 165 | if not index_names: |
|
170 | 166 | return [] |
|
171 | 167 | |
|
172 | 168 | result = Datastores.es.search( |
|
173 | 169 | body=es_query, index=index_names, doc_type="log", size=0 |
|
174 | 170 | ) |
|
175 | 171 | |
|
176 | 172 | plot_data = [] |
|
177 | 173 | for item in result["aggregations"]["parent_agg"]["buckets"]: |
|
178 | 174 | x_time = datetime.utcfromtimestamp(int(item["key"]) / 1000) |
|
179 | 175 | point = {"x": x_time} |
|
180 | 176 | for key in ["custom", "main", "nosql", "remote", "requests", "sql", "tmpl"]: |
|
181 | 177 | value = item[key]["sub_agg"]["value"] |
|
182 | 178 | point[key] = round(value, 3) if value else 0 |
|
183 | 179 | plot_data.append(point) |
|
184 | 180 | |
|
185 | 181 | return plot_data |
|
186 | 182 | |
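The seven near-identical sum aggregations in get_metrics_stats differ only in the tag name; a hedged helper that would generate them (it mirrors the query shape above, it is not a public API):

```python
def sum_agg_for(tag):
    # each time-spent bucket sums one numeric tag, filtered to docs that have it
    field = "tags.{}.numeric_values".format(tag)
    return {
        "aggs": {"sub_agg": {"sum": {"field": field}}},
        "filter": {"exists": {"field": field}},
    }

METRIC_KEYS = ["custom", "main", "nosql", "remote", "requests", "sql", "tmpl"]
aggs = {key: sum_agg_for(key) for key in METRIC_KEYS}
```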
|
187 | 183 | @classmethod |
|
188 | 184 | def get_requests_breakdown(cls, request, filter_settings, db_session=None): |
|
189 | 185 | db_session = get_db_session(db_session) |
|
190 | 186 | |
|
191 | 187 | # fetch total time of all requests in this time range |
|
192 | 188 | index_names = es_index_name_limiter( |
|
193 | 189 | start_date=filter_settings["start_date"], |
|
194 | 190 | end_date=filter_settings["end_date"], |
|
195 | 191 | ixtypes=["metrics"], |
|
196 | 192 | ) |
|
197 | 193 | |
|
198 | 194 | if index_names and filter_settings["resource"]: |
|
199 | 195 | es_query = { |
|
200 | 196 | "aggs": { |
|
201 | 197 | "main": { |
|
202 | 198 | "aggs": { |
|
203 | 199 | "sub_agg": {"sum": {"field": "tags.main.numeric_values"}} |
|
204 | 200 | }, |
|
205 | 201 | "filter": {"exists": {"field": "tags.main.numeric_values"}}, |
|
206 | 202 | } |
|
207 | 203 | }, |
|
208 | 204 | "query": { |
|
209 | 205 | "bool": { |
|
210 | 206 | "filter": [ |
|
211 | 207 | { |
|
212 | 208 | "terms": { |
|
213 | 209 | "resource_id": [filter_settings["resource"][0]] |
|
214 | 210 | } |
|
215 | 211 | }, |
|
216 | 212 | { |
|
217 | 213 | "range": { |
|
218 | 214 | "timestamp": { |
|
219 | 215 | "gte": filter_settings["start_date"], |
|
220 | 216 | "lte": filter_settings["end_date"], |
|
221 | 217 | } |
|
222 | 218 | } |
|
223 | 219 | }, |
|
224 | 220 | {"terms": {"namespace": ["appenlight.request_metric"]}}, |
|
225 | 221 | ] |
|
226 | 222 | } |
|
227 | 223 | }, |
|
228 | 224 | } |
|
229 | 225 | result = Datastores.es.search( |
|
230 | 226 | body=es_query, index=index_names, doc_type="log", size=0 |
|
231 | 227 | ) |
|
232 | 228 | total_time_spent = result["aggregations"]["main"]["sub_agg"]["value"] |
|
233 | 229 | else: |
|
234 | 230 | total_time_spent = 0 |
|
235 | 231 | script_text = "doc['tags.main.numeric_values'].value / {}".format( |
|
236 | 232 | total_time_spent |
|
237 | 233 | ) |
|
238 | 234 | if total_time_spent == 0: |
|
239 | script_text = '0' | 

235 | script_text = "0" | |
|
240 | 236 | |
|
241 | 237 | if index_names and filter_settings["resource"]: |
|
242 | 238 | es_query = { |
|
243 | 239 | "aggs": { |
|
244 | 240 | "parent_agg": { |
|
245 | 241 | "aggs": { |
|
246 | 242 | "main": { |
|
247 | 243 | "aggs": { |
|
248 | 244 | "sub_agg": { |
|
249 | 245 | "sum": {"field": "tags.main.numeric_values"} |
|
250 | 246 | } |
|
251 | 247 | }, |
|
252 | 248 | "filter": { |
|
253 | 249 | "exists": {"field": "tags.main.numeric_values"} |
|
254 | 250 | }, |
|
255 | 251 | }, |
|
256 | 252 | "percentage": { |
|
257 | "aggs": { | |
|
258 | "sub_agg": { | |
|
259 | "sum": { | |
|
260 | "script": script_text, | |
|
261 | } | |
|
262 | } | |
|
263 | }, | |
|
253 | "aggs": {"sub_agg": {"sum": {"script": script_text}}}, | |
|
264 | 254 | "filter": { |
|
265 | 255 | "exists": {"field": "tags.main.numeric_values"} |
|
266 | 256 | }, |
|
267 | 257 | }, |
|
268 | 258 | "requests": { |
|
269 | 259 | "aggs": { |
|
270 | 260 | "sub_agg": { |
|
271 | 261 | "sum": {"field": "tags.requests.numeric_values"} |
|
272 | 262 | } |
|
273 | 263 | }, |
|
274 | 264 | "filter": { |
|
275 | 265 | "exists": {"field": "tags.requests.numeric_values"} |
|
276 | 266 | }, |
|
277 | 267 | }, |
|
278 | 268 | }, |
|
279 | 269 | "terms": { |
|
280 | 270 | "field": "tags.view_name.values.keyword", |
|
281 | 271 | "order": {"percentage>sub_agg": "desc"}, |
|
282 | 272 | "size": 15, |
|
283 | 273 | }, |
|
284 | 274 | } |
|
285 | 275 | }, |
|
286 | 276 | "query": { |
|
287 | 277 | "bool": { |
|
288 | 278 | "filter": [ |
|
289 | 279 | { |
|
290 | 280 | "terms": { |
|
291 | 281 | "resource_id": [filter_settings["resource"][0]] |
|
292 | 282 | } |
|
293 | 283 | }, |
|
294 | 284 | { |
|
295 | 285 | "range": { |
|
296 | 286 | "timestamp": { |
|
297 | 287 | "gte": filter_settings["start_date"], |
|
298 | 288 | "lte": filter_settings["end_date"], |
|
299 | 289 | } |
|
300 | 290 | } |
|
301 | 291 | }, |
|
302 | 292 | ] |
|
303 | 293 | } |
|
304 | 294 | }, |
|
305 | 295 | } |
|
306 | 296 | result = Datastores.es.search( |
|
307 | 297 | body=es_query, index=index_names, doc_type="log", size=0 |
|
308 | 298 | ) |
|
309 | 299 | series = result["aggregations"]["parent_agg"]["buckets"] |
|
310 | 300 | else: |
|
311 | 301 | series = [] |
|
312 | 302 | |
|
313 | 303 | and_part = [ |
|
314 | 304 | {"term": {"resource_id": filter_settings["resource"][0]}}, |
|
315 | 305 | {"terms": {"tags.view_name.values": [row["key"] for row in series]}}, |
|
316 | 306 | {"term": {"report_type": str(ReportType.slow)}}, |
|
317 | 307 | ] |
|
318 | 308 | query = { |
|
319 | 309 | "aggs": { |
|
320 | 310 | "top_reports": { |
|
321 | "terms": {"field": "tags.view_name.values.keyword", "size": len(series)}, | |
|
311 | "terms": { | |
|
312 | "field": "tags.view_name.values.keyword", | |
|
313 | "size": len(series), | |
|
314 | }, | |
|
322 | 315 | "aggs": { |
|
323 | 316 | "top_calls_hits": { |
|
324 | 317 | "top_hits": {"sort": {"start_time": "desc"}, "size": 5} |
|
325 | 318 | } |
|
326 | 319 | }, |
|
327 | 320 | } |
|
328 | 321 | }, |
|
329 | 322 | "query": {"bool": {"filter": and_part}}, |
|
330 | 323 | } |
|
331 | 324 | details = {} |
|
332 | 325 | index_names = es_index_name_limiter(ixtypes=["reports"]) |
|
333 | 326 | if index_names and series: |
|
334 | 327 | result = Datastores.es.search( |
|
335 | 328 | body=query, doc_type="report", size=0, index=index_names |
|
336 | 329 | ) |
|
337 | 330 | for bucket in result["aggregations"]["top_reports"]["buckets"]: |
|
338 | 331 | details[bucket["key"]] = [] |
|
339 | 332 | |
|
340 | 333 | for hit in bucket["top_calls_hits"]["hits"]["hits"]: |
|
341 | 334 | details[bucket["key"]].append( |
|
342 | 335 | { |
|
343 | 336 | "report_id": hit["_source"]["request_metric_id"], |
|
344 | 337 | "group_id": hit["_source"]["group_id"], |
|
345 | 338 | } |
|
346 | 339 | ) |
|
347 | 340 | |
|
348 | 341 | results = [] |
|
349 | 342 | for row in series: |
|
350 | 343 | result = { |
|
351 | 344 | "key": row["key"], |
|
352 | 345 | "main": row["main"]["sub_agg"]["value"], |
|
353 | 346 | "requests": row["requests"]["sub_agg"]["value"], |
|
354 | 347 | } |
|
355 | 348 | # es can return 'infinity' |
|
356 | 349 | try: |
|
357 | 350 | result["percentage"] = float(row["percentage"]["sub_agg"]["value"]) |
|
358 | 351 | except ValueError: |
|
359 | 352 | result["percentage"] = 0 |
|
360 | 353 | |
|
361 | 354 | result["latest_details"] = details.get(row["key"]) or [] |
|
362 | 355 | results.append(result) |
|
363 | 356 | |
|
364 | 357 | return results |
|
365 | 358 | |
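The percentage aggregation above pushes the per-view division into an ES inline script so that the terms ordering ("percentage>sub_agg": "desc") can happen server-side; a total of zero is guarded by swapping the whole script for the constant "0". A condensed restatement of that guard:

```python
total_time_spent = 0  # e.g. nothing was indexed in the queried window
script_text = (
    "doc['tags.main.numeric_values'].value / {}".format(total_time_spent)
    if total_time_spent
    else "0"
)
```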
|
366 | 359 | @classmethod |
|
367 | 360 | def get_apdex_stats(cls, request, filter_settings, threshold=1, db_session=None): |
|
368 | 361 | """ |
|
369 | 362 | Returns information and calculates the Apdex score per server for the 

370 | 363 | dashboard's server-information boxes (upper right) 
|
371 | 364 | """ |
|
372 | 365 | # Apdex t = (Satisfied Count + Tolerated Count / 2) / Total Samples |
|
373 | 366 | db_session = get_db_session(db_session) |
|
374 | 367 | index_names = es_index_name_limiter( |
|
375 | 368 | start_date=filter_settings["start_date"], |
|
376 | 369 | end_date=filter_settings["end_date"], |
|
377 | 370 | ixtypes=["metrics"], |
|
378 | 371 | ) |
|
379 | 372 | |
|
380 | 373 | requests_series = [] |
|
381 | 374 | |
|
382 | 375 | if index_names and filter_settings["resource"]: |
|
383 | 376 | es_query = { |
|
384 | 377 | "aggs": { |
|
385 | 378 | "parent_agg": { |
|
386 | 379 | "aggs": { |
|
387 | 380 | "frustrating": { |
|
388 | 381 | "aggs": { |
|
389 | 382 | "sub_agg": { |
|
390 | 383 | "sum": {"field": "tags.requests.numeric_values"} |
|
391 | 384 | } |
|
392 | 385 | }, |
|
393 | 386 | "filter": { |
|
394 | 387 | "bool": { |
|
395 | 388 | "filter": [ |
|
396 | 389 | { |
|
397 | 390 | "range": { |
|
398 | "tags.main.numeric_values": {"gte": "4"} | 

391 | "tags.main.numeric_values": { | |
|
392 | "gte": "4" | |
|
393 | } | |
|
399 | 394 | } |
|
400 | 395 | }, |
|
401 | 396 | { |
|
402 | 397 | "exists": { |
|
403 | 398 | "field": "tags.requests.numeric_values" |
|
404 | 399 | } |
|
405 | 400 | }, |
|
406 | 401 | ] |
|
407 | 402 | } |
|
408 | 403 | }, |
|
409 | 404 | }, |
|
410 | 405 | "main": { |
|
411 | 406 | "aggs": { |
|
412 | 407 | "sub_agg": { |
|
413 | 408 | "sum": {"field": "tags.main.numeric_values"} |
|
414 | 409 | } |
|
415 | 410 | }, |
|
416 | 411 | "filter": { |
|
417 | 412 | "exists": {"field": "tags.main.numeric_values"} |
|
418 | 413 | }, |
|
419 | 414 | }, |
|
420 | 415 | "requests": { |
|
421 | 416 | "aggs": { |
|
422 | 417 | "sub_agg": { |
|
423 | 418 | "sum": {"field": "tags.requests.numeric_values"} |
|
424 | 419 | } |
|
425 | 420 | }, |
|
426 | 421 | "filter": { |
|
427 | 422 | "exists": {"field": "tags.requests.numeric_values"} |
|
428 | 423 | }, |
|
429 | 424 | }, |
|
430 | 425 | "tolerated": { |
|
431 | 426 | "aggs": { |
|
432 | 427 | "sub_agg": { |
|
433 | 428 | "sum": {"field": "tags.requests.numeric_values"} |
|
434 | 429 | } |
|
435 | 430 | }, |
|
436 | 431 | "filter": { |
|
437 | "bool": {"filter": [ | 

432 | "bool": { | |
|
433 | "filter": [ | |
|
438 | 434 | { |
|
439 | 435 | "range": { |
|
440 | "tags.main.numeric_values": {"gte": "1"} | 

436 | "tags.main.numeric_values": { | |
|
437 | "gte": "1" | |
|
438 | } | |
|
441 | 439 | } |
|
442 | 440 | }, |
|
443 | 441 | { |
|
444 | 442 | "range": { |
|
445 | "tags.main.numeric_values": {"lt": "4"} | 

443 | "tags.main.numeric_values": { | |
|
444 | "lt": "4" | |
|
445 | } | |
|
446 | 446 | } |
|
447 | 447 | }, |
|
448 | 448 | { |
|
449 | 449 | "exists": { |
|
450 | 450 | "field": "tags.requests.numeric_values" |
|
451 | 451 | } |
|
452 | 452 | }, |
|
453 | ]}}, | 

453 | ] | |
|
454 | } | |
|
455 | }, | |
|
454 | 456 | }, 

455 | 457 | }, 

458 | "terms": { | |
|
459 | "field": "tags.server_name.values.keyword", | |
|
460 | "size": 999999, | |
|
456 | 461 | }, |
|
457 | "terms": {"field": "tags.server_name.values.keyword", "size": 999999}, | |
|
458 | 462 | } |
|
459 | 463 | }, |
|
460 | 464 | "query": { |
|
461 | 465 | "bool": { |
|
462 | 466 | "filter": [ |
|
463 | 467 | { |
|
464 | 468 | "terms": { |
|
465 | 469 | "resource_id": [filter_settings["resource"][0]] |
|
466 | 470 | } |
|
467 | 471 | }, |
|
468 | 472 | { |
|
469 | 473 | "range": { |
|
470 | 474 | "timestamp": { |
|
471 | 475 | "gte": filter_settings["start_date"], |
|
472 | 476 | "lte": filter_settings["end_date"], |
|
473 | 477 | } |
|
474 | 478 | } |
|
475 | 479 | }, |
|
476 | 480 | {"terms": {"namespace": ["appenlight.request_metric"]}}, |
|
477 | 481 | ] |
|
478 | 482 | } |
|
479 | 483 | }, |
|
480 | 484 | } |
|
481 | 485 | |
|
482 | 486 | result = Datastores.es.search( |
|
483 | 487 | body=es_query, index=index_names, doc_type="log", size=0 |
|
484 | 488 | ) |
|
485 | 489 | for bucket in result["aggregations"]["parent_agg"]["buckets"]: |
|
486 | 490 | requests_series.append( |
|
487 | 491 | { |
|
488 | 492 | "frustrating": bucket["frustrating"]["sub_agg"]["value"], |
|
489 | 493 | "main": bucket["main"]["sub_agg"]["value"], |
|
490 | 494 | "requests": bucket["requests"]["sub_agg"]["value"], |
|
491 | 495 | "tolerated": bucket["tolerated"]["sub_agg"]["value"], |
|
492 | 496 | "key": bucket["key"], |
|
493 | 497 | } |
|
494 | 498 | ) |
|
495 | 499 | |
|
496 | 500 | since_when = filter_settings["start_date"] |
|
497 | 501 | until = filter_settings["end_date"] |
|
498 | 502 | |
|
499 | 503 | # total errors |
|
500 | 504 | |
|
501 | 505 | index_names = es_index_name_limiter( |
|
502 | 506 | start_date=filter_settings["start_date"], |
|
503 | 507 | end_date=filter_settings["end_date"], |
|
504 | 508 | ixtypes=["reports"], |
|
505 | 509 | ) |
|
506 | 510 | |
|
507 | 511 | report_series = [] |
|
508 | 512 | if index_names and filter_settings["resource"]: |
|
509 | 513 | report_type = ReportType.key_from_value(ReportType.error) |
|
510 | 514 | es_query = { |
|
511 | 515 | "aggs": { |
|
512 | 516 | "parent_agg": { |
|
513 | 517 | "aggs": { |
|
514 | 518 | "errors": { |
|
515 | 519 | "aggs": { |
|
516 | 520 | "sub_agg": { |
|
517 | 521 | "sum": { |
|
518 | 522 | "field": "tags.occurences.numeric_values" |
|
519 | 523 | } |
|
520 | 524 | } |
|
521 | 525 | }, |
|
522 | 526 | "filter": { |
|
523 | 527 | "bool": { |
|
524 | 528 | "filter": [ |
|
525 | {"terms": {"tags.type.values": [report_type]}}, | 

529 | { | |
|
530 | "terms": { | |
|
531 | "tags.type.values": [report_type] | |
|
532 | } | |
|
533 | }, | |
|
526 | 534 | { |
|
527 | 535 | "exists": { |
|
528 | 536 | "field": "tags.occurences.numeric_values" |
|
529 | 537 | } |
|
530 | 538 | }, |
|
531 | 539 | ] |
|
532 | 540 | } |
|
533 | 541 | }, |
|
534 | 542 | } |
|
535 | 543 | }, |
|
536 | "terms": {"field": "tags.server_name.values.keyword", "size": 999999}, | |
|
544 | "terms": { | |
|
545 | "field": "tags.server_name.values.keyword", | |
|
546 | "size": 999999, | |
|
547 | }, | |
|
537 | 548 | } |
|
538 | 549 | }, |
|
539 | 550 | "query": { |
|
540 | 551 | "bool": { |
|
541 | 552 | "filter": [ |
|
542 | 553 | { |
|
543 | 554 | "terms": { |
|
544 | 555 | "resource_id": [filter_settings["resource"][0]] |
|
545 | 556 | } |
|
546 | 557 | }, |
|
547 | 558 | { |
|
548 | 559 | "range": { |
|
549 | 560 | "timestamp": { |
|
550 | 561 | "gte": filter_settings["start_date"], |
|
551 | 562 | "lte": filter_settings["end_date"], |
|
552 | 563 | } |
|
553 | 564 | } |
|
554 | 565 | }, |
|
555 | 566 | {"terms": {"namespace": ["appenlight.error"]}}, |
|
556 | 567 | ] |
|
557 | 568 | } |
|
558 | 569 | }, |
|
559 | 570 | } |
|
560 | 571 | result = Datastores.es.search( |
|
561 | 572 | body=es_query, index=index_names, doc_type="log", size=0 |
|
562 | 573 | ) |
|
563 | 574 | for bucket in result["aggregations"]["parent_agg"]["buckets"]: |
|
564 | 575 | report_series.append( |
|
565 | 576 | { |
|
566 | 577 | "key": bucket["key"], |
|
567 | 578 | "errors": bucket["errors"]["sub_agg"]["value"], |
|
568 | 579 | } |
|
569 | 580 | ) |
|
570 | 581 | |
|
571 | 582 | stats = {} |
|
572 | 583 | if UptimeMetricService is not None: |
|
573 | 584 | uptime = UptimeMetricService.get_uptime_by_app( |
|
574 | 585 | filter_settings["resource"][0], since_when=since_when, until=until |
|
575 | 586 | ) |
|
576 | 587 | else: |
|
577 | 588 | uptime = 0 |
|
578 | 589 | |
|
579 | 590 | total_seconds = (until - since_when).total_seconds() |
|
580 | 591 | |
|
581 | 592 | for stat in requests_series: |
|
582 | 593 | check_key(stat["key"], stats, uptime, total_seconds) |
|
583 | 594 | stats[stat["key"]]["requests"] = int(stat["requests"]) |
|
584 | 595 | stats[stat["key"]]["response_time"] = stat["main"] |
|
585 | 596 | stats[stat["key"]]["tolerated_requests"] = stat["tolerated"] |
|
586 | 597 | stats[stat["key"]]["frustrating_requests"] = stat["frustrating"] |
|
587 | 598 | for server in report_series: |
|
588 | 599 | check_key(server["key"], stats, uptime, total_seconds) |
|
589 | 600 | stats[server["key"]]["errors"] = server["errors"] |
|
590 | 601 | |
|
591 | 602 | server_stats = list(stats.values()) |
|
592 | 603 | for stat in server_stats: |
|
593 | 604 | stat["satisfying_requests"] = ( |
|
594 | 605 | stat["requests"] 

595 | 606 | - stat["errors"] 

596 | 607 | - stat["frustrating_requests"] 

597 | 608 | - stat["tolerated_requests"] 
|
|
598 | 609 | ) |
|
599 | 610 | if stat["satisfying_requests"] < 0: |
|
600 | 611 | stat["satisfying_requests"] = 0 |
|
601 | 612 | |
|
602 | 613 | if stat["requests"]: |
|
603 | 614 | stat["avg_response_time"] = round( |
|
604 | 615 | stat["response_time"] / stat["requests"], 3 |
|
605 | 616 | ) |
|
606 | 617 | qual_requests = ( |
|
607 | 618 | stat["satisfying_requests"] + stat["tolerated_requests"] / 2.0 

608 | 619 | ) |
|
609 | 620 | stat["apdex"] = round((qual_requests / stat["requests"]) * 100, 2) |
|
610 | 621 | stat["rpm"] = round(stat["requests"] / stat["total_minutes"], 2) |
|
611 | 622 | |
|
612 | 623 | return sorted(server_stats, key=lambda x: x["name"]) |
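A worked instance of the Apdex formula quoted in the method comment (Apdex = (satisfied + tolerated / 2) / total), with made-up counts:

```python
requests, satisfying, tolerated = 1000, 800, 150
qual_requests = satisfying + tolerated / 2.0  # 875.0
apdex = round(qual_requests / requests * 100, 2)
assert apdex == 87.5
```
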
@@ -1,182 +1,181 b'' | |||
|
1 | 1 | # -*- coding: utf-8 -*- |
|
2 | 2 | |
|
3 | 3 | # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors |
|
4 | 4 | # |
|
5 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 | 6 | # you may not use this file except in compliance with the License. |
|
7 | 7 | # You may obtain a copy of the License at |
|
8 | 8 | # |
|
9 | 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
|
10 | 10 | # |
|
11 | 11 | # Unless required by applicable law or agreed to in writing, software |
|
12 | 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
|
13 | 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 | 14 | # See the License for the specific language governing permissions and |
|
15 | 15 | # limitations under the License. |
|
16 | 16 | |
|
17 | 17 | from appenlight.models import get_db_session, Datastores |
|
18 | 18 | from appenlight.models.report import Report |
|
19 | 19 | from appenlight.models.services.base import BaseService |
|
20 | 20 | from appenlight.lib.utils import es_index_name_limiter |
|
21 | 21 | |
|
22 | 22 | |
|
23 | 23 | class SlowCallService(BaseService): |
|
24 | 24 | @classmethod |
|
25 | 25 | def get_time_consuming_calls(cls, request, filter_settings, db_session=None): |
|
26 | 26 | db_session = get_db_session(db_session) |
|
27 | 27 | # get slow calls from older partitions too |
|
28 | 28 | index_names = es_index_name_limiter( |
|
29 | 29 | start_date=filter_settings["start_date"], |
|
30 | 30 | end_date=filter_settings["end_date"], |
|
31 | 31 | ixtypes=["slow_calls"], |
|
32 | 32 | ) |
|
33 | 33 | if index_names and filter_settings["resource"]: |
|
34 | 34 | # get longest time taking hashes |
|
35 | 35 | es_query = { |
|
36 | 36 | "aggs": { |
|
37 | 37 | "parent_agg": { |
|
38 | 38 | "aggs": { |
|
39 | 39 | "duration": { |
|
40 | 40 | "aggs": { |
|
41 | 41 | "sub_agg": { |
|
42 | 42 | "sum": {"field": "tags.duration.numeric_values"} |
|
43 | 43 | } |
|
44 | 44 | }, |
|
45 | 45 | "filter": { |
|
46 | 46 | "exists": {"field": "tags.duration.numeric_values"} |
|
47 | 47 | }, |
|
48 | 48 | }, |
|
49 | 49 | "total": { |
|
50 | 50 | "aggs": { |
|
51 | 51 | "sub_agg": { |
|
52 | 52 | "value_count": { |
|
53 | 53 | "field": "tags.statement_hash.values.keyword" |
|
54 | 54 | } |
|
55 | 55 | } |
|
56 | 56 | }, |
|
57 | 57 | "filter": { |
|
58 | 58 | "exists": {"field": "tags.statement_hash.values"} |
|
59 | 59 | }, |
|
60 | 60 | }, |
|
61 | 61 | }, |
|
62 | 62 | "terms": { |
|
63 | 63 | "field": "tags.statement_hash.values.keyword", |
|
64 | 64 | "order": {"duration>sub_agg": "desc"}, |
|
65 | 65 | "size": 15, |
|
66 | 66 | }, |
|
67 | 67 | } |
|
68 | 68 | }, |
|
69 | 69 | "query": { |
|
70 | 70 | "bool": { |
|
71 | 71 | "filter": [ |
|
72 | 72 | { |
|
73 | 73 | "terms": { |
|
74 | 74 | "resource_id": [filter_settings["resource"][0]] |
|
75 | 75 | } |
|
76 | 76 | }, |
|
77 | 77 | { |
|
78 | 78 | "range": { |
|
79 | 79 | "timestamp": { |
|
80 | 80 | "gte": filter_settings["start_date"], |
|
81 | 81 | "lte": filter_settings["end_date"], |
|
82 | 82 | } |
|
83 | 83 | } |
|
84 | 84 | }, |
|
85 | 85 | ] |
|
86 | 86 | } |
|
87 | 87 | }, |
|
88 | 88 | } |
|
89 | 89 | result = Datastores.es.search( |
|
90 | 90 | body=es_query, index=index_names, doc_type="log", size=0 |
|
91 | 91 | ) |
|
92 | 92 | results = result["aggregations"]["parent_agg"]["buckets"] |
|
93 | 93 | else: |
|
94 | 94 | return [] |
|
95 | 95 | hashes = [i["key"] for i in results] |
|
96 | 96 | |
|
97 | 97 | # get queries associated with hashes |
|
98 | 98 | calls_query = { |
|
99 | 99 | "aggs": { |
|
100 | 100 | "top_calls": { |
|
101 | "terms": {"field": "tags.statement_hash.values.keyword", "size": 15}, | |
|
101 | "terms": { | |
|
102 | "field": "tags.statement_hash.values.keyword", | |
|
103 | "size": 15, | |
|
104 | }, | |
|
102 | 105 | "aggs": { |
|
103 | 106 | "top_calls_hits": { |
|
104 | 107 | "top_hits": {"sort": {"timestamp": "desc"}, "size": 5} |
|
105 | 108 | } |
|
106 | 109 | }, |
|
107 | 110 | } |
|
108 | 111 | }, |
|
109 | 112 | "query": { |
|
110 | 113 | "bool": { |
|
111 | 114 | "filter": [ |
|
112 | { | |
|
113 | "terms": { | |
|
114 | "resource_id": [filter_settings["resource"][0]] | |
|
115 | } | |
|
116 | }, | |
|
115 | {"terms": {"resource_id": [filter_settings["resource"][0]]}}, | |
|
117 | 116 | {"terms": {"tags.statement_hash.values": hashes}}, |
|
118 | 117 | { |
|
119 | 118 | "range": { |
|
120 | 119 | "timestamp": { |
|
121 | 120 | "gte": filter_settings["start_date"], |
|
122 | 121 | "lte": filter_settings["end_date"], |
|
123 | 122 | } |
|
124 | 123 | } |
|
125 | 124 | }, |
|
126 | 125 | ] |
|
127 | 126 | } |
|
128 | 127 | }, |
|
129 | 128 | } |
|
130 | 129 | calls = Datastores.es.search( |
|
131 | 130 | body=calls_query, index=index_names, doc_type="log", size=0 |
|
132 | 131 | ) |
|
133 | 132 | call_results = {} |
|
134 | 133 | report_ids = [] |
|
135 | 134 | for call in calls["aggregations"]["top_calls"]["buckets"]: |
|
136 | 135 | hits = call["top_calls_hits"]["hits"]["hits"] |
|
137 | 136 | call_results[call["key"]] = [i["_source"] for i in hits] |
|
138 | 137 | report_ids.extend( |
|
139 | 138 | [i["_source"]["tags"]["report_id"]["values"] for i in hits] |
|
140 | 139 | ) |
|
141 | 140 | if report_ids: |
|
142 | 141 | r_query = db_session.query(Report.group_id, Report.id) |
|
143 | 142 | r_query = r_query.filter(Report.id.in_(report_ids)) |
|
144 | 143 | r_query = r_query.filter(Report.start_time >= filter_settings["start_date"]) |
|
145 | 144 | else: |
|
146 | 145 | r_query = [] |
|
147 | 146 | reports_reversed = {} |
|
148 | 147 | for report in r_query: |
|
149 | 148 | reports_reversed[report.id] = report.group_id |
|
150 | 149 | |
|
151 | 150 | final_results = [] |
|
152 | 151 | for item in results: |
|
153 | 152 | if item["key"] not in call_results: |
|
154 | 153 | continue |
|
155 | 154 | call = call_results[item["key"]][0] |
|
156 | 155 | row = { |
|
157 | 156 | "occurences": item["total"]["sub_agg"]["value"], |
|
158 | 157 | "total_duration": round(item["duration"]["sub_agg"]["value"]), |
|
159 | 158 | "statement": call["message"], |
|
160 | 159 | "statement_type": call["tags"]["type"]["values"], |
|
161 | 160 | "statement_subtype": call["tags"]["subtype"]["values"], |
|
162 | 161 | "statement_hash": item["key"], |
|
163 | 162 | "latest_details": [], |
|
164 | 163 | } |
|
165 | 164 | if row["statement_type"] in ["tmpl", " remote"]: |
|
166 | 165 | params = ( |
|
167 | 166 | call["tags"]["parameters"]["values"] |
|
168 | 167 | if "parameters" in call["tags"] |
|
169 | 168 | else "" |
|
170 | 169 | ) |
|
171 | 170 | row["statement"] = "{} ({})".format(call["message"], params) |
|
172 | 171 | for call in call_results[item["key"]]: |
|
173 | 172 | report_id = call["tags"]["report_id"]["values"] |
|
174 | 173 | group_id = reports_reversed.get(report_id) |
|
175 | 174 | if group_id: |
|
176 | 175 | row["latest_details"].append( |
|
177 | 176 | {"group_id": group_id, "report_id": report_id} |
|
178 | 177 | ) |
|
179 | 178 | |
|
180 | 179 | final_results.append(row) |
|
181 | 180 | |
|
182 | 181 | return final_results |
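get_time_consuming_calls is a two-pass flow: pass 1 ranks statement hashes by summed duration, pass 2 fetches the latest sample calls for only those hashes, and a final SQL query maps their report ids back to group ids. A condensed outline of the hand-off between the passes (the bucket contents are illustrative):

```python
# pass 1 output: buckets from the terms aggregation, costliest first
results = [{"key": "abc123"}, {"key": "def456"}]
hashes = [bucket["key"] for bucket in results]
# pass 2 then restricts the search to exactly those hashes
second_pass_filter = {"terms": {"tags.statement_hash.values": hashes}}
```
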
@@ -1,572 +1,559 b'' | |||
|
1 | 1 | # -*- coding: utf-8 -*- |
|
2 | 2 | |
|
3 | 3 | # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors |
|
4 | 4 | # |
|
5 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 | 6 | # you may not use this file except in compliance with the License. |
|
7 | 7 | # You may obtain a copy of the License at |
|
8 | 8 | # |
|
9 | 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
|
10 | 10 | # |
|
11 | 11 | # Unless required by applicable law or agreed to in writing, software |
|
12 | 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
|
13 | 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 | 14 | # See the License for the specific language governing permissions and |
|
15 | 15 | # limitations under the License. |
|
16 | 16 | |
|
17 | 17 | import argparse |
|
18 | 18 | import datetime |
|
19 | 19 | import logging |
|
20 | 20 | import copy |
|
21 | 21 | |
|
22 | 22 | import sqlalchemy as sa |
|
23 | 23 | import elasticsearch.exceptions |
|
24 | 24 | import elasticsearch.helpers |
|
25 | 25 | |
|
26 | 26 | from collections import defaultdict |
|
27 | 27 | from pyramid.paster import setup_logging |
|
28 | 28 | from pyramid.paster import bootstrap |
|
29 | 29 | from appenlight.models import DBSession, Datastores, metadata |
|
30 | 30 | from appenlight.lib import get_callable |
|
31 | 31 | from appenlight.models.report_group import ReportGroup |
|
32 | 32 | from appenlight.models.report import Report |
|
33 | 33 | from appenlight.models.report_stat import ReportStat |
|
34 | 34 | from appenlight.models.log import Log |
|
35 | 35 | from appenlight.models.slow_call import SlowCall |
|
36 | 36 | from appenlight.models.metric import Metric |
|
37 | 37 | |
|
38 | 38 | log = logging.getLogger(__name__) |
|
39 | 39 | |
|
40 | 40 | tables = { |
|
41 | 41 | "slow_calls_p_": [], |
|
42 | 42 | "reports_stats_p_": [], |
|
43 | 43 | "reports_p_": [], |
|
44 | 44 | "reports_groups_p_": [], |
|
45 | 45 | "logs_p_": [], |
|
46 | 46 | "metrics_p_": [], |
|
47 | 47 | } |
|
48 | 48 | |
|
49 | 49 | |
|
50 | 50 | def detect_tables(table_prefix): |
|
51 | 51 | found_tables = [] |
|
52 | 52 | db_tables_query = """ |
|
53 | 53 | SELECT tablename FROM pg_tables WHERE tablename NOT LIKE 'pg_%' AND |
|
54 | 54 | tablename NOT LIKE 'sql_%' ORDER BY tablename ASC;""" |
|
55 | 55 | |
|
56 | 56 | for table in DBSession.execute(db_tables_query).fetchall(): |
|
57 | 57 | tablename = table.tablename |
|
58 | 58 | if tablename.startswith(table_prefix): |
|
59 | 59 | t = sa.Table( |
|
60 | 60 | tablename, metadata, autoload=True, autoload_with=DBSession.bind.engine |
|
61 | 61 | ) |
|
62 | 62 | found_tables.append(t) |
|
63 | 63 | return found_tables |
|
64 | 64 | |
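Hypothetical usage of detect_tables: collect every monthly partition for one of the prefixes listed in the tables dict above.

```python
log_partitions = detect_tables("logs_p_")  # e.g. logs_p_2017_01, ...
```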
|
65 | 65 | |
|
66 | 66 | def main(): |
|
67 | 67 | """ |
|
68 | 68 | Recreates Elasticsearch indexes 

69 | 69 | Performs a reindex of the whole DB into Elasticsearch 
|
70 | 70 | |
|
71 | 71 | """ |
|
72 | 72 | |
|
73 | 73 | # need the parser twice because we first have to load the ini file, 

74 | 74 | # bootstrap pyramid and only then load the plugins 
|
75 | 75 | pre_parser = argparse.ArgumentParser( |
|
76 | 76 | description="Reindex AppEnlight data", add_help=False |
|
77 | 77 | ) |
|
78 | 78 | pre_parser.add_argument( |
|
79 | 79 | "-c", "--config", required=True, help="Configuration ini file of application" |
|
80 | 80 | ) |
|
81 | 81 | pre_parser.add_argument("-h", "--help", help="Show help", nargs="?") |
|
82 | 82 | pre_parser.add_argument( |
|
83 | 83 | "-t", "--types", nargs="+", help="Which parts of database should get reindexed" |
|
84 | 84 | ) |
|
85 | 85 | args = pre_parser.parse_args() |
|
86 | 86 | |
|
87 | 87 | config_uri = args.config |
|
88 | 88 | setup_logging(config_uri) |
|
89 | 89 | log.setLevel(logging.INFO) |
|
90 | 90 | env = bootstrap(config_uri) |
|
91 | 91 | parser = argparse.ArgumentParser(description="Reindex AppEnlight data") |
|
92 | 92 | choices = { |
|
93 | 93 | "reports": "appenlight.scripts.reindex_elasticsearch:reindex_reports", |
|
94 | 94 | "logs": "appenlight.scripts.reindex_elasticsearch:reindex_logs", |
|
95 | 95 | "metrics": "appenlight.scripts.reindex_elasticsearch:reindex_metrics", |
|
96 | 96 | "slow_calls": "appenlight.scripts.reindex_elasticsearch:reindex_slow_calls", |
|
97 | 97 | "template": "appenlight.scripts.reindex_elasticsearch:update_template", |
|
98 | 98 | } |
|
99 | 99 | for k, v in env["registry"].appenlight_plugins.items(): |
|
100 | 100 | if v.get("fulltext_indexer"): |
|
101 | 101 | choices[k] = v["fulltext_indexer"] |
|
102 | 102 | parser.add_argument( |
|
103 | 103 | "-t", |
|
104 | 104 | "--types", |
|
105 | 105 | nargs="*", |
|
106 | 106 | choices=["all"] + list(choices.keys()), |
|
107 | 107 | default=[], |
|
108 | 108 | help="Which parts of database should get reindexed", |
|
109 | 109 | ) |
|
110 | 110 | parser.add_argument( |
|
111 | 111 | "-c", "--config", required=True, help="Configuration ini file of application" |
|
112 | 112 | ) |
|
113 | 113 | args = parser.parse_args() |
|
114 | 114 | |
|
115 | 115 | if "all" in args.types: |
|
116 | 116 | args.types = list(choices.keys()) |
|
117 | 117 | |
|
118 | 118 | print("Selected types to reindex: {}".format(args.types)) |
|
119 | 119 | |
|
120 | 120 | log.info("settings {}".format(args.types)) |
|
121 | 121 | |
|
122 | 122 | if "template" in args.types: |
|
123 | 123 | get_callable(choices["template"])() |
|
124 | 124 | args.types.remove("template") |
|
125 | 125 | for selected in args.types: |
|
126 | 126 | get_callable(choices[selected])() |
|
127 | 127 | |
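A minimal sketch of the two-stage argparse trick main() relies on: a pre-parser with add_help=False extracts only --config, the application is bootstrapped from that ini file, and only then can the real parser enumerate plugin-provided choices. The argv here is illustrative:

```python
import argparse

pre = argparse.ArgumentParser(add_help=False)
pre.add_argument("-c", "--config", required=True)
known, _ = pre.parse_known_args(["-c", "app.ini"])
# bootstrap(known.config) would run here and could extend the choices
parser = argparse.ArgumentParser(description="Reindex AppEnlight data")
parser.add_argument("-t", "--types", nargs="*", choices=["all", "reports"])
```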
|
128 | 128 | |
|
129 | 129 | def update_template(): |
|
130 | 130 | try: |
|
131 | 131 | Datastores.es.indices.delete_template("rcae_reports") |
|
132 | 132 | except elasticsearch.exceptions.NotFoundError as e: |
|
133 | 133 | log.error(e) |
|
134 | 134 | |
|
135 | 135 | try: |
|
136 | 136 | Datastores.es.indices.delete_template("rcae_logs") |
|
137 | 137 | except elasticsearch.exceptions.NotFoundError as e: |
|
138 | 138 | log.error(e) |
|
139 | 139 | try: |
|
140 | 140 | Datastores.es.indices.delete_template("rcae_slow_calls") |
|
141 | 141 | except elasticsearch.exceptions.NotFoundError as e: |
|
142 | 142 | log.error(e) |
|
143 | 143 | try: |
|
144 | 144 | Datastores.es.indices.delete_template("rcae_metrics") |
|
145 | 145 | except elasticsearch.exceptions.NotFoundError as e: |
|
146 | 146 | log.error(e) |
|
147 | 147 | log.info("updating elasticsearch template") |
|
148 | 148 | tag_templates = [ |
|
149 | 149 | { |
|
150 | 150 | "values": { |
|
151 | 151 | "path_match": "tags.*", |
|
152 | 152 | "mapping": { |
|
153 | 153 | "type": "object", |
|
154 | 154 | "properties": { |
|
155 | "values": {"type": "text", "analyzer": "tag_value", | 

155 | "values": { | |
|
156 | "type": "text", | |
|
157 | "analyzer": "tag_value", | |
|
156 | 158 | "fields": { 

157 | "keyword": { | 

158 | "type": "keyword", | 

159 | "ignore_above": 256 | |
|
160 | } | |
|
161 | }}, | |
|
159 | "keyword": {"type": "keyword", "ignore_above": 256} | |
|
160 | }, | |
|
161 | }, | |
|
162 | 162 | "numeric_values": {"type": "float"}, |
|
163 | 163 | }, |
|
164 | 164 | }, |
|
165 | 165 | } |
|
166 | 166 | } |
|
167 | 167 | ] |
|
168 | 168 | |
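In effect, the dynamic template above says: any object indexed under tags.* gets a "values" sub-field analyzed with the custom tag_value analyzer (keyword tokenizer plus lowercase, defined in shared_analysis just below), an exact-match .keyword multi-field capped at 256 characters, and a float "numeric_values". A hedged example of a document fragment such a mapping would accept:

```python
# illustrative payload only; the field names follow the template's path_match
doc = {
    "tags": {
        "view_name": {"values": "Home:Index"},  # matched case-insensitively
        "duration": {"numeric_values": 2.5},    # aggregatable as a float
    }
}
```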
|
169 | 169 | shared_analysis = { |
|
170 | 170 | "analyzer": { |
|
171 | 171 | "url_path": { |
|
172 | 172 | "type": "custom", |
|
173 | 173 | "char_filter": [], |
|
174 | 174 | "tokenizer": "path_hierarchy", |
|
175 | 175 | "filter": [], |
|
176 | 176 | }, |
|
177 | 177 | "tag_value": { |
|
178 | 178 | "type": "custom", |
|
179 | 179 | "char_filter": [], |
|
180 | 180 | "tokenizer": "keyword", |
|
181 | 181 | "filter": ["lowercase"], |
|
182 | 182 | }, |
|
183 | 183 | } |
|
184 | 184 | } |
|
185 | 185 | |
|
186 | 186 | shared_log_mapping = { |
|
187 | 187 | "_all": {"enabled": False}, |
|
188 | 188 | "dynamic_templates": tag_templates, |
|
189 | 189 | "properties": { |
|
190 | 190 | "pg_id": {"type": "keyword", "index": True}, |
|
191 | 191 | "delete_hash": {"type": "keyword", "index": True}, |
|
192 | 192 | "resource_id": {"type": "integer"}, |
|
193 | 193 | "timestamp": {"type": "date"}, |
|
194 | 194 | "permanent": {"type": "boolean"}, |
|
195 | 195 | "request_id": {"type": "keyword", "index": True}, |
|
196 | 196 | "log_level": {"type": "text", "analyzer": "simple"}, |
|
197 | 197 | "message": {"type": "text", "analyzer": "simple"}, |
|
198 | 198 | "namespace": { |
|
199 | 199 | "type": "text", |
|
200 | 200 | "fields": {"keyword": {"type": "keyword", "ignore_above": 256}}, |
|
201 | 201 | }, |
|
202 | 202 | "tags": {"type": "object"}, |
|
203 | "tag_list": {"type": "text", "analyzer": "tag_value", | |
|
204 | "fields": { | 

205 | "keyword": { | |
|
206 | "type": "keyword", | |
|
207 | "ignore_above": 256 | |
|
208 | } | |
|
209 | }}, | |
|
203 | "tag_list": { | |
|
204 | "type": "text", | |
|
205 | "analyzer": "tag_value", | |
|
206 | "fields": {"keyword": {"type": "keyword", "ignore_above": 256}}, | |
|
207 | }, | |
|
210 | 208 | }, |
|
211 | 209 | } |
|
212 | 210 | |
|
213 | 211 | report_schema = { |
|
214 | 212 | "template": "rcae_r_*", |
|
215 | 213 | "settings": { |
|
216 | 214 | "index": { |
|
217 | 215 | "refresh_interval": "5s", |
|
218 | "translog": {"sync_interval": "5s", "durability": "async"} | |
|
216 | "translog": {"sync_interval": "5s", "durability": "async"}, | |
|
219 | 217 | }, |
|
220 | 218 | "number_of_shards": 5, |
|
221 | 219 | "analysis": shared_analysis, |
|
222 | 220 | }, |
|
223 | 221 | "mappings": { |
|
224 | 222 | "report": { |
|
225 | 223 | "_all": {"enabled": False}, |
|
226 | 224 | "dynamic_templates": tag_templates, |
|
227 | 225 | "properties": { |
|
228 | 226 | "type": {"type": "keyword", "index": True}, |
|
229 | 227 | # report group |
|
230 | 228 | "group_id": {"type": "keyword", "index": True}, |
|
231 | 229 | "resource_id": {"type": "integer"}, |
|
232 | 230 | "priority": {"type": "integer"}, |
|
233 | 231 | "error": {"type": "text", "analyzer": "simple"}, |
|
234 | 232 | "read": {"type": "boolean"}, |
|
235 | 233 | "occurences": {"type": "integer"}, |
|
236 | 234 | "fixed": {"type": "boolean"}, |
|
237 | 235 | "first_timestamp": {"type": "date"}, |
|
238 | 236 | "last_timestamp": {"type": "date"}, |
|
239 | 237 | "average_duration": {"type": "float"}, |
|
240 | 238 | "summed_duration": {"type": "float"}, |
|
241 | 239 | "public": {"type": "boolean"}, |
|
242 | 240 | # report |
|
243 | ||
|
244 | 241 | "report_id": {"type": "keyword", "index": True}, |
|
245 | 242 | "http_status": {"type": "integer"}, |
|
246 | 243 | "ip": {"type": "keyword", "index": True}, |
|
247 | 244 | "url_domain": {"type": "text", "analyzer": "simple"}, |
|
248 | 245 | "url_path": {"type": "text", "analyzer": "url_path"}, |
|
249 | 246 | "report_type": {"type": "integer"}, |
|
250 | 247 | "start_time": {"type": "date"}, |
|
251 | 248 | "request_id": {"type": "keyword", "index": True}, |
|
252 | 249 | "end_time": {"type": "date"}, |
|
253 | 250 | "duration": {"type": "float"}, |
|
254 | 251 | "tags": {"type": "object"}, |
|
255 | "tag_list": {"type": "text", "analyzer": "tag_value", | 

256 | "fields": { | 

257 | "keyword": { | 

258 | "type": "keyword", | |
|
259 | "ignore_above": 256 | |
|
260 | } | |
|
261 | }}, | |
|
252 | "tag_list": { | |
|
253 | "type": "text", | |
|
254 | "analyzer": "tag_value", | |
|
255 | "fields": {"keyword": {"type": "keyword", "ignore_above": 256}}, | |
|
256 | }, | |
|
262 | 257 | "extra": {"type": "object"}, |
|
263 | ||
|
264 | 258 | # report stats |
|
265 | ||
|
266 | 259 | "report_stat_id": {"type": "keyword", "index": True}, |
|
267 | 260 | "timestamp": {"type": "date"}, |
|
268 | 261 | "permanent": {"type": "boolean"}, |
|
269 | 262 | "log_level": {"type": "text", "analyzer": "simple"}, |
|
270 | 263 | "message": {"type": "text", "analyzer": "simple"}, |
|
271 | 264 | "namespace": { |
|
272 | 265 | "type": "text", |
|
273 | 266 | "fields": {"keyword": {"type": "keyword", "ignore_above": 256}}, |
|
274 | 267 | }, |
|
275 | ||
|
276 | 268 | "join_field": { |
|
277 | 269 | "type": "join", |
|
278 | "relations": { | |
|
279 | "report_group": ["report", "report_stat"] | |
|
280 | } | |
|
281 | } | |
|
282 | ||
|
270 | "relations": {"report_group": ["report", "report_stat"]}, | |
|
271 | }, | |
|
283 | 272 | }, |
|
284 | 273 | } |
|
285 | } | |
|
274 | }, | |
|
286 | 275 | } |
|
287 | 276 | |
|
288 | 277 | Datastores.es.indices.put_template("rcae_reports", body=report_schema) |
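
Reviewer note: the `join_field` introduced above is the ES 6.x replacement for the removed `_parent` mapping: report and report_stat documents become children of a report_group inside the same index. A minimal sketch of indexing against this mapping (index name and ids illustrative, not part of this change):

    parent_doc = {"group_id": "1", "join_field": "report_group"}
    child_doc = {
        "report_id": "2",
        "join_field": {"name": "report", "parent": "1"},
    }
    Datastores.es.index(index="rcae_r_2017_10", doc_type="report",
                        id="1", body=parent_doc)
    # child documents must be routed to the parent's shard
    Datastores.es.index(index="rcae_r_2017_10", doc_type="report",
                        id="2", body=child_doc, routing="1")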
|
289 | 278 | |
|
290 | 279 | logs_mapping = copy.deepcopy(shared_log_mapping) |
|
291 | 280 | logs_mapping["properties"]["log_id"] = logs_mapping["properties"]["pg_id"] |
|
292 | 281 | del logs_mapping["properties"]["pg_id"] |
|
293 | 282 | |
|
294 | 283 | log_template = { |
|
295 | 284 | "template": "rcae_l_*", |
|
296 | 285 | "settings": { |
|
297 | 286 | "index": { |
|
298 | 287 | "refresh_interval": "5s", |
|
299 | 288 | "translog": {"sync_interval": "5s", "durability": "async"}, |
|
300 | 289 | }, |
|
301 | 290 | "number_of_shards": 5, |
|
302 | 291 | "analysis": shared_analysis, |
|
303 | 292 | }, |
|
304 | "mappings": { | |
|
305 | "log": logs_mapping, | |
|
306 | }, | |
|
293 | "mappings": {"log": logs_mapping}, | |
|
307 | 294 | } |
|
308 | 295 | |
|
309 | 296 | Datastores.es.indices.put_template("rcae_logs", body=log_template) |
|
310 | 297 | |
|
311 | 298 | slow_call_mapping = copy.deepcopy(shared_log_mapping) |
|
312 | slow_call_mapping["properties"]["slow_call_id"] = slow_call_mapping["properties"]["pg_id"] | |
|
|
299 | slow_call_mapping["properties"]["slow_call_id"] = slow_call_mapping["properties"][ | |
|
300 | "pg_id" | |
|
301 | ] | |
|
313 | 302 | del slow_call_mapping["properties"]["pg_id"] |
|
314 | 303 | |
|
315 | 304 | slow_call_template = { |
|
316 | 305 | "template": "rcae_sc_*", |
|
317 | 306 | "settings": { |
|
318 | 307 | "index": { |
|
319 | 308 | "refresh_interval": "5s", |
|
320 | 309 | "translog": {"sync_interval": "5s", "durability": "async"}, |
|
321 | 310 | }, |
|
322 | 311 | "number_of_shards": 5, |
|
323 | 312 | "analysis": shared_analysis, |
|
324 | 313 | }, |
|
325 | "mappings": { | |
|
326 | "log": slow_call_mapping, | |
|
327 | }, | |
|
314 | "mappings": {"log": slow_call_mapping}, | |
|
328 | 315 | } |
|
329 | 316 | |
|
330 | 317 | Datastores.es.indices.put_template("rcae_slow_calls", body=slow_call_template) |
|
331 | 318 | |
|
332 | 319 | metric_mapping = copy.deepcopy(shared_log_mapping) |
|
333 | 320 | metric_mapping["properties"]["metric_id"] = metric_mapping["properties"]["pg_id"] |
|
334 | 321 | del metric_mapping["properties"]["pg_id"] |
|
335 | 322 | |
|
336 | 323 | metrics_template = { |
|
337 | 324 | "template": "rcae_m_*", |
|
338 | 325 | "settings": { |
|
339 | 326 | "index": { |
|
340 | 327 | "refresh_interval": "5s", |
|
341 | 328 | "translog": {"sync_interval": "5s", "durability": "async"}, |
|
342 | 329 | }, |
|
343 | 330 | "number_of_shards": 5, |
|
344 | 331 | "analysis": shared_analysis, |
|
345 | 332 | }, |
|
346 | "mappings": { | |
|
347 | "log": metric_mapping, | |
|
348 | }, | |
|
333 | "mappings": {"log": metric_mapping}, | |
|
349 | 334 | } |
|
350 | 335 | |
|
351 | 336 | Datastores.es.indices.put_template("rcae_metrics", body=metrics_template) |
|
352 | 337 | |
|
353 | 338 | uptime_metric_mapping = copy.deepcopy(shared_log_mapping) |
|
354 | uptime_metric_mapping["properties"]["uptime_id"] = uptime_metric_mapping["properties"]["pg_id"] | |
|
|
339 | uptime_metric_mapping["properties"]["uptime_id"] = uptime_metric_mapping[ | |
|
340 | "properties" | |
|
341 | ]["pg_id"] | |
|
355 | 342 | del uptime_metric_mapping["properties"]["pg_id"] |
|
356 | 343 | |
|
357 | 344 | uptime_metrics_template = { |
|
358 | 345 | "template": "rcae_uptime_ce_*", |
|
359 | 346 | "settings": { |
|
360 | 347 | "index": { |
|
361 | 348 | "refresh_interval": "5s", |
|
362 | 349 | "translog": {"sync_interval": "5s", "durability": "async"}, |
|
363 | 350 | }, |
|
364 | 351 | "number_of_shards": 5, |
|
365 | 352 | "analysis": shared_analysis, |
|
366 | 353 | }, |
|
367 | "mappings": { | |
|
368 | "log": shared_log_mapping, | |
|
369 | }, | |
|
354 | "mappings": {"log": shared_log_mapping}, | |
|
370 | 355 | } |
|
371 | 356 | |
|
372 | Datastores.es.indices.put_template("rcae_uptime_metrics", body=uptime_metrics_template) | |
|
|
357 | Datastores.es.indices.put_template( | |
|
358 | "rcae_uptime_metrics", body=uptime_metrics_template | |
|
359 | ) | |
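
Reviewer note: all five templates are registered via put_template before any reindexing runs. A quick sanity check that they took effect (sketch only, using the stock elasticsearch-py call):

    for name in ("rcae_reports", "rcae_logs", "rcae_slow_calls",
                 "rcae_metrics", "rcae_uptime_metrics"):
        tmpl = Datastores.es.indices.get_template(name)
        assert name in tmpl, "template {} was not registered".format(name)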
|
373 | 360 | |
|
374 | 361 | |
|
375 | 362 | def reindex_reports(): |
|
376 | 363 | reports_groups_tables = detect_tables("reports_groups_p_") |
|
377 | 364 | try: |
|
378 | 365 | Datastores.es.indices.delete("rcae_r_*") |
|
379 | 366 | except elasticsearch.exceptions.NotFoundError as e: |
|
380 | 367 | log.error(e) |
|
381 | 368 | |
|
382 | 369 | log.info("reindexing report groups") |
|
383 | 370 | i = 0 |
|
384 | 371 | task_start = datetime.datetime.now() |
|
385 | 372 | for partition_table in reports_groups_tables: |
|
386 | 373 | conn = DBSession.connection().execution_options(stream_results=True) |
|
387 | 374 | result = conn.execute(partition_table.select()) |
|
388 | 375 | while True: |
|
389 | 376 | chunk = result.fetchmany(2000) |
|
390 | 377 | if not chunk: |
|
391 | 378 | break |
|
392 | 379 | es_docs = defaultdict(list) |
|
393 | 380 | for row in chunk: |
|
394 | 381 | i += 1 |
|
395 | 382 | item = ReportGroup(**dict(list(row.items()))) |
|
396 | 383 | d_range = item.partition_id |
|
397 | 384 | es_docs[d_range].append(item.es_doc()) |
|
398 | 385 | if es_docs: |
|
399 | 386 | name = partition_table.name |
|
400 | 387 | log.info("round {}, {}".format(i, name)) |
|
401 | 388 | for k, v in es_docs.items(): |
|
402 | 389 | to_update = {"_index": k, "_type": "report"} |
|
403 | 390 | [i.update(to_update) for i in v] |
|
404 | 391 | elasticsearch.helpers.bulk(Datastores.es, v) |
|
405 | 392 | |
|
406 | 393 | log.info("total docs {} {}".format(i, datetime.datetime.now() - task_start)) |
|
407 | 394 | |
|
408 | 395 | i = 0 |
|
409 | 396 | log.info("reindexing reports") |
|
410 | 397 | task_start = datetime.datetime.now() |
|
411 | 398 | reports_tables = detect_tables("reports_p_") |
|
412 | 399 | for partition_table in reports_tables: |
|
413 | 400 | conn = DBSession.connection().execution_options(stream_results=True) |
|
414 | 401 | result = conn.execute(partition_table.select()) |
|
415 | 402 | while True: |
|
416 | 403 | chunk = result.fetchmany(2000) |
|
417 | 404 | if not chunk: |
|
418 | 405 | break |
|
419 | 406 | es_docs = defaultdict(list) |
|
420 | 407 | for row in chunk: |
|
421 | 408 | i += 1 |
|
422 | 409 | item = Report(**dict(list(row.items()))) |
|
423 | 410 | d_range = item.partition_id |
|
424 | 411 | es_docs[d_range].append(item.es_doc()) |
|
425 | 412 | if es_docs: |
|
426 | 413 | name = partition_table.name |
|
427 | 414 | log.info("round {}, {}".format(i, name)) |
|
428 | 415 | for k, v in es_docs.items(): |
|
429 | 416 | to_update = {"_index": k, "_type": "report"} |
|
430 | 417 | [i.update(to_update) for i in v] |
|
431 | 418 | elasticsearch.helpers.bulk(Datastores.es, v) |
|
432 | 419 | |
|
433 | 420 | log.info("total docs {} {}".format(i, datetime.datetime.now() - task_start)) |
|
434 | 421 | |
|
435 | 422 | log.info("reindexing reports stats") |
|
436 | 423 | i = 0 |
|
437 | 424 | task_start = datetime.datetime.now() |
|
438 | 425 | reports_stats_tables = detect_tables("reports_stats_p_") |
|
439 | 426 | for partition_table in reports_stats_tables: |
|
440 | 427 | conn = DBSession.connection().execution_options(stream_results=True) |
|
441 | 428 | result = conn.execute(partition_table.select()) |
|
442 | 429 | while True: |
|
443 | 430 | chunk = result.fetchmany(2000) |
|
444 | 431 | if not chunk: |
|
445 | 432 | break |
|
446 | 433 | es_docs = defaultdict(list) |
|
447 | 434 | for row in chunk: |
|
448 | 435 | rd = dict(list(row.items())) |
|
449 | 436 | # remove legacy columns |
|
450 | 437 | # TODO: remove the column later |
|
451 | 438 | rd.pop("size", None) |
|
452 | 439 | item = ReportStat(**rd) |
|
453 | 440 | i += 1 |
|
454 | 441 | d_range = item.partition_id |
|
455 | 442 | es_docs[d_range].append(item.es_doc()) |
|
456 | 443 | if es_docs: |
|
457 | 444 | name = partition_table.name |
|
458 | 445 | log.info("round {}, {}".format(i, name)) |
|
459 | 446 | for k, v in es_docs.items(): |
|
460 | 447 | to_update = {"_index": k, "_type": "report"} |
|
461 | 448 | [i.update(to_update) for i in v] |
|
462 | 449 | elasticsearch.helpers.bulk(Datastores.es, v) |
|
463 | 450 | |
|
464 | 451 | log.info("total docs {} {}".format(i, datetime.datetime.now() - task_start)) |
|
465 | 452 | |
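
Reviewer note: reindex_logs, reindex_metrics and reindex_slow_calls below repeat the same stream-and-bulk shape as reindex_reports: stream each partition table in 2000-row chunks, group docs per target index, then bulk-index. A possible consolidation (sketch only, not part of this change; relies on the module's existing detect_tables, DBSession, defaultdict and Datastores):

    def reindex_partitions(table_prefix, model, es_type):
        for partition_table in detect_tables(table_prefix):
            conn = DBSession.connection().execution_options(stream_results=True)
            result = conn.execute(partition_table.select())
            while True:
                chunk = result.fetchmany(2000)
                if not chunk:
                    break
                es_docs = defaultdict(list)
                for row in chunk:
                    item = model(**dict(list(row.items())))
                    es_docs[item.partition_id].append(item.es_doc())
                for index_name, docs in es_docs.items():
                    # inject bulk metadata, then index per partition
                    for doc in docs:
                        doc.update({"_index": index_name, "_type": es_type})
                    elasticsearch.helpers.bulk(Datastores.es, docs)

    # e.g. reindex_partitions("logs_p_", Log, "log")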
|
466 | 453 | |
|
467 | 454 | def reindex_logs(): |
|
468 | 455 | try: |
|
469 | 456 | Datastores.es.indices.delete("rcae_l_*") |
|
470 | 457 | except elasticsearch.exceptions.NotFoundError as e: |
|
471 | 458 | log.error(e) |
|
472 | 459 | |
|
473 | 460 | # logs |
|
474 | 461 | log.info("reindexing logs") |
|
475 | 462 | i = 0 |
|
476 | 463 | task_start = datetime.datetime.now() |
|
477 | 464 | log_tables = detect_tables("logs_p_") |
|
478 | 465 | for partition_table in log_tables: |
|
479 | 466 | conn = DBSession.connection().execution_options(stream_results=True) |
|
480 | 467 | result = conn.execute(partition_table.select()) |
|
481 | 468 | while True: |
|
482 | 469 | chunk = result.fetchmany(2000) |
|
483 | 470 | if not chunk: |
|
484 | 471 | break |
|
485 | 472 | es_docs = defaultdict(list) |
|
486 | 473 | |
|
487 | 474 | for row in chunk: |
|
488 | 475 | i += 1 |
|
489 | 476 | item = Log(**dict(list(row.items()))) |
|
490 | 477 | d_range = item.partition_id |
|
491 | 478 | es_docs[d_range].append(item.es_doc()) |
|
492 | 479 | if es_docs: |
|
493 | 480 | name = partition_table.name |
|
494 | 481 | log.info("round {}, {}".format(i, name)) |
|
495 | 482 | for k, v in es_docs.items(): |
|
496 | 483 | to_update = {"_index": k, "_type": "log"} |
|
497 | 484 | [i.update(to_update) for i in v] |
|
498 | 485 | elasticsearch.helpers.bulk(Datastores.es, v) |
|
499 | 486 | |
|
500 | 487 | log.info("total docs {} {}".format(i, datetime.datetime.now() - task_start)) |
|
501 | 488 | |
|
502 | 489 | |
|
503 | 490 | def reindex_metrics(): |
|
504 | 491 | try: |
|
505 | 492 | Datastores.es.indices.delete("rcae_m_*") |
|
506 | 493 | except elasticsearch.exceptions.NotFoundError as e: |
|
507 | 494 | log.error(e) |
|
508 | 495 | |
|
509 | 496 | log.info("reindexing applications metrics") |
|
510 | 497 | i = 0 |
|
511 | 498 | task_start = datetime.datetime.now() |
|
512 | 499 | metric_tables = detect_tables("metrics_p_") |
|
513 | 500 | for partition_table in metric_tables: |
|
514 | 501 | conn = DBSession.connection().execution_options(stream_results=True) |
|
515 | 502 | result = conn.execute(partition_table.select()) |
|
516 | 503 | while True: |
|
517 | 504 | chunk = result.fetchmany(2000) |
|
518 | 505 | if not chunk: |
|
519 | 506 | break |
|
520 | 507 | es_docs = defaultdict(list) |
|
521 | 508 | for row in chunk: |
|
522 | 509 | i += 1 |
|
523 | 510 | item = Metric(**dict(list(row.items()))) |
|
524 | 511 | d_range = item.partition_id |
|
525 | 512 | es_docs[d_range].append(item.es_doc()) |
|
526 | 513 | if es_docs: |
|
527 | 514 | name = partition_table.name |
|
528 | 515 | log.info("round {}, {}".format(i, name)) |
|
529 | 516 | for k, v in es_docs.items(): |
|
530 | 517 | to_update = {"_index": k, "_type": "log"} |
|
531 | 518 | [i.update(to_update) for i in v] |
|
532 | 519 | elasticsearch.helpers.bulk(Datastores.es, v) |
|
533 | 520 | |
|
534 | 521 | log.info("total docs {} {}".format(i, datetime.datetime.now() - task_start)) |
|
535 | 522 | |
|
536 | 523 | |
|
537 | 524 | def reindex_slow_calls(): |
|
538 | 525 | try: |
|
539 | 526 | Datastores.es.indices.delete("rcae_sc_*") |
|
540 | 527 | except elasticsearch.exceptions.NotFoundError as e: |
|
541 | 528 | log.error(e) |
|
542 | 529 | |
|
543 | 530 | log.info("reindexing slow calls") |
|
544 | 531 | i = 0 |
|
545 | 532 | task_start = datetime.datetime.now() |
|
546 | 533 | slow_calls_tables = detect_tables("slow_calls_p_") |
|
547 | 534 | for partition_table in slow_calls_tables: |
|
548 | 535 | conn = DBSession.connection().execution_options(stream_results=True) |
|
549 | 536 | result = conn.execute(partition_table.select()) |
|
550 | 537 | while True: |
|
551 | 538 | chunk = result.fetchmany(2000) |
|
552 | 539 | if not chunk: |
|
553 | 540 | break |
|
554 | 541 | es_docs = defaultdict(list) |
|
555 | 542 | for row in chunk: |
|
556 | 543 | i += 1 |
|
557 | 544 | item = SlowCall(**dict(list(row.items()))) |
|
558 | 545 | d_range = item.partition_id |
|
559 | 546 | es_docs[d_range].append(item.es_doc()) |
|
560 | 547 | if es_docs: |
|
561 | 548 | name = partition_table.name |
|
562 | 549 | log.info("round {}, {}".format(i, name)) |
|
563 | 550 | for k, v in es_docs.items(): |
|
564 | 551 | to_update = {"_index": k, "_type": "log"} |
|
565 | 552 | [i.update(to_update) for i in v] |
|
566 | 553 | elasticsearch.helpers.bulk(Datastores.es, v) |
|
567 | 554 | |
|
568 | 555 | log.info("total docs {} {}".format(i, datetime.datetime.now() - task_start)) |
|
569 | 556 | |
|
570 | 557 | |
|
571 | 558 | if __name__ == "__main__": |
|
572 | 559 | main() |
@@ -1,220 +1,208 b'' | |||
|
1 | 1 | # -*- coding: utf-8 -*- |
|
2 | 2 | |
|
3 | 3 | # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors |
|
4 | 4 | # |
|
5 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 | 6 | # you may not use this file except in compliance with the License. |
|
7 | 7 | # You may obtain a copy of the License at |
|
8 | 8 | # |
|
9 | 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
|
10 | 10 | # |
|
11 | 11 | # Unless required by applicable law or agreed to in writing, software |
|
12 | 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
|
13 | 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 | 14 | # See the License for the specific language governing permissions and |
|
15 | 15 | # limitations under the License. |
|
16 | 16 | |
|
17 | 17 | import logging |
|
18 | 18 | from datetime import datetime, timedelta |
|
19 | 19 | |
|
20 | 20 | from pyramid.view import view_config |
|
21 | 21 | from pyramid.httpexceptions import HTTPUnprocessableEntity |
|
22 | 22 | from appenlight.models import Datastores, Log |
|
23 | 23 | from appenlight.models.services.log import LogService |
|
24 | 24 | from appenlight.lib.utils import ( |
|
25 | 25 | build_filter_settings_from_query_dict, |
|
26 | 26 | es_index_name_limiter, |
|
27 | 27 | ) |
|
28 | 28 | from appenlight.lib.helpers import gen_pagination_headers |
|
29 | 29 | from appenlight.celery.tasks import logs_cleanup |
|
30 | 30 | |
|
31 | 31 | log = logging.getLogger(__name__) |
|
32 | 32 | |
|
33 | 33 | section_filters_key = "appenlight:logs:filter:%s" |
|
34 | 34 | |
|
35 | 35 | |
|
36 | 36 | @view_config(route_name="logs_no_id", renderer="json", permission="authenticated") |
|
37 | 37 | def fetch_logs(request): |
|
38 | 38 | """ |
|
39 | 39 | Returns list of log entries from Elasticsearch |
|
40 | 40 | """ |
|
41 | 41 | |
|
42 | 42 | filter_settings = build_filter_settings_from_query_dict( |
|
43 | 43 | request, request.GET.mixed() |
|
44 | 44 | ) |
|
45 | 45 | logs_paginator = LogService.get_paginator_by_app_ids( |
|
46 | 46 | app_ids=filter_settings["resource"], |
|
47 | 47 | page=filter_settings["page"], |
|
48 | 48 | filter_settings=filter_settings, |
|
49 | 49 | ) |
|
50 | 50 | headers = gen_pagination_headers(request, logs_paginator) |
|
51 | 51 | request.response.headers.update(headers) |
|
52 | 52 | |
|
53 | 53 | return [l.get_dict() for l in logs_paginator.sa_items] |
|
54 | 54 | |
|
55 | 55 | |
|
56 | 56 | @view_config( |
|
57 | 57 | route_name="section_view", |
|
58 | 58 | match_param=["section=logs_section", "view=fetch_series"], |
|
59 | 59 | renderer="json", |
|
60 | 60 | permission="authenticated", |
|
61 | 61 | ) |
|
62 | 62 | def logs_fetch_series(request): |
|
63 | 63 | """ |
|
64 | 64 | Handles metric dashboard graphs |
|
65 | 65 | Returns information for time/tier breakdown |
|
66 | 66 | """ |
|
67 | 67 | filter_settings = build_filter_settings_from_query_dict( |
|
68 | 68 | request, request.GET.mixed() |
|
69 | 69 | ) |
|
70 | 70 | paginator = LogService.get_paginator_by_app_ids( |
|
71 | 71 | app_ids=filter_settings["resource"], |
|
72 | 72 | page=1, |
|
73 | 73 | filter_settings=filter_settings, |
|
74 | 74 | items_per_page=1, |
|
75 | 75 | ) |
|
76 | 76 | now = datetime.utcnow().replace(microsecond=0, second=0) |
|
77 | 77 | delta = timedelta(days=7) |
|
78 | 78 | if paginator.sa_items: |
|
79 | 79 | start_date = paginator.sa_items[-1].timestamp.replace(microsecond=0, second=0) |
|
80 | 80 | filter_settings["start_date"] = start_date - delta |
|
81 | 81 | else: |
|
82 | 82 | filter_settings["start_date"] = now - delta |
|
83 | 83 | filter_settings["end_date"] = filter_settings["start_date"] + timedelta(days=7) |
|
84 | 84 | |
|
85 | 85 | @request.registry.cache_regions.redis_sec_30.cache_on_arguments("logs_graphs") |
|
86 | 86 | def cached(apps, search_params, delta, now): |
|
87 | 87 | data = LogService.get_time_series_aggregate( |
|
88 | 88 | filter_settings["resource"], filter_settings |
|
89 | 89 | ) |
|
90 | 90 | if not data: |
|
91 | 91 | return [] |
|
92 | 92 | buckets = data["aggregations"]["events_over_time"]["buckets"] |
|
93 | 93 | return [ |
|
94 | 94 | { |
|
95 | 95 | "x": datetime.utcfromtimestamp(item["key"] / 1000), |
|
96 | 96 | "logs": item["doc_count"], |
|
97 | 97 | } |
|
98 | 98 | for item in buckets |
|
99 | 99 | ] |
|
100 | 100 | |
|
101 | 101 | return cached(filter_settings, request.GET.mixed(), delta, now) |
|
102 | 102 | |
|
103 | 103 | |
|
104 | 104 | @view_config( |
|
105 | 105 | route_name="logs_no_id", |
|
106 | 106 | renderer="json", |
|
107 | 107 | request_method="DELETE", |
|
108 | 108 | permission="authenticated", |
|
109 | 109 | ) |
|
110 | 110 | def logs_mass_delete(request): |
|
111 | 111 | params = request.GET.mixed() |
|
112 | 112 | if "resource" not in params: |
|
113 | 113 | raise HTTPUnprocessableEntity() |
|
114 | 114 | # this might be '' and then colander will not validate the schema |
|
115 | 115 | if not params.get("namespace"): |
|
116 | 116 | params.pop("namespace", None) |
|
117 | 117 | filter_settings = build_filter_settings_from_query_dict( |
|
118 | 118 | request, params, resource_permissions=["update_reports"] |
|
119 | 119 | ) |
|
120 | 120 | |
|
121 | 121 | resource_id = list(filter_settings["resource"])[0] |
|
122 | 122 | # filter settings returns list of all of users applications |
|
123 | 123 | # if app is not matching - normally we would not care as its used for search |
|
124 | 124 | # but here user playing with params would possibly wipe out their whole data |
|
125 | 125 | if int(resource_id) != int(params["resource"]): |
|
126 | 126 | raise HTTPUnprocessableEntity() |
|
127 | 127 | |
|
128 | 128 | logs_cleanup.delay(resource_id, filter_settings) |
|
129 | 129 | msg = ( |
|
130 | 130 | "Log cleanup process started - it may take a while for " |
|
131 | 131 | "everything to get removed" |
|
132 | 132 | ) |
|
133 | 133 | request.session.flash(msg) |
|
134 | 134 | return {} |
|
135 | 135 | |
|
136 | 136 | |
|
137 | 137 | @view_config( |
|
138 | 138 | route_name="section_view", |
|
139 | 139 | match_param=("view=common_tags", "section=logs_section"), |
|
140 | 140 | renderer="json", |
|
141 | 141 | permission="authenticated", |
|
142 | 142 | ) |
|
143 | 143 | def common_tags(request): |
|
144 | 144 | config = request.GET.mixed() |
|
145 | 145 | filter_settings = build_filter_settings_from_query_dict(request, config) |
|
146 | 146 | |
|
147 | 147 | resources = list(filter_settings["resource"]) |
|
148 | 148 | query = { |
|
149 | "query": { | |
|
150 | "bool": { | |
|
151 | "filter": [{"terms": {"resource_id": list(resources)}}] | |
|
152 | } | |
|
153 | } | |
|
149 | "query": {"bool": {"filter": [{"terms": {"resource_id": list(resources)}}]}} | |
|
154 | 150 | } |
|
155 | 151 | start_date = filter_settings.get("start_date") |
|
156 | 152 | end_date = filter_settings.get("end_date") |
|
157 | 153 | filter_part = query["query"]["bool"]["filter"] |
|
158 | 154 | |
|
159 | 155 | date_range = {"range": {"timestamp": {}}} |
|
160 | 156 | if start_date: |
|
161 | 157 | date_range["range"]["timestamp"]["gte"] = start_date |
|
162 | 158 | if end_date: |
|
163 | 159 | date_range["range"]["timestamp"]["lte"] = end_date |
|
164 | 160 | if start_date or end_date: |
|
165 | 161 | filter_part.append(date_range) |
|
166 | 162 | |
|
167 | 163 | levels = filter_settings.get("level") |
|
168 | 164 | if levels: |
|
169 | 165 | filter_part.append({"terms": {"log_level": levels}}) |
|
170 | 166 | namespaces = filter_settings.get("namespace") |
|
171 | 167 | if namespaces: |
|
172 | 168 | filter_part.append({"terms": {"namespace": namespaces}}) |
|
173 | 169 | |
|
174 | 170 | query["aggs"] = {"sub_agg": {"terms": {"field": "tag_list.keyword", "size": 50}}} |
|
175 | 171 | # tags |
|
176 | 172 | index_names = es_index_name_limiter(ixtypes=[config.get("datasource", "logs")]) |
|
177 | 173 | result = Datastores.es.search(body=query, index=index_names, doc_type="log", size=0) |
|
178 | 174 | tag_buckets = result["aggregations"]["sub_agg"].get("buckets", []) |
|
179 | 175 | # namespaces |
|
180 | 176 | query["aggs"] = {"sub_agg": {"terms": {"field": "namespace.keyword", "size": 50}}} |
|
181 | 177 | result = Datastores.es.search(body=query, index=index_names, doc_type="log", size=0) |
|
182 | 178 | namespaces_buckets = result["aggregations"]["sub_agg"].get("buckets", []) |
|
183 | 179 | return { |
|
184 | 180 | "tags": [item["key"] for item in tag_buckets], |
|
185 | 181 | "namespaces": [item["key"] for item in namespaces_buckets], |
|
186 | 182 | } |
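
Reviewer note: with one resource, a date range and a level filter, the body assembled in common_tags above goes to Elasticsearch roughly as (values illustrative):

    {
        "query": {"bool": {"filter": [
            {"terms": {"resource_id": [42]}},
            {"range": {"timestamp": {"gte": start_date, "lte": end_date}}},
            {"terms": {"log_level": ["ERROR"]}},
        ]}},
        "aggs": {"sub_agg": {"terms": {"field": "tag_list.keyword", "size": 50}}},
    }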
|
187 | 183 | |
|
188 | 184 | |
|
189 | 185 | @view_config( |
|
190 | 186 | route_name="section_view", |
|
191 | 187 | match_param=("view=common_values", "section=logs_section"), |
|
192 | 188 | renderer="json", |
|
193 | 189 | permission="authenticated", |
|
194 | 190 | ) |
|
195 | 191 | def common_values(request): |
|
196 | 192 | config = request.GET.mixed() |
|
197 | 193 | datasource = config.pop("datasource", "logs") |
|
198 | 194 | filter_settings = build_filter_settings_from_query_dict(request, config) |
|
199 | 195 | resources = list(filter_settings["resource"]) |
|
200 | 196 | tag_name = filter_settings["tags"][0]["value"][0] |
|
201 | 197 | |
|
202 | and_part = [ | |
|
203 | {"terms": {"resource_id": list(resources)}}, | |
|
204 | ] | |
|
198 | and_part = [{"terms": {"resource_id": list(resources)}}] | |
|
205 | 199 | if filter_settings["namespace"]: |
|
206 | 200 | and_part.append({"terms": {"namespace": filter_settings["namespace"]}}) |
|
207 | query = { | |
|
208 | "query": { | |
|
209 | "bool": { | |
|
210 | "filter": and_part | |
|
211 | } | |
|
212 | } | |
|
213 | } | |
|
201 | query = {"query": {"bool": {"filter": and_part}}} | |
|
214 | 202 | query["aggs"] = { |
|
215 | 203 | "sub_agg": {"terms": {"field": "tags.{}.values".format(tag_name), "size": 50}} |
|
216 | 204 | } |
|
217 | 205 | index_names = es_index_name_limiter(ixtypes=[datasource]) |
|
218 | 206 | result = Datastores.es.search(body=query, index=index_names, doc_type="log", size=0) |
|
219 | 207 | values_buckets = result["aggregations"]["sub_agg"].get("buckets", []) |
|
220 | 208 | return {"values": [item["key"] for item in values_buckets]} |
General Comments 4
Auto status change to "Under Review"