appenlight Commit - r168:b242e5b8 · RhodeCode Free Hosting

elasticsearch: move to single doctype indices

ergo -

r168:b242e5b8

parent child

backend/src/appenlight/celery/tasks.py

0 +3 -3

              # -*- coding: utf-8 -*-
              # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
              #
              # Licensed under the Apache License, Version 2.0 (the "License");
              # you may not use this file except in compliance with the License.
              # You may obtain a copy of the License at
              #
              #   http://www.apache.org/licenses/LICENSE-2.0
              #
              # Unless required by applicable law or agreed to in writing, software
              # distributed under the License is distributed on an "AS IS" BASIS,
              # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
              # See the License for the specific language governing permissions and
              # limitations under the License.
              import bisect
              import collections
              import math
              from datetime import datetime, timedelta
              import sqlalchemy as sa
              import elasticsearch.exceptions
              import elasticsearch.helpers
              from celery.utils.log import get_task_logger
              from zope.sqlalchemy import mark_changed
              from pyramid.threadlocal import get_current_request, get_current_registry
              from ziggurat_foundations.models.services.resource import ResourceService
              from appenlight.celery import celery
              from appenlight.models.report_group import ReportGroup
              from appenlight.models import DBSession, Datastores
              from appenlight.models.report import Report
              from appenlight.models.log import Log
              from appenlight.models.metric import Metric
              from appenlight.models.event import Event
              from appenlight.models.services.application import ApplicationService
              from appenlight.models.services.event import EventService
              from appenlight.models.services.log import LogService
              from appenlight.models.services.report import ReportService
              from appenlight.models.services.report_group import ReportGroupService
              from appenlight.models.services.user import UserService
              from appenlight.models.tag import Tag
              from appenlight.lib import print_traceback
              from appenlight.lib.utils import parse_proto, in_batches
              from appenlight.lib.ext_json import json
              from appenlight.lib.redis_keys import REDIS_KEYS
              from appenlight.lib.enums import ReportType
              log = get_task_logger(__name__)
              # Occurrence thresholds used by pick_sample(): steps of 100 below 1000,
              # steps of 1000 below 10000 and steps of 5000 below 100000.
              sample_boundries = [
                  *range(100, 1000, 100),
                  *range(1000, 10000, 1000),
                  *range(10000, 100000, 5000),
              ]
              def pick_sample(total_occurences, report_type=None):
                  """
                  Tell whether the report at this occurrence count should be sampled.

                  The sampling step is 1.0 while ``total_occurences`` does not exceed the
                  first boundary.  Above that, the step is the largest boundary strictly
                  below ``total_occurences`` divided by 100 (by 10 for ``not_found``
                  reports).

                  :param total_occurences: occurrence count to test
                  :param report_type: compared against ``ReportType.not_found``
                  :return: True when ``total_occurences`` is an exact multiple of the step
                  """
                  slot = bisect.bisect_left(sample_boundries, total_occurences)
                  if slot <= 0:
                      # nothing below us in the boundary list: step stays at 1.0
                      return total_occurences % 1.0 == 0
                  divisor = 10.0 if report_type == ReportType.not_found else 100.0
                  step = sample_boundries[slot - 1] / divisor
                  return total_occurences % step == 0
              @celery.task(queue="default", default_retry_delay=1, max_retries=2)
              def test_exception_task():
                  """
                  Write one error and one warning log entry, then fail on purpose.
                  """
                  message = "test celery log"
                  for emit in (log.error, log.warning):
                      emit(message, extra={"location": "celery"})
                  raise Exception("Celery exception test")
              @celery.task(queue="default", default_retry_delay=1, max_retries=2)
              def test_retry_exception_task():
                  """
                  Sleep briefly, log twice and fail, so the retry path gets exercised.

                  The exception is re-raised unchanged when
                  ``CELERY_EAGER_PROPAGATES_EXCEPTIONS`` is set; otherwise the task asks
                  celery to retry it.
                  """
                  message = "test retry celery log"
                  try:
                      import time
                      time.sleep(1.3)
                      for emit in (log.error, log.warning):
                          emit(message, extra={"location": "celery"})
                      raise Exception("Celery exception test")
                  except Exception as error:
                      if not celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
                          test_retry_exception_task.retry(exc=error)
                      else:
                          raise
              @celery.task(queue="reports", default_retry_delay=600, max_retries=144)
              def add_reports(resource_id, request_params, dataset, **kwargs):
                  """
                  Process a batch of report payloads for one application.

                  For every entry in ``dataset`` a ``Report`` is built and matched to a
                  ``ReportGroup`` looked up since the first day of the current UTC month
                  (a new group is created when none is found).  When ``pick_sample``
                  selects the entry, the report and its slow calls are attached to the
                  group and flushed.  A stat row is generated and the group counters are
                  updated for every entry, sampled or not.  After the loop the redis
                  counters are incremented and the collected documents are handed to the
                  ES indexing functions.

                  :param resource_id: id passed to ``ApplicationService.by_id``
                  :param request_params: mapping; only ``protocol_version`` is read here
                  :param dataset: sized iterable of report dicts
                  :param kwargs: accepted but not used in this function
                  :return: True on success

                  On any exception the traceback is logged; the exception is re-raised
                  when ``CELERY_EAGER_PROPAGATES_EXCEPTIONS`` is set, otherwise the task
                  is retried.
                  """
                  proto_version = parse_proto(request_params.get("protocol_version", ""))
                  # minute-resolution timestamp used to build the redis counter keys below
                  current_time = datetime.utcnow().replace(second=0, microsecond=0)
                  try:
                      # ES docs are collected per partition id and indexed once, after
                      # the loop
                      es_report_docs = {}
                      es_report_group_docs = {}
                      resource = ApplicationService.by_id(resource_id)
                      # NOTE(review): tags are accumulated below but never read in this
                      # function
                      tags = []
                      es_slow_calls_docs = {}
                      es_reports_stats_rows = {}
                      for report_data in dataset:
                          # build report details for later
                          added_details = 0
                          report = Report()
                          report.set_data(report_data, resource, proto_version)
                          report._skip_ft_index = True
                          # find the latest group created since the first day of the
                          # current month
                          report_group = ReportGroupService.by_hash_and_resource(
                              report.resource_id,
                              report.grouping_hash,
                              since_when=datetime.utcnow().date().replace(day=1),
                          )
                          occurences = report_data.get("occurences", 1)
                          if not report_group:
                              # counters start at zero, they are incremented further down
                              report_group = ReportGroup(
                                  grouping_hash=report.grouping_hash,
                                  occurences=0,
                                  total_reports=0,
                                  last_report=0,
                                  priority=report.priority,
                                  error=report.error,
                                  first_timestamp=report.start_time,
                              )
                              report_group._skip_ft_index = True
                              report_group.report_type = report.report_type
                          report.report_group_time = report_group.first_timestamp
                          # decide whether the full report is stored or only counted
                          add_sample = pick_sample(
                              report_group.occurences, report_type=report_group.report_type
                          )
                          if add_sample:
                              resource.report_groups.append(report_group)
                              report_group.reports.append(report)
                              added_details += 1
                              DBSession.flush()
                              if report.partition_id not in es_report_docs:
                                  es_report_docs[report.partition_id] = []
                              es_report_docs[report.partition_id].append(report.es_doc())
                              tags.extend(list(report.tags.items()))
                              slow_calls = report.add_slow_calls(report_data, report_group)
                              DBSession.flush()
                              for s_call in slow_calls:
                                  if s_call.partition_id not in es_slow_calls_docs:
                                      es_slow_calls_docs[s_call.partition_id] = []
                                  es_slow_calls_docs[s_call.partition_id].append(s_call.es_doc())
                          else:
                              # required for postprocessing to not fail later
                              report.report_group = report_group
                          # a stat row is generated for every entry, sampled or not
                          stat_row = ReportService.generate_stat_rows(report, resource, report_group)
                          if stat_row.partition_id not in es_reports_stats_rows:
                              es_reports_stats_rows[stat_row.partition_id] = []
                          es_reports_stats_rows[stat_row.partition_id].append(stat_row.es_doc())
                          # see if this report pushes the group across a multiple of 10
                          # or 100 occurrences
                          last_occurences_10 = int(math.floor(report_group.occurences / 10))
                          curr_occurences_10 = int(
                              math.floor((report_group.occurences + report.occurences) / 10)
                          )
                          last_occurences_100 = int(math.floor(report_group.occurences / 100))
                          curr_occurences_100 = int(
                              math.floor((report_group.occurences + report.occurences) / 100)
                          )
                          notify_occurences_10 = last_occurences_10 != curr_occurences_10
                          notify_occurences_100 = last_occurences_100 != curr_occurences_100
                          # NOTE(review): the counters below are assigned expressions built
                          # from ReportGroup class attributes, not Python numbers -
                          # presumably so the increment is evaluated by the database on
                          # flush; confirm
                          report_group.occurences = ReportGroup.occurences + occurences
                          report_group.last_timestamp = report.start_time
                          report_group.summed_duration = ReportGroup.summed_duration + report.duration
                          summed_duration = ReportGroup.summed_duration + report.duration
                          summed_occurences = ReportGroup.occurences + occurences
                          report_group.average_duration = summed_duration / summed_occurences
                          report_group.run_postprocessing(report)
                          if added_details:
                              report_group.total_reports = ReportGroup.total_reports + 1
                              report_group.last_report = report.id
                          report_group.set_notification_info(
                              notify_10=notify_occurences_10, notify_100=notify_occurences_100
                          )
                          DBSession.flush()
                          report_group.get_report().notify_channel(report_group)
                          if report_group.partition_id not in es_report_group_docs:
                              es_report_group_docs[report_group.partition_id] = []
                          es_report_group_docs[report_group.partition_id].append(
                              report_group.es_doc()
                          )
                          action = "REPORT"
                          log_msg = "%s: %s %s, client: %s, proto: %s" % (
                              action,
                              report_data.get("http_status", "unknown"),
                              str(resource),
                              report_data.get("client"),
                              proto_version,
                          )
                          log.info(log_msg)
                      total_reports = len(dataset)
                      # bump the usage counters; the commands are queued on a
                      # non-transactional pipeline and sent by execute()
                      redis_pipeline = Datastores.redis.pipeline(transaction=False)
                      key = REDIS_KEYS["counters"]["reports_per_minute"].format(current_time)
                      redis_pipeline.incr(key, total_reports)
                      # expires after one day
                      redis_pipeline.expire(key, 3600 * 24)
                      key = REDIS_KEYS["counters"]["events_per_minute_per_user"].format(
                          resource.owner_user_id, current_time
                      )
                      redis_pipeline.incr(key, total_reports)
                      # expires after one hour
                      redis_pipeline.expire(key, 3600)
                      key = REDIS_KEYS["counters"]["reports_per_hour_per_app"].format(
                          resource_id, current_time.replace(minute=0)
                      )
                      redis_pipeline.incr(key, total_reports)
                      # expires after seven days
                      redis_pipeline.expire(key, 3600 * 24 * 7)
                      redis_pipeline.sadd(
                          REDIS_KEYS["apps_that_got_new_data_per_hour"].format(
                              current_time.replace(minute=0)
                          ),
                          resource_id,
                      )
                      redis_pipeline.execute()
                      # called directly (no .delay()), so indexing happens inside this
                      # task and an indexing failure reaches the except clause below
                      add_reports_es(es_report_group_docs, es_report_docs)
                      add_reports_slow_calls_es(es_slow_calls_docs)
                      add_reports_stats_rows_es(es_reports_stats_rows)
                      return True
                  except Exception as exc:
                      print_traceback(log)
                      if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
                          raise
                      add_reports.retry(exc=exc)
              @celery.task(queue="es", default_retry_delay=600, max_retries=144)
              def add_reports_es(report_group_docs, report_docs):
                  """
                  Bulk-index report group and report documents into Elasticsearch.

                  Both arguments map a key, used as the target ``_index``, to a list of
                  document dicts.  Each document is updated in place with its ``_index``
                  and the ``report`` doc type before the list is passed to the bulk
                  helper.  Report group documents are written before report documents.

                  :param report_group_docs: ``{index: [doc, ...]}`` for report groups
                  :param report_docs: ``{index: [doc, ...]}`` for reports
                  """
                  # both kinds of documents share the single "report" doc type
                  for docs_by_index in (report_group_docs, report_docs):
                      for index_name, docs in docs_by_index.items():
                          to_update = {"_index": index_name, "_type": "report"}
                          for doc in docs:
                              doc.update(to_update)
                          elasticsearch.helpers.bulk(Datastores.es, docs)
              @celery.task(queue="es", default_retry_delay=600, max_retries=144)
              def add_reports_slow_calls_es(es_docs):
                  """
                  Bulk-index slow call documents into Elasticsearch.

                  :param es_docs: mapping of a key, used as the target ``_index``, to a
                      list of document dicts; each dict is updated in place with its
                      ``_index`` and the ``log`` doc type before the bulk call
                  """
                  for index_name, docs in es_docs.items():
                      meta = {"_index": index_name, "_type": "log"}
                      for doc in docs:
                          doc.update(meta)
                      elasticsearch.helpers.bulk(Datastores.es, docs)
              @celery.task(queue="es", default_retry_delay=600, max_retries=144)
              def add_reports_stats_rows_es(es_docs):
                  """
                  Bulk-index report stat row documents into Elasticsearch.

                  :param es_docs: mapping of a key, used as the target ``_index``, to a
                      list of document dicts; each dict is updated in place with its
                      ``_index`` and the ``report`` doc type before the bulk call
                  """
                  for index_name, docs in es_docs.items():
                      to_update = {"_index": index_name, "_type": "report"}
                      for doc in docs:
                          doc.update(to_update)
                      elasticsearch.helpers.bulk(Datastores.es, docs)
              @celery.task(queue="logs", default_retry_delay=600, max_retries=144)
              def add_logs(resource_id, request_params, dataset, **kwargs):
                  """
                  Store a batch of log entries for one application and index them in ES.

                  Every entry is saved as a ``Log`` row.  Entries without a primary key
                  are queued for indexing as they are.  For entries with a primary key,
                  the stored rows sharing the same primary key/namespace pair are
                  collapsed: the newest row (by timestamp) stays in the database and is
                  re-indexed, older rows are deleted from the database and from ES.

                  :param resource_id: id of the application the logs belong to
                  :param request_params: mapping; only ``protocol_version`` is read here
                  :param dataset: sized iterable of log dicts; ``primary_key`` is read
                      from every entry and ``namespace`` when a primary key is set
                  :param kwargs: accepted but not used in this function
                  :return: True on success

                  On any exception the traceback is logged; the exception is re-raised
                  when ``CELERY_EAGER_PROPAGATES_EXCEPTIONS`` is set, otherwise the task
                  is retried.
                  """
                  proto_version = request_params.get("protocol_version")
                  current_time = datetime.utcnow().replace(second=0, microsecond=0)
                  try:
                      es_docs = collections.defaultdict(list)
                      resource = ApplicationService.by_id_cached()(resource_id)
                      resource = DBSession.merge(resource, load=False)
                      ns_pairs = []
                      for entry in dataset:
                          # gather pk and ns so we can remove older versions of row later
                          if entry["primary_key"] is not None:
                              ns_pairs.append({"pk": entry["primary_key"], "ns": entry["namespace"]})
                          log_entry = Log()
                          log_entry.set_data(entry, resource=resource)
                          log_entry._skip_ft_index = True
                          resource.logs.append(log_entry)
                          DBSession.flush()
                          # insert non pk rows first
                          if entry["primary_key"] is None:
                              es_docs[log_entry.partition_id].append(log_entry.es_doc())
                      # 2nd pass to delete all log entries from db for same pk/ns pair
                      if ns_pairs:
                          ids_to_delete = []
                          # es_docs must NOT be re-created here: it already holds the
                          # documents of the entries without a primary key gathered above,
                          # and resetting it would drop them from the index
                          es_docs_to_delete = collections.defaultdict(list)
                          found_pkey_logs = LogService.query_by_primary_key_and_namespace(
                              list_of_pairs=ns_pairs
                          )
                          log_dict = collections.defaultdict(list)
                          for log_entry in found_pkey_logs:
                              log_key = (log_entry.primary_key, log_entry.namespace)
                              log_dict[log_key].append(log_entry)
                          for ns, entry_list in log_dict.items():
                              entry_list = sorted(entry_list, key=lambda x: x.timestamp)
                              # newest row needs to be indexed in es
                              log_entry = entry_list[-1]
                              # delete everything from pg and ES, leave the last row in pg
                              for e in entry_list[:-1]:
                                  ids_to_delete.append(e.log_id)
                                  es_docs_to_delete[e.partition_id].append(e.delete_hash)
                              # the newest row is removed from ES as well, it is indexed
                              # again from es_docs below
                              es_docs_to_delete[log_entry.partition_id].append(log_entry.delete_hash)
                              es_docs[log_entry.partition_id].append(log_entry.es_doc())
                          if ids_to_delete:
                              query = DBSession.query(Log).filter(Log.log_id.in_(ids_to_delete))
                              query.delete(synchronize_session=False)
                          # batch this to avoid problems with default ES bulk limits
                          for es_index, delete_hashes in es_docs_to_delete.items():
                              for batch in in_batches(delete_hashes, 20):
                                  query = {"query": {"terms": {"delete_hash": batch}}}
                                  try:
                                      Datastores.es.delete_by_query(
                                          index=es_index, doc_type="log",
                                          body=query, conflicts="proceed"
                                      )
                                  except elasticsearch.exceptions.NotFoundError:
                                      # a missing index is logged and skipped
                                      msg = "skipping index {}".format(es_index)
                                      log.info(msg)
                      total_logs = len(dataset)
                      log_msg = "LOG_NEW: %s, entries: %s, proto:%s" % (
                          str(resource),
                          total_logs,
                          proto_version,
                      )
                      log.info(log_msg)
                      redis_pipeline = Datastores.redis.pipeline(transaction=False)
                      key = REDIS_KEYS["counters"]["logs_per_minute"].format(current_time)
                      redis_pipeline.incr(key, total_logs)
                      redis_pipeline.expire(key, 3600 * 24)
                      key = REDIS_KEYS["counters"]["events_per_minute_per_user"].format(
                          resource.owner_user_id, current_time
                      )
                      redis_pipeline.incr(key, total_logs)
                      redis_pipeline.expire(key, 3600)
                      key = REDIS_KEYS["counters"]["logs_per_hour_per_app"].format(
                          resource_id, current_time.replace(minute=0)
                      )
                      redis_pipeline.incr(key, total_logs)
                      redis_pipeline.expire(key, 3600 * 24 * 7)
                      redis_pipeline.sadd(
                          REDIS_KEYS["apps_that_got_new_data_per_hour"].format(
                              current_time.replace(minute=0)
                          ),
                          resource_id,
                      )
                      redis_pipeline.execute()
                      add_logs_es(es_docs)
                      return True
                  except Exception as exc:
                      print_traceback(log)
                      if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
                          raise
                      add_logs.retry(exc=exc)
              @celery.task(queue="es", default_retry_delay=600, max_retries=144)
              def add_logs_es(es_docs):
                  """
                  Bulk-index log documents into Elasticsearch.

                  :param es_docs: mapping of a key, used as the target ``_index``, to a
                      list of document dicts; each dict is updated in place with its
                      ``_index`` and the ``log`` doc type before the bulk call
                  """
                  for index_name, docs in es_docs.items():
                      meta = {"_index": index_name, "_type": "log"}
                      for doc in docs:
                          doc.update(meta)
                      elasticsearch.helpers.bulk(Datastores.es, docs)
              @celery.task(queue="metrics", default_retry_delay=600, max_retries=144)
              def add_metrics(resource_id, request_params, dataset, proto_version):
                  """
                  Store a batch of metrics for one application and index them in ES.

                  Each entry of ``dataset`` becomes a ``Metric`` row whose tags always
                  carry a lower-cased ``server_name`` (``"unknown"`` when it is empty).
                  Rows are bulk-saved, redis counters are incremented and the documents
                  are passed to ``add_metrics_es``.

                  :param resource_id: id of the application the metrics belong to
                  :param request_params: not read in this function
                  :param dataset: sized iterable of metric dicts providing ``tags``,
                      ``server_name``, ``timestamp`` and ``namespace``
                  :param proto_version: only used in the log message
                  :return: True on success

                  On any exception the traceback is logged; the exception is re-raised
                  when ``CELERY_EAGER_PROPAGATES_EXCEPTIONS`` is set, otherwise the task
                  is retried.
                  """
                  current_time = datetime.utcnow().replace(second=0, microsecond=0)
                  try:
                      cached_resource = ApplicationService.by_id_cached()(resource_id)
                      resource = DBSession.merge(cached_resource, load=False)
                      es_docs, rows = [], []
                      for metric in dataset:
                          metric_tags = dict(metric["tags"])
                          server_name = metric_tags.get(
                              "server_name", metric["server_name"]
                          ).lower()
                          metric_tags["server_name"] = server_name if server_name else "unknown"
                          row = Metric(
                              timestamp=metric["timestamp"],
                              resource_id=resource.resource_id,
                              namespace=metric["namespace"],
                              tags=metric_tags,
                          )
                          rows.append(row)
                          es_docs.append(row.es_doc())
                      session = DBSession()
                      session.bulk_save_objects(rows)
                      session.flush()
                      log.info(
                          "%s: %s, metrics: %s, proto:%s"
                          % ("METRICS", str(resource), len(dataset), proto_version)
                      )
                      mark_changed(session)
                      pipe = Datastores.redis.pipeline(transaction=False)
                      saved = len(rows)
                      current_hour = current_time.replace(minute=0)
                      # global per-minute counter
                      minute_key = REDIS_KEYS["counters"]["metrics_per_minute"].format(current_time)
                      pipe.incr(minute_key, saved)
                      pipe.expire(minute_key, 3600 * 24)
                      # per-user per-minute counter
                      user_key = REDIS_KEYS["counters"]["events_per_minute_per_user"].format(
                          resource.owner_user_id, current_time
                      )
                      pipe.incr(user_key, saved)
                      pipe.expire(user_key, 3600)
                      # per-application per-hour counter
                      app_key = REDIS_KEYS["counters"]["metrics_per_hour_per_app"].format(
                          resource_id, current_hour
                      )
                      pipe.incr(app_key, saved)
                      pipe.expire(app_key, 3600 * 24 * 7)
                      pipe.sadd(
                          REDIS_KEYS["apps_that_got_new_data_per_hour"].format(current_hour),
                          resource_id,
                      )
                      pipe.execute()
                      add_metrics_es(es_docs)
                      return True
                  except Exception as error:
                      print_traceback(log)
                      if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
                          raise
                      add_metrics.retry(exc=error)
              @celery.task(queue="es", default_retry_delay=600, max_retries=144)
              def add_metrics_es(es_docs):
                  """
                  Index metric documents one by one.

                  The target index name is built from each document's ``timestamp`` as
                  ``rcae_m_<year>_<month>_<day>``.

                  :param es_docs: iterable of document dicts with a ``timestamp`` value
                      that supports ``strftime``
                  """
                  for metric_doc in es_docs:
                      day_suffix = metric_doc["timestamp"].strftime("%Y_%m_%d")
                      index_name = "rcae_m_%s" % day_suffix
                      Datastores.es.index(index_name, "log", metric_doc)
              @celery.task(queue="default", default_retry_delay=5, max_retries=2)
              def check_user_report_notifications(resource_id):
                  """
                  Notify the users of an application about report groups with new data.

                  Reads, and then deletes, the redis sets of error and slow report group
                  ids waiting for notification, together with their per-group occurrence
                  counters.  It then runs the alert check for the application, calls
                  ``UserService.report_notify`` for every user holding the ``view``
                  permission and flags the groups as notified.

                  :param resource_id: id of the application to process; nothing happens
                      when no such application is found

                  Exceptions are logged with a traceback and re-raised.
                  """
                  try:
                      request = get_current_request()
                      application = ApplicationService.by_id(resource_id)
                      if not application:
                          return
                      error_key = REDIS_KEYS["reports_to_notify_per_type_per_app"].format(
                          ReportType.error, resource_id
                      )
                      slow_key = REDIS_KEYS["reports_to_notify_per_type_per_app"].format(
                          ReportType.slow, resource_id
                      )
                      error_group_ids = Datastores.redis.smembers(error_key)
                      slow_group_ids = Datastores.redis.smembers(slow_key)
                      Datastores.redis.delete(error_key)
                      Datastores.redis.delete(slow_key)
                      err_gids = [int(g_id) for g_id in error_group_ids]
                      slow_gids = [int(g_id) for g_id in slow_group_ids]
                      group_ids = err_gids + slow_gids
                      occurence_dict = {}
                      for g_id in group_ids:
                          key = REDIS_KEYS["counters"]["report_group_occurences"].format(g_id)
                          val = Datastores.redis.get(key)
                          Datastores.redis.delete(key)
                          # a missing or empty counter counts as a single occurrence
                          occurence_dict[g_id] = int(val) if val else 1
                      # Query.options() returns a new query instead of modifying the
                      # existing one, so the result has to be kept for the eager load
                      # to take effect
                      report_groups = ReportGroupService.by_ids(group_ids).options(
                          sa.orm.joinedload(ReportGroup.last_report_ref)
                      )
                      ApplicationService.check_for_groups_alert(
                          application,
                          "alert",
                          report_groups=report_groups,
                          occurence_dict=occurence_dict,
                      )
                      users = {
                          p.user for p in ResourceService.users_for_perm(application, "view")
                      }
                      report_groups = report_groups.all()
                      for user in users:
                          UserService.report_notify(
                              user,
                              request,
                              application,
                              report_groups=report_groups,
                              occurence_dict=occurence_dict,
                          )
                      for group in report_groups:
                          # marks report_groups as notified
                          if not group.notified:
                              group.notified = True
                  except Exception:
                      print_traceback(log)
                      raise
              @celery.task(queue="default", default_retry_delay=5, max_retries=2)
              def check_alerts(resource_id):
                  """
                  Run the alert check for the report groups of one application.

                  Reads, and then deletes, the redis "alerting" sets of error and slow
                  report group ids together with their per-group occurrence counters,
                  and passes the matching groups to
                  ``ApplicationService.check_for_groups_alert``.

                  :param resource_id: id of the application to process; nothing happens
                      when no such application is found

                  Exceptions are logged with a traceback and re-raised.
                  """
                  since_when = datetime.utcnow()
                  try:
                      request = get_current_request()
                      application = ApplicationService.by_id(resource_id)
                      if not application:
                          return
                      error_key = REDIS_KEYS["reports_to_notify_per_type_per_app_alerting"].format(
                          ReportType.error, resource_id
                      )
                      slow_key = REDIS_KEYS["reports_to_notify_per_type_per_app_alerting"].format(
                          ReportType.slow, resource_id
                      )
                      error_group_ids = Datastores.redis.smembers(error_key)
                      slow_group_ids = Datastores.redis.smembers(slow_key)
                      Datastores.redis.delete(error_key)
                      Datastores.redis.delete(slow_key)
                      err_gids = [int(g_id) for g_id in error_group_ids]
                      slow_gids = [int(g_id) for g_id in slow_group_ids]
                      group_ids = err_gids + slow_gids
                      occurence_dict = {}
                      for g_id in group_ids:
                          key = REDIS_KEYS["counters"]["report_group_occurences_alerting"].format(
                              g_id
                          )
                          val = Datastores.redis.get(key)
                          Datastores.redis.delete(key)
                          # a missing or empty counter counts as a single occurrence
                          occurence_dict[g_id] = int(val) if val else 1
                      # Query.options() returns a new query instead of modifying the
                      # existing one, so the result has to be kept for the eager load
                      # to take effect
                      report_groups = ReportGroupService.by_ids(group_ids).options(
                          sa.orm.joinedload(ReportGroup.last_report_ref)
                      )
                      ApplicationService.check_for_groups_alert(
                          application,
                          "alert",
                          report_groups=report_groups,
                          occurence_dict=occurence_dict,
                          since_when=since_when,
                      )
                  except Exception:
                      print_traceback(log)
                      raise
              @celery.task(queue="default", default_retry_delay=1, max_retries=2)
              def close_alerts():
                  """
                  Ask active report alert events older than five minutes to validate
                  themselves or close.

                  Exceptions are logged with a traceback and re-raised.
                  """
                  log.warning("Checking alerts")
                  now = datetime.utcnow()
                  try:
                      alert_types = [
                          Event.types[type_name]
                          for type_name in ("error_report_alert", "slow_report_alert")
                      ]
                      active_only = [Event.statuses["active"]]
                      # get events older than 5 min
                      stale_before = now - timedelta(minutes=5)
                      candidates = EventService.by_type_and_status(
                          alert_types, active_only, older_than=stale_before
                      )
                      # each event decides for itself, looking at the last minute
                      validation_start = now - timedelta(minutes=1)
                      for event in candidates:
                          event.validate_or_close(since_when=validation_start)
                  except Exception:
                      print_traceback(log)
                      raise
              @celery.task(queue="default", default_retry_delay=600, max_retries=144)
              def update_tag_counter(tag_name, tag_value, count):
                  """
                  Add ``count`` to ``times_seen`` of the matching tags and refresh their
                  ``last_timestamp``.

                  Tags are matched by name and by the text form of their value, compared
                  against the JSON dump of ``tag_value``.

                  :return: True on success

                  On any exception the traceback is logged; the exception is re-raised
                  when ``CELERY_EAGER_PROPAGATES_EXCEPTIONS`` is set, otherwise the task
                  is retried.
                  """
                  try:
                      by_name = DBSession.query(Tag).filter(Tag.name == tag_name)
                      stored_value = sa.cast(Tag.value, sa.types.TEXT)
                      wanted_value = sa.cast(json.dumps(tag_value), sa.types.TEXT)
                      matching = by_name.filter(stored_value == wanted_value)
                      changes = {
                          "times_seen": Tag.times_seen + count,
                          "last_timestamp": datetime.utcnow(),
                      }
                      matching.update(changes, synchronize_session=False)
                      # the bulk update bypasses the ORM, so flag the session by hand
                      mark_changed(DBSession())
                      return True
                  except Exception as error:
                      print_traceback(log)
                      if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
                          raise
                      update_tag_counter.retry(exc=error)
              @celery.task(queue="default")
              def update_tag_counters():
                  """
                  Reads and clears the redis list of seen tags, then queues one
                  ``update_tag_counter`` task per distinct tag with its occurrence count
                  """
                  list_key = REDIS_KEYS["seen_tag_list"]
                  seen_tags = Datastores.redis.lrange(list_key, 0, -1)
                  Datastores.redis.delete(list_key)
                  occurrences = collections.Counter(seen_tags)
                  for serialized_tag, times_seen in occurrences.items():
                      # every entry is a JSON array: [tag name, tag value]
                      tag = json.loads(serialized_tag)
                      update_tag_counter.delay(tag[0], tag[1], times_seen)
              @celery.task(queue="default")
              def daily_digest():
                  """
                  Sends a digest with up to 50 trending error reports from the last
                  8 hours to every user with "view" permission on an application
                  that had reports
                  """
                  request = get_current_request()
                  redis_key = REDIS_KEYS["apps_that_had_reports"]
                  app_ids = Datastores.redis.smembers(redis_key)
                  Datastores.redis.delete(redis_key)
                  since_when = datetime.utcnow() - timedelta(hours=8)
                  log.warning("Generating daily digests")
                  for raw_id in app_ids:
                      resource_id = raw_id.decode("utf8")
                      # the reporting window ends at the start of the current minute
                      end_date = datetime.utcnow().replace(microsecond=0, second=0)
                      error_filter = {
                          "resource": [resource_id],
                          "tags": [{"name": "type", "value": ["error"], "op": None}],
                          "type": "error",
                          "start_date": since_when,
                          "end_date": end_date,
                      }
                      reports = ReportGroupService.get_trending(
                          request, filter_settings=error_filter, limit=50
                      )
                      application = ApplicationService.by_id(resource_id)
                      if not application:
                          continue
                      recipients = {
                          perm.user
                          for perm in ResourceService.users_for_perm(application, "view")
                      }
                      for recipient in recipients:
                          recipient.send_digest(
                              request, application, reports=reports, since_when=since_when
                          )
              @celery.task(queue="default")
              def notifications_reports():
                  """
                  Reads and clears the redis set of applications that had reports and
                  queues a user report notification check for each of them
                  """
                  redis_key = REDIS_KEYS["apps_that_had_reports"]
                  pending_apps = Datastores.redis.smembers(redis_key)
                  Datastores.redis.delete(redis_key)
                  for raw_id in pending_apps:
                      log.warning("Notify for app: %s" % raw_id)
                      # redis hands back bytes, the task expects text
                      check_user_report_notifications.delay(raw_id.decode("utf8"))
              @celery.task(queue="default")
              def alerting_reports():
                  """
                  Reads and clears the redis set of applications flagged for alerting
                  and queues an alert check for each of them, which determines:
                  - which applications should have new alerts opened
                  """
                  redis_key = REDIS_KEYS["apps_that_had_reports_alerting"]
                  pending_apps = Datastores.redis.smembers(redis_key)
                  Datastores.redis.delete(redis_key)
                  for raw_id in pending_apps:
                      log.warning("Notify for app: %s" % raw_id)
                      # redis hands back bytes, the task expects text
                      check_alerts.delay(raw_id.decode("utf8"))
              @celery.task(
                  queue="default", soft_time_limit=3600 * 4, time_limit=3600 * 4, max_retries=144
              )
              def logs_cleanup(resource_id, filter_settings):
                  """
                  Removes logs of a single application from the database and from
                  elasticsearch

                  :param resource_id: id of the application whose logs are deleted
                  :param filter_settings: dict with a ``namespace`` entry; when it is
                      non-empty only logs from its first namespace are deleted
                  """
                  # NOTE: celery's option for the hard limit is ``time_limit``; the
                  # previously used ``hard_time_limit`` is not a task option, so no hard
                  # limit was being applied
                  request = get_current_request()
                  request.tm.begin()
                  es_query = {
                      "query": {
                          "bool": {"filter": [{"term": {"resource_id": resource_id}}]}
                      }
                  }
                  query = DBSession.query(Log).filter(Log.resource_id == resource_id)
                  namespaces = filter_settings["namespace"]
                  if namespaces:
                      namespace = namespaces[0]
                      query = query.filter(Log.namespace == namespace)
                      es_query["query"]["bool"]["filter"].append(
                          {"term": {"namespace": namespace}}
                      )
                  # database rows are removed and committed first, the matching
                  # elasticsearch documents afterwards
                  query.delete(synchronize_session=False)
                  request.tm.commit()
                  Datastores.es.delete_by_query(
                      index="rcae_l_*", doc_type="log", body=es_query, conflicts="proceed"
                  )

backend/src/appenlight/models/log.py

0 +1 -1

              # -*- coding: utf-8 -*-
              # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
              #
              # Licensed under the Apache License, Version 2.0 (the "License");
              # you may not use this file except in compliance with the License.
              # You may obtain a copy of the License at
              #
              #   http://www.apache.org/licenses/LICENSE-2.0
              #
              # Unless required by applicable law or agreed to in writing, software
              # distributed under the License is distributed on an "AS IS" BASIS,
              # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
              # See the License for the specific language governing permissions and
              # limitations under the License.
              import sqlalchemy as sa
              import logging
              import hashlib
              from datetime import datetime
              from appenlight.models import Base
              from appenlight.lib.utils import convert_es_type
              from appenlight.lib.enums import LogLevel
              from sqlalchemy.dialects.postgresql import JSON
              from ziggurat_foundations.models.base import BaseModel
              log = logging.getLogger(__name__)
              class Log(Base, BaseModel):
                  """
                  Single log entry of an application, stored in the ``logs`` table
                  """
                  __tablename__ = "logs"
                  __table_args__ = {"implicit_returning": False}
                  log_id = sa.Column(sa.BigInteger(), nullable=False, primary_key=True)
                  # owning application; updates and deletes of the application cascade
                  resource_id = sa.Column(
                      sa.Integer(),
                      sa.ForeignKey(
                          "applications.resource_id", onupdate="CASCADE", ondelete="CASCADE"
                      ),
                      nullable=False,
                      index=True,
                  )
                  log_level = sa.Column(sa.Unicode, nullable=False, index=True, default="INFO")
                  message = sa.Column(sa.UnicodeText(), default="")
                  timestamp = sa.Column(
                      sa.DateTime(), default=datetime.utcnow, server_default=sa.func.now()
                  )
                  request_id = sa.Column(sa.Unicode())
                  namespace = sa.Column(sa.Unicode())
                  # client supplied key; used together with resource and namespace to
                  # build ``delete_hash``
                  primary_key = sa.Column(sa.Unicode())
                  # NOTE(review): ``default={}`` is a single dict object shared by the
                  # column definition - confirm it is never mutated in place
                  tags = sa.Column(JSON(), default={})
                  # permanent entries go to monthly instead of daily partitions
                  permanent = sa.Column(sa.Boolean(), nullable=False, default=False)
                  def __str__(self):
                      """
                      Returns the textual representation built by ``__unicode__``
                      """
                      # on Python 3 ``__str__`` has to return ``str``; returning the
                      # utf8-encoded bytes made ``str(log)`` raise TypeError
                      return self.__unicode__()
                  def __unicode__(self):
                      """
                      Short text representation with the id, level and namespace
                      """
                      shown_fields = (self.log_id, self.log_level, self.namespace)
                      return "<Log id:%s, lv:%s, ns:%s >" % shown_fields
                  def set_data(self, data, resource):
                      """
                      Populates the log entry from an incoming payload

                      :param data: dict with the log payload; ``date`` is required, the
                          remaining keys are optional
                      :param resource: application the entry belongs to, only its
                          ``resource_id`` is read
                      """
                      # a missing or null level falls back to UNKNOWN instead of raising
                      level = (data.get("log_level") or "").upper()
                      self.log_level = getattr(LogLevel, level, LogLevel.UNKNOWN)
                      self.message = data.get("message", "")
                      # a missing or null server name is reported as "unknown"
                      server_name = (data.get("server") or "").lower() or "unknown"
                      self.tags = {"server_name": server_name}
                      # tags arrive as (name, value) sequences
                      for tag_tuple in data.get("tags") or []:
                          self.tags[tag_tuple[0]] = tag_tuple[1]
                      self.timestamp = data["date"]
                      # request ids are stored without dashes
                      request_id = data.get("request_id") or ""
                      self.request_id = request_id.replace("-", "")
                      self.resource_id = resource.resource_id
                      self.namespace = data.get("namespace") or ""
                      self.permanent = data.get("permanent")
                      self.primary_key = data.get("primary_key")
                      if self.primary_key is not None:
                          # expose the client supplied key as a searchable tag as well
                          self.tags["appenlight_primary_key"] = self.primary_key
                  def get_dict(self):
                      """
                      Returns the base model dictionary with ``log_level`` replaced by
                      its key name and the application's ``resource_name`` added
                      """
                      serialized = super(Log, self).get_dict()
                      serialized.update(
                          {
                              "log_level": LogLevel.key_from_value(self.log_level),
                              "resource_name": self.application.resource_name,
                          }
                      )
                      return serialized
                  @property
                  def delete_hash(self):
                      """
                      SHA1 hex digest of "<resource_id>_<primary_key>_<namespace>", or
                      None when the entry has no primary key
                      """
                      if self.primary_key:
                          identity = "{}_{}_{}".format(
                              self.resource_id, self.primary_key, self.namespace
                          )
                          return hashlib.sha1(identity.encode("utf8")).hexdigest()
                      return None
                  def es_doc(self):
                      """
                      Builds the dictionary that represents this log entry as an
                      elasticsearch document

                      Every tag is stored as ``{"values": ..., "numeric_values": ...}``
                      under its name with dots replaced by underscores; ``tag_list``
                      holds those (sanitized) tag names.
                      """
                      tags = {}
                      tag_list = []
                      for name, value in self.tags.items():
                          # replace dot in indexed tag name
                          name = name.replace(".", "_")
                          tag_list.append(name)
                          tags[name] = {
                              "values": convert_es_type(value),
                              # only int/float values are exposed as numeric; bool is
                              # excluded explicitly because it is a subclass of int
                              "numeric_values": value
                              if (isinstance(value, (int, float)) and not isinstance(value, bool))
                              else None,
                          }
                      return {
-                         "pg_id": str(self.log_id),
+                         "log_id": str(self.log_id),
                          "delete_hash": self.delete_hash,
                          "resource_id": self.resource_id,
                          "request_id": self.request_id,
                          "log_level": LogLevel.key_from_value(self.log_level),
                          "timestamp": self.timestamp,
                          # empty message / namespace are always indexed as ""
                          "message": self.message if self.message else "",
                          "namespace": self.namespace if self.namespace else "",
                          "tags": tags,
                          "tag_list": tag_list,
                      }
                  @property
                  def partition_id(self):
                      """
                      Partition name for this entry: monthly ("rcae_l_YYYY_MM") for
                      permanent logs, daily ("rcae_l_YYYY_MM_DD") otherwise
                      """
                      stamp_format = "%Y_%m" if self.permanent else "%Y_%m_%d"
                      return "rcae_l_%s" % self.timestamp.strftime(stamp_format)

backend/src/appenlight/models/metric.py

0 +1 0

              # -*- coding: utf-8 -*-
              # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
              #
              # Licensed under the Apache License, Version 2.0 (the "License");
              # you may not use this file except in compliance with the License.
              # You may obtain a copy of the License at
              #
              #   http://www.apache.org/licenses/LICENSE-2.0
              #
              # Unless required by applicable law or agreed to in writing, software
              # distributed under the License is distributed on an "AS IS" BASIS,
              # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
              # See the License for the specific language governing permissions and
              # limitations under the License.
              from datetime import datetime
              import sqlalchemy as sa
              from sqlalchemy.dialects.postgresql import JSON
              from ziggurat_foundations.models.base import BaseModel
              from appenlight.lib.utils import convert_es_type
              from appenlight.models import Base
              class Metric(Base, BaseModel):
                  """
                  Single metric sample of an application, stored in the ``metrics`` table
                  """
                  __tablename__ = "metrics"
                  __table_args__ = {"implicit_returning": False}
                  # ``pkey`` and ``resource_id`` together form the composite primary key
                  pkey = sa.Column(sa.BigInteger(), primary_key=True)
                  resource_id = sa.Column(
                      sa.Integer(),
                      sa.ForeignKey("applications.resource_id"),
                      nullable=False,
                      primary_key=True,
                  )
                  timestamp = sa.Column(
                      sa.DateTime(), default=datetime.utcnow, server_default=sa.func.now()
                  )
                  # NOTE(review): ``default={}`` is a single dict object shared by the
                  # column definition - confirm it is never mutated in place
                  tags = sa.Column(JSON(), default={})
                  namespace = sa.Column(sa.Unicode(255))
                  @property
                  def partition_id(self):
                      """
                      Daily partition name of this metric ("rcae_m_YYYY_MM_DD")
                      """
                      day_stamp = self.timestamp.strftime("%Y_%m_%d")
                      return "rcae_m_%s" % day_stamp
                  def es_doc(self):
                      """
                      Builds the dictionary that represents this metric as an
                      elasticsearch document

                      Every tag is stored as ``{"values": ..., "numeric_values": ...}``
                      under its name with dots replaced by underscores; ``tag_list``
                      holds those (sanitized) tag names.
                      """
                      tags = {}
                      tag_list = []
                      for name, value in self.tags.items():
                          # replace dot in indexed tag name
                          name = name.replace(".", "_")
                          tag_list.append(name)
                          tags[name] = {
                              "values": convert_es_type(value),
                              # only int/float values are exposed as numeric; bool is
                              # excluded explicitly because it is a subclass of int
                              "numeric_values": value
                              if (isinstance(value, (int, float)) and not isinstance(value, bool))
                              else None,
                          }
                      return {
+                         "metric_id": self.pkey,
                          "resource_id": self.resource_id,
                          "timestamp": self.timestamp,
                          "namespace": self.namespace,
                          "tags": tags,
                          "tag_list": tag_list,
                      }

backend/src/appenlight/models/report.py

0 +12 -7

              # -*- coding: utf-8 -*-
              # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
              #
              # Licensed under the Apache License, Version 2.0 (the "License");
              # you may not use this file except in compliance with the License.
              # You may obtain a copy of the License at
              #
              #   http://www.apache.org/licenses/LICENSE-2.0
              #
              # Unless required by applicable law or agreed to in writing, software
              # distributed under the License is distributed on an "AS IS" BASIS,
              # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
              # See the License for the specific language governing permissions and
              # limitations under the License.
              from datetime import datetime, timedelta
              import math
              import uuid
              import hashlib
              import copy
              import urllib.parse
              import logging
              import sqlalchemy as sa
              from appenlight.models import Base, Datastores
              from appenlight.lib.utils.date_utils import convert_date
              from appenlight.lib.utils import convert_es_type
              from appenlight.models.slow_call import SlowCall
              from appenlight.lib.utils import channelstream_request
              from appenlight.lib.enums import ReportType, Language
              from pyramid.threadlocal import get_current_registry, get_current_request
              from sqlalchemy.dialects.postgresql import JSON
              from ziggurat_foundations.models.base import BaseModel
              log = logging.getLogger(__name__)
              # Report fields that conditions can be expressed on: for every field the
              # value type ("int", "float", "unicode") and the accepted comparison
              # operators. Keys prefixed with "group:" / "tags:" presumably address
              # report group attributes and tags - verify against the code consuming
              # this mapping.
              REPORT_TYPE_MATRIX = {
                  "http_status": {"type": "int", "ops": ("eq", "ne", "ge", "le")},
                  "group:priority": {"type": "int", "ops": ("eq", "ne", "ge", "le")},
                  "duration": {"type": "float", "ops": ("ge", "le")},
                  "url_domain": {
                      "type": "unicode",
                      "ops": ("eq", "ne", "startswith", "endswith", "contains"),
                  },
                  "url_path": {
                      "type": "unicode",
                      "ops": ("eq", "ne", "startswith", "endswith", "contains"),
                  },
                  "error": {
                      "type": "unicode",
                      "ops": ("eq", "ne", "startswith", "endswith", "contains"),
                  },
                  "tags:server_name": {
                      "type": "unicode",
                      "ops": ("eq", "ne", "startswith", "endswith", "contains"),
                  },
                  "traceback": {"type": "unicode", "ops": ("contains",)},
                  "group:occurences": {"type": "int", "ops": ("eq", "ne", "ge", "le")},
              }
              class Report(Base, BaseModel):
                  """
                  Single report stored in the ``reports`` table; linked to its report
                  group through ``group_id``
                  """
                  __tablename__ = "reports"
                  __table_args__ = {"implicit_returning": False}
                  id = sa.Column(sa.Integer, nullable=False, primary_key=True)
                  # owning report group; deletes and updates of the group cascade
                  group_id = sa.Column(
                      sa.BigInteger,
                      sa.ForeignKey("reports_groups.id", ondelete="cascade", onupdate="cascade"),
                  )
                  resource_id = sa.Column(sa.Integer(), nullable=False, index=True)
                  report_type = sa.Column(sa.Integer(), nullable=False, index=True)
                  error = sa.Column(sa.UnicodeText(), index=True)
                  extra = sa.Column(JSON(), default={})
                  request = sa.Column(JSON(), nullable=False, default={})
                  ip = sa.Column(sa.String(39), index=True, default="")
                  username = sa.Column(sa.Unicode(255), default="")
                  user_agent = sa.Column(sa.Unicode(255), default="")
                  url = sa.Column(sa.UnicodeText(), index=True)
                  request_id = sa.Column(sa.Text())
                  request_stats = sa.Column(JSON(), nullable=False, default={})
                  # NOTE(review): declared non-nullable while the default is None
                  traceback = sa.Column(JSON(), nullable=False, default=None)
                  traceback_hash = sa.Column(sa.Text())
                  start_time = sa.Column(
                      sa.DateTime(), default=datetime.utcnow, server_default=sa.func.now()
                  )
                  end_time = sa.Column(sa.DateTime())
                  duration = sa.Column(sa.Float, default=0)
                  http_status = sa.Column(sa.Integer, index=True)
                  url_domain = sa.Column(sa.Unicode(100), index=True)
                  url_path = sa.Column(sa.Unicode(255), index=True)
                  tags = sa.Column(JSON(), nullable=False, default={})
                  language = sa.Column(sa.Integer(), default=0)
                  # this is used to determine partition for the report
                  report_group_time = sa.Column(
                      sa.DateTime(), default=datetime.utcnow, server_default=sa.func.now()
                  )
                  # logs sharing this report's request id; rows with an empty or NULL
                  # request id are never matched
                  logs = sa.orm.relationship(
                      "Log",
                      lazy="dynamic",
                      passive_deletes=True,
                      passive_updates=True,
                      primaryjoin="and_(Report.request_id==Log.request_id, "
                      "Log.request_id != None, Log.request_id != '')",
                      foreign_keys="[Log.request_id]",
                  )
                  # slow calls recorded for this report, ordered by their timestamp
                  slow_calls = sa.orm.relationship(
                      "SlowCall",
                      backref="detail",
                      cascade="all, delete-orphan",
                      passive_deletes=True,
                      passive_updates=True,
                      order_by="SlowCall.timestamp",
                  )
                  def set_data(self, data, resource, protocol_version=None):
                      """
                      Populates the report from an incoming payload

                      :param data: dict with the report payload
                      :param resource: application the report belongs to; its
                          ``default_grouping`` and ``resource_id`` are read
                      :param protocol_version: passed through to
                          ``generate_grouping_hash``
                      """
                      self.http_status = data["http_status"]
                      self.priority = data["priority"]
                      self.error = data["error"]
                      report_language = data.get("language", "").lower()
                      self.language = getattr(Language, report_language, Language.unknown)
                      # we need temp holder here to decide later
                      # if we want to to commit the tags if report is marked for creation
                      self.tags = {"server_name": data["server"], "view_name": data["view_name"]}
                      if data.get("tags"):
                          for tag_tuple in data["tags"]:
                              self.tags[tag_tuple[0]] = tag_tuple[1]
                      # the traceback has to be set before it can be stripped and hashed
                      self.traceback = data["traceback"]
                      stripped_traceback = self.stripped_traceback()
                      tb_repr = repr(stripped_traceback).encode("utf8")
                      self.traceback_hash = hashlib.sha1(tb_repr).hexdigest()
                      url_info = urllib.parse.urlsplit(data.get("url", ""), allow_fragments=False)
                      # NOTE(review): the slice limits (128 / 2048) are larger than the
                      # declared column lengths (Unicode(100) / Unicode(255)) - confirm
                      self.url_domain = url_info.netloc[:128]
                      self.url_path = url_info.path[:2048]
                      self.occurences = data["occurences"]
                      # reports with an error message are errors, everything else is slow
                      if self.error:
                          self.report_type = ReportType.error
                      else:
                          self.report_type = ReportType.slow
                      # but if its status 404 its 404 type
                      if self.http_status in [404, "404"] or self.error == "404 Not Found":
                          self.report_type = ReportType.not_found
                          self.error = ""
                      self.generate_grouping_hash(
                          data.get("appenlight.group_string", data.get("group_string")),
                          resource.default_grouping,
                          protocol_version,
                      )
                      # details
                      if data["http_status"] in [404, "404"]:
                          # for 404s ``data`` is rebound to a trimmed dict, so every
                          # lookup below this point only sees these four keys
                          data = {
                              "username": data["username"],
                              "ip": data["ip"],
                              "url": data["url"],
                              "user_agent": data["user_agent"],
                          }
                          # NOTE(review): this check runs against the trimmed dict, which
                          # never holds a referer key, so the branch cannot be taken
                          if data.get("HTTP_REFERER") or data.get("http_referer"):
                              data["HTTP_REFERER"] = data.get("HTTP_REFERER", "") or data.get(
                                  "http_referer", ""
                              )
                      self.resource_id = resource.resource_id
                      self.username = data["username"]
                      self.user_agent = data["user_agent"]
                      self.ip = data["ip"]
                      self.extra = {}
                      if data.get("extra"):
                          for extra_tuple in data["extra"]:
                              self.extra[extra_tuple[0]] = extra_tuple[1]
                      self.url = data["url"]
                      # dashes are stripped from the request id; a new uuid4 is generated
                      # when the payload carries none
                      self.request_id = data.get("request_id", "").replace("-", "") or str(
                          uuid.uuid4()
                      )
                      request_data = data.get("request", {})
                      self.request = request_data
                      self.request_stats = data.get("request_stats") or {}
                      # second traceback assignment: falls back to ``frameinfo`` when the
                      # (possibly trimmed) payload has no traceback
                      traceback = data.get("traceback")
                      if not traceback:
                          traceback = data.get("frameinfo")
                      self.traceback = traceback
                      start_date = convert_date(data.get("start_time"))
                      # an already set start time is only ever moved forward
                      if not self.start_time or self.start_time < start_date:
                          self.start_time = start_date
                      self.end_time = convert_date(data.get("end_time"), False)
                      self.duration = 0
                      if self.start_time and self.end_time:
                          d = self.end_time - self.start_time
                          self.duration = d.total_seconds()
                      # update tags with other vars
                      if self.username:
                          self.tags["user_name"] = self.username
                      self.tags["report_language"] = Language.key_from_value(self.language)
                  def add_slow_calls(self, data, report_group):
                      """
                      Creates ``SlowCall`` objects from ``data["slow_calls"]`` and attaches
                      them to this report

                      :param data: report payload; a missing ``slow_calls`` key means
                          there is nothing to add
                      :param report_group: group passed through to ``SlowCall.set_data``
                      :return: list with the newly created ``SlowCall`` instances
                      """
                      slow_calls = []
                      for call in data.get("slow_calls", []):
                          sc_inst = SlowCall()
                          sc_inst.set_data(
                              call, resource_id=self.resource_id, report_group=report_group
                          )
                          slow_calls.append(sc_inst)
                      # attach once, after the loop: extending inside the loop re-added
                      # every previously created call on each iteration
                      self.slow_calls.extend(slow_calls)
                      return slow_calls
                  def get_dict(self, request, details=False, exclude_keys=None, include_keys=None):
                      """
                      Serializes the report together with a summary of its group

                      :param request: request used for elasticsearch lookups and for
                          building ``front_url``
                      :param details: when true also adds affected users, visible
                          integrations, comments, next/previous report ids and the
                          nested slow calls
                      :param exclude_keys: keys to drop from the result
                      :param include_keys: when given, only these keys are kept
                      :return: dict; the ``group`` key is never filtered out
                      """
                      # imported locally - presumably to avoid a circular import
                      from appenlight.models.services.report_group import ReportGroupService
                      instance_dict = super(Report, self).get_dict()
                      instance_dict["req_stats"] = self.req_stats()
                      instance_dict["group"] = {}
                      instance_dict["group"]["id"] = self.report_group.id
                      instance_dict["group"]["total_reports"] = self.report_group.total_reports
                      instance_dict["group"]["last_report"] = self.report_group.last_report
                      instance_dict["group"]["priority"] = self.report_group.priority
                      instance_dict["group"]["occurences"] = self.report_group.occurences
                      instance_dict["group"]["last_timestamp"] = self.report_group.last_timestamp
                      instance_dict["group"]["first_timestamp"] = self.report_group.first_timestamp
                      instance_dict["group"]["public"] = self.report_group.public
                      instance_dict["group"]["fixed"] = self.report_group.fixed
                      instance_dict["group"]["read"] = self.report_group.read
                      instance_dict["group"]["average_duration"] = self.report_group.average_duration
                      instance_dict["resource_name"] = self.report_group.application.resource_name
                      instance_dict["report_type"] = self.report_type
                      # 404 reports are stored with an empty error, restore the label
                      if instance_dict["http_status"] == 404 and not instance_dict["error"]:
                          instance_dict["error"] = "404 Not Found"
                      if details:
                          instance_dict[
                              "affected_users_count"
                          ] = ReportGroupService.affected_users_count(self.report_group)
                          instance_dict["top_affected_users"] = [
                              {"username": u.username, "count": u.count}
                              for u in ReportGroupService.top_affected_users(self.report_group)
                          ]
                          instance_dict["application"] = {"integrations": []}
                          for integration in self.report_group.application.integrations:
                              if integration.front_visible:
                                  instance_dict["application"]["integrations"].append(
                                      {
                                          "name": integration.integration_name,
                                          "action": integration.integration_action,
                                      }
                                  )
                          instance_dict["comments"] = [
                              c.get_dict() for c in self.report_group.comments
                          ]
                          instance_dict["group"]["next_report"] = None
                          instance_dict["group"]["previous_report"] = None
                          next_in_group = self.get_next_in_group(request)
                          previous_in_group = self.get_previous_in_group(request)
                          if next_in_group:
                              instance_dict["group"]["next_report"] = next_in_group
                          if previous_in_group:
                              instance_dict["group"]["previous_report"] = previous_in_group
                          # slow call ordering
                          def find_parent(row, data):
                              # scanning from the end, return the first call whose time
                              # span strictly contains ``row``; incomparable values
                              # (TypeError) are logged and skipped
                              for r in reversed(data):
                                  try:
                                      if (
                                          row["timestamp"] > r["timestamp"]
                                          and row["end_time"] < r["end_time"]
                                      ):
                                          return r
                                  except TypeError as e:
                                      log.warning("reports_view.find_parent: %s" % e)
                              return None
                          new_calls = []
                          # assumes every slow call dict carries a "children" list -
                          # TODO confirm against SlowCall.get_dict
                          calls = [c.get_dict() for c in self.slow_calls]
                          while calls:
                              # start from end
                              # only the last remaining call is handled per pass: the
                              # for loop breaks right after removing it
                              for x in range(len(calls) - 1, -1, -1):
                                  parent = find_parent(calls[x], calls)
                                  if parent:
                                      parent["children"].append(calls[x])
                                  else:
                                      # no parent at all? append to new calls anyways
                                      new_calls.append(calls[x])
                                      # print 'append', calls[x]
                                  del calls[x]
                                  break
                          instance_dict["slow_calls"] = new_calls
                      instance_dict["front_url"] = self.get_public_url(request)
                      exclude_keys_list = exclude_keys or []
                      include_keys_list = include_keys or []
                      # iterate over a copy of the keys because entries get deleted
                      for k in list(instance_dict.keys()):
                          if k == "group":
                              continue
                          if k in exclude_keys_list or (k not in include_keys_list and include_keys):
                              del instance_dict[k]
                      return instance_dict
                  def get_previous_in_group(self, request):
                      """
                      Searches this report's partition for a report of the same group
                      with a lower id

                      :param request: request providing the ``es_conn`` connection
                      :return: id stored in the first hit, or None when nothing matched
                      """
                      # NOTE(review): hits are sorted by ``_doc``, not by the id field,
                      # so the first hit is not necessarily the closest id - confirm
                      query = {
                          "size": 1,
                          "query": {
                              "bool": {
                                  "filter": [
                                      {"term": {"group_id": self.group_id}},
-                                     {"range": {"pg_id": {"lt": self.id}}},
+                                     {"range": {"report_id": {"lt": self.id}}},
                                  ]
                              }
                          },
                          "sort": [{"_doc": {"order": "desc"}}],
                      }
                      result = request.es_conn.search(
                          body=query, index=self.partition_id, doc_type="report"
                      )
                      # falls through (returning None) when there are no hits
                      if result["hits"]["total"]:
-                         return result["hits"]["hits"][0]["_source"]["pg_id"]
+                         return result["hits"]["hits"][0]["_source"]["report_id"]
                  def get_next_in_group(self, request):
                      # Look up, via Elasticsearch, a report of the same group whose id is
                      # greater than this report's id. Returns that id, or None (implicitly)
                      # when the search reports no hits.
                      # The "-"/"+" pairs below show the id field being renamed from
                      # "pg_id" to "report_id".
                      query = {
                          "size": 1,
                          "query": {
                              "bool": {
                                  "filter": [
                                      {"term": {"group_id": self.group_id}},
-                                     {"range": {"pg_id": {"gt": self.id}}},
+                                     {"range": {"report_id": {"gt": self.id}}},
                                  ]
                              }
                          },
                          # NOTE(review): "_doc" orders hits by index order, not by id -
                          # confirm the single hit is really the nearest higher id.
                          "sort": [{"_doc": {"order": "asc"}}],
                      }
                      # the search is limited to this report's own monthly index
                      result = request.es_conn.search(
                          body=query, index=self.partition_id, doc_type="report"
                      )
                      if result["hits"]["total"]:
-                         return result["hits"]["hits"][0]["_source"]["pg_id"]
+                         return result["hits"]["hits"][0]["_source"]["report_id"]
                  def get_public_url(self, request=None, report_group=None, _app_url=None):
                      """
                      Build the front-end address under which this report can be viewed.

                      The current thread-local request is used when ``request`` is not
                      given. The group part of the path comes from ``report_group`` when
                      one is passed and from ``self.group_id`` otherwise.
                      """
                      active_request = request if request else get_current_request()
                      base_url = active_request.route_url("/", _app_url=_app_url)
                      template = base_url + "ui/report/%s/%s"
                      group_id = report_group.id if report_group else self.group_id
                      return template % (group_id, self.id)
                  def req_stats(self):
                      """
                      Return a copy of ``request_stats`` extended with a ``percentages``
                      mapping, or None when there is no (non-zero) ``main`` time.

                      Every timing whose key contains none of "calls", "main" or
                      "percentages" is subtracted from ``main`` and gets a floored
                      percentage of the original ``main`` value; whatever is left of the
                      100% is reported under ``percentages["main"]``. If that remainder
                      drops below zero both ``main`` figures are reset to 0.0.
                      """
                      stats = self.request_stats.copy()
                      shares = {"main": 100.0}
                      stats["percentages"] = shares
                      total = stats.get("main", 0.0)
                      if not total:
                          return None
                      skipped_markers = ("calls", "main", "percentages")
                      for key, spent in stats.items():
                          if any(marker in key for marker in skipped_markers):
                              continue
                          stats["main"] -= spent
                          share = math.floor(spent / total * 100.0)
                          shares[key] = share
                          shares["main"] -= share
                      if shares["main"] < 0.0:
                          shares["main"] = 0.0
                          stats["main"] = 0.0
                      return stats
                  def generate_grouping_hash(
                      self, hash_string=None, default_grouping=None, protocol_version=None
                  ):
                      """
                      Compute, store and return the SHA1 hex digest used to group reports.

                      When ``hash_string`` is not supplied it is assembled from report
                      attributes according to ``default_grouping``. The first day of the
                      current UTC month is always prefixed before hashing, and the digest
                      is also saved on ``self.grouping_hash``.
                      """
                      if not hash_string:
                          location = self.tags.get("view_name") or self.url_path
                          server_name = self.tags.get("server_name") or ""
                          if default_grouping == "url_traceback":
                              if self.language == Language.javascript:
                                  pieces = (self.traceback_hash, self.error)
                              else:
                                  pieces = (self.traceback_hash, location, self.error)
                          elif default_grouping == "traceback_server":
                              # the language makes no difference to the string in this mode
                              pieces = (self.traceback_hash, server_name)
                          else:
                              pieces = (self.error, location)
                          hash_string = "_".join(str(piece) for piece in pieces)
                      month_start = datetime.utcnow().date().replace(day=1)
                      salted = "{}_{}".format(month_start, hash_string)
                      digest = hashlib.sha1(salted.encode("utf8")).hexdigest()
                      self.grouping_hash = digest
                      return digest
                  def stripped_traceback(self):
                      """
                      Return a deep copy of the traceback with the "vars" entry removed
                      from every row; non-list tracebacks are returned as a plain copy.
                      """
                      frames = copy.deepcopy(self.traceback)
                      if not isinstance(frames, list):
                          return frames
                      for frame in frames:
                          frame.pop("vars", None)
                      return frames
                  def notify_channel(self, report_group):
                      """
                      Publish a "new report" message for this report on the websocket
                      channel of its application. Only error reports are announced; for
                      any other report type the method returns without sending.
                      """
                      settings = get_current_registry().settings
                      log.info("notify channelstream")
                      if self.report_type != ReportType.error:
                          return
                      group_summary = {
                          "priority": report_group.priority,
                          "first_timestamp": report_group.first_timestamp,
                          "last_timestamp": report_group.last_timestamp,
                          "average_duration": report_group.average_duration,
                          "occurences": report_group.occurences,
                      }
                      report_summary = {
                          "group": group_summary,
                          "report_id": self.id,
                          "group_id": self.group_id,
                          "resource_id": self.resource_id,
                          "http_status": self.http_status,
                          "url_domain": self.url_domain,
                          "url_path": self.url_path,
                          "error": self.error or "",
                          "server": self.tags.get("server_name"),
                          "view_name": self.tags.get("view_name"),
                          "front_url": self.get_public_url(),
                      }
                      payload = {
                          "type": "message",
                          "user": "__system__",
                          "channel": "app_%s" % self.resource_id,
                          "message": {
                              "topic": "front_dashboard.new_topic",
                              "report": report_summary,
                          },
                      }
                      secret = settings["cometd.secret"]
                      servers = [settings["cometd_servers"]]
                      channelstream_request(secret, "/message", [payload], servers=servers)
                  def es_doc(self):
                      # Build the dictionary that represents this report in Elasticsearch
                      # (the after_insert/after_update listeners pass it to the index call).
                      # The "-"/"+" lines show this commit's changes: "pg_id" becomes
                      # "report_id", "_parent" is replaced by "type" plus a "join_field"
                      # pointing at the group, and "_routing" is added.
                      tags = {}
                      tag_list = []
                      for name, value in self.tags.items():
                          # dots in tag names are replaced with underscores
                          name = name.replace(".", "_")
                          tag_list.append(name)
                          # "numeric_values" is only filled for int/float values
                          # (booleans excluded); everything else gets None
                          tags[name] = {
                              "values": convert_es_type(value),
                              "numeric_values": value
                              if (isinstance(value, (int, float)) and not isinstance(value, bool))
                              else None,
                          }
                      # NOTE(review): this entry uses the keys "value"/"numeric_value" while
                      # every other tag uses "values"/"numeric_values", and "user_name" is
                      # not added to tag_list - confirm whether that is intended.
                      if "user_name" not in self.tags and self.username:
                          tags["user_name"] = {"value": [self.username], "numeric_value": None}
                      return {
                          "_id": str(self.id),
-                         "pg_id": str(self.id),
+                         "report_id": str(self.id),
                          "resource_id": self.resource_id,
                          "http_status": self.http_status or "",
                          "start_time": self.start_time,
                          "end_time": self.end_time,
                          "url_domain": self.url_domain if self.url_domain else "",
                          "url_path": self.url_path if self.url_path else "",
                          "duration": self.duration,
                          "error": self.error if self.error else "",
                          "report_type": self.report_type,
                          "request_id": self.request_id,
                          "ip": self.ip,
                          "group_id": str(self.group_id),
-                         "_parent": str(self.group_id),
+                         "type": "report",
+                         "join_field": {
+                             "name": "report",
+                             "parent": str(self.group_id)
+                         },
                          "tags": tags,
                          "tag_list": tag_list,
+                         "_routing": str(self.group_id)
                      }
                  @property
                  def partition_id(self):
                      """Name of the monthly index for this report: ``rcae_r_<YYYY>_<MM>``."""
                      month_suffix = self.report_group_time.strftime("%Y_%m")
                      return "rcae_r_" + month_suffix
                  def partition_range(self):
                      """
                      Return ``(first day of the report's month, first day of the next
                      month)`` based on ``report_group_time``.
                      """
                      month_start = self.report_group_time.date().replace(day=1)
                      # 40 days after the 1st always falls inside the following month
                      next_month_start = (month_start + timedelta(days=40)).replace(day=1)
                      return month_start, next_month_start
              def after_insert(mapper, connection, target):
                  """
                  Index a freshly inserted report in Elasticsearch, unless the instance
                  carries a ``_skip_ft_index`` attribute.
                  """
                  if hasattr(target, "_skip_ft_index"):
                      return
                  document = target.es_doc()
                  # the id is passed to the index call separately
                  document.pop("_id", None)
                  Datastores.es.index(
                      target.partition_id,
                      "report",
                      document,
                      parent=target.group_id,
                      id=target.id,
                  )
              def after_update(mapper, connection, target):
                  """
                  Re-index an updated report in Elasticsearch, unless the instance
                  carries a ``_skip_ft_index`` attribute.
                  """
                  if hasattr(target, "_skip_ft_index"):
                      return
                  document = target.es_doc()
                  # the id is passed to the index call separately
                  document.pop("_id", None)
                  Datastores.es.index(
                      target.partition_id,
                      "report",
                      document,
                      parent=target.group_id,
                      id=target.id,
                  )
              def after_delete(mapper, connection, target):
                  # Listener registered for Report "after_delete": removes the documents
                  # matching the deleted report's id from its Elasticsearch index, unless
                  # the instance carries a "_skip_ft_index" attribute.
                  # The "-"/"+" pair shows the lookup field changing from "pg_id" to
                  # "report_id".
                  if not hasattr(target, "_skip_ft_index"):
-                     query = {"query": {"term": {"pg_id": target.id}}}
+                     query = {"query": {"term": {"report_id": target.id}}}
                      Datastores.es.delete_by_query(
                          index=target.partition_id, doc_type="report", body=query, conflicts="proceed"
                      )
              # Register the handlers above so that inserts, updates and deletes of
              # Report instances are mirrored to Elasticsearch.
              sa.event.listen(Report, "after_insert", after_insert)
              sa.event.listen(Report, "after_update", after_update)
              sa.event.listen(Report, "after_delete", after_delete)

backend/src/appenlight/models/report_group.py

0 +7 -7

              # -*- coding: utf-8 -*-
              # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
              #
              # Licensed under the Apache License, Version 2.0 (the "License");
              # you may not use this file except in compliance with the License.
              # You may obtain a copy of the License at
              #
              #   http://www.apache.org/licenses/LICENSE-2.0
              #
              # Unless required by applicable law or agreed to in writing, software
              # distributed under the License is distributed on an "AS IS" BASIS,
              # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
              # See the License for the specific language governing permissions and
              # limitations under the License.
              import logging
              import sqlalchemy as sa
              from datetime import datetime, timedelta
              from pyramid.threadlocal import get_current_request
              from sqlalchemy.dialects.postgresql import JSON
              from ziggurat_foundations.models.base import BaseModel
              from appenlight.models import Base, get_db_session, Datastores
              from appenlight.lib.enums import ReportType
              from appenlight.lib.rule import Rule
              from appenlight.lib.redis_keys import REDIS_KEYS
              from appenlight.models.report import REPORT_TYPE_MATRIX
              log = logging.getLogger(__name__)
              class ReportGroup(Base, BaseModel):
                  """
                  SQLAlchemy model for the ``reports_groups`` table: one row per group of
                  reports, holding the group's counters and status flags together with
                  the relationships to its reports, comments, statistics and assigned
                  users.
                  """
                  __tablename__ = "reports_groups"
                  __table_args__ = {"implicit_returning": False}
                  id = sa.Column(sa.BigInteger(), nullable=False, primary_key=True)
                  # owning application; rows follow the application on update/delete
                  resource_id = sa.Column(
                      sa.Integer(),
                      sa.ForeignKey(
                          "applications.resource_id", onupdate="CASCADE", ondelete="CASCADE"
                      ),
                      nullable=False,
                      index=True,
                  )
                  # run_postprocessing() keeps this value within 1..10
                  priority = sa.Column(
                      sa.Integer, nullable=False, index=True, default=5, server_default="5"
                  )
                  first_timestamp = sa.Column(
                      sa.DateTime(), default=datetime.utcnow, server_default=sa.func.now()
                  )
                  last_timestamp = sa.Column(
                      sa.DateTime(), default=datetime.utcnow, server_default=sa.func.now()
                  )
                  error = sa.Column(sa.UnicodeText(), index=True)
                  grouping_hash = sa.Column(sa.String(40), default="")
                  # pkeys of the postprocess actions already applied to this group
                  triggered_postprocesses_ids = sa.Column(JSON(), nullable=False, default=list)
                  report_type = sa.Column(sa.Integer, default=1)
                  total_reports = sa.Column(sa.Integer, default=1)
                  # id of the Report that ``last_report_ref`` resolves to
                  last_report = sa.Column(sa.Integer)
                  occurences = sa.Column(sa.Integer, default=1)
                  average_duration = sa.Column(sa.Float, default=0)
                  summed_duration = sa.Column(sa.Float, default=0)
                  read = sa.Column(sa.Boolean(), index=True, default=False)
                  fixed = sa.Column(sa.Boolean(), index=True, default=False)
                  notified = sa.Column(sa.Boolean(), index=True, default=False)
                  public = sa.Column(sa.Boolean(), index=True, default=False)
                  # dynamic (query-returning) collection of the group's reports
                  reports = sa.orm.relationship(
                      "Report",
                      lazy="dynamic",
                      backref="report_group",
                      cascade="all, delete-orphan",
                      passive_deletes=True,
                      passive_updates=True,
                  )
                  # note: the backref is named "report" although it points at the group
                  comments = sa.orm.relationship(
                      "ReportComment",
                      lazy="dynamic",
                      backref="report",
                      cascade="all, delete-orphan",
                      passive_deletes=True,
                      passive_updates=True,
                      order_by="ReportComment.comment_id",
                  )
                  # users linked through the "reports_assignments" association table
                  assigned_users = sa.orm.relationship(
                      "User",
                      backref=sa.orm.backref(
                          "assigned_reports_relation",
                          lazy="dynamic",
                          order_by=sa.desc(sa.text("reports_groups.id")),
                      ),
                      passive_deletes=True,
                      passive_updates=True,
                      secondary="reports_assignments",
                      order_by="User.user_name",
                  )
                  # note: the backref is named "report" although it points at the group
                  stats = sa.orm.relationship(
                      "ReportStat",
                      lazy="dynamic",
                      backref="report",
                      passive_deletes=True,
                      passive_updates=True,
                  )
                  # single Report selected by the ``last_report`` column
                  last_report_ref = sa.orm.relationship(
                      "Report",
                      uselist=False,
                      primaryjoin="ReportGroup.last_report " "== Report.id",
                      foreign_keys="Report.id",
                      cascade="all, delete-orphan",
                      passive_deletes=True,
                      passive_updates=True,
                  )
                  def __repr__(self):
                      """Short debug representation that shows only the group id."""
                      identifier = self.id
                      return "<ReportGroup id:{}>".format(identifier)
                  def get_report(self, report_id=None, public=False):
                      """
                      Return the report of this group that has the given id; when no id
                      is given, return the group's latest report instead.
                      """
                      from .report import Report
                      if report_id:
                          return self.reports.filter(Report.id == report_id).first()
                      return self.last_report_ref
                  def get_public_url(self, request, _app_url=None):
                      """Return the front-end URL under which this report group is shown."""
                      template = request.route_url("/", _app_url=_app_url) + "ui/report/%s"
                      return template % self.id
                  def run_postprocessing(self, report):
                      """
                      Apply the application's postprocessing actions to this group.

                      Every action in ``self.application.postprocess_conf`` whose rule
                      matches the report's dict, and whose pkey has not been recorded for
                      this group yet, is run through ``action.postprocess(self)`` and its
                      pkey is added to ``triggered_postprocesses_ids``. Afterwards the
                      priority is clamped to the 1..10 range.

                      :param report: report whose ``get_dict(request)`` output is matched
                          against each action's rule
                      """
                      request = get_current_request()
                      get_db_session(None, self).flush()
                      for action in self.application.postprocess_conf:
                          get_db_session(None, self).flush()
                          rule_obj = Rule(action.rule, REPORT_TYPE_MATRIX)
                          report_dict = report.get_dict(request)
                          # run the action only when its rule matches and it has not been
                          # applied to this group before
                          if (
                              rule_obj.match(report_dict)
                              and action.pkey not in self.triggered_postprocesses_ids
                          ):
                              action.postprocess(self)
                              # assign a new list (instead of appending in place) so that
                              # SQLAlchemy can track the mutation
                              self.triggered_postprocesses_ids = self.triggered_postprocesses_ids + [
                                  action.pkey
                              ]
                      get_db_session(None, self).flush()
                      # keep the priority inside the 1..10 bounds
                      if self.priority < 1:
                          self.priority = 1
                      if self.priority > 10:
                          self.priority = 10
                  def get_dict(self, request):
                      """
                      Serialize the group and enrich it with details of its latest report.

                      Adds ``server_name``, ``view_name``, ``report_type``, ``url_path``
                      and ``front_url`` taken from the report returned by
                      ``get_report()``, plus the application's ``resource_name``, and
                      drops ``triggered_postprocesses_ids`` from the result.

                      :param request: request used to build the report's public URL
                      :return: dictionary describing the group
                      """
                      instance_dict = super(ReportGroup, self).get_dict()
                      # resolve the latest report once instead of once per field
                      latest_report = self.get_report()
                      instance_dict["server_name"] = latest_report.tags.get("server_name")
                      instance_dict["view_name"] = latest_report.tags.get("view_name")
                      instance_dict["resource_name"] = self.application.resource_name
                      instance_dict["report_type"] = latest_report.report_type
                      instance_dict["url_path"] = latest_report.url_path
                      instance_dict["front_url"] = latest_report.get_public_url(request)
                      del instance_dict["triggered_postprocesses_ids"]
                      return instance_dict
                  def es_doc(self):
                      # Build the dictionary that represents this report group in
                      # Elasticsearch (the after_insert/after_update listeners index it).
                      # The "-"/"+" lines show this commit's changes: "pg_id" becomes
                      # "group_id", and "type" plus a "join_field" named "report_group"
                      # are added.
                      return {
                          "_id": str(self.id),
-                         "pg_id": str(self.id),
+                         "group_id": str(self.id),
                          "resource_id": self.resource_id,
                          "error": self.error,
                          "fixed": self.fixed,
                          "public": self.public,
                          "read": self.read,
                          "priority": self.priority,
                          "occurences": self.occurences,
                          "average_duration": self.average_duration,
                          "summed_duration": self.summed_duration,
                          "first_timestamp": self.first_timestamp,
                          "last_timestamp": self.last_timestamp,
+                         "type": "report_group",
+                         "join_field": {
+                             "name": "report_group"
+                         },
                      }
                  def set_notification_info(self, notify_10=False, notify_100=False):
                      """
                      Record this group's activity in redis for the notification job.

                      All commands are queued on one pipeline and sent with a single
                      ``execute()``: per-type and per-group counters are incremented, the
                      application and group are added to the "to notify" sets, and the
                      10th/100th occurrence markers are set when requested. Counter and
                      per-app keys get a 24 hour expiry.
                      """
                      one_day = 3600 * 24
                      counters = REDIS_KEYS["counters"]
                      current_time = datetime.utcnow().replace(second=0, microsecond=0)
                      # global app counter
                      type_counter_key = counters["reports_per_type"].format(
                          self.report_type, current_time
                      )
                      pipe = Datastores.redis.pipeline()
                      pipe.incr(type_counter_key)
                      pipe.expire(type_counter_key, one_day)
                      # detailed app notification for alerts and notifications
                      pipe.sadd(REDIS_KEYS["apps_that_had_reports"], self.resource_id)
                      pipe.sadd(REDIS_KEYS["apps_that_had_reports_alerting"], self.resource_id)
                      # only notify for exceptions here
                      if self.report_type == ReportType.error:
                          pipe.sadd(REDIS_KEYS["apps_that_had_reports"], self.resource_id)
                          pipe.sadd(
                              REDIS_KEYS["apps_that_had_error_reports_alerting"], self.resource_id
                          )
                      # per-group occurrence counters
                      for counter_name in (
                          "report_group_occurences",
                          "report_group_occurences_alerting",
                      ):
                          counter_key = counters[counter_name].format(self.id)
                          pipe.incr(counter_key)
                          pipe.expire(counter_key, one_day)
                      # markers for the 10th / 100th occurrence
                      milestones = (
                          (notify_10, "report_group_occurences_10th"),
                          (notify_100, "report_group_occurences_100th"),
                      )
                      for requested, marker_name in milestones:
                          if requested:
                              marker_key = counters[marker_name].format(self.id)
                              pipe.setex(marker_key, one_day, 1)
                      # groups waiting for notification, per report type and application
                      for set_name in (
                          "reports_to_notify_per_type_per_app",
                          "reports_to_notify_per_type_per_app_alerting",
                      ):
                          set_key = REDIS_KEYS[set_name].format(self.report_type, self.resource_id)
                          pipe.sadd(set_key, self.id)
                          pipe.expire(set_key, one_day)
                      pipe.execute()
                  @property
                  def partition_id(self):
                      """Name of the monthly index for this group: ``rcae_r_<YYYY>_<MM>``."""
                      month_suffix = self.first_timestamp.strftime("%Y_%m")
                      return "rcae_r_" + month_suffix
                  def partition_range(self):
                      """
                      Return ``(first day of the group's first month, first day of the
                      month after it)`` based on ``first_timestamp``.
                      """
                      month_start = self.first_timestamp.date().replace(day=1)
                      # 40 days after the 1st always falls inside the following month
                      next_month_start = (month_start + timedelta(days=40)).replace(day=1)
                      return month_start, next_month_start
              def after_insert(mapper, connection, target):
                  # Listener registered for ReportGroup "after_insert": indexes the group's
                  # es_doc() in its monthly Elasticsearch index, unless the instance
                  # carries a "_skip_ft_index" attribute.
                  # The "-"/"+" pair shows the doc type changing from "report_group" to
                  # "report".
                  if not hasattr(target, "_skip_ft_index"):
                      data = target.es_doc()
                      # the id is passed to the index call separately
                      data.pop("_id", None)
-                     Datastores.es.index(target.partition_id, "report_group", data, id=target.id)
+                     Datastores.es.index(target.partition_id, "report", data, id=target.id)
              def after_update(mapper, connection, target):
                  # Listener registered for ReportGroup "after_update": re-indexes the
                  # group's es_doc() in its monthly Elasticsearch index, unless the
                  # instance carries a "_skip_ft_index" attribute.
                  # The "-"/"+" pair shows the doc type changing from "report_group" to
                  # "report".
                  if not hasattr(target, "_skip_ft_index"):
                      data = target.es_doc()
                      # the id is passed to the index call separately
                      data.pop("_id", None)
-                     Datastores.es.index(target.partition_id, "report_group", data, id=target.id)
+                     Datastores.es.index(target.partition_id, "report", data, id=target.id)
              def after_delete(mapper, connection, target):
                  # Listener registered for ReportGroup "after_delete": deletes every
                  # "report" document whose group_id equals the deleted group's id.
                  # NOTE(review): unlike after_insert/after_update there is no
                  # "_skip_ft_index" check here - confirm that is intentional.
                  query = {"query": {"term": {"group_id": target.id}}}
                  # delete by query
                  Datastores.es.delete_by_query(
                      index=target.partition_id, doc_type="report", body=query, conflicts="proceed"
                  )
                  # The "-" lines are the removed second delete, which targeted the
                  # "report_group" doc type by "pg_id".
                  # NOTE(review): the lone "+" closing parenthesis below looks like an
                  # artifact of how the diff was rendered - verify against the commit.
-                 query = {"query": {"term": {"pg_id": target.id}}}
-                 Datastores.es.delete_by_query(
-                     index=target.partition_id, doc_type="report_group", body=query, conflicts="proceed"
+                 )
              # Register the handlers above so that inserts, updates and deletes of
              # ReportGroup instances are mirrored to Elasticsearch.
              sa.event.listen(ReportGroup, "after_insert", after_insert)
              sa.event.listen(ReportGroup, "after_update", after_update)
              sa.event.listen(ReportGroup, "after_delete", after_delete)

backend/src/appenlight/models/report_stat.py

0 +3 -1

              # -*- coding: utf-8 -*-
              # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
              #
              # Licensed under the Apache License, Version 2.0 (the "License");
              # you may not use this file except in compliance with the License.
              # You may obtain a copy of the License at
              #
              #   http://www.apache.org/licenses/LICENSE-2.0
              #
              # Unless required by applicable law or agreed to in writing, software
              # distributed under the License is distributed on an "AS IS" BASIS,
              # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
              # See the License for the specific language governing permissions and
              # limitations under the License.
              import sqlalchemy as sa
              from appenlight.lib.enums import ReportType
              from appenlight.models import Base
              from ziggurat_foundations.models.base import BaseModel
              class ReportStat(Base, BaseModel):
                  """
                  SQLAlchemy model for the ``reports_stats`` table: occurrence count and
                  duration recorded for a report group at a given ``start_interval``.
                  """
                  __tablename__ = "reports_stats"
                  __table_args__ = {"implicit_returning": False}
                  # report group the statistic belongs to
                  group_id = sa.Column(
                      sa.BigInteger(), sa.ForeignKey("reports_groups.id"), nullable=False
                  )
                  # application the statistic belongs to
                  resource_id = sa.Column(
                      sa.Integer(), sa.ForeignKey("applications.resource_id"), nullable=False
                  )
                  # also determines the monthly index name, see ``partition_id``
                  start_interval = sa.Column(sa.DateTime(), nullable=False)
                  occurences = sa.Column(sa.Integer, nullable=True, default=0)
                  owner_user_id = sa.Column(sa.Integer(), sa.ForeignKey("users.id"), nullable=True)
                  # numeric report type; es_doc() maps it to a name via ReportType
                  type = sa.Column(sa.Integer, nullable=True, default=0)
                  duration = sa.Column(sa.Float, nullable=True, default=0)
                  id = sa.Column(sa.BigInteger, nullable=False, primary_key=True)
                  server_name = sa.Column(sa.Unicode(128), nullable=False, default="")
                  view_name = sa.Column(sa.Unicode(128), nullable=False, default="")
                  @property
                  def partition_id(self):
                      """Name of the monthly index for this stat: ``rcae_r_<YYYY>_<MM>``."""
                      month_suffix = self.start_interval.strftime("%Y_%m")
                      return "rcae_r_" + month_suffix
                  def es_doc(self):
                      # Build the dictionary that represents this statistic in
                      # Elasticsearch. It is laid out like a log entry: fixed "permanent",
                      # "log_level" and "namespace" values, with the actual figures stored
                      # as tags ("values"/"numeric_values" pairs) and listed in "tag_list".
                      # The "-"/"+" lines show this commit's changes: "pg_id" becomes
                      # "report_stat_id", and top-level "group_id" and "type" are added.
                      return {
                          "resource_id": self.resource_id,
                          "timestamp": self.start_interval,
-                         "pg_id": str(self.id),
+                         "report_stat_id": str(self.id),
                          "permanent": True,
                          "request_id": None,
                          "log_level": "ERROR",
                          "message": None,
                          "namespace": "appenlight.error",
+                         "group_id": str(self.group_id),
                          "tags": {
                              "duration": {"values": self.duration, "numeric_values": self.duration},
                              "occurences": {
                                  "values": self.occurences,
                                  "numeric_values": self.occurences,
                              },
                              "group_id": {"values": self.group_id, "numeric_values": self.group_id},
                              # name looked up from the numeric type, number kept alongside
                              "type": {
                                  "values": ReportType.key_from_value(self.type),
                                  "numeric_values": self.type,
                              },
                              "server_name": {"values": self.server_name, "numeric_values": None},
                              "view_name": {"values": self.view_name, "numeric_values": None},
                          },
                          "tag_list": [
                              "duration",
                              "occurences",
                              "group_id",
                              "type",
                              "server_name",
                              "view_name",
                          ],
+                         "type": "report_stat",
                      }

backend/src/appenlight/models/services/log.py

0 +1 -1

              # -*- coding: utf-8 -*-
              # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
              #
              # Licensed under the Apache License, Version 2.0 (the "License");
              # you may not use this file except in compliance with the License.
              # You may obtain a copy of the License at
              #
              #   http://www.apache.org/licenses/LICENSE-2.0
              #
              # Unless required by applicable law or agreed to in writing, software
              # distributed under the License is distributed on an "AS IS" BASIS,
              # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
              # See the License for the specific language governing permissions and
              # limitations under the License.
              import paginate
              import logging
              import sqlalchemy as sa
              from appenlight.models.log import Log
              from appenlight.models import get_db_session, Datastores
              from appenlight.models.services.base import BaseService
              from appenlight.lib.utils import es_index_name_limiter
              log = logging.getLogger(__name__)
              class LogService(BaseService):
                  @classmethod
                  def get_logs(cls, resource_ids=None, filter_settings=None, db_session=None):
                      """
                      Build a database query for the log rows of the given applications.

                      Returns an empty list when no resource ids are passed. Otherwise
                      returns a query over ``Log`` restricted to those resources and to
                      the optional ``start_date``, ``end_date``, ``log_level``,
                      ``request_id`` and ``namespace`` entries of ``filter_settings``,
                      ordered by timestamp descending.

                      :param resource_ids: ids of the applications whose logs are wanted
                      :param filter_settings: optional mapping with the filters above
                      :param db_session: optional session, resolved via ``get_db_session``
                      """
                      # ensure we always have id's passed
                      if not resource_ids:
                          return []
                      # the parameter defaults to None; treat that as "no filters"
                      # instead of failing on None.get()
                      filter_settings = filter_settings or {}
                      db_session = get_db_session(db_session)
                      q = db_session.query(Log)
                      q = q.filter(Log.resource_id.in_(resource_ids))
                      start_date = filter_settings.get("start_date")
                      if start_date:
                          q = q.filter(Log.timestamp >= start_date)
                      end_date = filter_settings.get("end_date")
                      if end_date:
                          q = q.filter(Log.timestamp <= end_date)
                      log_level = filter_settings.get("log_level")
                      if log_level:
                          q = q.filter(Log.log_level == log_level.upper())
                      request_id = filter_settings.get("request_id")
                      if request_id:
                          # the filter compares against the id with dashes removed
                          q = q.filter(Log.request_id == request_id.replace("-", ""))
                      namespace = filter_settings.get("namespace")
                      if namespace:
                          q = q.filter(Log.namespace == namespace)
                      q = q.order_by(sa.desc(Log.timestamp))
                      return q
                  @classmethod
                  def es_query_builder(cls, app_ids, filter_settings):
                      if not filter_settings:
                          filter_settings = {}
                      query = {
                          "query": {
                              "bool": {
                                  "filter": [{"terms": {"resource_id": list(app_ids)}}]
                              }
                          }
                      }
                      start_date = filter_settings.get("start_date")
                      end_date = filter_settings.get("end_date")
                      filter_part = query["query"]["bool"]["filter"]
                      for tag in filter_settings.get("tags", []):
                          tag_values = [v.lower() for v in tag["value"]]
                          key = "tags.%s.values" % tag["name"].replace(".", "_")
                          filter_part.append({"terms": {key: tag_values}})
                      date_range = {"range": {"timestamp": {}}}
                      if start_date:
                          date_range["range"]["timestamp"]["gte"] = start_date
                      if end_date:
                          date_range["range"]["timestamp"]["lte"] = end_date
                      if start_date or end_date:
                          filter_part.append(date_range)
                      levels = filter_settings.get("level")
                      if levels:
                          filter_part.append({"terms": {"log_level": levels}})
                      namespaces = filter_settings.get("namespace")
                      if namespaces:
                          filter_part.append({"terms": {"namespace": namespaces}})
                      request_ids = filter_settings.get("request_id")
                      if request_ids:
                          filter_part.append({"terms": {"request_id": request_ids}})
                      messages = filter_settings.get("message")
                      if messages:
                          query["query"]["bool"]["must"] = {
                              "match": {"message": {"query": " ".join(messages), "operator": "and"}}
                          }
                      return query
                  @classmethod
                  def get_time_series_aggregate(cls, app_ids=None, filter_settings=None):
                      if not app_ids:
                          return {}
                      es_query = cls.es_query_builder(app_ids, filter_settings)
                      es_query["aggs"] = {
                          "events_over_time": {
                              "date_histogram": {
                                  "field": "timestamp",
                                  "interval": "1h",
                                  "min_doc_count": 0,
                                  "extended_bounds": {
                                      "max": filter_settings.get("end_date"),
                                      "min": filter_settings.get("start_date"),
                                  },
                              }
                          }
                      }
                      log.debug(es_query)
                      index_names = es_index_name_limiter(
                          filter_settings.get("start_date"),
                          filter_settings.get("end_date"),
                          ixtypes=["logs"],
                      )
                      if index_names:
                          results = Datastores.es.search(
                              body=es_query, index=index_names, doc_type="log", size=0
                          )
                      else:
                          results = []
                      return results
                  @classmethod
                  def get_search_iterator(
                          cls,
                          app_ids=None,
                          page=1,
                          items_per_page=50,
                          order_by=None,
                          filter_settings=None,
                          limit=None,
                  ):
                      if not app_ids:
                          return {}, 0
                      es_query = cls.es_query_builder(app_ids, filter_settings)
                      sort_query = {"sort": [{"timestamp": {"order": "desc"}}]}
                      es_query.update(sort_query)
                      log.debug(es_query)
                      es_from = (page - 1) * items_per_page
                      index_names = es_index_name_limiter(
                          filter_settings.get("start_date"),
                          filter_settings.get("end_date"),
                          ixtypes=["logs"],
                      )
                      if not index_names:
                          return {}, 0
                      results = Datastores.es.search(
                          body=es_query,
                          index=index_names,
                          doc_type="log",
                          size=items_per_page,
                          from_=es_from,
                      )
                      if results["hits"]["total"] > 5000:
                          count = 5000
                      else:
                          count = results["hits"]["total"]
                      return results["hits"], count
                  @classmethod
                  def get_paginator_by_app_ids(
                          cls,
                          app_ids=None,
                          page=1,
                          item_count=None,
                          items_per_page=50,
                          order_by=None,
                          filter_settings=None,
                          exclude_columns=None,
                          db_session=None,
                  ):
                      if not filter_settings:
                          filter_settings = {}
                      results, item_count = cls.get_search_iterator(
                          app_ids, page, items_per_page, order_by, filter_settings
                      )
                      paginator = paginate.Page(
                          [], item_count=item_count, items_per_page=items_per_page, **filter_settings
                      )
                      ordered_ids = tuple(
-                         item["_source"]["pg_id"] for item in results.get("hits", [])
+                         item["_source"]["log_id"] for item in results.get("hits", [])
                      )
                      sorted_instance_list = []
                      if ordered_ids:
                          db_session = get_db_session(db_session)
                          query = db_session.query(Log)
                          query = query.filter(Log.log_id.in_(ordered_ids))
                          query = query.order_by(sa.desc("timestamp"))
                          sa_items = query.all()
                          # resort by score
                          for i_id in ordered_ids:
                              for item in sa_items:
                                  if str(item.log_id) == str(i_id):
                                      sorted_instance_list.append(item)
                      paginator.sa_items = sorted_instance_list
                      return paginator
                  @classmethod
                  def query_by_primary_key_and_namespace(cls, list_of_pairs, db_session=None):
                      db_session = get_db_session(db_session)
                      list_of_conditions = []
                      query = db_session.query(Log)
                      for pair in list_of_pairs:
                          list_of_conditions.append(
                              sa.and_(Log.primary_key == pair["pk"], Log.namespace == pair["ns"])
                          )
                      query = query.filter(sa.or_(*list_of_conditions))
                      query = query.order_by(sa.asc(Log.timestamp), sa.asc(Log.log_id))
                      return query

backend/src/appenlight/models/services/report_group.py

0 +3 -3

              # -*- coding: utf-8 -*-
              # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
              #
              # Licensed under the Apache License, Version 2.0 (the "License");
              # you may not use this file except in compliance with the License.
              # You may obtain a copy of the License at
              #
              #   http://www.apache.org/licenses/LICENSE-2.0
              #
              # Unless required by applicable law or agreed to in writing, software
              # distributed under the License is distributed on an "AS IS" BASIS,
              # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
              # See the License for the specific language governing permissions and
              # limitations under the License.
              import logging
              import paginate
              import sqlalchemy as sa
              import appenlight.lib.helpers as h
              from datetime import datetime
              from appenlight.models import get_db_session, Datastores
              from appenlight.models.report import Report
              from appenlight.models.report_group import ReportGroup
              from appenlight.models.report_comment import ReportComment
              from appenlight.models.user import User
              from appenlight.models.services.base import BaseService
              from appenlight.lib.enums import ReportType
              from appenlight.lib.utils import es_index_name_limiter
              log = logging.getLogger(__name__)
              class ReportGroupService(BaseService):
                  @classmethod
                  def get_trending(cls, request, filter_settings, limit=15, db_session=None):
                      """
                      Returns report groups trending for specific time interval

                      Elasticsearch documents are bucketed by
                      ``tags.group_id.values.keyword`` and the matching ``ReportGroup``
                      rows are then loaded from the database.

                      :param request: not used by this method
                      :param filter_settings: dict; ``start_date``, ``end_date`` and
                          ``resource`` are required keys, ``tags`` is optional
                      :param limit: ``size`` of the terms aggregation
                      :param db_session: optional session passed to ``get_db_session``
                      :return: list of ``(count, ReportGroup)`` tuples sorted by count
                          descending; ``[]`` when there are no indices or no resource
                      """
                      db_session = get_db_session(db_session)
                      tags = [
                          {"terms": {"tags.{}.values".format(tag["name"]): tag["value"]}}
                          for tag in filter_settings.get("tags") or ()
                      ]
                      index_names = es_index_name_limiter(
                          start_date=filter_settings["start_date"],
                          end_date=filter_settings["end_date"],
                          ixtypes=["reports"],
                      )
                      if not index_names or not filter_settings["resource"]:
                          return []

                      group_field = "tags.group_id.values.keyword"
                      es_query = {
                          "aggs": {
                              "parent_agg": {
                                  "aggs": {
                                      "groups": {
                                          "aggs": {
                                              "sub_agg": {"value_count": {"field": group_field}}
                                          },
                                          "filter": {"exists": {"field": "tags.group_id.values"}},
                                      }
                                  },
                                  "terms": {"field": group_field, "size": limit},
                              }
                          },
                          "query": {
                              "bool": {
                                  "filter": [
                                      {
                                          "terms": {
                                              # only the first resource is queried
                                              "resource_id": [filter_settings["resource"][0]]
                                          }
                                      },
                                      {
                                          "range": {
                                              "timestamp": {
                                                  "gte": filter_settings["start_date"],
                                                  "lte": filter_settings["end_date"],
                                              }
                                          }
                                      },
                                  ]
                              }
                          },
                      }
                      if tags:
                          es_query["query"]["bool"]["filter"].extend(tags)
                      result = Datastores.es.search(
                          body=es_query, index=index_names, doc_type="report", size=0
                      )
                      # group id -> occurrence count; a missing count is treated as 0
                      report_groups_d = {
                          int(bucket["key"]): bucket["groups"]["sub_agg"]["value"] or 0
                          for bucket in result["aggregations"]["parent_agg"]["buckets"]
                      }
                      query = db_session.query(ReportGroup)
                      query = query.filter(ReportGroup.id.in_(list(report_groups_d.keys())))
                      query = query.options(sa.orm.joinedload(ReportGroup.last_report_ref))
                      results = [(report_groups_d[group.id], group) for group in query]
                      return sorted(results, reverse=True, key=lambda x: x[0])
                  @classmethod
                  def get_search_iterator(
                      cls,
                      app_ids=None,
                      page=1,
                      items_per_page=50,
                      order_by=None,
                      filter_settings=None,
                      limit=None,
                  ):
                      if not app_ids:
                          return {}
                      if not filter_settings:
                          filter_settings = {}
                      query = {
                          "size": 0,
                          "query": {
                              "bool": {
                                  "must": [],
                                  "should": [],
                                  "filter": [{"terms": {"resource_id": list(app_ids)}}]
                              }
                          },
                          "aggs": {
                              "top_groups": {
                                  "terms": {
                                      "size": 5000,
-                                     "field": "_parent#report_group",
+                                     "field": "join_field#report_group",
                                      "order": {"newest": "desc"},
                                  },
                                  "aggs": {
                                      "top_reports_hits": {
                                          "top_hits": {"size": 1, "sort": {"start_time": "desc"}}
                                      },
                                      "newest": {"max": {"field": "start_time"}},
                                  },
                              }
                          },
                      }
                      start_date = filter_settings.get("start_date")
                      end_date = filter_settings.get("end_date")
                      filter_part = query["query"]["bool"]["filter"]
                      date_range = {"range": {"start_time": {}}}
                      if start_date:
                          date_range["range"]["start_time"]["gte"] = start_date
                      if end_date:
                          date_range["range"]["start_time"]["lte"] = end_date
                      if start_date or end_date:
                          filter_part.append(date_range)
                      priorities = filter_settings.get("priority")
                      for tag in filter_settings.get("tags", []):
                          tag_values = [v.lower() for v in tag["value"]]
                          key = "tags.%s.values" % tag["name"].replace(".", "_")
                          filter_part.append({"terms": {key: tag_values}})
                      if priorities:
                          filter_part.append(
                              {
                                  "has_parent": {
                                      "parent_type": "report_group",
                                      "query": {"terms": {"priority": priorities}},
                                  }
                              }
                          )
                      min_occurences = filter_settings.get("min_occurences")
                      if min_occurences:
                          filter_part.append(
                              {
                                  "has_parent": {
                                      "parent_type": "report_group",
                                      "query": {"range": {"occurences": {"gte": min_occurences[0]}}},
                                  }
                              }
                          )
                      min_duration = filter_settings.get("min_duration")
                      max_duration = filter_settings.get("max_duration")
                      request_ids = filter_settings.get("request_id")
                      if request_ids:
                          filter_part.append({"terms": {"request_id": request_ids}})
                      duration_range = {"range": {"average_duration": {}}}
                      if min_duration:
                          duration_range["range"]["average_duration"]["gte"] = min_duration[0]
                      if max_duration:
                          duration_range["range"]["average_duration"]["lte"] = max_duration[0]
                      if min_duration or max_duration:
                          filter_part.append(
                              {"has_parent": {"parent_type": "report_group", "query": duration_range}}
                          )
                      http_status = filter_settings.get("http_status")
                      report_type = filter_settings.get("report_type", [ReportType.error])
                      # set error report type if http status is not found
                      # and we are dealing with slow reports
                      if not http_status or ReportType.slow in report_type:
                          filter_part.append({"terms": {"report_type": report_type}})
                      if http_status:
                          filter_part.append({"terms": {"http_status": http_status}})
                      messages = filter_settings.get("message")
                      if messages:
                          condition = {"match": {"message": " ".join(messages)}}
                          query["query"]["bool"]["must"].append(condition)
                      errors = filter_settings.get("error")
                      if errors:
                          condition = {"match": {"error": " ".join(errors)}}
                          query["query"]["bool"]["must"].append(condition)
                      url_domains = filter_settings.get("url_domain")
                      if url_domains:
                          condition = {"terms": {"url_domain": url_domains}}
                          query["query"]["bool"]["must"].append(condition)
                      url_paths = filter_settings.get("url_path")
                      if url_paths:
                          condition = {"terms": {"url_path": url_paths}}
                          query["query"]["bool"]["must"].append(condition)
                      if filter_settings.get("report_status"):
                          for status in filter_settings.get("report_status"):
                              if status == "never_reviewed":
                                  filter_part.append(
                                      {
                                          "has_parent": {
                                              "parent_type": "report_group",
                                              "query": {"term": {"read": False}},
                                          }
                                      }
                                  )
                              elif status == "reviewed":
                                  filter_part.append(
                                      {
                                          "has_parent": {
                                              "parent_type": "report_group",
                                              "query": {"term": {"read": True}},
                                          }
                                      }
                                  )
                              elif status == "public":
                                  filter_part.append(
                                      {
                                          "has_parent": {
                                              "parent_type": "report_group",
                                              "query": {"term": {"public": True}},
                                          }
                                      }
                                  )
                              elif status == "fixed":
                                  filter_part.append(
                                      {
                                          "has_parent": {
                                              "parent_type": "report_group",
                                              "query": {"term": {"fixed": True}},
                                          }
                                      }
                                  )
                      # logging.getLogger('pyelasticsearch').setLevel(logging.DEBUG)
                      index_names = es_index_name_limiter(
                          filter_settings.get("start_date"),
                          filter_settings.get("end_date"),
                          ixtypes=["reports"],
                      )
                      if index_names:
                          results = Datastores.es.search(
                              body=query,
                              index=index_names,
                              doc_type=["report", "report_group"],
                              size=0,
                          )
                      else:
                          return []
                      return results["aggregations"]
                  @classmethod
                  def get_paginator_by_app_ids(
                      cls,
                      app_ids=None,
                      page=1,
                      item_count=None,
                      items_per_page=50,
                      order_by=None,
                      filter_settings=None,
                      exclude_columns=None,
                      db_session=None,
                  ):
                      """
                      Return a ``paginate.Page`` over the newest report of each group.

                      Report ids come from the ``report_id`` field of the top hit of
                      every ``top_groups`` bucket returned by ``get_search_iterator``.
                      The ``Report`` rows for the current page are attached as
                      ``paginator.sa_items`` in the order of those ids.
                      ``item_count`` and ``exclude_columns`` are not used by this method.
                      """
                      if not filter_settings:
                          filter_settings = {}
                      results = cls.get_search_iterator(
                          app_ids, page, items_per_page, order_by, filter_settings
                      )
                      ordered_ids = []
                      if results:
                          ordered_ids = [
                              item["top_reports_hits"]["hits"]["hits"][0]["_source"]["report_id"]
                              for item in results["top_groups"]["buckets"]
                          ]
                      log.info(filter_settings)
                      paginator = paginate.Page(
                          ordered_ids, items_per_page=items_per_page, **filter_settings
                      )
                      sa_items = ()
                      if paginator.items:
                          db_session = get_db_session(db_session)
                          # latest report detail
                          query = db_session.query(Report)
                          query = query.options(sa.orm.joinedload(Report.report_group))
                          query = query.filter(Report.id.in_(paginator.items))
                          order_col = filter_settings.get("order_col")
                          if order_col:
                              # only "dsc" selects descending order, anything else is ascending
                              descending = filter_settings.get("order_dir") == "dsc"
                              sort_on = "desc" if descending else "asc"
                              if order_col == "when":
                                  order_col = "last_timestamp"
                              query = query.order_by(
                                  getattr(sa, sort_on)(getattr(ReportGroup, order_col))
                              )
                          sa_items = query.all()

                      # Restore the Elasticsearch order. Ids are compared as strings on
                      # both sides so the match works whether ES returns them as str or int.
                      reports_by_id = {}
                      for report in sa_items:
                          reports_by_id.setdefault(str(report.id), []).append(report)
                      sorted_instance_list = []
                      seen_ids = set()
                      for report_id in ordered_ids:
                          key = str(report_id)
                          if key in seen_ids:
                              # every report is added at most once
                              continue
                          seen_ids.add(key)
                          sorted_instance_list.extend(reports_by_id.get(key, ()))
                      paginator.sa_items = sorted_instance_list
                      return paginator
                  @classmethod
                  def by_app_ids(cls, app_ids=None, order_by=True, db_session=None):
                      """
                      Build a ``ReportGroup`` query, optionally limited to some resources.

                      :param app_ids: when truthy, restricts ``ReportGroup.resource_id``
                      :param order_by: when truthy, orders by ``ReportGroup.id`` descending
                      :param db_session: optional session passed to ``get_db_session``
                      :return: the (unexecuted) query
                      """
                      session = get_db_session(db_session)
                      groups = session.query(ReportGroup)
                      if app_ids:
                          groups = groups.filter(ReportGroup.resource_id.in_(app_ids))
                      if not order_by:
                          return groups
                      return groups.order_by(sa.desc(ReportGroup.id))
                  @classmethod
                  def by_id(cls, group_id, app_ids=None, db_session=None):
                      """
                      Fetch a single ``ReportGroup`` by id.

                      :param group_id: value converted with ``int()`` before comparison
                      :param app_ids: when truthy, the group must belong to one of them
                      :return: result of ``.first()`` on the query
                      """
                      session = get_db_session(db_session)
                      wanted_id = int(group_id)
                      lookup = session.query(ReportGroup).filter(ReportGroup.id == wanted_id)
                      if app_ids:
                          lookup = lookup.filter(ReportGroup.resource_id.in_(app_ids))
                      return lookup.first()
                  @classmethod
                  def by_ids(cls, group_ids=None, db_session=None):
                      """
                      Build a query for the ``ReportGroup`` rows with the given ids.

                      :param group_ids: collection handed to ``ReportGroup.id.in_``
                      :return: the (unexecuted) query
                      """
                      session = get_db_session(db_session)
                      return session.query(ReportGroup).filter(ReportGroup.id.in_(group_ids))
                  @classmethod
                  def by_hash_and_resource(
                      cls, resource_id, grouping_hash, since_when=None, db_session=None
                  ):
                      """
                      Find a not-fixed ``ReportGroup`` by resource and grouping hash.

                      :param since_when: when truthy, only groups whose
                          ``first_timestamp`` is at or after this value are considered
                      :return: result of ``.first()`` on the query
                      """
                      session = get_db_session(db_session)
                      candidates = session.query(ReportGroup).filter(
                          ReportGroup.resource_id == resource_id
                      )
                      candidates = candidates.filter(
                          ReportGroup.grouping_hash == grouping_hash
                      )
                      # SQL expression, so "== False" is intentional here
                      candidates = candidates.filter(ReportGroup.fixed == False)  # noqa: E712
                      if since_when:
                          candidates = candidates.filter(
                              ReportGroup.first_timestamp >= since_when
                          )
                      return candidates.first()
                  @classmethod
                  def users_commenting(cls, report_group, exclude_user_id=None, db_session=None):
                      """
                      Build a query for the distinct users who commented on a report group.

                      :param report_group: group whose comments are inspected
                      :param exclude_user_id: when truthy, comments owned by this user
                          are left out
                      :param db_session: not forwarded; ``get_db_session`` is called with
                          ``None`` and ``report_group`` instead
                      :return: the (unexecuted) query
                      """
                      # NOTE(review): the caller's db_session is ignored here; presumably
                      # the session is taken from report_group - confirm this is intended
                      session = get_db_session(None, report_group)
                      commenters = (
                          session.query(User)
                          .distinct()
                          .filter(User.id == ReportComment.owner_id)
                          .filter(ReportComment.group_id == report_group.id)
                      )
                      if exclude_user_id:
                          commenters = commenters.filter(
                              ReportComment.owner_id != exclude_user_id
                          )
                      return commenters
                  @classmethod
                  def affected_users_count(cls, report_group, db_session=None):
                      """
                      Count the usernames that appear on reports of a group.

                      Reports with an empty or NULL username are ignored. The query is
                      grouped by username and ``.count()`` is applied to the grouped
                      query.
                      """
                      session = get_db_session(db_session)
                      per_user = (
                          session.query(sa.func.count(Report.username))
                          .filter(Report.group_id == report_group.id)
                          .filter(Report.username != "")
                          # SQL expression, so "!= None" is intentional here
                          .filter(Report.username != None)  # noqa: E711
                          .group_by(Report.username)
                      )
                      return per_user.count()
                  @classmethod
                  def top_affected_users(cls, report_group, db_session=None):
                      """
                      Build a query for the usernames seen most often on a group's reports.

                      Reports with an empty or NULL username are ignored.

                      :return: unexecuted query of ``(username, count)`` rows ordered by
                          count descending and limited to 50 rows
                      """
                      session = get_db_session(db_session)
                      count_label = sa.func.count(Report.username).label("count")
                      ranking = (
                          session.query(Report.username, count_label)
                          .filter(Report.group_id == report_group.id)
                          # SQL expression, so "!= None" is intentional here
                          .filter(Report.username != None)  # noqa: E711
                          .filter(Report.username != "")
                          .group_by(Report.username)
                          .order_by(sa.desc(count_label))
                      )
                      return ranking.limit(50)
                  @classmethod
                  def get_report_stats(cls, request, filter_settings):
                      """
                      Gets report dashboard graphs
                      Returns information for BAR charts with occurences/interval information
                      detailed means version that returns time intervals - non detailed
                      returns total sum
                      """
                      delta = filter_settings["end_date"] - filter_settings["start_date"]
                      if delta < h.time_deltas.get("12h")["delta"]:
                          interval = "1m"
                      elif delta <= h.time_deltas.get("3d")["delta"]:
                          interval = "5m"
                      elif delta >= h.time_deltas.get("2w")["delta"]:
                          interval = "24h"
                      else:
                          interval = "1h"
                      group_id = filter_settings.get("group_id")
                      es_query = {
                          "aggs": {
                              "parent_agg": {
                                  "aggs": {
                                      "types": {
                                          "aggs": {
                                              "sub_agg": {"terms": {"field": "tags.type.values.keyword"}}
                                          },
                                          "filter": {
                                          "bool": {
                                              "filter": [{"exists": {"field": "tags.type.values"}}]
                                          }
                                          },
                                      }
                                  },
                                  "date_histogram": {
                                      "extended_bounds": {
                                          "max": filter_settings["end_date"],
                                          "min": filter_settings["start_date"],
                                      },
                                      "field": "timestamp",
                                      "interval": interval,
                                      "min_doc_count": 0,
                                  },
                              }
                          },
                          "query": {
                              "bool": {
                                  "filter": [
                                      {
                                          "terms": {
                                              "resource_id": [filter_settings["resource"][0]]
                                          }
                                      },
                                      {
                                          "range": {
                                              "timestamp": {
                                                  "gte": filter_settings["start_date"],
                                                  "lte": filter_settings["end_date"],
                                              }
                                          }
                                      },
                                  ]
                              }
                          },
                      }
                      if group_id:
                          parent_agg = es_query["aggs"]["parent_agg"]
                          filters = parent_agg["aggs"]["types"]["filter"]["bool"]["filter"]
                          filters.append({"terms": {"tags.group_id.values": [group_id]}})
                      index_names = es_index_name_limiter(
                          start_date=filter_settings["start_date"],
                          end_date=filter_settings["end_date"],
                          ixtypes=["reports"],
                      )
                      if not index_names:
                          return []
                      result = Datastores.es.search(
                          body=es_query, index=index_names, doc_type="log", size=0
                      )
                      series = []
                      for bucket in result["aggregations"]["parent_agg"]["buckets"]:
                          point = {
                              "x": datetime.utcfromtimestamp(int(bucket["key"]) / 1000),
                              "report": 0,
                              "not_found": 0,
                              "slow_report": 0,
                          }
                          for subbucket in bucket["types"]["sub_agg"]["buckets"]:
                              if subbucket["key"] == "slow":
                                  point["slow_report"] = subbucket["doc_count"]
                              elif subbucket["key"] == "error":
                                  point["report"] = subbucket["doc_count"]
                              elif subbucket["key"] == "not_found":
                                  point["not_found"] = subbucket["doc_count"]
                          series.append(point)
                      return series

backend/src/appenlight/models/services/request_metric.py

0 +1 -1

              # -*- coding: utf-8 -*-
              # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
              #
              # Licensed under the Apache License, Version 2.0 (the "License");
              # you may not use this file except in compliance with the License.
              # You may obtain a copy of the License at
              #
              #   http://www.apache.org/licenses/LICENSE-2.0
              #
              # Unless required by applicable law or agreed to in writing, software
              # distributed under the License is distributed on an "AS IS" BASIS,
              # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
              # See the License for the specific language governing permissions and
              # limitations under the License.
              from datetime import datetime
              import appenlight.lib.helpers as h
              from appenlight.models import get_db_session, Datastores
              from appenlight.models.services.base import BaseService
              from appenlight.lib.enums import ReportType
              from appenlight.lib.utils import es_index_name_limiter
              try:
                  from ae_uptime_ce.models.services.uptime_metric import UptimeMetricService
              except ImportError:
                  UptimeMetricService = None
              def check_key(key, stats, uptime, total_seconds):
                  """
                  Make sure ``stats`` contains an entry for ``key``.

                  An entry that already exists is left exactly as it is.  A missing
                  entry is created with all counters set to zero, the given ``uptime``
                  and ``total_seconds`` expressed in minutes.

                  :param key: name under which the entry is stored
                  :param stats: dict of per-key statistics, modified in place
                  :param uptime: value stored under ``uptime`` of a new entry
                  :param total_seconds: length of the period in seconds
                  """
                  if key in stats:
                      return
                  request_counters = (
                      "requests",
                      "errors",
                      "tolerated_requests",
                      "frustrating_requests",
                      "satisfying_requests",
                  )
                  derived_values = ("apdex", "rpm", "response_time", "avg_response_time")
                  # assembled step by step so the keys keep their original order
                  entry = {"name": key}
                  entry.update(dict.fromkeys(request_counters, 0))
                  entry["total_minutes"] = total_seconds / 60.0
                  entry["uptime"] = uptime
                  entry.update(dict.fromkeys(derived_values, 0))
                  stats[key] = entry
              class RequestMetricService(BaseService):
                  @classmethod
                  def get_metrics_stats(cls, request, filter_settings, db_session=None):
                      """
                      Return per-interval sums of the request metric tags for one resource.

                      The histogram interval is picked from the length of the requested
                      range.  As a side effect ``filter_settings["namespace"]`` is
                      overwritten with the request metric namespace.

                      :param request: not used by this method
                      :param filter_settings: dict providing ``start_date``, ``end_date``
                          and ``resource`` (only its first item is queried)
                      :param db_session: not used by this method
                      :return: list of points, each holding ``x`` (the bucket key
                          converted to a datetime) and one rounded sum per metric tag;
                          an empty list when no index name matches the range
                      """
                      range_end = filter_settings["end_date"]
                      range_start = filter_settings["start_date"]
                      span = range_end - range_start
                      if span < h.time_deltas.get("12h")["delta"]:
                          interval = "1m"
                      elif span <= h.time_deltas.get("3d")["delta"]:
                          interval = "5m"
                      else:
                          is_long_range = span >= h.time_deltas.get("2w")["delta"]
                          interval = "24h" if is_long_range else "1h"
                      filter_settings["namespace"] = ["appenlight.request_metric"]
                      metric_tags = ("custom", "main", "nosql", "remote", "requests", "sql", "tmpl")

                      def summed_tag(tag):
                          # sum a tag's numeric values over the documents that carry it
                          field = "tags.{}.numeric_values".format(tag)
                          return {
                              "aggs": {"sub_agg": {"sum": {"field": field}}},
                              "filter": {"exists": {"field": field}},
                          }

                      es_query = {
                          "aggs": {
                              "parent_agg": {
                                  "aggs": {tag: summed_tag(tag) for tag in metric_tags},
                                  "date_histogram": {
                                      "extended_bounds": {"max": range_end, "min": range_start},
                                      "field": "timestamp",
                                      "interval": interval,
                                      "min_doc_count": 0,
                                  },
                              }
                          },
                          "query": {
                              "bool": {
                                  "filter": [
                                      {"terms": {"resource_id": [filter_settings["resource"][0]]}},
                                      {
                                          "range": {
                                              "timestamp": {"gte": range_start, "lte": range_end}
                                          }
                                      },
                                      {"terms": {"namespace": ["appenlight.request_metric"]}},
                                  ]
                              }
                          },
                      }
                      index_names = es_index_name_limiter(
                          start_date=range_start, end_date=range_end, ixtypes=["metrics"]
                      )
                      if not index_names:
                          return []
                      response = Datastores.es.search(
                          body=es_query, index=index_names, doc_type="log", size=0
                      )

                      def to_point(bucket):
                          # bucket keys are divided by 1000 before conversion to a datetime
                          point = {"x": datetime.utcfromtimestamp(int(bucket["key"]) / 1000)}
                          for tag in metric_tags:
                              raw = bucket[tag]["sub_agg"]["value"]
                              # falsy sums (0 or missing) are reported as a plain 0
                              point[tag] = round(raw, 3) if raw else 0
                          return point

                      buckets = response["aggregations"]["parent_agg"]["buckets"]
                      return [to_point(bucket) for bucket in buckets]
                  @classmethod
                  def get_requests_breakdown(cls, request, filter_settings, db_session=None):
                      """
                      Return the views that account for the largest share of request time.

                      Up to three Elasticsearch searches are issued:

                      1. the sum of ``tags.main.numeric_values`` for the resource and range,
                      2. a terms aggregation on the view name (15 buckets, ordered by each
                         view's share of that sum),
                      3. the five most recent hits (by ``start_time``) per view among
                         documents whose ``report_type`` equals ``str(ReportType.slow)``.

                      :param request: not used by this method
                      :param filter_settings: dict providing ``start_date``, ``end_date``
                          and ``resource`` (only its first item is queried)
                      :param db_session: passed to ``get_db_session``
                      :return: list of dicts with ``key``, ``main``, ``requests``,
                          ``percentage`` and ``latest_details``; an empty list when there
                          is no metrics index, no resource, or no view bucket
                      """
                      # the session is not used below; the call is kept from the original
                      db_session = get_db_session(db_session)
                      index_names = es_index_name_limiter(
                          start_date=filter_settings["start_date"],
                          end_date=filter_settings["end_date"],
                          ixtypes=["metrics"],
                      )
                      # Without a metrics index or a resource there is nothing to break
                      # down.  Returning here also keeps an empty resource list from
                      # reaching the ``resource[0]`` lookups below (IndexError).
                      if not (index_names and filter_settings["resource"]):
                          return []
                      resource_id = filter_settings["resource"][0]
                      main_field = "tags.main.numeric_values"
                      requests_field = "tags.requests.numeric_values"

                      def scope_filters():
                          # fresh list on every call so the queries share no mutable state
                          return [
                              {"terms": {"resource_id": [resource_id]}},
                              {
                                  "range": {
                                      "timestamp": {
                                          "gte": filter_settings["start_date"],
                                          "lte": filter_settings["end_date"],
                                      }
                                  }
                              },
                          ]

                      def summed(field):
                          # sum of ``field`` over the documents where it exists
                          return {
                              "aggs": {"sub_agg": {"sum": {"field": field}}},
                              "filter": {"exists": {"field": field}},
                          }

                      # fetch total time of all requests in this time range
                      total_query = {
                          "aggs": {"main": summed(main_field)},
                          "query": {
                              "bool": {
                                  "filter": scope_filters()
                                  + [{"terms": {"namespace": ["appenlight.request_metric"]}}]
                              }
                          },
                      }
                      result = Datastores.es.search(
                          body=total_query, index=index_names, doc_type="log", size=0
                      )
                      total_time_spent = result["aggregations"]["main"]["sub_agg"]["value"]

                      # avoid a division by zero inside the aggregation script
                      if total_time_spent == 0:
                          script_text = "0"
                      else:
                          script_text = "doc['tags.main.numeric_values'].value / {}".format(
                              total_time_spent
                          )

                      breakdown_query = {
                          "aggs": {
                              "parent_agg": {
                                  "aggs": {
                                      "main": summed(main_field),
                                      "percentage": {
                                          "aggs": {"sub_agg": {"sum": {"script": script_text}}},
                                          "filter": {"exists": {"field": main_field}},
                                      },
                                      "requests": summed(requests_field),
                                  },
                                  "terms": {
                                      "field": "tags.view_name.values.keyword",
                                      "order": {"percentage>sub_agg": "desc"},
                                      "size": 15,
                                  },
                              }
                          },
                          "query": {"bool": {"filter": scope_filters()}},
                      }
                      result = Datastores.es.search(
                          body=breakdown_query, index=index_names, doc_type="log", size=0
                      )
                      series = result["aggregations"]["parent_agg"]["buckets"]
                      if not series:
                          return []

                      reports_query = {
                          "aggs": {
                              "top_reports": {
                                  "terms": {
                                      "field": "tags.view_name.values.keyword",
                                      "size": len(series),
                                  },
                                  "aggs": {
                                      "top_calls_hits": {
                                          "top_hits": {"sort": {"start_time": "desc"}, "size": 5}
                                      }
                                  },
                              }
                          },
                          "query": {
                              "bool": {
                                  "filter": [
                                      {"term": {"resource_id": resource_id}},
                                      {
                                          "terms": {
                                              "tags.view_name.values": [
                                                  row["key"] for row in series
                                              ]
                                          }
                                      },
                                      {"term": {"report_type": str(ReportType.slow)}},
                                  ]
                              }
                          },
                      }
                      details = {}
                      report_index_names = es_index_name_limiter(ixtypes=["reports"])
                      if report_index_names:
                          result = Datastores.es.search(
                              body=reports_query,
                              doc_type="report",
                              size=0,
                              index=report_index_names,
                          )
                          for bucket in result["aggregations"]["top_reports"]["buckets"]:
                              details[bucket["key"]] = [
                                  {
                                      "report_id": hit["_source"]["request_metric_id"],
                                      "group_id": hit["_source"]["group_id"],
                                  }
                                  for hit in bucket["top_calls_hits"]["hits"]["hits"]
                              ]

                      results = []
                      for row in series:
                          # es can return 'infinity' or other values float() cannot take:
                          # ValueError covers unparsable strings, TypeError covers None
                          try:
                              percentage = float(row["percentage"]["sub_agg"]["value"])
                          except (TypeError, ValueError):
                              percentage = 0
                          results.append(
                              {
                                  "key": row["key"],
                                  "main": row["main"]["sub_agg"]["value"],
                                  "requests": row["requests"]["sub_agg"]["value"],
                                  "percentage": percentage,
                                  "latest_details": details.get(row["key"]) or [],
                              }
                          )
                      return results
                  @classmethod
                  def get_apdex_stats(cls, request, filter_settings, threshold=1, db_session=None):
                      """
                      Returns information and calculates APDEX score per server for dashboard
                      server information (upper right stats boxes)
                      """
                      # Apdex t = (Satisfied Count + Tolerated Count / 2) / Total Samples
                      db_session = get_db_session(db_session)
                      index_names = es_index_name_limiter(
                          start_date=filter_settings["start_date"],
                          end_date=filter_settings["end_date"],
                          ixtypes=["metrics"],
                      )
                      requests_series = []
                      if index_names and filter_settings["resource"]:
                          es_query = {
                              "aggs": {
                                  "parent_agg": {
                                      "aggs": {
                                          "frustrating": {
                                              "aggs": {
                                                  "sub_agg": {
                                                      "sum": {"field": "tags.requests.numeric_values"}
                                                  }
                                              },
                                              "filter": {
                                                  "bool": {
                                                      "filter": [
                                                          {
                                                              "range": {
                                                                  "tags.main.numeric_values": {"gte": "4"}
                                                              }
                                                          },
                                                          {
                                                              "exists": {
                                                                  "field": "tags.requests.numeric_values"
                                                              }
                                                          },
                                                      ]
                                                  }
                                              },
                                          },
                                          "main": {
                                              "aggs": {
                                                  "sub_agg": {
                                                      "sum": {"field": "tags.main.numeric_values"}
                                                  }
                                              },
                                              "filter": {
                                                  "exists": {"field": "tags.main.numeric_values"}
                                              },
                                          },
                                          "requests": {
                                              "aggs": {
                                                  "sub_agg": {
                                                      "sum": {"field": "tags.requests.numeric_values"}
                                                  }
                                              },
                                              "filter": {
                                                  "exists": {"field": "tags.requests.numeric_values"}
                                              },
                                          },
                                          "tolerated": {
                                              "aggs": {
                                                  "sub_agg": {
                                                      "sum": {"field": "tags.requests.numeric_values"}
                                                  }
                                              },
                                              "filter": {
                                                  "bool": {"filter": [
                                                      {
                                                          "range": {
                                                              "tags.main.numeric_values": {"gte": "1"}
                                                          }
                                                      },
                                                      {
                                                          "range": {
                                                              "tags.main.numeric_values": {"lt": "4"}
                                                          }
                                                      },
                                                      {
                                                          "exists": {
                                                              "field": "tags.requests.numeric_values"
                                                          }
                                                      },
                                                  ]}
                                              },
                                          },
                                      },
                                      "terms": {"field": "tags.server_name.values.keyword", "size": 999999},
                                  }
                              },
                              "query": {
                                  "bool": {
                                      "filter": [
                                          {
                                              "terms": {
                                                  "resource_id": [filter_settings["resource"][0]]
                                              }
                                          },
                                          {
                                              "range": {
                                                  "timestamp": {
                                                      "gte": filter_settings["start_date"],
                                                      "lte": filter_settings["end_date"],
                                                  }
                                              }
                                          },
                                          {"terms": {"namespace": ["appenlight.request_metric"]}},
                                      ]
                                  }
                              },
                          }
                          result = Datastores.es.search(
                              body=es_query, index=index_names, doc_type="log", size=0
                          )
                          for bucket in result["aggregations"]["parent_agg"]["buckets"]:
                              requests_series.append(
                                  {
                                      "frustrating": bucket["frustrating"]["sub_agg"]["value"],
                                      "main": bucket["main"]["sub_agg"]["value"],
                                      "requests": bucket["requests"]["sub_agg"]["value"],
                                      "tolerated": bucket["tolerated"]["sub_agg"]["value"],
                                      "key": bucket["key"],
                                  }
                              )
                      since_when = filter_settings["start_date"]
                      until = filter_settings["end_date"]
                      # total errors
                      index_names = es_index_name_limiter(
                          start_date=filter_settings["start_date"],
                          end_date=filter_settings["end_date"],
                          ixtypes=["reports"],
                      )
                      report_series = []
                      if index_names and filter_settings["resource"]:
                          report_type = ReportType.key_from_value(ReportType.error)
                          es_query = {
                              "aggs": {
                                  "parent_agg": {
                                      "aggs": {
                                          "errors": {
                                              "aggs": {
                                                  "sub_agg": {
                                                      "sum": {
                                                          "field": "tags.occurences.numeric_values"
                                                      }
                                                  }
                                              },
                                              "filter": {
                                                  "bool": {
                                                      "filter": [
                                                          {"terms": {"tags.type.values": [report_type]}},
                                                          {
                                                              "exists": {
                                                                  "field": "tags.occurences.numeric_values"
                                                              }
                                                          },
                                                      ]
                                                  }
                                              },
                                          }
                                      },
                                      "terms": {"field": "tags.server_name.values.keyword", "size": 999999},
                                  }
                              },
                              "query": {
                                  "bool": {
                                      "filter": [
                                          {
                                              "terms": {
                                                  "resource_id": [filter_settings["resource"][0]]
                                              }
                                          },
                                          {
                                              "range": {
                                                  "timestamp": {
                                                      "gte": filter_settings["start_date"],
                                                      "lte": filter_settings["end_date"],
                                                  }
                                              }
                                          },
                                          {"terms": {"namespace": ["appenlight.error"]}},
                                      ]
                                  }
                              },
                          }
                          result = Datastores.es.search(
                              body=es_query, index=index_names, doc_type="log", size=0
                          )
                          for bucket in result["aggregations"]["parent_agg"]["buckets"]:
                              report_series.append(
                                  {
                                      "key": bucket["key"],
                                      "errors": bucket["errors"]["sub_agg"]["value"],
                                  }
                              )
                      stats = {}
                      if UptimeMetricService is not None:
                          uptime = UptimeMetricService.get_uptime_by_app(
                              filter_settings["resource"][0], since_when=since_when, until=until
                          )
                      else:
                          uptime = 0
                      total_seconds = (until - since_when).total_seconds()
                      for stat in requests_series:
                          check_key(stat["key"], stats, uptime, total_seconds)
                          stats[stat["key"]]["requests"] = int(stat["requests"])
                          stats[stat["key"]]["response_time"] = stat["main"]
                          stats[stat["key"]]["tolerated_requests"] = stat["tolerated"]
                          stats[stat["key"]]["frustrating_requests"] = stat["frustrating"]
                      for server in report_series:
                          check_key(server["key"], stats, uptime, total_seconds)
                          stats[server["key"]]["errors"] = server["errors"]
                      server_stats = list(stats.values())
                      for stat in server_stats:
                          stat["satisfying_requests"] = (
                                  stat["requests"]
                                  - stat["errors"]
                                  - stat["frustrating_requests"]
                                  - stat["tolerated_requests"]
                          )
                          if stat["satisfying_requests"] < 0:
                              stat["satisfying_requests"] = 0
                          if stat["requests"]:
                              stat["avg_response_time"] = round(
                                  stat["response_time"] / stat["requests"], 3
                              )
                              qual_requests = (
                                      stat["satisfying_requests"] + stat["tolerated_requests"] / 2.0
                              )
                              stat["apdex"] = round((qual_requests / stat["requests"]) * 100, 2)
                              stat["rpm"] = round(stat["requests"] / stat["total_minutes"], 2)
                      return sorted(server_stats, key=lambda x: x["name"])

backend/src/appenlight/models/slow_call.py

0 +1 -1

              # -*- coding: utf-8 -*-
              # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
              #
              # Licensed under the Apache License, Version 2.0 (the "License");
              # you may not use this file except in compliance with the License.
              # You may obtain a copy of the License at
              #
              #   http://www.apache.org/licenses/LICENSE-2.0
              #
              # Unless required by applicable law or agreed to in writing, software
              # distributed under the License is distributed on an "AS IS" BASIS,
              # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
              # See the License for the specific language governing permissions and
              # limitations under the License.
              import sqlalchemy as sa
              import hashlib
              from datetime import datetime, timedelta
              from appenlight.models import Base
              from sqlalchemy.dialects.postgresql import JSON
              from ziggurat_foundations.models.base import BaseModel
              class SlowCall(Base, BaseModel):
                  __tablename__ = "slow_calls"
                  __table_args__ = {"implicit_returning": False}
                  resource_id = sa.Column(sa.Integer(), nullable=False, index=True)
                  id = sa.Column(sa.Integer, nullable=False, primary_key=True)
                  report_id = sa.Column(
                      sa.BigInteger,
                      sa.ForeignKey("reports.id", ondelete="cascade", onupdate="cascade"),
                      primary_key=True,
                  )
                  duration = sa.Column(sa.Float(), default=0)
                  statement = sa.Column(sa.UnicodeText(), default="")
                  statement_hash = sa.Column(sa.Unicode(60), default="")
                  parameters = sa.Column(JSON(), nullable=False, default=dict)
                  type = sa.Column(sa.Unicode(16), default="")
                  subtype = sa.Column(sa.Unicode(16), default=None)
                  location = sa.Column(sa.Unicode(255), default="")
                  timestamp = sa.Column(
                      sa.DateTime(), default=datetime.utcnow, server_default=sa.func.now()
                  )
                  report_group_time = sa.Column(
                      sa.DateTime(), default=datetime.utcnow, server_default=sa.func.now()
                  )
                  def set_data(
                      self, data, protocol_version=None, resource_id=None, report_group=None
                  ):
                      self.resource_id = resource_id
                      if data.get("start") and data.get("end"):
                          self.timestamp = data.get("start")
                          d = data.get("end") - data.get("start")
                          self.duration = d.total_seconds()
                      self.statement = data.get("statement", "")
                      self.type = data.get("type", "unknown")[:16]
                      self.parameters = data.get("parameters", {})
                      self.location = data.get("location", "")[:255]
                      self.report_group_time = report_group.first_timestamp
                      if "subtype" in data:
                          self.subtype = data.get("subtype", "unknown")[:16]
                      if self.type == "tmpl":
                          self.set_hash("{} {}".format(self.statement, self.parameters))
                      else:
                          self.set_hash()
                  def set_hash(self, custom_statement=None):
                      statement = custom_statement or self.statement
                      self.statement_hash = hashlib.sha1(statement.encode("utf8")).hexdigest()
                  @property
                  def end_time(self):
                      if self.duration and self.timestamp:
                          return self.timestamp + timedelta(seconds=self.duration)
                      return None
                  def get_dict(self):
                      instance_dict = super(SlowCall, self).get_dict()
                      instance_dict["children"] = []
                      instance_dict["end_time"] = self.end_time
                      return instance_dict
                  def es_doc(self):
                      doc = {
                          "resource_id": self.resource_id,
                          "timestamp": self.timestamp,
-                         "pg_id": str(self.id),
+                         "slow_call_id": str(self.id),
                          "permanent": False,
                          "request_id": None,
                          "log_level": "UNKNOWN",
                          "message": self.statement,
                          "namespace": "appenlight.slow_call",
                          "tags": {
                              "report_id": {
                                  "values": self.report_id,
                                  "numeric_values": self.report_id,
                              },
                              "duration": {"values": None, "numeric_values": self.duration},
                              "statement_hash": {
                                  "values": self.statement_hash,
                                  "numeric_values": None,
                              },
                              "type": {"values": self.type, "numeric_values": None},
                              "subtype": {"values": self.subtype, "numeric_values": None},
                              "location": {"values": self.location, "numeric_values": None},
                              "parameters": {"values": None, "numeric_values": None},
                          },
                          "tag_list": [
                              "report_id",
                              "duration",
                              "statement_hash",
                              "type",
                              "subtype",
                              "location",
                          ],
                      }
                      if isinstance(self.parameters, str):
                          doc["tags"]["parameters"]["values"] = self.parameters[:255]
                      return doc
                  @property
                  def partition_id(self):
                      return "rcae_sc_%s" % self.report_group_time.strftime("%Y_%m")

backend/src/appenlight/scripts/reindex_elasticsearch.py

0 +167 -48

              # -*- coding: utf-8 -*-
              # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
              #
              # Licensed under the Apache License, Version 2.0 (the "License");
              # you may not use this file except in compliance with the License.
              # You may obtain a copy of the License at
              #
              #   http://www.apache.org/licenses/LICENSE-2.0
              #
              # Unless required by applicable law or agreed to in writing, software
              # distributed under the License is distributed on an "AS IS" BASIS,
              # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
              # See the License for the specific language governing permissions and
              # limitations under the License.
              import argparse
              import datetime
              import logging
+             import copy
              import sqlalchemy as sa
              import elasticsearch.exceptions
              import elasticsearch.helpers
              from collections import defaultdict
              from pyramid.paster import setup_logging
              from pyramid.paster import bootstrap
              from appenlight.models import DBSession, Datastores, metadata
              from appenlight.lib import get_callable
              from appenlight.models.report_group import ReportGroup
              from appenlight.models.report import Report
              from appenlight.models.report_stat import ReportStat
              from appenlight.models.log import Log
              from appenlight.models.slow_call import SlowCall
              from appenlight.models.metric import Metric
              log = logging.getLogger(__name__)
              # partition table name prefixes, each starting out with its own empty list
              tables = {
                  prefix: []
                  for prefix in (
                      "slow_calls_p_",
                      "reports_stats_p_",
                      "reports_p_",
                      "reports_groups_p_",
                      "logs_p_",
                      "metrics_p_",
                  )
              }
              def detect_tables(table_prefix):
                  """
                  Reflects every database table whose name starts with table_prefix

                  Table names are read from pg_tables in ascending name order; each match
                  is loaded as an sa.Table registered on the shared metadata object.
                  Returns the reflected tables as a list, in that same order.
                  """
                  listing_sql = """
                  SELECT tablename FROM pg_tables WHERE tablename NOT LIKE 'pg_%' AND
                  tablename NOT LIKE 'sql_%' ORDER BY tablename ASC;"""
                  rows = DBSession.execute(listing_sql).fetchall()
                  matching_names = [
                      row.tablename for row in rows if row.tablename.startswith(table_prefix)
                  ]
                  return [
                      sa.Table(
                          name, metadata, autoload=True, autoload_with=DBSession.bind.engine
                      )
                      for name in matching_names
                  ]
              def main():
                  """
                  Command line entry point: recreates Elasticsearch indexes
                  and reindexes the selected parts of the database

                  Arguments are parsed twice: a first pass only extracts the ini file so
                  pyramid can be bootstrapped, a second pass knows the indexers that
                  plugins registered and validates --types against them.
                  """
                  config_help = "Configuration ini file of application"
                  types_help = "Which parts of database should get reindexed"
                  # first pass - help is disabled here so that -h is answered by the
                  # second parser, which lists the complete set of choices
                  bootstrap_parser = argparse.ArgumentParser(
                      description="Reindex AppEnlight data", add_help=False
                  )
                  bootstrap_parser.add_argument(
                      "-c", "--config", required=True, help=config_help
                  )
                  bootstrap_parser.add_argument("-h", "--help", help="Show help", nargs="?")
                  bootstrap_parser.add_argument("-t", "--types", nargs="+", help=types_help)
                  config_uri = bootstrap_parser.parse_args().config
                  setup_logging(config_uri)
                  log.setLevel(logging.INFO)
                  env = bootstrap(config_uri)
                  # built-in indexers, referenced by dotted path and resolved lazily
                  indexers = {
                      "reports": "appenlight.scripts.reindex_elasticsearch:reindex_reports",
                      "logs": "appenlight.scripts.reindex_elasticsearch:reindex_logs",
                      "metrics": "appenlight.scripts.reindex_elasticsearch:reindex_metrics",
                      "slow_calls": "appenlight.scripts.reindex_elasticsearch:reindex_slow_calls",
                      "template": "appenlight.scripts.reindex_elasticsearch:update_template",
                  }
                  # plugins may contribute (or override) an indexer under their own name
                  plugins = env["registry"].appenlight_plugins
                  indexers.update(
                      (plugin_name, plugin_config["fulltext_indexer"])
                      for plugin_name, plugin_config in plugins.items()
                      if plugin_config.get("fulltext_indexer")
                  )
                  parser = argparse.ArgumentParser(description="Reindex AppEnlight data")
                  parser.add_argument(
                      "-t",
                      "--types",
                      nargs="*",
                      choices=["all"] + list(indexers),
                      default=[],
                      help=types_help,
                  )
                  parser.add_argument("-c", "--config", required=True, help=config_help)
                  selected_types = parser.parse_args().types
                  if "all" in selected_types:
                      selected_types = list(indexers)
                  print("Selected types to reindex: {}".format(selected_types))
                  log.info("settings {}".format(selected_types))
                  # the template has to be in place before any documents are indexed
                  if "template" in selected_types:
                      get_callable(indexers["template"])()
                      selected_types.remove("template")
                  for type_name in selected_types:
                      get_callable(indexers[type_name])()
              def update_template():
                  """
                  Replaces the Elasticsearch index templates

                  Existing templates are deleted first (a missing template is only
                  logged), then one template per index family is registered:
                  "rcae_reports" (rcae_r_*), "rcae_logs" (rcae_l_*),
                  "rcae_slow_calls" (rcae_sc_*), "rcae_metrics" (rcae_m_*) and
                  "rcae_uptime_metrics" (rcae_uptime_ce_*).
                  """
                  # NOTE(review): this listing is a diff hunk - lines marked "-" belong to
                  # the version being removed, lines marked "+" to its replacement
                  try:
-                     Datastores.es.indices.delete_template("rcae")
+                     Datastores.es.indices.delete_template("rcae_reports")
+                 except elasticsearch.exceptions.NotFoundError as e:
+                     log.error(e)
+                 try:
+                     Datastores.es.indices.delete_template("rcae_logs")
+                 except elasticsearch.exceptions.NotFoundError as e:
+                     log.error(e)
+                 try:
+                     Datastores.es.indices.delete_template("rcae_slow_calls")
+                 except elasticsearch.exceptions.NotFoundError as e:
+                     log.error(e)
+                 try:
+                     Datastores.es.indices.delete_template("rcae_metrics")
                  except elasticsearch.exceptions.NotFoundError as e:
                      log.error(e)
                  # NOTE(review): "rcae_uptime_metrics" is registered at the end of this
                  # function but never deleted above - confirm that is intended
                  log.info("updating elasticsearch template")
                  # dynamic template for every field matching tags.*: "values" is text
                  # analysed with tag_value plus a keyword sub-field, "numeric_values"
                  # is a float
                  tag_templates = [
                      {
                          "values": {
                              "path_match": "tags.*",
                              "mapping": {
                                  "type": "object",
                                  "properties": {
                                      "values": {"type": "text", "analyzer": "tag_value",
                                                 "fields": {
                                                     "keyword": {
                                                         "type": "keyword",
                                                         "ignore_above": 256
                                                     }
                                                 }},
                                      "numeric_values": {"type": "float"},
                                  },
                              },
                          }
                      }
                  ]
-                 template_schema = {
-                     "template": "rcae_*",
-                     "settings": {
-                         "index": {
-                             "refresh_interval": "5s",
-                             "translog": {"sync_interval": "5s", "durability": "async"},
-                         },
-                         "number_of_shards": 5,
-                         "analysis": {
                  # NOTE(review): removed and added lines are interleaved here, so the
                  # final content of shared_analysis cannot be read off this hunk;
                  # presumably it keeps the url_path / tag_value analyzers - confirm
+                 shared_analysis = {
-                             "analyzer": {
-                                 "url_path": {
-                                     "type": "custom",
-                                     "char_filter": [],
-                                     "tokenizer": "path_hierarchy",
-                                     "filter": [],
-                                 },
-                                 "tag_value": {
-                                     "type": "custom",
-                                     "char_filter": [],
-                                     "tokenizer": "keyword",
-                                     "filter": ["lowercase"],
-                                 },
-                             }
+                 }
                  # mapping shared by the log-like templates below; each of them works
                  # on a deep copy and renames the "pg_id" property
+                 shared_log_mapping = {
+                     "_all": {"enabled": False},
+                     "dynamic_templates": tag_templates,
+                     "properties": {
+                         "pg_id": {"type": "keyword", "index": True},
+                         "delete_hash": {"type": "keyword", "index": True},
+                         "resource_id": {"type": "integer"},
+                         "timestamp": {"type": "date"},
+                         "permanent": {"type": "boolean"},
+                         "request_id": {"type": "keyword", "index": True},
+                         "log_level": {"type": "text", "analyzer": "simple"},
+                         "message": {"type": "text", "analyzer": "simple"},
+                         "namespace": {
+                             "type": "text",
+                             "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
                          },
+                         "tags": {"type": "object"},
+                         "tag_list": {"type": "text", "analyzer": "tag_value",
+                                      "fields": {
+                                          "keyword": {
+                                              "type": "keyword",
+                                              "ignore_above": 256
+                                          }
+                                      }},
+                     },
+                 }
                  # single "report" doc type holding report groups, reports and report
                  # stats; they are related through the "join_field" join mapping
+                 report_schema = {
+                     "template": "rcae_r_*",
+                     "settings": {
+                         "index": {
+                             "refresh_interval": "5s",
+                             "translog": {"sync_interval": "5s", "durability": "async"},
+                             "mapping": {"single_type": True}
+                         },
+                         "number_of_shards": 5,
+                         "analysis": shared_analysis,
                      },
                      "mappings": {
-                         "report_group": {
+                         "report": {
                              "_all": {"enabled": False},
                              "dynamic_templates": tag_templates,
                              "properties": {
-                                 "pg_id": {"type": "keyword", "index": True},
+                                 "type": {"type": "keyword", "index": True},
+                                 # report group
+                                 "group_id": {"type": "keyword", "index": True},
                                  "resource_id": {"type": "integer"},
                                  "priority": {"type": "integer"},
                                  "error": {"type": "text", "analyzer": "simple"},
                                  "read": {"type": "boolean"},
                                  "occurences": {"type": "integer"},
                                  "fixed": {"type": "boolean"},
                                  "first_timestamp": {"type": "date"},
                                  "last_timestamp": {"type": "date"},
                                  "average_duration": {"type": "float"},
                                  "summed_duration": {"type": "float"},
                                  "public": {"type": "boolean"},
-                             },
-                         },
-                         "report": {
-                             "_all": {"enabled": False},
-                             "dynamic_templates": tag_templates,
-                             "properties": {
-                                 "pg_id": {"type": "keyword", "index": True},
-                                 "resource_id": {"type": "integer"},
-                                 "group_id": {"type": "keyword"},
+                                 # report
+                                 "report_id": {"type": "keyword", "index": True},
                                  "http_status": {"type": "integer"},
                                  "ip": {"type": "keyword", "index": True},
                                  "url_domain": {"type": "text", "analyzer": "simple"},
                                  "url_path": {"type": "text", "analyzer": "url_path"},
-                                 "error": {"type": "text", "analyzer": "simple"},
                                  "report_type": {"type": "integer"},
                                  "start_time": {"type": "date"},
                                  "request_id": {"type": "keyword", "index": True},
                                  "end_time": {"type": "date"},
                                  "duration": {"type": "float"},
                                  "tags": {"type": "object"},
                                  "tag_list": {"type": "text", "analyzer": "tag_value",
                                               "fields": {
                                                   "keyword": {
                                                       "type": "keyword",
                                                       "ignore_above": 256
                                                   }
                                               }},
                                  "extra": {"type": "object"},
-                             },
-                             "_parent": {"type": "report_group"},
-                         },
-                         "log": {
-                             "_all": {"enabled": False},
-                             "dynamic_templates": tag_templates,
-                             "properties": {
-                                 "pg_id": {"type": "keyword", "index": True},
-                                 "delete_hash": {"type": "keyword", "index": True},
-                                 "resource_id": {"type": "integer"},
+                                 # report stats
+                                 "report_stat_id": {"type": "keyword", "index": True},
                                  "timestamp": {"type": "date"},
                                  "permanent": {"type": "boolean"},
-                                 "request_id": {"type": "keyword", "index": True},
                                  "log_level": {"type": "text", "analyzer": "simple"},
                                  "message": {"type": "text", "analyzer": "simple"},
                                  "namespace": {
                                      "type": "text",
                                      "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
                                  },
-                                 "tags": {"type": "object"},
-                                 "tag_list": {"type": "text", "analyzer": "tag_value",
-                                              "fields": {
-                                                  "keyword": {
-                                                      "type": "keyword",
-                                                      "ignore_above": 256
+                                 "join_field": {
+                                     "type": "join",
+                                     "relations": {
+                                         "report_group": ["report", "report_stat"]
-                                                  }
-                                              }},
+                                 }
                              },
+                         }
+                     }
+                 }
+                 Datastores.es.indices.put_template("rcae_reports", body=report_schema)
                  # logs: same mapping with the id property exposed as "log_id"
+                 logs_mapping = copy.deepcopy(shared_log_mapping)
+                 logs_mapping["properties"]["log_id"] = logs_mapping["properties"]["pg_id"]
+                 del logs_mapping["properties"]["pg_id"]
+                 log_template = {
+                     "template": "rcae_l_*",
+                     "settings": {
+                         "index": {
+                             "refresh_interval": "5s",
+                             "translog": {"sync_interval": "5s", "durability": "async"},
+                             "mapping": {"single_type": True}
+                         },
+                         "number_of_shards": 5,
+                         "analysis": shared_analysis,
-                         },
+                     "mappings": {
+                         "log": logs_mapping,
                      },
                  }
-                 Datastores.es.indices.put_template("rcae", body=template_schema)
+                 Datastores.es.indices.put_template("rcae_logs", body=log_template)
                  # slow calls: id property exposed as "slow_call_id"
+                 slow_call_mapping = copy.deepcopy(shared_log_mapping)
+                 slow_call_mapping["properties"]["slow_call_id"] = slow_call_mapping["properties"]["pg_id"]
+                 del slow_call_mapping["properties"]["pg_id"]
+                 slow_call_template = {
+                     "template": "rcae_sc_*",
+                     "settings": {
+                         "index": {
+                             "refresh_interval": "5s",
+                             "translog": {"sync_interval": "5s", "durability": "async"},
+                             "mapping": {"single_type": True}
+                         },
+                         "number_of_shards": 5,
+                         "analysis": shared_analysis,
+                     },
+                     "mappings": {
+                         "log": slow_call_mapping,
+                     },
+                 }
+                 Datastores.es.indices.put_template("rcae_slow_calls", body=slow_call_template)
                  # metrics: id property exposed as "metric_id"
+                 metric_mapping = copy.deepcopy(shared_log_mapping)
+                 metric_mapping["properties"]["metric_id"] = metric_mapping["properties"]["pg_id"]
+                 del metric_mapping["properties"]["pg_id"]
+                 metrics_template = {
+                     "template": "rcae_m_*",
+                     "settings": {
+                         "index": {
+                             "refresh_interval": "5s",
+                             "translog": {"sync_interval": "5s", "durability": "async"},
+                             "mapping": {"single_type": True}
+                         },
+                         "number_of_shards": 5,
+                         "analysis": shared_analysis,
+                     },
+                     "mappings": {
+                         "log": metric_mapping,
+                     },
+                 }
+                 Datastores.es.indices.put_template("rcae_metrics", body=metrics_template)
                  # uptime metrics: a copy with the id property exposed as "uptime_id"
                  # NOTE(review): uptime_metric_mapping is built here, yet the template
                  # below passes shared_log_mapping (which still has "pg_id") - this
                  # looks like it should reference uptime_metric_mapping; confirm
+                 uptime_metric_mapping = copy.deepcopy(shared_log_mapping)
+                 uptime_metric_mapping["properties"]["uptime_id"] = uptime_metric_mapping["properties"]["pg_id"]
+                 del uptime_metric_mapping["properties"]["pg_id"]
+                 uptime_metrics_template = {
+                     "template": "rcae_uptime_ce_*",
+                     "settings": {
+                         "index": {
+                             "refresh_interval": "5s",
+                             "translog": {"sync_interval": "5s", "durability": "async"},
+                             "mapping": {"single_type": True}
+                         },
+                         "number_of_shards": 5,
+                         "analysis": shared_analysis,
+                     },
+                     "mappings": {
+                         "log": shared_log_mapping,
+                     },
+                 }
+                 Datastores.es.indices.put_template("rcae_uptime_metrics", body=uptime_metrics_template)
              def reindex_reports():
                  """Rebuild the Elasticsearch report indices from the partitioned SQL tables.

                  Deletes the indices matching ``rcae_r_*`` and then bulk-indexes, in this
                  order, the rows of the tables returned by ``detect_tables`` for the
                  ``reports_groups_p_``, ``reports_p_`` and ``reports_stats_p_`` prefixes.
                  Every document is written with the ``report`` doc type into the index
                  named by the ``partition_id`` of the row it was built from.
                  """
                  reports_groups_tables = detect_tables("reports_groups_p_")
                  try:
                      # The pattern previously carried a stray leading backtick, so the
                      # delete did not address the rcae_r_* indices at all.
                      Datastores.es.indices.delete("rcae_r_*")
                  except elasticsearch.exceptions.NotFoundError as e:
                      # Nothing to delete is not fatal for a reindex run; just record it.
                      log.error(e)

                  log.info("reindexing report groups")
                  i = 0
                  task_start = datetime.datetime.now()
                  for partition_table in reports_groups_tables:
                      # stream_results is requested so rows are fetched in chunks below.
                      conn = DBSession.connection().execution_options(stream_results=True)
                      result = conn.execute(partition_table.select())
                      while True:
                          chunk = result.fetchmany(2000)
                          if not chunk:
                              break
                          # Group documents by target index (the row's partition_id).
                          es_docs = defaultdict(list)
                          for row in chunk:
                              i += 1
                              item = ReportGroup(**dict(list(row.items())))
                              d_range = item.partition_id
                              es_docs[d_range].append(item.es_doc())
                          if es_docs:
                              name = partition_table.name
                              log.info("round {}, {}".format(i, name))
                              for k, v in es_docs.items():
                                  to_update = {"_index": k, "_type": "report"}
                                  # Explicit loop: avoids reusing the counter name ``i``
                                  # and building a throwaway list for side effects.
                                  for doc in v:
                                      doc.update(to_update)
                                  elasticsearch.helpers.bulk(Datastores.es, v)
                  log.info("total docs {} {}".format(i, datetime.datetime.now() - task_start))

                  i = 0
                  log.info("reindexing reports")
                  task_start = datetime.datetime.now()
                  reports_tables = detect_tables("reports_p_")
                  for partition_table in reports_tables:
                      conn = DBSession.connection().execution_options(stream_results=True)
                      result = conn.execute(partition_table.select())
                      while True:
                          chunk = result.fetchmany(2000)
                          if not chunk:
                              break
                          es_docs = defaultdict(list)
                          for row in chunk:
                              i += 1
                              item = Report(**dict(list(row.items())))
                              d_range = item.partition_id
                              es_docs[d_range].append(item.es_doc())
                          if es_docs:
                              name = partition_table.name
                              log.info("round {}, {}".format(i, name))
                              for k, v in es_docs.items():
                                  to_update = {"_index": k, "_type": "report"}
                                  for doc in v:
                                      doc.update(to_update)
                                  elasticsearch.helpers.bulk(Datastores.es, v)
                  log.info("total docs {} {}".format(i, datetime.datetime.now() - task_start))

                  log.info("reindexing reports stats")
                  i = 0
                  task_start = datetime.datetime.now()
                  reports_stats_tables = detect_tables("reports_stats_p_")
                  for partition_table in reports_stats_tables:
                      conn = DBSession.connection().execution_options(stream_results=True)
                      result = conn.execute(partition_table.select())
                      while True:
                          chunk = result.fetchmany(2000)
                          if not chunk:
                              break
                          es_docs = defaultdict(list)
                          for row in chunk:
                              rd = dict(list(row.items()))
                              # remove legacy columns
                              # TODO: remove the column later
                              rd.pop("size", None)
                              item = ReportStat(**rd)
                              i += 1
                              d_range = item.partition_id
                              es_docs[d_range].append(item.es_doc())
                          if es_docs:
                              name = partition_table.name
                              log.info("round  {}, {}".format(i, name))
                              for k, v in es_docs.items():
                                  to_update = {"_index": k, "_type": "report"}
                                  for doc in v:
                                      doc.update(to_update)
                                  elasticsearch.helpers.bulk(Datastores.es, v)
                  log.info("total docs {} {}".format(i, datetime.datetime.now() - task_start))
              def reindex_logs():
                  """Drop the ``rcae_l_*`` indices and re-index all log partitions.

                  Rows are read in chunks of 2000 from each table returned by
                  ``detect_tables("logs_p_")``, converted to ``Log`` documents, grouped
                  by ``partition_id`` (used as the index name) and bulk-indexed with the
                  ``log`` doc type.
                  """
                  try:
                      Datastores.es.indices.delete("rcae_l_*")
                  except elasticsearch.exceptions.NotFoundError as exc:
                      log.error(exc)

                  # logs
                  log.info("reindexing logs")
                  total = 0
                  started_at = datetime.datetime.now()
                  for table in detect_tables("logs_p_"):
                      connection = DBSession.connection().execution_options(stream_results=True)
                      rows = connection.execute(table.select())
                      batch = rows.fetchmany(2000)
                      while batch:
                          docs_by_index = defaultdict(list)
                          for row in batch:
                              total += 1
                              entry = Log(**dict(row.items()))
                              docs_by_index[entry.partition_id].append(entry.es_doc())
                          if docs_by_index:
                              log.info("round  {}, {}".format(total, table.name))
                              for index_name, docs in docs_by_index.items():
                                  meta = {"_index": index_name, "_type": "log"}
                                  for doc in docs:
                                      doc.update(meta)
                                  elasticsearch.helpers.bulk(Datastores.es, docs)
                          batch = rows.fetchmany(2000)
                  log.info("total docs {} {}".format(total, datetime.datetime.now() - started_at))
              def reindex_metrics():
                  """Drop the ``rcae_m_*`` indices and re-index all metric partitions.

                  Rows are read in chunks of 2000 from each table returned by
                  ``detect_tables("metrics_p_")``, converted to ``Metric`` documents,
                  grouped by ``partition_id`` (used as the index name) and bulk-indexed
                  with the ``log`` doc type.
                  """
                  try:
                      Datastores.es.indices.delete("rcae_m_*")
                  except elasticsearch.exceptions.NotFoundError as exc:
                      log.error(exc)

                  log.info("reindexing applications metrics")
                  total = 0
                  started_at = datetime.datetime.now()
                  for table in detect_tables("metrics_p_"):
                      connection = DBSession.connection().execution_options(stream_results=True)
                      rows = connection.execute(table.select())
                      batch = rows.fetchmany(2000)
                      while batch:
                          docs_by_index = defaultdict(list)
                          for row in batch:
                              total += 1
                              entry = Metric(**dict(row.items()))
                              docs_by_index[entry.partition_id].append(entry.es_doc())
                          if docs_by_index:
                              log.info("round  {}, {}".format(total, table.name))
                              for index_name, docs in docs_by_index.items():
                                  meta = {"_index": index_name, "_type": "log"}
                                  for doc in docs:
                                      doc.update(meta)
                                  elasticsearch.helpers.bulk(Datastores.es, docs)
                          batch = rows.fetchmany(2000)
                  log.info("total docs {} {}".format(total, datetime.datetime.now() - started_at))
              def reindex_slow_calls():
                  """Drop the ``rcae_sc_*`` indices and re-index all slow-call partitions.

                  Rows are read in chunks of 2000 from each table returned by
                  ``detect_tables("slow_calls_p_")``, converted to ``SlowCall``
                  documents, grouped by ``partition_id`` (used as the index name) and
                  bulk-indexed with the ``log`` doc type.
                  """
                  try:
                      Datastores.es.indices.delete("rcae_sc_*")
                  except elasticsearch.exceptions.NotFoundError as exc:
                      log.error(exc)

                  log.info("reindexing slow calls")
                  total = 0
                  started_at = datetime.datetime.now()
                  for table in detect_tables("slow_calls_p_"):
                      connection = DBSession.connection().execution_options(stream_results=True)
                      rows = connection.execute(table.select())
                      batch = rows.fetchmany(2000)
                      while batch:
                          docs_by_index = defaultdict(list)
                          for row in batch:
                              total += 1
                              entry = SlowCall(**dict(row.items()))
                              docs_by_index[entry.partition_id].append(entry.es_doc())
                          if docs_by_index:
                              log.info("round  {}, {}".format(total, table.name))
                              for index_name, docs in docs_by_index.items():
                                  meta = {"_index": index_name, "_type": "log"}
                                  for doc in docs:
                                      doc.update(meta)
                                  elasticsearch.helpers.bulk(Datastores.es, docs)
                          batch = rows.fetchmany(2000)
                  log.info("total docs {} {}".format(total, datetime.datetime.now() - started_at))
              # Script entry point: call main() only when this file is executed directly.
              if __name__ == "__main__":
                  main()

General Comments 4

vaingmuny

|

Auto status change to "Under Review"

vaingmuny

|

Auto status change to "Under Review"

Write
Preview

You need to be logged in to leave comments. Login now

		Auto status change to \|new_status\|...
		rgrtg
		Hi
		Auto status change to \|new_status\|...

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages