appenlight Commit - r159:61d79e6b · RhodeCode Free Hosting

utils: added get_es_info function

ergo -

r159:61d79e6b

parent child

backend/src/appenlight/lib/utils/__init__.py

0 +10 0

              # -*- coding: utf-8 -*-
              # Copyright 2010 - 2017 RhodeCode GmbH and the AppEnlight project authors
              #
              # Licensed under the Apache License, Version 2.0 (the "License");
              # you may not use this file except in compliance with the License.
              # You may obtain a copy of the License at
              #
              #   http://www.apache.org/licenses/LICENSE-2.0
              #
              # Unless required by applicable law or agreed to in writing, software
              # distributed under the License is distributed on an "AS IS" BASIS,
              # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
              # See the License for the specific language governing permissions and
              # limitations under the License.
              """
              Utility functions.
              """
              import logging
              import requests
              import hashlib
              import json
              import copy
              import uuid
              import appenlight.lib.helpers as h
              from collections import namedtuple
              from datetime import timedelta, datetime, date
              from dogpile.cache.api import NO_VALUE
              from appenlight.models import Datastores
              from appenlight.validators import LogSearchSchema, TagListSchema, accepted_search_params
              from itsdangerous import TimestampSigner
              from ziggurat_foundations.permissions import ALL_PERMISSIONS
              from ziggurat_foundations.models.services.user import UserService
              from dateutil.relativedelta import relativedelta
              from dateutil.rrule import rrule, MONTHLY, DAILY
              log = logging.getLogger(__name__)
              Stat = namedtuple("Stat", "start_interval value")
              def default_extractor(item):
                  """
                  :param item - item to extract date from
                  """
                  if hasattr(item, "start_interval"):
                      return item.start_interval
                  return item["start_interval"]
              # fast gap generator
              def gap_gen_default(start, step, itemiterator, end_time=None, iv_extractor=None):
                  """ generates a list of time/value items based on step and itemiterator
                      if there are entries missing from iterator time/None will be returned
                      instead
                  :param start - datetime - what time should we start generating our values
                  :param step - timedelta - stepsize
                  :param itemiterator - iterable - we will check this iterable for values
                  corresponding to generated steps
                  :param end_time - datetime - when last step is >= end_time stop iterating
                  :param iv_extractor - extracts current step from iterable items
                  """
                  if not iv_extractor:
                      iv_extractor = default_extractor
                  next_step = start
                  minutes = step.total_seconds() / 60.0
                  while next_step.minute % minutes != 0:
                      next_step = next_step.replace(minute=next_step.minute - 1)
                  for item in itemiterator:
                      item_start_interval = iv_extractor(item)
                      # do we have a match for current time step in our data?
                      # no gen a new tuple with 0 values
                      while next_step < item_start_interval:
                          yield Stat(next_step, None)
                          next_step = next_step + step
                      if next_step == item_start_interval:
                          yield Stat(item_start_interval, item)
                          next_step = next_step + step
                  if end_time:
                      while next_step < end_time:
                          yield Stat(next_step, None)
                          next_step = next_step + step
              class DateTimeEncoder(json.JSONEncoder):
                  """ Simple datetime to ISO encoder for json serialization"""
                  def default(self, obj):
                      if isinstance(obj, date):
                          return obj.isoformat()
                      if isinstance(obj, datetime):
                          return obj.isoformat()
                      return json.JSONEncoder.default(self, obj)
              def channelstream_request(
                  secret, endpoint, payload, throw_exceptions=False, servers=None
              ):
                  responses = []
                  if not servers:
                      servers = []
                  signer = TimestampSigner(secret)
                  sig_for_server = signer.sign(endpoint)
                  for secret, server in [(s["secret"], s["server"]) for s in servers]:
                      response = {}
                      secret_headers = {
                          "x-channelstream-secret": sig_for_server,
                          "x-channelstream-endpoint": endpoint,
                          "Content-Type": "application/json",
                      }
                      url = "%s%s" % (server, endpoint)
                      try:
                          response = requests.post(
                              url,
                              data=json.dumps(payload, cls=DateTimeEncoder),
                              headers=secret_headers,
                              verify=False,
                              timeout=2,
                          ).json()
                      except requests.exceptions.RequestException as e:
                          if throw_exceptions:
                              raise
                      responses.append(response)
                  return responses
              def add_cors_headers(response):
                  # allow CORS
                  response.headers.add("Access-Control-Allow-Origin", "*")
                  response.headers.add("XDomainRequestAllowed", "1")
                  response.headers.add("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
                  # response.headers.add('Access-Control-Allow-Credentials', 'true')
                  response.headers.add(
                      "Access-Control-Allow-Headers",
                      "Content-Type, Depth, User-Agent, X-File-Size, X-Requested-With, If-Modified-Since, X-File-Name, Cache-Control, Pragma, Origin, Connection, Referer, Cookie",
                  )
                  response.headers.add("Access-Control-Max-Age", "86400")
              from sqlalchemy.sql import compiler
              from psycopg2.extensions import adapt as sqlescape
              # or use the appropiate escape function from your db driver
              def compile_query(query):
                  dialect = query.session.bind.dialect
                  statement = query.statement
                  comp = compiler.SQLCompiler(dialect, statement)
                  comp.compile()
                  enc = dialect.encoding
                  params = {}
                  for k, v in comp.params.items():
                      if isinstance(v, str):
                          v = v.encode(enc)
                      params[k] = sqlescape(v)
                  return (comp.string.encode(enc) % params).decode(enc)
              def convert_es_type(input_data):
                  """
                  This might need to convert some text or other types to corresponding ES types
                  """
                  return str(input_data)
              ProtoVersion = namedtuple("ProtoVersion", ["major", "minor", "patch"])
              def parse_proto(input_data):
                  try:
                      parts = [int(x) for x in input_data.split(".")]
                      while len(parts) < 3:
                          parts.append(0)
                      return ProtoVersion(*parts)
                  except Exception as e:
                      log.info("Unknown protocol version: %s" % e)
                  return ProtoVersion(99, 99, 99)
              def es_index_name_limiter(
                  start_date=None, end_date=None, months_in_past=6, ixtypes=None
              ):
                  """
                  This function limits the search to 6 months by default so we don't have to
                  query 300 elasticsearch indices for 20 years of historical data for example
                  """
                  # should be cached later
                  def get_possible_names():
                      return list(Datastores.es.indices.get_alias("*"))
                  possible_names = get_possible_names()
                  es_index_types = []
                  if not ixtypes:
                      ixtypes = ["reports", "metrics", "logs"]
                  for t in ixtypes:
                      if t == "reports":
                          es_index_types.append("rcae_r_%s")
                      elif t == "logs":
                          es_index_types.append("rcae_l_%s")
                      elif t == "metrics":
                          es_index_types.append("rcae_m_%s")
                      elif t == "uptime":
                          es_index_types.append("rcae_u_%s")
                      elif t == "slow_calls":
                          es_index_types.append("rcae_sc_%s")
                  if start_date:
                      start_date = copy.copy(start_date)
                  else:
                      if not end_date:
                          end_date = datetime.utcnow()
                      start_date = end_date + relativedelta(months=months_in_past * -1)
                  if not end_date:
                      end_date = start_date + relativedelta(months=months_in_past)
                  index_dates = list(
                      rrule(
                          MONTHLY,
                          dtstart=start_date.date().replace(day=1),
                          until=end_date.date(),
                          count=36,
                      )
                  )
                  index_names = []
                  for ix_type in es_index_types:
                      to_extend = [
                          ix_type % d.strftime("%Y_%m")
                          for d in index_dates
                          if ix_type % d.strftime("%Y_%m") in possible_names
                      ]
                      index_names.extend(to_extend)
                      for day in list(
                          rrule(DAILY, dtstart=start_date.date(), until=end_date.date(), count=366)
                      ):
                          ix_name = ix_type % day.strftime("%Y_%m_%d")
                          if ix_name in possible_names:
                              index_names.append(ix_name)
                  return index_names
              def build_filter_settings_from_query_dict(
                  request, params=None, override_app_ids=None, resource_permissions=None
              ):
                  """
                  Builds list of normalized search terms for ES from query params
                  ensuring application list is restricted to only applications user
                  has access to
                  :param params (dictionary)
                  :param override_app_ids - list of application id's to use instead of
                  applications user normally has access to
                  """
                  params = copy.deepcopy(params)
                  applications = []
                  if not resource_permissions:
                      resource_permissions = ["view"]
                  if request.user:
                      applications = UserService.resources_with_perms(
                          request.user, resource_permissions, resource_types=["application"]
                      )
                  # CRITICAL - this ensures our resultset is limited to only the ones
                  # user has view permissions
                  all_possible_app_ids = set([app.resource_id for app in applications])
                  # if override is preset we force permission for app to be present
                  # this allows users to see dashboards and applications they would
                  # normally not be able to
                  if override_app_ids:
                      all_possible_app_ids = set(override_app_ids)
                  schema = LogSearchSchema().bind(resources=all_possible_app_ids)
                  tag_schema = TagListSchema()
                  filter_settings = schema.deserialize(params)
                  tag_list = []
                  for k, v in list(filter_settings.items()):
                      if k in accepted_search_params:
                          continue
                      tag_list.append({"name": k, "value": v, "op": "eq"})
                      # remove the key from filter_settings
                      filter_settings.pop(k, None)
                  tags = tag_schema.deserialize(tag_list)
                  filter_settings["tags"] = tags
                  return filter_settings
              def gen_uuid():
                  return str(uuid.uuid4())
              def gen_uuid4_sha_hex():
                  return hashlib.sha1(uuid.uuid4().bytes).hexdigest()
              def permission_tuple_to_dict(data):
                  out = {
                      "user_name": None,
                      "perm_name": data.perm_name,
                      "owner": data.owner,
                      "type": data.type,
                      "resource_name": None,
                      "resource_type": None,
                      "resource_id": None,
                      "group_name": None,
                      "group_id": None,
                  }
                  if data.user:
                      out["user_name"] = data.user.user_name
                  if data.perm_name == ALL_PERMISSIONS:
                      out["perm_name"] = "__all_permissions__"
                  if data.resource:
                      out["resource_name"] = data.resource.resource_name
                      out["resource_type"] = data.resource.resource_type
                      out["resource_id"] = data.resource.resource_id
                  if data.group:
                      out["group_name"] = data.group.group_name
                      out["group_id"] = data.group.id
                  return out
              def get_cached_buckets(
                  request,
                  stats_since,
                  end_time,
                  fn,
                  cache_key,
                  gap_gen=None,
                  db_session=None,
                  step_interval=None,
                  iv_extractor=None,
                  rerange=False,
                  *args,
                  **kwargs
              ):
                  """ Takes "fn" that should return some data and tries to load the data
                  dividing it into daily buckets - if the stats_since and end time give a
                  delta bigger than 24hours, then only "todays" data is computed on the fly
                  :param request: (request) request object
                  :param stats_since: (datetime) start date of buckets range
                  :param end_time: (datetime) end date of buckets range - utcnow() if None
                  :param fn: (callable) callable to use to populate buckets should have
                  following signature:
                      def get_data(request, since_when, until, *args, **kwargs):
                  :param cache_key: (string) cache key that will be used to build bucket
                  caches
                  :param gap_gen: (callable) gap generator - should return step intervals
                  to use with out `fn` callable
                  :param db_session: (Session) sqlalchemy session
                  :param step_interval: (timedelta) optional step interval if we want to
                  override the default determined from total start/end time delta
                  :param iv_extractor: (callable) used to get step intervals from data
                  returned by `fn` callable
                  :param rerange: (bool) handy if we want to change ranges from hours to
                  days when cached data is missing - will shorten execution time if `fn`
                  callable supports that and we are working with multiple rows - like metrics
                  :param args:
                  :param kwargs:
                  :return: iterable
                  """
                  if not end_time:
                      end_time = datetime.utcnow().replace(second=0, microsecond=0)
                  delta = end_time - stats_since
                  # if smaller than 3 days we want to group by 5min else by 1h,
                  # for 60 min group by min
                  if not gap_gen:
                      gap_gen = gap_gen_default
                  if not iv_extractor:
                      iv_extractor = default_extractor
                  # do not use custom interval if total time range with new iv would exceed
                  # end time
                  if not step_interval or stats_since + step_interval >= end_time:
                      if delta < h.time_deltas.get("12h")["delta"]:
                          step_interval = timedelta(seconds=60)
                      elif delta < h.time_deltas.get("3d")["delta"]:
                          step_interval = timedelta(seconds=60 * 5)
                      elif delta > h.time_deltas.get("2w")["delta"]:
                          step_interval = timedelta(days=1)
                      else:
                          step_interval = timedelta(minutes=60)
                  if step_interval >= timedelta(minutes=60):
                      log.info(
                          "cached_buckets:{}: adjusting start time "
                          "for hourly or daily intervals".format(cache_key)
                      )
                      stats_since = stats_since.replace(hour=0, minute=0)
                  ranges = [
                      i.start_interval
                      for i in list(gap_gen(stats_since, step_interval, [], end_time=end_time))
                  ]
                  buckets = {}
                  storage_key = "buckets:" + cache_key + "{}|{}"
                  # this means we basicly cache per hour in 3-14 day intervals but i think
                  # its fine at this point - will be faster than db access anyways
                  if len(ranges) >= 1:
                      last_ranges = [ranges[-1]]
                  else:
                      last_ranges = []
                  if step_interval >= timedelta(minutes=60):
                      for r in ranges:
                          k = storage_key.format(step_interval.total_seconds(), r)
                          value = request.registry.cache_regions.redis_day_30.get(k)
                          # last buckets are never loaded from cache
                          is_last_result = r >= end_time - timedelta(hours=6) or r in last_ranges
                          if value is not NO_VALUE and not is_last_result:
                              log.info(
                                  "cached_buckets:{}: "
                                  "loading range {} from cache".format(cache_key, r)
                              )
                              buckets[r] = value
                          else:
                              log.info(
                                  "cached_buckets:{}: "
                                  "loading range {} from storage".format(cache_key, r)
                              )
                              range_size = step_interval
                              if (
                                  step_interval == timedelta(minutes=60)
                                  and not is_last_result
                                  and rerange
                              ):
                                  range_size = timedelta(days=1)
                                  r = r.replace(hour=0, minute=0)
                                  log.info(
                                      "cached_buckets:{}: "
                                      "loading collapsed "
                                      "range {} {}".format(cache_key, r, r + range_size)
                                  )
                              bucket_data = fn(
                                  request,
                                  r,
                                  r + range_size,
                                  step_interval,
                                  gap_gen,
                                  bucket_count=len(ranges),
                                  *args,
                                  **kwargs
                              )
                              for b in bucket_data:
                                  b_iv = iv_extractor(b)
                                  buckets[b_iv] = b
                                  k2 = storage_key.format(step_interval.total_seconds(), b_iv)
                                  request.registry.cache_regions.redis_day_30.set(k2, b)
                      log.info("cached_buckets:{}: saving cache".format(cache_key))
                  else:
                      # bucket count is 1 for short time ranges <= 24h from now
                      bucket_data = fn(
                          request,
                          stats_since,
                          end_time,
                          step_interval,
                          gap_gen,
                          bucket_count=1,
                          *args,
                          **kwargs
                      )
                      for b in bucket_data:
                          buckets[iv_extractor(b)] = b
                  return buckets
              def get_cached_split_data(
                  request, stats_since, end_time, fn, cache_key, db_session=None, *args, **kwargs
              ):
                  """ Takes "fn" that should return some data and tries to load the data
                  dividing it into 2 buckets - cached "since_from" bucket and "today"
                  bucket - then the data can be reduced into single value
                  Data is cached if the stats_since and end time give a delta bigger
                  than 24hours - then only 24h is computed on the fly
                  """
                  if not end_time:
                      end_time = datetime.utcnow().replace(second=0, microsecond=0)
                  delta = end_time - stats_since
                  if delta >= timedelta(minutes=60):
                      log.info(
                          "cached_split_data:{}: adjusting start time "
                          "for hourly or daily intervals".format(cache_key)
                      )
                      stats_since = stats_since.replace(hour=0, minute=0)
                  storage_key = "buckets_split_data:" + cache_key + ":{}|{}"
                  old_end_time = end_time.replace(hour=0, minute=0)
                  final_storage_key = storage_key.format(delta.total_seconds(), old_end_time)
                  older_data = None
                  cdata = request.registry.cache_regions.redis_day_7.get(final_storage_key)
                  if cdata:
                      log.info("cached_split_data:{}: found old " "bucket data".format(cache_key))
                      older_data = cdata
                  if stats_since < end_time - h.time_deltas.get("24h")["delta"] and not cdata:
                      log.info(
                          "cached_split_data:{}: didn't find the "
                          "start bucket in cache so load older data".format(cache_key)
                      )
                      recent_stats_since = old_end_time
                      older_data = fn(
                          request,
                          stats_since,
                          recent_stats_since,
                          db_session=db_session,
                          *args,
                          **kwargs
                      )
                      request.registry.cache_regions.redis_day_7.set(final_storage_key, older_data)
                  elif stats_since < end_time - h.time_deltas.get("24h")["delta"]:
                      recent_stats_since = old_end_time
                  else:
                      recent_stats_since = stats_since
                  log.info(
                      "cached_split_data:{}: loading fresh "
                      "data bucksts from last 24h ".format(cache_key)
                  )
                  todays_data = fn(
                      request, recent_stats_since, end_time, db_session=db_session, *args, **kwargs
                  )
                  return older_data, todays_data
              def in_batches(seq, size):
                  """
                  Splits am iterable into batches of specified size
                  :param seq (iterable)
                  :param size integer
                  """
                  return (seq[pos : pos + size] for pos in range(0, len(seq), size))
+             def get_es_info(cache_regions, es_conn):
+                 @cache_regions.memory_min_10.cache_on_arguments()
+                 def get_es_info_cached():
+                     returned_info = {"raw_info": es_conn.info()}
+                     returned_info["version"] = returned_info["raw_info"]["version"]["number"].split('.')
+                     return returned_info
+                 return get_es_info_cached()

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages