openreplay/ee/api/chalicelib/core/metrics.py
Kraiem Taha Yassine d2697061e9
Dev (#2722)
* fix(chalice): fixed Math-operators validation
refactor(chalice): search for sessions that have events for heatmaps

* refactor(chalice): search for sessions that have at least 1 location event for heatmaps

* fix(chalice): fixed Math-operators validation
refactor(chalice): search for sessions that have events for heatmaps

* refactor(chalice): search for sessions that have at least 1 location event for heatmaps

* feat(chalice): autocomplete return top 10 with stats

* fix(chalice): fixed autocomplete top 10 meta-filters

* refactor(DB): changed dashboard&metrics constraints

* refactor(chalice): removed cards
refactor(chalice): removed code related to resources
refactor(DB): removed cards
refactor(DB): removed code related to resources
2024-11-04 17:57:14 +01:00

1010 lines
50 KiB
Python

import logging
import math
from math import isnan
import schemas
from chalicelib.utils import args_transformer
from chalicelib.utils import ch_client
from chalicelib.utils import exp_ch_helper
from chalicelib.utils import helper
from chalicelib.utils.TimeUTC import TimeUTC
from chalicelib.utils.metrics_helper import __get_step_size
logger = logging.getLogger(__name__)
def __get_basic_constraints(table_name=None, time_constraint=True, round_start=False, data={}, identifier="project_id"):
if table_name:
table_name += "."
else:
table_name = ""
ch_sub_query = [f"{table_name}{identifier} =toUInt16(%({identifier})s)"]
if time_constraint:
if round_start:
ch_sub_query.append(
f"toStartOfInterval({table_name}datetime, INTERVAL %(step_size)s second) >= toDateTime(%(startTimestamp)s/1000)")
else:
ch_sub_query.append(f"{table_name}datetime >= toDateTime(%(startTimestamp)s/1000)")
ch_sub_query.append(f"{table_name}datetime < toDateTime(%(endTimestamp)s/1000)")
return ch_sub_query + __get_generic_constraint(data=data, table_name=table_name)
def __frange(start, stop, step):
result = []
i = start
while i < stop:
result.append(i)
i += step
return result
def __add_missing_keys(original, complete):
for missing in [key for key in complete.keys() if key not in original.keys()]:
original[missing] = complete[missing]
return original
def __complete_missing_steps(start_time, end_time, density, neutral, rows, time_key="timestamp", time_coefficient=1000):
if len(rows) == density:
return rows
step = __get_step_size(start_time, end_time, density, decimal=True)
optimal = [(int(i * time_coefficient), int((i + step) * time_coefficient)) for i in
__frange(start_time // time_coefficient, end_time // time_coefficient, step)]
result = []
r = 0
o = 0
for i in range(density):
neutral_clone = dict(neutral)
for k in neutral_clone.keys():
if callable(neutral_clone[k]):
neutral_clone[k] = neutral_clone[k]()
if r < len(rows) and len(result) + len(rows) - r == density:
result += rows[r:]
break
if r < len(rows) and o < len(optimal) and rows[r][time_key] < optimal[o][0]:
# complete missing keys in original object
rows[r] = __add_missing_keys(original=rows[r], complete=neutral_clone)
result.append(rows[r])
r += 1
elif r < len(rows) and o < len(optimal) and optimal[o][0] <= rows[r][time_key] < optimal[o][1]:
# complete missing keys in original object
rows[r] = __add_missing_keys(original=rows[r], complete=neutral_clone)
result.append(rows[r])
r += 1
o += 1
else:
neutral_clone[time_key] = optimal[o][0]
result.append(neutral_clone)
o += 1
# elif r < len(rows) and rows[r][time_key] >= optimal[o][1]:
# neutral_clone[time_key] = optimal[o][0]
# result.append(neutral_clone)
# o += 1
# else:
# neutral_clone[time_key] = optimal[o][0]
# result.append(neutral_clone)
# o += 1
return result
def __merge_charts(list1, list2, time_key="timestamp"):
if len(list1) != len(list2):
raise Exception("cannot merge unequal lists")
result = []
for i in range(len(list1)):
timestamp = min(list1[i][time_key], list2[i][time_key])
result.append({**list1[i], **list2[i], time_key: timestamp})
return result
def __get_constraint(data, fields, table_name):
constraints = []
# for k in fields.keys():
for i, f in enumerate(data.get("filters", [])):
if f["key"] in fields.keys():
if f["value"] in ["*", ""]:
constraints.append(f"isNotNull({table_name}{fields[f['key']]})")
else:
constraints.append(f"{table_name}{fields[f['key']]} = %({f['key']}_{i})s")
# TODO: remove this in next release
offset = len(data.get("filters", []))
for i, f in enumerate(data.keys()):
if f in fields.keys():
if data[f] in ["*", ""]:
constraints.append(f"isNotNull({table_name}{fields[f]})")
else:
constraints.append(f"{table_name}{fields[f]} = %({f}_{i + offset})s")
return constraints
def __get_constraint_values(data):
params = {}
for i, f in enumerate(data.get("filters", [])):
params[f"{f['key']}_{i}"] = f["value"]
# TODO: remove this in next release
offset = len(data.get("filters", []))
for i, f in enumerate(data.keys()):
params[f"{f}_{i + offset}"] = data[f]
return params
METADATA_FIELDS = {"userId": "user_id",
"userAnonymousId": "user_anonymous_id",
"metadata1": "metadata_1",
"metadata2": "metadata_2",
"metadata3": "metadata_3",
"metadata4": "metadata_4",
"metadata5": "metadata_5",
"metadata6": "metadata_6",
"metadata7": "metadata_7",
"metadata8": "metadata_8",
"metadata9": "metadata_9",
"metadata10": "metadata_10"}
def __get_meta_constraint(data):
return __get_constraint(data=data, fields=METADATA_FIELDS, table_name="sessions_metadata.")
SESSIONS_META_FIELDS = {"revId": "rev_id",
"country": "user_country",
"os": "user_os",
"platform": "user_device_type",
"device": "user_device",
"browser": "user_browser"}
def __get_generic_constraint(data, table_name):
return __get_constraint(data=data, fields=SESSIONS_META_FIELDS, table_name=table_name)
def get_processed_sessions(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(),
density=7, **args):
step_size = __get_step_size(startTimestamp, endTimestamp, density)
ch_sub_query = __get_basic_constraints(table_name="sessions", data=args)
ch_sub_query_chart = __get_basic_constraints(table_name="sessions", round_start=True, data=args)
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
ch_sub_query_chart += meta_condition
with ch_client.ClickHouseClient() as ch:
ch_query = f"""\
SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
COUNT(DISTINCT sessions.session_id) AS value
FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions
WHERE {" AND ".join(ch_sub_query_chart)}
GROUP BY timestamp
ORDER BY timestamp;\
"""
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
rows = ch.execute(query=ch_query, params=params)
results = {
"value": sum([r["value"] for r in rows]),
"chart": __complete_missing_steps(rows=rows, start_time=startTimestamp, end_time=endTimestamp,
density=density,
neutral={"value": 0})
}
diff = endTimestamp - startTimestamp
endTimestamp = startTimestamp
startTimestamp = endTimestamp - diff
ch_query = f""" SELECT COUNT(1) AS count
FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions
WHERE {" AND ".join(ch_sub_query)};"""
params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp,
**__get_constraint_values(args)}
count = ch.execute(query=ch_query, params=params)
count = count[0]["count"]
results["progress"] = helper.__progress(old_val=count, new_val=results["value"])
results["unit"] = schemas.TemplatePredefinedUnits.COUNT
return results
def get_errors(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(),
density=7, **args):
step_size = __get_step_size(startTimestamp, endTimestamp, density)
ch_sub_query = __get_basic_constraints(table_name="errors", data=args)
ch_sub_query.append("errors.event_type = 'ERROR'")
ch_sub_query.append("errors.source = 'js_exception'")
ch_sub_query_chart = __get_basic_constraints(table_name="errors", round_start=True, data=args)
ch_sub_query_chart.append("errors.event_type = 'ERROR'")
ch_sub_query_chart.append("errors.source = 'js_exception'")
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
ch_sub_query_chart += meta_condition
with ch_client.ClickHouseClient() as ch:
ch_query = f"""\
SELECT toUnixTimestamp(toStartOfInterval(errors.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
COUNT(DISTINCT errors.session_id) AS count
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS errors
WHERE {" AND ".join(ch_sub_query_chart)}
GROUP BY timestamp
ORDER BY timestamp;\
"""
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
rows = ch.execute(query=ch_query, params=params)
results = {
"count": 0 if len(rows) == 0 else __count_distinct_errors(ch, project_id, startTimestamp, endTimestamp,
ch_sub_query),
"impactedSessions": sum([r["count"] for r in rows]),
"chart": __complete_missing_steps(rows=rows, start_time=startTimestamp, end_time=endTimestamp,
density=density,
neutral={"count": 0})
}
diff = endTimestamp - startTimestamp
endTimestamp = startTimestamp
startTimestamp = endTimestamp - diff
count = __count_distinct_errors(ch, project_id, startTimestamp, endTimestamp, ch_sub_query,
meta=len(meta_condition) > 0, **args)
results["progress"] = helper.__progress(old_val=count, new_val=results["count"])
return results
def __count_distinct_errors(ch, project_id, startTimestamp, endTimestamp, ch_sub_query, meta=False, **args):
ch_query = f"""\
SELECT
COUNT(DISTINCT errors.message) AS count
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS errors
WHERE {" AND ".join(ch_sub_query)};"""
count = ch.execute(query=ch_query,
params={"project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)})
if count is not None and len(count) > 0:
return count[0]["count"]
return 0
def get_errors_trend(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(),
density=7, **args):
step_size = __get_step_size(startTimestamp, endTimestamp, density)
ch_sub_query = __get_basic_constraints(table_name="errors", data=args)
ch_sub_query.append("errors.event_type='ERROR'")
ch_sub_query_chart = __get_basic_constraints(table_name="errors", round_start=True, data=args)
ch_sub_query_chart.append("errors.event_type='ERROR'")
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
ch_sub_query_chart += meta_condition
with ch_client.ClickHouseClient() as ch:
ch_query = f"""SELECT *
FROM (SELECT errors.error_id AS error_id,
errors.message AS error,
COUNT(1) AS count,
COUNT(DISTINCT errors.session_id) AS sessions
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS errors
WHERE {" AND ".join(ch_sub_query)}
GROUP BY errors.error_id, errors.message) AS errors_chart
INNER JOIN (SELECT error_id AS error_id,
toUnixTimestamp(MAX(datetime))*1000 AS lastOccurrenceAt,
toUnixTimestamp(MIN(datetime))*1000 AS firstOccurrenceAt
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS errors
WHERE event_type='ERROR' AND project_id=%(project_id)s
GROUP BY error_id) AS errors_time USING(error_id)
ORDER BY sessions DESC, count DESC LIMIT 10;"""
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
rows = ch.execute(query=ch_query, params=params)
# print(f"got {len(rows)} rows")
if len(rows) == 0:
return []
error_ids = [r["error_id"] for r in rows]
ch_sub_query.append("error_id = %(error_id)s")
errors = {}
for error_id in error_ids:
ch_query = f"""\
SELECT toUnixTimestamp(toStartOfInterval(errors.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
COUNT(1) AS count
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS errors
WHERE {" AND ".join(ch_sub_query_chart)}
GROUP BY timestamp
ORDER BY timestamp;"""
params["error_id"] = error_id
errors[error_id] = ch.execute(query=ch_query, params=params)
for row in rows:
row["startTimestamp"] = startTimestamp
row["endTimestamp"] = endTimestamp
row["chart"] = __complete_missing_steps(rows=errors[row["error_id"]], start_time=startTimestamp,
end_time=endTimestamp,
density=density,
neutral={"count": 0})
return rows
def get_page_metrics(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(), **args):
with ch_client.ClickHouseClient() as ch:
rows = __get_page_metrics(ch, project_id, startTimestamp, endTimestamp, **args)
if len(rows) > 0:
results = helper.dict_to_camel_case(rows[0])
diff = endTimestamp - startTimestamp
endTimestamp = startTimestamp
startTimestamp = endTimestamp - diff
rows = __get_page_metrics(ch, project_id, startTimestamp, endTimestamp, **args)
if len(rows) > 0:
previous = helper.dict_to_camel_case(rows[0])
for key in previous.keys():
results[key + "Progress"] = helper.__progress(old_val=previous[key], new_val=results[key])
return results
def __get_page_metrics(ch, project_id, startTimestamp, endTimestamp, **args):
ch_sub_query = __get_basic_constraints(table_name="pages", data=args)
ch_sub_query.append("pages.event_type='LOCATION'")
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
ch_sub_query.append("(pages.dom_content_loaded_event_end>0 OR pages.first_contentful_paint_time>0)")
# changed dom_content_loaded_event_start to dom_content_loaded_event_end
ch_query = f"""SELECT COALESCE(avgOrNull(NULLIF(pages.dom_content_loaded_event_end ,0)),0) AS avg_dom_content_load_start,
COALESCE(avgOrNull(NULLIF(pages.first_contentful_paint_time,0)),0) AS avg_first_contentful_pixel
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
WHERE {" AND ".join(ch_sub_query)};"""
params = {"project_id": project_id, "type": 'fetch', "startTimestamp": startTimestamp, "endTimestamp": endTimestamp,
**__get_constraint_values(args)}
rows = ch.execute(query=ch_query, params=params)
return rows
def get_user_activity(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(), **args):
results = {}
with ch_client.ClickHouseClient() as ch:
rows = __get_user_activity(ch, project_id, startTimestamp, endTimestamp, **args)
if len(rows) > 0:
results = helper.dict_to_camel_case(rows[0])
for key in results:
if isnan(results[key]):
results[key] = 0
diff = endTimestamp - startTimestamp
endTimestamp = startTimestamp
startTimestamp = endTimestamp - diff
rows = __get_user_activity(ch, project_id, startTimestamp, endTimestamp, **args)
if len(rows) > 0:
previous = helper.dict_to_camel_case(rows[0])
for key in previous:
results[key + "Progress"] = helper.__progress(old_val=previous[key], new_val=results[key])
return results
def __get_user_activity(ch, project_id, startTimestamp, endTimestamp, **args):
ch_sub_query = __get_basic_constraints(table_name="sessions", data=args)
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
ch_sub_query.append("(sessions.pages_count>0 OR sessions.duration>0)")
ch_query = f"""SELECT COALESCE(CEIL(avgOrNull(NULLIF(sessions.pages_count,0))),0) AS avg_visited_pages,
COALESCE(avgOrNull(NULLIF(sessions.duration,0)),0) AS avg_session_duration
FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions
WHERE {" AND ".join(ch_sub_query)};"""
params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp,
**__get_constraint_values(args)}
rows = ch.execute(query=ch_query, params=params)
return rows
RESOURCS_TYPE_TO_DB_TYPE = {
"img": "IMG",
"fetch": "REQUEST",
"stylesheet": "STYLESHEET",
"script": "SCRIPT",
"other": "OTHER",
"media": "MEDIA"
}
def __get_resource_type_from_db_type(db_type):
db_type = db_type.lower()
return RESOURCS_TYPE_TO_DB_TYPE.get(db_type, db_type)
def __get_resource_db_type_from_type(resource_type):
resource_type = resource_type.upper()
return {v: k for k, v in RESOURCS_TYPE_TO_DB_TYPE.items()}.get(resource_type, resource_type)
KEYS = {
'startTimestamp': args_transformer.int_arg,
'endTimestamp': args_transformer.int_arg,
'density': args_transformer.int_arg,
'performanceDensity': args_transformer.int_arg,
'platform': args_transformer.string
}
def dashboard_args(params):
args = {}
if params is not None:
for key in params.keys():
if key in KEYS.keys():
args[key] = KEYS[key](params.get(key))
return args
def get_sessions_location(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(), **args):
ch_sub_query = __get_basic_constraints(table_name="sessions", data=args)
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
with ch_client.ClickHouseClient() as ch:
ch_query = f"""SELECT user_country, COUNT(1) AS count
FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions
WHERE {" AND ".join(ch_sub_query)}
GROUP BY user_country
ORDER BY user_country;"""
params = {"project_id": project_id,
"startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
rows = ch.execute(query=ch_query, params=params)
return {"count": sum(i["count"] for i in rows), "chart": helper.list_to_camel_case(rows)}
def get_top_metrics(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(), value=None, **args):
ch_sub_query = __get_basic_constraints(table_name="pages", data=args)
ch_sub_query.append("pages.event_type='LOCATION'")
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
if value is not None:
ch_sub_query.append("pages.url_path = %(value)s")
with ch_client.ClickHouseClient() as ch:
ch_query = f"""SELECT COALESCE(avgOrNull(if(pages.response_time>0,pages.response_time,null)),0) AS avg_response_time,
COALESCE(avgOrNull(if(pages.first_paint>0,pages.first_paint,null)),0) AS avg_first_paint,
COALESCE(avgOrNull(if(pages.dom_content_loaded_event_time>0,pages.dom_content_loaded_event_time,null)),0) AS avg_dom_content_loaded,
COALESCE(avgOrNull(if(pages.ttfb>0,pages.ttfb,null)),0) AS avg_till_first_bit,
COALESCE(avgOrNull(if(pages.time_to_interactive>0,pages.time_to_interactive,null)),0) AS avg_time_to_interactive,
(SELECT COUNT(1) FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages WHERE {" AND ".join(ch_sub_query)}) AS count_requests
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
WHERE {" AND ".join(ch_sub_query)}
AND (isNotNull(pages.response_time) AND pages.response_time>0 OR
isNotNull(pages.first_paint) AND pages.first_paint>0 OR
isNotNull(pages.dom_content_loaded_event_time) AND pages.dom_content_loaded_event_time>0 OR
isNotNull(pages.ttfb) AND pages.ttfb>0 OR
isNotNull(pages.time_to_interactive) AND pages.time_to_interactive >0);"""
params = {"project_id": project_id,
"startTimestamp": startTimestamp,
"endTimestamp": endTimestamp,
"value": value, **__get_constraint_values(args)}
rows = ch.execute(query=ch_query, params=params)
return helper.dict_to_camel_case(rows[0])
def __get_domains_errors_neutral(rows):
neutral = {l: 0 for l in [i for k in [list(v.keys()) for v in rows] for i in k]}
if len(neutral.keys()) == 0:
neutral = {"All": 0}
return neutral
def __merge_rows_with_neutral(rows, neutral):
for i in range(len(rows)):
rows[i] = {**neutral, **rows[i]}
return rows
def get_domains_errors(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(), density=6, **args):
step_size = __get_step_size(startTimestamp, endTimestamp, density)
ch_sub_query = __get_basic_constraints(table_name="requests", round_start=True, data=args)
ch_sub_query.append("requests.event_type='REQUEST'")
ch_sub_query.append("intDiv(requests.status, 100) == %(status_code)s")
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
with ch_client.ClickHouseClient() as ch:
ch_query = f"""SELECT timestamp,
groupArray([domain, toString(count)]) AS keys
FROM (SELECT toUnixTimestamp(toStartOfInterval(requests.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
requests.url_host AS domain, COUNT(1) AS count
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS requests
WHERE {" AND ".join(ch_sub_query)}
GROUP BY timestamp,requests.url_host
ORDER BY timestamp, count DESC
LIMIT 5 BY timestamp) AS domain_stats
GROUP BY timestamp;"""
params = {"project_id": project_id,
"startTimestamp": startTimestamp,
"endTimestamp": endTimestamp,
"step_size": step_size,
"status_code": 4, **__get_constraint_values(args)}
# print(ch.format(query=ch_query, params=params))
rows = ch.execute(query=ch_query, params=params)
rows = __nested_array_to_dict_array(rows)
neutral = __get_domains_errors_neutral(rows)
rows = __merge_rows_with_neutral(rows, neutral)
result = {"4xx": __complete_missing_steps(rows=rows, start_time=startTimestamp,
end_time=endTimestamp,
density=density, neutral=neutral)}
params["status_code"] = 5
rows = ch.execute(query=ch_query, params=params)
rows = __nested_array_to_dict_array(rows)
neutral = __get_domains_errors_neutral(rows)
rows = __merge_rows_with_neutral(rows, neutral)
result["5xx"] = __complete_missing_steps(rows=rows, start_time=startTimestamp,
end_time=endTimestamp,
density=density, neutral=neutral)
return result
def __get_domains_errors_4xx_and_5xx(status, project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(), density=6, **args):
step_size = __get_step_size(startTimestamp, endTimestamp, density)
ch_sub_query = __get_basic_constraints(table_name="requests", round_start=True, data=args)
ch_sub_query.append("requests.event_type='REQUEST'")
ch_sub_query.append("intDiv(requests.status, 100) == %(status_code)s")
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
with ch_client.ClickHouseClient() as ch:
ch_query = f"""SELECT timestamp,
groupArray([domain, toString(count)]) AS keys
FROM (SELECT toUnixTimestamp(toStartOfInterval(requests.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
requests.url_host AS domain, COUNT(1) AS count
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS requests
WHERE {" AND ".join(ch_sub_query)}
GROUP BY timestamp,requests.url_host
ORDER BY timestamp, count DESC
LIMIT 5 BY timestamp) AS domain_stats
GROUP BY timestamp;"""
params = {"project_id": project_id,
"startTimestamp": startTimestamp,
"endTimestamp": endTimestamp,
"step_size": step_size,
"status_code": status, **__get_constraint_values(args)}
rows = ch.execute(query=ch_query, params=params)
rows = __nested_array_to_dict_array(rows)
neutral = __get_domains_errors_neutral(rows)
rows = __merge_rows_with_neutral(rows, neutral)
return __complete_missing_steps(rows=rows, start_time=startTimestamp,
end_time=endTimestamp,
density=density, neutral=neutral)
def get_domains_errors_4xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(), density=6, **args):
return __get_domains_errors_4xx_and_5xx(status=4, project_id=project_id, startTimestamp=startTimestamp,
endTimestamp=endTimestamp, density=density, **args)
def get_domains_errors_5xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(), density=6, **args):
return __get_domains_errors_4xx_and_5xx(status=5, project_id=project_id, startTimestamp=startTimestamp,
endTimestamp=endTimestamp, density=density, **args)
def __nested_array_to_dict_array(rows):
for r in rows:
for i in range(len(r["keys"])):
r[r["keys"][i][0]] = int(r["keys"][i][1])
r.pop("keys")
return rows
def get_errors_per_domains(project_id, limit, page, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(), **args):
ch_sub_query = __get_basic_constraints(table_name="requests", data=args)
ch_sub_query.append("requests.event_type = 'REQUEST'")
ch_sub_query.append("requests.success = 0")
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
params = {"project_id": project_id,
"startTimestamp": startTimestamp,
"endTimestamp": endTimestamp,
**__get_constraint_values(args),
"limit_s": (page - 1) * limit,
"limit": limit}
with ch_client.ClickHouseClient() as ch:
ch_query = f"""SELECT
requests.url_host AS domain,
COUNT(1) AS errors_count,
COUNT(1) OVER () AS total,
SUM(errors_count) OVER () AS count
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS requests
WHERE {" AND ".join(ch_sub_query)}
GROUP BY requests.url_host
ORDER BY errors_count DESC
LIMIT %(limit)s OFFSET %(limit_s)s;"""
logger.debug("-----------")
logger.debug(ch.format(query=ch_query, params=params))
logger.debug("-----------")
rows = ch.execute(query=ch_query, params=params)
response = {"count": 0, "total": 0, "values": []}
if len(rows) > 0:
response["count"] = rows[0]["count"]
response["total"] = rows[0]["total"]
rows = helper.list_to_camel_case(rows)
for r in rows:
r.pop("count")
r.pop("total")
return response
def __get_calls_errors_4xx_or_5xx(status, project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(),
platform=None, **args):
ch_sub_query = __get_basic_constraints(table_name="requests", data=args)
ch_sub_query.append("requests.event_type = 'REQUEST'")
ch_sub_query.append(f"intDiv(requests.status, 100) == {status}")
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
with ch_client.ClickHouseClient() as ch:
ch_query = f"""SELECT requests.method,
requests.url_hostpath,
COUNT(1) AS all_requests
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS requests
WHERE {" AND ".join(ch_sub_query)}
GROUP BY requests.method, requests.url_hostpath
ORDER BY all_requests DESC
LIMIT 10;"""
params = {"project_id": project_id,
"startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
# print(ch.format(query=ch_query, params=params))
rows = ch.execute(query=ch_query, params=params)
return helper.list_to_camel_case(rows)
def get_calls_errors_4xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(),
platform=None, **args):
return __get_calls_errors_4xx_or_5xx(status=4, project_id=project_id, startTimestamp=startTimestamp,
endTimestamp=endTimestamp,
platform=platform, **args)
def get_calls_errors_5xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(),
platform=None, **args):
return __get_calls_errors_4xx_or_5xx(status=5, project_id=project_id, startTimestamp=startTimestamp,
endTimestamp=endTimestamp,
platform=platform, **args)
def get_errors_per_type(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(),
platform=None, density=7, **args):
step_size = __get_step_size(startTimestamp, endTimestamp, density)
ch_sub_query_chart = __get_basic_constraints(table_name="events", round_start=True,
data=args)
ch_sub_query_chart.append("(events.event_type = 'REQUEST' OR events.event_type = 'ERROR')")
ch_sub_query_chart.append("(events.status>200 OR events.event_type = 'ERROR')")
meta_condition = __get_meta_constraint(args)
ch_sub_query_chart += meta_condition
with ch_client.ClickHouseClient() as ch:
ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
SUM(events.event_type = 'REQUEST' AND intDiv(events.status, 100) == 4) AS _4xx,
SUM(events.event_type = 'REQUEST' AND intDiv(events.status, 100) == 5) AS _5xx,
SUM(events.event_type = 'ERROR' AND events.source == 'js_exception') AS js,
SUM(events.event_type = 'ERROR' AND events.source != 'js_exception') AS integrations
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS events
WHERE {" AND ".join(ch_sub_query_chart)}
GROUP BY timestamp
ORDER BY timestamp;"""
params = {"step_size": step_size,
"project_id": project_id,
"startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
# print(ch.format(query=ch_query, params=params))
rows = ch.execute(query=ch_query, params=params)
rows = helper.list_to_camel_case(rows)
return __complete_missing_steps(rows=rows, start_time=startTimestamp,
end_time=endTimestamp,
density=density,
neutral={"4xx": 0, "5xx": 0, "js": 0, "integrations": 0})
def get_impacted_sessions_by_js_errors(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(), density=7, **args):
step_size = __get_step_size(startTimestamp, endTimestamp, density)
ch_sub_query_chart = __get_basic_constraints(table_name="errors", round_start=True, data=args)
ch_sub_query_chart.append("errors.event_type='ERROR'")
ch_sub_query_chart.append("errors.source == 'js_exception'")
meta_condition = __get_meta_constraint(args)
ch_sub_query_chart += meta_condition
with ch_client.ClickHouseClient() as ch:
ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(errors.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
COUNT(DISTINCT errors.session_id) AS sessions_count,
COUNT(DISTINCT errors.error_id) AS errors_count
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS errors
WHERE {" AND ".join(ch_sub_query_chart)}
GROUP BY timestamp
ORDER BY timestamp;;"""
rows = ch.execute(query=ch_query,
params={"step_size": step_size,
"project_id": project_id,
"startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)})
ch_query = f"""SELECT COUNT(DISTINCT errors.session_id) AS sessions_count,
COUNT(DISTINCT errors.error_id) AS errors_count
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS errors
WHERE {" AND ".join(ch_sub_query_chart)};"""
counts = ch.execute(query=ch_query,
params={"step_size": step_size,
"project_id": project_id,
"startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)})
return {"sessionsCount": counts[0]["sessions_count"],
"errorsCount": counts[0]["errors_count"],
"chart": helper.list_to_camel_case(__complete_missing_steps(rows=rows, start_time=startTimestamp,
end_time=endTimestamp,
density=density,
neutral={"sessions_count": 0,
"errors_count": 0}))}
def get_resources_by_party(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(), density=7, **args):
step_size = __get_step_size(startTimestamp, endTimestamp, density)
ch_sub_query = __get_basic_constraints(table_name="requests", round_start=True, data=args)
ch_sub_query.append("requests.event_type='REQUEST'")
ch_sub_query.append("requests.success = 0")
sch_sub_query = ["rs.project_id =toUInt16(%(project_id)s)", "rs.event_type='REQUEST'"]
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
# sch_sub_query += meta_condition
with ch_client.ClickHouseClient() as ch:
ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(sub_requests.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
SUM(first.url_host = sub_requests.url_host) AS first_party,
SUM(first.url_host != sub_requests.url_host) AS third_party
FROM
(
SELECT requests.datetime, requests.url_host
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS requests
WHERE {" AND ".join(ch_sub_query)}
) AS sub_requests
CROSS JOIN
(
SELECT
rs.url_host,
COUNT(1) AS count
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS rs
WHERE {" AND ".join(sch_sub_query)}
GROUP BY rs.url_host
ORDER BY count DESC
LIMIT 1
) AS first
GROUP BY timestamp
ORDER BY timestamp;"""
params = {"step_size": step_size,
"project_id": project_id,
"startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
# print(ch.format(query=ch_query, params=params))
rows = ch.execute(query=ch_query, params=params)
return helper.list_to_camel_case(__complete_missing_steps(rows=rows, start_time=startTimestamp,
end_time=endTimestamp,
density=density,
neutral={"first_party": 0,
"third_party": 0}))
def get_performance_avg_page_load_time(ch, project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(),
density=19, resources=None, **args):
step_size = __get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density)
location_constraints = []
meta_condition = __get_meta_constraint(args)
location_constraints_vals = {}
if resources and len(resources) > 0:
for r in resources:
if r["type"] == "LOCATION":
location_constraints.append(f"pages.url_path = %(val_{len(location_constraints)})s")
location_constraints_vals["val_" + str(len(location_constraints) - 1)] = r['value']
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp}
ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True,
data=args)
ch_sub_query_chart.append("pages.event_type='LOCATION'")
ch_sub_query_chart += meta_condition
ch_sub_query_chart.append("pages.load_event_end>0")
ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp,
COALESCE(avgOrNull(pages.load_event_end),0) AS value
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
WHERE {" AND ".join(ch_sub_query_chart)}
{(f' AND ({" OR ".join(location_constraints)})') if len(location_constraints) > 0 else ""}
GROUP BY timestamp
ORDER BY timestamp;"""
rows = ch.execute(query=ch_query, params={**params, **location_constraints_vals, **__get_constraint_values(args)})
pages = __complete_missing_steps(rows=rows, start_time=startTimestamp,
end_time=endTimestamp,
density=density, neutral={"value": 0})
# for s in pages:
# for k in s:
# if s[k] is None:
# s[k] = 0
return pages
def get_user_activity_avg_visited_pages(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(), **args):
results = {}
with ch_client.ClickHouseClient() as ch:
rows = __get_user_activity_avg_visited_pages(ch, project_id, startTimestamp, endTimestamp, **args)
if len(rows) > 0:
results = helper.dict_to_camel_case(rows[0])
for key in results:
if isnan(results[key]):
results[key] = 0
results["chart"] = __get_user_activity_avg_visited_pages_chart(ch, project_id, startTimestamp,
endTimestamp, **args)
diff = endTimestamp - startTimestamp
endTimestamp = startTimestamp
startTimestamp = endTimestamp - diff
rows = __get_user_activity_avg_visited_pages(ch, project_id, startTimestamp, endTimestamp, **args)
if len(rows) > 0:
previous = helper.dict_to_camel_case(rows[0])
results["progress"] = helper.__progress(old_val=previous["value"], new_val=results["value"])
results["unit"] = schemas.TemplatePredefinedUnits.COUNT
return results
def __get_user_activity_avg_visited_pages(ch, project_id, startTimestamp, endTimestamp, **args):
ch_sub_query = __get_basic_constraints(table_name="pages", data=args)
ch_sub_query.append("pages.event_type='LOCATION'")
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
ch_query = f"""SELECT COALESCE(CEIL(avgOrNull(count)),0) AS value
FROM (SELECT COUNT(1) AS count
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
WHERE {" AND ".join(ch_sub_query)}
GROUP BY session_id) AS groupped_data
WHERE count>0;"""
params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp,
**__get_constraint_values(args)}
rows = ch.execute(query=ch_query, params=params)
return rows
def __get_user_activity_avg_visited_pages_chart(ch, project_id, startTimestamp, endTimestamp, density=20, **args):
step_size = __get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density)
ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args)
ch_sub_query_chart.append("pages.event_type='LOCATION'")
meta_condition = __get_meta_constraint(args)
ch_sub_query_chart += meta_condition
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
ch_query = f"""SELECT timestamp, COALESCE(avgOrNull(count), 0) AS value
FROM (SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp,
session_id, COUNT(1) AS count
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
WHERE {" AND ".join(ch_sub_query_chart)}
GROUP BY timestamp,session_id
ORDER BY timestamp) AS groupped_data
WHERE count>0
GROUP BY timestamp
ORDER BY timestamp;"""
rows = ch.execute(query=ch_query, params=params)
rows = __complete_missing_steps(rows=rows, start_time=startTimestamp,
end_time=endTimestamp,
density=density, neutral={"value": 0})
return rows
def get_top_metrics_count_requests(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(), value=None, density=20, **args):
step_size = __get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density)
ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args)
ch_sub_query_chart.append("pages.event_type='LOCATION'")
meta_condition = __get_meta_constraint(args)
ch_sub_query_chart += meta_condition
ch_sub_query = __get_basic_constraints(table_name="pages", data=args)
ch_sub_query.append("pages.event_type='LOCATION'")
ch_sub_query += meta_condition
if value is not None:
ch_sub_query.append("pages.url_path = %(value)s")
ch_sub_query_chart.append("pages.url_path = %(value)s")
with ch_client.ClickHouseClient() as ch:
ch_query = f"""SELECT COUNT(1) AS value
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
WHERE {" AND ".join(ch_sub_query)};"""
params = {"step_size": step_size, "project_id": project_id,
"startTimestamp": startTimestamp,
"endTimestamp": endTimestamp,
"value": value, **__get_constraint_values(args)}
rows = ch.execute(query=ch_query, params=params)
result = rows[0]
ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp,
COUNT(1) AS value
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
WHERE {" AND ".join(ch_sub_query_chart)}
GROUP BY timestamp
ORDER BY timestamp;"""
rows = ch.execute(query=ch_query, params={**params, **__get_constraint_values(args)})
rows = __complete_missing_steps(rows=rows, start_time=startTimestamp,
end_time=endTimestamp,
density=density, neutral={"value": 0})
result["chart"] = rows
result["unit"] = schemas.TemplatePredefinedUnits.COUNT
return helper.dict_to_camel_case(result)
def get_unique_users(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(),
density=7, **args):
step_size = __get_step_size(startTimestamp, endTimestamp, density)
ch_sub_query = __get_basic_constraints(table_name="sessions", data=args)
ch_sub_query_chart = __get_basic_constraints(table_name="sessions", round_start=True, data=args)
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
ch_sub_query_chart += meta_condition
ch_sub_query_chart.append("isNotNull(sessions.user_id)")
ch_sub_query_chart.append("sessions.user_id!=''")
with ch_client.ClickHouseClient() as ch:
ch_query = f"""\
SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
COUNT(DISTINCT sessions.user_id) AS value
FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions
WHERE {" AND ".join(ch_sub_query_chart)}
GROUP BY timestamp
ORDER BY timestamp;\
"""
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
rows = ch.execute(query=ch_query, params=params)
results = {
"value": sum([r["value"] for r in rows]),
"chart": __complete_missing_steps(rows=rows, start_time=startTimestamp, end_time=endTimestamp,
density=density,
neutral={"value": 0})
}
diff = endTimestamp - startTimestamp
endTimestamp = startTimestamp
startTimestamp = endTimestamp - diff
ch_query = f""" SELECT COUNT(DISTINCT user_id) AS count
FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions
WHERE {" AND ".join(ch_sub_query)};"""
params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp,
**__get_constraint_values(args)}
count = ch.execute(query=ch_query, params=params)
count = count[0]["count"]
results["progress"] = helper.__progress(old_val=count, new_val=results["value"])
results["unit"] = schemas.TemplatePredefinedUnits.COUNT
return results