From 3654dccec1138910be409eaf0525288d85c5a4ac Mon Sep 17 00:00:00 2001
From: Kraiem Taha Yassine
Date: Mon, 24 Jun 2024 15:53:00 +0200
Subject: [PATCH] Dev (#2300)

* refactor(chalice): upgraded dependencies

* refactor(chalice): upgraded dependencies
  feat(chalice): support heatmaps

* feat(chalice): support table-of-browsers showing user-count

* feat(chalice): support table-of-devices showing user-count

* feat(chalice): support table-of-URLs showing user-count

* refactor(chalice): refactored insights

* feat(chalice): support funnels showing user-count
---
 api/chalicelib/core/custom_metrics.py       |  5 ++-
 api/chalicelib/core/funnels.py              | 19 ++++----
 api/chalicelib/core/significance.py         | 49 ++++++++++++---------
 api/schemas/schemas.py                      |  4 +-
 ee/api/chalicelib/core/custom_metrics.py    |  4 +-
 ee/api/chalicelib/core/sessions_insights.py | 43 +++++++-----------
 6 files changed, 65 insertions(+), 59 deletions(-)

diff --git a/api/chalicelib/core/custom_metrics.py b/api/chalicelib/core/custom_metrics.py
index 823601409..947a5ad01 100644
--- a/api/chalicelib/core/custom_metrics.py
+++ b/api/chalicelib/core/custom_metrics.py
@@ -64,7 +64,10 @@ def __get_funnel_chart(project_id: int, data: schemas.CardFunnel, user_id: int =
             "stages": [],
             "totalDropDueToIssues": 0
         }
-    return funnels.get_top_insights_on_the_fly_widget(project_id=project_id, data=data.series[0].filter)
+
+    return funnels.get_top_insights_on_the_fly_widget(project_id=project_id,
+                                                      data=data.series[0].filter,
+                                                      metric_of=data.metric_of)
 
 
 def __get_errors_list(project_id, user_id, data: schemas.CardSchema):
diff --git a/api/chalicelib/core/funnels.py b/api/chalicelib/core/funnels.py
index 870d6eac6..e15e89c31 100644
--- a/api/chalicelib/core/funnels.py
+++ b/api/chalicelib/core/funnels.py
@@ -36,20 +36,23 @@ def __fix_stages(f_events: List[schemas.SessionSearchEventSchema2]):
 
 
 # def get_top_insights_on_the_fly_widget(project_id, data: schemas.FunnelInsightsPayloadSchema):
-def get_top_insights_on_the_fly_widget(project_id, data: schemas.CardSeriesFilterSchema):
+def get_top_insights_on_the_fly_widget(project_id, data: schemas.CardSeriesFilterSchema,
+                                       metric_of: schemas.MetricOfFunnels):
     data.events = filter_stages(__parse_events(data.events))
     data.events = __fix_stages(data.events)
     if len(data.events) == 0:
         return {"stages": [], "totalDropDueToIssues": 0}
-    insights, total_drop_due_to_issues = significance.get_top_insights(filter_d=data, project_id=project_id)
+    insights, total_drop_due_to_issues = significance.get_top_insights(filter_d=data,
+                                                                       project_id=project_id,
+                                                                       metric_of=metric_of)
     insights = helper.list_to_camel_case(insights)
     if len(insights) > 0:
-        # TODO: check if this correct
-        if total_drop_due_to_issues > insights[0]["sessionsCount"]:
-            if len(insights) == 0:
-                total_drop_due_to_issues = 0
-            else:
-                total_drop_due_to_issues = insights[0]["sessionsCount"] - insights[-1]["sessionsCount"]
+        if metric_of == schemas.MetricOfFunnels.session_count and total_drop_due_to_issues > (
+                insights[0]["sessionsCount"] - insights[-1]["sessionsCount"]):
+            total_drop_due_to_issues = insights[0]["sessionsCount"] - insights[-1]["sessionsCount"]
+        elif metric_of == schemas.MetricOfFunnels.user_count and total_drop_due_to_issues > (
+                insights[0]["usersCount"] - insights[-1]["usersCount"]):
+            total_drop_due_to_issues = insights[0]["usersCount"] - insights[-1]["usersCount"]
         insights[-1]["dropDueToIssues"] = total_drop_due_to_issues
     return {"stages": insights, "totalDropDueToIssues": total_drop_due_to_issues}
 
diff --git a/api/chalicelib/core/significance.py b/api/chalicelib/core/significance.py
index 59db18589..3ccc22240 100644
--- a/api/chalicelib/core/significance.py
+++ b/api/chalicelib/core/significance.py
@@ -34,10 +34,7 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id)
     stages: [dict] = filter_d.events
     filters: [dict] = filter_d.filters
     filter_issues = []
-    # TODO: enable this if needed by an endpoint
-    # filter_issues = filter_d.get("issueTypes")
-    # if filter_issues is None or len(filter_issues) == 0:
-    #     filter_issues = []
+
     stage_constraints = ["main.timestamp <= %(endTimestamp)s"]
     first_stage_extra_constraints = ["s.project_id=%(project_id)s", "s.start_ts >= %(startTimestamp)s",
                                      "s.start_ts <= %(endTimestamp)s"]
@@ -50,7 +47,7 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id)
             if len(f.value) == 0:
                 continue
             f.value = helper.values_for_operator(value=f.value, op=f.operator)
-            # filter_args = _multiple_values(f["value"])
+
             op = sh.get_sql_operator(f.operator)
             filter_type = f.type
 
@@ -195,7 +192,7 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id)
     n_stages_query += ") AS stages_t"
 
     n_stages_query = f"""
-    SELECT stages_and_issues_t.*, sessions.user_uuid
+    SELECT stages_and_issues_t.*, sessions.user_uuid, sessions.user_id
     FROM (
         SELECT * FROM (
             SELECT T1.session_id, {",".join([f"stage{i + 1}_timestamp" for i in range(n_stages)])}
@@ -217,7 +214,6 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id)
     ) AS stages_and_issues_t
     INNER JOIN sessions USING(session_id);
     """
-    # LIMIT 10000
     params = {"project_id": project_id, "startTimestamp": filter_d.startTimestamp,
               "endTimestamp": filter_d.endTimestamp,
               "issueTypes": tuple(filter_issues), **values}
@@ -236,6 +232,9 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id)
         logging.warning(filter_d.model_dump_json())
         logging.warning("--------------------")
         raise err
+    for r in rows:
+        if r["user_id"] == "":
+            r["user_id"] = None
     return rows
 
 
@@ -421,42 +420,47 @@ def count_sessions(rows, n_stages):
     return session_counts
 
 
-def count_users(rows, n_stages):
+def count_users(rows, n_stages, user_key="user_uuid"):
     users_in_stages = {i: set() for i in range(1, n_stages + 1)}
     for row in rows:
         for i in range(1, n_stages + 1):
-            if row[f"stage{i}_timestamp"] is not None:
-                users_in_stages[i].add(row["user_uuid"])
+            if row[f"stage{i}_timestamp"] is not None and row[user_key] is not None:
+                users_in_stages[i].add(row[user_key])
 
     users_count = {i: len(users_in_stages[i]) for i in range(1, n_stages + 1)}
     return users_count
 
 
-def get_stages(stages, rows):
+def get_stages(stages, rows, metric_of=schemas.MetricOfFunnels.session_count):
     n_stages = len(stages)
-    session_counts = count_sessions(rows, n_stages)
-    users_counts = count_users(rows, n_stages)
+    if metric_of == schemas.MetricOfFunnels.session_count:
+        base_counts = count_sessions(rows, n_stages)
+    else:
+        base_counts = count_users(rows, n_stages, user_key="user_id")
 
     stages_list = []
     for i, stage in enumerate(stages):
         drop = None
         if i != 0:
-            if base_counts[i] == 0:
+            if base_counts[i] == 0:
                 drop = 0
-            elif session_counts[i] > 0:
-                drop = int(100 * (session_counts[i] - session_counts[i + 1]) / session_counts[i])
+            elif base_counts[i] > 0:
+                drop = int(100 * (base_counts[i] - base_counts[i + 1]) / base_counts[i])
 
         stages_list.append(
             {"value": stage.value,
              "type": stage.type,
              "operator": stage.operator,
-             "sessionsCount": session_counts[i + 1],
             "drop_pct": drop,
-             "usersCount": users_counts[i + 1],
             "dropDueToIssues": 0
             }
        )
+        if metric_of == schemas.MetricOfFunnels.session_count:
+            stages_list[-1]["sessionsCount"] = base_counts[i + 1]
+        else:
+            stages_list[-1]["usersCount"] = base_counts[i + 1]
+
     return stages_list
 
 
@@ -539,7 +543,7 @@ def get_issues(stages, rows, first_stage=None, last_stage=None, drop_only=False)
     return n_critical_issues, issues_dict, total_drop_due_to_issues
 
 
-def get_top_insights(filter_d: schemas.CardSeriesFilterSchema, project_id):
+def get_top_insights(filter_d: schemas.CardSeriesFilterSchema, project_id, metric_of: schemas.MetricOfFunnels):
     output = []
     stages = filter_d.events
 
@@ -549,10 +553,11 @@ def get_top_insights(filter_d: schemas.CardSeriesFilterSchema, project_id):
 
     # The result of the multi-stage query
     rows = get_stages_and_events(filter_d=filter_d, project_id=project_id)
-    if len(rows) == 0:
-        return get_stages(stages, []), 0
     # Obtain the first part of the output
-    stages_list = get_stages(stages, rows)
+    stages_list = get_stages(stages, rows, metric_of=metric_of)
+    if len(rows) == 0:
+        return stages_list, 0
+
 
     # Obtain the second part of the output
     total_drop_due_to_issues = get_issues(stages, rows, first_stage=1,
diff --git a/api/schemas/schemas.py b/api/schemas/schemas.py
index 33a02c7e8..6a57b47e1 100644
--- a/api/schemas/schemas.py
+++ b/api/schemas/schemas.py
@@ -1023,6 +1023,7 @@ class MetricOfTimeseries(str, Enum):
 
 class MetricOfFunnels(str, Enum):
     session_count = MetricOfTimeseries.session_count.value
+    user_count = MetricOfTimeseries.user_count.value
 
 
 class MetricOfClickMap(str, Enum):
@@ -1166,7 +1167,8 @@ class CardFunnel(__CardSchema):
 
     @model_validator(mode="before")
    def __enforce_default(cls, values):
-        values["metricOf"] = MetricOfFunnels.session_count
+        if values.get("metricOf") and not MetricOfFunnels.has_value(values["metricOf"]):
+            values["metricOf"] = MetricOfFunnels.session_count
         values["viewType"] = MetricOtherViewType.other_chart
         if values.get("series") is not None and len(values["series"]) > 0:
             values["series"] = [values["series"][0]]
diff --git a/ee/api/chalicelib/core/custom_metrics.py b/ee/api/chalicelib/core/custom_metrics.py
index a1cf8c0a8..c9335cc46 100644
--- a/ee/api/chalicelib/core/custom_metrics.py
+++ b/ee/api/chalicelib/core/custom_metrics.py
@@ -75,7 +75,9 @@ def __get_funnel_chart(project_id: int, data: schemas.CardFunnel, user_id: int =
             "stages": [],
             "totalDropDueToIssues": 0
         }
-    return funnels.get_top_insights_on_the_fly_widget(project_id=project_id, data=data.series[0].filter)
+    return funnels.get_top_insights_on_the_fly_widget(project_id=project_id,
+                                                      data=data.series[0].filter,
+                                                      metric_of=data.metric_of)
 
 
 def __get_errors_list(project_id, user_id, data: schemas.CardSchema):
diff --git a/ee/api/chalicelib/core/sessions_insights.py b/ee/api/chalicelib/core/sessions_insights.py
index 781f7ed20..5bb433f1c 100644
--- a/ee/api/chalicelib/core/sessions_insights.py
+++ b/ee/api/chalicelib/core/sessions_insights.py
@@ -1,10 +1,12 @@
 from typing import Optional
-
+import logging
 import schemas
 from chalicelib.core import metrics
 from chalicelib.core import sessions_exp
 from chalicelib.utils import ch_client
 
+logger = logging.getLogger(__name__)
+
 
 def _table_slice(table, index):
     col = list()
@@ -22,14 +24,12 @@ def _table_where(table, index, value):
 
 
 def _sum_table_index(table, index):
-    # print(f'index {index}')
     s = 0
     count = 0
     for row in table:
         v = row[index]
         if v is None:
             continue
-        # print(v)
         s += v
         count += 1
     return s
@@ -46,8 +48,6 @@ def _sort_table_index(table, index, reverse=False):
 
 
 def _select_rec(l, selector):
-    # print('selector:', selector)
-    # print('list:', l)
     if len(selector) == 1:
         return l[selector[0]]
     else:
@@ -109,9 +107,9 @@ def query_requests_by_period(project_id, start_time, end_time, filters: Optional
               ORDER BY T1.hh DESC;"""
     with ch_client.ClickHouseClient() as conn:
         query = conn.format(query=query, params=params)
-        # print("--------")
-        # print(query)
-        # print("--------")
+        logger.debug("--------")
+        logger.debug(query)
+        logger.debug("--------")
         res = conn.execute(query=query)
     if res is None or sum([r.get("sessions") for r in res]) == 0:
         return []
@@ -119,7 +117,6 @@ def query_requests_by_period(project_id, start_time, end_time, filters: Optional
     table_hh1, table_hh2, columns, this_period_hosts, last_period_hosts = __get_two_values(res, time_index='hh',
                                                                                            name_index='source')
     test = [k[4] for k in table_hh1]
-    # print(f'length {len(test)}, uniques {len(set(test))}')
     del res
 
     new_hosts = [x for x in this_period_hosts if x not in last_period_hosts]
@@ -218,9 +215,9 @@ def query_most_errors_by_period(project_id, start_time, end_time,
 
     with ch_client.ClickHouseClient() as conn:
         query = conn.format(query=query, params=params)
-        # print("--------")
-        # print(query)
-        # print("--------")
+        logger.debug("--------")
+        logger.debug(query)
+        logger.debug("--------")
         res = conn.execute(query=query)
     if res is None or sum([r.get("sessions") for r in res]) == 0:
         return []
@@ -228,18 +225,12 @@ def query_most_errors_by_period(project_id, start_time, end_time,
     table_hh1, table_hh2, columns, this_period_errors, last_period_errors = __get_two_values(res, time_index='hh',
                                                                                              name_index='names')
     del res
-    # print(table_hh1)
-    # print('\n')
-    # print(table_hh2)
-    # print('\n')
 
     new_errors = [x for x in this_period_errors if x not in last_period_errors]
     common_errors = [x for x in this_period_errors if x not in new_errors]
     sessions_idx = columns.index('sessions')
     names_idx = columns.index('names')
 
-    print(_table_where(table_hh1, names_idx, this_period_errors[0]))
-
     percentage_errors = dict()
     total = _sum_table_index(table_hh1, sessions_idx)
     # error_increase = dict()
@@ -308,9 +299,9 @@ def query_cpu_memory_by_period(project_id, start_time, end_time,
               ORDER BY T1.hh DESC;"""
     with ch_client.ClickHouseClient() as conn:
         query = conn.format(query=query, params=params)
-        # print("--------")
-        # print(query)
-        # print("--------")
+        logger.debug("--------")
+        logger.debug(query)
+        logger.debug("--------")
         res = conn.execute(query=query)
     if res is None or sum([r.get("sessions") for r in res]) == 0:
         return []
@@ -318,8 +309,8 @@ def query_cpu_memory_by_period(project_id, start_time, end_time,
 
     table_hh1, table_hh2, columns, this_period_resources, last_period_resources = __get_two_values(res, time_index='hh',
                                                                                                    name_index='names')
-    print(f'TB1\n{table_hh1}')
-    print(f'TB2\n{table_hh2}')
+    logger.debug(f'TB1\n{table_hh1}')
+    logger.debug(f'TB2\n{table_hh2}')
     del res
 
     memory_idx = columns.index('memory_used')
@@ -387,9 +378,9 @@ def query_click_rage_by_period(project_id, start_time, end_time,
              ORDER BY T1.hh DESC;"""
     with ch_client.ClickHouseClient() as conn:
         query = conn.format(query=query, params=params)
-        # print("--------")
-        # print(query)
-        # print("--------")
+        logger.debug("--------")
+        logger.debug(query)
+        logger.debug("--------")
         res = conn.execute(query=query)
     if res is None or sum([r.get("sessions") for r in res]) == 0:
         return []
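
Note (illustrative, not part of the patch): the user-count funnel can legitimately diverge from the session-count one, because a user may open several sessions and anonymous sessions carry no user_id. The sketch below is a simplified, self-contained re-implementation of the two counting strategies that significance.py now switches between via metric_of; the sample rows are hypothetical stand-ins for the multi-stage query output, which carries one stage{i}_timestamp column per stage plus user_id.

def count_sessions(rows, n_stages):
    # One row per session: a session reached stage i if stage{i}_timestamp is set.
    return {i: sum(1 for r in rows if r[f"stage{i}_timestamp"] is not None)
            for i in range(1, n_stages + 1)}


def count_users(rows, n_stages, user_key="user_id"):
    # Distinct users per stage; rows without a user are skipped, which is why
    # get_stages_and_events first maps empty user_id strings to None.
    users_in_stages = {i: set() for i in range(1, n_stages + 1)}
    for row in rows:
        for i in range(1, n_stages + 1):
            if row[f"stage{i}_timestamp"] is not None and row[user_key] is not None:
                users_in_stages[i].add(row[user_key])
    return {i: len(users_in_stages[i]) for i in range(1, n_stages + 1)}


rows = [
    {"stage1_timestamp": 1, "stage2_timestamp": 2, "user_id": "u1"},
    {"stage1_timestamp": 3, "stage2_timestamp": None, "user_id": "u1"},  # same user, bounced session
    {"stage1_timestamp": 5, "stage2_timestamp": 6, "user_id": None},     # anonymous session
]

print(count_sessions(rows, 2))                   # {1: 3, 2: 2} -> 33% drop
print(count_users(rows, 2, user_key="user_id"))  # {1: 1, 2: 1} -> 0% drop

With these rows the session funnel drops 33% between the stages (3 -> 2) while the user funnel drops 0% (u1 reached both stages). That divergence is why get_stages selects count_sessions or count_users(user_key="user_id") based on metric_of, and why funnels.py clamps totalDropDueToIssues against the first-to-last difference of whichever count is active.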