* refactor(chalice): upgraded dependencies

* refactor(chalice): upgraded dependencies

* feat(chalice): support heatmaps

* feat(chalice): support table-of-browsers showing user-count

* feat(chalice): support table-of-devices showing user-count

* feat(chalice): support table-of-URLs showing user-count

* refactor(chalice): refactored insights

* feat(chalice): support funnels showing user-count
Kraiem Taha Yassine 2024-06-24 15:53:00 +02:00 committed by GitHub
parent 960da9f037
commit 3654dccec1
6 changed files with 65 additions and 59 deletions
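In short: funnel cards can now be counted by unique users as well as by sessions. As a rough illustration of what the new option looks like from the caller's side (the payload shape is an assumption based on the CardFunnel schema touched below, not part of this commit):

# Hypothetical funnel-card payload; only metricOf is the point here.
# Valid values come from the MetricOfFunnels enum changed below:
#   "sessionCount" (existing) or "userCount" (new).
card = {
    "metricOf": "userCount",  # anything else is coerced back to sessionCount
    "series": [{"filter": {"events": [...]}}],  # funnel stages, elided
}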

Changed file 1 of 6

@@ -64,7 +64,10 @@ def __get_funnel_chart(project_id: int, data: schemas.CardFunnel, user_id: int =
             "stages": [],
             "totalDropDueToIssues": 0
         }
-    return funnels.get_top_insights_on_the_fly_widget(project_id=project_id, data=data.series[0].filter)
+    return funnels.get_top_insights_on_the_fly_widget(project_id=project_id,
+                                                      data=data.series[0].filter,
+                                                      metric_of=data.metric_of)


 def __get_errors_list(project_id, user_id, data: schemas.CardSchema):
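For orientation, the same call-site change appears again in a second copy of this handler further down. The condensed chain below is assembled from the signatures in this diff (bodies elided; nothing here beyond what the hunks show):

# Condensed call chain after this commit:
#
# __get_funnel_chart(project_id, data)                      # API layer
#   -> funnels.get_top_insights_on_the_fly_widget(
#          project_id=..., data=..., metric_of=data.metric_of)
#     -> significance.get_top_insights(
#            filter_d=..., project_id=..., metric_of=...)
#       -> get_stages(stages, rows, metric_of=...)          # picks the counter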

Changed file 2 of 6

@@ -36,20 +36,23 @@ def __fix_stages(f_events: List[schemas.SessionSearchEventSchema2]):
 # def get_top_insights_on_the_fly_widget(project_id, data: schemas.FunnelInsightsPayloadSchema):
-def get_top_insights_on_the_fly_widget(project_id, data: schemas.CardSeriesFilterSchema):
+def get_top_insights_on_the_fly_widget(project_id, data: schemas.CardSeriesFilterSchema,
+                                       metric_of: schemas.MetricOfFunnels):
     data.events = filter_stages(__parse_events(data.events))
     data.events = __fix_stages(data.events)
     if len(data.events) == 0:
         return {"stages": [], "totalDropDueToIssues": 0}
-    insights, total_drop_due_to_issues = significance.get_top_insights(filter_d=data, project_id=project_id)
+    insights, total_drop_due_to_issues = significance.get_top_insights(filter_d=data,
+                                                                       project_id=project_id,
+                                                                       metric_of=metric_of)
     insights = helper.list_to_camel_case(insights)
-    if len(insights) > 0:
-        # TODO: check if this correct
-        if total_drop_due_to_issues > insights[0]["sessionsCount"]:
-            total_drop_due_to_issues = insights[0]["sessionsCount"] - insights[-1]["sessionsCount"]
+    if len(insights) == 0:
+        total_drop_due_to_issues = 0
+    else:
+        if metric_of == schemas.MetricOfFunnels.session_count and total_drop_due_to_issues > (
+                insights[0]["sessionsCount"] - insights[-1]["sessionsCount"]):
+            total_drop_due_to_issues = insights[0]["sessionsCount"] - insights[-1]["sessionsCount"]
+        elif metric_of == schemas.MetricOfFunnels.user_count and total_drop_due_to_issues > (
+                insights[0]["usersCount"] - insights[-1]["usersCount"]):
+            total_drop_due_to_issues = insights[0]["usersCount"] - insights[-1]["usersCount"]
     insights[-1]["dropDueToIssues"] = total_drop_due_to_issues
     return {"stages": insights,
             "totalDropDueToIssues": total_drop_due_to_issues}

Changed file 3 of 6

@@ -34,10 +34,7 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id)
     stages: [dict] = filter_d.events
     filters: [dict] = filter_d.filters
     filter_issues = []
-    # TODO: enable this if needed by an endpoint
-    # filter_issues = filter_d.get("issueTypes")
-    # if filter_issues is None or len(filter_issues) == 0:
-    #     filter_issues = []
     stage_constraints = ["main.timestamp <= %(endTimestamp)s"]
     first_stage_extra_constraints = ["s.project_id=%(project_id)s", "s.start_ts >= %(startTimestamp)s",
                                     "s.start_ts <= %(endTimestamp)s"]
@@ -50,7 +47,7 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id)
             if len(f.value) == 0:
                 continue
             f.value = helper.values_for_operator(value=f.value, op=f.operator)
-            # filter_args = _multiple_values(f["value"])
             op = sh.get_sql_operator(f.operator)
             filter_type = f.type
@@ -195,7 +192,7 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id)
     n_stages_query += ") AS stages_t"
     n_stages_query = f"""
-    SELECT stages_and_issues_t.*, sessions.user_uuid
+    SELECT stages_and_issues_t.*, sessions.user_uuid, sessions.user_id
     FROM (
         SELECT * FROM (
             SELECT T1.session_id, {",".join([f"stage{i + 1}_timestamp" for i in range(n_stages)])}
@@ -217,7 +214,6 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id)
     ) AS stages_and_issues_t INNER JOIN sessions USING(session_id);
     """
-    # LIMIT 10000
     params = {"project_id": project_id, "startTimestamp": filter_d.startTimestamp,
               "endTimestamp": filter_d.endTimestamp,
               "issueTypes": tuple(filter_issues), **values}
@@ -236,6 +232,9 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id)
         logging.warning(filter_d.model_dump_json())
         logging.warning("--------------------")
         raise err
+    for r in rows:
+        if r["user_id"] == "":
+            r["user_id"] = None
     return rows
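The new normalization loop matters downstream: count_users deduplicates through a set, so without it every anonymous session (empty user_id string) would collapse into one phantom "user". A toy illustration with invented rows:

rows = [{"user_id": ""}, {"user_id": ""}, {"user_id": "u1"}]
# Without normalization, "" counts as one distinct "user":
assert len({r["user_id"] for r in rows}) == 2
# With "" mapped to None and None skipped (as count_users now does),
# only real identities are counted:
assert len({r["user_id"] for r in rows if r["user_id"] not in ("", None)}) == 1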
@@ -421,42 +420,47 @@ def count_sessions(rows, n_stages):
     return session_counts


-def count_users(rows, n_stages):
+def count_users(rows, n_stages, user_key="user_uuid"):
     users_in_stages = {i: set() for i in range(1, n_stages + 1)}
     for row in rows:
         for i in range(1, n_stages + 1):
-            if row[f"stage{i}_timestamp"] is not None:
-                users_in_stages[i].add(row["user_uuid"])
+            if row[f"stage{i}_timestamp"] is not None and row[user_key] is not None:
+                users_in_stages[i].add(row[user_key])
     users_count = {i: len(users_in_stages[i]) for i in range(1, n_stages + 1)}
     return users_count


-def get_stages(stages, rows):
+def get_stages(stages, rows, metric_of=schemas.MetricOfFunnels.session_count):
     n_stages = len(stages)
-    session_counts = count_sessions(rows, n_stages)
-    users_counts = count_users(rows, n_stages)
+    if metric_of == "sessionCount":
+        base_counts = count_sessions(rows, n_stages)
+    else:
+        base_counts = count_users(rows, n_stages, user_key="user_id")
     stages_list = []
     for i, stage in enumerate(stages):
         drop = None
         if i != 0:
-            if session_counts[i] == 0:
+            if base_counts[i] == 0:
                 drop = 0
-            elif session_counts[i] > 0:
-                drop = int(100 * (session_counts[i] - session_counts[i + 1]) / session_counts[i])
+            elif base_counts[i] > 0:
+                drop = int(100 * (base_counts[i] - base_counts[i + 1]) / base_counts[i])
         stages_list.append(
             {"value": stage.value,
              "type": stage.type,
              "operator": stage.operator,
-             "sessionsCount": session_counts[i + 1],
              "drop_pct": drop,
-             "usersCount": users_counts[i + 1],
              "dropDueToIssues": 0
              }
         )
+        if metric_of == "sessionCount":
+            stages_list[-1]["sessionsCount"] = base_counts[i + 1]
+        else:
+            stages_list[-1]["usersCount"] = base_counts[i + 1]
     return stages_list
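A quick sketch of how the two counters behave on the same rows (rows invented; only the keys these functions read are shown):

rows = [
    {"user_id": "u1", "stage1_timestamp": 1, "stage2_timestamp": 2},
    {"user_id": "u1", "stage1_timestamp": 3, "stage2_timestamp": None},
    {"user_id": None, "stage1_timestamp": 4, "stage2_timestamp": None},
]
# count_sessions(rows, 2) counts rows per reached stage:
#   {1: 3, 2: 1}
# count_users(rows, 2, user_key="user_id") counts distinct users,
# skipping None (anonymous) identities:
#   {1: 1, 2: 1}
# get_stages then selects one counter via metric_of and stores the result
# under "sessionsCount" or "usersCount" accordingly.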
@@ -539,7 +543,7 @@ def get_issues(stages, rows, first_stage=None, last_stage=None, drop_only=False)
     return n_critical_issues, issues_dict, total_drop_due_to_issues


-def get_top_insights(filter_d: schemas.CardSeriesFilterSchema, project_id):
+def get_top_insights(filter_d: schemas.CardSeriesFilterSchema, project_id, metric_of: schemas.MetricOfFunnels):
     output = []
     stages = filter_d.events
@@ -549,10 +553,11 @@ def get_top_insights(filter_d: schemas.CardSeriesFilterSchema, project_id):
     # The result of the multi-stage query
     rows = get_stages_and_events(filter_d=filter_d, project_id=project_id)
-    if len(rows) == 0:
-        return get_stages(stages, []), 0
     # Obtain the first part of the output
-    stages_list = get_stages(stages, rows)
+    stages_list = get_stages(stages, rows, metric_of=metric_of)
+    if len(rows) == 0:
+        return stages_list, 0
     # Obtain the second part of the output
     total_drop_due_to_issues = get_issues(stages, rows,
                                           first_stage=1,
Changed file 4 of 6

@@ -1023,6 +1023,7 @@ class MetricOfTimeseries(str, Enum):
 class MetricOfFunnels(str, Enum):
     session_count = MetricOfTimeseries.session_count.value
+    user_count = MetricOfTimeseries.user_count.value


 class MetricOfClickMap(str, Enum):
@@ -1166,7 +1167,8 @@ class CardFunnel(__CardSchema):
     @model_validator(mode="before")
     def __enforce_default(cls, values):
-        values["metricOf"] = MetricOfFunnels.session_count
+        if values.get("metricOf") and not MetricOfFunnels.has_value(values["metricOf"]):
+            values["metricOf"] = MetricOfFunnels.session_count
         values["viewType"] = MetricOtherViewType.other_chart
         if values.get("series") is not None and len(values["series"]) > 0:
            values["series"] = [values["series"][0]]

Changed file 5 of 6

@@ -75,7 +75,9 @@ def __get_funnel_chart(project_id: int, data: schemas.CardFunnel, user_id: int =
             "stages": [],
             "totalDropDueToIssues": 0
         }
-    return funnels.get_top_insights_on_the_fly_widget(project_id=project_id, data=data.series[0].filter)
+    return funnels.get_top_insights_on_the_fly_widget(project_id=project_id,
+                                                      data=data.series[0].filter,
+                                                      metric_of=data.metric_of)


 def __get_errors_list(project_id, user_id, data: schemas.CardSchema):

Changed file 6 of 6

@@ -1,10 +1,12 @@
 from typing import Optional
+import logging

 import schemas
 from chalicelib.core import metrics
 from chalicelib.core import sessions_exp
 from chalicelib.utils import ch_client

+logger = logging.getLogger(__name__)


 def _table_slice(table, index):
     col = list()
@@ -22,14 +24,12 @@ def _table_where(table, index, value):
 def _sum_table_index(table, index):
-    # print(f'index {index}')
     s = 0
     count = 0
     for row in table:
         v = row[index]
         if v is None:
             continue
-        # print(v)
         s += v
         count += 1
     return s
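For reference, _sum_table_index sums one column of a row-shaped table while skipping None cells; note that the count accumulator survives the cleanup but is never returned. A toy call (table invented):

table = [(10, "a"), (None, "b"), (5, "c")]
# _sum_table_index(table, 0) skips the None cell and returns 10 + 5:
assert sum(v for v, _ in table if v is not None) == 15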
@@ -46,8 +46,6 @@ def _sort_table_index(table, index, reverse=False):
 def _select_rec(l, selector):
-    # print('selector:', selector)
-    # print('list:', l)
     if len(selector) == 1:
         return l[selector[0]]
     else:
@@ -109,9 +107,9 @@ def query_requests_by_period(project_id, start_time, end_time, filters: Optional
     ORDER BY T1.hh DESC;"""
     with ch_client.ClickHouseClient() as conn:
         query = conn.format(query=query, params=params)
-        # print("--------")
-        # print(query)
-        # print("--------")
+        logging.debug("--------")
+        logging.debug(query)
+        logging.debug("--------")
         res = conn.execute(query=query)
     if res is None or sum([r.get("sessions") for r in res]) == 0:
         return []
@@ -119,7 +117,6 @@ def query_requests_by_period(project_id, start_time, end_time, filters: Optional
     table_hh1, table_hh2, columns, this_period_hosts, last_period_hosts = __get_two_values(res, time_index='hh',
                                                                                            name_index='source')
     test = [k[4] for k in table_hh1]
-    # print(f'length {len(test)}, uniques {len(set(test))}')
     del res
     new_hosts = [x for x in this_period_hosts if x not in last_period_hosts]
@@ -218,9 +215,9 @@ def query_most_errors_by_period(project_id, start_time, end_time,
     with ch_client.ClickHouseClient() as conn:
         query = conn.format(query=query, params=params)
-        # print("--------")
-        # print(query)
-        # print("--------")
+        logging.debug("--------")
+        logging.debug(query)
+        logging.debug("--------")
         res = conn.execute(query=query)
     if res is None or sum([r.get("sessions") for r in res]) == 0:
         return []
@@ -228,18 +225,12 @@ def query_most_errors_by_period(project_id, start_time, end_time,
     table_hh1, table_hh2, columns, this_period_errors, last_period_errors = __get_two_values(res, time_index='hh',
                                                                                              name_index='names')
     del res
-    # print(table_hh1)
-    # print('\n')
-    # print(table_hh2)
-    # print('\n')
     new_errors = [x for x in this_period_errors if x not in last_period_errors]
     common_errors = [x for x in this_period_errors if x not in new_errors]
     sessions_idx = columns.index('sessions')
     names_idx = columns.index('names')
-    print(_table_where(table_hh1, names_idx, this_period_errors[0]))
     percentage_errors = dict()
     total = _sum_table_index(table_hh1, sessions_idx)
-    # error_increase = dict()
@@ -308,9 +299,9 @@ def query_cpu_memory_by_period(project_id, start_time, end_time,
     ORDER BY T1.hh DESC;"""
     with ch_client.ClickHouseClient() as conn:
         query = conn.format(query=query, params=params)
-        # print("--------")
-        # print(query)
-        # print("--------")
+        logging.debug("--------")
+        logging.debug(query)
+        logging.debug("--------")
         res = conn.execute(query=query)
     if res is None or sum([r.get("sessions") for r in res]) == 0:
         return []
@@ -318,8 +309,8 @@ def query_cpu_memory_by_period(project_id, start_time, end_time,
     table_hh1, table_hh2, columns, this_period_resources, last_period_resources = __get_two_values(res, time_index='hh',
                                                                                                    name_index='names')
-    print(f'TB1\n{table_hh1}')
-    print(f'TB2\n{table_hh2}')
+    logging.debug(f'TB1\n{table_hh1}')
+    logging.debug(f'TB2\n{table_hh2}')
     del res
     memory_idx = columns.index('memory_used')
@@ -387,9 +378,9 @@ def query_click_rage_by_period(project_id, start_time, end_time,
     ORDER BY T1.hh DESC;"""
     with ch_client.ClickHouseClient() as conn:
         query = conn.format(query=query, params=params)
-        # print("--------")
-        # print(query)
-        # print("--------")
+        logging.debug("--------")
+        logging.debug(query)
+        logging.debug("--------")
         res = conn.execute(query=query)
     if res is None or sum([r.get("sessions") for r in res]) == 0:
         return []
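One observation on the logging swap in this file: it defines logger = logging.getLogger(__name__), yet the converted calls go through the root logging.debug with eagerly evaluated f-strings. A more conventional pattern, if desired, is lazy %-formatting on the module logger (a sketch of the alternative, not what the commit does):

import logging

logger = logging.getLogger(__name__)

def log_tables(table_hh1, table_hh2):
    # Lazy formatting: the string work is skipped unless DEBUG is enabled.
    logger.debug("TB1\n%s", table_hh1)
    logger.debug("TB2\n%s", table_hh2)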