From 3654dccec1138910be409eaf0525288d85c5a4ac Mon Sep 17 00:00:00 2001
From: Kraiem Taha Yassine
Date: Mon, 24 Jun 2024 15:53:00 +0200
Subject: [PATCH] Dev (#2300)

* refactor(chalice): upgraded dependencies

* refactor(chalice): upgraded dependencies
  feat(chalice): support heatmaps

* feat(chalice): support table-of-browsers showing user-count

* feat(chalice): support table-of-devices showing user-count

* feat(chalice): support table-of-URLs showing user-count

* refactor(chalice): refactored insights

* feat(chalice): support funnels showing user-count
---
 api/chalicelib/core/custom_metrics.py       |  5 ++-
 api/chalicelib/core/funnels.py              | 19 ++++----
 api/chalicelib/core/significance.py         | 49 ++++++++++++---------
 api/schemas/schemas.py                      |  4 +-
 ee/api/chalicelib/core/custom_metrics.py    |  4 +-
 ee/api/chalicelib/core/sessions_insights.py | 43 +++++++-----------
 6 files changed, 65 insertions(+), 59 deletions(-)

diff --git a/api/chalicelib/core/custom_metrics.py b/api/chalicelib/core/custom_metrics.py
index 823601409..947a5ad01 100644
--- a/api/chalicelib/core/custom_metrics.py
+++ b/api/chalicelib/core/custom_metrics.py
@@ -64,7 +64,10 @@ def __get_funnel_chart(project_id: int, data: schemas.CardFunnel, user_id: int =
             "stages": [],
             "totalDropDueToIssues": 0
         }
-    return funnels.get_top_insights_on_the_fly_widget(project_id=project_id, data=data.series[0].filter)
+
+    return funnels.get_top_insights_on_the_fly_widget(project_id=project_id,
+                                                      data=data.series[0].filter,
+                                                      metric_of=data.metric_of)
 
 
 def __get_errors_list(project_id, user_id, data: schemas.CardSchema):
diff --git a/api/chalicelib/core/funnels.py b/api/chalicelib/core/funnels.py
index 870d6eac6..e15e89c31 100644
--- a/api/chalicelib/core/funnels.py
+++ b/api/chalicelib/core/funnels.py
@@ -36,20 +36,23 @@ def __fix_stages(f_events: List[schemas.SessionSearchEventSchema2]):
 
 
 # def get_top_insights_on_the_fly_widget(project_id, data: schemas.FunnelInsightsPayloadSchema):
-def get_top_insights_on_the_fly_widget(project_id, data: schemas.CardSeriesFilterSchema):
+def get_top_insights_on_the_fly_widget(project_id, data: schemas.CardSeriesFilterSchema,
+                                       metric_of: schemas.MetricOfFunnels):
     data.events = filter_stages(__parse_events(data.events))
     data.events = __fix_stages(data.events)
     if len(data.events) == 0:
         return {"stages": [], "totalDropDueToIssues": 0}
-    insights, total_drop_due_to_issues = significance.get_top_insights(filter_d=data, project_id=project_id)
+    insights, total_drop_due_to_issues = significance.get_top_insights(filter_d=data,
+                                                                       project_id=project_id,
+                                                                       metric_of=metric_of)
     insights = helper.list_to_camel_case(insights)
     if len(insights) > 0:
-        # TODO: check if this correct
-        if total_drop_due_to_issues > insights[0]["sessionsCount"]:
-            if len(insights) == 0:
-                total_drop_due_to_issues = 0
-            else:
-                total_drop_due_to_issues = insights[0]["sessionsCount"] - insights[-1]["sessionsCount"]
+        if metric_of == schemas.MetricOfFunnels.session_count and total_drop_due_to_issues > (
+                insights[0]["sessionsCount"] - insights[-1]["sessionsCount"]):
+            total_drop_due_to_issues = insights[0]["sessionsCount"] - insights[-1]["sessionsCount"]
+        elif metric_of == schemas.MetricOfFunnels.user_count and total_drop_due_to_issues > (
+                insights[0]["usersCount"] - insights[-1]["usersCount"]):
+            total_drop_due_to_issues = insights[0]["usersCount"] - insights[-1]["usersCount"]
         insights[-1]["dropDueToIssues"] = total_drop_due_to_issues
     return {"stages": insights, "totalDropDueToIssues": total_drop_due_to_issues}
 
diff --git a/api/chalicelib/core/significance.py b/api/chalicelib/core/significance.py
index 59db18589..3ccc22240 100644
--- a/api/chalicelib/core/significance.py
+++ b/api/chalicelib/core/significance.py
@@ -34,10 +34,7 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id)
     stages: [dict] = filter_d.events
     filters: [dict] = filter_d.filters
     filter_issues = []
-    # TODO: enable this if needed by an endpoint
-    # filter_issues = filter_d.get("issueTypes")
-    # if filter_issues is None or len(filter_issues) == 0:
-    #     filter_issues = []
+
     stage_constraints = ["main.timestamp <= %(endTimestamp)s"]
     first_stage_extra_constraints = ["s.project_id=%(project_id)s", "s.start_ts >= %(startTimestamp)s",
                                      "s.start_ts <= %(endTimestamp)s"]
@@ -50,7 +47,7 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id)
             if len(f.value) == 0:
                 continue
             f.value = helper.values_for_operator(value=f.value, op=f.operator)
-            # filter_args = _multiple_values(f["value"])
+
             op = sh.get_sql_operator(f.operator)
             filter_type = f.type
 
@@ -195,7 +192,7 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id)
     n_stages_query += ") AS stages_t"
 
     n_stages_query = f"""
-    SELECT stages_and_issues_t.*, sessions.user_uuid
+    SELECT stages_and_issues_t.*, sessions.user_uuid, sessions.user_id
     FROM (
         SELECT * FROM (
             SELECT T1.session_id, {",".join([f"stage{i + 1}_timestamp" for i in range(n_stages)])}
@@ -217,7 +214,6 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id)
     ) AS stages_and_issues_t
     INNER JOIN sessions USING(session_id);
     """
-    # LIMIT 10000
     params = {"project_id": project_id, "startTimestamp": filter_d.startTimestamp,
               "endTimestamp": filter_d.endTimestamp,
               "issueTypes": tuple(filter_issues), **values}
@@ -236,6 +232,9 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id)
         logging.warning(filter_d.model_dump_json())
         logging.warning("--------------------")
         raise err
+    for r in rows:
+        if r["user_id"] == "":
+            r["user_id"] = None
     return rows
 
 
@@ -421,42 +420,47 @@ def count_sessions(rows, n_stages):
     return session_counts
 
 
-def count_users(rows, n_stages):
+def count_users(rows, n_stages, user_key="user_uuid"):
     users_in_stages = {i: set() for i in range(1, n_stages + 1)}
     for row in rows:
         for i in range(1, n_stages + 1):
-            if row[f"stage{i}_timestamp"] is not None:
-                users_in_stages[i].add(row["user_uuid"])
+            if row[f"stage{i}_timestamp"] is not None and row[user_key] is not None:
+                users_in_stages[i].add(row[user_key])
 
     users_count = {i: len(users_in_stages[i]) for i in range(1, n_stages + 1)}
     return users_count
 
 
-def get_stages(stages, rows):
+def get_stages(stages, rows, metric_of=schemas.MetricOfFunnels.session_count):
     n_stages = len(stages)
-    session_counts = count_sessions(rows, n_stages)
-    users_counts = count_users(rows, n_stages)
+    if metric_of == schemas.MetricOfFunnels.session_count:
+        base_counts = count_sessions(rows, n_stages)
+    else:
+        base_counts = count_users(rows, n_stages, user_key="user_id")
 
     stages_list = []
     for i, stage in enumerate(stages):
         drop = None
         if i != 0:
-            if base_counts[i] == 0:
+            if base_counts[i] == 0:
                 drop = 0
-            elif session_counts[i] > 0:
-                drop = int(100 * (session_counts[i] - session_counts[i + 1]) / session_counts[i])
+            elif base_counts[i] > 0:
+                drop = int(100 * (base_counts[i] - base_counts[i + 1]) / base_counts[i])
 
         stages_list.append(
             {"value": stage.value,
              "type": stage.type,
              "operator": stage.operator,
-             "sessionsCount": session_counts[i + 1],
             "drop_pct": drop,
-             "usersCount": users_counts[i + 1],
             "dropDueToIssues": 0
             }
        )
+        if metric_of == schemas.MetricOfFunnels.session_count:
+            stages_list[-1]["sessionsCount"] = base_counts[i + 1]
+        else:
+            stages_list[-1]["usersCount"] = base_counts[i + 1]
+
     return stages_list
 
 
@@ -539,7 +543,7 @@ def get_issues(stages, rows, first_stage=None, last_stage=None, drop_only=False)
     return n_critical_issues, issues_dict, total_drop_due_to_issues
 
 
-def get_top_insights(filter_d: schemas.CardSeriesFilterSchema, project_id):
+def get_top_insights(filter_d: schemas.CardSeriesFilterSchema, project_id, metric_of: schemas.MetricOfFunnels):
     output = []
     stages = filter_d.events
 
@@ -549,10 +553,11 @@ def get_top_insights(filter_d: schemas.CardSeriesFilterSchema, project_id):
 
     # The result of the multi-stage query
     rows = get_stages_and_events(filter_d=filter_d, project_id=project_id)
-    if len(rows) == 0:
-        return get_stages(stages, []), 0
     # Obtain the first part of the output
-    stages_list = get_stages(stages, rows)
+    stages_list = get_stages(stages, rows, metric_of=metric_of)
+    if len(rows) == 0:
+        return stages_list, 0
+
 
     # Obtain the second part of the output
     total_drop_due_to_issues = get_issues(stages, rows, first_stage=1,
diff --git a/api/schemas/schemas.py b/api/schemas/schemas.py
index 33a02c7e8..6a57b47e1 100644
--- a/api/schemas/schemas.py
+++ b/api/schemas/schemas.py
@@ -1023,6 +1023,7 @@ class MetricOfTimeseries(str, Enum):
 
 class MetricOfFunnels(str, Enum):
     session_count = MetricOfTimeseries.session_count.value
+    user_count = MetricOfTimeseries.user_count.value
 
 
 class MetricOfClickMap(str, Enum):
@@ -1166,7 +1167,8 @@ class CardFunnel(__CardSchema):
 
     @model_validator(mode="before")
    def __enforce_default(cls, values):
-        values["metricOf"] = MetricOfFunnels.session_count
+        if values.get("metricOf") and not MetricOfFunnels.has_value(values["metricOf"]):
+            values["metricOf"] = MetricOfFunnels.session_count
         values["viewType"] = MetricOtherViewType.other_chart
         if values.get("series") is not None and len(values["series"]) > 0:
             values["series"] = [values["series"][0]]
diff --git a/ee/api/chalicelib/core/custom_metrics.py b/ee/api/chalicelib/core/custom_metrics.py
index a1cf8c0a8..c9335cc46 100644
--- a/ee/api/chalicelib/core/custom_metrics.py
+++ b/ee/api/chalicelib/core/custom_metrics.py
@@ -75,7 +75,9 @@ def __get_funnel_chart(project_id: int, data: schemas.CardFunnel, user_id: int =
             "stages": [],
             "totalDropDueToIssues": 0
         }
-    return funnels.get_top_insights_on_the_fly_widget(project_id=project_id, data=data.series[0].filter)
+    return funnels.get_top_insights_on_the_fly_widget(project_id=project_id,
+                                                      data=data.series[0].filter,
+                                                      metric_of=data.metric_of)
 
 
 def __get_errors_list(project_id, user_id, data: schemas.CardSchema):
diff --git a/ee/api/chalicelib/core/sessions_insights.py b/ee/api/chalicelib/core/sessions_insights.py
index 781f7ed20..5bb433f1c 100644
--- a/ee/api/chalicelib/core/sessions_insights.py
+++ b/ee/api/chalicelib/core/sessions_insights.py
@@ -1,10 +1,12 @@
 from typing import Optional
-
+import logging
 import schemas
 from chalicelib.core import metrics
 from chalicelib.core import sessions_exp
 from chalicelib.utils import ch_client
 
+logger = logging.getLogger(__name__)
+
 
 def _table_slice(table, index):
     col = list()
@@ -22,14 +24,12 @@ def _table_where(table, index, value):
 
 
 def _sum_table_index(table, index):
-    # print(f'index {index}')
     s = 0
     count = 0
     for row in table:
         v = row[index]
         if v is None:
             continue
-        # print(v)
         s += v
         count += 1
     return s
@@ -46,8 +48,6 @@ def _sort_table_index(table, index, reverse=False):
 
 
 def _select_rec(l, selector):
-    # print('selector:', selector)
-    # print('list:', l)
     if len(selector) == 1:
         return l[selector[0]]
     else:
@@ -109,9 +107,9 @@ def query_requests_by_period(project_id, start_time, end_time, filters: Optional
               ORDER BY T1.hh DESC;"""
     with ch_client.ClickHouseClient() as conn:
         query = conn.format(query=query, params=params)
-        # print("--------")
-        # print(query)
-        # print("--------")
+        logger.debug("--------")
+        logger.debug(query)
+        logger.debug("--------")
         res = conn.execute(query=query)
     if res is None or sum([r.get("sessions") for r in res]) == 0:
         return []
@@ -119,7 +117,6 @@ def query_requests_by_period(project_id, start_time, end_time, filters: Optional
     table_hh1, table_hh2, columns, this_period_hosts, last_period_hosts = __get_two_values(res, time_index='hh',
                                                                                            name_index='source')
     test = [k[4] for k in table_hh1]
-    # print(f'length {len(test)}, uniques {len(set(test))}')
     del res
 
     new_hosts = [x for x in this_period_hosts if x not in last_period_hosts]
@@ -218,9 +215,9 @@ def query_most_errors_by_period(project_id, start_time, end_time,
 
     with ch_client.ClickHouseClient() as conn:
         query = conn.format(query=query, params=params)
-        # print("--------")
-        # print(query)
-        # print("--------")
+        logger.debug("--------")
+        logger.debug(query)
+        logger.debug("--------")
         res = conn.execute(query=query)
     if res is None or sum([r.get("sessions") for r in res]) == 0:
         return []
@@ -228,18 +225,12 @@ def query_most_errors_by_period(project_id, start_time, end_time,
     table_hh1, table_hh2, columns, this_period_errors, last_period_errors = __get_two_values(res, time_index='hh',
                                                                                              name_index='names')
     del res
-    # print(table_hh1)
-    # print('\n')
-    # print(table_hh2)
-    # print('\n')
 
     new_errors = [x for x in this_period_errors if x not in last_period_errors]
     common_errors = [x for x in this_period_errors if x not in new_errors]
     sessions_idx = columns.index('sessions')
     names_idx = columns.index('names')
 
-    print(_table_where(table_hh1, names_idx, this_period_errors[0]))
-
     percentage_errors = dict()
     total = _sum_table_index(table_hh1, sessions_idx)
     # error_increase = dict()
@@ -308,9 +299,9 @@ def query_cpu_memory_by_period(project_id, start_time, end_time,
               ORDER BY T1.hh DESC;"""
     with ch_client.ClickHouseClient() as conn:
         query = conn.format(query=query, params=params)
-        # print("--------")
-        # print(query)
-        # print("--------")
+        logger.debug("--------")
+        logger.debug(query)
+        logger.debug("--------")
         res = conn.execute(query=query)
     if res is None or sum([r.get("sessions") for r in res]) == 0:
         return []
@@ -318,8 +309,8 @@ def query_cpu_memory_by_period(project_id, start_time, end_time,
 
     table_hh1, table_hh2, columns, this_period_resources, last_period_resources = __get_two_values(res, time_index='hh',
                                                                                                    name_index='names')
-    print(f'TB1\n{table_hh1}')
-    print(f'TB2\n{table_hh2}')
+    logger.debug(f'TB1\n{table_hh1}')
+    logger.debug(f'TB2\n{table_hh2}')
     del res
 
     memory_idx = columns.index('memory_used')
@@ -387,9 +378,9 @@ def query_click_rage_by_period(project_id, start_time, end_time,
              ORDER BY T1.hh DESC;"""
     with ch_client.ClickHouseClient() as conn:
         query = conn.format(query=query, params=params)
-        # print("--------")
-        # print(query)
-        # print("--------")
+        logger.debug("--------")
+        logger.debug(query)
+        logger.debug("--------")
         res = conn.execute(query=query)
     if res is None or sum([r.get("sessions") for r in res]) == 0:
         return []
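
Note (illustrative, not part of the patch): the user-count funnel can legitimately diverge from the session-count one, because a user may open several sessions and anonymous sessions carry no user_id. The sketch below is a simplified, self-contained re-implementation of the two counting strategies that significance.py now switches between via metric_of; the sample rows are hypothetical stand-ins for the multi-stage query output, which carries one stage{i}_timestamp column per stage plus user_id.

def count_sessions(rows, n_stages):
    # One row per session: a session reached stage i if stage{i}_timestamp is set.
    return {i: sum(1 for r in rows if r[f"stage{i}_timestamp"] is not None)
            for i in range(1, n_stages + 1)}


def count_users(rows, n_stages, user_key="user_id"):
    # Distinct users per stage; rows without a user are skipped, which is why
    # get_stages_and_events first maps empty user_id strings to None.
    users_in_stages = {i: set() for i in range(1, n_stages + 1)}
    for row in rows:
        for i in range(1, n_stages + 1):
            if row[f"stage{i}_timestamp"] is not None and row[user_key] is not None:
                users_in_stages[i].add(row[user_key])
    return {i: len(users_in_stages[i]) for i in range(1, n_stages + 1)}


rows = [
    {"stage1_timestamp": 1, "stage2_timestamp": 2, "user_id": "u1"},
    {"stage1_timestamp": 3, "stage2_timestamp": None, "user_id": "u1"},  # same user, bounced session
    {"stage1_timestamp": 5, "stage2_timestamp": 6, "user_id": None},     # anonymous session
]

print(count_sessions(rows, 2))                   # {1: 3, 2: 2} -> 33% drop
print(count_users(rows, 2, user_key="user_id"))  # {1: 1, 2: 1} -> 0% drop

With these rows the session funnel drops 33% between the stages (3 -> 2) while the user funnel drops 0% (u1 reached both stages). That divergence is why get_stages selects count_sessions or count_users(user_key="user_id") based on metric_of, and why funnels.py clamps totalDropDueToIssues against the first-to-last difference of whichever count is active.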