From d11aa741e0c5d042d46da0ca71057784638c1df4 Mon Sep 17 00:00:00 2001 From: Kraiem Taha Yassine Date: Thu, 7 Nov 2024 16:03:02 +0100 Subject: [PATCH] Dev (#2729) * fix(chalice): fixed Math-operators validation refactor(chalice): search for sessions that have events for heatmaps * refactor(chalice): search for sessions that have at least 1 location event for heatmaps * fix(chalice): fixed Math-operators validation refactor(chalice): search for sessions that have events for heatmaps * refactor(chalice): search for sessions that have at least 1 location event for heatmaps * feat(chalice): autocomplete return top 10 with stats * fix(chalice): fixed autocomplete top 10 meta-filters * refactor(chalice): cleaned exp search sessions code * refactor(chalice): simplified funnels refactor(chalice): support metric-format=user_id for funnels --- api/chalicelib/core/custom_metrics.py | 9 +- api/chalicelib/core/funnels.py | 15 +- api/chalicelib/core/significance.py | 221 +++++++++++++++++++++++++ ee/api/chalicelib/core/sessions_exp.py | 118 +------------ 4 files changed, 243 insertions(+), 120 deletions(-) diff --git a/api/chalicelib/core/custom_metrics.py b/api/chalicelib/core/custom_metrics.py index 1f2de51e8..c8a91028b 100644 --- a/api/chalicelib/core/custom_metrics.py +++ b/api/chalicelib/core/custom_metrics.py @@ -44,9 +44,12 @@ def __get_funnel_chart(project_id: int, data: schemas.CardFunnel, user_id: int = "totalDropDueToIssues": 0 } - return funnels.get_top_insights_on_the_fly_widget(project_id=project_id, - data=data.series[0].filter, - metric_format=data.metric_format) + # return funnels.get_top_insights_on_the_fly_widget(project_id=project_id, + # data=data.series[0].filter, + # metric_format=data.metric_format) + return funnels.get_simple_funnel(project_id=project_id, + data=data.series[0].filter, + metric_format=data.metric_format) def __get_errors_list(project_id, user_id, data: schemas.CardSchema): diff --git a/api/chalicelib/core/funnels.py 
b/api/chalicelib/core/funnels.py index 427bc996b..cba19417a 100644 --- a/api/chalicelib/core/funnels.py +++ b/api/chalicelib/core/funnels.py @@ -35,7 +35,6 @@ def __fix_stages(f_events: List[schemas.SessionSearchEventSchema2]): return events -# def get_top_insights_on_the_fly_widget(project_id, data: schemas.FunnelInsightsPayloadSchema): def get_top_insights_on_the_fly_widget(project_id, data: schemas.CardSeriesFilterSchema, metric_format: schemas.MetricExtendedFormatType): data.events = filter_stages(__parse_events(data.events)) @@ -58,7 +57,6 @@ def get_top_insights_on_the_fly_widget(project_id, data: schemas.CardSeriesFilte "totalDropDueToIssues": total_drop_due_to_issues} -# def get_issues_on_the_fly_widget(project_id, data: schemas.FunnelSearchPayloadSchema): def get_issues_on_the_fly_widget(project_id, data: schemas.CardSeriesFilterSchema): data.events = filter_stages(data.events) data.events = __fix_stages(data.events) @@ -69,3 +67,16 @@ def get_issues_on_the_fly_widget(project_id, data: schemas.CardSeriesFilterSchem "issues": helper.dict_to_camel_case( significance.get_issues_list(filter_d=data, project_id=project_id, first_stage=1, last_stage=len(data.events)))} + + +def get_simple_funnel(project_id, data: schemas.CardSeriesFilterSchema, + metric_format: schemas.MetricExtendedFormatType): + data.events = filter_stages(__parse_events(data.events)) + data.events = __fix_stages(data.events) + if len(data.events) == 0: + return {"stages": [], "totalDropDueToIssues": 0} + insights = significance.get_simple_funnel(filter_d=data, + project_id=project_id, + metric_format=metric_format) + + return {"stages": insights, "totalDropDueToIssues": 0} diff --git a/api/chalicelib/core/significance.py b/api/chalicelib/core/significance.py index 8eeae541c..6ea9b6ad9 100644 --- a/api/chalicelib/core/significance.py +++ b/api/chalicelib/core/significance.py @@ -238,6 +238,227 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id) return rows +def 
get_simple_funnel(filter_d: schemas.CardSeriesFilterSchema, project_id: int, + metric_format: schemas.MetricExtendedFormatType) -> List[RealDictRow]: + """ + Add minimal timestamp + :param filter_d: dict contains events&filters&... + :return: + """ + + stages: List[schemas.SessionSearchEventSchema2] = filter_d.events + filters: List[schemas.SessionSearchFilterSchema] = filter_d.filters + + stage_constraints = ["main.timestamp <= %(endTimestamp)s"] + first_stage_extra_constraints = ["s.project_id=%(project_id)s", "s.start_ts >= %(startTimestamp)s", + "s.start_ts <= %(endTimestamp)s"] + if metric_format == schemas.MetricExtendedFormatType.SESSION_COUNT: + count_value = '1' + else: + count_value = 'user_id' + first_stage_extra_constraints.append("user_id IS NOT NULL") + first_stage_extra_constraints.append("user_id !=''") + + filter_extra_from = [] + n_stages_query = [] + values = {} + if len(filters) > 0: + meta_keys = None + for i, f in enumerate(filters): + if len(f.value) == 0: + continue + f.value = helper.values_for_operator(value=f.value, op=f.operator) + + op = sh.get_sql_operator(f.operator) + + filter_type = f.type + f_k = f"f_value{i}" + values = {**values, + **sh.multi_values(f.value, value_key=f_k)} + is_not = False + if sh.is_negation_operator(f.operator): + is_not = True + if filter_type == schemas.FilterType.USER_BROWSER: + first_stage_extra_constraints.append( + sh.multi_conditions(f's.user_browser {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif filter_type in [schemas.FilterType.USER_OS, schemas.FilterType.USER_OS_MOBILE]: + first_stage_extra_constraints.append( + sh.multi_conditions(f's.user_os {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif filter_type in [schemas.FilterType.USER_DEVICE, schemas.FilterType.USER_DEVICE_MOBILE]: + first_stage_extra_constraints.append( + sh.multi_conditions(f's.user_device {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif filter_type in 
[schemas.FilterType.USER_COUNTRY, schemas.FilterType.USER_COUNTRY_MOBILE]: + first_stage_extra_constraints.append( + sh.multi_conditions(f's.user_country {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + elif filter_type == schemas.FilterType.DURATION: + if len(f.value) > 0 and f.value[0] is not None: + first_stage_extra_constraints.append(f's.duration >= %(minDuration)s') + values["minDuration"] = f.value[0] + if len(f.value) > 1 and f.value[1] is not None and int(f.value[1]) > 0: + first_stage_extra_constraints.append('s.duration <= %(maxDuration)s') + values["maxDuration"] = f.value[1] + elif filter_type == schemas.FilterType.REFERRER: + # events_query_part = events_query_part + f"INNER JOIN events.pages AS p USING(session_id)" + filter_extra_from = [f"INNER JOIN {events.EventType.LOCATION.table} AS p USING(session_id)"] + first_stage_extra_constraints.append( + sh.multi_conditions(f"p.base_referrer {op} %({f_k})s", f.value, is_not=is_not, value_key=f_k)) + elif filter_type == events.EventType.METADATA.ui_type: + if meta_keys is None: + meta_keys = metadata.get(project_id=project_id) + meta_keys = {m["key"]: m["index"] for m in meta_keys} + if f.source in meta_keys.keys(): + first_stage_extra_constraints.append( + sh.multi_conditions( + f's.{metadata.index_to_colname(meta_keys[f.source])} {op} %({f_k})s', f.value, + is_not=is_not, value_key=f_k)) + # values[f_k] = helper.string_to_sql_like_with_op(f["value"][0], op) + elif filter_type in [schemas.FilterType.USER_ID, schemas.FilterType.USER_ID_MOBILE]: + first_stage_extra_constraints.append( + sh.multi_conditions(f's.user_id {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + # values[f_k] = helper.string_to_sql_like_with_op(f["value"][0], op) + elif filter_type in [schemas.FilterType.USER_ANONYMOUS_ID, + schemas.FilterType.USER_ANONYMOUS_ID_MOBILE]: + first_stage_extra_constraints.append( + sh.multi_conditions(f's.user_anonymous_id {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + #
values[f_k] = helper.string_to_sql_like_with_op(f["value"][0], op) + elif filter_type in [schemas.FilterType.REV_ID, schemas.FilterType.REV_ID_MOBILE]: + first_stage_extra_constraints.append( + sh.multi_conditions(f's.rev_id {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + # values[f_k] = helper.string_to_sql_like_with_op(f["value"][0], op) + i = -1 + for s in stages: + + if s.operator is None: + s.operator = schemas.SearchEventOperator.IS + + if not isinstance(s.value, list): + s.value = [s.value] + is_any = sh.isAny_opreator(s.operator) + if not is_any and isinstance(s.value, list) and len(s.value) == 0: + continue + i += 1 + if i == 0: + extra_from = filter_extra_from + ["INNER JOIN public.sessions AS s USING (session_id)"] + else: + extra_from = [] + op = sh.get_sql_operator(s.operator) + # event_type = s["type"].upper() + event_type = s.type + if event_type == events.EventType.CLICK.ui_type: + next_table = events.EventType.CLICK.table + next_col_name = events.EventType.CLICK.column + elif event_type == events.EventType.INPUT.ui_type: + next_table = events.EventType.INPUT.table + next_col_name = events.EventType.INPUT.column + elif event_type == events.EventType.LOCATION.ui_type: + next_table = events.EventType.LOCATION.table + next_col_name = events.EventType.LOCATION.column + elif event_type == events.EventType.CUSTOM.ui_type: + next_table = events.EventType.CUSTOM.table + next_col_name = events.EventType.CUSTOM.column + # IOS -------------- + elif event_type == events.EventType.CLICK_MOBILE.ui_type: + next_table = events.EventType.CLICK_MOBILE.table + next_col_name = events.EventType.CLICK_MOBILE.column + elif event_type == events.EventType.INPUT_MOBILE.ui_type: + next_table = events.EventType.INPUT_MOBILE.table + next_col_name = events.EventType.INPUT_MOBILE.column + elif event_type == events.EventType.VIEW_MOBILE.ui_type: + next_table = events.EventType.VIEW_MOBILE.table + next_col_name = events.EventType.VIEW_MOBILE.column + elif event_type == 
events.EventType.CUSTOM_MOBILE.ui_type: + next_table = events.EventType.CUSTOM_MOBILE.table + next_col_name = events.EventType.CUSTOM_MOBILE.column + else: + logger.warning(f"=================UNDEFINED:{event_type}") + continue + + values = {**values, **sh.multi_values(helper.values_for_operator(value=s.value, op=s.operator), + value_key=f"value{i + 1}")} + if sh.is_negation_operator(s.operator) and i > 0: + op = sh.reverse_sql_operator(op) + main_condition = "left_not.session_id ISNULL" + extra_from.append(f"""LEFT JOIN LATERAL (SELECT session_id + FROM {next_table} AS s_main + WHERE + {sh.multi_conditions(f"s_main.{next_col_name} {op} %(value{i + 1})s", + values=s.value, value_key=f"value{i + 1}")} + AND s_main.timestamp >= T{i}.stage{i}_timestamp + AND s_main.session_id = T1.session_id) AS left_not ON (TRUE)""") + else: + if is_any: + main_condition = "TRUE" + else: + main_condition = sh.multi_conditions(f"main.{next_col_name} {op} %(value{i + 1})s", + values=s.value, value_key=f"value{i + 1}") + n_stages_query.append(f""" + (SELECT main.session_id, + {"MIN(main.timestamp)" if i + 1 < len(stages) else "MAX(main.timestamp)"} AS stage{i + 1}_timestamp, + {count_value} AS c + FROM {next_table} AS main {" ".join(extra_from)} + WHERE main.timestamp >= {f"T{i}.stage{i}_timestamp" if i > 0 else "%(startTimestamp)s"} + {f"AND main.session_id=T1.session_id" if i > 0 else ""} + AND {main_condition} + {(" AND " + " AND ".join(stage_constraints)) if len(stage_constraints) > 0 else ""} + {(" AND " + " AND ".join(first_stage_extra_constraints)) if len(first_stage_extra_constraints) > 0 and i == 0 else ""} + GROUP BY main.session_id,{count_value}) + AS T{i + 1} {"ON (TRUE)" if i > 0 else ""} + """) + count_value = '1' + n_stages = len(n_stages_query) + if n_stages == 0: + return [] + n_stages_query = " LEFT JOIN LATERAL ".join(n_stages_query) + + n_stages_query = f""" + SELECT {",".join([f"COUNT(T{i + 1}.c) AS stage{i + 1}" for i in range(n_stages)])} + FROM {n_stages_query}; 
+ """ + + params = {"project_id": project_id, "startTimestamp": filter_d.startTimestamp, + "endTimestamp": filter_d.endTimestamp, **values} + with pg_client.PostgresClient() as cur: + query = cur.mogrify(n_stages_query, params) + logger.debug("---------------------------------------------------") + logger.debug(query) + logger.debug("---------------------------------------------------") + try: + cur.execute(query) + row = cur.fetchone() + except Exception as err: + logger.warning("--------- SIMPLE FUNNEL SEARCH QUERY EXCEPTION -----------") + logger.warning(query.decode('UTF-8')) + logger.warning("--------- PAYLOAD -----------") + logger.warning(filter_d.model_dump_json()) + logger.warning("--------------------") + raise err + + stages_list = [] + for i, stage in enumerate(stages): + count = row[f"stage{i + 1}"] + drop = None + if i != 0: + base_count = row[f"stage{i}"] + if base_count == 0: + drop = 0 + elif base_count > 0: + drop = int(100 * (base_count - count) / base_count) + + stages_list.append( + {"value": stage.value, + "type": stage.type, + "operator": stage.operator, + "dropPct": drop, + "count": count + } + ) + + return stages_list + + def pearson_corr(x: list, y: list): n = len(x) if n != len(y): diff --git a/ee/api/chalicelib/core/sessions_exp.py b/ee/api/chalicelib/core/sessions_exp.py index 0654f03c7..85ad9a4a3 100644 --- a/ee/api/chalicelib/core/sessions_exp.py +++ b/ee/api/chalicelib/core/sessions_exp.py @@ -222,9 +222,6 @@ def search_sessions(data: schemas.SessionsSearchPayloadSchema, project_id, user_ sessions[i]["metadata"] = ast.literal_eval(sessions[i]["metadata"]) sessions[i] = schemas.SessionModel.parse_obj(helper.dict_to_camel_case(sessions[i])) - # if not data.group_by_user and data.sort is not None and data.sort != "session_id": - # sessions = sorted(sessions, key=lambda s: s[helper.key_to_snake_case(data.sort)], - # reverse=data.order.upper() == "DESC") return { 'total': total, 'sessions': sessions @@ -871,16 +868,7 @@ def 
search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu event_where = ["main.project_id = %(projectId)s", "main.datetime >= toDateTime(%(startDate)s/1000)", "main.datetime <= toDateTime(%(endDate)s/1000)"] - # if favorite_only and not errors_only: - # event_from += f"INNER JOIN {exp_ch_helper.get_user_favorite_sessions_table()} AS fs USING(session_id)" - # event_where.append("fs.user_id = %(userId)s") - # else: - # event_from = "%s" - # event_where = ["main.datetime >= toDateTime(%(startDate)s/1000)", - # "main.datetime <= toDateTime(%(endDate)s/1000)", - # "main.session_id=event_0.session_id"] - # if data.events_order == schemas.SearchEventOrder._then: - # event_where.append(f"event_{event_index - 1}.datetime <= main.datetime") + e_k = f"e_value{i}" s_k = e_k + "_source" @@ -1040,15 +1028,7 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu event_where.append(_multiple_conditions(f"main.{_column} {op} %({e_k})s", event.value, value_key=e_k)) events_conditions[-1]["condition"] = event_where[-1] - # elif EventType == events.EventType.GRAPHQL.ui_type: - # event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main" - # event_where.append(f"main.event_type='GRAPHQL'") - # events_conditions.append({"type": event_where[-1]}) - # if not is_any: - # event_where.append( - # _multiple_conditions(f"main.{events.event_type.GRAPHQL.column} {op} %({e_k})s", event.value, - # value_key=e_k)) - # events_conditions[-1]["condition"] = event_where[-1] + elif event_type == events.EventType.STATEACTION.ui_type: event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main " _column = events.EventType.STATEACTION.column @@ -1217,21 +1197,6 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu events_conditions[-1]["condition"].append(event_where[-1]) events_conditions[-1]["condition"] = " AND ".join(events_conditions[-1]["condition"]) - # elif EventType == schemas.PerformanceEventType.fetch_duration: - # 
event_from = event_from % f"{events.EventType.REQUEST.table} AS main " - # if not is_any: - # event_where.append( - # _multiple_conditions(f"main.url_path {op} %({e_k})s", - # event.value, value_key=e_k)) - # col = performance_event.get_col(EventType) - # colname = col["column"] - # tname = "main" - # e_k += "_custom" - # full_args = {**full_args, **_multiple_values(event.source, value_key=e_k)} - # event_where.append(f"{tname}.{colname} IS NOT NULL AND {tname}.{colname}>0 AND " + - # _multiple_conditions(f"{tname}.{colname} {event.sourceOperator} %({e_k})s", - # event.source, value_key=e_k)) - # TODO: isNot for PerformanceEvent elif event_type in [schemas.PerformanceEventType.LOCATION_DOM_COMPLETE, schemas.PerformanceEventType.LOCATION_LARGEST_CONTENTFUL_PAINT_TIME, schemas.PerformanceEventType.LOCATION_TTFB]: @@ -1278,58 +1243,7 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu event.source, value_key=e_k)) events_conditions[-1]["condition"].append(event_where[-1]) events_conditions[-1]["condition"] = " AND ".join(events_conditions[-1]["condition"]) - # # TODO: no isNot for TimeBetweenEvents - # elif event_type == schemas.PerformanceEventType.time_between_events: - # event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main " - # # event_from = event_from % f"{getattr(events.event_type, event.value[0].type).table} AS main INNER JOIN {getattr(events.event_type, event.value[1].type).table} AS main2 USING(session_id) " - # event_where.append(f"main.event_type='{__exp_ch_helper.get_event_type(event.value[0].type, platform=platform)}'") - # events_conditions.append({"type": event_where[-1]}) - # event_where.append(f"main.event_type='{__exp_ch_helper.get_event_type(event.value[0].type, platform=platform)}'") - # events_conditions.append({"type": event_where[-1]}) - # - # if not isinstance(event.value[0].value, list): - # event.value[0].value = [event.value[0].value] - # if not isinstance(event.value[1].value, list): - # 
event.value[1].value = [event.value[1].value] - # event.value[0].value = helper.values_for_operator(value=event.value[0].value, - # op=event.value[0].operator) - # event.value[1].value = helper.values_for_operator(value=event.value[1].value, - # op=event.value[0].operator) - # e_k1 = e_k + "_e1" - # e_k2 = e_k + "_e2" - # full_args = {**full_args, - # **_multiple_values(event.value[0].value, value_key=e_k1), - # **_multiple_values(event.value[1].value, value_key=e_k2)} - # s_op = get_sql_operator(event.value[0].operator) - # # event_where += ["main2.timestamp >= %(startDate)s", "main2.timestamp <= %(endDate)s"] - # # if event_index > 0 and not or_events: - # # event_where.append("main2.session_id=event_0.session_id") - # is_any = _isAny_opreator(event.value[0].operator) - # if not is_any: - # event_where.append( - # _multiple_conditions( - # f"main.{getattr(events.EventType, event.value[0].type).column} {s_op} %({e_k1})s", - # event.value[0].value, value_key=e_k1)) - # events_conditions[-2]["condition"] = event_where[-1] - # s_op = get_sql_operator(event.value[1].operator) - # is_any = _isAny_opreator(event.value[1].operator) - # if not is_any: - # event_where.append( - # _multiple_conditions( - # f"main.{getattr(events.EventType, event.value[1].type).column} {s_op} %({e_k2})s", - # event.value[1].value, value_key=e_k2)) - # events_conditions[-1]["condition"] = event_where[-1] - # - # e_k += "_custom" - # full_args = {**full_args, **_multiple_values(event.source, value_key=e_k)} - # # event_where.append( - # # _multiple_conditions(f"main2.timestamp - main.timestamp {event.sourceOperator} %({e_k})s", - # # event.source, value_key=e_k)) - # # events_conditions[-2]["time"] = f"(?t{event.sourceOperator} %({e_k})s)" - # events_conditions[-2]["time"] = _multiple_conditions(f"?t{event.sourceOperator}%({e_k})s", - # event.source, value_key=e_k) - # event_index += 1 - # TODO: no isNot for RequestDetails + elif event_type == schemas.EventType.REQUEST_DETAILS: event_from = 
event_from % f"{MAIN_EVENTS_TABLE} AS main " event_where.append(f"main.event_type='{exp_ch_helper.get_event_type(event_type, platform=platform)}'") @@ -1579,33 +1493,7 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu extra_constraints.append("s.datetime >= toDateTime(%(startDate)s/1000)") if data.endTimestamp is not None: extra_constraints.append("s.datetime <= toDateTime(%(endDate)s/1000)") - # if data.platform is not None: - # if data.platform == schemas.PlatformType.mobile: - # extra_constraints.append(b"s.user_os in ('Android','BlackBerry OS','iOS','Tizen','Windows Phone')") - # elif data.platform == schemas.PlatformType.desktop: - # extra_constraints.append( - # b"s.user_os in ('Chrome OS','Fedora','Firefox OS','Linux','Mac OS X','Ubuntu','Windows')") - # if errors_only: - # extra_from += f" INNER JOIN {events.EventType.ERROR.table} AS er USING (session_id) INNER JOIN public.errors AS ser USING (error_id)" - # extra_constraints.append("ser.source = 'js_exception'") - # extra_constraints.append("ser.project_id = %(project_id)s") - # if error_status != schemas.ErrorStatus.all: - # extra_constraints.append("ser.status = %(error_status)s") - # full_args["error_status"] = error_status - # if favorite_only: - # extra_from += " INNER JOIN final.user_favorite_errors AS ufe USING (error_id)" - # extra_constraints.append("ufe.user_id = %(userId)s") - - # if favorite_only and not errors_only and user_id is not None: - # extra_from += f"""INNER JOIN (SELECT session_id - # FROM {exp_ch_helper.get_user_favorite_sessions_table()} - # WHERE user_id=%(userId)s) AS favorite_sessions USING (session_id)""" - # elif not favorite_only and not errors_only and user_id is not None: - # extra_from += f"""LEFT JOIN (SELECT session_id - # FROM {exp_ch_helper.get_user_favorite_sessions_table()} AS user_favorite_sessions - # WHERE user_id = %(userId)s) AS favorite_sessions - # ON (s.session_id=favorite_sessions.session_id)""" extra_join = "" if issue is 
not None: extra_join = """