* fix(chalice): fixed Math-operators validation
refactor(chalice): search for sessions that have events for heatmaps

* refactor(chalice): search for sessions that have at least 1 location event for heatmaps

* fix(chalice): fixed Math-operators validation
refactor(chalice): search for sessions that have events for heatmaps

* refactor(chalice): search for sessions that have at least 1 location event for heatmaps

* feat(chalice): autocomplete return top 10 with stats

* fix(chalice): fixed autocomplete top 10 meta-filters

* refactor(chalice): cleaned exp search sessions code

* refactor(chalice): simplified funnels
refactor(chalice): support metric-format=user_id for funnels
This commit is contained in:
Kraiem Taha Yassine 2024-11-07 16:03:02 +01:00 committed by GitHub
parent 92c4c5a1e3
commit d11aa741e0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 243 additions and 120 deletions

View file

@@ -44,9 +44,12 @@ def __get_funnel_chart(project_id: int, data: schemas.CardFunnel, user_id: int =
"totalDropDueToIssues": 0
}
return funnels.get_top_insights_on_the_fly_widget(project_id=project_id,
data=data.series[0].filter,
metric_format=data.metric_format)
# return funnels.get_top_insights_on_the_fly_widget(project_id=project_id,
# data=data.series[0].filter,
# metric_format=data.metric_format)
return funnels.get_simple_funnel(project_id=project_id,
data=data.series[0].filter,
metric_format=data.metric_format)
def __get_errors_list(project_id, user_id, data: schemas.CardSchema):

View file

@@ -35,7 +35,6 @@ def __fix_stages(f_events: List[schemas.SessionSearchEventSchema2]):
return events
# def get_top_insights_on_the_fly_widget(project_id, data: schemas.FunnelInsightsPayloadSchema):
def get_top_insights_on_the_fly_widget(project_id, data: schemas.CardSeriesFilterSchema,
metric_format: schemas.MetricExtendedFormatType):
data.events = filter_stages(__parse_events(data.events))
@@ -58,7 +57,6 @@ def get_top_insights_on_the_fly_widget(project_id, data: schemas.CardSeriesFilte
"totalDropDueToIssues": total_drop_due_to_issues}
# def get_issues_on_the_fly_widget(project_id, data: schemas.FunnelSearchPayloadSchema):
def get_issues_on_the_fly_widget(project_id, data: schemas.CardSeriesFilterSchema):
data.events = filter_stages(data.events)
data.events = __fix_stages(data.events)
@@ -69,3 +67,16 @@ def get_issues_on_the_fly_widget(project_id, data: schemas.CardSeriesFilterSchem
"issues": helper.dict_to_camel_case(
significance.get_issues_list(filter_d=data, project_id=project_id, first_stage=1,
last_stage=len(data.events)))}
def get_simple_funnel(project_id, data: schemas.CardSeriesFilterSchema,
                      metric_format: schemas.MetricExtendedFormatType):
    """Compute a simple funnel for the card's first series.

    Stages are normalized first; an empty stage list short-circuits to an
    empty funnel. The "drop due to issues" metric is not computed by this
    simple variant and is always reported as 0.
    """
    # Normalize raw events into valid, ordered funnel stages.
    parsed = __parse_events(data.events)
    data.events = __fix_stages(filter_stages(parsed))
    if not data.events:
        return {"stages": [], "totalDropDueToIssues": 0}
    stage_rows = significance.get_simple_funnel(filter_d=data,
                                                project_id=project_id,
                                                metric_format=metric_format)
    return {"stages": stage_rows, "totalDropDueToIssues": 0}

View file

@@ -238,6 +238,227 @@ def get_stages_and_events(filter_d: schemas.CardSeriesFilterSchema, project_id)
return rows
def get_simple_funnel(filter_d: schemas.CardSeriesFilterSchema, project_id: int,
                      metric_format: schemas.MetricExtendedFormatType) -> List[RealDictRow]:
    """Compute per-stage funnel counts with a single chained SQL query.

    Each valid stage becomes a sub-select; stage N+1 is attached to stage N
    via LEFT JOIN LATERAL and must occur in the same session at or after the
    previous stage's timestamp. The outer SELECT counts the rows that survive
    each stage.

    :param filter_d: series filter holding the funnel stages (events) and the
                     session-level filters
    :param project_id: project the sessions belong to
    :param metric_format: SESSION_COUNT counts sessions; any other value
                          groups by user_id (NULL/empty users excluded)
    :return: one dict per executed stage: value/type/operator/count/dropPct
    """
    stages: List[schemas.SessionSearchEventSchema2] = filter_d.events
    filters: List[schemas.SessionSearchFilterSchema] = filter_d.filters
    stage_constraints = ["main.timestamp <= %(endTimestamp)s"]
    first_stage_extra_constraints = ["s.project_id=%(project_id)s", "s.start_ts >= %(startTimestamp)s",
                                    "s.start_ts <= %(endTimestamp)s"]
    if metric_format == schemas.MetricExtendedFormatType.SESSION_COUNT:
        count_value = '1'
    else:
        # Count by user: sessions without a usable user_id cannot contribute.
        count_value = 'user_id'
        first_stage_extra_constraints.append("user_id IS NOT NULL")
        first_stage_extra_constraints.append("user_id !=''")
    filter_extra_from = []
    n_stages_query = []
    values = {}
    if len(filters) > 0:
        meta_keys = None
        for i, f in enumerate(filters):
            if len(f.value) == 0:
                continue
            f.value = helper.values_for_operator(value=f.value, op=f.operator)
            op = sh.get_sql_operator(f.operator)
            filter_type = f.type
            f_k = f"f_value{i}"
            values = {**values,
                      **sh.multi_values(f.value, value_key=f_k)}
            is_not = sh.is_negation_operator(f.operator)
            if filter_type == schemas.FilterType.USER_BROWSER:
                first_stage_extra_constraints.append(
                    sh.multi_conditions(f's.user_browser {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k))
            elif filter_type in [schemas.FilterType.USER_OS, schemas.FilterType.USER_OS_MOBILE]:
                first_stage_extra_constraints.append(
                    sh.multi_conditions(f's.user_os {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k))
            elif filter_type in [schemas.FilterType.USER_DEVICE, schemas.FilterType.USER_DEVICE_MOBILE]:
                first_stage_extra_constraints.append(
                    sh.multi_conditions(f's.user_device {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k))
            elif filter_type in [schemas.FilterType.USER_COUNTRY, schemas.FilterType.USER_COUNTRY_MOBILE]:
                first_stage_extra_constraints.append(
                    sh.multi_conditions(f's.user_country {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k))
            elif filter_type == schemas.FilterType.DURATION:
                if len(f.value) > 0 and f.value[0] is not None:
                    first_stage_extra_constraints.append(f's.duration >= %(minDuration)s')
                    values["minDuration"] = f.value[0]
                # BUGFIX: was len(f["value"]) — f is a pydantic model, not a dict,
                # so subscripting raised TypeError whenever a max duration was set.
                if len(f.value) > 1 and f.value[1] is not None and int(f.value[1]) > 0:
                    first_stage_extra_constraints.append('s.duration <= %(maxDuration)s')
                    values["maxDuration"] = f.value[1]
            elif filter_type == schemas.FilterType.REFERRER:
                # Referrer lives in the location-events table -> extra join on the first stage.
                filter_extra_from = [f"INNER JOIN {events.EventType.LOCATION.table} AS p USING(session_id)"]
                first_stage_extra_constraints.append(
                    sh.multi_conditions(f"p.base_referrer {op} %({f_k})s", f.value, is_not=is_not, value_key=f_k))
            elif filter_type == events.EventType.METADATA.ui_type:
                # Lazily resolve the project's metadata key -> column-index mapping.
                if meta_keys is None:
                    meta_keys = metadata.get(project_id=project_id)
                    meta_keys = {m["key"]: m["index"] for m in meta_keys}
                if f.source in meta_keys.keys():
                    first_stage_extra_constraints.append(
                        sh.multi_conditions(
                            f's.{metadata.index_to_colname(meta_keys[f.source])} {op} %({f_k})s', f.value,
                            is_not=is_not, value_key=f_k))
            elif filter_type in [schemas.FilterType.USER_ID, schemas.FilterType.USER_ID_MOBILE]:
                first_stage_extra_constraints.append(
                    sh.multi_conditions(f's.user_id {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k))
            elif filter_type in [schemas.FilterType.USER_ANONYMOUS_ID,
                                 schemas.FilterType.USER_ANONYMOUS_ID_MOBILE]:
                first_stage_extra_constraints.append(
                    sh.multi_conditions(f's.user_anonymous_id {op} %({f_k})s', f.value, is_not=is_not,
                                        value_key=f_k))
            elif filter_type in [schemas.FilterType.REV_ID, schemas.FilterType.REV_ID_MOBILE]:
                first_stage_extra_constraints.append(
                    sh.multi_conditions(f's.rev_id {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k))
    # ui_type -> event definition (table + column) for the supported stage types
    supported_events = {e.ui_type: e for e in (
        events.EventType.CLICK, events.EventType.INPUT,
        events.EventType.LOCATION, events.EventType.CUSTOM,
        # mobile
        events.EventType.CLICK_MOBILE, events.EventType.INPUT_MOBILE,
        events.EventType.VIEW_MOBILE, events.EventType.CUSTOM_MOBILE)}
    included_stages: List[schemas.SessionSearchEventSchema2] = []
    i = -1
    for s in stages:
        if s.operator is None:
            s.operator = schemas.SearchEventOperator.IS
        if not isinstance(s.value, list):
            s.value = [s.value]
        is_any = sh.isAny_opreator(s.operator)
        if not is_any and len(s.value) == 0:
            continue
        event_type = s.type
        event_def = supported_events.get(event_type)
        if event_def is None:
            # BUGFIX: skip BEFORE incrementing i; the old code bumped i first,
            # leaving gaps in the T{n} aliases and producing broken SQL.
            logger.warning(f"=================UNDEFINED:{event_type}")
            continue
        i += 1
        if i == 0:
            # Only the first stage joins the sessions table (plus filter joins).
            extra_from = filter_extra_from + ["INNER JOIN public.sessions AS s USING (session_id)"]
        else:
            extra_from = []
        op = sh.get_sql_operator(s.operator)
        next_table = event_def.table
        next_col_name = event_def.column
        values = {**values, **sh.multi_values(helper.values_for_operator(value=s.value, op=s.operator),
                                              value_key=f"value{i + 1}")}
        if sh.is_negation_operator(s.operator) and i > 0:
            # Negations after the first stage become an anti-join: keep the
            # session only if NO matching event follows the previous stage.
            op = sh.reverse_sql_operator(op)
            main_condition = "left_not.session_id ISNULL"
            extra_from.append(f"""LEFT JOIN LATERAL (SELECT session_id
                                       FROM {next_table} AS s_main
                                       WHERE
                                       {sh.multi_conditions(f"s_main.{next_col_name} {op} %(value{i + 1})s",
                                                            values=s.value, value_key=f"value{i + 1}")}
                                         AND s_main.timestamp >= T{i}.stage{i}_timestamp
                                         AND s_main.session_id = T1.session_id) AS left_not ON (TRUE)""")
        elif is_any:
            main_condition = "TRUE"
        else:
            main_condition = sh.multi_conditions(f"main.{next_col_name} {op} %(value{i + 1})s",
                                                 values=s.value, value_key=f"value{i + 1}")
        n_stages_query.append(f"""
        (SELECT main.session_id,
                {"MIN(main.timestamp)" if i + 1 < len(stages) else "MAX(main.timestamp)"} AS stage{i + 1}_timestamp,
                {count_value} AS c
         FROM {next_table} AS main {" ".join(extra_from)}
         WHERE main.timestamp >= {f"T{i}.stage{i}_timestamp" if i > 0 else "%(startTimestamp)s"}
               {f"AND main.session_id=T1.session_id" if i > 0 else ""}
               AND {main_condition}
               {(" AND " + " AND ".join(stage_constraints)) if len(stage_constraints) > 0 else ""}
               {(" AND " + " AND ".join(first_stage_extra_constraints)) if len(first_stage_extra_constraints) > 0 and i == 0 else ""}
         GROUP BY main.session_id,{count_value})
        AS T{i + 1} {"ON (TRUE)" if i > 0 else ""}
        """)
        # After the first stage only presence matters, so count plain rows.
        count_value = '1'
        included_stages.append(s)
    n_stages = len(n_stages_query)
    if n_stages == 0:
        return []
    n_stages_query = " LEFT JOIN LATERAL ".join(n_stages_query)
    n_stages_query = f"""
    SELECT {",".join([f"COUNT(T{i + 1}.c) AS stage{i + 1}" for i in range(n_stages)])}
    FROM {n_stages_query};
    """
    params = {"project_id": project_id, "startTimestamp": filter_d.startTimestamp,
              "endTimestamp": filter_d.endTimestamp, **values}
    with pg_client.PostgresClient() as cur:
        query = cur.mogrify(n_stages_query, params)
        logger.debug("---------------------------------------------------")
        logger.debug(query)
        logger.debug("---------------------------------------------------")
        try:
            cur.execute(query)
            row = cur.fetchone()
        except Exception as err:
            logger.warning("--------- SIMPLE FUNNEL SEARCH QUERY EXCEPTION -----------")
            logger.warning(query.decode('UTF-8'))
            logger.warning("--------- PAYLOAD -----------")
            logger.warning(filter_d.model_dump_json())
            logger.warning("--------------------")
            raise err
    stages_list = []
    # BUGFIX: iterate only the stages that produced a sub-query; iterating all
    # declared stages raised KeyError on row["stage{n}"] whenever one was skipped.
    for i, stage in enumerate(included_stages):
        count = row[f"stage{i + 1}"]
        drop = None
        if i != 0:
            base_count = row[f"stage{i}"]
            if base_count == 0:
                drop = 0
            elif base_count > 0:
                # Percentage of entries lost between the previous stage and this one.
                drop = int(100 * (base_count - count) / base_count)
        stages_list.append(
            {"value": stage.value,
             "type": stage.type,
             "operator": stage.operator,
             "dropPct": drop,
             "count": count
             }
        )
    return stages_list
def pearson_corr(x: list, y: list):
n = len(x)
if n != len(y):

View file

@@ -222,9 +222,6 @@ def search_sessions(data: schemas.SessionsSearchPayloadSchema, project_id, user_
sessions[i]["metadata"] = ast.literal_eval(sessions[i]["metadata"])
sessions[i] = schemas.SessionModel.parse_obj(helper.dict_to_camel_case(sessions[i]))
# if not data.group_by_user and data.sort is not None and data.sort != "session_id":
# sessions = sorted(sessions, key=lambda s: s[helper.key_to_snake_case(data.sort)],
# reverse=data.order.upper() == "DESC")
return {
'total': total,
'sessions': sessions
@@ -871,16 +868,7 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu
event_where = ["main.project_id = %(projectId)s",
"main.datetime >= toDateTime(%(startDate)s/1000)",
"main.datetime <= toDateTime(%(endDate)s/1000)"]
# if favorite_only and not errors_only:
# event_from += f"INNER JOIN {exp_ch_helper.get_user_favorite_sessions_table()} AS fs USING(session_id)"
# event_where.append("fs.user_id = %(userId)s")
# else:
# event_from = "%s"
# event_where = ["main.datetime >= toDateTime(%(startDate)s/1000)",
# "main.datetime <= toDateTime(%(endDate)s/1000)",
# "main.session_id=event_0.session_id"]
# if data.events_order == schemas.SearchEventOrder._then:
# event_where.append(f"event_{event_index - 1}.datetime <= main.datetime")
e_k = f"e_value{i}"
s_k = e_k + "_source"
@@ -1040,15 +1028,7 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu
event_where.append(_multiple_conditions(f"main.{_column} {op} %({e_k})s", event.value,
value_key=e_k))
events_conditions[-1]["condition"] = event_where[-1]
# elif EventType == events.EventType.GRAPHQL.ui_type:
# event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main"
# event_where.append(f"main.event_type='GRAPHQL'")
# events_conditions.append({"type": event_where[-1]})
# if not is_any:
# event_where.append(
# _multiple_conditions(f"main.{events.event_type.GRAPHQL.column} {op} %({e_k})s", event.value,
# value_key=e_k))
# events_conditions[-1]["condition"] = event_where[-1]
elif event_type == events.EventType.STATEACTION.ui_type:
event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main "
_column = events.EventType.STATEACTION.column
@@ -1217,21 +1197,6 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu
events_conditions[-1]["condition"].append(event_where[-1])
events_conditions[-1]["condition"] = " AND ".join(events_conditions[-1]["condition"])
# elif EventType == schemas.PerformanceEventType.fetch_duration:
# event_from = event_from % f"{events.EventType.REQUEST.table} AS main "
# if not is_any:
# event_where.append(
# _multiple_conditions(f"main.url_path {op} %({e_k})s",
# event.value, value_key=e_k))
# col = performance_event.get_col(EventType)
# colname = col["column"]
# tname = "main"
# e_k += "_custom"
# full_args = {**full_args, **_multiple_values(event.source, value_key=e_k)}
# event_where.append(f"{tname}.{colname} IS NOT NULL AND {tname}.{colname}>0 AND " +
# _multiple_conditions(f"{tname}.{colname} {event.sourceOperator} %({e_k})s",
# event.source, value_key=e_k))
# TODO: isNot for PerformanceEvent
elif event_type in [schemas.PerformanceEventType.LOCATION_DOM_COMPLETE,
schemas.PerformanceEventType.LOCATION_LARGEST_CONTENTFUL_PAINT_TIME,
schemas.PerformanceEventType.LOCATION_TTFB]:
@@ -1278,58 +1243,7 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu
event.source, value_key=e_k))
events_conditions[-1]["condition"].append(event_where[-1])
events_conditions[-1]["condition"] = " AND ".join(events_conditions[-1]["condition"])
# # TODO: no isNot for TimeBetweenEvents
# elif event_type == schemas.PerformanceEventType.time_between_events:
# event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main "
# # event_from = event_from % f"{getattr(events.event_type, event.value[0].type).table} AS main INNER JOIN {getattr(events.event_type, event.value[1].type).table} AS main2 USING(session_id) "
# event_where.append(f"main.event_type='{__exp_ch_helper.get_event_type(event.value[0].type, platform=platform)}'")
# events_conditions.append({"type": event_where[-1]})
# event_where.append(f"main.event_type='{__exp_ch_helper.get_event_type(event.value[0].type, platform=platform)}'")
# events_conditions.append({"type": event_where[-1]})
#
# if not isinstance(event.value[0].value, list):
# event.value[0].value = [event.value[0].value]
# if not isinstance(event.value[1].value, list):
# event.value[1].value = [event.value[1].value]
# event.value[0].value = helper.values_for_operator(value=event.value[0].value,
# op=event.value[0].operator)
# event.value[1].value = helper.values_for_operator(value=event.value[1].value,
# op=event.value[0].operator)
# e_k1 = e_k + "_e1"
# e_k2 = e_k + "_e2"
# full_args = {**full_args,
# **_multiple_values(event.value[0].value, value_key=e_k1),
# **_multiple_values(event.value[1].value, value_key=e_k2)}
# s_op = get_sql_operator(event.value[0].operator)
# # event_where += ["main2.timestamp >= %(startDate)s", "main2.timestamp <= %(endDate)s"]
# # if event_index > 0 and not or_events:
# # event_where.append("main2.session_id=event_0.session_id")
# is_any = _isAny_opreator(event.value[0].operator)
# if not is_any:
# event_where.append(
# _multiple_conditions(
# f"main.{getattr(events.EventType, event.value[0].type).column} {s_op} %({e_k1})s",
# event.value[0].value, value_key=e_k1))
# events_conditions[-2]["condition"] = event_where[-1]
# s_op = get_sql_operator(event.value[1].operator)
# is_any = _isAny_opreator(event.value[1].operator)
# if not is_any:
# event_where.append(
# _multiple_conditions(
# f"main.{getattr(events.EventType, event.value[1].type).column} {s_op} %({e_k2})s",
# event.value[1].value, value_key=e_k2))
# events_conditions[-1]["condition"] = event_where[-1]
#
# e_k += "_custom"
# full_args = {**full_args, **_multiple_values(event.source, value_key=e_k)}
# # event_where.append(
# # _multiple_conditions(f"main2.timestamp - main.timestamp {event.sourceOperator} %({e_k})s",
# # event.source, value_key=e_k))
# # events_conditions[-2]["time"] = f"(?t{event.sourceOperator} %({e_k})s)"
# events_conditions[-2]["time"] = _multiple_conditions(f"?t{event.sourceOperator}%({e_k})s",
# event.source, value_key=e_k)
# event_index += 1
# TODO: no isNot for RequestDetails
elif event_type == schemas.EventType.REQUEST_DETAILS:
event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main "
event_where.append(f"main.event_type='{exp_ch_helper.get_event_type(event_type, platform=platform)}'")
@@ -1579,33 +1493,7 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu
extra_constraints.append("s.datetime >= toDateTime(%(startDate)s/1000)")
if data.endTimestamp is not None:
extra_constraints.append("s.datetime <= toDateTime(%(endDate)s/1000)")
# if data.platform is not None:
# if data.platform == schemas.PlatformType.mobile:
# extra_constraints.append(b"s.user_os in ('Android','BlackBerry OS','iOS','Tizen','Windows Phone')")
# elif data.platform == schemas.PlatformType.desktop:
# extra_constraints.append(
# b"s.user_os in ('Chrome OS','Fedora','Firefox OS','Linux','Mac OS X','Ubuntu','Windows')")
# if errors_only:
# extra_from += f" INNER JOIN {events.EventType.ERROR.table} AS er USING (session_id) INNER JOIN public.errors AS ser USING (error_id)"
# extra_constraints.append("ser.source = 'js_exception'")
# extra_constraints.append("ser.project_id = %(project_id)s")
# if error_status != schemas.ErrorStatus.all:
# extra_constraints.append("ser.status = %(error_status)s")
# full_args["error_status"] = error_status
# if favorite_only:
# extra_from += " INNER JOIN final.user_favorite_errors AS ufe USING (error_id)"
# extra_constraints.append("ufe.user_id = %(userId)s")
# if favorite_only and not errors_only and user_id is not None:
# extra_from += f"""INNER JOIN (SELECT session_id
# FROM {exp_ch_helper.get_user_favorite_sessions_table()}
# WHERE user_id=%(userId)s) AS favorite_sessions USING (session_id)"""
# elif not favorite_only and not errors_only and user_id is not None:
# extra_from += f"""LEFT JOIN (SELECT session_id
# FROM {exp_ch_helper.get_user_favorite_sessions_table()} AS user_favorite_sessions
# WHERE user_id = %(userId)s) AS favorite_sessions
# ON (s.session_id=favorite_sessions.session_id)"""
extra_join = ""
if issue is not None:
extra_join = """