* feat(chalice): autocomplete return top 10 with stats

* fix(chalice): fixed autocomplete top 10 meta-filters

* feat(chalice): new user journey 1/2

* feat(chalice): new user journey hideExcess support

* feat(chalice): new user journey create drop to drop nodes&links
This commit is contained in:
Kraiem Taha Yassine 2025-01-21 15:16:14 +01:00 committed by GitHub
parent 57e604794c
commit c26c235f2f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 466 additions and 312 deletions

View file

@ -209,14 +209,15 @@ def get_issues(project: schemas.ProjectContext, user_id: int, data: schemas.Card
def __get_global_card_info(data: schemas.CardSchema):
r = {"hideExcess": data.hide_excess, "compareTo": data.compare_to}
r = {"hideExcess": data.hide_excess, "compareTo": data.compare_to, "rows": data.rows}
return r
def __get_path_analysis_card_info(data: schemas.CardPathAnalysis):
r = {"start_point": [s.model_dump() for s in data.start_point],
"start_type": data.start_type,
"excludes": [e.model_dump() for e in data.excludes]}
"excludes": [e.model_dump() for e in data.excludes],
"rows": data.rows}
return r

View file

@ -25,15 +25,17 @@ def __transform_journey(rows, reverse_path=False):
nodes = []
nodes_values = []
links = []
drops = []
max_depth = 0
for r in rows:
source = f"{r['event_number_in_session']}_{r['event_type']}_{r['e_value']}"
source = f"{r['event_number_in_session'] - 1}_{r['event_type']}_{r['e_value']}"
if source not in nodes:
nodes.append(source)
nodes_values.append({"depth": r['event_number_in_session'] - 1,
"name": r['e_value'],
"eventType": r['event_type']})
# if r['next_value']:
target = f"{r['event_number_in_session'] + 1}_{r['next_type']}_{r['next_value']}"
target = f"{r['event_number_in_session']}_{r['next_type']}_{r['next_value']}"
if target not in nodes:
nodes.append(target)
nodes_values.append({"depth": r['event_number_in_session'],
@ -52,6 +54,40 @@ def __transform_journey(rows, reverse_path=False):
link["target"] = sr_idx
links.append(link)
max_depth = r['event_number_in_session']
if r["next_type"] == "DROP":
for d in drops:
if d["depth"] == r['event_number_in_session']:
d["sessions_count"] += r["sessions_count"]
break
else:
drops.append({"depth": r['event_number_in_session'], "sessions_count": r["sessions_count"]})
for i in range(len(drops)):
if drops[i]["depth"] < max_depth:
source = f"{drops[i]['depth']}_DROP_None"
target = f"{drops[i]['depth'] + 1}_DROP_None"
sr_idx = nodes.index(source)
if i < len(drops) - 1 and drops[i]["depth"] + 1 == drops[i + 1]["depth"]:
tg_idx = nodes.index(target)
else:
nodes.append(target)
nodes_values.append({"depth": drops[i]["depth"] + 1,
"name": None,
"eventType": "DROP"})
tg_idx = len(nodes) - 1
link = {"eventType": "DROP", "sessionsCount": drops[i]["sessions_count"], "value": None}
if not reverse_path:
link["source"] = sr_idx
link["target"] = tg_idx
else:
link["source"] = tg_idx
link["target"] = sr_idx
links.append(link)
return {"nodes": nodes_values,
"links": sorted(links, key=lambda x: (x["source"], x["target"]), reverse=False)}

View file

@ -23,246 +23,237 @@ JOURNEY_TYPES = {
}
# Q6: use events as a sub_query to support filter of materialized columns when doing a join
# query: Q5, the result is correct,
# startPoints are computed before ranked_events to reduce the number of window functions over rows
# replaced time_to_target by time_from_previous
# compute avg_time_from_previous at the same level as sessions_count (this was removed in v1.22)
# sort by top 5 according to sessions_count at the CTE level
# final part project data without grouping
# if start-point is selected, the selected event is ranked n°1
def path_analysis(project_id: int, data: schemas.CardPathAnalysis):
# This code is used for testing only
def __get_test_data():
with ch_client.ClickHouseClient(database="experimental") as ch:
ch_query1 = """
CREATE TEMPORARY TABLE pre_ranked_events_1736344377403 AS
(WITH initial_event AS (SELECT events.session_id, MIN(datetime) AS start_event_timestamp
FROM experimental.events AS events
WHERE ((event_type = 'LOCATION' AND (url_path = '/en/deployment/')))
AND events.project_id = toUInt16(65)
AND events.datetime >= toDateTime(1735599600000 / 1000)
AND events.datetime < toDateTime(1736290799999 / 1000)
GROUP BY 1),
pre_ranked_events AS (SELECT *
FROM (SELECT session_id,
event_type,
datetime,
url_path AS e_value,
row_number() OVER (PARTITION BY session_id
ORDER BY datetime ,
message_id ) AS event_number_in_session
FROM experimental.events AS events
INNER JOIN initial_event ON (events.session_id = initial_event.session_id)
WHERE events.project_id = toUInt16(65)
AND events.datetime >= toDateTime(1735599600000 / 1000)
AND events.datetime < toDateTime(1736290799999 / 1000)
AND (events.event_type = 'LOCATION')
AND events.datetime >= initial_event.start_event_timestamp
) AS full_ranked_events
WHERE event_number_in_session <= 5)
SELECT *
FROM pre_ranked_events);
"""
CREATE TEMPORARY TABLE pre_ranked_events_1736344377403 AS
(WITH initial_event AS (SELECT events.session_id, MIN(datetime) AS start_event_timestamp
FROM experimental.events AS events
WHERE ((event_type = 'LOCATION' AND (url_path = '/en/deployment/')))
AND events.project_id = toUInt16(65)
AND events.datetime >= toDateTime(1735599600000 / 1000)
AND events.datetime < toDateTime(1736290799999 / 1000)
GROUP BY 1),
pre_ranked_events AS (SELECT *
FROM (SELECT session_id,
event_type,
datetime,
url_path AS e_value,
row_number() OVER (PARTITION BY session_id
ORDER BY datetime ,
message_id ) AS event_number_in_session
FROM experimental.events AS events
INNER JOIN initial_event ON (events.session_id = initial_event.session_id)
WHERE events.project_id = toUInt16(65)
AND events.datetime >= toDateTime(1735599600000 / 1000)
AND events.datetime < toDateTime(1736290799999 / 1000)
AND (events.event_type = 'LOCATION')
AND events.datetime >= initial_event.start_event_timestamp
) AS full_ranked_events
WHERE event_number_in_session <= 5)
SELECT *
FROM pre_ranked_events);
"""
ch.execute(query=ch_query1, parameters={})
ch_query1 = """
CREATE TEMPORARY TABLE ranked_events_1736344377403 AS
(WITH pre_ranked_events AS (SELECT *
FROM pre_ranked_events_1736344377403),
start_points AS (SELECT DISTINCT session_id
FROM pre_ranked_events
WHERE ((event_type = 'LOCATION' AND (e_value = '/en/deployment/')))
AND pre_ranked_events.event_number_in_session = 1),
ranked_events AS (SELECT pre_ranked_events.*,
leadInFrame(e_value)
OVER (PARTITION BY session_id ORDER BY datetime
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS next_value,
leadInFrame(toNullable(event_type))
OVER (PARTITION BY session_id ORDER BY datetime
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS next_type
FROM start_points
INNER JOIN pre_ranked_events USING (session_id))
SELECT *
FROM ranked_events);
"""
CREATE TEMPORARY TABLE ranked_events_1736344377403 AS
(WITH pre_ranked_events AS (SELECT *
FROM pre_ranked_events_1736344377403),
start_points AS (SELECT DISTINCT session_id
FROM pre_ranked_events
WHERE ((event_type = 'LOCATION' AND (e_value = '/en/deployment/')))
AND pre_ranked_events.event_number_in_session = 1),
ranked_events AS (SELECT pre_ranked_events.*,
leadInFrame(e_value)
OVER (PARTITION BY session_id ORDER BY datetime
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS next_value,
leadInFrame(toNullable(event_type))
OVER (PARTITION BY session_id ORDER BY datetime
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS next_type
FROM start_points
INNER JOIN pre_ranked_events USING (session_id))
SELECT *
FROM ranked_events);
"""
ch.execute(query=ch_query1, parameters={})
ch_query1 = """
WITH ranked_events AS (SELECT *
FROM ranked_events_1736344377403),
n1 AS (SELECT event_number_in_session,
event_type,
e_value,
next_type,
next_value,
COUNT(1) AS sessions_count
FROM ranked_events
WHERE event_number_in_session = 1
GROUP BY event_number_in_session, event_type, e_value, next_type, next_value
ORDER BY sessions_count DESC),
n2 AS (SELECT event_number_in_session,
event_type,
e_value,
next_type,
next_value,
COUNT(1) AS sessions_count
FROM ranked_events
WHERE event_number_in_session = 2
GROUP BY event_number_in_session, event_type, e_value, next_type, next_value
ORDER BY sessions_count DESC),
n3 AS (SELECT event_number_in_session,
event_type,
e_value,
next_type,
next_value,
COUNT(1) AS sessions_count
FROM ranked_events
WHERE event_number_in_session = 3
GROUP BY event_number_in_session, event_type, e_value, next_type, next_value
ORDER BY sessions_count DESC),
WITH ranked_events AS (SELECT *
FROM ranked_events_1736344377403),
n1 AS (SELECT event_number_in_session,
event_type,
e_value,
next_type,
next_value,
COUNT(1) AS sessions_count
FROM ranked_events
WHERE event_number_in_session = 1
GROUP BY event_number_in_session, event_type, e_value, next_type, next_value
ORDER BY sessions_count DESC),
n2 AS (SELECT event_number_in_session,
event_type,
e_value,
next_type,
next_value,
COUNT(1) AS sessions_count
FROM ranked_events
WHERE event_number_in_session = 2
GROUP BY event_number_in_session, event_type, e_value, next_type, next_value
ORDER BY sessions_count DESC),
n3 AS (SELECT event_number_in_session,
event_type,
e_value,
next_type,
next_value,
COUNT(1) AS sessions_count
FROM ranked_events
WHERE event_number_in_session = 3
GROUP BY event_number_in_session, event_type, e_value, next_type, next_value
ORDER BY sessions_count DESC),
drop_n AS (-- STEP 1
SELECT event_number_in_session,
event_type,
e_value,
'DROP' AS next_type,
NULL AS next_value,
sessions_count
FROM n1
WHERE isNull(n1.next_type)
UNION ALL
-- STEP 2
SELECT event_number_in_session,
event_type,
e_value,
'DROP' AS next_type,
NULL AS next_value,
sessions_count
FROM n2
WHERE isNull(n2.next_type)),
top_n AS (SELECT event_number_in_session,
event_type,
e_value,
SUM(sessions_count) AS sessions_count
FROM n1
GROUP BY event_number_in_session, event_type, e_value
LIMIT 1
UNION ALL
-- STEP 2
SELECT event_number_in_session,
event_type,
e_value,
SUM(sessions_count) AS sessions_count
FROM n2
GROUP BY event_number_in_session, event_type, e_value
ORDER BY sessions_count DESC
LIMIT 3
UNION ALL
-- STEP 3
SELECT event_number_in_session,
event_type,
e_value,
SUM(sessions_count) AS sessions_count
FROM n3
GROUP BY event_number_in_session, event_type, e_value
ORDER BY sessions_count DESC
LIMIT 3),
top_n_with_next AS (SELECT n1.*
FROM n1
UNION ALL
SELECT n2.*
FROM n2
INNER JOIN top_n ON (n2.event_number_in_session = top_n.event_number_in_session
AND n2.event_type = top_n.event_type
AND n2.e_value = top_n.e_value)),
others_n AS (
-- STEP 2
SELECT n2.*
FROM n2
WHERE (n2.event_number_in_session, n2.event_type, n2.e_value) NOT IN
(SELECT event_number_in_session, event_type, e_value
FROM top_n
WHERE top_n.event_number_in_session = 2)
UNION ALL
-- STEP 3
SELECT n3.*
FROM n3
WHERE (n3.event_number_in_session, n3.event_type, n3.e_value) NOT IN
(SELECT event_number_in_session, event_type, e_value
FROM top_n
WHERE top_n.event_number_in_session = 3))
SELECT *
FROM (
-- Top to Top: valid
SELECT top_n_with_next.*
FROM top_n_with_next
INNER JOIN top_n
ON (top_n_with_next.event_number_in_session + 1 = top_n.event_number_in_session
AND top_n_with_next.next_type = top_n.event_type
AND top_n_with_next.next_value = top_n.e_value)
UNION ALL
-- Top to Others: valid
SELECT top_n_with_next.event_number_in_session,
top_n_with_next.event_type,
top_n_with_next.e_value,
'OTHER' AS next_type,
NULL AS next_value,
SUM(top_n_with_next.sessions_count) AS sessions_count
FROM top_n_with_next
WHERE (top_n_with_next.event_number_in_session + 1, top_n_with_next.next_type, top_n_with_next.next_value) IN
(SELECT others_n.event_number_in_session, others_n.event_type, others_n.e_value FROM others_n)
GROUP BY top_n_with_next.event_number_in_session, top_n_with_next.event_type, top_n_with_next.e_value
UNION ALL
-- Top go to Drop: valid
SELECT drop_n.event_number_in_session,
drop_n.event_type,
drop_n.e_value,
drop_n.next_type,
drop_n.next_value,
drop_n.sessions_count
FROM drop_n
INNER JOIN top_n ON (drop_n.event_number_in_session = top_n.event_number_in_session
AND drop_n.event_type = top_n.event_type
AND drop_n.e_value = top_n.e_value)
ORDER BY drop_n.event_number_in_session
UNION ALL
-- Others got to Drop: valid
SELECT others_n.event_number_in_session,
'OTHER' AS event_type,
NULL AS e_value,
'DROP' AS next_type,
NULL AS next_value,
SUM(others_n.sessions_count) AS sessions_count
FROM others_n
WHERE isNull(others_n.next_type)
AND others_n.event_number_in_session < 3
GROUP BY others_n.event_number_in_session, next_type, next_value
UNION ALL
-- Others got to Top:valid
SELECT others_n.event_number_in_session,
'OTHER' AS event_type,
NULL AS e_value,
others_n.next_type,
others_n.next_value,
SUM(others_n.sessions_count) AS sessions_count
FROM others_n
WHERE isNotNull(others_n.next_type)
AND (others_n.event_number_in_session + 1, others_n.next_type, others_n.next_value) IN
(SELECT top_n.event_number_in_session, top_n.event_type, top_n.e_value FROM top_n)
GROUP BY others_n.event_number_in_session, others_n.next_type, others_n.next_value
UNION ALL
-- Others got to Others
SELECT others_n.event_number_in_session,
'OTHER' AS event_type,
NULL AS e_value,
'OTHER' AS next_type,
NULL AS next_value,
SUM(sessions_count) AS sessions_count
FROM others_n
WHERE isNotNull(others_n.next_type)
AND others_n.event_number_in_session < 3
AND (others_n.event_number_in_session + 1, others_n.next_type, others_n.next_value) NOT IN
(SELECT event_number_in_session, event_type, e_value FROM top_n)
GROUP BY others_n.event_number_in_session)
ORDER BY event_number_in_session, sessions_count DESC;"""
drop_n AS (-- STEP 1
SELECT event_number_in_session,
event_type,
e_value,
'DROP' AS next_type,
NULL AS next_value,
sessions_count
FROM n1
WHERE isNull(n1.next_type)
UNION ALL
-- STEP 2
SELECT event_number_in_session,
event_type,
e_value,
'DROP' AS next_type,
NULL AS next_value,
sessions_count
FROM n2
WHERE isNull(n2.next_type)),
top_n AS (SELECT event_number_in_session,
event_type,
e_value,
SUM(sessions_count) AS sessions_count
FROM n1
GROUP BY event_number_in_session, event_type, e_value
LIMIT 1
UNION ALL
-- STEP 2
SELECT event_number_in_session,
event_type,
e_value,
SUM(sessions_count) AS sessions_count
FROM n2
GROUP BY event_number_in_session, event_type, e_value
ORDER BY sessions_count DESC
LIMIT 3
UNION ALL
-- STEP 3
SELECT event_number_in_session,
event_type,
e_value,
SUM(sessions_count) AS sessions_count
FROM n3
GROUP BY event_number_in_session, event_type, e_value
ORDER BY sessions_count DESC
LIMIT 3),
top_n_with_next AS (SELECT n1.*
FROM n1
UNION ALL
SELECT n2.*
FROM n2
INNER JOIN top_n ON (n2.event_number_in_session = top_n.event_number_in_session
AND n2.event_type = top_n.event_type
AND n2.e_value = top_n.e_value)),
others_n AS (
-- STEP 2
SELECT n2.*
FROM n2
WHERE (n2.event_number_in_session, n2.event_type, n2.e_value) NOT IN
(SELECT event_number_in_session, event_type, e_value
FROM top_n
WHERE top_n.event_number_in_session = 2)
UNION ALL
-- STEP 3
SELECT n3.*
FROM n3
WHERE (n3.event_number_in_session, n3.event_type, n3.e_value) NOT IN
(SELECT event_number_in_session, event_type, e_value
FROM top_n
WHERE top_n.event_number_in_session = 3))
SELECT *
FROM (
-- Top to Top: valid
SELECT top_n_with_next.*
FROM top_n_with_next
INNER JOIN top_n
ON (top_n_with_next.event_number_in_session + 1 = top_n.event_number_in_session
AND top_n_with_next.next_type = top_n.event_type
AND top_n_with_next.next_value = top_n.e_value)
UNION ALL
-- Top to Others: valid
SELECT top_n_with_next.event_number_in_session,
top_n_with_next.event_type,
top_n_with_next.e_value,
'OTHER' AS next_type,
NULL AS next_value,
SUM(top_n_with_next.sessions_count) AS sessions_count
FROM top_n_with_next
WHERE (top_n_with_next.event_number_in_session + 1, top_n_with_next.next_type, top_n_with_next.next_value) IN
(SELECT others_n.event_number_in_session, others_n.event_type, others_n.e_value FROM others_n)
GROUP BY top_n_with_next.event_number_in_session, top_n_with_next.event_type, top_n_with_next.e_value
UNION ALL
-- Top go to Drop: valid
SELECT drop_n.event_number_in_session,
drop_n.event_type,
drop_n.e_value,
drop_n.next_type,
drop_n.next_value,
drop_n.sessions_count
FROM drop_n
INNER JOIN top_n ON (drop_n.event_number_in_session = top_n.event_number_in_session
AND drop_n.event_type = top_n.event_type
AND drop_n.e_value = top_n.e_value)
ORDER BY drop_n.event_number_in_session
UNION ALL
-- Others got to Drop: valid
SELECT others_n.event_number_in_session,
'OTHER' AS event_type,
NULL AS e_value,
'DROP' AS next_type,
NULL AS next_value,
SUM(others_n.sessions_count) AS sessions_count
FROM others_n
WHERE isNull(others_n.next_type)
AND others_n.event_number_in_session < 3
GROUP BY others_n.event_number_in_session, next_type, next_value
UNION ALL
-- Others got to Top:valid
SELECT others_n.event_number_in_session,
'OTHER' AS event_type,
NULL AS e_value,
others_n.next_type,
others_n.next_value,
SUM(others_n.sessions_count) AS sessions_count
FROM others_n
WHERE isNotNull(others_n.next_type)
AND (others_n.event_number_in_session + 1, others_n.next_type, others_n.next_value) IN
(SELECT top_n.event_number_in_session, top_n.event_type, top_n.e_value FROM top_n)
GROUP BY others_n.event_number_in_session, others_n.next_type, others_n.next_value
UNION ALL
-- Others got to Others
SELECT others_n.event_number_in_session,
'OTHER' AS event_type,
NULL AS e_value,
'OTHER' AS next_type,
NULL AS next_value,
SUM(sessions_count) AS sessions_count
FROM others_n
WHERE isNotNull(others_n.next_type)
AND others_n.event_number_in_session < 3
AND (others_n.event_number_in_session + 1, others_n.next_type, others_n.next_value) NOT IN
(SELECT event_number_in_session, event_type, e_value FROM top_n)
GROUP BY others_n.event_number_in_session)
ORDER BY event_number_in_session, sessions_count DESC;"""
rows = ch.execute(query=ch_query1, parameters={})
drop = 0
for r in rows:
@ -272,6 +263,14 @@ ORDER BY event_number_in_session, sessions_count DESC;"""
return __transform_journey(rows=rows, reverse_path=False)
# startPoints are computed before ranked_events to reduce the number of window functions over rows
# compute avg_time_from_previous at the same level as sessions_count (this was removed in v1.22)
# if start-point is selected, the selected event is ranked n°1
def path_analysis(project_id: int, data: schemas.CardPathAnalysis):
# # This code is used for testing only
# return __get_test_data()
# ------ end of testing code ---
sub_events = []
start_points_conditions = []
@ -567,55 +566,77 @@ ORDER BY event_number_in_session, sessions_count DESC;"""
main_events_table += " INNER JOIN initial_event ON (events.session_id = initial_event.session_id)"
sessions_conditions = []
steps_query = ["""n1 AS (SELECT event_number_in_session,
event_type,
e_value,
next_type,
next_value,
COUNT(1) AS sessions_count
FROM ranked_events
WHERE event_number_in_session = 1
AND isNotNull(next_value)
GROUP BY event_number_in_session, event_type, e_value, next_type, next_value
ORDER BY sessions_count DESC
LIMIT %(eventThresholdNumberInGroup)s)"""]
projection_query = ["""SELECT event_number_in_session,
event_type,
e_value,
next_type,
next_value,
sessions_count
FROM n1"""]
for i in range(2, data.density + 1):
steps_query.append(f"""n{i} AS (SELECT *
FROM (SELECT re.event_number_in_session AS event_number_in_session,
re.event_type AS event_type,
re.e_value AS e_value,
re.next_type AS next_type,
re.next_value AS next_value,
COUNT(1) AS sessions_count
FROM n{i - 1} INNER JOIN ranked_events AS re
ON (n{i - 1}.next_value = re.e_value AND n{i - 1}.next_type = re.event_type)
WHERE re.event_number_in_session = {i}
GROUP BY re.event_number_in_session, re.event_type, re.e_value, re.next_type, re.next_value) AS sub_level
ORDER BY sessions_count DESC
LIMIT %(eventThresholdNumberInGroup)s)""")
projection_query.append(f"""SELECT event_number_in_session,
steps_query = []
# This is used if data.hideExcess is True
projection_query = []
drop_query = []
top_query = []
top_with_next_query = []
other_query = []
for i in range(1, data.density + (0 if data.hide_excess else 1)):
steps_query.append(f"""n{i} AS (SELECT event_number_in_session,
event_type,
e_value,
next_type,
next_value,
COUNT(1) AS sessions_count
FROM ranked_events
WHERE event_number_in_session = {i}
GROUP BY event_number_in_session, event_type, e_value, next_type, next_value
ORDER BY sessions_count DESC)""")
if data.hide_excess:
projection_query.append(f"""\
SELECT event_number_in_session,
event_type,
e_value,
next_type,
next_value,
sessions_count
FROM n{i}""")
FROM n{i}
WHERE isNotNull(next_type)""")
else:
top_query.append(f"""\
SELECT event_number_in_session,
event_type,
e_value,
SUM(sessions_count) AS sessions_count
FROM n{i}
GROUP BY event_number_in_session, event_type, e_value
ORDER BY sessions_count DESC
LIMIT %(visibleRows)s""")
if i < data.density:
drop_query.append(f"""SELECT event_number_in_session,
event_type,
e_value,
'DROP' AS next_type,
NULL AS next_value,
sessions_count
FROM n{i}
WHERE isNull(n{i}.next_type)""")
if not data.hide_excess:
top_with_next_query.append(f"""\
SELECT n{i}.*
FROM n{i}
INNER JOIN top_n
ON (n{i}.event_number_in_session = top_n.event_number_in_session
AND n{i}.event_type = top_n.event_type
AND n{i}.e_value = top_n.e_value)""")
if i > 1 and not data.hide_excess:
other_query.append(f"""SELECT n{i}.*
FROM n{i}
WHERE (event_number_in_session, event_type, e_value) NOT IN
(SELECT event_number_in_session, event_type, e_value
FROM top_n
WHERE top_n.event_number_in_session = {i})""")
with ch_client.ClickHouseClient(database="experimental") as ch:
time_key = TimeUTC.now()
_now = time()
params = {"project_id": project_id, "startTimestamp": data.startTimestamp,
"endTimestamp": data.endTimestamp, "density": data.density,
# This is ignored because UI will take care of it
# "eventThresholdNumberInGroup": 4 if data.hide_excess else 8,
"eventThresholdNumberInGroup": 8,
"visibleRows": data.rows,
**extra_values}
ch_query1 = f"""\
@ -640,7 +661,7 @@ FROM pre_ranked_events;"""
ch.execute(query=ch_query1, parameters=params)
if time() - _now > 2:
logger.warning(f">>>>>>>>>PathAnalysis long query EE ({int(time() - _now)}s)<<<<<<<<<")
logger.warning(ch.format(ch_query1, params))
logger.warning(ch.format(query=ch_query1, parameters=params))
logger.warning("----------------------")
_now = time()
@ -663,22 +684,115 @@ FROM ranked_events;"""
ch.execute(query=ch_query2, parameters=params)
if time() - _now > 2:
logger.warning(f">>>>>>>>>PathAnalysis long query EE ({int(time() - _now)}s)<<<<<<<<<")
logger.warning(ch.format(ch_query2, params))
logger.warning(ch.format(query=ch_query2, parameters=params))
logger.warning("----------------------")
_now = time()
sub_cte = ""
if not data.hide_excess:
sub_cte = f""",
top_n AS ({"\nUNION ALL\n".join(top_query)}),
top_n_with_next AS ({"\nUNION ALL\n".join(top_with_next_query)}),
others_n AS ({"\nUNION ALL\n".join(other_query)})"""
projection_query = """\
-- Top to Top: valid
SELECT top_n_with_next.*
FROM top_n_with_next
INNER JOIN top_n
ON (top_n_with_next.event_number_in_session + 1 = top_n.event_number_in_session
AND top_n_with_next.next_type = top_n.event_type
AND top_n_with_next.next_value = top_n.e_value)
UNION ALL
-- Top to Others: valid
SELECT top_n_with_next.event_number_in_session,
top_n_with_next.event_type,
top_n_with_next.e_value,
'OTHER' AS next_type,
NULL AS next_value,
SUM(top_n_with_next.sessions_count) AS sessions_count
FROM top_n_with_next
WHERE (top_n_with_next.event_number_in_session + 1, top_n_with_next.next_type, top_n_with_next.next_value) IN
(SELECT others_n.event_number_in_session, others_n.event_type, others_n.e_value FROM others_n)
GROUP BY top_n_with_next.event_number_in_session, top_n_with_next.event_type, top_n_with_next.e_value
UNION ALL
-- Top go to Drop: valid
SELECT drop_n.event_number_in_session,
drop_n.event_type,
drop_n.e_value,
drop_n.next_type,
drop_n.next_value,
drop_n.sessions_count
FROM drop_n
INNER JOIN top_n ON (drop_n.event_number_in_session = top_n.event_number_in_session
AND drop_n.event_type = top_n.event_type
AND drop_n.e_value = top_n.e_value)
ORDER BY drop_n.event_number_in_session
UNION ALL
-- Others got to Drop: valid
SELECT others_n.event_number_in_session,
'OTHER' AS event_type,
NULL AS e_value,
'DROP' AS next_type,
NULL AS next_value,
SUM(others_n.sessions_count) AS sessions_count
FROM others_n
WHERE isNull(others_n.next_type)
AND others_n.event_number_in_session < 3
GROUP BY others_n.event_number_in_session, next_type, next_value
UNION ALL
-- Others got to Top:valid
SELECT others_n.event_number_in_session,
'OTHER' AS event_type,
NULL AS e_value,
others_n.next_type,
others_n.next_value,
SUM(others_n.sessions_count) AS sessions_count
FROM others_n
WHERE isNotNull(others_n.next_type)
AND (others_n.event_number_in_session + 1, others_n.next_type, others_n.next_value) IN
(SELECT top_n.event_number_in_session, top_n.event_type, top_n.e_value FROM top_n)
GROUP BY others_n.event_number_in_session, others_n.next_type, others_n.next_value
UNION ALL
-- Others got to Others
SELECT others_n.event_number_in_session,
'OTHER' AS event_type,
NULL AS e_value,
'OTHER' AS next_type,
NULL AS next_value,
SUM(sessions_count) AS sessions_count
FROM others_n
WHERE isNotNull(others_n.next_type)
AND others_n.event_number_in_session < %(density)s
AND (others_n.event_number_in_session + 1, others_n.next_type, others_n.next_value) NOT IN
(SELECT event_number_in_session, event_type, e_value FROM top_n)
GROUP BY others_n.event_number_in_session"""
else:
projection_query.append("""\
SELECT event_number_in_session,
event_type,
e_value,
next_type,
next_value,
sessions_count
FROM drop_n""")
projection_query = "\nUNION ALL\n".join(projection_query)
ch_query3 = f"""\
WITH ranked_events AS (SELECT *
FROM ranked_events_{time_key}),
{",".join(steps_query)}
SELECT *
FROM ({" UNION ALL ".join(projection_query)}) AS chart_steps
ORDER BY event_number_in_session;"""
WITH ranked_events AS (SELECT *
FROM ranked_events_{time_key}),
{",\n".join(steps_query)},
drop_n AS ({"\nUNION ALL\n".join(drop_query)})
{sub_cte}
SELECT *
FROM (
{projection_query}
) AS chart_steps
ORDER BY event_number_in_session, sessions_count DESC;"""
logger.debug("---------Q3-----------")
rows = ch.execute(query=ch_query3, parameters=params)
if time() - _now > 2:
logger.warning(f">>>>>>>>>PathAnalysis long query EE ({int(time() - _now)}s)<<<<<<<<<")
logger.warning(ch.format(ch_query3, params))
logger.warning(ch.format(query=ch_query3, parameters=params))
logger.warning("----------------------")
return __transform_journey(rows=rows, reverse_path=reverse)

View file

@ -133,7 +133,7 @@ class _TimedSchema(BaseModel):
class NotificationsViewSchema(_TimedSchema):
ids: List[int] = Field(default=[])
ids: List[int] = Field(default_factory=list)
startTimestamp: Optional[int] = Field(default=None)
endTimestamp: Optional[int] = Field(default=None)
@ -545,7 +545,7 @@ class SessionSearchEventSchema2(BaseModel):
operator: Union[SearchEventOperator, ClickEventExtraOperator] = Field(...)
source: Optional[List[Union[ErrorSource, int, str]]] = Field(default=None)
sourceOperator: Optional[MathOperator] = Field(default=None)
filters: Optional[List[RequestGraphqlFilterSchema]] = Field(default=[])
filters: Optional[List[RequestGraphqlFilterSchema]] = Field(default_factory=list)
_remove_duplicate_values = field_validator('value', mode='before')(remove_duplicate_values)
_single_to_list_values = field_validator('value', mode='before')(single_to_list)
@ -579,7 +579,7 @@ class SessionSearchEventSchema2(BaseModel):
class SessionSearchFilterSchema(BaseModel):
is_event: Literal[False] = False
value: List[Union[IssueType, PlatformType, int, str]] = Field(default=[])
value: List[Union[IssueType, PlatformType, int, str]] = Field(default_factory=list)
type: FilterType = Field(...)
operator: Union[SearchEventOperator, MathOperator] = Field(...)
source: Optional[Union[ErrorSource, str]] = Field(default=None)
@ -658,8 +658,8 @@ Field(discriminator='is_event'), BeforeValidator(add_missing_is_event)]
class SessionsSearchPayloadSchema(_TimedSchema, _PaginatedSchema):
events: List[SessionSearchEventSchema2] = Field(default=[], doc_hidden=True)
filters: List[GroupedFilterType] = Field(default=[])
events: List[SessionSearchEventSchema2] = Field(default_factory=list, doc_hidden=True)
filters: List[GroupedFilterType] = Field(default_factory=list)
sort: str = Field(default="startTs")
order: SortOrderType = Field(default=SortOrderType.DESC)
events_order: Optional[SearchEventOrder] = Field(default=SearchEventOrder.THEN)
@ -816,7 +816,7 @@ Field(discriminator='is_event')]
class PathAnalysisSchema(_TimedSchema, _PaginatedSchema):
density: int = Field(default=7)
filters: List[ProductAnalyticsFilter] = Field(default=[])
filters: List[ProductAnalyticsFilter] = Field(default_factory=list)
type: Optional[str] = Field(default=None)
_transform_filters = field_validator('filters', mode='before') \
@ -928,10 +928,10 @@ class CardSessionsSchema(_TimedSchema, _PaginatedSchema):
startTimestamp: int = Field(default=TimeUTC.now(-7))
endTimestamp: int = Field(default=TimeUTC.now())
density: int = Field(default=7, ge=1, le=200)
series: List[CardSeriesSchema] = Field(default=[])
series: List[CardSeriesSchema] = Field(default_factory=list)
# events: List[SessionSearchEventSchema2] = Field(default=[], doc_hidden=True)
filters: List[GroupedFilterType] = Field(default=[])
# events: List[SessionSearchEventSchema2] = Field(default_factory=list, doc_hidden=True)
filters: List[GroupedFilterType] = Field(default_factory=list)
compare_to: Optional[List[str]] = Field(default=None)
@ -1037,9 +1037,11 @@ class __CardSchema(CardSessionsSchema):
view_type: Any
metric_type: MetricType = Field(...)
metric_of: Any
metric_value: List[IssueType] = Field(default=[])
metric_value: List[IssueType] = Field(default_factory=list)
# This is used to save the selected session for heatmaps
session_id: Optional[int] = Field(default=None)
# This is used to specify the number of top values for PathAnalysis
rows: int = Field(default=3, ge=1, le=10)
@computed_field
@property
@ -1185,14 +1187,15 @@ class CardPathAnalysis(__CardSchema):
metric_type: Literal[MetricType.PATH_ANALYSIS]
metric_of: MetricOfPathAnalysis = Field(default=MetricOfPathAnalysis.session_count)
view_type: MetricOtherViewType = Field(...)
metric_value: List[ProductAnalyticsSelectedEventType] = Field(default=[])
metric_value: List[ProductAnalyticsSelectedEventType] = Field(default_factory=list)
density: int = Field(default=4, ge=2, le=10)
rows: int = Field(default=3, ge=1, le=10)
start_type: Literal["start", "end"] = Field(default="start")
start_point: List[PathAnalysisSubFilterSchema] = Field(default=[])
excludes: List[PathAnalysisSubFilterSchema] = Field(default=[])
start_point: List[PathAnalysisSubFilterSchema] = Field(default_factory=list)
excludes: List[PathAnalysisSubFilterSchema] = Field(default_factory=list)
series: List[CardPathAnalysisSeriesSchema] = Field(default=[])
series: List[CardPathAnalysisSeriesSchema] = Field(default_factory=list)
@model_validator(mode="before")
@classmethod
@ -1258,13 +1261,13 @@ class ProjectConditions(BaseModel):
condition_id: Optional[int] = Field(default=None)
name: str = Field(...)
capture_rate: int = Field(..., ge=0, le=100)
filters: List[GroupedFilterType] = Field(default=[])
filters: List[GroupedFilterType] = Field(default_factory=list)
class ProjectSettings(BaseModel):
rate: int = Field(..., ge=0, le=100)
conditional_capture: bool = Field(default=False)
conditions: List[ProjectConditions] = Field(default=[])
conditions: List[ProjectConditions] = Field(default_factory=list)
class CreateDashboardSchema(BaseModel):
@ -1272,7 +1275,7 @@ class CreateDashboardSchema(BaseModel):
description: Optional[str] = Field(default='')
is_public: bool = Field(default=False)
is_pinned: bool = Field(default=False)
metrics: Optional[List[int]] = Field(default=[])
metrics: Optional[List[int]] = Field(default_factory=list)
class EditDashboardSchema(CreateDashboardSchema):
@ -1281,7 +1284,7 @@ class EditDashboardSchema(CreateDashboardSchema):
class UpdateWidgetPayloadSchema(BaseModel):
config: dict = Field(default={})
config: dict = Field(default_factory=dict)
class AddWidgetToDashboardPayloadSchema(UpdateWidgetPayloadSchema):
@ -1379,7 +1382,7 @@ class IntegrationType(str, Enum):
class SearchNoteSchema(_PaginatedSchema):
sort: str = Field(default="createdAt")
order: SortOrderType = Field(default=SortOrderType.DESC)
tags: Optional[List[str]] = Field(default=[])
tags: Optional[List[str]] = Field(default_factory=list)
shared_only: bool = Field(default=False)
mine_only: bool = Field(default=False)
search: Optional[str] = Field(default=None)
@ -1426,8 +1429,8 @@ class _HeatMapSearchEventRaw(SessionSearchEventSchema2):
class HeatMapSessionsSearch(SessionsSearchPayloadSchema):
events: Optional[List[_HeatMapSearchEventRaw]] = Field(default=[])
filters: List[Union[SessionSearchFilterSchema, _HeatMapSearchEventRaw]] = Field(default=[])
events: Optional[List[_HeatMapSearchEventRaw]] = Field(default_factory=list)
filters: List[Union[SessionSearchFilterSchema, _HeatMapSearchEventRaw]] = Field(default_factory=list)
@model_validator(mode="before")
@classmethod
@ -1442,14 +1445,14 @@ class HeatMapSessionsSearch(SessionsSearchPayloadSchema):
class HeatMapFilterSchema(BaseModel):
value: List[Literal[IssueType.CLICK_RAGE, IssueType.DEAD_CLICK]] = Field(default=[])
value: List[Literal[IssueType.CLICK_RAGE, IssueType.DEAD_CLICK]] = Field(default_factory=list)
type: Literal[FilterType.ISSUE] = Field(...)
operator: Literal[SearchEventOperator.IS, MathOperator.EQUAL] = Field(...)
class GetHeatMapPayloadSchema(_TimedSchema):
url: Optional[str] = Field(default=None)
filters: List[HeatMapFilterSchema] = Field(default=[])
filters: List[HeatMapFilterSchema] = Field(default_factory=list)
click_rage: bool = Field(default=False)
operator: Literal[SearchEventOperator.IS, SearchEventOperator.STARTS_WITH,
SearchEventOperator.CONTAINS, SearchEventOperator.ENDS_WITH] = Field(default=SearchEventOperator.STARTS_WITH)
@ -1470,7 +1473,7 @@ class FeatureFlagVariant(BaseModel):
class FeatureFlagConditionFilterSchema(BaseModel):
is_event: Literal[False] = False
type: FilterType = Field(...)
value: List[str] = Field(default=[], min_length=1)
value: List[str] = Field(default_factory=list, min_length=1)
operator: Union[SearchEventOperator, MathOperator] = Field(...)
source: Optional[str] = Field(default=None)
sourceOperator: Optional[Union[SearchEventOperator, MathOperator]] = Field(default=None)
@ -1486,7 +1489,7 @@ class FeatureFlagCondition(BaseModel):
condition_id: Optional[int] = Field(default=None)
name: str = Field(...)
rollout_percentage: Optional[int] = Field(default=0)
filters: List[FeatureFlagConditionFilterSchema] = Field(default=[])
filters: List[FeatureFlagConditionFilterSchema] = Field(default_factory=list)
class SearchFlagsSchema(_PaginatedSchema):
@ -1513,8 +1516,8 @@ class FeatureFlagSchema(BaseModel):
flag_type: FeatureFlagType = Field(default=FeatureFlagType.SINGLE_VARIANT)
is_persist: Optional[bool] = Field(default=False)
is_active: Optional[bool] = Field(default=True)
conditions: List[FeatureFlagCondition] = Field(default=[], min_length=1)
variants: List[FeatureFlagVariant] = Field(default=[])
conditions: List[FeatureFlagCondition] = Field(default_factory=list, min_length=1)
variants: List[FeatureFlagVariant] = Field(default_factory=list)
class ModuleType(str, Enum):