feat(api): insights fixes and optimizations
feat(db): insights indexes
parent b61f2ed27c
commit 7e6229d377
4 changed files with 50 additions and 30 deletions
@@ -97,7 +97,7 @@ def journey(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=
        params = {"project_id": project_id, "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp, "event_start": event_start, "JOURNEY_DEPTH": JOURNEY_DEPTH,
                  **__get_constraint_values(args), **extra_values}
        print(cur.mogrify(pg_query, params))
        # print(cur.mogrify(pg_query, params))
        cur.execute(cur.mogrify(pg_query, params))
        rows = cur.fetchall()
@@ -225,7 +225,7 @@ def users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endT

        params = {"project_id": project_id, "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
        # print(cur.mogrify(pg_query, params))
        print(cur.mogrify(pg_query, params))
        cur.execute(cur.mogrify(pg_query, params))
        rows = cur.fetchall()
        rows = __compute_weekly_percentage(helper.list_to_camel_case(rows))
@@ -273,7 +273,7 @@ def users_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en

        params = {"project_id": project_id, "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
        # print(cur.mogrify(pg_query, params))
        print(cur.mogrify(pg_query, params))
        cur.execute(cur.mogrify(pg_query, params))
        rows = cur.fetchall()
        rows = __compute_weekly_percentage(helper.list_to_camel_case(rows))
@@ -328,7 +328,8 @@ def feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en
        if row is not None:
            event_value = row["value"]
            extra_values["value"] = event_value

            if len(event_value) > 2:
                pg_sub_query.append(f"length({event_column})>2")
        pg_query = f"""SELECT FLOOR(DATE_PART('day', connexion_week - to_timestamp(%(startTimestamp)s/1000)) / 7)::integer AS week,
                              COUNT(DISTINCT connexions_list.user_id) AS users_count,
                              ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users
@@ -347,8 +348,7 @@ def feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en
                                     GROUP BY user_id) AS users_list
                     LEFT JOIN LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week,
                                               user_id
                                        FROM sessions
                                            INNER JOIN events.pages AS feature USING (session_id)
                                        FROM sessions INNER JOIN {event_table} AS feature USING (session_id)
                                        WHERE users_list.user_id = sessions.user_id
                                          AND %(startTimestamp)s <= sessions.start_ts
                                          AND sessions.project_id = 1
@@ -362,7 +362,7 @@ def feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en

        params = {"project_id": project_id, "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
        # print(cur.mogrify(pg_query, params))
        print(cur.mogrify(pg_query, params))
        cur.execute(cur.mogrify(pg_query, params))
        rows = cur.fetchall()
        rows = __compute_weekly_percentage(helper.list_to_camel_case(rows))
@@ -419,7 +419,8 @@ def feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70),
        if row is not None:
            event_value = row["value"]
            extra_values["value"] = event_value

            if len(event_value) > 2:
                pg_sub_query.append(f"length({event_column})>2")
        pg_query = f"""SELECT EXTRACT(EPOCH FROM first_connexion_week::date)::bigint*1000 AS first_connexion_week,
                              FLOOR(DATE_PART('day', connexion_week - first_connexion_week) / 7)::integer AS week,
                              COUNT(DISTINCT connexions_list.user_id) AS users_count,
@@ -454,7 +455,7 @@ def feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70),

        params = {"project_id": project_id, "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
        # print(cur.mogrify(pg_query, params))
        print(cur.mogrify(pg_query, params))
        cur.execute(cur.mogrify(pg_query, params))
        rows = cur.fetchall()
        rows = __compute_weekly_percentage(helper.list_to_camel_case(rows))
@@ -475,12 +476,14 @@ def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_da
                                              time_constraint=True)
    event_table = JOURNEY_TYPES["CLICK"]["table"]
    event_column = JOURNEY_TYPES["CLICK"]["column"]
    extra_values = {}
    for f in filters:
        if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]):
            event_table = JOURNEY_TYPES[f["value"]]["table"]
            event_column = JOURNEY_TYPES[f["value"]]["column"]
        elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
            pg_sub_query.append(f"sessions.user_id = %(user_id)s")
            extra_values["user_id"] = f["value"]

    with pg_client.PostgresClient() as cur:
        pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count
@@ -488,7 +491,7 @@ def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_da
                       WHERE {" AND ".join(pg_sub_query)}
                         AND user_id IS NOT NULL;"""
        params = {"project_id": project_id, "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
                  "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
        # print(cur.mogrify(pg_query, params))
        # print("---------------------")
        cur.execute(cur.mogrify(pg_query, params))
@@ -505,16 +508,18 @@ def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_da
                       GROUP BY value
                       ORDER BY count DESC
                       LIMIT 7;"""
        # print(cur.mogrify(pg_query, params))
        # print("---------------------")
        # TODO: solve full scan
        print(cur.mogrify(pg_query, params))
        print("---------------------")
        cur.execute(cur.mogrify(pg_query, params))
        popularity = cur.fetchall()
        pg_query = f"""SELECT {event_column} AS value, COUNT(session_id) AS count
                       FROM {event_table} AS feature INNER JOIN sessions USING (session_id)
                       WHERE {" AND ".join(pg_sub_query)}
                       GROUP BY value;"""
        # print(cur.mogrify(pg_query, params))
        # print("---------------------")
        # TODO: solve full scan
        print(cur.mogrify(pg_query, params))
        print("---------------------")
        cur.execute(cur.mogrify(pg_query, params))
        frequencies = cur.fetchall()
        total_usage = sum([f["count"] for f in frequencies])
@@ -544,6 +549,7 @@ def feature_adoption(project_id, startTimestamp=TimeUTC.now(delta_days=-70), end
            default = False
        elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
            pg_sub_query.append(f"sessions.user_id = %(user_id)s")
            extra_values["user_id"] = f["value"]
    event_table = JOURNEY_TYPES[event_type]["table"]
    event_column = JOURNEY_TYPES[event_type]["column"]
    with pg_client.PostgresClient() as cur:
@@ -552,7 +558,7 @@ def feature_adoption(project_id, startTimestamp=TimeUTC.now(delta_days=-70), end
                       WHERE {" AND ".join(pg_sub_query)}
                         AND user_id IS NOT NULL;"""
        params = {"project_id": project_id, "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
                  "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
        # print(cur.mogrify(pg_query, params))
        # print("---------------------")
        cur.execute(cur.mogrify(pg_query, params))
@@ -562,7 +568,6 @@ def feature_adoption(project_id, startTimestamp=TimeUTC.now(delta_days=-70), end
                            {"type": "EVENT_VALUE", "value": event_value}], }
    pg_sub_query.append("feature.timestamp >= %(startTimestamp)s")
    pg_sub_query.append("feature.timestamp < %(endTimestamp)s")
    pg_sub_query.append(f"length({event_column})>2")
    if default:
        # get most used value
        pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count
@@ -579,6 +584,8 @@ def feature_adoption(project_id, startTimestamp=TimeUTC.now(delta_days=-70), end
        if row is not None:
            event_value = row["value"]
            extra_values["value"] = event_value
            if len(event_value) > 2:
                pg_sub_query.append(f"length({event_column})>2")
        pg_sub_query.append(f"feature.{event_column} = %(value)s")
        pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count
                       FROM {event_table} AS feature INNER JOIN sessions USING (session_id)
@@ -612,12 +619,12 @@ def feature_adoption_top_users(project_id, startTimestamp=TimeUTC.now(delta_days
            default = False
        elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
            pg_sub_query.append(f"sessions.user_id = %(user_id)s")
            extra_values["user_id"] = f["value"]
    event_table = JOURNEY_TYPES[event_type]["table"]
    event_column = JOURNEY_TYPES[event_type]["column"]
    with pg_client.PostgresClient() as cur:
        pg_sub_query.append("feature.timestamp >= %(startTimestamp)s")
        pg_sub_query.append("feature.timestamp < %(endTimestamp)s")
        pg_sub_query.append(f"length({event_column})>2")
        if default:
            # get most used value
            pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count
@@ -634,6 +641,8 @@ def feature_adoption_top_users(project_id, startTimestamp=TimeUTC.now(delta_days
        if row is not None:
            event_value = row["value"]
            extra_values["value"] = event_value
            if len(event_value) > 2:
                pg_sub_query.append(f"length({event_column})>2")
        pg_sub_query.append(f"feature.{event_column} = %(value)s")
        pg_query = f"""SELECT user_id, COUNT(DISTINCT session_id) AS count
                       FROM {event_table} AS feature
@@ -671,20 +680,20 @@ def feature_adoption_daily_usage(project_id, startTimestamp=TimeUTC.now(delta_da
            default = False
        elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
            pg_sub_query_chart.append(f"sessions.user_id = %(user_id)s")
            extra_values["user_id"] = f["value"]
    event_table = JOURNEY_TYPES[event_type]["table"]
    event_column = JOURNEY_TYPES[event_type]["column"]
    with pg_client.PostgresClient() as cur:
        pg_sub_query_chart.append("feature.timestamp >= %(startTimestamp)s")
        pg_sub_query_chart.append("feature.timestamp < %(endTimestamp)s")
        pg_sub_query_chart.append(f"length({event_column})>2")
        pg_sub_query.append("feature.timestamp >= %(startTimestamp)s")
        pg_sub_query.append("feature.timestamp < %(endTimestamp)s")
        pg_sub_query.append(f"length({event_column})>2")
        if default:
            # get most used value
            pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count
                           FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id)
                           WHERE {" AND ".join(pg_sub_query)}
                             AND length({event_column})>2
                           GROUP BY value
                           ORDER BY count DESC
                           LIMIT 1;"""
@@ -695,6 +704,8 @@ def feature_adoption_daily_usage(project_id, startTimestamp=TimeUTC.now(delta_da
        if row is not None:
            event_value = row["value"]
            extra_values["value"] = event_value
            if len(event_value) > 2:
                pg_sub_query.append(f"length({event_column})>2")
        pg_sub_query_chart.append(f"feature.{event_column} = %(value)s")
        pg_query = f"""SELECT generated_timestamp AS timestamp,
                              COALESCE(COUNT(session_id), 0) AS count
@@ -707,8 +718,8 @@ def feature_adoption_daily_usage(project_id, startTimestamp=TimeUTC.now(delta_da
                       ORDER BY generated_timestamp;"""
        params = {"step_size": TimeUTC.MS_DAY, "project_id": project_id, "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
        # print(cur.mogrify(pg_query, params))
        # print("---------------------")
        print(cur.mogrify(pg_query, params))
        print("---------------------")
        cur.execute(cur.mogrify(pg_query, params))
        rows = cur.fetchall()
        return {"users": helper.list_to_camel_case(rows),
@@ -725,12 +736,14 @@ def feature_intensity(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en
    pg_sub_query.append("feature.timestamp < %(endTimestamp)s")
    event_table = JOURNEY_TYPES["CLICK"]["table"]
    event_column = JOURNEY_TYPES["CLICK"]["column"]
    extra_values = {}
    for f in filters:
        if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]):
            event_table = JOURNEY_TYPES[f["value"]]["table"]
            event_column = JOURNEY_TYPES[f["value"]]["column"]
        elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
            pg_sub_query.append(f"sessions.user_id = %(user_id)s")
            extra_values["user_id"] = f["value"]
    pg_sub_query.append(f"length({event_column})>2")
    with pg_client.PostgresClient() as cur:
        pg_query = f"""SELECT {event_column} AS value, AVG(DISTINCT session_id) AS avg
@@ -740,10 +753,10 @@ def feature_intensity(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en
                       ORDER BY avg DESC
                       LIMIT 7;"""
        params = {"project_id": project_id, "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}

        # print(cur.mogrify(pg_query, params))
        # print("---------------------")
                  "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
        # TODO: solve full scan issue
        print(cur.mogrify(pg_query, params))
        print("---------------------")
        cur.execute(cur.mogrify(pg_query, params))
        rows = cur.fetchall()
@@ -759,11 +772,13 @@ def users_active(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTime

    pg_sub_query_chart.append("user_id IS NOT NULL")
    period = "DAY"
    extra_values = {}
    for f in filters:
        if f["type"] == "PERIOD" and f["value"] in ["DAY", "WEEK"]:
            period = f["value"]
        elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
            pg_sub_query_chart.append(f"sessions.user_id = %(user_id)s")
            extra_values["user_id"] = f["value"]

    with pg_client.PostgresClient() as cur:
        pg_query = f"""SELECT AVG(count) AS avg, JSONB_AGG(chart) AS chart
@@ -780,7 +795,8 @@ def users_active(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTime
                  "project_id": project_id,
                  "startTimestamp": TimeUTC.trunc_day(startTimestamp) if period == "DAY" else TimeUTC.trunc_week(
                      startTimestamp),
                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
                  "endTimestamp": endTimestamp, **__get_constraint_values(args),
                  **extra_values}
        # print(cur.mogrify(pg_query, params))
        # print("---------------------")
        cur.execute(cur.mogrify(pg_query, params))
@@ -856,7 +872,8 @@ def users_slipping(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTi
        if row is not None:
            event_value = row["value"]
            extra_values["value"] = event_value

            if len(event_value) > 2:
                pg_sub_query.append(f"length({event_column})>2")
        pg_query = f"""SELECT user_id, last_time, interactions_count, MIN(start_ts) AS first_seen, MAX(start_ts) AS last_seen
                       FROM (SELECT user_id, MAX(timestamp) AS last_time, COUNT(DISTINCT session_id) AS interactions_count
                             FROM {event_table} AS feature INNER JOIN sessions USING (session_id)
@@ -874,7 +891,7 @@ def users_slipping(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTi
    return {
        "startTimestamp": startTimestamp,
        "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}],
        "chart": helper.list_to_camel_case(rows)
        "list": helper.list_to_camel_case(rows)
    }

@@ -682,7 +682,7 @@ CREATE INDEX pages_visually_complete_idx ON events.pages (visually_complete) WHE
CREATE INDEX pages_dom_building_time_idx ON events.pages (dom_building_time) WHERE dom_building_time > 0;
CREATE INDEX pages_load_time_idx ON events.pages (load_time) WHERE load_time > 0;
CREATE INDEX pages_base_path_session_id_timestamp_idx ON events.pages (base_path, session_id, timestamp);

CREATE INDEX pages_session_id_timestamp_idx ON events.pages (session_id, timestamp);

CREATE TABLE events.clicks
(
@@ -702,6 +702,7 @@ CREATE INDEX clicks_label_session_id_timestamp_idx ON events.clicks (label, sess
CREATE INDEX clicks_url_idx ON events.clicks (url);
CREATE INDEX clicks_url_gin_idx ON events.clicks USING GIN (url gin_trgm_ops);
CREATE INDEX clicks_url_session_id_timestamp_selector_idx ON events.clicks (url, session_id, timestamp, selector);
CREATE INDEX clicks_session_id_timestamp_idx ON events.clicks (session_id, timestamp);

CREATE TABLE events.inputs

@@ -4,4 +4,5 @@ CREATE INDEX sessions_user_id_useridNN_idx ON sessions (user_id) WHERE user_id I
CREATE INDEX sessions_uid_projectid_startts_sessionid_uidNN_durGTZ_idx ON sessions (user_id, project_id, start_ts, session_id) WHERE user_id IS NOT NULL AND duration > 0;
CREATE INDEX pages_base_path_base_pathLNGT2_idx ON events.pages (base_path) WHERE length(base_path) > 2;

CREATE INDEX clicks_session_id_timestamp_idx ON events.clicks (session_id, timestamp);
COMMIT;

@@ -682,7 +682,7 @@ CREATE INDEX pages_session_id_speed_indexgt0nn_idx ON events.pages (session_id,
CREATE INDEX pages_session_id_timestamp_dom_building_timegt0nn_idx ON events.pages (session_id, timestamp, dom_building_time) WHERE dom_building_time > 0 AND dom_building_time IS NOT NULL;
CREATE INDEX pages_base_path_session_id_timestamp_idx ON events.pages (base_path, session_id, timestamp);
CREATE INDEX pages_base_path_base_pathLNGT2_idx ON events.pages (base_path) WHERE length(base_path) > 2;

CREATE INDEX pages_session_id_timestamp_idx ON events.pages (session_id, timestamp);

CREATE TABLE events.clicks
(
@@ -702,6 +702,7 @@ CREATE INDEX clicks_label_session_id_timestamp_idx ON events.clicks (label, sess
CREATE INDEX clicks_url_idx ON events.clicks (url);
CREATE INDEX clicks_url_gin_idx ON events.clicks USING GIN (url gin_trgm_ops);
CREATE INDEX clicks_url_session_id_timestamp_selector_idx ON events.clicks (url, session_id, timestamp, selector);
CREATE INDEX clicks_session_id_timestamp_idx ON events.clicks (session_id, timestamp);

CREATE TABLE events.inputs
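
For context, the new pages_session_id_timestamp_idx and clicks_session_id_timestamp_idx indexes match the access pattern of the insights queries above, which join an event table to sessions via session_id and bound feature.timestamp to the requested window; the "TODO: solve full scan" notes in the Python hunks appear to refer to this pattern. A minimal sketch of that pairing follows; the literal timestamp and project_id values and the choice of url as the selected column are illustrative assumptions, not taken from this commit:

-- Index added by the migration above.
CREATE INDEX clicks_session_id_timestamp_idx ON events.clicks (session_id, timestamp);

-- Same shape as the insights queries built in the Python code above, with the
-- %(startTimestamp)s / %(endTimestamp)s / %(project_id)s placeholders filled
-- with sample values (illustrative only).
SELECT url                AS value,
       COUNT(session_id)  AS count
FROM events.clicks AS feature
         INNER JOIN public.sessions USING (session_id)
WHERE feature.timestamp >= 1609459200000
  AND feature.timestamp < 1612137600000
  AND sessions.project_id = 1
GROUP BY value;

With the composite index, the planner can resolve both the session_id join key and the feature.timestamp range from the same index on events.clicks rather than scanning the whole table.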