Merge remote-tracking branch 'origin/api_insights' into dev

# Conflicts:
#	api/chalicelib/core/insights.py
#	api/chalicelib/utils/TimeUTC.py
Author: Taha Yassine Kraiem
Date:   2021-09-27 13:20:45 +02:00
Commit: e22f277f90
5 changed files with 749 additions and 18 deletions


@@ -26,7 +26,18 @@ def get_insights_journey(projectId, context):
params = app.current_request.query_params
args = dashboard.dashboard_args(params)
return {"data": insights.get_journey(project_id=projectId, **{**data, **args})}
return {"data": insights.journey(project_id=projectId, **{**data, **args})}
@app.route('/{projectId}/insights/users_acquisition', methods=['GET', 'POST'])
def get_users_acquisition(projectId, context):
data = app.current_request.json_body
if data is None:
data = {}
params = app.current_request.query_params
args = dashboard.dashboard_args(params)
return {"data": insights.users_acquisition(project_id=projectId, **{**data, **args})}
@app.route('/{projectId}/insights/users_retention', methods=['GET', 'POST'])
@@ -37,7 +48,105 @@ def get_users_retention(projectId, context):
params = app.current_request.query_params
args = dashboard.dashboard_args(params)
return {"data": insights.get_retention(project_id=projectId, **{**data, **args})}
return {"data": insights.users_retention(project_id=projectId, **{**data, **args})}
@app.route('/{projectId}/insights/feature_retention', methods=['GET', 'POST'])
def get_feature_retention(projectId, context):
data = app.current_request.json_body
if data is None:
data = {}
params = app.current_request.query_params
args = dashboard.dashboard_args(params)
return {"data": insights.feature_retention(project_id=projectId, **{**data, **args})}
@app.route('/{projectId}/insights/feature_acquisition', methods=['GET', 'POST'])
def get_feature_acquisition(projectId, context):
data = app.current_request.json_body
if data is None:
data = {}
params = app.current_request.query_params
args = dashboard.dashboard_args(params)
return {"data": insights.feature_acquisition(project_id=projectId, **{**data, **args})}
@app.route('/{projectId}/insights/feature_popularity_frequency', methods=['GET', 'POST'])
def get_feature_popularity_frequency(projectId, context):
data = app.current_request.json_body
if data is None:
data = {}
params = app.current_request.query_params
args = dashboard.dashboard_args(params)
return {"data": insights.feature_popularity_frequency(project_id=projectId, **{**data, **args})}
@app.route('/{projectId}/insights/feature_intensity', methods=['GET', 'POST'])
def get_feature_intensity(projectId, context):
data = app.current_request.json_body
if data is None:
data = {}
params = app.current_request.query_params
args = dashboard.dashboard_args(params)
return {"data": insights.feature_intensity(project_id=projectId, **{**data, **args})}
@app.route('/{projectId}/insights/feature_adoption', methods=['GET', 'POST'])
def get_feature_adoption(projectId, context):
data = app.current_request.json_body
if data is None:
data = {}
params = app.current_request.query_params
args = dashboard.dashboard_args(params)
return {"data": insights.feature_adoption(project_id=projectId, **{**data, **args})}
@app.route('/{projectId}/insights/feature_adoption_top_users', methods=['GET', 'POST'])
def get_feature_adoption_top_users(projectId, context):
data = app.current_request.json_body
if data is None:
data = {}
params = app.current_request.query_params
args = dashboard.dashboard_args(params)
return {"data": insights.feature_adoption_top_users(project_id=projectId, **{**data, **args})}
@app.route('/{projectId}/insights/users_active', methods=['GET', 'POST'])
def get_users_active(projectId, context):
data = app.current_request.json_body
if data is None:
data = {}
params = app.current_request.query_params
args = dashboard.dashboard_args(params)
return {"data": insights.users_active(project_id=projectId, **{**data, **args})}
@app.route('/{projectId}/insights/users_power', methods=['GET', 'POST'])
def get_users_power(projectId, context):
data = app.current_request.json_body
if data is None:
data = {}
params = app.current_request.query_params
args = dashboard.dashboard_args(params)
return {"data": insights.users_power(project_id=projectId, **{**data, **args})}
@app.route('/{projectId}/insights/users_slipping', methods=['GET', 'POST'])
def get_users_slipping(projectId, context):
data = app.current_request.json_body
if data is None:
data = {}
params = app.current_request.query_params
args = dashboard.dashboard_args(params)
return {"data": insights.users_slipping(project_id=projectId, **{**data, **args})}

api/chalicelib/core/insights.py

@@ -1,5 +1,4 @@
from chalicelib.core import sessions_metas
from chalicelib.utils import args_transformer
from chalicelib.utils import helper, dev
from chalicelib.utils import pg_client
from chalicelib.utils.TimeUTC import TimeUTC
@@ -32,7 +31,7 @@ JOURNEY_TYPES = {
@dev.timed
-def get_journey(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), filters=[], **args):
+def journey(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), filters=[], **args):
pg_sub_query_subset = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions",
time_constraint=True)
event_start = None
@@ -105,7 +104,7 @@ def get_journey(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimest
return __transform_journey(rows)
-def __compute_retention_percentage(rows):
+def __compute_weekly_percentage(rows):
if rows is None or len(rows) == 0:
return rows
t = -1
@@ -117,8 +116,30 @@ def __compute_retention_percentage(rows):
def __complete_retention(rows, start_date, end_date=None):
-if rows is None or len(rows) == 0:
-return rows
+if rows is None:
+return []
max_week = 10
for i in range(max_week):
if end_date is not None and start_date + i * TimeUTC.MS_WEEK >= end_date:
break
neutral = {
"firstConnexionWeek": start_date,
"week": i,
"usersCount": 0,
"connectedUsers": [],
"percentage": 0
}
if i < len(rows) \
and i != rows[i]["week"]:
rows.insert(i, neutral)
elif i >= len(rows):
rows.append(neutral)
return rows
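
__complete_retention pads the weekly series out to a fixed 10-week window, inserting a zero-valued "neutral" row for every week the query returned nothing. A small illustration with invented rows (MS_WEEK mirrors TimeUTC.MS_WEEK):

MS_WEEK = 7 * 24 * 60 * 60 * 1000  # same value as TimeUTC.MS_WEEK
start = 1630281600000              # Monday 2021-08-30 00:00:00 UTC, in ms
rows = [
    {"firstConnexionWeek": start, "week": 0, "usersCount": 5, "connectedUsers": ["u1"], "percentage": 100},
    {"firstConnexionWeek": start, "week": 3, "usersCount": 2, "connectedUsers": ["u2"], "percentage": 40},
]
# __complete_retention(rows, start_date=start, end_date=start + 5 * MS_WEEK)
# inserts neutral rows for weeks 1 and 2 and appends one for week 4,
# yielding a dense chart covering weeks 0..4.
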
def __complete_acquisition(rows, start_date, end_date=None):
if rows is None:
return []
max_week = 10
week = 0
delta_date = 0
@@ -167,13 +188,62 @@ def __complete_retention(rows, start_date, end_date=None):
@dev.timed
-def get_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[],
-**args):
+def users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[],
+**args):
startTimestamp = TimeUTC.trunc_week(startTimestamp)
endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK
pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions",
time_constraint=True)
pg_sub_query.append("user_id IS NOT NULL")
pg_sub_query.append("DATE_TRUNC('week', to_timestamp(start_ts / 1000)) = to_timestamp(%(startTimestamp)s / 1000)")
with pg_client.PostgresClient() as cur:
pg_query = f"""SELECT FLOOR(DATE_PART('day', connexion_week - DATE_TRUNC('week', to_timestamp(%(startTimestamp)s / 1000)::timestamp)) / 7)::integer AS week,
COUNT(DISTINCT connexions_list.user_id) AS users_count,
ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users
FROM (SELECT DISTINCT user_id
FROM sessions
WHERE {" AND ".join(pg_sub_query)}
AND DATE_PART('week', to_timestamp((sessions.start_ts - %(startTimestamp)s)/1000)) = 1
AND NOT EXISTS((SELECT 1
FROM sessions AS bsess
WHERE bsess.start_ts < %(startTimestamp)s
AND project_id = %(project_id)s
AND bsess.user_id = sessions.user_id
LIMIT 1))
GROUP BY user_id) AS users_list
LEFT JOIN LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week,
user_id
FROM sessions
WHERE users_list.user_id = sessions.user_id
AND %(startTimestamp)s <=sessions.start_ts
AND sessions.project_id = %(project_id)s
AND sessions.start_ts < (%(endTimestamp)s - 1)
GROUP BY connexion_week, user_id
) AS connexions_list ON (TRUE)
GROUP BY week
ORDER BY week;"""
params = {"project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
# print(cur.mogrify(pg_query, params))
cur.execute(cur.mogrify(pg_query, params))
rows = cur.fetchall()
rows = __compute_weekly_percentage(helper.list_to_camel_case(rows))
return {
"startTimestamp": startTimestamp,
"chart": __complete_retention(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now())
}
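
__compute_weekly_percentage is only partially visible in this hunk, but from its use here the chart rows carry a percentage alongside the raw counts, and the payload is wrapped with the cohort's startTimestamp. An illustrative response (numbers invented):

example = {
    "startTimestamp": 1630281600000,  # truncated to the cohort week's start
    "chart": [
        {"week": 0, "usersCount": 42, "connectedUsers": ["u1", "u2"], "percentage": 100},
        {"week": 1, "usersCount": 17, "connectedUsers": ["u2"], "percentage": 40.5},
    ],
}
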
@dev.timed
def users_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(),
filters=[],
**args):
startTimestamp = TimeUTC.trunc_week(startTimestamp)
endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK
pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions",
time_constraint=True)
pg_sub_query.append("user_id IS NOT NULL")
with pg_client.PostgresClient() as cur:
pg_query = f"""SELECT EXTRACT(EPOCH FROM first_connexion_week::date)::bigint*1000 AS first_connexion_week,
FLOOR(DATE_PART('day', connexion_week - first_connexion_week) / 7)::integer AS week,
@@ -181,11 +251,10 @@ def get_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTim
ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users
FROM (SELECT DISTINCT user_id, MIN(DATE_TRUNC('week', to_timestamp(start_ts / 1000))) AS first_connexion_week
FROM sessions
WHERE {" AND ".join(pg_sub_query)}
AND user_id IS NOT NULL
WHERE {" AND ".join(pg_sub_query)}
AND NOT EXISTS((SELECT 1
FROM sessions AS bsess
-WHERE bsess.start_ts<EXTRACT('EPOCH' FROM DATE_TRUNC('week', to_timestamp(%(startTimestamp)s / 1000))) * 1000
+WHERE bsess.start_ts<%(startTimestamp)s
AND project_id = %(project_id)s
AND bsess.user_id = sessions.user_id
LIMIT 1))
@@ -207,5 +276,541 @@ def get_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTim
# print(cur.mogrify(pg_query, params))
cur.execute(cur.mogrify(pg_query, params))
rows = cur.fetchall()
-rows = __compute_retention_percentage(helper.list_to_camel_case(rows))
-return __complete_retention(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now())
+rows = __compute_weekly_percentage(helper.list_to_camel_case(rows))
+return {
+"startTimestamp": startTimestamp,
+"chart": __complete_acquisition(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now())
+}
@dev.timed
def feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(),
filters=[],
**args):
startTimestamp = TimeUTC.trunc_week(startTimestamp)
endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK
pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions",
time_constraint=True)
pg_sub_query.append("user_id IS NOT NULL")
pg_sub_query.append("feature.timestamp >= %(startTimestamp)s")
pg_sub_query.append("feature.timestamp < %(endTimestamp)s")
event_type = "PAGES"
event_value = "/"
extra_values = {}
default = True
for f in filters:
if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]):
event_type = f["value"]
elif f["type"] == "EVENT_VALUE":
event_value = f["value"]
default = False
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
pg_sub_query.append(f"sessions.user_id = %(user_id)s")
extra_values["user_id"] = f["value"]
event_table = JOURNEY_TYPES[event_type]["table"]
event_column = JOURNEY_TYPES[event_type]["column"]
pg_sub_query.append(f"feature.{event_column} = %(value)s")
with pg_client.PostgresClient() as cur:
if default:
# get most used value
pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count
FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id)
WHERE {" AND ".join(pg_sub_query[:-1])}
AND length({event_column}) > 2
GROUP BY value
ORDER BY count DESC
LIMIT 1;"""
params = {"project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
cur.execute(cur.mogrify(pg_query, params))
row = cur.fetchone()
if row is not None:
event_value = row["value"]
extra_values["value"] = event_value
pg_query = f"""SELECT FLOOR(DATE_PART('day', connexion_week - to_timestamp(%(startTimestamp)s/1000)) / 7)::integer AS week,
COUNT(DISTINCT connexions_list.user_id) AS users_count,
ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users
FROM (SELECT DISTINCT user_id
FROM sessions INNER JOIN {event_table} AS feature USING (session_id)
WHERE {" AND ".join(pg_sub_query)}
AND DATE_PART('week', to_timestamp((sessions.start_ts - %(startTimestamp)s)/1000)) = 1
AND NOT EXISTS((SELECT 1
FROM sessions AS bsess INNER JOIN {event_table} AS bfeature USING (session_id)
WHERE bsess.start_ts<%(startTimestamp)s
AND project_id = %(project_id)s
AND bsess.user_id = sessions.user_id
AND bfeature.timestamp<%(startTimestamp)s
AND bfeature.{event_column}=%(value)s
LIMIT 1))
GROUP BY user_id) AS users_list
LEFT JOIN LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week,
user_id
FROM sessions
INNER JOIN {event_table} AS feature USING (session_id)
WHERE users_list.user_id = sessions.user_id
AND %(startTimestamp)s <= sessions.start_ts
AND sessions.project_id = %(project_id)s
AND sessions.start_ts < (%(endTimestamp)s - 1)
AND feature.timestamp >= %(startTimestamp)s
AND feature.timestamp < %(endTimestamp)s
AND feature.{event_column} = %(value)s
GROUP BY connexion_week, user_id) AS connexions_list ON (TRUE)
GROUP BY week
ORDER BY week;"""
params = {"project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
# print(cur.mogrify(pg_query, params))
cur.execute(cur.mogrify(pg_query, params))
rows = cur.fetchall()
rows = __compute_weekly_percentage(helper.list_to_camel_case(rows))
return {
"startTimestamp": startTimestamp,
"filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}],
"chart": __complete_retention(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now())
}
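
feature_retention and the sibling functions below resolve the tracked feature from the filters; when no EVENT_VALUE is given, default stays True and the most-used value is looked up first. A sketch of a request body that pins the feature explicitly (the click label is hypothetical):

payload = {
    "filters": [
        {"type": "EVENT_TYPE", "value": "CLICK"},         # must be a JOURNEY_TYPES key
        {"type": "EVENT_VALUE", "value": "Add to cart"},  # hypothetical label; sets default = False
    ]
}
# feature_retention(project_id=1, **payload) then skips the most-used-value query.
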
@dev.timed
def feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(),
filters=[],
**args):
startTimestamp = TimeUTC.trunc_week(startTimestamp)
endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK
pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions",
time_constraint=True)
pg_sub_query.append("user_id IS NOT NULL")
pg_sub_query.append("feature.timestamp >= %(startTimestamp)s")
pg_sub_query.append("feature.timestamp < %(endTimestamp)s")
event_type = "PAGES"
event_value = "/"
extra_values = {}
default = True
for f in filters:
if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]):
event_type = f["value"]
elif f["type"] == "EVENT_VALUE":
event_value = f["value"]
default = False
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
pg_sub_query.append(f"sessions.user_id = %(user_id)s")
extra_values["user_id"] = f["value"]
event_table = JOURNEY_TYPES[event_type]["table"]
event_column = JOURNEY_TYPES[event_type]["column"]
pg_sub_query.append(f"feature.{event_column} = %(value)s")
with pg_client.PostgresClient() as cur:
if default:
# get most used value
pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count
FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id)
WHERE {" AND ".join(pg_sub_query[:-1])}
AND length({event_column}) > 2
GROUP BY value
ORDER BY count DESC
LIMIT 1;"""
params = {"project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
cur.execute(cur.mogrify(pg_query, params))
row = cur.fetchone()
if row is not None:
event_value = row["value"]
extra_values["value"] = event_value
pg_query = f"""SELECT EXTRACT(EPOCH FROM first_connexion_week::date)::bigint*1000 AS first_connexion_week,
FLOOR(DATE_PART('day', connexion_week - first_connexion_week) / 7)::integer AS week,
COUNT(DISTINCT connexions_list.user_id) AS users_count,
ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users
FROM (SELECT user_id, DATE_TRUNC('week', to_timestamp(first_connexion_week / 1000)) AS first_connexion_week
FROM(SELECT DISTINCT user_id, MIN(start_ts) AS first_connexion_week
FROM sessions INNER JOIN {event_table} AS feature USING (session_id)
WHERE {" AND ".join(pg_sub_query)}
AND NOT EXISTS((SELECT 1
FROM sessions AS bsess INNER JOIN {event_table} AS bfeature USING (session_id)
WHERE bsess.start_ts<%(startTimestamp)s
AND project_id = %(project_id)s
AND bsess.user_id = sessions.user_id
AND bfeature.timestamp<%(startTimestamp)s
AND bfeature.{event_column}=%(value)s
LIMIT 1))
GROUP BY user_id) AS raw_users_list) AS users_list
LEFT JOIN LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week,
user_id
FROM sessions INNER JOIN {event_table} AS feature USING(session_id)
WHERE users_list.user_id = sessions.user_id
AND first_connexion_week <=
DATE_TRUNC('week', to_timestamp(sessions.start_ts / 1000)::timestamp)
AND sessions.project_id = %(project_id)s
AND sessions.start_ts < (%(endTimestamp)s - 1)
AND feature.timestamp >= %(startTimestamp)s
AND feature.timestamp < %(endTimestamp)s
AND feature.{event_column} = %(value)s
GROUP BY connexion_week, user_id) AS connexions_list ON (TRUE)
GROUP BY first_connexion_week, week
ORDER BY first_connexion_week, week;"""
params = {"project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
# print(cur.mogrify(pg_query, params))
cur.execute(cur.mogrify(pg_query, params))
rows = cur.fetchall()
rows = __compute_weekly_percentage(helper.list_to_camel_case(rows))
return {
"startTimestamp": startTimestamp,
"filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}],
"chart": __complete_acquisition(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now())
}
@dev.timed
def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(),
filters=[],
**args):
startTimestamp = TimeUTC.trunc_week(startTimestamp)
endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK
pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions",
time_constraint=True)
event_table = JOURNEY_TYPES["CLICK"]["table"]
event_column = JOURNEY_TYPES["CLICK"]["column"]
extra_values = {}
for f in filters:
if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]):
event_table = JOURNEY_TYPES[f["value"]]["table"]
event_column = JOURNEY_TYPES[f["value"]]["column"]
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
pg_sub_query.append("sessions.user_id = %(user_id)s")
extra_values["user_id"] = f["value"]
with pg_client.PostgresClient() as cur:
pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count
FROM sessions
WHERE {" AND ".join(pg_sub_query)}
AND user_id IS NOT NULL;"""
params = {"project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
# print(cur.mogrify(pg_query, params))
# print("---------------------")
cur.execute(cur.mogrify(pg_query, params))
all_user_count = cur.fetchone()["count"]
if all_user_count == 0:
return []
pg_sub_query.append("feature.timestamp >= %(startTimestamp)s")
pg_sub_query.append("feature.timestamp < %(endTimestamp)s")
pg_sub_query.append(f"length({event_column})>2")
pg_query = f"""SELECT {event_column} AS value, COUNT(DISTINCT user_id) AS count
FROM {event_table} AS feature INNER JOIN sessions USING (session_id)
WHERE {" AND ".join(pg_sub_query)}
AND user_id IS NOT NULL
GROUP BY value
ORDER BY count DESC
LIMIT 7;"""
# print(cur.mogrify(pg_query, params))
# print("---------------------")
cur.execute(cur.mogrify(pg_query, params))
popularity = cur.fetchall()
pg_query = f"""SELECT {event_column} AS value, COUNT(session_id) AS count
FROM {event_table} AS feature INNER JOIN sessions USING (session_id)
WHERE {" AND ".join(pg_sub_query)}
GROUP BY value;"""
# print(cur.mogrify(pg_query, params))
# print("---------------------")
cur.execute(cur.mogrify(pg_query, params))
frequencies = cur.fetchall()
total_usage = sum([f["count"] for f in frequencies])
frequencies = {f["value"]: f["count"] for f in frequencies}
for p in popularity:
p["popularity"] = p.pop("count") / all_user_count
p["frequency"] = frequencies[p["value"]] / total_usage
return popularity
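
The two ratios mean different things: popularity is the share of identified users who used the value at least once, frequency is that value's share of all recorded usage. A worked example with invented numbers:

all_user_count = 200                                   # identified users in the period
popularity_row = {"value": "/checkout", "count": 50}   # 50 distinct users hit /checkout
frequencies = {"/checkout": 400, "/cart": 600}
total_usage = sum(frequencies.values())                # 1000
popularity = popularity_row["count"] / all_user_count  # 0.25 -> 25% of users
frequency = frequencies["/checkout"] / total_usage     # 0.40 -> 40% of recorded usage
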
@dev.timed
def feature_adoption(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(),
filters=[],
**args):
pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions",
time_constraint=True)
event_type = "CLICK"
event_value = '/'
extra_values = {}
default = True
for f in filters:
if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]):
event_type = f["value"]
elif f["type"] == "EVENT_VALUE":
event_value = f["value"]
default = False
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
pg_sub_query.append(f"sessions.user_id = %(user_id)s")
event_table = JOURNEY_TYPES[event_type]["table"]
event_column = JOURNEY_TYPES[event_type]["column"]
with pg_client.PostgresClient() as cur:
pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count
FROM sessions
WHERE {" AND ".join(pg_sub_query)}
AND user_id IS NOT NULL;"""
params = {"project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
# print(cur.mogrify(pg_query, params))
# print("---------------------")
cur.execute(cur.mogrify(pg_query, params))
all_user_count = cur.fetchone()["count"]
if all_user_count == 0:
return {"adoption": 0, "target": 0, "filters": [{"type": "EVENT_TYPE", "value": event_type},
{"type": "EVENT_VALUE", "value": event_value}], }
pg_sub_query.append("feature.timestamp >= %(startTimestamp)s")
pg_sub_query.append("feature.timestamp < %(endTimestamp)s")
pg_sub_query.append(f"length({event_column})>2")
if default:
# get most used value
pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count
FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id)
WHERE {" AND ".join(pg_sub_query[:-1])}
AND length({event_column}) > 2
GROUP BY value
ORDER BY count DESC
LIMIT 1;"""
params = {"project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
cur.execute(cur.mogrify(pg_query, params))
row = cur.fetchone()
if row is not None:
event_value = row["value"]
extra_values["value"] = event_value
pg_sub_query.append(f"feature.{event_column} = %(value)s")
pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count
FROM {event_table} AS feature INNER JOIN sessions USING (session_id)
WHERE {" AND ".join(pg_sub_query)}
AND user_id IS NOT NULL;"""
params = {"project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
# print(cur.mogrify(pg_query, params))
# print("---------------------")
cur.execute(cur.mogrify(pg_query, params))
adoption = cur.fetchone()["count"] / all_user_count
return {"target": all_user_count, "adoption": adoption,
"filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]}
@dev.timed
def feature_adoption_top_users(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(),
filters=[],
**args):
pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions",
time_constraint=True)
pg_sub_query.append("user_id IS NOT NULL")
event_type = "CLICK"
event_value = '/'
extra_values = {}
default = True
for f in filters:
if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]):
event_type = f["value"]
elif f["type"] == "EVENT_VALUE":
event_value = f["value"]
default = False
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
pg_sub_query.append(f"sessions.user_id = %(user_id)s")
event_table = JOURNEY_TYPES[event_type]["table"]
event_column = JOURNEY_TYPES[event_type]["column"]
with pg_client.PostgresClient() as cur:
pg_sub_query.append("feature.timestamp >= %(startTimestamp)s")
pg_sub_query.append("feature.timestamp < %(endTimestamp)s")
pg_sub_query.append(f"length({event_column})>2")
if default:
# get most used value
pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count
FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id)
WHERE {" AND ".join(pg_sub_query[:-1])}
AND length({event_column}) > 2
GROUP BY value
ORDER BY count DESC
LIMIT 1;"""
params = {"project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
cur.execute(cur.mogrify(pg_query, params))
row = cur.fetchone()
if row is not None:
event_value = row["value"]
extra_values["value"] = event_value
pg_sub_query.append(f"feature.{event_column} = %(value)s")
pg_query = f"""SELECT user_id, COUNT(DISTINCT session_id) AS count
FROM {event_table} AS feature
INNER JOIN sessions USING (session_id)
WHERE {" AND ".join(pg_sub_query)}
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;"""
params = {"project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
# print(cur.mogrify(pg_query, params))
# print("---------------------")
cur.execute(cur.mogrify(pg_query, params))
rows = cur.fetchall()
return {"users": helper.list_to_camel_case(rows),
"filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]}
@dev.timed
def feature_intensity(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(),
filters=[],
**args):
pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions",
time_constraint=True)
pg_sub_query.append("feature.timestamp >= %(startTimestamp)s")
pg_sub_query.append("feature.timestamp < %(endTimestamp)s")
event_table = JOURNEY_TYPES["CLICK"]["table"]
event_column = JOURNEY_TYPES["CLICK"]["column"]
for f in filters:
if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]):
event_table = JOURNEY_TYPES[f["value"]]["table"]
event_column = JOURNEY_TYPES[f["value"]]["column"]
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
pg_sub_query.append(f"sessions.user_id = %(user_id)s")
pg_sub_query.append(f"length({event_column})>2")
with pg_client.PostgresClient() as cur:
pg_query = f"""SELECT {event_column} AS value, AVG(DISTINCT session_id) AS avg
FROM {event_table} AS feature INNER JOIN sessions USING (session_id)
WHERE {" AND ".join(pg_sub_query)}
GROUP BY value
ORDER BY avg DESC
LIMIT 7;"""
params = {"project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
# print(cur.mogrify(pg_query, params))
# print("---------------------")
cur.execute(cur.mogrify(pg_query, params))
rows = cur.fetchall()
return rows
@dev.timed
def users_active(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(),
filters=[],
**args):
pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=True,
chart=True, data=args)
pg_sub_query_chart.append("user_id IS NOT NULL")
period = "DAY"
extra_values = {}
for f in filters:
if f["type"] == "PERIOD" and f["value"] in ["DAY", "WEEK"]:
period = f["value"]
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
pg_sub_query_chart.append("sessions.user_id = %(user_id)s")
extra_values["user_id"] = f["value"]
with pg_client.PostgresClient() as cur:
pg_query = f"""SELECT AVG(count) AS avg, JSONB_AGG(chart) AS chart
FROM (SELECT generated_timestamp AS timestamp,
COALESCE(COUNT(users), 0) AS count
FROM generate_series(%(startTimestamp)s, %(endTimestamp)s, %(step_size)s) AS generated_timestamp
LEFT JOIN LATERAL ( SELECT DISTINCT user_id
FROM public.sessions
WHERE {" AND ".join(pg_sub_query_chart)}
) AS users ON (TRUE)
GROUP BY generated_timestamp
ORDER BY generated_timestamp) AS chart;"""
params = {"step_size": TimeUTC.MS_DAY if period == "DAY" else TimeUTC.MS_WEEK,
"project_id": project_id,
"startTimestamp": TimeUTC.trunc_day(startTimestamp) if period == "DAY" else TimeUTC.trunc_week(
startTimestamp),
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
# print(cur.mogrify(pg_query, params))
# print("---------------------")
cur.execute(cur.mogrify(pg_query, params))
row_users = cur.fetchone()
return row_users
@dev.timed
def users_power(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(),
filters=[], **args):
pg_sub_query = __get_constraints(project_id=project_id, time_constraint=True, chart=False, data=args)
pg_sub_query.append("user_id IS NOT NULL")
with pg_client.PostgresClient() as cur:
pg_query = f"""SELECT AVG(count) AS avg, JSONB_AGG(day_users_partition) AS partition
FROM (SELECT number_of_days, COUNT(user_id) AS count
FROM (SELECT user_id, COUNT(DISTINCT DATE_TRUNC('day', to_timestamp(start_ts / 1000))) AS number_of_days
FROM sessions
WHERE {" AND ".join(pg_sub_query)}
GROUP BY 1) AS users_connexions
GROUP BY number_of_days
ORDER BY number_of_days) AS day_users_partition;"""
params = {"project_id": project_id,
"startTimestamp": startTimestamp, "endTimestamp": endTimestamp, **__get_constraint_values(args)}
# print(cur.mogrify(pg_query, params))
# print("---------------------")
cur.execute(cur.mogrify(pg_query, params))
row_users = cur.fetchone()
return helper.dict_to_camel_case(row_users)
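
users_power buckets users by how many distinct days they connected, and "avg" is the mean bucket size. Whether the nested keys get camel-cased depends on how deep dict_to_camel_case goes, so snake_case is shown here; numbers invented:

example = {
    "avg": 74.5,
    "partition": [
        {"number_of_days": 1, "count": 120},  # 120 users were active on exactly one day
        {"number_of_days": 2, "count": 29},
    ],
}
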
@dev.timed
def users_slipping(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(),
filters=[], **args):
pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions",
time_constraint=True)
pg_sub_query.append("user_id IS NOT NULL")
pg_sub_query.append("feature.timestamp >= %(startTimestamp)s")
pg_sub_query.append("feature.timestamp < %(endTimestamp)s")
event_type = "PAGES"
event_value = "/"
extra_values = {}
default = True
for f in filters:
if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]):
event_type = f["value"]
elif f["type"] == "EVENT_VALUE":
event_value = f["value"]
default = False
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
pg_sub_query.append(f"sessions.user_id = %(user_id)s")
extra_values["user_id"] = f["value"]
event_table = JOURNEY_TYPES[event_type]["table"]
event_column = JOURNEY_TYPES[event_type]["column"]
pg_sub_query.append(f"feature.{event_column} = %(value)s")
with pg_client.PostgresClient() as cur:
if default:
# get most used value
pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count
FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id)
WHERE {" AND ".join(pg_sub_query[:-1])}
AND length({event_column}) > 2
GROUP BY value
ORDER BY count DESC
LIMIT 1;"""
params = {"project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
cur.execute(cur.mogrify(pg_query, params))
row = cur.fetchone()
if row is not None:
event_value = row["value"]
extra_values["value"] = event_value
pg_query = f"""SELECT user_id, last_time, interactions_count, MIN(start_ts) AS first_seen, MAX(start_ts) AS last_seen
FROM (SELECT user_id, MAX(timestamp) AS last_time, COUNT(DISTINCT session_id) AS interactions_count
FROM {event_table} AS feature INNER JOIN sessions USING (session_id)
WHERE {" AND ".join(pg_sub_query)}
GROUP BY user_id) AS user_last_usage
INNER JOIN sessions USING (user_id)
WHERE EXTRACT(EPOCH FROM now()) * 1000 - last_time > 7 * 24 * 60 * 60 * 1000
GROUP BY user_id, last_time,interactions_count;"""
params = {"project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
# print(cur.mogrify(pg_query, params))
cur.execute(cur.mogrify(pg_query, params))
rows = cur.fetchall()
return {
"startTimestamp": startTimestamp,
"filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}],
"chart": helper.list_to_camel_case(rows)
}
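
A user counts as slipping when their last recorded use of the feature is more than a week old; the cutoff arithmetic, spelled out:

import time

WEEK_MS = 7 * 24 * 60 * 60 * 1000
now_ms = int(time.time() * 1000)
last_time = now_ms - 10 * 24 * 60 * 60 * 1000  # invented: last feature usage 10 days ago
is_slipping = now_ms - last_time > WEEK_MS     # True -> the user appears in "chart"
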

api/chalicelib/utils/TimeUTC.py

@@ -115,6 +115,13 @@ class TimeUTC:
def get_utc_offset():
return int((datetime.now(pytz.utc).now() - datetime.now(pytz.utc).replace(tzinfo=None)).total_seconds() * 1000)
@staticmethod
def trunc_day(timestamp):
dt = TimeUTC.from_ms_timestamp(timestamp)
return TimeUTC.datetime_to_timestamp(dt
.replace(hour=0, minute=0, second=0, microsecond=0)
.astimezone(pytz.utc))
@staticmethod
def trunc_week(timestamp):
dt = TimeUTC.from_ms_timestamp(timestamp)
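
Both truncation helpers return epoch milliseconds and feed the weekly cohort queries above; trunc_week's body is cut off in this hunk, so its exact anchor day is assumed. A usage sketch:

from chalicelib.utils.TimeUTC import TimeUTC

ts = TimeUTC.now()
day_start = TimeUTC.trunc_day(ts)    # 00:00:00 UTC of the same day, in ms
week_start = TimeUTC.trunc_week(ts)  # assumed: start of the same week, in ms
assert week_start <= day_start <= ts
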

(new SQL migration file)

@@ -0,0 +1,7 @@
BEGIN;
CREATE INDEX sessions_user_id_useridNN_idx ON sessions (user_id) WHERE user_id IS NOT NULL;
CREATE INDEX sessions_uid_projectid_startts_sessionid_uidNN_durGTZ_idx ON sessions (user_id, project_id, start_ts, session_id) WHERE user_id IS NOT NULL AND duration > 0;
CREATE INDEX pages_base_path_base_pathLNGT2_idx ON events.pages (base_path) WHERE length(base_path) > 2;
COMMIT;
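
All three are partial indexes scoped to exactly the predicates the new insights queries repeat (user_id IS NOT NULL, duration > 0, length(base_path) > 2). A quick way to check the planner picks them up, reusing the pg_client helper from the code above (dict-style rows assumed):

from chalicelib.utils import pg_client

with pg_client.PostgresClient() as cur:
    cur.execute("""EXPLAIN
                   SELECT COUNT(DISTINCT user_id)
                   FROM public.sessions
                   WHERE project_id = 1  -- hypothetical project
                     AND duration > 0
                     AND user_id IS NOT NULL;""")
    for r in cur.fetchall():
        print(r["QUERY PLAN"])
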

(schema SQL file)

@@ -172,7 +172,7 @@ CREATE TABLE projects
"defaultInputMode": "plain"
}'::jsonb -- ??????
);
-CREATE INDEX projects_tenant_id_idx ON projects(tenant_id);
+CREATE INDEX projects_tenant_id_idx ON projects (tenant_id);
CREATE OR REPLACE FUNCTION notify_project() RETURNS trigger AS
$$
@@ -248,7 +248,7 @@ create table webhooks
index integer default 0 not null,
name varchar(100)
);
-CREATE INDEX webhooks_tenant_id_idx ON webhooks(tenant_id);
+CREATE INDEX webhooks_tenant_id_idx ON webhooks (tenant_id);
-- --- notifications.sql ---
@@ -388,7 +388,7 @@ CREATE TABLE issues
);
CREATE INDEX ON issues (issue_id, type);
CREATE INDEX issues_context_string_gin_idx ON public.issues USING GIN (context_string gin_trgm_ops);
-CREATE INDEX issues_project_id_idx ON issues(project_id);
+CREATE INDEX issues_project_id_idx ON issues (project_id);
-- --- errors.sql ---
@@ -522,6 +522,8 @@ CREATE INDEX sessions_start_ts_idx ON public.sessions (start_ts) WHERE duration
CREATE INDEX sessions_project_id_idx ON public.sessions (project_id) WHERE duration > 0;
CREATE INDEX sessions_session_id_project_id_start_ts_idx ON sessions (session_id, project_id, start_ts) WHERE duration > 0;
CREATE INDEX sessions_session_id_project_id_start_ts_durationNN_idx ON sessions (session_id, project_id, start_ts) WHERE duration IS NOT NULL;
CREATE INDEX sessions_user_id_useridNN_idx ON sessions (user_id) WHERE user_id IS NOT NULL;
CREATE INDEX sessions_uid_projectid_startts_sessionid_uidNN_durGTZ_idx ON sessions (user_id, project_id, start_ts, session_id) WHERE user_id IS NOT NULL AND duration > 0;
ALTER TABLE public.sessions
ADD CONSTRAINT web_browser_constraint CHECK ( (sessions.platform = 'web' AND sessions.user_browser NOTNULL) OR
@@ -679,6 +681,7 @@ CREATE INDEX pages_timestamp_metgt0_idx ON events.pages (timestamp) WHERE respon
CREATE INDEX pages_session_id_speed_indexgt0nn_idx ON events.pages (session_id, speed_index) WHERE speed_index > 0 AND speed_index IS NOT NULL;
CREATE INDEX pages_session_id_timestamp_dom_building_timegt0nn_idx ON events.pages (session_id, timestamp, dom_building_time) WHERE dom_building_time > 0 AND dom_building_time IS NOT NULL;
CREATE INDEX pages_base_path_session_id_timestamp_idx ON events.pages (base_path, session_id, timestamp);
CREATE INDEX pages_base_path_base_pathLNGT2_idx ON events.pages (base_path) WHERE length(base_path) > 2;
CREATE TABLE events.clicks
@@ -872,6 +875,6 @@ CREATE TABLE jobs
);
CREATE INDEX ON jobs (status);
CREATE INDEX ON jobs (start_at);
-CREATE INDEX jobs_project_id_idx ON jobs(project_id);
+CREATE INDEX jobs_project_id_idx ON jobs (project_id);
COMMIT;