diff --git a/api/chalicelib/blueprints/subs/bp_insights.py b/api/chalicelib/blueprints/subs/bp_insights.py
index 8c79e2663..1e016c238 100644
--- a/api/chalicelib/blueprints/subs/bp_insights.py
+++ b/api/chalicelib/blueprints/subs/bp_insights.py
@@ -26,7 +26,18 @@ def get_insights_journey(projectId, context):
     params = app.current_request.query_params
     args = dashboard.dashboard_args(params)
 
-    return {"data": insights.get_journey(project_id=projectId, **{**data, **args})}
+    return {"data": insights.journey(project_id=projectId, **{**data, **args})}
+
+
+@app.route('/{projectId}/insights/users_acquisition', methods=['GET', 'POST'])
+def get_users_acquisition(projectId, context):
+    data = app.current_request.json_body
+    if data is None:
+        data = {}
+    params = app.current_request.query_params
+    args = dashboard.dashboard_args(params)
+
+    return {"data": insights.users_acquisition(project_id=projectId, **{**data, **args})}
 
 
 @app.route('/{projectId}/insights/users_retention', methods=['GET', 'POST'])
@@ -37,7 +48,105 @@ def get_users_retention(projectId, context):
     params = app.current_request.query_params
     args = dashboard.dashboard_args(params)
 
-    return {"data": insights.get_retention(project_id=projectId, **{**data, **args})}
+    return {"data": insights.users_retention(project_id=projectId, **{**data, **args})}
+
+
+@app.route('/{projectId}/insights/feature_retention', methods=['GET', 'POST'])
+def get_feature_retention(projectId, context):
+    data = app.current_request.json_body
+    if data is None:
+        data = {}
+    params = app.current_request.query_params
+    args = dashboard.dashboard_args(params)
+
+    return {"data": insights.feature_retention(project_id=projectId, **{**data, **args})}
+
+
+@app.route('/{projectId}/insights/feature_acquisition', methods=['GET', 'POST'])
+def get_feature_acquisition(projectId, context):
+    data = app.current_request.json_body
+    if data is None:
+        data = {}
+    params = app.current_request.query_params
+    args = dashboard.dashboard_args(params)
+
+    return {"data": insights.feature_acquisition(project_id=projectId, **{**data, **args})}
+
+
+@app.route('/{projectId}/insights/feature_popularity_frequency', methods=['GET', 'POST'])
+def get_feature_popularity_frequency(projectId, context):
+    data = app.current_request.json_body
+    if data is None:
+        data = {}
+    params = app.current_request.query_params
+    args = dashboard.dashboard_args(params)
+
+    return {"data": insights.feature_popularity_frequency(project_id=projectId, **{**data, **args})}
+
+
+@app.route('/{projectId}/insights/feature_intensity', methods=['GET', 'POST'])
+def get_feature_intensity(projectId, context):
+    data = app.current_request.json_body
+    if data is None:
+        data = {}
+    params = app.current_request.query_params
+    args = dashboard.dashboard_args(params)
+
+    return {"data": insights.feature_intensity(project_id=projectId, **{**data, **args})}
+
+
+@app.route('/{projectId}/insights/feature_adoption', methods=['GET', 'POST'])
+def get_feature_adoption(projectId, context):
+    data = app.current_request.json_body
+    if data is None:
+        data = {}
+    params = app.current_request.query_params
+    args = dashboard.dashboard_args(params)
+
+    return {"data": insights.feature_adoption(project_id=projectId, **{**data, **args})}
+
+
+@app.route('/{projectId}/insights/feature_adoption_top_users', methods=['GET', 'POST'])
+def get_feature_adoption_top_users(projectId, context):
+    data = app.current_request.json_body
+    if data is None:
+        data = {}
+    params = app.current_request.query_params
+    args = dashboard.dashboard_args(params)
+
+    return {"data": insights.feature_adoption_top_users(project_id=projectId, **{**data, **args})}
+
+
+@app.route('/{projectId}/insights/users_active', methods=['GET', 'POST'])
+def get_users_active(projectId, context):
+    data = app.current_request.json_body
+    if data is None:
+        data = {}
+    params = app.current_request.query_params
+    args = dashboard.dashboard_args(params)
+
+    return {"data": insights.users_active(project_id=projectId, **{**data, **args})}
+
+
+@app.route('/{projectId}/insights/users_power', methods=['GET', 'POST'])
+def get_users_power(projectId, context):
+    data = app.current_request.json_body
+    if data is None:
+        data = {}
+    params = app.current_request.query_params
+    args = dashboard.dashboard_args(params)
+
+    return {"data": insights.users_power(project_id=projectId, **{**data, **args})}
+
+
+@app.route('/{projectId}/insights/users_slipping', methods=['GET', 'POST'])
+def get_users_slipping(projectId, context):
+    data = app.current_request.json_body
+    if data is None:
+        data = {}
+    params = app.current_request.query_params
+    args = dashboard.dashboard_args(params)
+
+    return {"data": insights.users_slipping(project_id=projectId, **{**data, **args})}
 
 
 #
 #
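Every new endpoint above repeats the same plumbing: take the optional JSON body, overlay the parsed query-string args, and forward the merged dict as keyword arguments to the matching insights function. A hypothetical follow-up refactor (not part of this PR) could collapse the boilerplate into one helper; the name _insights_route below is invented, the rest reuses objects already imported in this blueprint:

    # Sketch only: assumes Chalice's app.current_request and the existing
    # dashboard.dashboard_args helper; _insights_route is a hypothetical name.
    def _insights_route(handler, project_id):
        data = app.current_request.json_body or {}
        args = dashboard.dashboard_args(app.current_request.query_params)
        # query-string args win over body keys on conflict, as in the PR
        return {"data": handler(project_id=project_id, **{**data, **args})}

    @app.route('/{projectId}/insights/users_power', methods=['GET', 'POST'])
    def get_users_power(projectId, context):
        return _insights_route(insights.users_power, projectId)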
return {"data": insights.feature_adoption_top_users(project_id=projectId, **{**data, **args})} + + +@app.route('/{projectId}/insights/users_active', methods=['GET', 'POST']) +def get_users_active(projectId, context): + data = app.current_request.json_body + if data is None: + data = {} + params = app.current_request.query_params + args = dashboard.dashboard_args(params) + + return {"data": insights.users_active(project_id=projectId, **{**data, **args})} + + +@app.route('/{projectId}/insights/users_power', methods=['GET', 'POST']) +def get_users_power(projectId, context): + data = app.current_request.json_body + if data is None: + data = {} + params = app.current_request.query_params + args = dashboard.dashboard_args(params) + + return {"data": insights.users_power(project_id=projectId, **{**data, **args})} + + +@app.route('/{projectId}/insights/users_slipping', methods=['GET', 'POST']) +def get_users_slipping(projectId, context): + data = app.current_request.json_body + if data is None: + data = {} + params = app.current_request.query_params + args = dashboard.dashboard_args(params) + + return {"data": insights.users_slipping(project_id=projectId, **{**data, **args})} # # diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index c39fb5cea..e639ba55a 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -1,5 +1,4 @@ from chalicelib.core import sessions_metas -from chalicelib.utils import args_transformer from chalicelib.utils import helper, dev from chalicelib.utils import pg_client from chalicelib.utils.TimeUTC import TimeUTC @@ -32,7 +31,7 @@ JOURNEY_TYPES = { @dev.timed -def get_journey(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), filters=[], **args): +def journey(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), filters=[], **args): pg_sub_query_subset = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", time_constraint=True) event_start = None @@ -105,7 +104,7 @@ def get_journey(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimest return __transform_journey(rows) -def __compute_retention_percentage(rows): +def __compute_weekly_percentage(rows): if rows is None or len(rows) == 0: return rows t = -1 @@ -117,8 +116,30 @@ def __compute_retention_percentage(rows): def __complete_retention(rows, start_date, end_date=None): - if rows is None or len(rows) == 0: - return rows + if rows is None: + return [] + max_week = 10 + for i in range(max_week): + if end_date is not None and start_date + i * TimeUTC.MS_WEEK >= end_date: + break + neutral = { + "firstConnexionWeek": start_date, + "week": i, + "usersCount": 0, + "connectedUsers": [], + "percentage": 0 + } + if i < len(rows) \ + and i != rows[i]["week"]: + rows.insert(i, neutral) + elif i >= len(rows): + rows.append(neutral) + return rows + + +def __complete_acquisition(rows, start_date, end_date=None): + if rows is None: + return [] max_week = 10 week = 0 delta_date = 0 @@ -167,13 +188,62 @@ def __complete_retention(rows, start_date, end_date=None): @dev.timed -def get_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], - **args): +def users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], + **args): startTimestamp = TimeUTC.trunc_week(startTimestamp) endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK pg_sub_query = 
@@ -167,13 +188,62 @@
 
 
 @dev.timed
-def get_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[],
-                  **args):
+def users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[],
+                    **args):
     startTimestamp = TimeUTC.trunc_week(startTimestamp)
     endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK
     pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions",
                                      time_constraint=True)
+    pg_sub_query.append("user_id IS NOT NULL")
+    pg_sub_query.append("DATE_TRUNC('week', to_timestamp(start_ts / 1000)) = to_timestamp(%(startTimestamp)s / 1000)")
+    with pg_client.PostgresClient() as cur:
+        pg_query = f"""SELECT FLOOR(DATE_PART('day', connexion_week - DATE_TRUNC('week', to_timestamp(%(startTimestamp)s / 1000)::timestamp)) / 7)::integer AS week,
+                              COUNT(DISTINCT connexions_list.user_id) AS users_count,
+                              ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users
+                       FROM (SELECT DISTINCT user_id
+                             FROM sessions
+                             WHERE {" AND ".join(pg_sub_query)}
+                               AND DATE_PART('week', to_timestamp((sessions.start_ts - %(startTimestamp)s)/1000)) = 1
+                               AND NOT EXISTS((SELECT 1
+                                               FROM sessions AS bsess
+                                               WHERE bsess.start_ts < %(startTimestamp)s
+                                                 AND project_id = %(project_id)s
+                                                 AND bsess.user_id = sessions.user_id
+                                               LIMIT 1))
+                             GROUP BY user_id) AS users_list
+                                LEFT JOIN LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week,
+                                                          user_id
+                                                   FROM sessions
+                                                   WHERE users_list.user_id = sessions.user_id
+                                                     AND %(startTimestamp)s <= sessions.start_ts
+                                                     AND sessions.project_id = %(project_id)s
+                                                     AND sessions.start_ts < (%(endTimestamp)s - 1)
+                                                   GROUP BY connexion_week, user_id
+                       ) AS connexions_list ON (TRUE)
+                       GROUP BY week
+                       ORDER BY week;"""
+        params = {"project_id": project_id, "startTimestamp": startTimestamp,
+                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
+        # print(cur.mogrify(pg_query, params))
+        cur.execute(cur.mogrify(pg_query, params))
+        rows = cur.fetchall()
+        rows = __compute_weekly_percentage(helper.list_to_camel_case(rows))
+    return {
+        "startTimestamp": startTimestamp,
+        "chart": __complete_retention(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now())
+    }
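users_retention pins its cohort to the calendar week of startTimestamp: the timestamp is truncated to the start of its week and the window is exactly 10 weeks, so the FLOOR(day-difference / 7) expression in the SQL buckets every later visit into a week index. Restated in plain Python using the same TimeUTC helpers the PR relies on (MS_WEEK = 7 days in milliseconds):

    # Week-window arithmetic behind users_retention / users_acquisition.
    start = TimeUTC.trunc_week(start_timestamp)      # start of the cohort week, ms
    end = start + 10 * TimeUTC.MS_WEEK               # exclusive 10-week upper bound
    week_of = lambda ts: (ts - start) // TimeUTC.MS_WEEK   # bucket index 0..9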
= JOURNEY_TYPES[event_type]["table"] + event_column = JOURNEY_TYPES[event_type]["column"] + pg_sub_query.append(f"feature.{event_column} = %(value)s") + + with pg_client.PostgresClient() as cur: + if default: + # get most used value + pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count + FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query[:-1])} + AND length({event_column}) > 2 + GROUP BY value + ORDER BY count DESC + LIMIT 1;""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + cur.execute(cur.mogrify(pg_query, params)) + row = cur.fetchone() + if row is not None: + event_value = row["value"] + extra_values["value"] = event_value + + pg_query = f"""SELECT FLOOR(DATE_PART('day', connexion_week - to_timestamp(%(startTimestamp)s/1000)) / 7)::integer AS week, + COUNT(DISTINCT connexions_list.user_id) AS users_count, + ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users + FROM (SELECT DISTINCT user_id + FROM sessions INNER JOIN {event_table} AS feature USING (session_id) + WHERE {" AND ".join(pg_sub_query)} + AND DATE_PART('week', to_timestamp((sessions.start_ts - %(startTimestamp)s)/1000)) = 1 + AND NOT EXISTS((SELECT 1 + FROM sessions AS bsess INNER JOIN {event_table} AS bfeature USING (session_id) + WHERE bsess.start_ts<%(startTimestamp)s + AND project_id = %(project_id)s + AND bsess.user_id = sessions.user_id + AND bfeature.timestamp<%(startTimestamp)s + AND bfeature.{event_column}=%(value)s + LIMIT 1)) + GROUP BY user_id) AS users_list + LEFT JOIN LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week, + user_id + FROM sessions + INNER JOIN events.pages AS feature USING (session_id) + WHERE users_list.user_id = sessions.user_id + AND %(startTimestamp)s <= sessions.start_ts + AND sessions.project_id = 1 + AND sessions.start_ts < (%(endTimestamp)s - 1) + AND feature.timestamp >= %(startTimestamp)s + AND feature.timestamp < %(endTimestamp)s + AND feature.{event_column} = %(value)s + GROUP BY connexion_week, user_id) AS connexions_list ON (TRUE) + GROUP BY week + ORDER BY week;""" + + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + # print(cur.mogrify(pg_query, params)) + cur.execute(cur.mogrify(pg_query, params)) + rows = cur.fetchall() + rows = __compute_weekly_percentage(helper.list_to_camel_case(rows)) + return { + "startTimestamp": startTimestamp, + "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}], + "chart": __complete_retention(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) + } + + +@dev.timed +def feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), + filters=[], + **args): + startTimestamp = TimeUTC.trunc_week(startTimestamp) + endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK + pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", + time_constraint=True) + pg_sub_query.append("user_id IS NOT NULL") + pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") + pg_sub_query.append("feature.timestamp < %(endTimestamp)s") + event_type = "PAGES" + event_value = "/" + extra_values = {} + default = True + for f in filters: + if f["type"] == "EVENT_TYPE" and 
JOURNEY_TYPES.get(f["value"]): + event_type = f["value"] + elif f["type"] == "EVENT_VALUE": + event_value = f["value"] + default = False + elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]: + pg_sub_query.append(f"sessions.user_id = %(user_id)s") + extra_values["user_id"] = f["value"] + event_table = JOURNEY_TYPES[event_type]["table"] + event_column = JOURNEY_TYPES[event_type]["column"] + + pg_sub_query.append(f"feature.{event_column} = %(value)s") + + with pg_client.PostgresClient() as cur: + if default: + # get most used value + pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count + FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query[:-1])} + AND length({event_column}) > 2 + GROUP BY value + ORDER BY count DESC + LIMIT 1;""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + cur.execute(cur.mogrify(pg_query, params)) + row = cur.fetchone() + if row is not None: + event_value = row["value"] + extra_values["value"] = event_value + + pg_query = f"""SELECT EXTRACT(EPOCH FROM first_connexion_week::date)::bigint*1000 AS first_connexion_week, + FLOOR(DATE_PART('day', connexion_week - first_connexion_week) / 7)::integer AS week, + COUNT(DISTINCT connexions_list.user_id) AS users_count, + ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users + FROM (SELECT user_id, DATE_TRUNC('week', to_timestamp(first_connexion_week / 1000)) AS first_connexion_week + FROM(SELECT DISTINCT user_id, MIN(start_ts) AS first_connexion_week + FROM sessions INNER JOIN {event_table} AS feature USING (session_id) + WHERE {" AND ".join(pg_sub_query)} + AND NOT EXISTS((SELECT 1 + FROM sessions AS bsess INNER JOIN {event_table} AS bfeature USING (session_id) + WHERE bsess.start_ts<%(startTimestamp)s + AND project_id = %(project_id)s + AND bsess.user_id = sessions.user_id + AND bfeature.timestamp<%(startTimestamp)s + AND bfeature.{event_column}=%(value)s + LIMIT 1)) + GROUP BY user_id) AS raw_users_list) AS users_list + LEFT JOIN LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week, + user_id + FROM sessions INNER JOIN {event_table} AS feature USING(session_id) + WHERE users_list.user_id = sessions.user_id + AND first_connexion_week <= + DATE_TRUNC('week', to_timestamp(sessions.start_ts / 1000)::timestamp) + AND sessions.project_id = %(project_id)s + AND sessions.start_ts < (%(endTimestamp)s - 1) + AND feature.timestamp >= %(startTimestamp)s + AND feature.timestamp < %(endTimestamp)s + AND feature.{event_column} = %(value)s + GROUP BY connexion_week, user_id) AS connexions_list ON (TRUE) + GROUP BY first_connexion_week, week + ORDER BY first_connexion_week, week;""" + + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + # print(cur.mogrify(pg_query, params)) + cur.execute(cur.mogrify(pg_query, params)) + rows = cur.fetchall() + rows = __compute_weekly_percentage(helper.list_to_camel_case(rows)) + return { + "startTimestamp": startTimestamp, + "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}], + "chart": __complete_acquisition(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) + } + + +@dev.timed +def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_days=-70), 
+
+
+@dev.timed
+def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(),
+                                 filters=[],
+                                 **args):
+    startTimestamp = TimeUTC.trunc_week(startTimestamp)
+    endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK
+    pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions",
+                                     time_constraint=True)
+    event_table = JOURNEY_TYPES["CLICK"]["table"]
+    event_column = JOURNEY_TYPES["CLICK"]["column"]
+    for f in filters:
+        if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]):
+            event_table = JOURNEY_TYPES[f["value"]]["table"]
+            event_column = JOURNEY_TYPES[f["value"]]["column"]
+        elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
+            pg_sub_query.append(f"sessions.user_id = %(user_id)s")
+
+    with pg_client.PostgresClient() as cur:
+        pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count
+                       FROM sessions
+                       WHERE {" AND ".join(pg_sub_query)}
+                         AND user_id IS NOT NULL;"""
+        params = {"project_id": project_id, "startTimestamp": startTimestamp,
+                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
+        # print(cur.mogrify(pg_query, params))
+        # print("---------------------")
+        cur.execute(cur.mogrify(pg_query, params))
+        all_user_count = cur.fetchone()["count"]
+        if all_user_count == 0:
+            return []
+        pg_sub_query.append("feature.timestamp >= %(startTimestamp)s")
+        pg_sub_query.append("feature.timestamp < %(endTimestamp)s")
+        pg_sub_query.append(f"length({event_column})>2")
+        pg_query = f"""SELECT {event_column} AS value, COUNT(DISTINCT user_id) AS count
+                       FROM {event_table} AS feature INNER JOIN sessions USING (session_id)
+                       WHERE {" AND ".join(pg_sub_query)}
+                         AND user_id IS NOT NULL
+                       GROUP BY value
+                       ORDER BY count DESC
+                       LIMIT 7;"""
+        # print(cur.mogrify(pg_query, params))
+        # print("---------------------")
+        cur.execute(cur.mogrify(pg_query, params))
+        popularity = cur.fetchall()
+        pg_query = f"""SELECT {event_column} AS value, COUNT(session_id) AS count
+                       FROM {event_table} AS feature INNER JOIN sessions USING (session_id)
+                       WHERE {" AND ".join(pg_sub_query)}
+                       GROUP BY value;"""
+        # print(cur.mogrify(pg_query, params))
+        # print("---------------------")
+        cur.execute(cur.mogrify(pg_query, params))
+        frequencies = cur.fetchall()
+        total_usage = sum([f["count"] for f in frequencies])
+        frequencies = {f["value"]: f["count"] for f in frequencies}
+        for p in popularity:
+            p["popularity"] = p.pop("count") / all_user_count
+            p["frequency"] = frequencies[p["value"]] / total_usage
+
+    return popularity
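Popularity and frequency are two different normalizations of the same feature: popularity divides the feature's distinct users by all identified users in the period, while frequency divides the feature's usage count by total usage across all values. With invented numbers:

    # 1000 identified users in the period; "Add to cart" clicked by 400 of
    # them, 6000 times, out of 20000 clicks overall:
    popularity = 400 / 1000     # 0.4 -> 40% of users touched the feature
    frequency = 6000 / 20000    # 0.3 -> 30% of all click volume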
+
+
+@dev.timed
+def feature_adoption(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(),
+                     filters=[],
+                     **args):
+    pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions",
+                                     time_constraint=True)
+    event_type = "CLICK"
+    event_value = '/'
+    extra_values = {}
+    default = True
+    for f in filters:
+        if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]):
+            event_type = f["value"]
+        elif f["type"] == "EVENT_VALUE":
+            event_value = f["value"]
+            default = False
+        elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
+            pg_sub_query.append(f"sessions.user_id = %(user_id)s")
+    event_table = JOURNEY_TYPES[event_type]["table"]
+    event_column = JOURNEY_TYPES[event_type]["column"]
+    with pg_client.PostgresClient() as cur:
+        pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count
+                       FROM sessions
+                       WHERE {" AND ".join(pg_sub_query)}
+                         AND user_id IS NOT NULL;"""
+        params = {"project_id": project_id, "startTimestamp": startTimestamp,
+                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
+        # print(cur.mogrify(pg_query, params))
+        # print("---------------------")
+        cur.execute(cur.mogrify(pg_query, params))
+        all_user_count = cur.fetchone()["count"]
+        if all_user_count == 0:
+            return {"adoption": 0, "target": 0, "filters": [{"type": "EVENT_TYPE", "value": event_type},
+                                                            {"type": "EVENT_VALUE", "value": event_value}]}
+        pg_sub_query.append("feature.timestamp >= %(startTimestamp)s")
+        pg_sub_query.append("feature.timestamp < %(endTimestamp)s")
+        pg_sub_query.append(f"length({event_column})>2")
+        if default:
+            # get most used value
+            pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count
+                           FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id)
+                           WHERE {" AND ".join(pg_sub_query[:-1])}
+                             AND length({event_column}) > 2
+                           GROUP BY value
+                           ORDER BY count DESC
+                           LIMIT 1;"""
+            params = {"project_id": project_id, "startTimestamp": startTimestamp,
+                      "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
+            cur.execute(cur.mogrify(pg_query, params))
+            row = cur.fetchone()
+            if row is not None:
+                event_value = row["value"]
+        extra_values["value"] = event_value
+        pg_sub_query.append(f"feature.{event_column} = %(value)s")
+        pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count
+                       FROM {event_table} AS feature INNER JOIN sessions USING (session_id)
+                       WHERE {" AND ".join(pg_sub_query)}
+                         AND user_id IS NOT NULL;"""
+        params = {"project_id": project_id, "startTimestamp": startTimestamp,
+                  "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
+        # print(cur.mogrify(pg_query, params))
+        # print("---------------------")
+        cur.execute(cur.mogrify(pg_query, params))
+        adoption = cur.fetchone()["count"] / all_user_count
+    return {"target": all_user_count, "adoption": adoption,
+            "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]}
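The adoption payload can be read straight off the return statement: target is the number of identified users in the period and adoption is the fraction of them who triggered the selected feature. A plausible response for a project with 1000 identified users, 380 of whom clicked the chosen label (numbers invented):

    {
        "target": 1000,
        "adoption": 0.38,
        "filters": [{"type": "EVENT_TYPE", "value": "CLICK"},
                    {"type": "EVENT_VALUE", "value": "Add to cart"}]
    }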
+
+
+@dev.timed
+def feature_adoption_top_users(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(),
+                               filters=[],
+                               **args):
+    pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions",
+                                     time_constraint=True)
+    pg_sub_query.append("user_id IS NOT NULL")
+    event_type = "CLICK"
+    event_value = '/'
+    extra_values = {}
+    default = True
+    for f in filters:
+        if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]):
+            event_type = f["value"]
+        elif f["type"] == "EVENT_VALUE":
+            event_value = f["value"]
+            default = False
+        elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
+            pg_sub_query.append(f"sessions.user_id = %(user_id)s")
+    event_table = JOURNEY_TYPES[event_type]["table"]
+    event_column = JOURNEY_TYPES[event_type]["column"]
+    with pg_client.PostgresClient() as cur:
+        pg_sub_query.append("feature.timestamp >= %(startTimestamp)s")
+        pg_sub_query.append("feature.timestamp < %(endTimestamp)s")
+        pg_sub_query.append(f"length({event_column})>2")
+        if default:
+            # get most used value
+            pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count
+                           FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id)
+                           WHERE {" AND ".join(pg_sub_query[:-1])}
+                             AND length({event_column}) > 2
+                           GROUP BY value
+                           ORDER BY count DESC
+                           LIMIT 1;"""
+            params = {"project_id": project_id, "startTimestamp": startTimestamp,
+                      "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
+            cur.execute(cur.mogrify(pg_query, params))
+            row = cur.fetchone()
+            if row is not None:
+                event_value = row["value"]
+        extra_values["value"] = event_value
+        pg_sub_query.append(f"feature.{event_column} = %(value)s")
+        pg_query = f"""SELECT user_id, COUNT(DISTINCT session_id) AS count
+                       FROM {event_table} AS feature
+                                INNER JOIN sessions USING (session_id)
+                       WHERE {" AND ".join(pg_sub_query)}
+                       GROUP BY 1
+                       ORDER BY 2 DESC
+                       LIMIT 10;"""
+        params = {"project_id": project_id, "startTimestamp": startTimestamp,
+                  "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
+        # print(cur.mogrify(pg_query, params))
+        # print("---------------------")
+        cur.execute(cur.mogrify(pg_query, params))
+        rows = cur.fetchall()
+    return {"users": helper.list_to_camel_case(rows),
+            "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]}
+
+
+@dev.timed
+def feature_intensity(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(),
+                      filters=[],
+                      **args):
+    pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions",
+                                     time_constraint=True)
+    pg_sub_query.append("feature.timestamp >= %(startTimestamp)s")
+    pg_sub_query.append("feature.timestamp < %(endTimestamp)s")
+    event_table = JOURNEY_TYPES["CLICK"]["table"]
+    event_column = JOURNEY_TYPES["CLICK"]["column"]
+    for f in filters:
+        if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]):
+            event_table = JOURNEY_TYPES[f["value"]]["table"]
+            event_column = JOURNEY_TYPES[f["value"]]["column"]
+        elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
+            pg_sub_query.append(f"sessions.user_id = %(user_id)s")
+    pg_sub_query.append(f"length({event_column})>2")
+    with pg_client.PostgresClient() as cur:
+        pg_query = f"""SELECT {event_column} AS value, AVG(DISTINCT session_id) AS avg
+                       FROM {event_table} AS feature INNER JOIN sessions USING (session_id)
+                       WHERE {" AND ".join(pg_sub_query)}
+                       GROUP BY value
+                       ORDER BY avg DESC
+                       LIMIT 7;"""
+        params = {"project_id": project_id, "startTimestamp": startTimestamp,
+                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
+
+        # print(cur.mogrify(pg_query, params))
+        # print("---------------------")
+        cur.execute(cur.mogrify(pg_query, params))
+        rows = cur.fetchall()
+
+    return rows
period == "DAY" else TimeUTC.trunc_week( + startTimestamp), + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + # print(cur.mogrify(pg_query, params)) + # print("---------------------") + cur.execute(cur.mogrify(pg_query, params)) + row_users = cur.fetchone() + + return row_users + + +@dev.timed +def users_power(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), + filters=[], **args): + pg_sub_query = __get_constraints(project_id=project_id, time_constraint=True, chart=False, data=args) + pg_sub_query.append("user_id IS NOT NULL") + + with pg_client.PostgresClient() as cur: + pg_query = f"""SELECT AVG(count) AS avg, JSONB_AGG(day_users_partition) AS partition + FROM (SELECT number_of_days, COUNT(user_id) AS count + FROM (SELECT user_id, COUNT(DISTINCT DATE_TRUNC('day', to_timestamp(start_ts / 1000))) AS number_of_days + FROM sessions + WHERE {" AND ".join(pg_sub_query)} + GROUP BY 1) AS users_connexions + GROUP BY number_of_days + ORDER BY number_of_days) AS day_users_partition;""" + params = {"project_id": project_id, + "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, **__get_constraint_values(args)} + # print(cur.mogrify(pg_query, params)) + # print("---------------------") + cur.execute(cur.mogrify(pg_query, params)) + row_users = cur.fetchone() + + return helper.dict_to_camel_case(row_users) + + +@dev.timed +def users_slipping(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), + filters=[], **args): + pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", + time_constraint=True) + pg_sub_query.append("user_id IS NOT NULL") + pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") + pg_sub_query.append("feature.timestamp < %(endTimestamp)s") + event_type = "PAGES" + event_value = "/" + extra_values = {} + default = True + for f in filters: + if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): + event_type = f["value"] + elif f["type"] == "EVENT_VALUE": + event_value = f["value"] + default = False + elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]: + pg_sub_query.append(f"sessions.user_id = %(user_id)s") + extra_values["user_id"] = f["value"] + event_table = JOURNEY_TYPES[event_type]["table"] + event_column = JOURNEY_TYPES[event_type]["column"] + pg_sub_query.append(f"feature.{event_column} = %(value)s") + + with pg_client.PostgresClient() as cur: + if default: + # get most used value + pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count + FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query[:-1])} + AND length({event_column}) > 2 + GROUP BY value + ORDER BY count DESC + LIMIT 1;""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + cur.execute(cur.mogrify(pg_query, params)) + row = cur.fetchone() + if row is not None: + event_value = row["value"] + extra_values["value"] = event_value + + pg_query = f"""SELECT user_id, last_time, interactions_count, MIN(start_ts) AS first_seen, MAX(start_ts) AS last_seen + FROM (SELECT user_id, MAX(timestamp) AS last_time, COUNT(DISTINCT session_id) AS interactions_count + FROM {event_table} AS feature INNER JOIN sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query)} + GROUP BY user_id) AS user_last_usage + INNER JOIN sessions USING (user_id) + WHERE EXTRACT(EPOCH FROM 
+
+
+@dev.timed
+def users_slipping(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(),
+                   filters=[], **args):
+    pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions",
+                                     time_constraint=True)
+    pg_sub_query.append("user_id IS NOT NULL")
+    pg_sub_query.append("feature.timestamp >= %(startTimestamp)s")
+    pg_sub_query.append("feature.timestamp < %(endTimestamp)s")
+    event_type = "PAGES"
+    event_value = "/"
+    extra_values = {}
+    default = True
+    for f in filters:
+        if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]):
+            event_type = f["value"]
+        elif f["type"] == "EVENT_VALUE":
+            event_value = f["value"]
+            default = False
+        elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
+            pg_sub_query.append(f"sessions.user_id = %(user_id)s")
+            extra_values["user_id"] = f["value"]
+    event_table = JOURNEY_TYPES[event_type]["table"]
+    event_column = JOURNEY_TYPES[event_type]["column"]
+    pg_sub_query.append(f"feature.{event_column} = %(value)s")
+
+    with pg_client.PostgresClient() as cur:
+        if default:
+            # get most used value
+            pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count
+                           FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id)
+                           WHERE {" AND ".join(pg_sub_query[:-1])}
+                             AND length({event_column}) > 2
+                           GROUP BY value
+                           ORDER BY count DESC
+                           LIMIT 1;"""
+            params = {"project_id": project_id, "startTimestamp": startTimestamp,
+                      "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
+            cur.execute(cur.mogrify(pg_query, params))
+            row = cur.fetchone()
+            if row is not None:
+                event_value = row["value"]
+        extra_values["value"] = event_value
+
+        pg_query = f"""SELECT user_id, last_time, interactions_count, MIN(start_ts) AS first_seen, MAX(start_ts) AS last_seen
+                       FROM (SELECT user_id, MAX(timestamp) AS last_time, COUNT(DISTINCT session_id) AS interactions_count
+                             FROM {event_table} AS feature INNER JOIN sessions USING (session_id)
+                             WHERE {" AND ".join(pg_sub_query)}
+                             GROUP BY user_id) AS user_last_usage
+                                INNER JOIN sessions USING (user_id)
+                       WHERE EXTRACT(EPOCH FROM now()) * 1000 - last_time > 7 * 24 * 60 * 60 * 1000
+                       GROUP BY user_id, last_time, interactions_count;"""
+
+        params = {"project_id": project_id, "startTimestamp": startTimestamp,
+                  "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
+        # print(cur.mogrify(pg_query, params))
+        cur.execute(cur.mogrify(pg_query, params))
+        rows = cur.fetchall()
+    return {
+        "startTimestamp": startTimestamp,
+        "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}],
+        "chart": helper.list_to_camel_case(rows)
+    }
diff --git a/api/chalicelib/utils/TimeUTC.py b/api/chalicelib/utils/TimeUTC.py
index 9cd353eb4..c95359a00 100644
--- a/api/chalicelib/utils/TimeUTC.py
+++ b/api/chalicelib/utils/TimeUTC.py
@@ -115,6 +115,13 @@ class TimeUTC:
     def get_utc_offset():
         return int((datetime.now(pytz.utc).now() - datetime.now(pytz.utc).replace(tzinfo=None)).total_seconds() * 1000)
 
+    @staticmethod
+    def trunc_day(timestamp):
+        dt = TimeUTC.from_ms_timestamp(timestamp)
+        return TimeUTC.datetime_to_timestamp(dt
+                                             .replace(hour=0, minute=0, second=0, microsecond=0)
+                                             .astimezone(pytz.utc))
+
     @staticmethod
     def trunc_week(timestamp):
         dt = TimeUTC.from_ms_timestamp(timestamp)
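TimeUTC.trunc_day mirrors the existing trunc_week: zero out the sub-day fields of the datetime and convert back to a millisecond timestamp, so users_active can align its generate_series buckets to day boundaries. A quick sanity check (values illustrative, assuming a UTC-based datetime from from_ms_timestamp):

    # 2021-05-04 13:45:07 UTC in ms -> 2021-05-04 00:00:00 UTC in ms
    TimeUTC.trunc_day(1620135907000)  # expected: 1620086400000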
diff --git a/scripts/helm/db/init_dbs/postgresql/1.3.6/1.3.6.sql b/scripts/helm/db/init_dbs/postgresql/1.3.6/1.3.6.sql
new file mode 100644
index 000000000..cfa7b19a2
--- /dev/null
+++ b/scripts/helm/db/init_dbs/postgresql/1.3.6/1.3.6.sql
@@ -0,0 +1,7 @@
+BEGIN;
+
+CREATE INDEX sessions_user_id_useridNN_idx ON sessions (user_id) WHERE user_id IS NOT NULL;
+CREATE INDEX sessions_uid_projectid_startts_sessionid_uidNN_durGTZ_idx ON sessions (user_id, project_id, start_ts, session_id) WHERE user_id IS NOT NULL AND duration > 0;
+CREATE INDEX pages_base_path_base_pathLNGT2_idx ON events.pages (base_path) WHERE length(base_path) > 2;
+
+COMMIT;
\ No newline at end of file
diff --git a/scripts/helm/db/init_dbs/postgresql/init_schema.sql b/scripts/helm/db/init_dbs/postgresql/init_schema.sql
index 65811f7ba..586f84e74 100644
--- a/scripts/helm/db/init_dbs/postgresql/init_schema.sql
+++ b/scripts/helm/db/init_dbs/postgresql/init_schema.sql
@@ -172,7 +172,7 @@ CREATE TABLE projects
                 "defaultInputMode": "plain"
             }'::jsonb -- ??????
 );
-CREATE INDEX projects_tenant_id_idx ON projects(tenant_id);
+CREATE INDEX projects_tenant_id_idx ON projects (tenant_id);
 
 CREATE OR REPLACE FUNCTION notify_project() RETURNS trigger AS
 $$
@@ -248,7 +248,7 @@ create table webhooks
     index      integer default 0 not null,
     name       varchar(100)
 );
-CREATE INDEX webhooks_tenant_id_idx ON webhooks(tenant_id);
+CREATE INDEX webhooks_tenant_id_idx ON webhooks (tenant_id);
 
 --- notifications.sql ---
@@ -388,7 +388,7 @@ CREATE TABLE issues
 );
 CREATE INDEX ON issues (issue_id, type);
 CREATE INDEX issues_context_string_gin_idx ON public.issues USING GIN (context_string gin_trgm_ops);
-CREATE INDEX issues_project_id_idx ON issues(project_id);
+CREATE INDEX issues_project_id_idx ON issues (project_id);
 
 --- errors.sql ---
@@ -522,6 +522,8 @@ CREATE INDEX sessions_start_ts_idx ON public.sessions (start_ts) WHERE duration
 CREATE INDEX sessions_project_id_idx ON public.sessions (project_id) WHERE duration > 0;
 CREATE INDEX sessions_session_id_project_id_start_ts_idx ON sessions (session_id, project_id, start_ts) WHERE duration > 0;
 CREATE INDEX sessions_session_id_project_id_start_ts_durationNN_idx ON sessions (session_id, project_id, start_ts) WHERE duration IS NOT NULL;
+CREATE INDEX sessions_user_id_useridNN_idx ON sessions (user_id) WHERE user_id IS NOT NULL;
+CREATE INDEX sessions_uid_projectid_startts_sessionid_uidNN_durGTZ_idx ON sessions (user_id, project_id, start_ts, session_id) WHERE user_id IS NOT NULL AND duration > 0;
 
 ALTER TABLE public.sessions
     ADD CONSTRAINT web_browser_constraint CHECK (
         (sessions.platform = 'web' AND sessions.user_browser NOTNULL) OR
@@ -679,6 +681,7 @@ CREATE INDEX pages_timestamp_metgt0_idx ON events.pages (timestamp) WHERE respon
 CREATE INDEX pages_session_id_speed_indexgt0nn_idx ON events.pages (session_id, speed_index) WHERE speed_index > 0 AND speed_index IS NOT NULL;
 CREATE INDEX pages_session_id_timestamp_dom_building_timegt0nn_idx ON events.pages (session_id, timestamp, dom_building_time) WHERE dom_building_time > 0 AND dom_building_time IS NOT NULL;
 CREATE INDEX pages_base_path_session_id_timestamp_idx ON events.pages (base_path, session_id, timestamp);
+CREATE INDEX pages_base_path_base_pathLNGT2_idx ON events.pages (base_path) WHERE length(base_path) > 2;
 
 
 CREATE TABLE events.clicks
@@ -872,6 +875,6 @@ CREATE TABLE jobs
 );
 CREATE INDEX ON jobs (status);
 CREATE INDEX ON jobs (start_at);
-CREATE INDEX jobs_project_id_idx ON jobs(project_id);
+CREATE INDEX jobs_project_id_idx ON jobs (project_id);
 
 COMMIT;
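The new partial indexes line up with the new query shapes: every insights query filters on user_id IS NOT NULL (and most on duration > 0), and the "most used value" lookups filter on length(base_path) > 2, so the partial indexes keep anonymous sessions and trivial paths out of the index entirely. An optional, illustrative way to confirm the planner can use them, reusing the pg_client wrapper from this PR (actual plans depend on table statistics, so output will vary):

    # Sanity check only; look for sessions_uid_projectid_startts_sessionid_uidNN_durGTZ_idx
    # in the printed plan.
    from chalicelib.utils import pg_client

    with pg_client.PostgresClient() as cur:
        cur.execute("""EXPLAIN
                       SELECT COUNT(DISTINCT user_id)
                       FROM sessions
                       WHERE project_id = 1
                         AND user_id IS NOT NULL
                         AND duration > 0;""")
        for line in cur.fetchall():
            print(line)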