diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..c564646a6 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,7 @@ +repos: + - repo: https://github.com/gitguardian/ggshield + rev: v1.14.5 + hooks: + - id: ggshield + language_version: python3 + stages: [commit] diff --git a/api/chalicelib/core/custom_metrics.py b/api/chalicelib/core/custom_metrics.py index 9ee84b018..88641351a 100644 --- a/api/chalicelib/core/custom_metrics.py +++ b/api/chalicelib/core/custom_metrics.py @@ -5,14 +5,14 @@ from decouple import config from fastapi import HTTPException, status import schemas -from chalicelib.core import sessions, funnels, errors, issues, metrics, click_maps, sessions_mobs +from chalicelib.core import sessions, funnels, errors, issues, metrics, click_maps, sessions_mobs, product_analytics from chalicelib.utils import helper, pg_client, s3 from chalicelib.utils.TimeUTC import TimeUTC PIE_CHART_GROUP = 5 -def __try_live(project_id, data: schemas.CreateCardSchema): +def __try_live(project_id, data: schemas.CardSchema): results = [] for i, s in enumerate(data.series): s.filter.startDate = data.startTimestamp @@ -45,11 +45,11 @@ def __try_live(project_id, data: schemas.CreateCardSchema): return results -def __is_funnel_chart(data: schemas.CreateCardSchema): +def __is_funnel_chart(data: schemas.CardSchema): return data.metric_type == schemas.MetricType.funnel -def __get_funnel_chart(project_id, data: schemas.CreateCardSchema): +def __get_funnel_chart(project_id, data: schemas.CardSchema): if len(data.series) == 0: return { "stages": [], @@ -60,12 +60,12 @@ def __get_funnel_chart(project_id, data: schemas.CreateCardSchema): return funnels.get_top_insights_on_the_fly_widget(project_id=project_id, data=data.series[0].filter) -def __is_errors_list(data: schemas.CreateCardSchema): +def __is_errors_list(data: schemas.CardSchema): return data.metric_type == schemas.MetricType.table \ and data.metric_of == schemas.MetricOfTable.errors -def __get_errors_list(project_id, user_id, data: schemas.CreateCardSchema): +def __get_errors_list(project_id, user_id, data: schemas.CardSchema): if len(data.series) == 0: return { "total": 0, @@ -78,12 +78,12 @@ def __get_errors_list(project_id, user_id, data: schemas.CreateCardSchema): return errors.search(data.series[0].filter, project_id=project_id, user_id=user_id) -def __is_sessions_list(data: schemas.CreateCardSchema): +def __is_sessions_list(data: schemas.CardSchema): return data.metric_type == schemas.MetricType.table \ and data.metric_of == schemas.MetricOfTable.sessions -def __get_sessions_list(project_id, user_id, data: schemas.CreateCardSchema): +def __get_sessions_list(project_id, user_id, data: schemas.CardSchema): if len(data.series) == 0: print("empty series") return { @@ -97,15 +97,15 @@ def __get_sessions_list(project_id, user_id, data: schemas.CreateCardSchema): return sessions.search_sessions(data=data.series[0].filter, project_id=project_id, user_id=user_id) -def __is_predefined(data: schemas.CreateCardSchema): +def __is_predefined(data: schemas.CardSchema): return data.is_template -def __is_click_map(data: schemas.CreateCardSchema): +def __is_click_map(data: schemas.CardSchema): return data.metric_type == schemas.MetricType.click_map -def __get_click_map_chart(project_id, user_id, data: schemas.CreateCardSchema, include_mobs: bool = True): +def __get_click_map_chart(project_id, user_id, data: schemas.CardSchema, include_mobs: bool = True): if len(data.series) == 0: return None 
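# A minimal illustrative sketch, not part of the patch: the CreateCardSchema -> CardSchema
# rename above feeds the same dispatch helpers, and merged_live() in the next hunks picks a
# chart builder by inspecting the card's metric_type / metric_of. The stand-in object below
# only carries the attributes those predicates read; real callers pass a validated
# schemas.CardSchema, whose full required field set is not shown in this diff.
from types import SimpleNamespace

card = SimpleNamespace(metric_type=schemas.MetricType.funnel,
                       metric_of=None, is_template=False, series=[])
__is_funnel_chart(card)     # True  -> merged_live() would call __get_funnel_chart()
__is_path_analysis(card)    # False -> the new path-analysis branch is not taken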
data.series[0].filter.startDate = data.startTimestamp @@ -115,7 +115,22 @@ def __get_click_map_chart(project_id, user_id, data: schemas.CreateCardSchema, i include_mobs=include_mobs) -def merged_live(project_id, data: schemas.CreateCardSchema, user_id=None): +def __get_path_analysis_chart(project_id, data: schemas.CardSchema): + if len(data.series) == 0: + data.series.append(schemas.CardSeriesSchema()) + elif not isinstance(data.series[0].filter, schemas.PathAnalysisSchema): + data.series[0].filter = schemas.PathAnalysisSchema() + data.series[0].filter.startTimestamp = data.startTimestamp + data.series[0].filter.endTimestamp = data.endTimestamp + return product_analytics.path_analysis(project_id=project_id, + data=schemas.PathAnalysisSchema(**data.series[0].filter.dict())) + + +def __is_path_analysis(data: schemas.CardSchema): + return data.metric_type == schemas.MetricType.pathAnalysis + + +def merged_live(project_id, data: schemas.CardSchema, user_id=None): if data.is_template: return get_predefined_metric(key=data.metric_of, project_id=project_id, data=data.dict()) elif __is_funnel_chart(data): @@ -126,6 +141,8 @@ def merged_live(project_id, data: schemas.CreateCardSchema, user_id=None): return __get_sessions_list(project_id=project_id, user_id=user_id, data=data) elif __is_click_map(data): return __get_click_map_chart(project_id=project_id, user_id=user_id, data=data) + elif __is_path_analysis(data): + return __get_path_analysis_chart(project_id=project_id, data=data) elif len(data.series) == 0: return [] series_charts = __try_live(project_id=project_id, data=data) @@ -139,11 +156,11 @@ def merged_live(project_id, data: schemas.CreateCardSchema, user_id=None): return results -def __merge_metric_with_data(metric: schemas.CreateCardSchema, - data: schemas.CardChartSchema) -> schemas.CreateCardSchema: +def __merge_metric_with_data(metric: schemas.CardSchema, + data: schemas.CardChartSchema) -> schemas.CardSchema: if data.series is not None and len(data.series) > 0: metric.series = data.series - metric: schemas.CreateCardSchema = schemas.CreateCardSchema( + metric: schemas.CardSchema = schemas.CardSchema( **{**data.dict(by_alias=True), **metric.dict(by_alias=True)}) if len(data.filters) > 0 or len(data.events) > 0: for s in metric.series: @@ -158,10 +175,10 @@ def __merge_metric_with_data(metric: schemas.CreateCardSchema, return metric -def make_chart(project_id, user_id, data: schemas.CardChartSchema, metric: schemas.CreateCardSchema): +def make_chart(project_id, user_id, data: schemas.CardChartSchema, metric: schemas.CardSchema): if metric is None: return None - metric: schemas.CreateCardSchema = __merge_metric_with_data(metric=metric, data=data) + metric: schemas.CardSchema = __merge_metric_with_data(metric=metric, data=data) return merged_live(project_id=project_id, data=metric, user_id=user_id) @@ -171,8 +188,8 @@ def get_sessions(project_id, user_id, metric_id, data: schemas.CardSessionsSchem raw_metric: dict = get_card(metric_id=metric_id, project_id=project_id, user_id=user_id, flatten=False) if raw_metric is None: return None - metric: schemas.CreateCardSchema = schemas.CreateCardSchema(**raw_metric) - metric: schemas.CreateCardSchema = __merge_metric_with_data(metric=metric, data=data) + metric: schemas.CardSchema = schemas.CardSchema(**raw_metric) + metric: schemas.CardSchema = __merge_metric_with_data(metric=metric, data=data) if metric is None: return None results = [] @@ -198,8 +215,8 @@ def get_funnel_issues(project_id, user_id, metric_id, data: schemas.CardSessions 
raw_metric: dict = get_card(metric_id=metric_id, project_id=project_id, user_id=user_id, flatten=False) if raw_metric is None: return None - metric: schemas.CreateCardSchema = schemas.CreateCardSchema(**raw_metric) - metric: schemas.CreateCardSchema = __merge_metric_with_data(metric=metric, data=data) + metric: schemas.CardSchema = schemas.CardSchema(**raw_metric) + metric: schemas.CardSchema = __merge_metric_with_data(metric=metric, data=data) if metric is None: return None for s in metric.series: @@ -215,8 +232,8 @@ def get_errors_list(project_id, user_id, metric_id, data: schemas.CardSessionsSc raw_metric: dict = get_card(metric_id=metric_id, project_id=project_id, user_id=user_id, flatten=False) if raw_metric is None: return None - metric: schemas.CreateCardSchema = schemas.CreateCardSchema(**raw_metric) - metric: schemas.CreateCardSchema = __merge_metric_with_data(metric=metric, data=data) + metric: schemas.CardSchema = schemas.CardSchema(**raw_metric) + metric: schemas.CardSchema = __merge_metric_with_data(metric=metric, data=data) if metric is None: return None for s in metric.series: @@ -247,7 +264,7 @@ def try_sessions(project_id, user_id, data: schemas.CardSessionsSchema): return results -def create(project_id, user_id, data: schemas.CreateCardSchema, dashboard=False): +def create(project_id, user_id, data: schemas.CardSchema, dashboard=False): with pg_client.PostgresClient() as cur: session_data = None if __is_click_map(data): @@ -539,8 +556,8 @@ def get_funnel_sessions_by_issue(user_id, project_id, metric_id, issue_id, metric: dict = get_card(metric_id=metric_id, project_id=project_id, user_id=user_id, flatten=False) if metric is None: return None - metric: schemas.CreateCardSchema = schemas.CreateCardSchema(**metric) - metric: schemas.CreateCardSchema = __merge_metric_with_data(metric=metric, data=data) + metric: schemas.CardSchema = schemas.CardSchema(**metric) + metric: schemas.CardSchema = __merge_metric_with_data(metric=metric, data=data) if metric is None: return None for s in metric.series: @@ -575,7 +592,7 @@ def make_chart_from_card(project_id, user_id, metric_id, data: schemas.CardChart raw_metric: dict = get_card(metric_id=metric_id, project_id=project_id, user_id=user_id, include_data=True) if raw_metric is None: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="card not found") - metric: schemas.CreateCardSchema = schemas.CreateCardSchema(**raw_metric) + metric: schemas.CardSchema = schemas.CardSchema(**raw_metric) if metric.is_template: return get_predefined_metric(key=metric.metric_of, project_id=project_id, data=data.dict()) elif __is_click_map(metric): diff --git a/api/chalicelib/core/dashboards.py b/api/chalicelib/core/dashboards.py index d78a778ef..4161846a4 100644 --- a/api/chalicelib/core/dashboards.py +++ b/api/chalicelib/core/dashboards.py @@ -223,7 +223,7 @@ def pin_dashboard(project_id, user_id, dashboard_id): return helper.dict_to_camel_case(row) -def create_metric_add_widget(project_id, user_id, dashboard_id, data: schemas.CreateCardSchema): +def create_metric_add_widget(project_id, user_id, dashboard_id, data: schemas.CardSchema): metric_id = custom_metrics.create(project_id=project_id, user_id=user_id, data=data, dashboard=True) return add_widget(project_id=project_id, user_id=user_id, dashboard_id=dashboard_id, data=schemas.AddWidgetToDashboardPayloadSchema(metricId=metric_id)) diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py deleted file mode 100644 index c04fd3981..000000000 --- 
a/api/chalicelib/core/insights.py +++ /dev/null @@ -1,926 +0,0 @@ -import schemas -from chalicelib.core.metrics import __get_constraints, __get_constraint_values -from chalicelib.utils import helper, dev -from chalicelib.utils import pg_client -from chalicelib.utils.TimeUTC import TimeUTC - - -def __transform_journey(rows): - nodes = [] - links = [] - for r in rows: - source = r["source_event"][r["source_event"].index("_") + 1:] - target = r["target_event"][r["target_event"].index("_") + 1:] - if source not in nodes: - nodes.append(source) - if target not in nodes: - nodes.append(target) - links.append({"source": nodes.index(source), "target": nodes.index(target), "value": r["value"]}) - return {"nodes": nodes, "links": sorted(links, key=lambda x: x["value"], reverse=True)} - - -JOURNEY_DEPTH = 5 -JOURNEY_TYPES = { - "PAGES": {"table": "events.pages", "column": "path", "table_id": "message_id"}, - "CLICK": {"table": "events.clicks", "column": "label", "table_id": "message_id"}, - # "VIEW": {"table": "events_ios.views", "column": "name", "table_id": "seq_index"}, TODO: enable this for SAAS only - "EVENT": {"table": "events_common.customs", "column": "name", "table_id": "seq_index"} -} - - -def journey(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), filters=[], **args): - pg_sub_query_subset = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", - time_constraint=True) - event_start = None - event_table = JOURNEY_TYPES["PAGES"]["table"] - event_column = JOURNEY_TYPES["PAGES"]["column"] - event_table_id = JOURNEY_TYPES["PAGES"]["table_id"] - extra_values = {} - for f in filters: - if f["type"] == "START_POINT": - event_start = f["value"] - elif f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): - event_table = JOURNEY_TYPES[f["value"]]["table"] - event_column = JOURNEY_TYPES[f["value"]]["column"] - elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: - pg_sub_query_subset.append(f"sessions.user_id = %(user_id)s") - extra_values["user_id"] = f["value"] - - with pg_client.PostgresClient() as cur: - pg_query = f"""SELECT source_event, - target_event, - count(*) AS value - - FROM (SELECT event_number || '_' || value as target_event, - LAG(event_number || '_' || value, 1) OVER ( PARTITION BY session_rank ) AS source_event - FROM (SELECT value, - session_rank, - message_id, - ROW_NUMBER() OVER ( PARTITION BY session_rank ORDER BY timestamp ) AS event_number - - {f"FROM (SELECT * FROM (SELECT *, MIN(mark) OVER ( PARTITION BY session_id , session_rank ORDER BY timestamp ) AS max FROM (SELECT *, CASE WHEN value = %(event_start)s THEN timestamp ELSE NULL END as mark" - if event_start else ""} - - FROM (SELECT session_id, - message_id, - timestamp, - value, - SUM(new_session) OVER (ORDER BY session_id, timestamp) AS session_rank - FROM (SELECT *, - CASE - WHEN source_timestamp IS NULL THEN 1 - ELSE 0 END AS new_session - FROM (SELECT session_id, - {event_table_id} AS message_id, - timestamp, - {event_column} AS value, - LAG(timestamp) - OVER (PARTITION BY session_id ORDER BY timestamp) AS source_timestamp - FROM {event_table} INNER JOIN public.sessions USING (session_id) - WHERE {" AND ".join(pg_sub_query_subset)} - ) AS related_events) AS ranked_events) AS processed - {") AS marked) AS maxed WHERE timestamp >= max) AS filtered" if event_start else ""} - ) AS sorted_events - WHERE event_number <= %(JOURNEY_DEPTH)s) AS final - WHERE source_event IS NOT NULL - and target_event IS NOT NULL - GROUP 
BY source_event, target_event - ORDER BY value DESC - LIMIT 20;""" - params = {"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, "event_start": event_start, "JOURNEY_DEPTH": JOURNEY_DEPTH, - **__get_constraint_values(args), **extra_values} - # print(cur.mogrify(pg_query, params)) - cur.execute(cur.mogrify(pg_query, params)) - rows = cur.fetchall() - - return __transform_journey(rows) - - -def __compute_weekly_percentage(rows): - if rows is None or len(rows) == 0: - return rows - t = -1 - for r in rows: - if r["week"] == 0: - t = r["usersCount"] - r["percentage"] = r["usersCount"] / t - return rows - - -def __complete_retention(rows, start_date, end_date=None): - if rows is None: - return [] - max_week = 10 - for i in range(max_week): - if end_date is not None and start_date + i * TimeUTC.MS_WEEK >= end_date: - break - neutral = { - "firstConnexionWeek": start_date, - "week": i, - "usersCount": 0, - "connectedUsers": [], - "percentage": 0 - } - if i < len(rows) \ - and i != rows[i]["week"]: - rows.insert(i, neutral) - elif i >= len(rows): - rows.append(neutral) - return rows - - -def __complete_acquisition(rows, start_date, end_date=None): - if rows is None: - return [] - max_week = 10 - week = 0 - delta_date = 0 - while max_week > 0: - start_date += TimeUTC.MS_WEEK - if end_date is not None and start_date >= end_date: - break - delta = 0 - if delta_date + week >= len(rows) \ - or delta_date + week < len(rows) and rows[delta_date + week]["firstConnexionWeek"] > start_date: - for i in range(max_week): - if end_date is not None and start_date + i * TimeUTC.MS_WEEK >= end_date: - break - - neutral = { - "firstConnexionWeek": start_date, - "week": i, - "usersCount": 0, - "connectedUsers": [], - "percentage": 0 - } - rows.insert(delta_date + week + i, neutral) - delta = i - else: - for i in range(max_week): - if end_date is not None and start_date + i * TimeUTC.MS_WEEK >= end_date: - break - - neutral = { - "firstConnexionWeek": start_date, - "week": i, - "usersCount": 0, - "connectedUsers": [], - "percentage": 0 - } - if delta_date + week + i < len(rows) \ - and i != rows[delta_date + week + i]["week"]: - rows.insert(delta_date + week + i, neutral) - elif delta_date + week + i >= len(rows): - rows.append(neutral) - delta = i - week += delta - max_week -= 1 - delta_date += 1 - return rows - - -def users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], - **args): - startTimestamp = TimeUTC.trunc_week(startTimestamp) - endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK - pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", - time_constraint=True) - pg_sub_query.append("user_id IS NOT NULL") - pg_sub_query.append("DATE_TRUNC('week', to_timestamp(start_ts / 1000)) = to_timestamp(%(startTimestamp)s / 1000)") - with pg_client.PostgresClient() as cur: - pg_query = f"""SELECT FLOOR(DATE_PART('day', connexion_week - DATE_TRUNC('week', to_timestamp(%(startTimestamp)s / 1000)::timestamp)) / 7)::integer AS week, - COUNT(DISTINCT connexions_list.user_id) AS users_count, - ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users - FROM (SELECT DISTINCT user_id - FROM sessions - WHERE {" AND ".join(pg_sub_query)} - AND DATE_PART('week', to_timestamp((sessions.start_ts - %(startTimestamp)s)/1000)) = 1 - AND NOT EXISTS((SELECT 1 - FROM sessions AS bsess - WHERE bsess.start_ts < %(startTimestamp)s - AND project_id = %(project_id)s - AND bsess.user_id = 
sessions.user_id - LIMIT 1)) - ) AS users_list - LEFT JOIN LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week, - user_id - FROM sessions - WHERE users_list.user_id = sessions.user_id - AND %(startTimestamp)s <=sessions.start_ts - AND sessions.project_id = %(project_id)s - AND sessions.start_ts < (%(endTimestamp)s - 1) - GROUP BY connexion_week, user_id - ) AS connexions_list ON (TRUE) - GROUP BY week - ORDER BY week;""" - - params = {"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)} - print(cur.mogrify(pg_query, params)) - cur.execute(cur.mogrify(pg_query, params)) - rows = cur.fetchall() - rows = __compute_weekly_percentage(helper.list_to_camel_case(rows)) - return { - "startTimestamp": startTimestamp, - "chart": __complete_retention(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) - } - - -def users_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), - filters=[], - **args): - startTimestamp = TimeUTC.trunc_week(startTimestamp) - endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK - pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", - time_constraint=True) - pg_sub_query.append("user_id IS NOT NULL") - with pg_client.PostgresClient() as cur: - pg_query = f"""SELECT EXTRACT(EPOCH FROM first_connexion_week::date)::bigint*1000 AS first_connexion_week, - FLOOR(DATE_PART('day', connexion_week - first_connexion_week) / 7)::integer AS week, - COUNT(DISTINCT connexions_list.user_id) AS users_count, - ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users - FROM (SELECT user_id, MIN(DATE_TRUNC('week', to_timestamp(start_ts / 1000))) AS first_connexion_week - FROM sessions - WHERE {" AND ".join(pg_sub_query)} - AND NOT EXISTS((SELECT 1 - FROM sessions AS bsess - WHERE bsess.start_ts<%(startTimestamp)s - AND project_id = %(project_id)s - AND bsess.user_id = sessions.user_id - LIMIT 1)) - GROUP BY user_id) AS users_list - LEFT JOIN LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week, - user_id - FROM sessions - WHERE users_list.user_id = sessions.user_id - AND first_connexion_week <= - DATE_TRUNC('week', to_timestamp(sessions.start_ts / 1000)::timestamp) - AND sessions.project_id = %(project_id)s - AND sessions.start_ts < (%(endTimestamp)s - 1) - GROUP BY connexion_week, user_id) AS connexions_list ON (TRUE) - GROUP BY first_connexion_week, week - ORDER BY first_connexion_week, week;""" - - params = {"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)} - print(cur.mogrify(pg_query, params)) - cur.execute(cur.mogrify(pg_query, params)) - rows = cur.fetchall() - rows = __compute_weekly_percentage(helper.list_to_camel_case(rows)) - return { - "startTimestamp": startTimestamp, - "chart": __complete_acquisition(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) - } - - -def feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), - filters=[], - **args): - startTimestamp = TimeUTC.trunc_week(startTimestamp) - endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK - pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", - time_constraint=True) - pg_sub_query.append("user_id IS NOT NULL") - pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") - 
pg_sub_query.append("feature.timestamp < %(endTimestamp)s") - event_type = "PAGES" - event_value = "/" - extra_values = {} - default = True - for f in filters: - if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): - event_type = f["value"] - elif f["type"] == "EVENT_VALUE": - event_value = f["value"] - default = False - elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: - pg_sub_query.append(f"sessions.user_id = %(user_id)s") - extra_values["user_id"] = f["value"] - event_table = JOURNEY_TYPES[event_type]["table"] - event_column = JOURNEY_TYPES[event_type]["column"] - pg_sub_query.append(f"feature.{event_column} = %(value)s") - - with pg_client.PostgresClient() as cur: - if default: - # get most used value - pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count - FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) - WHERE {" AND ".join(pg_sub_query[:-1])} - AND length({event_column}) > 2 - GROUP BY value - ORDER BY count DESC - LIMIT 1;""" - params = {"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} - cur.execute(cur.mogrify(pg_query, params)) - row = cur.fetchone() - if row is not None: - event_value = row["value"] - extra_values["value"] = event_value - if len(event_value) > 2: - pg_sub_query.append(f"length({event_column})>2") - pg_query = f"""SELECT FLOOR(DATE_PART('day', connexion_week - to_timestamp(%(startTimestamp)s/1000)) / 7)::integer AS week, - COUNT(DISTINCT connexions_list.user_id) AS users_count, - ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users - FROM (SELECT DISTINCT user_id - FROM sessions INNER JOIN {event_table} AS feature USING (session_id) - WHERE {" AND ".join(pg_sub_query)} - AND DATE_PART('week', to_timestamp((sessions.start_ts - %(startTimestamp)s)/1000)) = 1 - AND NOT EXISTS((SELECT 1 - FROM sessions AS bsess INNER JOIN {event_table} AS bfeature USING (session_id) - WHERE bsess.start_ts<%(startTimestamp)s - AND project_id = %(project_id)s - AND bsess.user_id = sessions.user_id - AND bfeature.timestamp<%(startTimestamp)s - AND bfeature.{event_column}=%(value)s - LIMIT 1)) - GROUP BY user_id) AS users_list - LEFT JOIN LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week, - user_id - FROM sessions INNER JOIN {event_table} AS feature USING (session_id) - WHERE users_list.user_id = sessions.user_id - AND %(startTimestamp)s <= sessions.start_ts - AND sessions.project_id = %(project_id)s - AND sessions.start_ts < (%(endTimestamp)s - 1) - AND feature.timestamp >= %(startTimestamp)s - AND feature.timestamp < %(endTimestamp)s - AND feature.{event_column} = %(value)s - GROUP BY connexion_week, user_id) AS connexions_list ON (TRUE) - GROUP BY week - ORDER BY week;""" - - params = {"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} - print(cur.mogrify(pg_query, params)) - cur.execute(cur.mogrify(pg_query, params)) - rows = cur.fetchall() - rows = __compute_weekly_percentage(helper.list_to_camel_case(rows)) - return { - "startTimestamp": startTimestamp, - "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}], - "chart": __complete_retention(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) - } - - - -def feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), - 
filters=[], - **args): - startTimestamp = TimeUTC.trunc_week(startTimestamp) - endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK - pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", - time_constraint=True) - pg_sub_query.append("user_id IS NOT NULL") - pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") - pg_sub_query.append("feature.timestamp < %(endTimestamp)s") - event_type = "PAGES" - event_value = "/" - extra_values = {} - default = True - for f in filters: - if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): - event_type = f["value"] - elif f["type"] == "EVENT_VALUE": - event_value = f["value"] - default = False - elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: - pg_sub_query.append(f"sessions.user_id = %(user_id)s") - extra_values["user_id"] = f["value"] - event_table = JOURNEY_TYPES[event_type]["table"] - event_column = JOURNEY_TYPES[event_type]["column"] - - pg_sub_query.append(f"feature.{event_column} = %(value)s") - - with pg_client.PostgresClient() as cur: - if default: - # get most used value - pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count - FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) - WHERE {" AND ".join(pg_sub_query[:-1])} - AND length({event_column}) > 2 - GROUP BY value - ORDER BY count DESC - LIMIT 1;""" - params = {"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} - cur.execute(cur.mogrify(pg_query, params)) - row = cur.fetchone() - if row is not None: - event_value = row["value"] - extra_values["value"] = event_value - if len(event_value) > 2: - pg_sub_query.append(f"length({event_column})>2") - pg_query = f"""SELECT EXTRACT(EPOCH FROM first_connexion_week::date)::bigint*1000 AS first_connexion_week, - FLOOR(DATE_PART('day', connexion_week - first_connexion_week) / 7)::integer AS week, - COUNT(DISTINCT connexions_list.user_id) AS users_count, - ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users - FROM (SELECT user_id, DATE_TRUNC('week', to_timestamp(first_connexion_week / 1000)) AS first_connexion_week - FROM(SELECT DISTINCT user_id, MIN(start_ts) AS first_connexion_week - FROM sessions INNER JOIN {event_table} AS feature USING (session_id) - WHERE {" AND ".join(pg_sub_query)} - AND NOT EXISTS((SELECT 1 - FROM sessions AS bsess INNER JOIN {event_table} AS bfeature USING (session_id) - WHERE bsess.start_ts<%(startTimestamp)s - AND project_id = %(project_id)s - AND bsess.user_id = sessions.user_id - AND bfeature.timestamp<%(startTimestamp)s - AND bfeature.{event_column}=%(value)s - LIMIT 1)) - GROUP BY user_id) AS raw_users_list) AS users_list - LEFT JOIN LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week, - user_id - FROM sessions INNER JOIN {event_table} AS feature USING(session_id) - WHERE users_list.user_id = sessions.user_id - AND first_connexion_week <= - DATE_TRUNC('week', to_timestamp(sessions.start_ts / 1000)::timestamp) - AND sessions.project_id = %(project_id)s - AND sessions.start_ts < (%(endTimestamp)s - 1) - AND feature.timestamp >= %(startTimestamp)s - AND feature.timestamp < %(endTimestamp)s - AND feature.{event_column} = %(value)s - GROUP BY connexion_week, user_id) AS connexions_list ON (TRUE) - GROUP BY first_connexion_week, week - ORDER BY first_connexion_week, week;""" - - params = {"project_id": project_id, "startTimestamp": startTimestamp, - 
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} - print(cur.mogrify(pg_query, params)) - cur.execute(cur.mogrify(pg_query, params)) - rows = cur.fetchall() - rows = __compute_weekly_percentage(helper.list_to_camel_case(rows)) - return { - "startTimestamp": startTimestamp, - "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}], - "chart": __complete_acquisition(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) - } - - - -def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), - filters=[], - **args): - startTimestamp = TimeUTC.trunc_week(startTimestamp) - endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK - pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", - time_constraint=True) - event_table = JOURNEY_TYPES["CLICK"]["table"] - event_column = JOURNEY_TYPES["CLICK"]["column"] - extra_values = {} - for f in filters: - if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): - event_table = JOURNEY_TYPES[f["value"]]["table"] - event_column = JOURNEY_TYPES[f["value"]]["column"] - elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: - pg_sub_query.append(f"sessions.user_id = %(user_id)s") - extra_values["user_id"] = f["value"] - - with pg_client.PostgresClient() as cur: - pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count - FROM sessions - WHERE {" AND ".join(pg_sub_query)} - AND user_id IS NOT NULL;""" - params = {"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} - # print(cur.mogrify(pg_query, params)) - # print("---------------------") - cur.execute(cur.mogrify(pg_query, params)) - all_user_count = cur.fetchone()["count"] - if all_user_count == 0: - return [] - pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") - pg_sub_query.append("feature.timestamp < %(endTimestamp)s") - pg_sub_query.append(f"length({event_column})>2") - pg_query = f"""SELECT {event_column} AS value, COUNT(DISTINCT user_id) AS count - FROM {event_table} AS feature INNER JOIN sessions USING (session_id) - WHERE {" AND ".join(pg_sub_query)} - AND user_id IS NOT NULL - GROUP BY value - ORDER BY count DESC - LIMIT 7;""" - # TODO: solve full scan - print(cur.mogrify(pg_query, params)) - print("---------------------") - cur.execute(cur.mogrify(pg_query, params)) - popularity = cur.fetchall() - pg_query = f"""SELECT {event_column} AS value, COUNT(session_id) AS count - FROM {event_table} AS feature INNER JOIN sessions USING (session_id) - WHERE {" AND ".join(pg_sub_query)} - GROUP BY value;""" - # TODO: solve full scan - print(cur.mogrify(pg_query, params)) - print("---------------------") - cur.execute(cur.mogrify(pg_query, params)) - frequencies = cur.fetchall() - total_usage = sum([f["count"] for f in frequencies]) - frequencies = {f["value"]: f["count"] for f in frequencies} - for p in popularity: - p["popularity"] = p.pop("count") / all_user_count - p["frequency"] = frequencies[p["value"]] / total_usage - - return popularity - - - -def feature_adoption(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), - filters=[], - **args): - pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", - time_constraint=True) - event_type = "CLICK" - event_value = '/' - extra_values = {} - default = True - for f in 
filters: - if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): - event_type = f["value"] - elif f["type"] == "EVENT_VALUE": - event_value = f["value"] - default = False - elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: - pg_sub_query.append(f"sessions.user_id = %(user_id)s") - extra_values["user_id"] = f["value"] - event_table = JOURNEY_TYPES[event_type]["table"] - event_column = JOURNEY_TYPES[event_type]["column"] - with pg_client.PostgresClient() as cur: - pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count - FROM sessions - WHERE {" AND ".join(pg_sub_query)} - AND user_id IS NOT NULL;""" - params = {"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} - # print(cur.mogrify(pg_query, params)) - # print("---------------------") - cur.execute(cur.mogrify(pg_query, params)) - all_user_count = cur.fetchone()["count"] - if all_user_count == 0: - return {"adoption": 0, "target": 0, "filters": [{"type": "EVENT_TYPE", "value": event_type}, - {"type": "EVENT_VALUE", "value": event_value}], } - pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") - pg_sub_query.append("feature.timestamp < %(endTimestamp)s") - if default: - # get most used value - pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count - FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) - WHERE {" AND ".join(pg_sub_query[:-1])} - AND length({event_column}) > 2 - GROUP BY value - ORDER BY count DESC - LIMIT 1;""" - params = {"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} - cur.execute(cur.mogrify(pg_query, params)) - row = cur.fetchone() - if row is not None: - event_value = row["value"] - extra_values["value"] = event_value - if len(event_value) > 2: - pg_sub_query.append(f"length({event_column})>2") - pg_sub_query.append(f"feature.{event_column} = %(value)s") - pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count - FROM {event_table} AS feature INNER JOIN sessions USING (session_id) - WHERE {" AND ".join(pg_sub_query)} - AND user_id IS NOT NULL;""" - params = {"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} - # print(cur.mogrify(pg_query, params)) - # print("---------------------") - cur.execute(cur.mogrify(pg_query, params)) - adoption = cur.fetchone()["count"] / all_user_count - return {"target": all_user_count, "adoption": adoption, - "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]} - - - -def feature_adoption_top_users(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), - filters=[], **args): - pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", - time_constraint=True) - pg_sub_query.append("user_id IS NOT NULL") - event_type = "CLICK" - event_value = '/' - extra_values = {} - default = True - for f in filters: - if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): - event_type = f["value"] - elif f["type"] == "EVENT_VALUE": - event_value = f["value"] - default = False - elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: - pg_sub_query.append(f"sessions.user_id = %(user_id)s") - extra_values["user_id"] = f["value"] - event_table = JOURNEY_TYPES[event_type]["table"] - event_column = 
JOURNEY_TYPES[event_type]["column"] - with pg_client.PostgresClient() as cur: - pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") - pg_sub_query.append("feature.timestamp < %(endTimestamp)s") - if default: - # get most used value - pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count - FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) - WHERE {" AND ".join(pg_sub_query[:-1])} - AND length({event_column}) > 2 - GROUP BY value - ORDER BY count DESC - LIMIT 1;""" - params = {"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} - cur.execute(cur.mogrify(pg_query, params)) - row = cur.fetchone() - if row is not None: - event_value = row["value"] - extra_values["value"] = event_value - if len(event_value) > 2: - pg_sub_query.append(f"length({event_column})>2") - pg_sub_query.append(f"feature.{event_column} = %(value)s") - pg_query = f"""SELECT user_id, COUNT(DISTINCT session_id) AS count - FROM {event_table} AS feature - INNER JOIN sessions USING (session_id) - WHERE {" AND ".join(pg_sub_query)} - GROUP BY 1 - ORDER BY 2 DESC - LIMIT 10;""" - params = {"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} - # print(cur.mogrify(pg_query, params)) - # print("---------------------") - cur.execute(cur.mogrify(pg_query, params)) - rows = cur.fetchall() - return {"users": helper.list_to_camel_case(rows), - "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]} - - - -def feature_adoption_daily_usage(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), - filters=[], **args): - pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", - time_constraint=True) - pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=True, - chart=True, data=args) - event_type = "CLICK" - event_value = '/' - extra_values = {} - default = True - for f in filters: - if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): - event_type = f["value"] - elif f["type"] == "EVENT_VALUE": - event_value = f["value"] - default = False - elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: - pg_sub_query_chart.append(f"sessions.user_id = %(user_id)s") - extra_values["user_id"] = f["value"] - event_table = JOURNEY_TYPES[event_type]["table"] - event_column = JOURNEY_TYPES[event_type]["column"] - with pg_client.PostgresClient() as cur: - pg_sub_query_chart.append("feature.timestamp >= %(startTimestamp)s") - pg_sub_query_chart.append("feature.timestamp < %(endTimestamp)s") - pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") - pg_sub_query.append("feature.timestamp < %(endTimestamp)s") - if default: - # get most used value - pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count - FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) - WHERE {" AND ".join(pg_sub_query)} - AND length({event_column})>2 - GROUP BY value - ORDER BY count DESC - LIMIT 1;""" - params = {"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} - cur.execute(cur.mogrify(pg_query, params)) - row = cur.fetchone() - if row is not None: - event_value = row["value"] - extra_values["value"] = event_value - if len(event_value) > 2: - 
pg_sub_query.append(f"length({event_column})>2") - pg_sub_query_chart.append(f"feature.{event_column} = %(value)s") - pg_query = f"""SELECT generated_timestamp AS timestamp, - COALESCE(COUNT(session_id), 0) AS count - FROM generate_series(%(startTimestamp)s, %(endTimestamp)s, %(step_size)s) AS generated_timestamp - LEFT JOIN LATERAL ( SELECT DISTINCT session_id - FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) - WHERE {" AND ".join(pg_sub_query_chart)} - ) AS users ON (TRUE) - GROUP BY generated_timestamp - ORDER BY generated_timestamp;""" - params = {"step_size": TimeUTC.MS_DAY, "project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} - print(cur.mogrify(pg_query, params)) - print("---------------------") - cur.execute(cur.mogrify(pg_query, params)) - rows = cur.fetchall() - return {"chart": helper.list_to_camel_case(rows), - "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]} - - - -def feature_intensity(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), - filters=[], - **args): - pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", - time_constraint=True) - pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") - pg_sub_query.append("feature.timestamp < %(endTimestamp)s") - event_table = JOURNEY_TYPES["CLICK"]["table"] - event_column = JOURNEY_TYPES["CLICK"]["column"] - extra_values = {} - for f in filters: - if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): - event_table = JOURNEY_TYPES[f["value"]]["table"] - event_column = JOURNEY_TYPES[f["value"]]["column"] - elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: - pg_sub_query.append(f"sessions.user_id = %(user_id)s") - extra_values["user_id"] = f["value"] - pg_sub_query.append(f"length({event_column})>2") - with pg_client.PostgresClient() as cur: - pg_query = f"""SELECT {event_column} AS value, AVG(DISTINCT session_id) AS avg - FROM {event_table} AS feature INNER JOIN sessions USING (session_id) - WHERE {" AND ".join(pg_sub_query)} - GROUP BY value - ORDER BY avg DESC - LIMIT 7;""" - params = {"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} - # TODO: solve full scan issue - print(cur.mogrify(pg_query, params)) - print("---------------------") - cur.execute(cur.mogrify(pg_query, params)) - rows = cur.fetchall() - - return rows - - - -def users_active(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), - filters=[], - **args): - pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=True, - chart=True, data=args) - - pg_sub_query_chart.append("user_id IS NOT NULL") - period = "DAY" - extra_values = {} - for f in filters: - if f["type"] == "PERIOD" and f["value"] in ["DAY", "WEEK"]: - period = f["value"] - elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: - pg_sub_query_chart.append(f"sessions.user_id = %(user_id)s") - extra_values["user_id"] = f["value"] - - with pg_client.PostgresClient() as cur: - pg_query = f"""SELECT AVG(count) AS avg, JSONB_AGG(chart) AS chart - FROM (SELECT generated_timestamp AS timestamp, - COALESCE(COUNT(users), 0) AS count - FROM generate_series(%(startTimestamp)s, %(endTimestamp)s, %(step_size)s) AS generated_timestamp - LEFT JOIN LATERAL ( 
SELECT DISTINCT user_id - FROM public.sessions - WHERE {" AND ".join(pg_sub_query_chart)} - ) AS users ON (TRUE) - GROUP BY generated_timestamp - ORDER BY generated_timestamp) AS chart;""" - params = {"step_size": TimeUTC.MS_DAY if period == "DAY" else TimeUTC.MS_WEEK, - "project_id": project_id, - "startTimestamp": TimeUTC.trunc_day(startTimestamp) if period == "DAY" else TimeUTC.trunc_week( - startTimestamp), - "endTimestamp": endTimestamp, **__get_constraint_values(args), - **extra_values} - # print(cur.mogrify(pg_query, params)) - # print("---------------------") - cur.execute(cur.mogrify(pg_query, params)) - row_users = cur.fetchone() - - return row_users - - - -def users_power(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), - filters=[], **args): - pg_sub_query = __get_constraints(project_id=project_id, time_constraint=True, chart=False, data=args) - pg_sub_query.append("user_id IS NOT NULL") - - with pg_client.PostgresClient() as cur: - pg_query = f"""SELECT AVG(count) AS avg, JSONB_AGG(day_users_partition) AS partition - FROM (SELECT number_of_days, COUNT(user_id) AS count - FROM (SELECT user_id, COUNT(DISTINCT DATE_TRUNC('day', to_timestamp(start_ts / 1000))) AS number_of_days - FROM sessions - WHERE {" AND ".join(pg_sub_query)} - GROUP BY 1) AS users_connexions - GROUP BY number_of_days - ORDER BY number_of_days) AS day_users_partition;""" - params = {"project_id": project_id, - "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, **__get_constraint_values(args)} - # print(cur.mogrify(pg_query, params)) - # print("---------------------") - cur.execute(cur.mogrify(pg_query, params)) - row_users = cur.fetchone() - - return helper.dict_to_camel_case(row_users) - - - -def users_slipping(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), - filters=[], **args): - pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", - time_constraint=True) - pg_sub_query.append("user_id IS NOT NULL") - pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") - pg_sub_query.append("feature.timestamp < %(endTimestamp)s") - event_type = "PAGES" - event_value = "/" - extra_values = {} - default = True - for f in filters: - if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): - event_type = f["value"] - elif f["type"] == "EVENT_VALUE": - event_value = f["value"] - default = False - elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: - pg_sub_query.append(f"sessions.user_id = %(user_id)s") - extra_values["user_id"] = f["value"] - event_table = JOURNEY_TYPES[event_type]["table"] - event_column = JOURNEY_TYPES[event_type]["column"] - pg_sub_query.append(f"feature.{event_column} = %(value)s") - - with pg_client.PostgresClient() as cur: - if default: - # get most used value - pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count - FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) - WHERE {" AND ".join(pg_sub_query[:-1])} - AND length({event_column}) > 2 - GROUP BY value - ORDER BY count DESC - LIMIT 1;""" - params = {"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} - cur.execute(cur.mogrify(pg_query, params)) - row = cur.fetchone() - if row is not None: - event_value = row["value"] - extra_values["value"] = event_value - if len(event_value) > 2: - pg_sub_query.append(f"length({event_column})>2") - pg_query = 
f"""SELECT user_id, last_time, interactions_count, MIN(start_ts) AS first_seen, MAX(start_ts) AS last_seen - FROM (SELECT user_id, MAX(timestamp) AS last_time, COUNT(DISTINCT session_id) AS interactions_count - FROM {event_table} AS feature INNER JOIN sessions USING (session_id) - WHERE {" AND ".join(pg_sub_query)} - GROUP BY user_id) AS user_last_usage - INNER JOIN sessions USING (user_id) - WHERE EXTRACT(EPOCH FROM now()) * 1000 - last_time > 7 * 24 * 60 * 60 * 1000 - GROUP BY user_id, last_time,interactions_count;""" - - params = {"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} - # print(cur.mogrify(pg_query, params)) - cur.execute(cur.mogrify(pg_query, params)) - rows = cur.fetchall() - return { - "startTimestamp": startTimestamp, - "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}], - "list": helper.list_to_camel_case(rows) - } - - - -def search(text, feature_type, project_id, platform=None): - if not feature_type: - resource_type = "ALL" - data = search(text=text, feature_type=resource_type, project_id=project_id, platform=platform) - return data - - pg_sub_query = __get_constraints(project_id=project_id, time_constraint=True, duration=True, - data={} if platform is None else {"platform": platform}) - - params = {"startTimestamp": TimeUTC.now() - 2 * TimeUTC.MS_MONTH, - "endTimestamp": TimeUTC.now(), - "project_id": project_id, - "value": helper.string_to_sql_like(text.lower()), - "platform_0": platform} - if feature_type == "ALL": - with pg_client.PostgresClient() as cur: - sub_queries = [] - for e in JOURNEY_TYPES: - sub_queries.append(f"""(SELECT DISTINCT {JOURNEY_TYPES[e]["column"]} AS value, '{e}' AS "type" - FROM {JOURNEY_TYPES[e]["table"]} INNER JOIN public.sessions USING(session_id) - WHERE {" AND ".join(pg_sub_query)} AND {JOURNEY_TYPES[e]["column"]} ILIKE %(value)s - LIMIT 10)""") - pg_query = "UNION ALL".join(sub_queries) - # print(cur.mogrify(pg_query, params)) - cur.execute(cur.mogrify(pg_query, params)) - rows = cur.fetchall() - elif JOURNEY_TYPES.get(feature_type) is not None: - with pg_client.PostgresClient() as cur: - pg_query = f"""SELECT DISTINCT {JOURNEY_TYPES[feature_type]["column"]} AS value, '{feature_type}' AS "type" - FROM {JOURNEY_TYPES[feature_type]["table"]} INNER JOIN public.sessions USING(session_id) - WHERE {" AND ".join(pg_sub_query)} AND {JOURNEY_TYPES[feature_type]["column"]} ILIKE %(value)s - LIMIT 10;""" - # print(cur.mogrify(pg_query, params)) - cur.execute(cur.mogrify(pg_query, params)) - rows = cur.fetchall() - else: - return [] - return [helper.dict_to_camel_case(row) for row in rows] diff --git a/api/chalicelib/core/product_analytics.py b/api/chalicelib/core/product_analytics.py new file mode 100644 index 000000000..4b1e15a04 --- /dev/null +++ b/api/chalicelib/core/product_analytics.py @@ -0,0 +1,924 @@ +import schemas +from chalicelib.core.metrics import __get_constraints, __get_constraint_values +from chalicelib.utils import helper, dev +from chalicelib.utils import pg_client +from chalicelib.utils.TimeUTC import TimeUTC + + +def __transform_journey(rows): + nodes = [] + links = [] + for r in rows: + source = r["source_event"][r["source_event"].index("_") + 1:] + target = r["target_event"][r["target_event"].index("_") + 1:] + if source not in nodes: + nodes.append(source) + if target not in nodes: + nodes.append(target) + links.append({"source": nodes.index(source), "target": nodes.index(target), 
"value": r["value"]}) + return {"nodes": nodes, "links": sorted(links, key=lambda x: x["value"], reverse=True)} + + +JOURNEY_DEPTH = 5 +JOURNEY_TYPES = { + schemas.ProductAnalyticsEventType.location: {"table": "events.pages", "column": "path", "table_id": "message_id"}, + schemas.ProductAnalyticsEventType.click: {"table": "events.clicks", "column": "label", "table_id": "message_id"}, + schemas.ProductAnalyticsEventType.input: {"table": "events.inputs", "column": "label", "table_id": "message_id"}, + schemas.ProductAnalyticsEventType.custom_event: {"table": "events_common.customs", "column": "name", + "table_id": "seq_index"} +} + + +def path_analysis(project_id, data: schemas.PathAnalysisSchema): + # pg_sub_query_subset = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", + # time_constraint=True) + # TODO: check if data=args is required + pg_sub_query_subset = __get_constraints(project_id=project_id, duration=True, main_table="sessions", + time_constraint=True) + event_start = None + event_table = JOURNEY_TYPES[schemas.ProductAnalyticsEventType.location]["table"] + event_column = JOURNEY_TYPES[schemas.ProductAnalyticsEventType.location]["column"] + event_table_id = JOURNEY_TYPES[schemas.ProductAnalyticsEventType.location]["table_id"] + extra_values = {} + for f in data.filters: + if f.type == schemas.ProductAnalyticsFilterType.start_point: + event_start = f.value[0] + elif f.type == schemas.ProductAnalyticsFilterType.event_type and JOURNEY_TYPES.get(f.value[0]): + event_table = JOURNEY_TYPES[f.value[0]]["table"] + event_column = JOURNEY_TYPES[f.value[0]]["column"] + elif f.type == schemas.ProductAnalyticsFilterType.user_id: + pg_sub_query_subset.append(f"sessions.user_id = %(user_id)s") + extra_values["user_id"] = f.value + + with pg_client.PostgresClient() as cur: + pg_query = f"""SELECT source_event, + target_event, + count(*) AS value + FROM (SELECT event_number || '_' || value as target_event, + LAG(event_number || '_' || value, 1) OVER ( PARTITION BY session_rank ) AS source_event + FROM (SELECT value, + session_rank, + message_id, + ROW_NUMBER() OVER ( PARTITION BY session_rank ORDER BY timestamp ) AS event_number + + {f"FROM (SELECT * FROM (SELECT *, MIN(mark) OVER ( PARTITION BY session_id , session_rank ORDER BY timestamp ) AS max FROM (SELECT *, CASE WHEN value = %(event_start)s THEN timestamp ELSE NULL END as mark" + if event_start else ""} + + FROM (SELECT session_id, + message_id, + timestamp, + value, + SUM(new_session) OVER (ORDER BY session_id, timestamp) AS session_rank + FROM (SELECT *, + CASE + WHEN source_timestamp IS NULL THEN 1 + ELSE 0 END AS new_session + FROM (SELECT session_id, + {event_table_id} AS message_id, + timestamp, + {event_column} AS value, + LAG(timestamp) + OVER (PARTITION BY session_id ORDER BY timestamp) AS source_timestamp + FROM {event_table} INNER JOIN public.sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query_subset)} + ) AS related_events) AS ranked_events) AS processed + {") AS marked) AS maxed WHERE timestamp >= max) AS filtered" if event_start else ""} + ) AS sorted_events + WHERE event_number <= %(JOURNEY_DEPTH)s) AS final + WHERE source_event IS NOT NULL + and target_event IS NOT NULL + GROUP BY source_event, target_event + ORDER BY value DESC + LIMIT 20;""" + params = {"project_id": project_id, "startTimestamp": data.startTimestamp, + "endTimestamp": data.endTimestamp, "event_start": event_start, "JOURNEY_DEPTH": JOURNEY_DEPTH, + # TODO: add if data=args is required + # 
**__get_constraint_values(args), + **extra_values} + query = cur.mogrify(pg_query, params) + print("----------------------") + print(query) + print("----------------------") + cur.execute(query) + rows = cur.fetchall() + + return __transform_journey(rows) + +# +# def __compute_weekly_percentage(rows): +# if rows is None or len(rows) == 0: +# return rows +# t = -1 +# for r in rows: +# if r["week"] == 0: +# t = r["usersCount"] +# r["percentage"] = r["usersCount"] / t +# return rows +# +# +# def __complete_retention(rows, start_date, end_date=None): +# if rows is None: +# return [] +# max_week = 10 +# for i in range(max_week): +# if end_date is not None and start_date + i * TimeUTC.MS_WEEK >= end_date: +# break +# neutral = { +# "firstConnexionWeek": start_date, +# "week": i, +# "usersCount": 0, +# "connectedUsers": [], +# "percentage": 0 +# } +# if i < len(rows) \ +# and i != rows[i]["week"]: +# rows.insert(i, neutral) +# elif i >= len(rows): +# rows.append(neutral) +# return rows +# +# +# def __complete_acquisition(rows, start_date, end_date=None): +# if rows is None: +# return [] +# max_week = 10 +# week = 0 +# delta_date = 0 +# while max_week > 0: +# start_date += TimeUTC.MS_WEEK +# if end_date is not None and start_date >= end_date: +# break +# delta = 0 +# if delta_date + week >= len(rows) \ +# or delta_date + week < len(rows) and rows[delta_date + week]["firstConnexionWeek"] > start_date: +# for i in range(max_week): +# if end_date is not None and start_date + i * TimeUTC.MS_WEEK >= end_date: +# break +# +# neutral = { +# "firstConnexionWeek": start_date, +# "week": i, +# "usersCount": 0, +# "connectedUsers": [], +# "percentage": 0 +# } +# rows.insert(delta_date + week + i, neutral) +# delta = i +# else: +# for i in range(max_week): +# if end_date is not None and start_date + i * TimeUTC.MS_WEEK >= end_date: +# break +# +# neutral = { +# "firstConnexionWeek": start_date, +# "week": i, +# "usersCount": 0, +# "connectedUsers": [], +# "percentage": 0 +# } +# if delta_date + week + i < len(rows) \ +# and i != rows[delta_date + week + i]["week"]: +# rows.insert(delta_date + week + i, neutral) +# elif delta_date + week + i >= len(rows): +# rows.append(neutral) +# delta = i +# week += delta +# max_week -= 1 +# delta_date += 1 +# return rows +# +# +# def users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], +# **args): +# startTimestamp = TimeUTC.trunc_week(startTimestamp) +# endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK +# pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", +# time_constraint=True) +# pg_sub_query.append("user_id IS NOT NULL") +# pg_sub_query.append("DATE_TRUNC('week', to_timestamp(start_ts / 1000)) = to_timestamp(%(startTimestamp)s / 1000)") +# with pg_client.PostgresClient() as cur: +# pg_query = f"""SELECT FLOOR(DATE_PART('day', connexion_week - DATE_TRUNC('week', to_timestamp(%(startTimestamp)s / 1000)::timestamp)) / 7)::integer AS week, +# COUNT(DISTINCT connexions_list.user_id) AS users_count, +# ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users +# FROM (SELECT DISTINCT user_id +# FROM sessions +# WHERE {" AND ".join(pg_sub_query)} +# AND DATE_PART('week', to_timestamp((sessions.start_ts - %(startTimestamp)s)/1000)) = 1 +# AND NOT EXISTS((SELECT 1 +# FROM sessions AS bsess +# WHERE bsess.start_ts < %(startTimestamp)s +# AND project_id = %(project_id)s +# AND bsess.user_id = sessions.user_id +# LIMIT 1)) +# ) AS users_list +# LEFT JOIN 
LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week, +# user_id +# FROM sessions +# WHERE users_list.user_id = sessions.user_id +# AND %(startTimestamp)s <=sessions.start_ts +# AND sessions.project_id = %(project_id)s +# AND sessions.start_ts < (%(endTimestamp)s - 1) +# GROUP BY connexion_week, user_id +# ) AS connexions_list ON (TRUE) +# GROUP BY week +# ORDER BY week;""" +# +# params = {"project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args)} +# print(cur.mogrify(pg_query, params)) +# cur.execute(cur.mogrify(pg_query, params)) +# rows = cur.fetchall() +# rows = __compute_weekly_percentage(helper.list_to_camel_case(rows)) +# return { +# "startTimestamp": startTimestamp, +# "chart": __complete_retention(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) +# } +# +# +# def users_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), +# filters=[], +# **args): +# startTimestamp = TimeUTC.trunc_week(startTimestamp) +# endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK +# pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", +# time_constraint=True) +# pg_sub_query.append("user_id IS NOT NULL") +# with pg_client.PostgresClient() as cur: +# pg_query = f"""SELECT EXTRACT(EPOCH FROM first_connexion_week::date)::bigint*1000 AS first_connexion_week, +# FLOOR(DATE_PART('day', connexion_week - first_connexion_week) / 7)::integer AS week, +# COUNT(DISTINCT connexions_list.user_id) AS users_count, +# ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users +# FROM (SELECT user_id, MIN(DATE_TRUNC('week', to_timestamp(start_ts / 1000))) AS first_connexion_week +# FROM sessions +# WHERE {" AND ".join(pg_sub_query)} +# AND NOT EXISTS((SELECT 1 +# FROM sessions AS bsess +# WHERE bsess.start_ts<%(startTimestamp)s +# AND project_id = %(project_id)s +# AND bsess.user_id = sessions.user_id +# LIMIT 1)) +# GROUP BY user_id) AS users_list +# LEFT JOIN LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week, +# user_id +# FROM sessions +# WHERE users_list.user_id = sessions.user_id +# AND first_connexion_week <= +# DATE_TRUNC('week', to_timestamp(sessions.start_ts / 1000)::timestamp) +# AND sessions.project_id = %(project_id)s +# AND sessions.start_ts < (%(endTimestamp)s - 1) +# GROUP BY connexion_week, user_id) AS connexions_list ON (TRUE) +# GROUP BY first_connexion_week, week +# ORDER BY first_connexion_week, week;""" +# +# params = {"project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args)} +# print(cur.mogrify(pg_query, params)) +# cur.execute(cur.mogrify(pg_query, params)) +# rows = cur.fetchall() +# rows = __compute_weekly_percentage(helper.list_to_camel_case(rows)) +# return { +# "startTimestamp": startTimestamp, +# "chart": __complete_acquisition(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) +# } +# +# +# def feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), +# filters=[], +# **args): +# startTimestamp = TimeUTC.trunc_week(startTimestamp) +# endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK +# pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", +# time_constraint=True) +# pg_sub_query.append("user_id IS NOT NULL") +# pg_sub_query.append("feature.timestamp >= 
%(startTimestamp)s") +# pg_sub_query.append("feature.timestamp < %(endTimestamp)s") +# event_type = "PAGES" +# event_value = "/" +# extra_values = {} +# default = True +# for f in filters: +# if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): +# event_type = f["value"] +# elif f["type"] == "EVENT_VALUE": +# event_value = f["value"] +# default = False +# elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: +# pg_sub_query.append(f"sessions.user_id = %(user_id)s") +# extra_values["user_id"] = f["value"] +# event_table = JOURNEY_TYPES[event_type]["table"] +# event_column = JOURNEY_TYPES[event_type]["column"] +# pg_sub_query.append(f"feature.{event_column} = %(value)s") +# +# with pg_client.PostgresClient() as cur: +# if default: +# # get most used value +# pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count +# FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) +# WHERE {" AND ".join(pg_sub_query[:-1])} +# AND length({event_column}) > 2 +# GROUP BY value +# ORDER BY count DESC +# LIMIT 1;""" +# params = {"project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} +# cur.execute(cur.mogrify(pg_query, params)) +# row = cur.fetchone() +# if row is not None: +# event_value = row["value"] +# extra_values["value"] = event_value +# if len(event_value) > 2: +# pg_sub_query.append(f"length({event_column})>2") +# pg_query = f"""SELECT FLOOR(DATE_PART('day', connexion_week - to_timestamp(%(startTimestamp)s/1000)) / 7)::integer AS week, +# COUNT(DISTINCT connexions_list.user_id) AS users_count, +# ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users +# FROM (SELECT DISTINCT user_id +# FROM sessions INNER JOIN {event_table} AS feature USING (session_id) +# WHERE {" AND ".join(pg_sub_query)} +# AND DATE_PART('week', to_timestamp((sessions.start_ts - %(startTimestamp)s)/1000)) = 1 +# AND NOT EXISTS((SELECT 1 +# FROM sessions AS bsess INNER JOIN {event_table} AS bfeature USING (session_id) +# WHERE bsess.start_ts<%(startTimestamp)s +# AND project_id = %(project_id)s +# AND bsess.user_id = sessions.user_id +# AND bfeature.timestamp<%(startTimestamp)s +# AND bfeature.{event_column}=%(value)s +# LIMIT 1)) +# GROUP BY user_id) AS users_list +# LEFT JOIN LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week, +# user_id +# FROM sessions INNER JOIN {event_table} AS feature USING (session_id) +# WHERE users_list.user_id = sessions.user_id +# AND %(startTimestamp)s <= sessions.start_ts +# AND sessions.project_id = %(project_id)s +# AND sessions.start_ts < (%(endTimestamp)s - 1) +# AND feature.timestamp >= %(startTimestamp)s +# AND feature.timestamp < %(endTimestamp)s +# AND feature.{event_column} = %(value)s +# GROUP BY connexion_week, user_id) AS connexions_list ON (TRUE) +# GROUP BY week +# ORDER BY week;""" +# +# params = {"project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} +# print(cur.mogrify(pg_query, params)) +# cur.execute(cur.mogrify(pg_query, params)) +# rows = cur.fetchall() +# rows = __compute_weekly_percentage(helper.list_to_camel_case(rows)) +# return { +# "startTimestamp": startTimestamp, +# "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}], +# "chart": __complete_retention(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) +# } +# +# +# def 
feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), +# filters=[], +# **args): +# startTimestamp = TimeUTC.trunc_week(startTimestamp) +# endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK +# pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", +# time_constraint=True) +# pg_sub_query.append("user_id IS NOT NULL") +# pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") +# pg_sub_query.append("feature.timestamp < %(endTimestamp)s") +# event_type = "PAGES" +# event_value = "/" +# extra_values = {} +# default = True +# for f in filters: +# if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): +# event_type = f["value"] +# elif f["type"] == "EVENT_VALUE": +# event_value = f["value"] +# default = False +# elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: +# pg_sub_query.append(f"sessions.user_id = %(user_id)s") +# extra_values["user_id"] = f["value"] +# event_table = JOURNEY_TYPES[event_type]["table"] +# event_column = JOURNEY_TYPES[event_type]["column"] +# +# pg_sub_query.append(f"feature.{event_column} = %(value)s") +# +# with pg_client.PostgresClient() as cur: +# if default: +# # get most used value +# pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count +# FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) +# WHERE {" AND ".join(pg_sub_query[:-1])} +# AND length({event_column}) > 2 +# GROUP BY value +# ORDER BY count DESC +# LIMIT 1;""" +# params = {"project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} +# cur.execute(cur.mogrify(pg_query, params)) +# row = cur.fetchone() +# if row is not None: +# event_value = row["value"] +# extra_values["value"] = event_value +# if len(event_value) > 2: +# pg_sub_query.append(f"length({event_column})>2") +# pg_query = f"""SELECT EXTRACT(EPOCH FROM first_connexion_week::date)::bigint*1000 AS first_connexion_week, +# FLOOR(DATE_PART('day', connexion_week - first_connexion_week) / 7)::integer AS week, +# COUNT(DISTINCT connexions_list.user_id) AS users_count, +# ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users +# FROM (SELECT user_id, DATE_TRUNC('week', to_timestamp(first_connexion_week / 1000)) AS first_connexion_week +# FROM(SELECT DISTINCT user_id, MIN(start_ts) AS first_connexion_week +# FROM sessions INNER JOIN {event_table} AS feature USING (session_id) +# WHERE {" AND ".join(pg_sub_query)} +# AND NOT EXISTS((SELECT 1 +# FROM sessions AS bsess INNER JOIN {event_table} AS bfeature USING (session_id) +# WHERE bsess.start_ts<%(startTimestamp)s +# AND project_id = %(project_id)s +# AND bsess.user_id = sessions.user_id +# AND bfeature.timestamp<%(startTimestamp)s +# AND bfeature.{event_column}=%(value)s +# LIMIT 1)) +# GROUP BY user_id) AS raw_users_list) AS users_list +# LEFT JOIN LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week, +# user_id +# FROM sessions INNER JOIN {event_table} AS feature USING(session_id) +# WHERE users_list.user_id = sessions.user_id +# AND first_connexion_week <= +# DATE_TRUNC('week', to_timestamp(sessions.start_ts / 1000)::timestamp) +# AND sessions.project_id = %(project_id)s +# AND sessions.start_ts < (%(endTimestamp)s - 1) +# AND feature.timestamp >= %(startTimestamp)s +# AND feature.timestamp < %(endTimestamp)s +# AND feature.{event_column} = %(value)s +# GROUP BY connexion_week, user_id) AS 
connexions_list ON (TRUE) +# GROUP BY first_connexion_week, week +# ORDER BY first_connexion_week, week;""" +# +# params = {"project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} +# print(cur.mogrify(pg_query, params)) +# cur.execute(cur.mogrify(pg_query, params)) +# rows = cur.fetchall() +# rows = __compute_weekly_percentage(helper.list_to_camel_case(rows)) +# return { +# "startTimestamp": startTimestamp, +# "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}], +# "chart": __complete_acquisition(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) +# } +# +# +# def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), +# filters=[], +# **args): +# startTimestamp = TimeUTC.trunc_week(startTimestamp) +# endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK +# pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", +# time_constraint=True) +# event_table = JOURNEY_TYPES["CLICK"]["table"] +# event_column = JOURNEY_TYPES["CLICK"]["column"] +# extra_values = {} +# for f in filters: +# if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): +# event_table = JOURNEY_TYPES[f["value"]]["table"] +# event_column = JOURNEY_TYPES[f["value"]]["column"] +# elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: +# pg_sub_query.append(f"sessions.user_id = %(user_id)s") +# extra_values["user_id"] = f["value"] +# +# with pg_client.PostgresClient() as cur: +# pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count +# FROM sessions +# WHERE {" AND ".join(pg_sub_query)} +# AND user_id IS NOT NULL;""" +# params = {"project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} +# # print(cur.mogrify(pg_query, params)) +# # print("---------------------") +# cur.execute(cur.mogrify(pg_query, params)) +# all_user_count = cur.fetchone()["count"] +# if all_user_count == 0: +# return [] +# pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") +# pg_sub_query.append("feature.timestamp < %(endTimestamp)s") +# pg_sub_query.append(f"length({event_column})>2") +# pg_query = f"""SELECT {event_column} AS value, COUNT(DISTINCT user_id) AS count +# FROM {event_table} AS feature INNER JOIN sessions USING (session_id) +# WHERE {" AND ".join(pg_sub_query)} +# AND user_id IS NOT NULL +# GROUP BY value +# ORDER BY count DESC +# LIMIT 7;""" +# # TODO: solve full scan +# print(cur.mogrify(pg_query, params)) +# print("---------------------") +# cur.execute(cur.mogrify(pg_query, params)) +# popularity = cur.fetchall() +# pg_query = f"""SELECT {event_column} AS value, COUNT(session_id) AS count +# FROM {event_table} AS feature INNER JOIN sessions USING (session_id) +# WHERE {" AND ".join(pg_sub_query)} +# GROUP BY value;""" +# # TODO: solve full scan +# print(cur.mogrify(pg_query, params)) +# print("---------------------") +# cur.execute(cur.mogrify(pg_query, params)) +# frequencies = cur.fetchall() +# total_usage = sum([f["count"] for f in frequencies]) +# frequencies = {f["value"]: f["count"] for f in frequencies} +# for p in popularity: +# p["popularity"] = p.pop("count") / all_user_count +# p["frequency"] = frequencies[p["value"]] / total_usage +# +# return popularity +# +# +# def feature_adoption(project_id, startTimestamp=TimeUTC.now(delta_days=-70), 
endTimestamp=TimeUTC.now(), +# filters=[], +# **args): +# pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", +# time_constraint=True) +# event_type = "CLICK" +# event_value = '/' +# extra_values = {} +# default = True +# for f in filters: +# if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): +# event_type = f["value"] +# elif f["type"] == "EVENT_VALUE": +# event_value = f["value"] +# default = False +# elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: +# pg_sub_query.append(f"sessions.user_id = %(user_id)s") +# extra_values["user_id"] = f["value"] +# event_table = JOURNEY_TYPES[event_type]["table"] +# event_column = JOURNEY_TYPES[event_type]["column"] +# with pg_client.PostgresClient() as cur: +# pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count +# FROM sessions +# WHERE {" AND ".join(pg_sub_query)} +# AND user_id IS NOT NULL;""" +# params = {"project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} +# # print(cur.mogrify(pg_query, params)) +# # print("---------------------") +# cur.execute(cur.mogrify(pg_query, params)) +# all_user_count = cur.fetchone()["count"] +# if all_user_count == 0: +# return {"adoption": 0, "target": 0, "filters": [{"type": "EVENT_TYPE", "value": event_type}, +# {"type": "EVENT_VALUE", "value": event_value}], } +# pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") +# pg_sub_query.append("feature.timestamp < %(endTimestamp)s") +# if default: +# # get most used value +# pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count +# FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) +# WHERE {" AND ".join(pg_sub_query[:-1])} +# AND length({event_column}) > 2 +# GROUP BY value +# ORDER BY count DESC +# LIMIT 1;""" +# params = {"project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} +# cur.execute(cur.mogrify(pg_query, params)) +# row = cur.fetchone() +# if row is not None: +# event_value = row["value"] +# extra_values["value"] = event_value +# if len(event_value) > 2: +# pg_sub_query.append(f"length({event_column})>2") +# pg_sub_query.append(f"feature.{event_column} = %(value)s") +# pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count +# FROM {event_table} AS feature INNER JOIN sessions USING (session_id) +# WHERE {" AND ".join(pg_sub_query)} +# AND user_id IS NOT NULL;""" +# params = {"project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} +# # print(cur.mogrify(pg_query, params)) +# # print("---------------------") +# cur.execute(cur.mogrify(pg_query, params)) +# adoption = cur.fetchone()["count"] / all_user_count +# return {"target": all_user_count, "adoption": adoption, +# "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]} +# +# +# def feature_adoption_top_users(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), +# filters=[], **args): +# pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", +# time_constraint=True) +# pg_sub_query.append("user_id IS NOT NULL") +# event_type = "CLICK" +# event_value = '/' +# extra_values = {} +# default = True +# for f in filters: +# if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): +# 
event_type = f["value"] +# elif f["type"] == "EVENT_VALUE": +# event_value = f["value"] +# default = False +# elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: +# pg_sub_query.append(f"sessions.user_id = %(user_id)s") +# extra_values["user_id"] = f["value"] +# event_table = JOURNEY_TYPES[event_type]["table"] +# event_column = JOURNEY_TYPES[event_type]["column"] +# with pg_client.PostgresClient() as cur: +# pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") +# pg_sub_query.append("feature.timestamp < %(endTimestamp)s") +# if default: +# # get most used value +# pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count +# FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) +# WHERE {" AND ".join(pg_sub_query[:-1])} +# AND length({event_column}) > 2 +# GROUP BY value +# ORDER BY count DESC +# LIMIT 1;""" +# params = {"project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} +# cur.execute(cur.mogrify(pg_query, params)) +# row = cur.fetchone() +# if row is not None: +# event_value = row["value"] +# extra_values["value"] = event_value +# if len(event_value) > 2: +# pg_sub_query.append(f"length({event_column})>2") +# pg_sub_query.append(f"feature.{event_column} = %(value)s") +# pg_query = f"""SELECT user_id, COUNT(DISTINCT session_id) AS count +# FROM {event_table} AS feature +# INNER JOIN sessions USING (session_id) +# WHERE {" AND ".join(pg_sub_query)} +# GROUP BY 1 +# ORDER BY 2 DESC +# LIMIT 10;""" +# params = {"project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} +# # print(cur.mogrify(pg_query, params)) +# # print("---------------------") +# cur.execute(cur.mogrify(pg_query, params)) +# rows = cur.fetchall() +# return {"users": helper.list_to_camel_case(rows), +# "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]} +# +# +# def feature_adoption_daily_usage(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), +# filters=[], **args): +# pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", +# time_constraint=True) +# pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=True, +# chart=True, data=args) +# event_type = "CLICK" +# event_value = '/' +# extra_values = {} +# default = True +# for f in filters: +# if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): +# event_type = f["value"] +# elif f["type"] == "EVENT_VALUE": +# event_value = f["value"] +# default = False +# elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: +# pg_sub_query_chart.append(f"sessions.user_id = %(user_id)s") +# extra_values["user_id"] = f["value"] +# event_table = JOURNEY_TYPES[event_type]["table"] +# event_column = JOURNEY_TYPES[event_type]["column"] +# with pg_client.PostgresClient() as cur: +# pg_sub_query_chart.append("feature.timestamp >= %(startTimestamp)s") +# pg_sub_query_chart.append("feature.timestamp < %(endTimestamp)s") +# pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") +# pg_sub_query.append("feature.timestamp < %(endTimestamp)s") +# if default: +# # get most used value +# pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count +# FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) +# WHERE {" AND ".join(pg_sub_query)} +# 
AND length({event_column})>2 +# GROUP BY value +# ORDER BY count DESC +# LIMIT 1;""" +# params = {"project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} +# cur.execute(cur.mogrify(pg_query, params)) +# row = cur.fetchone() +# if row is not None: +# event_value = row["value"] +# extra_values["value"] = event_value +# if len(event_value) > 2: +# pg_sub_query.append(f"length({event_column})>2") +# pg_sub_query_chart.append(f"feature.{event_column} = %(value)s") +# pg_query = f"""SELECT generated_timestamp AS timestamp, +# COALESCE(COUNT(session_id), 0) AS count +# FROM generate_series(%(startTimestamp)s, %(endTimestamp)s, %(step_size)s) AS generated_timestamp +# LEFT JOIN LATERAL ( SELECT DISTINCT session_id +# FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) +# WHERE {" AND ".join(pg_sub_query_chart)} +# ) AS users ON (TRUE) +# GROUP BY generated_timestamp +# ORDER BY generated_timestamp;""" +# params = {"step_size": TimeUTC.MS_DAY, "project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} +# print(cur.mogrify(pg_query, params)) +# print("---------------------") +# cur.execute(cur.mogrify(pg_query, params)) +# rows = cur.fetchall() +# return {"chart": helper.list_to_camel_case(rows), +# "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]} +# +# +# def feature_intensity(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), +# filters=[], +# **args): +# pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", +# time_constraint=True) +# pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") +# pg_sub_query.append("feature.timestamp < %(endTimestamp)s") +# event_table = JOURNEY_TYPES["CLICK"]["table"] +# event_column = JOURNEY_TYPES["CLICK"]["column"] +# extra_values = {} +# for f in filters: +# if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): +# event_table = JOURNEY_TYPES[f["value"]]["table"] +# event_column = JOURNEY_TYPES[f["value"]]["column"] +# elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: +# pg_sub_query.append(f"sessions.user_id = %(user_id)s") +# extra_values["user_id"] = f["value"] +# pg_sub_query.append(f"length({event_column})>2") +# with pg_client.PostgresClient() as cur: +# pg_query = f"""SELECT {event_column} AS value, AVG(DISTINCT session_id) AS avg +# FROM {event_table} AS feature INNER JOIN sessions USING (session_id) +# WHERE {" AND ".join(pg_sub_query)} +# GROUP BY value +# ORDER BY avg DESC +# LIMIT 7;""" +# params = {"project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} +# # TODO: solve full scan issue +# print(cur.mogrify(pg_query, params)) +# print("---------------------") +# cur.execute(cur.mogrify(pg_query, params)) +# rows = cur.fetchall() +# +# return rows +# +# +# def users_active(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), +# filters=[], +# **args): +# pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=True, +# chart=True, data=args) +# +# pg_sub_query_chart.append("user_id IS NOT NULL") +# period = "DAY" +# extra_values = {} +# for f in filters: +# if f["type"] == "PERIOD" and f["value"] in ["DAY", "WEEK"]: +# period = f["value"] +# 
elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: +# pg_sub_query_chart.append(f"sessions.user_id = %(user_id)s") +# extra_values["user_id"] = f["value"] +# +# with pg_client.PostgresClient() as cur: +# pg_query = f"""SELECT AVG(count) AS avg, JSONB_AGG(chart) AS chart +# FROM (SELECT generated_timestamp AS timestamp, +# COALESCE(COUNT(users), 0) AS count +# FROM generate_series(%(startTimestamp)s, %(endTimestamp)s, %(step_size)s) AS generated_timestamp +# LEFT JOIN LATERAL ( SELECT DISTINCT user_id +# FROM public.sessions +# WHERE {" AND ".join(pg_sub_query_chart)} +# ) AS users ON (TRUE) +# GROUP BY generated_timestamp +# ORDER BY generated_timestamp) AS chart;""" +# params = {"step_size": TimeUTC.MS_DAY if period == "DAY" else TimeUTC.MS_WEEK, +# "project_id": project_id, +# "startTimestamp": TimeUTC.trunc_day(startTimestamp) if period == "DAY" else TimeUTC.trunc_week( +# startTimestamp), +# "endTimestamp": endTimestamp, **__get_constraint_values(args), +# **extra_values} +# # print(cur.mogrify(pg_query, params)) +# # print("---------------------") +# cur.execute(cur.mogrify(pg_query, params)) +# row_users = cur.fetchone() +# +# return row_users +# +# +# def users_power(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), +# filters=[], **args): +# pg_sub_query = __get_constraints(project_id=project_id, time_constraint=True, chart=False, data=args) +# pg_sub_query.append("user_id IS NOT NULL") +# +# with pg_client.PostgresClient() as cur: +# pg_query = f"""SELECT AVG(count) AS avg, JSONB_AGG(day_users_partition) AS partition +# FROM (SELECT number_of_days, COUNT(user_id) AS count +# FROM (SELECT user_id, COUNT(DISTINCT DATE_TRUNC('day', to_timestamp(start_ts / 1000))) AS number_of_days +# FROM sessions +# WHERE {" AND ".join(pg_sub_query)} +# GROUP BY 1) AS users_connexions +# GROUP BY number_of_days +# ORDER BY number_of_days) AS day_users_partition;""" +# params = {"project_id": project_id, +# "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, **__get_constraint_values(args)} +# # print(cur.mogrify(pg_query, params)) +# # print("---------------------") +# cur.execute(cur.mogrify(pg_query, params)) +# row_users = cur.fetchone() +# +# return helper.dict_to_camel_case(row_users) +# +# +# def users_slipping(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), +# filters=[], **args): +# pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", +# time_constraint=True) +# pg_sub_query.append("user_id IS NOT NULL") +# pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") +# pg_sub_query.append("feature.timestamp < %(endTimestamp)s") +# event_type = "PAGES" +# event_value = "/" +# extra_values = {} +# default = True +# for f in filters: +# if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): +# event_type = f["value"] +# elif f["type"] == "EVENT_VALUE": +# event_value = f["value"] +# default = False +# elif f["type"] in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: +# pg_sub_query.append(f"sessions.user_id = %(user_id)s") +# extra_values["user_id"] = f["value"] +# event_table = JOURNEY_TYPES[event_type]["table"] +# event_column = JOURNEY_TYPES[event_type]["column"] +# pg_sub_query.append(f"feature.{event_column} = %(value)s") +# +# with pg_client.PostgresClient() as cur: +# if default: +# # get most used value +# pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count +# FROM {event_table} AS 
feature INNER JOIN public.sessions USING (session_id) +# WHERE {" AND ".join(pg_sub_query[:-1])} +# AND length({event_column}) > 2 +# GROUP BY value +# ORDER BY count DESC +# LIMIT 1;""" +# params = {"project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} +# cur.execute(cur.mogrify(pg_query, params)) +# row = cur.fetchone() +# if row is not None: +# event_value = row["value"] +# extra_values["value"] = event_value +# if len(event_value) > 2: +# pg_sub_query.append(f"length({event_column})>2") +# pg_query = f"""SELECT user_id, last_time, interactions_count, MIN(start_ts) AS first_seen, MAX(start_ts) AS last_seen +# FROM (SELECT user_id, MAX(timestamp) AS last_time, COUNT(DISTINCT session_id) AS interactions_count +# FROM {event_table} AS feature INNER JOIN sessions USING (session_id) +# WHERE {" AND ".join(pg_sub_query)} +# GROUP BY user_id) AS user_last_usage +# INNER JOIN sessions USING (user_id) +# WHERE EXTRACT(EPOCH FROM now()) * 1000 - last_time > 7 * 24 * 60 * 60 * 1000 +# GROUP BY user_id, last_time,interactions_count;""" +# +# params = {"project_id": project_id, "startTimestamp": startTimestamp, +# "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} +# # print(cur.mogrify(pg_query, params)) +# cur.execute(cur.mogrify(pg_query, params)) +# rows = cur.fetchall() +# return { +# "startTimestamp": startTimestamp, +# "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}], +# "list": helper.list_to_camel_case(rows) +# } +# +# +# def search(text, feature_type, project_id, platform=None): +# if not feature_type: +# resource_type = "ALL" +# data = search(text=text, feature_type=resource_type, project_id=project_id, platform=platform) +# return data +# +# pg_sub_query = __get_constraints(project_id=project_id, time_constraint=True, duration=True, +# data={} if platform is None else {"platform": platform}) +# +# params = {"startTimestamp": TimeUTC.now() - 2 * TimeUTC.MS_MONTH, +# "endTimestamp": TimeUTC.now(), +# "project_id": project_id, +# "value": helper.string_to_sql_like(text.lower()), +# "platform_0": platform} +# if feature_type == "ALL": +# with pg_client.PostgresClient() as cur: +# sub_queries = [] +# for e in JOURNEY_TYPES: +# sub_queries.append(f"""(SELECT DISTINCT {JOURNEY_TYPES[e]["column"]} AS value, '{e}' AS "type" +# FROM {JOURNEY_TYPES[e]["table"]} INNER JOIN public.sessions USING(session_id) +# WHERE {" AND ".join(pg_sub_query)} AND {JOURNEY_TYPES[e]["column"]} ILIKE %(value)s +# LIMIT 10)""") +# pg_query = "UNION ALL".join(sub_queries) +# # print(cur.mogrify(pg_query, params)) +# cur.execute(cur.mogrify(pg_query, params)) +# rows = cur.fetchall() +# elif JOURNEY_TYPES.get(feature_type) is not None: +# with pg_client.PostgresClient() as cur: +# pg_query = f"""SELECT DISTINCT {JOURNEY_TYPES[feature_type]["column"]} AS value, '{feature_type}' AS "type" +# FROM {JOURNEY_TYPES[feature_type]["table"]} INNER JOIN public.sessions USING(session_id) +# WHERE {" AND ".join(pg_sub_query)} AND {JOURNEY_TYPES[feature_type]["column"]} ILIKE %(value)s +# LIMIT 10;""" +# # print(cur.mogrify(pg_query, params)) +# cur.execute(cur.mogrify(pg_query, params)) +# rows = cur.fetchall() +# else: +# return [] +# return [helper.dict_to_camel_case(row) for row in rows] diff --git a/api/chalicelib/core/sessions.py b/api/chalicelib/core/sessions.py index 6e939cdfb..a9457ac3a 100644 --- a/api/chalicelib/core/sessions.py +++ 
b/api/chalicelib/core/sessions.py @@ -350,6 +350,28 @@ def search_query_parts(data: schemas.SessionsSearchPayloadSchema, error_status, sh.multi_conditions(f'ms.user_country {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + elif filter_type == schemas.FilterType.user_city: + if is_any: + extra_constraints.append('s.user_city IS NOT NULL') + ss_constraints.append('ms.user_city IS NOT NULL') + else: + extra_constraints.append( + sh.multi_conditions(f's.user_city {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + ss_constraints.append( + sh.multi_conditions(f'ms.user_city {op} %({f_k})s', f.value, is_not=is_not, + value_key=f_k)) + + elif filter_type == schemas.FilterType.user_state: + if is_any: + extra_constraints.append('s.user_state IS NOT NULL') + ss_constraints.append('ms.user_state IS NOT NULL') + else: + extra_constraints.append( + sh.multi_conditions(f's.user_state {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + ss_constraints.append( + sh.multi_conditions(f'ms.user_state {op} %({f_k})s', f.value, is_not=is_not, + value_key=f_k)) + elif filter_type in [schemas.FilterType.utm_source]: if is_any: extra_constraints.append('s.utm_source IS NOT NULL') diff --git a/api/requirements-alerts.txt b/api/requirements-alerts.txt index 09262e6a3..1487e29ad 100644 --- a/api/requirements-alerts.txt +++ b/api/requirements-alerts.txt @@ -1,15 +1,15 @@ requests==2.29.0 urllib3==1.26.15 -boto3==1.26.122 -pyjwt==2.6.0 +boto3==1.26.145 +pyjwt==2.7.0 psycopg2-binary==2.9.6 elasticsearch==8.7.0 jira==3.5.0 -fastapi==0.95.1 +fastapi==0.95.2 uvicorn[standard]==0.22.0 python-decouple==3.8 -pydantic[email]==1.10.7 +pydantic[email]==1.10.8 apscheduler==3.10.1 \ No newline at end of file diff --git a/api/requirements.txt b/api/requirements.txt index 75d558309..96c51c7c0 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -1,17 +1,17 @@ requests==2.29.0 urllib3==1.26.15 -boto3==1.26.122 -pyjwt==2.6.0 +boto3==1.26.145 +pyjwt==2.7.0 psycopg2-binary==2.9.6 elasticsearch==8.7.0 jira==3.5.0 -fastapi==0.95.1 +fastapi==0.95.2 uvicorn[standard]==0.22.0 python-decouple==3.8 -pydantic[email]==1.10.7 +pydantic[email]==1.10.8 apscheduler==3.10.1 redis==4.5.4 \ No newline at end of file diff --git a/api/routers/subs/insights.py b/api/routers/subs/insights.py index c27e4d704..3fb71c09c 100644 --- a/api/routers/subs/insights.py +++ b/api/routers/subs/insights.py @@ -1,82 +1,88 @@ from fastapi import Body import schemas -from chalicelib.core import insights +from chalicelib.core import product_analytics from routers.base import get_routers public_app, app, app_apikey = get_routers() -@app.post('/{projectId}/insights/journey', tags=["insights"]) @app.get('/{projectId}/insights/journey', tags=["insights"]) -async def get_insights_journey(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.journey(project_id=projectId, **data.dict())} +async def get_insights_journey(projectId: int): + return {"data": product_analytics.path_analysis(project_id=projectId, data=schemas.PathAnalysisSchema())} + +# +# @app.post('/{projectId}/insights/journey', tags=["insights"]) +# async def get_insights_journey(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.journey(project_id=projectId, data=data)} +# +# +# @app.post('/{projectId}/insights/users_acquisition', tags=["insights"]) +# @app.get('/{projectId}/insights/users_acquisition', tags=["insights"]) +# async def get_users_acquisition(projectId: int, data: 
schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.users_acquisition(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/users_retention', tags=["insights"]) +# @app.get('/{projectId}/insights/users_retention', tags=["insights"]) +# async def get_users_retention(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.users_retention(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/feature_retention', tags=["insights"]) +# @app.get('/{projectId}/insights/feature_retention', tags=["insights"]) +# async def get_feature_rentention(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.feature_retention(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/feature_acquisition', tags=["insights"]) +# @app.get('/{projectId}/insights/feature_acquisition', tags=["insights"]) +# async def get_feature_acquisition(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.feature_acquisition(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/feature_popularity_frequency', tags=["insights"]) +# @app.get('/{projectId}/insights/feature_popularity_frequency', tags=["insights"]) +# async def get_feature_popularity_frequency(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.feature_popularity_frequency(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/feature_intensity', tags=["insights"]) +# @app.get('/{projectId}/insights/feature_intensity', tags=["insights"]) +# async def get_feature_intensity(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.feature_intensity(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/feature_adoption', tags=["insights"]) +# @app.get('/{projectId}/insights/feature_adoption', tags=["insights"]) +# async def get_feature_adoption(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.feature_adoption(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/feature_adoption_top_users', tags=["insights"]) +# @app.get('/{projectId}/insights/feature_adoption_top_users', tags=["insights"]) +# async def get_feature_adoption(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.feature_adoption_top_users(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/users_active', tags=["insights"]) +# @app.get('/{projectId}/insights/users_active', tags=["insights"]) +# async def get_users_active(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.users_active(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/users_power', tags=["insights"]) +# @app.get('/{projectId}/insights/users_power', tags=["insights"]) +# async def get_users_power(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.users_power(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/users_slipping', tags=["insights"]) +# @app.get('/{projectId}/insights/users_slipping', tags=["insights"]) +# async def get_users_slipping(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": 
product_analytics.users_slipping(project_id=projectId, **data.dict())} -@app.post('/{projectId}/insights/users_acquisition', tags=["insights"]) -@app.get('/{projectId}/insights/users_acquisition', tags=["insights"]) -async def get_users_acquisition(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.users_acquisition(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/users_retention', tags=["insights"]) -@app.get('/{projectId}/insights/users_retention', tags=["insights"]) -async def get_users_retention(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.users_retention(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/feature_retention', tags=["insights"]) -@app.get('/{projectId}/insights/feature_retention', tags=["insights"]) -async def get_feature_rentention(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.feature_retention(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/feature_acquisition', tags=["insights"]) -@app.get('/{projectId}/insights/feature_acquisition', tags=["insights"]) -async def get_feature_acquisition(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.feature_acquisition(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/feature_popularity_frequency', tags=["insights"]) -@app.get('/{projectId}/insights/feature_popularity_frequency', tags=["insights"]) -async def get_feature_popularity_frequency(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.feature_popularity_frequency(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/feature_intensity', tags=["insights"]) -@app.get('/{projectId}/insights/feature_intensity', tags=["insights"]) -async def get_feature_intensity(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.feature_intensity(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/feature_adoption', tags=["insights"]) -@app.get('/{projectId}/insights/feature_adoption', tags=["insights"]) -async def get_feature_adoption(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.feature_adoption(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/feature_adoption_top_users', tags=["insights"]) -@app.get('/{projectId}/insights/feature_adoption_top_users', tags=["insights"]) -async def get_feature_adoption(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.feature_adoption_top_users(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/users_active', tags=["insights"]) -@app.get('/{projectId}/insights/users_active', tags=["insights"]) -async def get_users_active(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.users_active(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/users_power', tags=["insights"]) -@app.get('/{projectId}/insights/users_power', tags=["insights"]) -async def get_users_power(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.users_power(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/users_slipping', tags=["insights"]) -@app.get('/{projectId}/insights/users_slipping', tags=["insights"]) -async def 
get_users_slipping(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.users_slipping(project_id=projectId, **data.dict())} # # diff --git a/api/routers/subs/metrics.py b/api/routers/subs/metrics.py index fef41fe34..b878184c8 100644 --- a/api/routers/subs/metrics.py +++ b/api/routers/subs/metrics.py @@ -62,7 +62,7 @@ async def add_card_to_dashboard(projectId: int, dashboardId: int, @app.post('/{projectId}/dashboards/{dashboardId}/metrics', tags=["dashboard"]) @app.put('/{projectId}/dashboards/{dashboardId}/metrics', tags=["dashboard"]) async def create_metric_and_add_to_dashboard(projectId: int, dashboardId: int, - data: schemas.CreateCardSchema = Body(...), + data: schemas.CardSchema = Body(...), context: schemas.CurrentContext = Depends(OR_context)): return {"data": dashboards.create_metric_add_widget(project_id=projectId, user_id=context.user_id, dashboard_id=dashboardId, data=data)} @@ -100,7 +100,7 @@ async def remove_widget_from_dashboard(projectId: int, dashboardId: int, widgetI @app.put('/{projectId}/metrics/try', tags=["dashboard"]) @app.post('/{projectId}/custom_metrics/try', tags=["customMetrics"]) @app.put('/{projectId}/custom_metrics/try', tags=["customMetrics"]) -async def try_card(projectId: int, data: schemas.CreateCardSchema = Body(...), +async def try_card(projectId: int, data: schemas.CardSchema = Body(...), context: schemas.CurrentContext = Depends(OR_context)): return {"data": custom_metrics.merged_live(project_id=projectId, data=data, user_id=context.user_id)} @@ -139,7 +139,7 @@ async def get_cards(projectId: int, context: schemas.CurrentContext = Depends(OR @app.put('/{projectId}/metrics', tags=["dashboard"]) @app.post('/{projectId}/custom_metrics', tags=["customMetrics"]) @app.put('/{projectId}/custom_metrics', tags=["customMetrics"]) -async def create_card(projectId: int, data: schemas.CreateCardSchema = Body(...), +async def create_card(projectId: int, data: schemas.CardSchema = Body(...), context: schemas.CurrentContext = Depends(OR_context)): return custom_metrics.create(project_id=projectId, user_id=context.user_id, data=data) diff --git a/api/schemas.py b/api/schemas.py index e7aacfdb5..ddd1686a1 100644 --- a/api/schemas.py +++ b/api/schemas.py @@ -342,7 +342,6 @@ class MathOperator(str, Enum): class _AlertQuerySchema(BaseModel): left: Union[AlertColumn, int] = Field(...) right: float = Field(...) - # operator: Literal["<", ">", "<=", ">="] = Field(...) operator: MathOperator = Field(...) @@ -846,12 +845,41 @@ class SearchErrorsSchema(FlatSessionsSearchPayloadSchema): query: Optional[str] = Field(default=None) -class MetricPayloadSchema(_TimedSchema): +class ProductAnalyticsFilterType(str, Enum): + event_type = 'eventType' + start_point = 'startPoint' + user_id = FilterType.user_id.value + + +class ProductAnalyticsEventType(str, Enum): + click = EventType.click.value + input = EventType.input.value + location = EventType.location.value + custom_event = EventType.custom.value + + +class ProductAnalyticsFilter(BaseModel): + type: ProductAnalyticsFilterType = Field(...) + operator: Union[SearchEventOperator, ClickEventExtraOperator] = Field(...) + value: List[Union[ProductAnalyticsEventType | str]] = Field(...) 
+ + @root_validator + def validator(cls, values): + if values.get("type") == ProductAnalyticsFilterType.event_type: + assert values.get("value") is not None and len(values["value"]) > 0, \ + f"value must be provided for type:{ProductAnalyticsFilterType.event_type}" + assert isinstance(values["value"][0], ProductAnalyticsEventType), \ + f"value must be of type {ProductAnalyticsEventType} for type:{ProductAnalyticsFilterType.event_type}" + + return values + + +class PathAnalysisSchema(_TimedSchema): startTimestamp: int = Field(TimeUTC.now(delta_days=-1)) endTimestamp: int = Field(TimeUTC.now()) density: int = Field(7) - filters: List[dict] = Field([]) - type: Optional[str] = Field(None) + filters: List[ProductAnalyticsFilter] = Field(default=[]) + type: Optional[str] = Field(default=None) class Config: alias_generator = attribute_to_camel_case @@ -879,11 +907,11 @@ class CardSeriesFilterSchema(SearchErrorsSchema): group_by_user: Optional[bool] = Field(default=False, const=True) -class CardCreateSeriesSchema(BaseModel): +class CardSeriesSchema(BaseModel): series_id: Optional[int] = Field(None) name: Optional[str] = Field(None) index: Optional[int] = Field(None) - filter: Optional[CardSeriesFilterSchema] = Field([]) + filter: Optional[Union[CardSeriesFilterSchema | PathAnalysisSchema]] = Field(default=None) class Config: alias_generator = attribute_to_camel_case @@ -1002,7 +1030,7 @@ class MetricOfClickMap(str, Enum): class CardSessionsSchema(FlatSessionsSearch, _PaginatedSchema, _TimedSchema): startTimestamp: int = Field(TimeUTC.now(-7)) endTimestamp: int = Field(TimeUTC.now()) - series: List[CardCreateSeriesSchema] = Field(default=[]) + series: List[CardSeriesSchema] = Field(default=[]) class Config: alias_generator = attribute_to_camel_case @@ -1018,20 +1046,26 @@ class CardConfigSchema(BaseModel): position: Optional[int] = Field(default=0) -class CreateCardSchema(CardChartSchema): +class __CardSchema(BaseModel): name: Optional[str] = Field(...) is_public: bool = Field(default=True) + default_config: CardConfigSchema = Field(..., alias="config") + thumbnail: Optional[str] = Field(default=None) + metric_format: Optional[MetricFormatType] = Field(default=None) + + class Config: + alias_generator = attribute_to_camel_case + + +class CardSchema(__CardSchema, CardChartSchema): view_type: Union[MetricTimeseriesViewType, \ - MetricTableViewType, MetricOtherViewType] = Field(...) + MetricTableViewType, MetricOtherViewType] = Field(...) metric_type: MetricType = Field(...) 
metric_of: Union[MetricOfTimeseries, MetricOfTable, MetricOfErrors, \ MetricOfPerformance, MetricOfResources, MetricOfWebVitals, \ - MetricOfClickMap] = Field(MetricOfTable.user_id) + MetricOfClickMap] = Field(default=MetricOfTable.user_id) metric_value: List[IssueType] = Field(default=[]) - metric_format: Optional[MetricFormatType] = Field(default=None) - default_config: CardConfigSchema = Field(..., alias="config") is_template: bool = Field(default=False) - thumbnail: Optional[str] = Field(default=None) # This is used to handle wrong values sent by the UI @root_validator(pre=True) @@ -1043,14 +1077,15 @@ class CreateCardSchema(CardChartSchema): and values.get("metricOf") != MetricOfTable.issues: values["metricValue"] = [] - if values.get("metricType") == MetricType.funnel and \ + if values.get("metricType") in [MetricType.funnel, MetricType.pathAnalysis] and \ values.get("series") is not None and len(values["series"]) > 0: values["series"] = [values["series"][0]] elif values.get("metricType") not in [MetricType.table, MetricType.timeseries, MetricType.insights, MetricType.click_map, - MetricType.funnel] \ + MetricType.funnel, + MetricType.pathAnalysis] \ and values.get("series") is not None and len(values["series"]) > 0: values["series"] = [] @@ -1086,6 +1121,8 @@ class CreateCardSchema(CardChartSchema): # ignore this for now, let the UI send whatever he wants for metric_of # assert isinstance(values.get("metric_of"), MetricOfTimeseries), \ # f"metricOf must be of type {MetricOfTimeseries} for metricType:{MetricType.funnel}" + elif values.get("metric_type") == MetricType.pathAnalysis: + pass else: if values.get("metric_type") == MetricType.errors: assert isinstance(values.get("metric_of"), MetricOfErrors), \ @@ -1116,14 +1153,14 @@ class CreateCardSchema(CardChartSchema): alias_generator = attribute_to_camel_case -class CardUpdateSeriesSchema(CardCreateSeriesSchema): +class CardUpdateSeriesSchema(CardSeriesSchema): series_id: Optional[int] = Field(None) class Config: alias_generator = attribute_to_camel_case -class UpdateCardSchema(CreateCardSchema): +class UpdateCardSchema(CardSchema): series: List[CardUpdateSeriesSchema] = Field(...) 
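For reference, a minimal usage sketch of the new path-analysis schemas introduced above (not part of the diff itself). It assumes the `api/` directory is on the import path so `schemas` resolves to the module modified here, and that `"is"` is a valid `SearchEventOperator` value as used elsewhere in this module; treat both as assumptions rather than guarantees.

# Hypothetical sketch: validating a path-analysis payload against the new
# PathAnalysisSchema / ProductAnalyticsFilter classes added in this file.
# Assumes `schemas` is api/schemas.py and that "is" is a SearchEventOperator value.
import schemas

payload = schemas.PathAnalysisSchema(
    startTimestamp=1684915200000,  # ms epoch, as in _TimedSchema
    endTimestamp=1685001600000,
    density=7,
    filters=[{
        "type": "eventType",       # ProductAnalyticsFilterType.event_type
        "operator": "is",
        "value": ["location"],     # coerced to ProductAnalyticsEventType members
    }],
)
print(payload.filters[0].value)

# A plain string such as ["/checkout"] under type "eventType" would fail the
# root_validator assertion above, since it is not a ProductAnalyticsEventType.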
diff --git a/ee/api/chalicelib/core/custom_metrics.py b/ee/api/chalicelib/core/custom_metrics.py index 460d1c689..134bd7ca9 100644 --- a/ee/api/chalicelib/core/custom_metrics.py +++ b/ee/api/chalicelib/core/custom_metrics.py @@ -24,7 +24,7 @@ else: PIE_CHART_GROUP = 5 -def __try_live(project_id, data: schemas_ee.CreateCardSchema): +def __try_live(project_id, data: schemas_ee.CardSchema): results = [] for i, s in enumerate(data.series): s.filter.startDate = data.startTimestamp @@ -57,11 +57,11 @@ def __try_live(project_id, data: schemas_ee.CreateCardSchema): return results -def __is_funnel_chart(data: schemas_ee.CreateCardSchema): +def __is_funnel_chart(data: schemas_ee.CardSchema): return data.metric_type == schemas.MetricType.funnel -def __get_funnel_chart(project_id, data: schemas_ee.CreateCardSchema): +def __get_funnel_chart(project_id, data: schemas_ee.CardSchema): if len(data.series) == 0: return { "stages": [], @@ -72,12 +72,12 @@ def __get_funnel_chart(project_id, data: schemas_ee.CreateCardSchema): return funnels.get_top_insights_on_the_fly_widget(project_id=project_id, data=data.series[0].filter) -def __is_errors_list(data: schemas_ee.CreateCardSchema): +def __is_errors_list(data: schemas_ee.CardSchema): return data.metric_type == schemas.MetricType.table \ and data.metric_of == schemas.MetricOfTable.errors -def __get_errors_list(project_id, user_id, data: schemas_ee.CreateCardSchema): +def __get_errors_list(project_id, user_id, data: schemas_ee.CardSchema): if len(data.series) == 0: return { "total": 0, @@ -90,12 +90,12 @@ def __get_errors_list(project_id, user_id, data: schemas_ee.CreateCardSchema): return errors.search(data.series[0].filter, project_id=project_id, user_id=user_id) -def __is_sessions_list(data: schemas_ee.CreateCardSchema): +def __is_sessions_list(data: schemas_ee.CardSchema): return data.metric_type == schemas.MetricType.table \ and data.metric_of == schemas.MetricOfTable.sessions -def __get_sessions_list(project_id, user_id, data: schemas_ee.CreateCardSchema): +def __get_sessions_list(project_id, user_id, data: schemas_ee.CardSchema): if len(data.series) == 0: print("empty series") return { @@ -109,15 +109,15 @@ def __get_sessions_list(project_id, user_id, data: schemas_ee.CreateCardSchema): return sessions.search_sessions(data=data.series[0].filter, project_id=project_id, user_id=user_id) -def __is_predefined(data: schemas_ee.CreateCardSchema): +def __is_predefined(data: schemas_ee.CardSchema): return data.is_template -def __is_click_map(data: schemas_ee.CreateCardSchema): +def __is_click_map(data: schemas_ee.CardSchema): return data.metric_type == schemas.MetricType.click_map -def __get_click_map_chart(project_id, user_id, data: schemas_ee.CreateCardSchema, include_mobs: bool = True): +def __get_click_map_chart(project_id, user_id, data: schemas_ee.CardSchema, include_mobs: bool = True): if len(data.series) == 0: return None data.series[0].filter.startDate = data.startTimestamp @@ -128,12 +128,12 @@ def __get_click_map_chart(project_id, user_id, data: schemas_ee.CreateCardSchema # EE only -def __is_insights(data: schemas_ee.CreateCardSchema): +def __is_insights(data: schemas_ee.CardSchema): return data.metric_type == schemas.MetricType.insights # EE only -def __get_insights_chart(project_id, user_id, data: schemas_ee.CreateCardSchema): +def __get_insights_chart(project_id, user_id, data: schemas_ee.CardSchema): return sessions_insights.fetch_selected(project_id=project_id, data=schemas_ee.GetInsightsSchema(startTimestamp=data.startTimestamp, 
endTimestamp=data.endTimestamp, @@ -141,7 +141,7 @@ def __get_insights_chart(project_id, user_id, data: schemas_ee.CreateCardSchema) series=data.series)) -def merged_live(project_id, data: schemas_ee.CreateCardSchema, user_id=None): +def merged_live(project_id, data: schemas_ee.CardSchema, user_id=None): if data.is_template: return get_predefined_metric(key=data.metric_of, project_id=project_id, data=data.dict()) elif __is_funnel_chart(data): @@ -168,11 +168,11 @@ def merged_live(project_id, data: schemas_ee.CreateCardSchema, user_id=None): return results -def __merge_metric_with_data(metric: schemas_ee.CreateCardSchema, - data: schemas.CardChartSchema) -> schemas_ee.CreateCardSchema: +def __merge_metric_with_data(metric: schemas_ee.CardSchema, + data: schemas.CardChartSchema) -> schemas_ee.CardSchema: if data.series is not None and len(data.series) > 0: metric.series = data.series - metric: schemas_ee.CreateCardSchema = schemas_ee.CreateCardSchema( + metric: schemas_ee.CardSchema = schemas_ee.CardSchema( **{**data.dict(by_alias=True), **metric.dict(by_alias=True)}) if len(data.filters) > 0 or len(data.events) > 0: for s in metric.series: @@ -187,10 +187,10 @@ def __merge_metric_with_data(metric: schemas_ee.CreateCardSchema, return metric -def make_chart(project_id, user_id, data: schemas.CardChartSchema, metric: schemas_ee.CreateCardSchema): +def make_chart(project_id, user_id, data: schemas.CardChartSchema, metric: schemas_ee.CardSchema): if metric is None: return None - metric: schemas_ee.CreateCardSchema = __merge_metric_with_data(metric=metric, data=data) + metric: schemas_ee.CardSchema = __merge_metric_with_data(metric=metric, data=data) return merged_live(project_id=project_id, data=metric, user_id=user_id) @@ -200,8 +200,8 @@ def get_sessions(project_id, user_id, metric_id, data: schemas.CardSessionsSchem raw_metric: dict = get_card(metric_id=metric_id, project_id=project_id, user_id=user_id, flatten=False) if raw_metric is None: return None - metric: schemas_ee.CreateCardSchema = schemas_ee.CreateCardSchema(**raw_metric) - metric: schemas_ee.CreateCardSchema = __merge_metric_with_data(metric=metric, data=data) + metric: schemas_ee.CardSchema = schemas_ee.CardSchema(**raw_metric) + metric: schemas_ee.CardSchema = __merge_metric_with_data(metric=metric, data=data) if metric is None: return None results = [] @@ -227,8 +227,8 @@ def get_funnel_issues(project_id, user_id, metric_id, data: schemas.CardSessions raw_metric: dict = get_card(metric_id=metric_id, project_id=project_id, user_id=user_id, flatten=False) if raw_metric is None: return None - metric: schemas_ee.CreateCardSchema = schemas_ee.CreateCardSchema(**raw_metric) - metric: schemas_ee.CreateCardSchema = __merge_metric_with_data(metric=metric, data=data) + metric: schemas_ee.CardSchema = schemas_ee.CardSchema(**raw_metric) + metric: schemas_ee.CardSchema = __merge_metric_with_data(metric=metric, data=data) if metric is None: return None for s in metric.series: @@ -244,8 +244,8 @@ def get_errors_list(project_id, user_id, metric_id, data: schemas.CardSessionsSc raw_metric: dict = get_card(metric_id=metric_id, project_id=project_id, user_id=user_id, flatten=False) if raw_metric is None: return None - metric: schemas_ee.CreateCardSchema = schemas_ee.CreateCardSchema(**raw_metric) - metric: schemas_ee.CreateCardSchema = __merge_metric_with_data(metric=metric, data=data) + metric: schemas_ee.CardSchema = schemas_ee.CardSchema(**raw_metric) + metric: schemas_ee.CardSchema = __merge_metric_with_data(metric=metric, data=data) if 
metric is None: return None for s in metric.series: @@ -276,7 +276,7 @@ def try_sessions(project_id, user_id, data: schemas.CardSessionsSchema): return results -def create(project_id, user_id, data: schemas_ee.CreateCardSchema, dashboard=False): +def create(project_id, user_id, data: schemas_ee.CardSchema, dashboard=False): with pg_client.PostgresClient() as cur: session_data = None if __is_click_map(data): @@ -595,8 +595,8 @@ def get_funnel_sessions_by_issue(user_id, project_id, metric_id, issue_id, metric: dict = get_card(metric_id=metric_id, project_id=project_id, user_id=user_id, flatten=False) if metric is None: return None - metric: schemas_ee.CreateCardSchema = schemas.CreateCardSchema(**metric) - metric: schemas_ee.CreateCardSchema = __merge_metric_with_data(metric=metric, data=data) + metric: schemas_ee.CardSchema = schemas.CardSchema(**metric) + metric: schemas_ee.CardSchema = __merge_metric_with_data(metric=metric, data=data) if metric is None: return None for s in metric.series: @@ -631,7 +631,7 @@ def make_chart_from_card(project_id, user_id, metric_id, data: schemas.CardChart raw_metric: dict = get_card(metric_id=metric_id, project_id=project_id, user_id=user_id, include_data=True) if raw_metric is None: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="card not found") - metric: schemas_ee.CreateCardSchema = schemas_ee.CreateCardSchema(**raw_metric) + metric: schemas_ee.CardSchema = schemas_ee.CardSchema(**raw_metric) if metric.is_template: return get_predefined_metric(key=metric.metric_of, project_id=project_id, data=data.dict()) elif __is_click_map(metric): diff --git a/ee/api/chalicelib/core/insights.py b/ee/api/chalicelib/core/product_analytics.py similarity index 99% rename from ee/api/chalicelib/core/insights.py rename to ee/api/chalicelib/core/product_analytics.py index 3dba723e4..ff98f026c 100644 --- a/ee/api/chalicelib/core/insights.py +++ b/ee/api/chalicelib/core/product_analytics.py @@ -24,18 +24,19 @@ JOURNEY_DEPTH = 5 JOURNEY_TYPES = { "PAGES": {"table": "pages", "column": "url_path"}, "CLICK": {"table": "clicks", "column": "label"}, - # "VIEW": {"table": "events_ios.views", "column": "name"}, TODO: enable this for SAAS only + # TODO: support input event "EVENT": {"table": "customs", "column": "name"} } - -def journey(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), filters=[], **args): +def path_analysis(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), filters=[], + **args): event_start = None event_table = JOURNEY_TYPES["CLICK"]["table"] event_column = JOURNEY_TYPES["CLICK"]["column"] extra_values = {} meta_condition = [] + # TODO: support multi-value for f in filters: if f["type"] == "START_POINT": event_start = f["value"] @@ -190,7 +191,6 @@ def __complete_acquisition(rows, start_date, end_date=None): return rows - def users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], **args): startTimestamp = TimeUTC.trunc_week(startTimestamp) @@ -233,7 +233,6 @@ def users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endT } - def users_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], **args): startTimestamp = TimeUTC.trunc_week(startTimestamp) @@ -286,7 +285,6 @@ def users_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en } - def feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), 
filters=[], **args): startTimestamp = TimeUTC.trunc_week(startTimestamp) @@ -386,7 +384,6 @@ def feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en } - def feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], **args): startTimestamp = TimeUTC.trunc_week(startTimestamp) @@ -497,7 +494,6 @@ def feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), } - def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], **args): startTimestamp = TimeUTC.trunc_week(startTimestamp) @@ -572,7 +568,6 @@ def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_da return popularity - def feature_adoption(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], **args): event_type = "CLICK" @@ -658,7 +653,6 @@ def feature_adoption(project_id, startTimestamp=TimeUTC.now(delta_days=-70), end "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]} - def feature_adoption_top_users(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], **args): event_type = "CLICK" @@ -728,7 +722,6 @@ def feature_adoption_top_users(project_id, startTimestamp=TimeUTC.now(delta_days "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]} - def feature_adoption_daily_usage(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], **args): event_type = "CLICK" @@ -796,7 +789,6 @@ def feature_adoption_daily_usage(project_id, startTimestamp=TimeUTC.now(delta_da "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]} - def feature_intensity(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], **args): event_table = JOURNEY_TYPES["CLICK"]["table"] @@ -838,7 +830,6 @@ PERIOD_TO_FUNCTION = { } - def users_active(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], **args): meta_condition = __get_meta_constraint(args) @@ -885,7 +876,6 @@ def users_active(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTime return {"avg": avg, "chart": rows} - def users_power(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], **args): ch_sub_query = __get_basic_constraints(table_name="sessions_metadata", data=args) meta_condition = __get_meta_constraint(args) @@ -925,7 +915,6 @@ def users_power(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimes return {"avg": avg, "partition": helper.list_to_camel_case(rows)} - def users_slipping(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], **args): ch_sub_query = __get_basic_constraints(table_name="feature", data=args) @@ -1008,7 +997,6 @@ def users_slipping(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTi } - def search(text, feature_type, project_id, platform=None): if not feature_type: resource_type = "ALL" diff --git a/ee/api/chalicelib/core/sessions_exp.py b/ee/api/chalicelib/core/sessions_exp.py index 4752720b9..a7dc97c05 100644 --- a/ee/api/chalicelib/core/sessions_exp.py +++ b/ee/api/chalicelib/core/sessions_exp.py @@ -482,6 +482,26 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu ss_constraints.append( 
_multiple_conditions(f'ms.user_country {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + elif filter_type in schemas.FilterType.user_city: + if is_any: + extra_constraints.append('isNotNull(s.user_city)') + ss_constraints.append('isNotNull(ms.user_city)') + else: + extra_constraints.append( + _multiple_conditions(f's.user_city {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f'ms.user_city {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif filter_type in schemas.FilterType.user_state: + if is_any: + extra_constraints.append('isNotNull(s.user_state)') + ss_constraints.append('isNotNull(ms.user_state)') + else: + extra_constraints.append( + _multiple_conditions(f's.user_state {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f'ms.user_state {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + elif filter_type in [schemas.FilterType.utm_source]: if is_any: extra_constraints.append('isNotNull(s.utm_source)') diff --git a/ee/api/requirements-alerts.txt b/ee/api/requirements-alerts.txt index cf8579c6b..f79cd002e 100644 --- a/ee/api/requirements-alerts.txt +++ b/ee/api/requirements-alerts.txt @@ -1,17 +1,17 @@ requests==2.29.0 urllib3==1.26.15 -boto3==1.26.122 -pyjwt==2.6.0 +boto3==1.26.145 +pyjwt==2.7.0 psycopg2-binary==2.9.6 elasticsearch==8.7.0 jira==3.5.0 -fastapi==0.95.1 +fastapi==0.95.2 uvicorn[standard]==0.22.0 python-decouple==3.8 -pydantic[email]==1.10.7 +pydantic[email]==1.10.8 apscheduler==3.10.1 clickhouse-driver==0.2.5 diff --git a/ee/api/requirements-crons.txt b/ee/api/requirements-crons.txt index a747fea40..99d0d1379 100644 --- a/ee/api/requirements-crons.txt +++ b/ee/api/requirements-crons.txt @@ -1,15 +1,15 @@ requests==2.29.0 urllib3==1.26.15 -boto3==1.26.122 -pyjwt==2.6.0 +boto3==1.26.145 +pyjwt==2.7.0 psycopg2-binary==2.9.6 elasticsearch==8.7.0 jira==3.5.0 -fastapi==0.95.1 +fastapi==0.95.2 python-decouple==3.8 -pydantic[email]==1.10.7 +pydantic[email]==1.10.8 apscheduler==3.10.1 clickhouse-driver==0.2.5 diff --git a/ee/api/requirements.txt b/ee/api/requirements.txt index f6a45e5b3..14c909a26 100644 --- a/ee/api/requirements.txt +++ b/ee/api/requirements.txt @@ -1,22 +1,25 @@ requests==2.29.0 urllib3==1.26.15 -boto3==1.26.122 -pyjwt==2.6.0 +boto3==1.26.145 +pyjwt==2.7.0 psycopg2-binary==2.9.6 elasticsearch==8.7.0 jira==3.5.0 -fastapi==0.95.1 +fastapi==0.95.2 uvicorn[standard]==0.22.0 python-decouple==3.8 -pydantic[email]==1.10.7 +pydantic[email]==1.10.8 apscheduler==3.10.1 clickhouse-driver==0.2.5 clickhouse-driver[lz4]==0.2.5 -python3-saml==1.15.0 +# TODO: enable after xmlsec fix +#--no-binary is used to avoid libxml2 library version incompatibilities between xmlsec and lxml +python3-saml==1.15.0 --no-binary=lxml +# enable when this issue is fixed https://github.com/xmlsec/python-xmlsec/issues/257 python-multipart==0.0.6 redis==4.5.4 diff --git a/ee/api/routers/saml.py b/ee/api/routers/saml.py index cf52aa720..eb162db71 100644 --- a/ee/api/routers/saml.py +++ b/ee/api/routers/saml.py @@ -1,249 +1,250 @@ +# TODO: enable after xmlsec fix from fastapi import HTTPException, Request, Response, status -from chalicelib.utils import SAML2_helper -from chalicelib.utils.SAML2_helper import prepare_request, init_saml_auth +# from chalicelib.utils import SAML2_helper +# from chalicelib.utils.SAML2_helper import prepare_request, init_saml_auth from routers.base import get_routers public_app, app, app_apikey = get_routers() -from decouple import config 
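The new user_city / user_state branches in sessions_exp.py reuse the same _multiple_conditions helper as the neighbouring filters: when "any" is requested they only assert the column is not null, otherwise the condition template is expanded over every submitted value. A rough, self-contained sketch of what such a helper produces (an assumption about its behaviour, not the project's implementation):

    # Sketch: expanding one condition template over several values and OR-ing the parts.
    def multiple_conditions(condition: str, values, is_not: bool = False, value_key: str = "v"):
        parts = [condition.replace(f"%({value_key})s", f"%({value_key}_{i})s")
                 for i, _ in enumerate(values)]
        return ("NOT " if is_not else "") + "(" + " OR ".join(parts) + ")"

    # e.g. two cities -> (s.user_city = %(f_k_0)s OR s.user_city = %(f_k_1)s)
    print(multiple_conditions("s.user_city = %(f_k)s", ["Berlin", "Paris"], value_key="f_k"))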
- -from onelogin.saml2.auth import OneLogin_Saml2_Logout_Request - -from chalicelib.core import users, tenants, roles -from starlette.responses import RedirectResponse - - -@public_app.get("/sso/saml2", tags=["saml2"]) -@public_app.get("/sso/saml2/", tags=["saml2"]) -async def start_sso(request: Request): - request.path = '' - req = await prepare_request(request=request) - auth = init_saml_auth(req) - sso_built_url = auth.login() - return RedirectResponse(url=sso_built_url) - - -@public_app.post('/sso/saml2/acs', tags=["saml2"]) -@public_app.post('/sso/saml2/acs/', tags=["saml2"]) -async def process_sso_assertion(request: Request): - req = await prepare_request(request=request) - session = req["cookie"]["session"] - auth = init_saml_auth(req) - - request_id = None - if 'AuthNRequestID' in session: - request_id = session['AuthNRequestID'] - - auth.process_response(request_id=request_id) - errors = auth.get_errors() - user_data = {} - if len(errors) == 0: - if 'AuthNRequestID' in session: - del session['AuthNRequestID'] - user_data = auth.get_attributes() - elif auth.get_settings().is_debug_active(): - error_reason = auth.get_last_error_reason() - print("SAML2 error:") - print(error_reason) - return {"errors": [error_reason]} - - email = auth.get_nameid() - print("received nameId:") - print(email) - existing = users.get_by_email_only(auth.get_nameid()) - - internal_id = next(iter(user_data.get("internalId", [])), None) - tenant_key = user_data.get("tenantKey", []) - if len(tenant_key) == 0: - print("tenantKey not present in assertion, please check your SP-assertion-configuration") - return {"errors": ["tenantKey not present in assertion, please check your SP-assertion-configuration"]} - else: - t = tenants.get_by_tenant_key(tenant_key[0]) - if t is None: - print("invalid tenantKey, please copy the correct value from Preferences > Account") - return {"errors": ["invalid tenantKey, please copy the correct value from Preferences > Account"]} - print(user_data) - role_name = user_data.get("role", []) - if len(role_name) == 0: - print("No role specified, setting role to member") - role_name = ["member"] - role_name = role_name[0] - role = roles.get_role_by_name(tenant_id=t['tenantId'], name=role_name) - if role is None: - return {"errors": [f"role {role_name} not found, please create it in openreplay first"]} - - admin_privileges = user_data.get("adminPrivileges", []) - admin_privileges = not (len(admin_privileges) == 0 - or admin_privileges[0] is None - or admin_privileges[0].lower() == "false") - - if existing is None: - deleted = users.get_deleted_user_by_email(auth.get_nameid()) - if deleted is not None: - print("== restore deleted user ==") - users.restore_sso_user(user_id=deleted["userId"], tenant_id=t['tenantId'], email=email, - admin=admin_privileges, origin=SAML2_helper.get_saml2_provider(), - name=" ".join(user_data.get("firstName", []) + user_data.get("lastName", [])), - internal_id=internal_id, role_id=role["roleId"]) - else: - print("== new user ==") - users.create_sso_user(tenant_id=t['tenantId'], email=email, admin=admin_privileges, - origin=SAML2_helper.get_saml2_provider(), - name=" ".join(user_data.get("firstName", []) + user_data.get("lastName", [])), - internal_id=internal_id, role_id=role["roleId"]) - else: - if t['tenantId'] != existing["tenantId"]: - print("user exists for a different tenant") - return {"errors": ["user exists for a different tenant"]} - if existing.get("origin") is None: - print(f"== migrating user to {SAML2_helper.get_saml2_provider()} ==") - 
users.update(tenant_id=t['tenantId'], user_id=existing["id"], - changes={"origin": SAML2_helper.get_saml2_provider(), "internal_id": internal_id}) - expiration = auth.get_session_expiration() - expiration = expiration if expiration is not None and expiration > 10 * 60 \ - else int(config("sso_exp_delta_seconds", cast=int, default=24 * 60 * 60)) - jwt = users.authenticate_sso(email=email, internal_id=internal_id, exp=expiration) - if jwt is None: - return {"errors": ["null JWT"]} - return Response( - status_code=status.HTTP_302_FOUND, - headers={'Location': SAML2_helper.get_landing_URL(jwt)}) - - -@public_app.post('/sso/saml2/acs/{tenantKey}', tags=["saml2"]) -@public_app.post('/sso/saml2/acs/{tenantKey}/', tags=["saml2"]) -async def process_sso_assertion_tk(tenantKey: str, request: Request): - req = await prepare_request(request=request) - session = req["cookie"]["session"] - auth = init_saml_auth(req) - - request_id = None - if 'AuthNRequestID' in session: - request_id = session['AuthNRequestID'] - - auth.process_response(request_id=request_id) - errors = auth.get_errors() - user_data = {} - if len(errors) == 0: - if 'AuthNRequestID' in session: - del session['AuthNRequestID'] - user_data = auth.get_attributes() - elif auth.get_settings().is_debug_active(): - error_reason = auth.get_last_error_reason() - print("SAML2 error:") - print(error_reason) - return {"errors": [error_reason]} - - email = auth.get_nameid() - print("received nameId:") - print(email) - existing = users.get_by_email_only(auth.get_nameid()) - - internal_id = next(iter(user_data.get("internalId", [])), None) - - t = tenants.get_by_tenant_key(tenantKey) - if t is None: - print("invalid tenantKey, please copy the correct value from Preferences > Account") - return {"errors": ["invalid tenantKey, please copy the correct value from Preferences > Account"]} - print(user_data) - role_name = user_data.get("role", []) - if len(role_name) == 0: - print("No role specified, setting role to member") - role_name = ["member"] - role_name = role_name[0] - role = roles.get_role_by_name(tenant_id=t['tenantId'], name=role_name) - if role is None: - return {"errors": [f"role {role_name} not found, please create it in openreplay first"]} - - admin_privileges = user_data.get("adminPrivileges", []) - admin_privileges = not (len(admin_privileges) == 0 - or admin_privileges[0] is None - or admin_privileges[0].lower() == "false") - - if existing is None: - deleted = users.get_deleted_user_by_email(auth.get_nameid()) - if deleted is not None: - print("== restore deleted user ==") - users.restore_sso_user(user_id=deleted["userId"], tenant_id=t['tenantId'], email=email, - admin=admin_privileges, origin=SAML2_helper.get_saml2_provider(), - name=" ".join(user_data.get("firstName", []) + user_data.get("lastName", [])), - internal_id=internal_id, role_id=role["roleId"]) - else: - print("== new user ==") - users.create_sso_user(tenant_id=t['tenantId'], email=email, admin=admin_privileges, - origin=SAML2_helper.get_saml2_provider(), - name=" ".join(user_data.get("firstName", []) + user_data.get("lastName", [])), - internal_id=internal_id, role_id=role["roleId"]) - else: - if t['tenantId'] != existing["tenantId"]: - print("user exists for a different tenant") - return {"errors": ["user exists for a different tenant"]} - if existing.get("origin") is None: - print(f"== migrating user to {SAML2_helper.get_saml2_provider()} ==") - users.update(tenant_id=t['tenantId'], user_id=existing["id"], - changes={"origin": SAML2_helper.get_saml2_provider(), 
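While the SAML routes stay commented out pending the xmlsec fix, the assertion handling they contained reduces to a small amount of attribute parsing: role defaults to "member" when the IdP sends none, and adminPrivileges is treated as true unless it is missing or explicitly "false". A condensed, standalone restatement of that logic (attribute dict only, no SAML plumbing; parse_sso_attributes is an illustrative name):

    # Sketch of the attribute handling inside the currently disabled ACS handlers.
    def parse_sso_attributes(user_data: dict) -> dict:
        role_name = user_data.get("role", []) or ["member"]            # missing role -> "member"
        admin = user_data.get("adminPrivileges", [])
        admin = not (len(admin) == 0 or admin[0] is None or admin[0].lower() == "false")
        internal_id = next(iter(user_data.get("internalId", [])), None)
        return {"role": role_name[0], "admin": admin, "internal_id": internal_id}

    print(parse_sso_attributes({"role": ["owner"], "adminPrivileges": ["true"], "internalId": ["42"]}))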
"internal_id": internal_id}) - expiration = auth.get_session_expiration() - expiration = expiration if expiration is not None and expiration > 10 * 60 \ - else int(config("sso_exp_delta_seconds", cast=int, default=24 * 60 * 60)) - jwt = users.authenticate_sso(email=email, internal_id=internal_id, exp=expiration) - if jwt is None: - return {"errors": ["null JWT"]} - return Response( - status_code=status.HTTP_302_FOUND, - headers={'Location': SAML2_helper.get_landing_URL(jwt)}) - - -@public_app.get('/sso/saml2/sls', tags=["saml2"]) -@public_app.get('/sso/saml2/sls/', tags=["saml2"]) -async def process_sls_assertion(request: Request): - req = await prepare_request(request=request) - session = req["cookie"]["session"] - auth = init_saml_auth(req) - request_id = None - if 'LogoutRequestID' in session: - request_id = session['LogoutRequestID'] - - def dscb(): - session.clear() - - url = auth.process_slo(request_id=request_id, delete_session_cb=dscb) - - errors = auth.get_errors() - if len(errors) == 0: - if 'SAMLRequest' in req['get_data']: - logout_request = OneLogin_Saml2_Logout_Request(auth.get_settings(), req['get_data']['SAMLRequest']) - user_email = logout_request.get_nameid(auth.get_last_request_xml()) - to_logout = users.get_by_email_only(user_email) - - if len(to_logout) > 0: - to_logout = to_logout[0]['id'] - users.change_jwt_iat(to_logout) - else: - print("Unknown user SLS-Request By IdP") - else: - print("Preprocessed SLS-Request by SP") - - if url is not None: - return RedirectResponse(url=url) - - return RedirectResponse(url=config("SITE_URL")) - - -@public_app.get('/sso/saml2/metadata', tags=["saml2"]) -@public_app.get('/sso/saml2/metadata/', tags=["saml2"]) -async def saml2_metadata(request: Request): - req = await prepare_request(request=request) - auth = init_saml_auth(req) - settings = auth.get_settings() - metadata = settings.get_sp_metadata() - errors = settings.validate_metadata(metadata) - - if len(errors) == 0: - return Response( - status_code=status.HTTP_200_OK, - content=metadata, - headers={'Content-Type': 'text/xml'}) - else: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=', '.join(errors)) +# from decouple import config +# +# from onelogin.saml2.auth import OneLogin_Saml2_Logout_Request +# +# from chalicelib.core import users, tenants, roles +# from starlette.responses import RedirectResponse +# +# +# @public_app.get("/sso/saml2", tags=["saml2"]) +# @public_app.get("/sso/saml2/", tags=["saml2"]) +# async def start_sso(request: Request): +# request.path = '' +# req = await prepare_request(request=request) +# auth = init_saml_auth(req) +# sso_built_url = auth.login() +# return RedirectResponse(url=sso_built_url) +# +# +# @public_app.post('/sso/saml2/acs', tags=["saml2"]) +# @public_app.post('/sso/saml2/acs/', tags=["saml2"]) +# async def process_sso_assertion(request: Request): +# req = await prepare_request(request=request) +# session = req["cookie"]["session"] +# auth = init_saml_auth(req) +# +# request_id = None +# if 'AuthNRequestID' in session: +# request_id = session['AuthNRequestID'] +# +# auth.process_response(request_id=request_id) +# errors = auth.get_errors() +# user_data = {} +# if len(errors) == 0: +# if 'AuthNRequestID' in session: +# del session['AuthNRequestID'] +# user_data = auth.get_attributes() +# elif auth.get_settings().is_debug_active(): +# error_reason = auth.get_last_error_reason() +# print("SAML2 error:") +# print(error_reason) +# return {"errors": [error_reason]} +# +# email = auth.get_nameid() +# 
print("received nameId:") +# print(email) +# existing = users.get_by_email_only(auth.get_nameid()) +# +# internal_id = next(iter(user_data.get("internalId", [])), None) +# tenant_key = user_data.get("tenantKey", []) +# if len(tenant_key) == 0: +# print("tenantKey not present in assertion, please check your SP-assertion-configuration") +# return {"errors": ["tenantKey not present in assertion, please check your SP-assertion-configuration"]} +# else: +# t = tenants.get_by_tenant_key(tenant_key[0]) +# if t is None: +# print("invalid tenantKey, please copy the correct value from Preferences > Account") +# return {"errors": ["invalid tenantKey, please copy the correct value from Preferences > Account"]} +# print(user_data) +# role_name = user_data.get("role", []) +# if len(role_name) == 0: +# print("No role specified, setting role to member") +# role_name = ["member"] +# role_name = role_name[0] +# role = roles.get_role_by_name(tenant_id=t['tenantId'], name=role_name) +# if role is None: +# return {"errors": [f"role {role_name} not found, please create it in openreplay first"]} +# +# admin_privileges = user_data.get("adminPrivileges", []) +# admin_privileges = not (len(admin_privileges) == 0 +# or admin_privileges[0] is None +# or admin_privileges[0].lower() == "false") +# +# if existing is None: +# deleted = users.get_deleted_user_by_email(auth.get_nameid()) +# if deleted is not None: +# print("== restore deleted user ==") +# users.restore_sso_user(user_id=deleted["userId"], tenant_id=t['tenantId'], email=email, +# admin=admin_privileges, origin=SAML2_helper.get_saml2_provider(), +# name=" ".join(user_data.get("firstName", []) + user_data.get("lastName", [])), +# internal_id=internal_id, role_id=role["roleId"]) +# else: +# print("== new user ==") +# users.create_sso_user(tenant_id=t['tenantId'], email=email, admin=admin_privileges, +# origin=SAML2_helper.get_saml2_provider(), +# name=" ".join(user_data.get("firstName", []) + user_data.get("lastName", [])), +# internal_id=internal_id, role_id=role["roleId"]) +# else: +# if t['tenantId'] != existing["tenantId"]: +# print("user exists for a different tenant") +# return {"errors": ["user exists for a different tenant"]} +# if existing.get("origin") is None: +# print(f"== migrating user to {SAML2_helper.get_saml2_provider()} ==") +# users.update(tenant_id=t['tenantId'], user_id=existing["id"], +# changes={"origin": SAML2_helper.get_saml2_provider(), "internal_id": internal_id}) +# expiration = auth.get_session_expiration() +# expiration = expiration if expiration is not None and expiration > 10 * 60 \ +# else int(config("sso_exp_delta_seconds", cast=int, default=24 * 60 * 60)) +# jwt = users.authenticate_sso(email=email, internal_id=internal_id, exp=expiration) +# if jwt is None: +# return {"errors": ["null JWT"]} +# return Response( +# status_code=status.HTTP_302_FOUND, +# headers={'Location': SAML2_helper.get_landing_URL(jwt)}) +# +# +# @public_app.post('/sso/saml2/acs/{tenantKey}', tags=["saml2"]) +# @public_app.post('/sso/saml2/acs/{tenantKey}/', tags=["saml2"]) +# async def process_sso_assertion_tk(tenantKey: str, request: Request): +# req = await prepare_request(request=request) +# session = req["cookie"]["session"] +# auth = init_saml_auth(req) +# +# request_id = None +# if 'AuthNRequestID' in session: +# request_id = session['AuthNRequestID'] +# +# auth.process_response(request_id=request_id) +# errors = auth.get_errors() +# user_data = {} +# if len(errors) == 0: +# if 'AuthNRequestID' in session: +# del session['AuthNRequestID'] +# 
user_data = auth.get_attributes() +# elif auth.get_settings().is_debug_active(): +# error_reason = auth.get_last_error_reason() +# print("SAML2 error:") +# print(error_reason) +# return {"errors": [error_reason]} +# +# email = auth.get_nameid() +# print("received nameId:") +# print(email) +# existing = users.get_by_email_only(auth.get_nameid()) +# +# internal_id = next(iter(user_data.get("internalId", [])), None) +# +# t = tenants.get_by_tenant_key(tenantKey) +# if t is None: +# print("invalid tenantKey, please copy the correct value from Preferences > Account") +# return {"errors": ["invalid tenantKey, please copy the correct value from Preferences > Account"]} +# print(user_data) +# role_name = user_data.get("role", []) +# if len(role_name) == 0: +# print("No role specified, setting role to member") +# role_name = ["member"] +# role_name = role_name[0] +# role = roles.get_role_by_name(tenant_id=t['tenantId'], name=role_name) +# if role is None: +# return {"errors": [f"role {role_name} not found, please create it in openreplay first"]} +# +# admin_privileges = user_data.get("adminPrivileges", []) +# admin_privileges = not (len(admin_privileges) == 0 +# or admin_privileges[0] is None +# or admin_privileges[0].lower() == "false") +# +# if existing is None: +# deleted = users.get_deleted_user_by_email(auth.get_nameid()) +# if deleted is not None: +# print("== restore deleted user ==") +# users.restore_sso_user(user_id=deleted["userId"], tenant_id=t['tenantId'], email=email, +# admin=admin_privileges, origin=SAML2_helper.get_saml2_provider(), +# name=" ".join(user_data.get("firstName", []) + user_data.get("lastName", [])), +# internal_id=internal_id, role_id=role["roleId"]) +# else: +# print("== new user ==") +# users.create_sso_user(tenant_id=t['tenantId'], email=email, admin=admin_privileges, +# origin=SAML2_helper.get_saml2_provider(), +# name=" ".join(user_data.get("firstName", []) + user_data.get("lastName", [])), +# internal_id=internal_id, role_id=role["roleId"]) +# else: +# if t['tenantId'] != existing["tenantId"]: +# print("user exists for a different tenant") +# return {"errors": ["user exists for a different tenant"]} +# if existing.get("origin") is None: +# print(f"== migrating user to {SAML2_helper.get_saml2_provider()} ==") +# users.update(tenant_id=t['tenantId'], user_id=existing["id"], +# changes={"origin": SAML2_helper.get_saml2_provider(), "internal_id": internal_id}) +# expiration = auth.get_session_expiration() +# expiration = expiration if expiration is not None and expiration > 10 * 60 \ +# else int(config("sso_exp_delta_seconds", cast=int, default=24 * 60 * 60)) +# jwt = users.authenticate_sso(email=email, internal_id=internal_id, exp=expiration) +# if jwt is None: +# return {"errors": ["null JWT"]} +# return Response( +# status_code=status.HTTP_302_FOUND, +# headers={'Location': SAML2_helper.get_landing_URL(jwt)}) +# +# +# @public_app.get('/sso/saml2/sls', tags=["saml2"]) +# @public_app.get('/sso/saml2/sls/', tags=["saml2"]) +# async def process_sls_assertion(request: Request): +# req = await prepare_request(request=request) +# session = req["cookie"]["session"] +# auth = init_saml_auth(req) +# request_id = None +# if 'LogoutRequestID' in session: +# request_id = session['LogoutRequestID'] +# +# def dscb(): +# session.clear() +# +# url = auth.process_slo(request_id=request_id, delete_session_cb=dscb) +# +# errors = auth.get_errors() +# if len(errors) == 0: +# if 'SAMLRequest' in req['get_data']: +# logout_request = OneLogin_Saml2_Logout_Request(auth.get_settings(), 
req['get_data']['SAMLRequest']) +# user_email = logout_request.get_nameid(auth.get_last_request_xml()) +# to_logout = users.get_by_email_only(user_email) +# +# if len(to_logout) > 0: +# to_logout = to_logout[0]['id'] +# users.change_jwt_iat(to_logout) +# else: +# print("Unknown user SLS-Request By IdP") +# else: +# print("Preprocessed SLS-Request by SP") +# +# if url is not None: +# return RedirectResponse(url=url) +# +# return RedirectResponse(url=config("SITE_URL")) +# +# +# @public_app.get('/sso/saml2/metadata', tags=["saml2"]) +# @public_app.get('/sso/saml2/metadata/', tags=["saml2"]) +# async def saml2_metadata(request: Request): +# req = await prepare_request(request=request) +# auth = init_saml_auth(req) +# settings = auth.get_settings() +# metadata = settings.get_sp_metadata() +# errors = settings.validate_metadata(metadata) +# +# if len(errors) == 0: +# return Response( +# status_code=status.HTTP_200_OK, +# content=metadata, +# headers={'Content-Type': 'text/xml'}) +# else: +# raise HTTPException( +# status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, +# detail=', '.join(errors)) diff --git a/ee/api/routers/subs/insights.py b/ee/api/routers/subs/insights.py index 5bd68d313..822e37185 100644 --- a/ee/api/routers/subs/insights.py +++ b/ee/api/routers/subs/insights.py @@ -1,7 +1,7 @@ from fastapi import Body import schemas -from chalicelib.core import insights +from chalicelib.core import product_analytics from or_dependencies import OR_scope from routers.base import get_routers from schemas_ee import Permissions @@ -11,74 +11,76 @@ public_app, app, app_apikey = get_routers([OR_scope(Permissions.metrics)]) @app.post('/{projectId}/insights/journey', tags=["insights"]) @app.get('/{projectId}/insights/journey', tags=["insights"]) -async def get_insights_journey(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.journey(project_id=projectId, **data.dict())} +async def get_insights_journey(projectId: int, data: schemas.PathAnalysisSchema = Body(...)): + return {"data": product_analytics.path_analysis(project_id=projectId, **data.dict())} -@app.post('/{projectId}/insights/users_acquisition', tags=["insights"]) -@app.get('/{projectId}/insights/users_acquisition', tags=["insights"]) -async def get_users_acquisition(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.users_acquisition(project_id=projectId, **data.dict())} +# @app.post('/{projectId}/insights/users_acquisition', tags=["insights"]) +# @app.get('/{projectId}/insights/users_acquisition', tags=["insights"]) +# async def get_users_acquisition(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.users_acquisition(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/users_retention', tags=["insights"]) +# @app.get('/{projectId}/insights/users_retention', tags=["insights"]) +# async def get_users_retention(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.users_retention(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/feature_retention', tags=["insights"]) +# @app.get('/{projectId}/insights/feature_retention', tags=["insights"]) +# async def get_feature_rentention(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.feature_retention(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/feature_acquisition', tags=["insights"]) +# 
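The journey endpoint now validates its body as schemas.PathAnalysisSchema and forwards to product_analytics.path_analysis. A hedged client-side sketch of calling it (the host, auth header and exact payload fields are placeholders inferred from the path_analysis signature, not a documented API contract):

    # Sketch: POSTing a path-analysis request to the renamed insights/journey route.
    import requests

    payload = {
        "startTimestamp": 1684108800000,
        "endTimestamp": 1684713600000,
        "filters": [{"type": "START_POINT", "value": "/login"}],
    }
    resp = requests.post("https://openreplay.example.com/api/1/insights/journey",
                         json=payload,
                         headers={"Authorization": "Bearer <jwt>"})
    print(resp.json()["data"])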
@app.get('/{projectId}/insights/feature_acquisition', tags=["insights"]) +# async def get_feature_acquisition(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.feature_acquisition(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/feature_popularity_frequency', tags=["insights"]) +# @app.get('/{projectId}/insights/feature_popularity_frequency', tags=["insights"]) +# async def get_feature_popularity_frequency(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.feature_popularity_frequency(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/feature_intensity', tags=["insights"]) +# @app.get('/{projectId}/insights/feature_intensity', tags=["insights"]) +# async def get_feature_intensity(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.feature_intensity(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/feature_adoption', tags=["insights"]) +# @app.get('/{projectId}/insights/feature_adoption', tags=["insights"]) +# async def get_feature_adoption(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.feature_adoption(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/feature_adoption_top_users', tags=["insights"]) +# @app.get('/{projectId}/insights/feature_adoption_top_users', tags=["insights"]) +# async def get_feature_adoption(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.feature_adoption_top_users(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/users_active', tags=["insights"]) +# @app.get('/{projectId}/insights/users_active', tags=["insights"]) +# async def get_users_active(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.users_active(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/users_power', tags=["insights"]) +# @app.get('/{projectId}/insights/users_power', tags=["insights"]) +# async def get_users_power(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.users_power(project_id=projectId, **data.dict())} +# +# +# @app.post('/{projectId}/insights/users_slipping', tags=["insights"]) +# @app.get('/{projectId}/insights/users_slipping', tags=["insights"]) +# async def get_users_slipping(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): +# return {"data": product_analytics.users_slipping(project_id=projectId, **data.dict())} -@app.post('/{projectId}/insights/users_retention', tags=["insights"]) -@app.get('/{projectId}/insights/users_retention', tags=["insights"]) -async def get_users_retention(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.users_retention(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/feature_retention', tags=["insights"]) -@app.get('/{projectId}/insights/feature_retention', tags=["insights"]) -async def get_feature_rentention(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.feature_retention(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/feature_acquisition', tags=["insights"]) -@app.get('/{projectId}/insights/feature_acquisition', tags=["insights"]) -async def get_feature_acquisition(projectId: 
int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.feature_acquisition(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/feature_popularity_frequency', tags=["insights"]) -@app.get('/{projectId}/insights/feature_popularity_frequency', tags=["insights"]) -async def get_feature_popularity_frequency(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.feature_popularity_frequency(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/feature_intensity', tags=["insights"]) -@app.get('/{projectId}/insights/feature_intensity', tags=["insights"]) -async def get_feature_intensity(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.feature_intensity(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/feature_adoption', tags=["insights"]) -@app.get('/{projectId}/insights/feature_adoption', tags=["insights"]) -async def get_feature_adoption(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.feature_adoption(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/feature_adoption_top_users', tags=["insights"]) -@app.get('/{projectId}/insights/feature_adoption_top_users', tags=["insights"]) -async def get_feature_adoption(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.feature_adoption_top_users(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/users_active', tags=["insights"]) -@app.get('/{projectId}/insights/users_active', tags=["insights"]) -async def get_users_active(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.users_active(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/users_power', tags=["insights"]) -@app.get('/{projectId}/insights/users_power', tags=["insights"]) -async def get_users_power(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.users_power(project_id=projectId, **data.dict())} - - -@app.post('/{projectId}/insights/users_slipping', tags=["insights"]) -@app.get('/{projectId}/insights/users_slipping', tags=["insights"]) -async def get_users_slipping(projectId: int, data: schemas.MetricPayloadSchema = Body(...)): - return {"data": insights.users_slipping(project_id=projectId, **data.dict())} # # diff --git a/ee/api/routers/subs/metrics.py b/ee/api/routers/subs/metrics.py index 05f3b3072..097ba81ed 100644 --- a/ee/api/routers/subs/metrics.py +++ b/ee/api/routers/subs/metrics.py @@ -64,7 +64,7 @@ async def add_card_to_dashboard(projectId: int, dashboardId: int, @app.post('/{projectId}/dashboards/{dashboardId}/metrics', tags=["dashboard"]) @app.put('/{projectId}/dashboards/{dashboardId}/metrics', tags=["dashboard"]) async def create_metric_and_add_to_dashboard(projectId: int, dashboardId: int, - data: schemas_ee.CreateCardSchema = Body(...), + data: schemas_ee.CardSchema = Body(...), context: schemas.CurrentContext = Depends(OR_context)): return {"data": dashboards.create_metric_add_widget(project_id=projectId, user_id=context.user_id, dashboard_id=dashboardId, data=data)} @@ -102,7 +102,7 @@ async def remove_widget_from_dashboard(projectId: int, dashboardId: int, widgetI @app.put('/{projectId}/metrics/try', tags=["dashboard"]) @app.post('/{projectId}/custom_metrics/try', tags=["customMetrics"]) @app.put('/{projectId}/custom_metrics/try', tags=["customMetrics"]) -async def 
try_card(projectId: int, data: schemas_ee.CreateCardSchema = Body(...), +async def try_card(projectId: int, data: schemas_ee.CardSchema = Body(...), context: schemas.CurrentContext = Depends(OR_context)): return {"data": custom_metrics.merged_live(project_id=projectId, data=data, user_id=context.user_id)} @@ -141,7 +141,7 @@ async def get_cards(projectId: int, context: schemas.CurrentContext = Depends(OR @app.put('/{projectId}/metrics', tags=["dashboard"]) @app.post('/{projectId}/custom_metrics', tags=["customMetrics"]) @app.put('/{projectId}/custom_metrics', tags=["customMetrics"]) -async def create_card(projectId: int, data: schemas_ee.CreateCardSchema = Body(...), +async def create_card(projectId: int, data: schemas_ee.CardSchema = Body(...), context: schemas.CurrentContext = Depends(OR_context)): return custom_metrics.create(project_id=projectId, user_id=context.user_id, data=data) diff --git a/ee/api/schemas_ee.py b/ee/api/schemas_ee.py index 416d9e860..8674f207e 100644 --- a/ee/api/schemas_ee.py +++ b/ee/api/schemas_ee.py @@ -53,7 +53,7 @@ class GetInsightsSchema(schemas._TimedSchema): startTimestamp: int = Field(default=TimeUTC.now(-7)) endTimestamp: int = Field(default=TimeUTC.now()) metricValue: List[InsightCategories] = Field(default=[]) - series: List[schemas.CardCreateSeriesSchema] = Field(default=[]) + series: List[schemas.CardSeriesSchema] = Field(default=[]) class Config: alias_generator = schemas.attribute_to_camel_case @@ -150,7 +150,7 @@ class MetricOfInsights(str, Enum): issue_categories = "issueCategories" -class CreateCardSchema(schemas.CreateCardSchema): +class CardSchema(schemas.CardSchema): metric_of: Union[schemas.MetricOfTimeseries, schemas.MetricOfTable, \ schemas.MetricOfErrors, schemas.MetricOfPerformance, \ schemas.MetricOfResources, schemas.MetricOfWebVitals, \ @@ -177,5 +177,5 @@ class CreateCardSchema(schemas.CreateCardSchema): return values -class UpdateCardSchema(CreateCardSchema): +class UpdateCardSchema(CardSchema): series: List[schemas.CardUpdateSeriesSchema] = Field(...) 
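Across the routers the change is purely the CreateCardSchema -> CardSchema rename; UpdateCardSchema then only tightens the inherited contract by re-declaring series as required with Field(...). A minimal pydantic 1.x sketch of that "subclass makes an inherited field required" pattern (stand-in models, not the real schemas):

    from typing import List
    from pydantic import BaseModel, Field, ValidationError

    class CardSketch(BaseModel):
        name: str = "Untitled"
        series: List[dict] = Field(default=[])     # optional on create

    class CardUpdateSketch(CardSketch):
        series: List[dict] = Field(...)            # required on update

    CardSketch()                                   # ok: series defaults to []
    try:
        CardUpdateSketch(name="renamed")           # rejected: series must be provided
    except ValidationError as e:
        print(e.errors()[0]["msg"])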
diff --git a/ee/scripts/schema/db/init_dbs/clickhouse/1.13.0/1.13.0.sql b/ee/scripts/schema/db/init_dbs/clickhouse/1.13.0/1.13.0.sql new file mode 100644 index 000000000..d1f202e82 --- /dev/null +++ b/ee/scripts/schema/db/init_dbs/clickhouse/1.13.0/1.13.0.sql @@ -0,0 +1,6 @@ +CREATE OR REPLACE FUNCTION openreplay_version AS() -> 'v1.13.0-ee'; + + +ALTER TABLE experimental.sessions + ADD COLUMN IF NOT EXISTS user_city LowCardinality(String), + ADD COLUMN IF NOT EXISTS user_state LowCardinality(String); \ No newline at end of file diff --git a/ee/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql b/ee/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql index 1db327ce0..2bad5e80b 100644 --- a/ee/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql +++ b/ee/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql @@ -127,6 +127,8 @@ CREATE TABLE IF NOT EXISTS experimental.sessions user_device Nullable(String), user_device_type Enum8('other'=0, 'desktop'=1, 'mobile'=2), user_country Enum8('UN'=-128, 'RW'=-127, 'SO'=-126, 'YE'=-125, 'IQ'=-124, 'SA'=-123, 'IR'=-122, 'CY'=-121, 'TZ'=-120, 'SY'=-119, 'AM'=-118, 'KE'=-117, 'CD'=-116, 'DJ'=-115, 'UG'=-114, 'CF'=-113, 'SC'=-112, 'JO'=-111, 'LB'=-110, 'KW'=-109, 'OM'=-108, 'QA'=-107, 'BH'=-106, 'AE'=-105, 'IL'=-104, 'TR'=-103, 'ET'=-102, 'ER'=-101, 'EG'=-100, 'SD'=-99, 'GR'=-98, 'BI'=-97, 'EE'=-96, 'LV'=-95, 'AZ'=-94, 'LT'=-93, 'SJ'=-92, 'GE'=-91, 'MD'=-90, 'BY'=-89, 'FI'=-88, 'AX'=-87, 'UA'=-86, 'MK'=-85, 'HU'=-84, 'BG'=-83, 'AL'=-82, 'PL'=-81, 'RO'=-80, 'XK'=-79, 'ZW'=-78, 'ZM'=-77, 'KM'=-76, 'MW'=-75, 'LS'=-74, 'BW'=-73, 'MU'=-72, 'SZ'=-71, 'RE'=-70, 'ZA'=-69, 'YT'=-68, 'MZ'=-67, 'MG'=-66, 'AF'=-65, 'PK'=-64, 'BD'=-63, 'TM'=-62, 'TJ'=-61, 'LK'=-60, 'BT'=-59, 'IN'=-58, 'MV'=-57, 'IO'=-56, 'NP'=-55, 'MM'=-54, 'UZ'=-53, 'KZ'=-52, 'KG'=-51, 'TF'=-50, 'HM'=-49, 'CC'=-48, 'PW'=-47, 'VN'=-46, 'TH'=-45, 'ID'=-44, 'LA'=-43, 'TW'=-42, 'PH'=-41, 'MY'=-40, 'CN'=-39, 'HK'=-38, 'BN'=-37, 'MO'=-36, 'KH'=-35, 'KR'=-34, 'JP'=-33, 'KP'=-32, 'SG'=-31, 'CK'=-30, 'TL'=-29, 'RU'=-28, 'MN'=-27, 'AU'=-26, 'CX'=-25, 'MH'=-24, 'FM'=-23, 'PG'=-22, 'SB'=-21, 'TV'=-20, 'NR'=-19, 'VU'=-18, 'NC'=-17, 'NF'=-16, 'NZ'=-15, 'FJ'=-14, 'LY'=-13, 'CM'=-12, 'SN'=-11, 'CG'=-10, 'PT'=-9, 'LR'=-8, 'CI'=-7, 'GH'=-6, 'GQ'=-5, 'NG'=-4, 'BF'=-3, 'TG'=-2, 'GW'=-1, 'MR'=0, 'BJ'=1, 'GA'=2, 'SL'=3, 'ST'=4, 'GI'=5, 'GM'=6, 'GN'=7, 'TD'=8, 'NE'=9, 'ML'=10, 'EH'=11, 'TN'=12, 'ES'=13, 'MA'=14, 'MT'=15, 'DZ'=16, 'FO'=17, 'DK'=18, 'IS'=19, 'GB'=20, 'CH'=21, 'SE'=22, 'NL'=23, 'AT'=24, 'BE'=25, 'DE'=26, 'LU'=27, 'IE'=28, 'MC'=29, 'FR'=30, 'AD'=31, 'LI'=32, 'JE'=33, 'IM'=34, 'GG'=35, 'SK'=36, 'CZ'=37, 'NO'=38, 'VA'=39, 'SM'=40, 'IT'=41, 'SI'=42, 'ME'=43, 'HR'=44, 'BA'=45, 'AO'=46, 'NA'=47, 'SH'=48, 'BV'=49, 'BB'=50, 'CV'=51, 'GY'=52, 'GF'=53, 'SR'=54, 'PM'=55, 'GL'=56, 'PY'=57, 'UY'=58, 'BR'=59, 'FK'=60, 'GS'=61, 'JM'=62, 'DO'=63, 'CU'=64, 'MQ'=65, 'BS'=66, 'BM'=67, 'AI'=68, 'TT'=69, 'KN'=70, 'DM'=71, 'AG'=72, 'LC'=73, 'TC'=74, 'AW'=75, 'VG'=76, 'VC'=77, 'MS'=78, 'MF'=79, 'BL'=80, 'GP'=81, 'GD'=82, 'KY'=83, 'BZ'=84, 'SV'=85, 'GT'=86, 'HN'=87, 'NI'=88, 'CR'=89, 'VE'=90, 'EC'=91, 'CO'=92, 'PA'=93, 'HT'=94, 'AR'=95, 'CL'=96, 'BO'=97, 'PE'=98, 'MX'=99, 'PF'=100, 'PN'=101, 'KI'=102, 'TK'=103, 'TO'=104, 'WF'=105, 'WS'=106, 'NU'=107, 'MP'=108, 'GU'=109, 'PR'=110, 'VI'=111, 'UM'=112, 'AS'=113, 'CA'=114, 'US'=115, 'PS'=116, 'RS'=117, 'AQ'=118, 'SX'=119, 'CW'=120, 'BQ'=121, 'SS'=122,'BU'=123, 'VD'=124, 'YD'=125, 'DD'=126), + user_city LowCardinality(String), + user_state 
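On the ClickHouse side, both the 1.13.0 migration and the fresh init schema add user_city and user_state as LowCardinality(String) columns on experimental.sessions. A hedged query sketch against the new columns using clickhouse-driver (the host value and the presence of a project_id column are assumptions for the example):

    # Sketch: top cities for one project/state using the new session columns.
    from clickhouse_driver import Client

    client = Client(host="clickhouse.example.local")
    rows = client.execute(
        """SELECT user_city, count() AS sessions
           FROM experimental.sessions
           WHERE project_id = %(project_id)s
             AND user_state = %(state)s
           GROUP BY user_city
           ORDER BY sessions DESC
           LIMIT 10""",
        {"project_id": 1, "state": "Bavaria"},
    )
    print(rows)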
LowCardinality(String), platform Enum8('web'=1,'ios'=2,'android'=3) DEFAULT 'web', datetime DateTime, duration UInt32, @@ -324,6 +326,8 @@ SELECT session_id, user_device, user_device_type, user_country, + user_city, + user_state, platform, datetime, duration, diff --git a/ee/scripts/schema/db/init_dbs/postgresql/1.13.0/1.13.0.sql b/ee/scripts/schema/db/init_dbs/postgresql/1.13.0/1.13.0.sql index 7e69f9847..ce79c0f89 100644 --- a/ee/scripts/schema/db/init_dbs/postgresql/1.13.0/1.13.0.sql +++ b/ee/scripts/schema/db/init_dbs/postgresql/1.13.0/1.13.0.sql @@ -54,4 +54,11 @@ UPDATE public.roles SET permissions = (SELECT array_agg(distinct e) FROM unnest(permissions || '{FEATURE_FLAGS}') AS e) where not permissions @> '{FEATURE_FLAGS}'; -COMMIT; \ No newline at end of file +ALTER TABLE IF EXISTS public.sessions + ADD COLUMN IF NOT EXISTS user_city text, + ADD COLUMN IF NOT EXISTS user_state text; + +COMMIT; + +CREATE INDEX CONCURRENTLY IF NOT EXISTS sessions_project_id_user_city_idx ON sessions (project_id, user_city); +CREATE INDEX CONCURRENTLY IF NOT EXISTS sessions_project_id_user_state_idx ON sessions (project_id, user_state); diff --git a/ee/scripts/schema/db/init_dbs/postgresql/init_schema.sql b/ee/scripts/schema/db/init_dbs/postgresql/init_schema.sql index 7d3e028bd..4c1c55517 100644 --- a/ee/scripts/schema/db/init_dbs/postgresql/init_schema.sql +++ b/ee/scripts/schema/db/init_dbs/postgresql/init_schema.sql @@ -527,6 +527,8 @@ $$ user_device_memory_size integer DEFAULT NULL, user_device_heap_size bigint DEFAULT NULL, user_country country NOT NULL, + user_city text NULL, + user_state text NULL, pages_count integer NOT NULL DEFAULT 0, events_count integer NOT NULL DEFAULT 0, errors_count integer NOT NULL DEFAULT 0, @@ -555,6 +557,8 @@ $$ CREATE INDEX IF NOT EXISTS sessions_project_id_user_anonymous_id_idx ON sessions (project_id, user_anonymous_id); CREATE INDEX IF NOT EXISTS sessions_project_id_user_device_idx ON sessions (project_id, user_device); CREATE INDEX IF NOT EXISTS sessions_project_id_user_country_idx ON sessions (project_id, user_country); + CREATE INDEX IF NOT EXISTS sessions_project_id_user_city_idx ON sessions (project_id, user_city); + CREATE INDEX IF NOT EXISTS sessions_project_id_user_state_idx ON sessions (project_id, user_state); CREATE INDEX IF NOT EXISTS sessions_project_id_user_browser_idx ON sessions (project_id, user_browser); CREATE INDEX IF NOT EXISTS sessions_project_id_metadata_1_idx ON sessions (project_id, metadata_1); CREATE INDEX IF NOT EXISTS sessions_project_id_metadata_2_idx ON sessions (project_id, metadata_2); @@ -964,6 +968,7 @@ $$ response_time bigint DEFAULT NULL, response_end bigint DEFAULT NULL, ttfb integer DEFAULT NULL, + tab_id text NULL, PRIMARY KEY (session_id, message_id) ); CREATE INDEX IF NOT EXISTS pages_session_id_idx ON events.pages (session_id); @@ -1011,6 +1016,7 @@ $$ path text, selector text DEFAULT '' NOT NULL, hesitation integer DEFAULT NULL, + tab_id text NULL, PRIMARY KEY (session_id, message_id) ); CREATE INDEX IF NOT EXISTS clicks_session_id_idx ON events.clicks (session_id); @@ -1035,6 +1041,7 @@ $$ value text DEFAULT NULL, duration integer DEFAULT NULL, hesitation integer DEFAULT NULL, + tab_id text NULL, PRIMARY KEY (session_id, message_id) ); CREATE INDEX IF NOT EXISTS inputs_session_id_idx ON events.inputs (session_id); @@ -1048,6 +1055,7 @@ $$ message_id bigint NOT NULL, timestamp bigint NOT NULL, error_id text NOT NULL REFERENCES errors (error_id) ON DELETE CASCADE, + tab_id text NULL, PRIMARY KEY (session_id, message_id) 
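The PostgreSQL 1.13.0 migrations add the same two session columns inside the transaction but deliberately place the CREATE INDEX CONCURRENTLY statements after COMMIT, since concurrent index builds cannot run inside a transaction block. A hedged psycopg2 sketch of applying that part of the migration (connection settings are placeholders):

    # Sketch: column changes inside a transaction, concurrent indexes under autocommit.
    import psycopg2

    conn = psycopg2.connect("dbname=openreplay user=postgres host=localhost")
    with conn.cursor() as cur:
        cur.execute("""ALTER TABLE IF EXISTS public.sessions
                           ADD COLUMN IF NOT EXISTS user_city  text,
                           ADD COLUMN IF NOT EXISTS user_state text;""")
    conn.commit()

    conn.autocommit = True  # CREATE INDEX CONCURRENTLY refuses to run inside a transaction
    with conn.cursor() as cur:
        cur.execute("CREATE INDEX CONCURRENTLY IF NOT EXISTS sessions_project_id_user_city_idx "
                    "ON sessions (project_id, user_city);")
        cur.execute("CREATE INDEX CONCURRENTLY IF NOT EXISTS sessions_project_id_user_state_idx "
                    "ON sessions (project_id, user_state);")
    conn.close()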
); CREATE INDEX IF NOT EXISTS errors_session_id_idx ON events.errors (session_id); @@ -1087,6 +1095,7 @@ $$ request_body text NULL, response_body text NULL, method http_method NULL, + tab_id text NULL, PRIMARY KEY (session_id, message_id) ); CREATE INDEX IF NOT EXISTS graphql_name_idx ON events.graphql (name); @@ -1103,6 +1112,7 @@ $$ message_id bigint NOT NULL, timestamp bigint NOT NULL, name text NOT NULL, + tab_id text NULL, PRIMARY KEY (session_id, message_id) ); CREATE INDEX IF NOT EXISTS state_actions_name_gin_idx ON events.state_actions USING GIN (name gin_trgm_ops); @@ -1135,6 +1145,7 @@ $$ header_size bigint NULL, encoded_body_size integer NULL, decoded_body_size integer NULL, + tab_id text NULL, PRIMARY KEY (session_id, message_id, timestamp) ); CREATE INDEX IF NOT EXISTS resources_session_id_idx ON events.resources (session_id); @@ -1174,6 +1185,7 @@ $$ min_used_js_heap_size bigint NOT NULL, avg_used_js_heap_size bigint NOT NULL, max_used_js_heap_size bigint NOT NULL, + tab_id text NULL, PRIMARY KEY (session_id, message_id) ); CREATE INDEX IF NOT EXISTS performance_session_id_idx ON events.performance (session_id); @@ -1213,6 +1225,7 @@ $$ name text NOT NULL, payload jsonb NOT NULL, level events_common.custom_level NOT NULL DEFAULT 'info', + tab_id text NULL, PRIMARY KEY (session_id, timestamp, seq_index) ); CREATE INDEX IF NOT EXISTS customs_name_idx ON events_common.customs (name); @@ -1227,6 +1240,7 @@ $$ seq_index integer NOT NULL, issue_id text NOT NULL REFERENCES issues (issue_id) ON DELETE CASCADE, payload jsonb DEFAULT NULL, + tab_id text NULL, PRIMARY KEY (session_id, timestamp, seq_index) ); CREATE INDEX IF NOT EXISTS issues_issue_id_timestamp_idx ON events_common.issues (issue_id, timestamp); @@ -1252,6 +1266,7 @@ $$ host text NULL, path text NULL, query text NULL, + tab_id text NULL, PRIMARY KEY (session_id, timestamp, seq_index) ); diff --git a/scripts/schema/db/init_dbs/postgresql/1.13.0/1.13.0.sql b/scripts/schema/db/init_dbs/postgresql/1.13.0/1.13.0.sql index 5be391046..03676807e 100644 --- a/scripts/schema/db/init_dbs/postgresql/1.13.0/1.13.0.sql +++ b/scripts/schema/db/init_dbs/postgresql/1.13.0/1.13.0.sql @@ -50,4 +50,34 @@ CREATE TABLE IF NOT EXISTS public.feature_flags_conditions filters jsonb NOT NULL DEFAULT '[]'::jsonb ); -COMMIT; \ No newline at end of file +ALTER TABLE IF EXISTS events.clicks + ADD COLUMN IF NOT EXISTS tab_id text DEFAULT NULL; +ALTER TABLE IF EXISTS events.errors + ADD COLUMN IF NOT EXISTS tab_id text DEFAULT NULL; +ALTER TABLE IF EXISTS events.graphql + ADD COLUMN IF NOT EXISTS tab_id text DEFAULT NULL; +ALTER TABLE IF EXISTS events.inputs + ADD COLUMN IF NOT EXISTS tab_id text DEFAULT NULL; +ALTER TABLE IF EXISTS events.pages + ADD COLUMN IF NOT EXISTS tab_id text DEFAULT NULL; +ALTER TABLE IF EXISTS events.performance + ADD COLUMN IF NOT EXISTS tab_id text DEFAULT NULL; +ALTER TABLE IF EXISTS events.resources + ADD COLUMN IF NOT EXISTS tab_id text DEFAULT NULL; +ALTER TABLE IF EXISTS events.state_actions + ADD COLUMN IF NOT EXISTS tab_id text DEFAULT NULL; +ALTER TABLE IF EXISTS events_common.customs + ADD COLUMN IF NOT EXISTS tab_id text DEFAULT NULL; +ALTER TABLE IF EXISTS events_common.issues + ADD COLUMN IF NOT EXISTS tab_id text DEFAULT NULL; +ALTER TABLE IF EXISTS events_common.requests + ADD COLUMN IF NOT EXISTS tab_id text DEFAULT NULL; + +ALTER TABLE IF EXISTS public.sessions + ADD COLUMN IF NOT EXISTS user_city text, + ADD COLUMN IF NOT EXISTS user_state text; + +COMMIT; + +CREATE INDEX CONCURRENTLY IF NOT EXISTS 
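The community 1.13.0 migration above adds the same nullable tab_id text column to every per-event table, one ALTER per table. The SQL file spells the statements out; purely as an illustration of how repetitive that DDL is, an equivalent generator would be:

    # Sketch only: the shipped migration lists these statements explicitly.
    EVENT_TABLES = [
        "events.clicks", "events.errors", "events.graphql", "events.inputs",
        "events.pages", "events.performance", "events.resources", "events.state_actions",
        "events_common.customs", "events_common.issues", "events_common.requests",
    ]

    def tab_id_migration() -> str:
        return "\n".join(
            f"ALTER TABLE IF EXISTS {table}\n"
            f"    ADD COLUMN IF NOT EXISTS tab_id text DEFAULT NULL;"
            for table in EVENT_TABLES
        )

    print(tab_id_migration())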
sessions_project_id_user_city_idx ON sessions (project_id, user_city); +CREATE INDEX CONCURRENTLY IF NOT EXISTS sessions_project_id_user_state_idx ON sessions (project_id, user_state); \ No newline at end of file diff --git a/scripts/schema/db/init_dbs/postgresql/init_schema.sql b/scripts/schema/db/init_dbs/postgresql/init_schema.sql index f684dfc4f..c7dcc75fd 100644 --- a/scripts/schema/db/init_dbs/postgresql/init_schema.sql +++ b/scripts/schema/db/init_dbs/postgresql/init_schema.sql @@ -404,6 +404,8 @@ $$ user_device_memory_size integer DEFAULT NULL, user_device_heap_size bigint DEFAULT NULL, user_country country NOT NULL, + user_city text NULL, + user_state text NULL, pages_count integer NOT NULL DEFAULT 0, events_count integer NOT NULL DEFAULT 0, errors_count integer NOT NULL DEFAULT 0, @@ -431,6 +433,8 @@ $$ CREATE INDEX sessions_project_id_user_anonymous_id_idx ON sessions (project_id, user_anonymous_id); CREATE INDEX sessions_project_id_user_device_idx ON sessions (project_id, user_device); CREATE INDEX sessions_project_id_user_country_idx ON sessions (project_id, user_country); + CREATE INDEX sessions_project_id_user_city_idx ON sessions (project_id, user_city); + CREATE INDEX sessions_project_id_user_state_idx ON sessions (project_id, user_state); CREATE INDEX sessions_project_id_user_browser_idx ON sessions (project_id, user_browser); CREATE INDEX sessions_project_id_metadata_1_idx ON sessions (project_id, metadata_1); CREATE INDEX sessions_project_id_metadata_2_idx ON sessions (project_id, metadata_2); @@ -522,6 +526,7 @@ $$ name text NOT NULL, payload jsonb NOT NULL, level events_common.custom_level NOT NULL DEFAULT 'info', + tab_id text NULL, PRIMARY KEY (session_id, timestamp, seq_index) ); CREATE INDEX customs_name_idx ON events_common.customs (name); @@ -536,6 +541,7 @@ $$ seq_index integer NOT NULL, issue_id text NOT NULL REFERENCES issues (issue_id) ON DELETE CASCADE, payload jsonb DEFAULT NULL, + tab_id text NULL, PRIMARY KEY (session_id, timestamp, seq_index) ); CREATE INDEX issues_issue_id_timestamp_idx ON events_common.issues (issue_id, timestamp); @@ -557,6 +563,7 @@ $$ host text NULL, path text NULL, query text NULL, + tab_id text NULL, PRIMARY KEY (session_id, timestamp, seq_index) ); @@ -594,6 +601,7 @@ $$ response_time bigint DEFAULT NULL, response_end bigint DEFAULT NULL, ttfb integer DEFAULT NULL, + tab_id text NULL, PRIMARY KEY (session_id, message_id) ); CREATE INDEX pages_session_id_idx ON events.pages (session_id); @@ -638,6 +646,7 @@ $$ path text, selector text DEFAULT '' NOT NULL, hesitation integer DEFAULT NULL, + tab_id text NULL, PRIMARY KEY (session_id, message_id) ); CREATE INDEX clicks_session_id_idx ON events.clicks (session_id); @@ -661,6 +670,7 @@ $$ value text DEFAULT NULL, duration integer DEFAULT NULL, hesitation integer DEFAULT NULL, + tab_id text NULL, PRIMARY KEY (session_id, message_id) ); CREATE INDEX inputs_session_id_idx ON events.inputs (session_id); @@ -674,6 +684,7 @@ $$ message_id bigint NOT NULL, timestamp bigint NOT NULL, error_id text NOT NULL REFERENCES errors (error_id) ON DELETE CASCADE, + tab_id text NULL, PRIMARY KEY (session_id, message_id) ); CREATE INDEX errors_session_id_idx ON events.errors (session_id); @@ -708,6 +719,7 @@ $$ request_body text NULL, response_body text NULL, method http_method NULL, + tab_id text NULL, PRIMARY KEY (session_id, message_id) ); CREATE INDEX graphql_name_idx ON events.graphql (name); @@ -724,6 +736,7 @@ $$ message_id bigint NOT NULL, timestamp bigint NOT NULL, name text NOT NULL, + tab_id 
text NULL, PRIMARY KEY (session_id, message_id) ); CREATE INDEX state_actions_name_gin_idx ON events.state_actions USING GIN (name gin_trgm_ops); @@ -748,6 +761,7 @@ $$ header_size bigint NULL, encoded_body_size integer NULL, decoded_body_size integer NULL, + tab_id text NULL, PRIMARY KEY (session_id, message_id, timestamp) ); CREATE INDEX resources_session_id_idx ON events.resources (session_id); @@ -787,6 +801,7 @@ $$ min_used_js_heap_size bigint NOT NULL, avg_used_js_heap_size bigint NOT NULL, max_used_js_heap_size bigint NOT NULL, + tab_id text NULL, PRIMARY KEY (session_id, message_id) ); CREATE INDEX performance_session_id_idx ON events.performance (session_id);
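With tab_id now present on the event tables in both init schemas, per-tab breakdowns of a session become a straightforward query. A hedged usage sketch with psycopg2 (connection string and session id are placeholders; the column is nullable, so events recorded before the upgrade fall into a single bucket):

    # Sketch: clicks per browser tab for one session, using the new nullable tab_id column.
    import psycopg2

    conn = psycopg2.connect("dbname=openreplay user=postgres host=localhost")
    with conn.cursor() as cur:
        cur.execute(
            """SELECT COALESCE(tab_id, 'unknown-tab') AS tab, count(*) AS clicks
               FROM events.clicks
               WHERE session_id = %(session_id)s
               GROUP BY 1
               ORDER BY 2 DESC;""",
            {"session_id": 123456789},
        )
        for tab, clicks in cur.fetchall():
            print(tab, clicks)
    conn.close()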