From 3f52cefad2b313a53f55e0ba8c32e03bc61a545e Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Sun, 5 Sep 2021 11:39:53 +0100 Subject: [PATCH 01/19] feat(db): added new foreign-key indexes --- scripts/helm/db/init_dbs/postgresql/1.4.0/1.4.0.sql | 4 ++++ scripts/helm/db/init_dbs/postgresql/init_schema.sql | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/helm/db/init_dbs/postgresql/1.4.0/1.4.0.sql b/scripts/helm/db/init_dbs/postgresql/1.4.0/1.4.0.sql index dd8f6f318..fa0fdebdc 100644 --- a/scripts/helm/db/init_dbs/postgresql/1.4.0/1.4.0.sql +++ b/scripts/helm/db/init_dbs/postgresql/1.4.0/1.4.0.sql @@ -1,5 +1,9 @@ BEGIN; CREATE INDEX pages_session_id_timestamp_idx ON events.pages (session_id, timestamp); +CREATE INDEX projects_tenant_id_idx ON projects(tenant_id); +CREATE INDEX webhooks_tenant_id_idx ON webhooks(tenant_id); +CREATE INDEX issues_project_id_idx ON issues(project_id); + COMMIT; \ No newline at end of file diff --git a/scripts/helm/db/init_dbs/postgresql/init_schema.sql b/scripts/helm/db/init_dbs/postgresql/init_schema.sql index 419ed9c9b..babaf93e6 100644 --- a/scripts/helm/db/init_dbs/postgresql/init_schema.sql +++ b/scripts/helm/db/init_dbs/postgresql/init_schema.sql @@ -172,6 +172,7 @@ CREATE TABLE projects "defaultInputMode": "plain" }'::jsonb -- ?????? ); +CREATE INDEX projects_tenant_id_idx ON projects(tenant_id); CREATE OR REPLACE FUNCTION notify_project() RETURNS trigger AS $$ @@ -247,7 +248,7 @@ create table webhooks index integer default 0 not null, name varchar(100) ); - +CREATE INDEX webhooks_tenant_id_idx ON webhooks(tenant_id); -- --- notifications.sql --- @@ -387,6 +388,7 @@ CREATE TABLE issues ); CREATE INDEX ON issues (issue_id, type); CREATE INDEX issues_context_string_gin_idx ON public.issues USING GIN (context_string gin_trgm_ops); +CREATE INDEX issues_project_id_idx ON issues(project_id); -- --- errors.sql --- From cc84329547524ad7fd847d2b7ff89861cf48b474 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Sun, 5 Sep 2021 11:41:16 +0100 Subject: [PATCH 02/19] feat(db): added new foreign-key indexes --- scripts/helm/db/init_dbs/postgresql/1.4.0/1.4.0.sql | 1 + scripts/helm/db/init_dbs/postgresql/init_schema.sql | 1 + 2 files changed, 2 insertions(+) diff --git a/scripts/helm/db/init_dbs/postgresql/1.4.0/1.4.0.sql b/scripts/helm/db/init_dbs/postgresql/1.4.0/1.4.0.sql index fa0fdebdc..5b5a8b3de 100644 --- a/scripts/helm/db/init_dbs/postgresql/1.4.0/1.4.0.sql +++ b/scripts/helm/db/init_dbs/postgresql/1.4.0/1.4.0.sql @@ -4,6 +4,7 @@ CREATE INDEX pages_session_id_timestamp_idx ON events.pages (session_id, timesta CREATE INDEX projects_tenant_id_idx ON projects(tenant_id); CREATE INDEX webhooks_tenant_id_idx ON webhooks(tenant_id); CREATE INDEX issues_project_id_idx ON issues(project_id); +CREATE INDEX jobs_project_id_idx ON jobs(project_id); COMMIT; \ No newline at end of file diff --git a/scripts/helm/db/init_dbs/postgresql/init_schema.sql b/scripts/helm/db/init_dbs/postgresql/init_schema.sql index babaf93e6..65811f7ba 100644 --- a/scripts/helm/db/init_dbs/postgresql/init_schema.sql +++ b/scripts/helm/db/init_dbs/postgresql/init_schema.sql @@ -872,5 +872,6 @@ CREATE TABLE jobs ); CREATE INDEX ON jobs (status); CREATE INDEX ON jobs (start_at); +CREATE INDEX jobs_project_id_idx ON jobs(project_id); COMMIT; From bec4bc37d55fb45c23b67e74c21dd0735d2e811b Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Sun, 5 Sep 2021 14:25:43 +0100 Subject: [PATCH 03/19] feat(api): pg_client fixed exception handler --- api/chalicelib/utils/pg_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/chalicelib/utils/pg_client.py b/api/chalicelib/utils/pg_client.py index ba72868d6..c54e514ec 100644 --- a/api/chalicelib/utils/pg_client.py +++ b/api/chalicelib/utils/pg_client.py @@ -51,7 +51,7 @@ class PostgresClient: try: self.connection.commit() self.cursor.close() - except: + except Exception as error: print("Error while committing/closing PG-connection", error) raise error finally: From f147ab0bd1f5888540a04ec65a0980f7b2eb2e2a Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Mon, 6 Sep 2021 23:56:51 +0100 Subject: [PATCH 04/19] feat(api): changed change password response --- api/chalicelib/core/users.py | 23 +++++++++++++++++++++-- ee/api/chalicelib/core/users.py | 24 ++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/api/chalicelib/core/users.py b/api/chalicelib/core/users.py index a87a6023f..917328910 100644 --- a/api/chalicelib/core/users.py +++ b/api/chalicelib/core/users.py @@ -426,8 +426,27 @@ def change_password(tenant_id, user_id, email, old_password, new_password): if auth is None: return {"errors": ["wrong password"]} changes = {"password": new_password, "generatedPassword": False} - return {"data": update(tenant_id=tenant_id, user_id=user_id, changes=changes), - "jwt": authenticate(email, new_password)["jwt"]} + user = update(tenant_id=tenant_id, user_id=user_id, changes=changes) + r = authenticate(user['email'], new_password) + tenant_id = r.pop("tenantId") + + r["limits"] = { + "teamMember": -1, + "projects": -1, + "metadata": metadata.get_remaining_metadata_with_count(tenant_id)} + + c = tenants.get_by_tenant_id(tenant_id) + c.pop("createdAt") + c["projects"] = projects.get_projects(tenant_id=tenant_id, recording_state=True, recorded=True, + stack_integrations=True) + c["smtp"] = helper.has_smtp() + return { + 'jwt': r.pop('jwt'), + 'data': { + "user": r, + "client": c + } + } def set_password_invitation(user_id, new_password): diff --git a/ee/api/chalicelib/core/users.py b/ee/api/chalicelib/core/users.py index 034a9549d..6c3434255 100644 --- a/ee/api/chalicelib/core/users.py +++ b/ee/api/chalicelib/core/users.py @@ -436,8 +436,27 @@ def change_password(tenant_id, user_id, email, old_password, new_password): if auth is None: return {"errors": ["wrong password"]} changes = {"password": new_password, "generatedPassword": False} - return {"data": update(tenant_id=tenant_id, user_id=user_id, changes=changes), - "jwt": authenticate(email, new_password)["jwt"]} + user = update(tenant_id=tenant_id, user_id=user_id, changes=changes) + r = authenticate(user['email'], new_password) + + tenant_id = r.pop("tenantId") + r["limits"] = { + "teamMember": -1, + "projects": -1, + "metadata": metadata.get_remaining_metadata_with_count(tenant_id)} + + c = tenants.get_by_tenant_id(tenant_id) + c.pop("createdAt") + c["projects"] = projects.get_projects(tenant_id=tenant_id, recording_state=True, recorded=True, + stack_integrations=True) + c["smtp"] = helper.has_smtp() + return { + 'jwt': r.pop('jwt'), + 'data': { + "user": r, + "client": c, + } + } def set_password_invitation(tenant_id, user_id, new_password): @@ -457,6 +476,7 @@ def set_password_invitation(tenant_id, user_id, new_password): c.pop("createdAt") c["projects"] = projects.get_projects(tenant_id=tenant_id, recording_state=True, recorded=True, stack_integrations=True) + c["smtp"] = helper.has_smtp() return { 'jwt': r.pop('jwt'), 'data': { From c5a27794150f735b864e257b8c4ad348d3dccf42 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Mon, 20 Sep 2021 20:33:30 +0200 Subject: [PATCH 05/19] feat(api): insights retention --- api/chalicelib/blueprints/subs/bp_insights.py | 11 ++++ api/chalicelib/core/insights.py | 55 ++++++++++++++++++- 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/api/chalicelib/blueprints/subs/bp_insights.py b/api/chalicelib/blueprints/subs/bp_insights.py index 062e8fdc8..8c79e2663 100644 --- a/api/chalicelib/blueprints/subs/bp_insights.py +++ b/api/chalicelib/blueprints/subs/bp_insights.py @@ -28,6 +28,17 @@ def get_insights_journey(projectId, context): return {"data": insights.get_journey(project_id=projectId, **{**data, **args})} + +@app.route('/{projectId}/insights/users_retention', methods=['GET', 'POST']) +def get_users_retention(projectId, context): + data = app.current_request.json_body + if data is None: + data = {} + params = app.current_request.query_params + args = dashboard.dashboard_args(params) + + return {"data": insights.get_retention(project_id=projectId, **{**data, **args})} + # # # @app.route('/{projectId}/dashboard/{widget}/search', methods=['GET']) diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index d470e1537..37042e64d 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -98,8 +98,61 @@ def get_journey(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimest params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, "event_start": event_start, "JOURNEY_DEPTH": JOURNEY_DEPTH, **__get_constraint_values(args), **extra_values} - print(cur.mogrify(pg_query, params)) + # print(cur.mogrify(pg_query, params)) cur.execute(cur.mogrify(pg_query, params)) rows = cur.fetchall() return __transform_journey(rows) + + +def __compute_retention_percentage(rows): + if rows is None or len(rows) == 0: + return rows + t = -1 + for r in rows: + if r["week"] == 0: + t = r["usersCount"] + r["percentage"] = r["usersCount"] / t + return rows + + +@dev.timed +def get_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), filters=[], + **args): + pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", + time_constraint=True) + + with pg_client.PostgresClient() as cur: + pg_query = f"""SELECT EXTRACT(EPOCH FROM first_connexion_week::date)::bigint*1000 AS first_connexion_week, + FLOOR(DATE_PART('day', connexion_week - first_connexion_week) / 7)::integer AS week, + COUNT(DISTINCT connexions_list.user_id) AS users_count, + ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users + FROM (SELECT DISTINCT user_id, MIN(DATE_TRUNC('week', to_timestamp(start_ts / 1000))) AS first_connexion_week + FROM sessions + WHERE {" AND ".join(pg_sub_query)} + AND NOT EXISTS((SELECT 1 + FROM sessions AS bsess + WHERE bsess.start_ts Date: Tue, 21 Sep 2021 17:19:12 +0200 Subject: [PATCH 06/19] feat(api): insights retention: fill missing values feat(api): insights retention: max date restrictions --- api/chalicelib/core/insights.py | 61 ++++++++++++++++++++++++++++++--- api/chalicelib/utils/TimeUTC.py | 9 +++++ 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index 37042e64d..8e8611dfb 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -116,9 +116,61 @@ def __compute_retention_percentage(rows): return rows +def __complete_retention(rows, start_date, end_date=None): + if rows is None or len(rows) == 0: + return rows + max_week = 10 + week = 0 + delta_date = 0 + while max_week > 0: + start_date += TimeUTC.MS_WEEK + if end_date is not None and start_date >= end_date: + break + delta = 0 + if delta_date + week >= len(rows) \ + or delta_date + week < len(rows) and rows[delta_date + week]["firstConnexionWeek"] > start_date: + for i in range(max_week): + if end_date is not None and start_date + i * TimeUTC.MS_WEEK >= end_date: + break + + neutral = { + "firstConnexionWeek": start_date, + "week": i, + "usersCount": 0, + "connectedUsers": [], + "percentage": 0 + } + rows.insert(delta_date + week + i, neutral) + delta = i + else: + for i in range(max_week): + if end_date is not None and start_date + i * TimeUTC.MS_WEEK >= end_date: + break + + neutral = { + "firstConnexionWeek": start_date, + "week": i, + "usersCount": 0, + "connectedUsers": [], + "percentage": 0 + } + if delta_date + week + i < len(rows) \ + and i != rows[delta_date + week + i]["week"]: + rows.insert(delta_date + week + i, neutral) + elif delta_date + week + i >= len(rows): + rows.append(neutral) + delta = i + week += delta + max_week -= 1 + delta_date += 1 + return rows + + @dev.timed -def get_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), filters=[], +def get_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], **args): + startTimestamp = TimeUTC.trunc_week(startTimestamp) + endTimestamp = startTimestamp + 10 * 7 * 24 * 60 * 60 * 1000 pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", time_constraint=True) @@ -130,6 +182,7 @@ def get_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTime FROM (SELECT DISTINCT user_id, MIN(DATE_TRUNC('week', to_timestamp(start_ts / 1000))) AS first_connexion_week FROM sessions WHERE {" AND ".join(pg_sub_query)} + AND user_id IS NOT NULL AND NOT EXISTS((SELECT 1 FROM sessions AS bsess WHERE bsess.start_ts Date: Tue, 21 Sep 2021 17:22:53 +0200 Subject: [PATCH 07/19] feat(api): insights retention changed end date --- api/chalicelib/core/insights.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index 8e8611dfb..c39fb5cea 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -170,7 +170,7 @@ def __complete_retention(rows, start_date, end_date=None): def get_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], **args): startTimestamp = TimeUTC.trunc_week(startTimestamp) - endTimestamp = startTimestamp + 10 * 7 * 24 * 60 * 60 * 1000 + endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", time_constraint=True) From 09176692d081cf44ec63f75624a29b6c6d2c2b54 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Tue, 21 Sep 2021 20:32:52 +0200 Subject: [PATCH 08/19] feat(api): insights feature retention --- api/chalicelib/blueprints/subs/bp_insights.py | 13 ++- api/chalicelib/core/insights.py | 95 ++++++++++++++++++- 2 files changed, 102 insertions(+), 6 deletions(-) diff --git a/api/chalicelib/blueprints/subs/bp_insights.py b/api/chalicelib/blueprints/subs/bp_insights.py index 8c79e2663..cdb207c21 100644 --- a/api/chalicelib/blueprints/subs/bp_insights.py +++ b/api/chalicelib/blueprints/subs/bp_insights.py @@ -37,7 +37,18 @@ def get_users_retention(projectId, context): params = app.current_request.query_params args = dashboard.dashboard_args(params) - return {"data": insights.get_retention(project_id=projectId, **{**data, **args})} + return {"data": insights.get_users_retention(project_id=projectId, **{**data, **args})} + + +@app.route('/{projectId}/insights/feature_retention', methods=['GET', 'POST']) +def get_feature_retention(projectId, context): + data = app.current_request.json_body + if data is None: + data = {} + params = app.current_request.query_params + args = dashboard.dashboard_args(params) + + return {"data": insights.get_feature_retention(project_id=projectId, **{**data, **args})} # # diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index c39fb5cea..1df47eda2 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -167,13 +167,13 @@ def __complete_retention(rows, start_date, end_date=None): @dev.timed -def get_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], - **args): +def get_users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], + **args): startTimestamp = TimeUTC.trunc_week(startTimestamp) endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", time_constraint=True) - + pg_sub_query.append("user_id IS NOT NULL") with pg_client.PostgresClient() as cur: pg_query = f"""SELECT EXTRACT(EPOCH FROM first_connexion_week::date)::bigint*1000 AS first_connexion_week, FLOOR(DATE_PART('day', connexion_week - first_connexion_week) / 7)::integer AS week, @@ -181,8 +181,7 @@ def get_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTim ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users FROM (SELECT DISTINCT user_id, MIN(DATE_TRUNC('week', to_timestamp(start_ts / 1000))) AS first_connexion_week FROM sessions - WHERE {" AND ".join(pg_sub_query)} - AND user_id IS NOT NULL + WHERE {" AND ".join(pg_sub_query)} AND NOT EXISTS((SELECT 1 FROM sessions AS bsess WHERE bsess.start_ts= %(startTimestamp)s") + pg_sub_query.append("feature.timestamp < %(endTimestamp)s") + event_table = JOURNEY_TYPES["PAGES"]["table"] + event_column = JOURNEY_TYPES["PAGES"]["column"] + extra_values = {"value": "/"} + default = True + for f in filters: + if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): + event_table = JOURNEY_TYPES[f["value"]]["table"] + event_column = JOURNEY_TYPES[f["value"]]["column"] + elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]: + pg_sub_query.append(f"sessions.user_id = %(user_id)s") + extra_values["user_id"] = f["value"] + # TODO: This will change later when the search is clear + default = False + extra_values["value"] = f["value"] + pg_sub_query.append(f"feature.{event_column} = %(value)s") + + with pg_client.PostgresClient() as cur: + if default: + # get most used value + pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count + FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query[:-1])} + AND length({event_column}) > 2 + GROUP BY value + ORDER BY count DESC + LIMIT 1;""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + # print(cur.mogrify(pg_query, params)) + cur.execute(cur.mogrify(pg_query, params)) + row = cur.fetchone() + if row is not None: + extra_values["value"] = row["value"] + + pg_query = f"""SELECT EXTRACT(EPOCH FROM first_connexion_week::date)::bigint*1000 AS first_connexion_week, + FLOOR(DATE_PART('day', connexion_week - first_connexion_week) / 7)::integer AS week, + COUNT(DISTINCT connexions_list.user_id) AS users_count, + ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users + FROM (SELECT DISTINCT user_id, MIN(DATE_TRUNC('week', to_timestamp(start_ts / 1000))) AS first_connexion_week + FROM sessions INNER JOIN {event_table} AS feature USING (session_id) + WHERE {" AND ".join(pg_sub_query)} + AND user_id IS NOT NULL + AND NOT EXISTS((SELECT 1 + FROM sessions AS bsess INNER JOIN {event_table} AS bfeature USING (session_id) + WHERE bsess.start_ts= %(startTimestamp)s + AND feature.timestamp < %(endTimestamp)s + AND feature.{event_column} = %(value)s + GROUP BY connexion_week, user_id) AS connexions_list ON (TRUE) + GROUP BY first_connexion_week, week + ORDER BY first_connexion_week, week;""" + + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + # print(cur.mogrify(pg_query, params)) + cur.execute(cur.mogrify(pg_query, params)) + rows = cur.fetchall() + rows = __compute_retention_percentage(helper.list_to_camel_case(rows)) + return __complete_retention(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) From d4ffd10af67318ee5bf772c38f9058b09a57f408 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 22 Sep 2021 15:04:35 +0200 Subject: [PATCH 09/19] feat(api): changed insights feature retention query feat(db): optimized indexes for feature retention --- api/chalicelib/core/insights.py | 28 +++++++++---------- .../db/init_dbs/postgresql/1.3.6/1.3.6.sql | 7 +++++ .../db/init_dbs/postgresql/init_schema.sql | 11 +++++--- 3 files changed, 28 insertions(+), 18 deletions(-) create mode 100644 scripts/helm/db/init_dbs/postgresql/1.3.6/1.3.6.sql diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index 1df47eda2..18390d987 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -249,7 +249,6 @@ def get_feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70) LIMIT 1;""" params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} - # print(cur.mogrify(pg_query, params)) cur.execute(cur.mogrify(pg_query, params)) row = cur.fetchone() if row is not None: @@ -259,19 +258,20 @@ def get_feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70) FLOOR(DATE_PART('day', connexion_week - first_connexion_week) / 7)::integer AS week, COUNT(DISTINCT connexions_list.user_id) AS users_count, ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users - FROM (SELECT DISTINCT user_id, MIN(DATE_TRUNC('week', to_timestamp(start_ts / 1000))) AS first_connexion_week - FROM sessions INNER JOIN {event_table} AS feature USING (session_id) - WHERE {" AND ".join(pg_sub_query)} - AND user_id IS NOT NULL - AND NOT EXISTS((SELECT 1 - FROM sessions AS bsess INNER JOIN {event_table} AS bfeature USING (session_id) - WHERE bsess.start_ts 0; +CREATE INDEX pages_base_path_base_pathLNGT2_idx ON events.pages (base_path) WHERE length(base_path) > 2; + +COMMIT; \ No newline at end of file diff --git a/scripts/helm/db/init_dbs/postgresql/init_schema.sql b/scripts/helm/db/init_dbs/postgresql/init_schema.sql index 65811f7ba..586f84e74 100644 --- a/scripts/helm/db/init_dbs/postgresql/init_schema.sql +++ b/scripts/helm/db/init_dbs/postgresql/init_schema.sql @@ -172,7 +172,7 @@ CREATE TABLE projects "defaultInputMode": "plain" }'::jsonb -- ?????? ); -CREATE INDEX projects_tenant_id_idx ON projects(tenant_id); +CREATE INDEX projects_tenant_id_idx ON projects (tenant_id); CREATE OR REPLACE FUNCTION notify_project() RETURNS trigger AS $$ @@ -248,7 +248,7 @@ create table webhooks index integer default 0 not null, name varchar(100) ); -CREATE INDEX webhooks_tenant_id_idx ON webhooks(tenant_id); +CREATE INDEX webhooks_tenant_id_idx ON webhooks (tenant_id); -- --- notifications.sql --- @@ -388,7 +388,7 @@ CREATE TABLE issues ); CREATE INDEX ON issues (issue_id, type); CREATE INDEX issues_context_string_gin_idx ON public.issues USING GIN (context_string gin_trgm_ops); -CREATE INDEX issues_project_id_idx ON issues(project_id); +CREATE INDEX issues_project_id_idx ON issues (project_id); -- --- errors.sql --- @@ -522,6 +522,8 @@ CREATE INDEX sessions_start_ts_idx ON public.sessions (start_ts) WHERE duration CREATE INDEX sessions_project_id_idx ON public.sessions (project_id) WHERE duration > 0; CREATE INDEX sessions_session_id_project_id_start_ts_idx ON sessions (session_id, project_id, start_ts) WHERE duration > 0; CREATE INDEX sessions_session_id_project_id_start_ts_durationNN_idx ON sessions (session_id, project_id, start_ts) WHERE duration IS NOT NULL; +CREATE INDEX sessions_user_id_useridNN_idx ON sessions (user_id) WHERE user_id IS NOT NULL; +CREATE INDEX sessions_uid_projectid_startts_sessionid_uidNN_durGTZ_idx ON sessions (user_id, project_id, start_ts, session_id) WHERE user_id IS NOT NULL AND duration > 0; ALTER TABLE public.sessions ADD CONSTRAINT web_browser_constraint CHECK ( (sessions.platform = 'web' AND sessions.user_browser NOTNULL) OR @@ -679,6 +681,7 @@ CREATE INDEX pages_timestamp_metgt0_idx ON events.pages (timestamp) WHERE respon CREATE INDEX pages_session_id_speed_indexgt0nn_idx ON events.pages (session_id, speed_index) WHERE speed_index > 0 AND speed_index IS NOT NULL; CREATE INDEX pages_session_id_timestamp_dom_building_timegt0nn_idx ON events.pages (session_id, timestamp, dom_building_time) WHERE dom_building_time > 0 AND dom_building_time IS NOT NULL; CREATE INDEX pages_base_path_session_id_timestamp_idx ON events.pages (base_path, session_id, timestamp); +CREATE INDEX pages_base_path_base_pathLNGT2_idx ON events.pages (base_path) WHERE length(base_path) > 2; CREATE TABLE events.clicks @@ -872,6 +875,6 @@ CREATE TABLE jobs ); CREATE INDEX ON jobs (status); CREATE INDEX ON jobs (start_at); -CREATE INDEX jobs_project_id_idx ON jobs(project_id); +CREATE INDEX jobs_project_id_idx ON jobs (project_id); COMMIT; From 1769ee6b328fa09e32d29e2da6fc198c568bfe90 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 22 Sep 2021 19:13:42 +0200 Subject: [PATCH 10/19] feat(api): changed insights feature popularity frequency --- api/chalicelib/blueprints/subs/bp_insights.py | 11 ++++ api/chalicelib/core/insights.py | 61 +++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/api/chalicelib/blueprints/subs/bp_insights.py b/api/chalicelib/blueprints/subs/bp_insights.py index cdb207c21..64030dd3d 100644 --- a/api/chalicelib/blueprints/subs/bp_insights.py +++ b/api/chalicelib/blueprints/subs/bp_insights.py @@ -50,6 +50,17 @@ def get_feature_retention(projectId, context): return {"data": insights.get_feature_retention(project_id=projectId, **{**data, **args})} + +@app.route('/{projectId}/insights/feature_popularity_frequency', methods=['GET', 'POST']) +def get_feature_popularity_frequency(projectId, context): + data = app.current_request.json_body + if data is None: + data = {} + params = app.current_request.query_params + args = dashboard.dashboard_args(params) + + return {"data": insights.feature_popularity_frequency(project_id=projectId, **{**data, **args})} + # # # @app.route('/{projectId}/dashboard/{widget}/search', methods=['GET']) diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index 18390d987..a94e75fdb 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -294,3 +294,64 @@ def get_feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70) rows = cur.fetchall() rows = __compute_retention_percentage(helper.list_to_camel_case(rows)) return __complete_retention(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) + + +@dev.timed +def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), + filters=[], + **args): + startTimestamp = TimeUTC.trunc_week(startTimestamp) + endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK + pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", + time_constraint=True) + event_table = JOURNEY_TYPES["CLICK"]["table"] + event_column = JOURNEY_TYPES["CLICK"]["column"] + for f in filters: + if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): + event_table = JOURNEY_TYPES[f["value"]]["table"] + event_column = JOURNEY_TYPES[f["value"]]["column"] + elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]: + pg_sub_query.append(f"sessions.user_id = %(user_id)s") + + with pg_client.PostgresClient() as cur: + pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count + FROM sessions + WHERE {" AND ".join(pg_sub_query)} + AND user_id IS NOT NULL;""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + # print(cur.mogrify(pg_query, params)) + # print("---------------------") + cur.execute(cur.mogrify(pg_query, params)) + all_user_count = cur.fetchone()["count"] + if all_user_count == 0: + return [] + pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") + pg_sub_query.append("feature.timestamp < %(endTimestamp)s") + pg_sub_query.append(f"length({event_column})>2") + pg_query = f"""SELECT {event_column} AS value, COUNT(DISTINCT user_id) AS count + FROM {event_table} AS feature INNER JOIN sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query)} + AND user_id IS NOT NULL + GROUP BY value + ORDER BY count DESC + LIMIT 7;""" + # print(cur.mogrify(pg_query, params)) + # print("---------------------") + cur.execute(cur.mogrify(pg_query, params)) + popularity = cur.fetchall() + pg_query = f"""SELECT {event_column} AS value, COUNT(session_id) AS count + FROM {event_table} AS feature INNER JOIN sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query)} + GROUP BY value;""" + # print(cur.mogrify(pg_query, params)) + # print("---------------------") + cur.execute(cur.mogrify(pg_query, params)) + frequencies = cur.fetchall() + total_usage = sum([f["count"] for f in frequencies]) + frequencies = {f["value"]: f["count"] for f in frequencies} + for p in popularity: + p["popularity"] = p.pop("count") / all_user_count + p["frequency"] = frequencies[p["value"]] / total_usage + + return popularity From fbc2677fc6c1adf4651c85cf5cb952cacd622a4c Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 22 Sep 2021 20:06:28 +0200 Subject: [PATCH 11/19] feat(api): changed insights users acquisition --- api/chalicelib/blueprints/subs/bp_insights.py | 17 +++- api/chalicelib/core/insights.py | 89 ++++++++++++++++--- 2 files changed, 92 insertions(+), 14 deletions(-) diff --git a/api/chalicelib/blueprints/subs/bp_insights.py b/api/chalicelib/blueprints/subs/bp_insights.py index 64030dd3d..c4b38c7f6 100644 --- a/api/chalicelib/blueprints/subs/bp_insights.py +++ b/api/chalicelib/blueprints/subs/bp_insights.py @@ -29,6 +29,17 @@ def get_insights_journey(projectId, context): return {"data": insights.get_journey(project_id=projectId, **{**data, **args})} +@app.route('/{projectId}/insights/users_acquisition', methods=['GET', 'POST']) +def get_users_acquisition(projectId, context): + data = app.current_request.json_body + if data is None: + data = {} + params = app.current_request.query_params + args = dashboard.dashboard_args(params) + + return {"data": insights.get_users_acquisition(project_id=projectId, **{**data, **args})} + + @app.route('/{projectId}/insights/users_retention', methods=['GET', 'POST']) def get_users_retention(projectId, context): data = app.current_request.json_body @@ -40,15 +51,15 @@ def get_users_retention(projectId, context): return {"data": insights.get_users_retention(project_id=projectId, **{**data, **args})} -@app.route('/{projectId}/insights/feature_retention', methods=['GET', 'POST']) -def get_feature_retention(projectId, context): +@app.route('/{projectId}/insights/feature_acquisition', methods=['GET', 'POST']) +def get_feature_acquisition(projectId, context): data = app.current_request.json_body if data is None: data = {} params = app.current_request.query_params args = dashboard.dashboard_args(params) - return {"data": insights.get_feature_retention(project_id=projectId, **{**data, **args})} + return {"data": insights.get_feature_acquisition(project_id=projectId, **{**data, **args})} @app.route('/{projectId}/insights/feature_popularity_frequency', methods=['GET', 'POST']) diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index a94e75fdb..e13608762 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -105,7 +105,7 @@ def get_journey(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimest return __transform_journey(rows) -def __compute_retention_percentage(rows): +def __compute_weekly_percentage(rows): if rows is None or len(rows) == 0: return rows t = -1 @@ -117,8 +117,30 @@ def __compute_retention_percentage(rows): def __complete_retention(rows, start_date, end_date=None): - if rows is None or len(rows) == 0: - return rows + if rows is None: + return [] + max_week = 10 + for i in range(max_week): + if end_date is not None and start_date + i * TimeUTC.MS_WEEK >= end_date: + break + neutral = { + "firstConnexionWeek": start_date, + "week": i, + "usersCount": 0, + "connectedUsers": [], + "percentage": 0 + } + if i < len(rows) \ + and i != rows[i]["week"]: + rows.insert(i, neutral) + elif i >= len(rows): + rows.append(neutral) + return rows + + +def __complete_acquisition(rows, start_date, end_date=None): + if rows is None: + return [] max_week = 10 week = 0 delta_date = 0 @@ -174,6 +196,51 @@ def get_users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", time_constraint=True) pg_sub_query.append("user_id IS NOT NULL") + pg_sub_query.append("DATE_TRUNC('week', to_timestamp(start_ts / 1000)) = to_timestamp(%(startTimestamp)s / 1000)") + with pg_client.PostgresClient() as cur: + pg_query = f"""SELECT FLOOR(DATE_PART('day', connexion_week - DATE_TRUNC('week', to_timestamp(%(startTimestamp)s / 1000)::timestamp)) / 7)::integer AS week, + COUNT(DISTINCT connexions_list.user_id) AS users_count, + ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users + FROM (SELECT DISTINCT user_id + FROM sessions + WHERE {" AND ".join(pg_sub_query)} + AND NOT EXISTS((SELECT 1 + FROM sessions AS bsess + WHERE bsess.start_ts < %(startTimestamp)s + AND project_id = %(project_id)s + AND bsess.user_id = sessions.user_id + LIMIT 1)) + GROUP BY user_id) AS users_list + LEFT JOIN LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week, + user_id + FROM sessions + WHERE users_list.user_id = sessions.user_id + AND %(startTimestamp)s <=sessions.start_ts + AND sessions.project_id = %(project_id)s + AND sessions.start_ts < (%(endTimestamp)s - 1) + GROUP BY connexion_week, user_id + ) AS connexions_list ON (TRUE) + GROUP BY week + ORDER BY week;""" + + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + print(cur.mogrify(pg_query, params)) + cur.execute(cur.mogrify(pg_query, params)) + rows = cur.fetchall() + rows = __compute_weekly_percentage(helper.list_to_camel_case(rows)) + return __complete_retention(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) + + +@dev.timed +def get_users_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), + filters=[], + **args): + startTimestamp = TimeUTC.trunc_week(startTimestamp) + endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK + pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", + time_constraint=True) + pg_sub_query.append("user_id IS NOT NULL") with pg_client.PostgresClient() as cur: pg_query = f"""SELECT EXTRACT(EPOCH FROM first_connexion_week::date)::bigint*1000 AS first_connexion_week, FLOOR(DATE_PART('day', connexion_week - first_connexion_week) / 7)::integer AS week, @@ -184,7 +251,7 @@ def get_users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), WHERE {" AND ".join(pg_sub_query)} AND NOT EXISTS((SELECT 1 FROM sessions AS bsess - WHERE bsess.start_ts Date: Thu, 23 Sep 2021 18:53:36 +0200 Subject: [PATCH 12/19] feat(api): changed insights feature acquisition result feat(api): insights feature retention --- api/chalicelib/blueprints/subs/bp_insights.py | 10 ++ api/chalicelib/core/insights.py | 137 +++++++++++++++--- 2 files changed, 130 insertions(+), 17 deletions(-) diff --git a/api/chalicelib/blueprints/subs/bp_insights.py b/api/chalicelib/blueprints/subs/bp_insights.py index c4b38c7f6..a7529908d 100644 --- a/api/chalicelib/blueprints/subs/bp_insights.py +++ b/api/chalicelib/blueprints/subs/bp_insights.py @@ -51,6 +51,16 @@ def get_users_retention(projectId, context): return {"data": insights.get_users_retention(project_id=projectId, **{**data, **args})} +@app.route('/{projectId}/insights/feature_retention', methods=['GET', 'POST']) +def get_feature_rentention(projectId, context): + data = app.current_request.json_body + if data is None: + data = {} + params = app.current_request.query_params + args = dashboard.dashboard_args(params) + + return {"data": insights.get_feature_retention(project_id=projectId, **{**data, **args})} + @app.route('/{projectId}/insights/feature_acquisition', methods=['GET', 'POST']) def get_feature_acquisition(projectId, context): data = app.current_request.json_body diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index e13608762..e4ed90e24 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -204,6 +204,7 @@ def get_users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), FROM (SELECT DISTINCT user_id FROM sessions WHERE {" AND ".join(pg_sub_query)} + AND DATE_PART('week', to_timestamp((sessions.start_ts - %(startTimestamp)s)/1000)) = 1 AND NOT EXISTS((SELECT 1 FROM sessions AS bsess WHERE bsess.start_ts < %(startTimestamp)s @@ -225,11 +226,14 @@ def get_users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, **__get_constraint_values(args)} - print(cur.mogrify(pg_query, params)) + # print(cur.mogrify(pg_query, params)) cur.execute(cur.mogrify(pg_query, params)) rows = cur.fetchall() rows = __compute_weekly_percentage(helper.list_to_camel_case(rows)) - return __complete_retention(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) + return { + "startTimestamp": startTimestamp, + "chart": __complete_retention(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) + } @dev.timed @@ -274,13 +278,16 @@ def get_users_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70) cur.execute(cur.mogrify(pg_query, params)) rows = cur.fetchall() rows = __compute_weekly_percentage(helper.list_to_camel_case(rows)) - return __complete_acquisition(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) + return { + "startTimestamp": startTimestamp, + "chart": __complete_acquisition(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) + } @dev.timed -def get_feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), - filters=[], - **args): +def get_feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), + filters=[], + **args): startTimestamp = TimeUTC.trunc_week(startTimestamp) endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", @@ -288,20 +295,21 @@ def get_feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-7 pg_sub_query.append("user_id IS NOT NULL") pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") pg_sub_query.append("feature.timestamp < %(endTimestamp)s") - event_table = JOURNEY_TYPES["PAGES"]["table"] - event_column = JOURNEY_TYPES["PAGES"]["column"] - extra_values = {"value": "/"} + event_type = "PAGES" + event_value = "/" + extra_values = {} default = True for f in filters: if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): - event_table = JOURNEY_TYPES[f["value"]]["table"] - event_column = JOURNEY_TYPES[f["value"]]["column"] + event_type = f["value"] + elif f["type"] == "EVENT_VALUE" and JOURNEY_TYPES.get(f["value"]): + event_value = f["value"] elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]: pg_sub_query.append(f"sessions.user_id = %(user_id)s") extra_values["user_id"] = f["value"] - # TODO: This will change later when the search is clear default = False - extra_values["value"] = f["value"] + event_table = JOURNEY_TYPES[event_type]["table"] + event_column = JOURNEY_TYPES[event_type]["column"] pg_sub_query.append(f"feature.{event_column} = %(value)s") with pg_client.PostgresClient() as cur: @@ -319,7 +327,99 @@ def get_feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-7 cur.execute(cur.mogrify(pg_query, params)) row = cur.fetchone() if row is not None: - extra_values["value"] = row["value"] + event_value = row["value"] + extra_values["value"] = event_value + + pg_query = f"""SELECT FLOOR(DATE_PART('day', connexion_week - to_timestamp(%(startTimestamp)s/1000)) / 7)::integer AS week, + COUNT(DISTINCT connexions_list.user_id) AS users_count, + ARRAY_AGG(DISTINCT connexions_list.user_id) AS connected_users + FROM (SELECT DISTINCT user_id + FROM sessions INNER JOIN {event_table} AS feature USING (session_id) + WHERE {" AND ".join(pg_sub_query)} + AND DATE_PART('week', to_timestamp((sessions.start_ts - %(startTimestamp)s)/1000)) = 1 + AND NOT EXISTS((SELECT 1 + FROM sessions AS bsess INNER JOIN {event_table} AS bfeature USING (session_id) + WHERE bsess.start_ts<%(startTimestamp)s + AND project_id = %(project_id)s + AND bsess.user_id = sessions.user_id + AND bfeature.timestamp<%(startTimestamp)s + AND bfeature.{event_column}=%(value)s + LIMIT 1)) + GROUP BY user_id) AS users_list + LEFT JOIN LATERAL (SELECT DATE_TRUNC('week', to_timestamp(start_ts / 1000)::timestamp) AS connexion_week, + user_id + FROM sessions + INNER JOIN events.pages AS feature USING (session_id) + WHERE users_list.user_id = sessions.user_id + AND %(startTimestamp)s <= sessions.start_ts + AND sessions.project_id = 1 + AND sessions.start_ts < (%(endTimestamp)s - 1) + AND feature.timestamp >= %(startTimestamp)s + AND feature.timestamp < %(endTimestamp)s + AND feature.{event_column} = %(value)s + GROUP BY connexion_week, user_id) AS connexions_list ON (TRUE) + GROUP BY week + ORDER BY week;""" + + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + # print(cur.mogrify(pg_query, params)) + cur.execute(cur.mogrify(pg_query, params)) + rows = cur.fetchall() + rows = __compute_weekly_percentage(helper.list_to_camel_case(rows)) + return { + "startTimestamp": startTimestamp, + "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}], + "chart": __complete_retention(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) + } + + +@dev.timed +def get_feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), + filters=[], + **args): + startTimestamp = TimeUTC.trunc_week(startTimestamp) + endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK + pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", + time_constraint=True) + pg_sub_query.append("user_id IS NOT NULL") + pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") + pg_sub_query.append("feature.timestamp < %(endTimestamp)s") + event_type = "PAGES" + event_value = "/" + extra_values = {} + default = True + for f in filters: + if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): + event_type = f["value"] + elif f["type"] == "EVENT_VALUE" and JOURNEY_TYPES.get(f["value"]): + event_value = f["value"] + elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]: + pg_sub_query.append(f"sessions.user_id = %(user_id)s") + extra_values["user_id"] = f["value"] + default = False + event_table = JOURNEY_TYPES[event_type]["table"] + event_column = JOURNEY_TYPES[event_type]["column"] + + pg_sub_query.append(f"feature.{event_column} = %(value)s") + + with pg_client.PostgresClient() as cur: + if default: + # get most used value + pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count + FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query[:-1])} + AND length({event_column}) > 2 + GROUP BY value + ORDER BY count DESC + LIMIT 1;""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + cur.execute(cur.mogrify(pg_query, params)) + row = cur.fetchone() + if row is not None: + event_value = row["value"] + extra_values["value"] = event_value pg_query = f"""SELECT EXTRACT(EPOCH FROM first_connexion_week::date)::bigint*1000 AS first_connexion_week, FLOOR(DATE_PART('day', connexion_week - first_connexion_week) / 7)::integer AS week, @@ -329,7 +429,6 @@ def get_feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-7 FROM(SELECT DISTINCT user_id, MIN(start_ts) AS first_connexion_week FROM sessions INNER JOIN {event_table} AS feature USING (session_id) WHERE {" AND ".join(pg_sub_query)} - AND user_id IS NOT NULL AND NOT EXISTS((SELECT 1 FROM sessions AS bsess INNER JOIN {event_table} AS bfeature USING (session_id) WHERE bsess.start_ts<%(startTimestamp)s @@ -345,7 +444,7 @@ def get_feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-7 WHERE users_list.user_id = sessions.user_id AND first_connexion_week <= DATE_TRUNC('week', to_timestamp(sessions.start_ts / 1000)::timestamp) - AND sessions.project_id = 1 + AND sessions.project_id = %(project_id)s AND sessions.start_ts < (%(endTimestamp)s - 1) AND feature.timestamp >= %(startTimestamp)s AND feature.timestamp < %(endTimestamp)s @@ -360,7 +459,11 @@ def get_feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-7 cur.execute(cur.mogrify(pg_query, params)) rows = cur.fetchall() rows = __compute_weekly_percentage(helper.list_to_camel_case(rows)) - return __complete_acquisition(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) + return { + "startTimestamp": startTimestamp, + "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}], + "chart": __complete_acquisition(rows=rows, start_date=startTimestamp, end_date=TimeUTC.now()) + } @dev.timed From 89ea81ae7743077241bea2eee18a799f22288042 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Thu, 23 Sep 2021 19:28:40 +0200 Subject: [PATCH 13/19] feat(api): insights active users --- api/chalicelib/blueprints/subs/bp_insights.py | 22 ++++-- api/chalicelib/core/insights.py | 71 ++++++++++++++----- api/chalicelib/utils/TimeUTC.py | 7 ++ 3 files changed, 78 insertions(+), 22 deletions(-) diff --git a/api/chalicelib/blueprints/subs/bp_insights.py b/api/chalicelib/blueprints/subs/bp_insights.py index a7529908d..463b52d3d 100644 --- a/api/chalicelib/blueprints/subs/bp_insights.py +++ b/api/chalicelib/blueprints/subs/bp_insights.py @@ -26,7 +26,7 @@ def get_insights_journey(projectId, context): params = app.current_request.query_params args = dashboard.dashboard_args(params) - return {"data": insights.get_journey(project_id=projectId, **{**data, **args})} + return {"data": insights.journey(project_id=projectId, **{**data, **args})} @app.route('/{projectId}/insights/users_acquisition', methods=['GET', 'POST']) @@ -37,7 +37,7 @@ def get_users_acquisition(projectId, context): params = app.current_request.query_params args = dashboard.dashboard_args(params) - return {"data": insights.get_users_acquisition(project_id=projectId, **{**data, **args})} + return {"data": insights.users_acquisition(project_id=projectId, **{**data, **args})} @app.route('/{projectId}/insights/users_retention', methods=['GET', 'POST']) @@ -48,7 +48,7 @@ def get_users_retention(projectId, context): params = app.current_request.query_params args = dashboard.dashboard_args(params) - return {"data": insights.get_users_retention(project_id=projectId, **{**data, **args})} + return {"data": insights.users_retention(project_id=projectId, **{**data, **args})} @app.route('/{projectId}/insights/feature_retention', methods=['GET', 'POST']) @@ -59,7 +59,8 @@ def get_feature_rentention(projectId, context): params = app.current_request.query_params args = dashboard.dashboard_args(params) - return {"data": insights.get_feature_retention(project_id=projectId, **{**data, **args})} + return {"data": insights.feature_retention(project_id=projectId, **{**data, **args})} + @app.route('/{projectId}/insights/feature_acquisition', methods=['GET', 'POST']) def get_feature_acquisition(projectId, context): @@ -69,7 +70,7 @@ def get_feature_acquisition(projectId, context): params = app.current_request.query_params args = dashboard.dashboard_args(params) - return {"data": insights.get_feature_acquisition(project_id=projectId, **{**data, **args})} + return {"data": insights.feature_acquisition(project_id=projectId, **{**data, **args})} @app.route('/{projectId}/insights/feature_popularity_frequency', methods=['GET', 'POST']) @@ -82,6 +83,17 @@ def get_feature_popularity_frequency(projectId, context): return {"data": insights.feature_popularity_frequency(project_id=projectId, **{**data, **args})} + +@app.route('/{projectId}/insights/users_active', methods=['GET', 'POST']) +def get_users_active(projectId, context): + data = app.current_request.json_body + if data is None: + data = {} + params = app.current_request.query_params + args = dashboard.dashboard_args(params) + + return {"data": insights.users_active(project_id=projectId, **{**data, **args})} + # # # @app.route('/{projectId}/dashboard/{widget}/search', methods=['GET']) diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index e4ed90e24..03f96ea25 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -1,5 +1,4 @@ from chalicelib.core import sessions_metas -from chalicelib.utils import args_transformer from chalicelib.utils import helper, dev from chalicelib.utils import pg_client from chalicelib.utils.TimeUTC import TimeUTC @@ -32,7 +31,7 @@ JOURNEY_TYPES = { @dev.timed -def get_journey(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), filters=[], **args): +def journey(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), filters=[], **args): pg_sub_query_subset = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", time_constraint=True) event_start = None @@ -189,8 +188,8 @@ def __complete_acquisition(rows, start_date, end_date=None): @dev.timed -def get_users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], - **args): +def users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], + **args): startTimestamp = TimeUTC.trunc_week(startTimestamp) endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", @@ -237,9 +236,9 @@ def get_users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), @dev.timed -def get_users_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), - filters=[], - **args): +def users_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), + filters=[], + **args): startTimestamp = TimeUTC.trunc_week(startTimestamp) endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", @@ -285,9 +284,9 @@ def get_users_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70) @dev.timed -def get_feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), - filters=[], - **args): +def feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), + filters=[], + **args): startTimestamp = TimeUTC.trunc_week(startTimestamp) endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", @@ -302,12 +301,12 @@ def get_feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70) for f in filters: if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): event_type = f["value"] - elif f["type"] == "EVENT_VALUE" and JOURNEY_TYPES.get(f["value"]): + elif f["type"] == "EVENT_VALUE": event_value = f["value"] + default = False elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]: pg_sub_query.append(f"sessions.user_id = %(user_id)s") extra_values["user_id"] = f["value"] - default = False event_table = JOURNEY_TYPES[event_type]["table"] event_column = JOURNEY_TYPES[event_type]["column"] pg_sub_query.append(f"feature.{event_column} = %(value)s") @@ -375,9 +374,9 @@ def get_feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70) @dev.timed -def get_feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), - filters=[], - **args): +def feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), + filters=[], + **args): startTimestamp = TimeUTC.trunc_week(startTimestamp) endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", @@ -392,12 +391,12 @@ def get_feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-7 for f in filters: if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): event_type = f["value"] - elif f["type"] == "EVENT_VALUE" and JOURNEY_TYPES.get(f["value"]): + elif f["type"] == "EVENT_VALUE": event_value = f["value"] + default = False elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]: pg_sub_query.append(f"sessions.user_id = %(user_id)s") extra_values["user_id"] = f["value"] - default = False event_table = JOURNEY_TYPES[event_type]["table"] event_column = JOURNEY_TYPES[event_type]["column"] @@ -525,3 +524,41 @@ def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_da p["frequency"] = frequencies[p["value"]] / total_usage return popularity + + +@dev.timed +def users_active(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), + filters=[], + **args): + pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=True, + chart=True, data=args) + + period = "DAY" + for f in filters: + if f["type"] == "PERIOD" and f["value"] in ["DAY", "WEEK"]: + period = f["value"] + elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]: + pg_sub_query_chart.append(f"sessions.user_id = %(user_id)s") + + with pg_client.PostgresClient() as cur: + pg_query = f"""SELECT AVG(count) AS avg, JSONB_AGG(chart) AS chart + FROM (SELECT generated_timestamp AS timestamp, + COALESCE(COUNT(users), 0) AS count + FROM generate_series(%(startTimestamp)s, %(endTimestamp)s, %(step_size)s) AS generated_timestamp + LEFT JOIN LATERAL ( SELECT DISTINCT user_id + FROM public.sessions + WHERE {" AND ".join(pg_sub_query_chart)} + ) AS users ON (TRUE) + GROUP BY generated_timestamp + ORDER BY generated_timestamp) AS chart;""" + params = {"step_size": TimeUTC.MS_DAY if period == "DAY" else TimeUTC.MS_WEEK, + "project_id": project_id, + "startTimestamp": TimeUTC.trunc_day(startTimestamp) if period == "DAY" else TimeUTC.trunc_week( + startTimestamp), + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + # print(cur.mogrify(pg_query, params)) + # print("---------------------") + cur.execute(cur.mogrify(pg_query, params)) + row_users = cur.fetchone() + + return row_users diff --git a/api/chalicelib/utils/TimeUTC.py b/api/chalicelib/utils/TimeUTC.py index 9cd353eb4..c95359a00 100644 --- a/api/chalicelib/utils/TimeUTC.py +++ b/api/chalicelib/utils/TimeUTC.py @@ -115,6 +115,13 @@ class TimeUTC: def get_utc_offset(): return int((datetime.now(pytz.utc).now() - datetime.now(pytz.utc).replace(tzinfo=None)).total_seconds() * 1000) + @staticmethod + def trunc_day(timestamp): + dt = TimeUTC.from_ms_timestamp(timestamp) + return TimeUTC.datetime_to_timestamp(dt + .replace(hour=0, minute=0, second=0, microsecond=0) + .astimezone(pytz.utc)) + @staticmethod def trunc_week(timestamp): dt = TimeUTC.from_ms_timestamp(timestamp) From ff44fa2a8fc2ab1400246f038a71f70c406a5f92 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Thu, 23 Sep 2021 20:13:10 +0200 Subject: [PATCH 14/19] feat(api): insights power users --- api/chalicelib/blueprints/subs/bp_insights.py | 11 ++++++++ api/chalicelib/core/insights.py | 26 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/api/chalicelib/blueprints/subs/bp_insights.py b/api/chalicelib/blueprints/subs/bp_insights.py index 463b52d3d..acf4184b2 100644 --- a/api/chalicelib/blueprints/subs/bp_insights.py +++ b/api/chalicelib/blueprints/subs/bp_insights.py @@ -94,6 +94,17 @@ def get_users_active(projectId, context): return {"data": insights.users_active(project_id=projectId, **{**data, **args})} + +@app.route('/{projectId}/insights/users_power', methods=['GET', 'POST']) +def get_users_power(projectId, context): + data = app.current_request.json_body + if data is None: + data = {} + params = app.current_request.query_params + args = dashboard.dashboard_args(params) + + return {"data": insights.users_power(project_id=projectId, **{**data, **args})} + # # # @app.route('/{projectId}/dashboard/{widget}/search', methods=['GET']) diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index 03f96ea25..a591cbd62 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -533,6 +533,7 @@ def users_active(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTime pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=True, chart=True, data=args) + pg_sub_query_chart.append("user_id IS NOT NULL") period = "DAY" for f in filters: if f["type"] == "PERIOD" and f["value"] in ["DAY", "WEEK"]: @@ -562,3 +563,28 @@ def users_active(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTime row_users = cur.fetchone() return row_users + + +@dev.timed +def users_power(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), + filters=[],**args): + pg_sub_query = __get_constraints(project_id=project_id, time_constraint=True, chart=False, data=args) + pg_sub_query.append("user_id IS NOT NULL") + + with pg_client.PostgresClient() as cur: + pg_query = f"""SELECT AVG(count) AS avg, JSONB_AGG(day_users_partition) AS partition + FROM (SELECT number_of_days, COUNT(user_id) AS count + FROM (SELECT user_id, COUNT(DISTINCT DATE_TRUNC('day', to_timestamp(start_ts / 1000))) AS number_of_days + FROM sessions + WHERE {" AND ".join(pg_sub_query)} + GROUP BY 1) AS users_connexions + GROUP BY number_of_days + ORDER BY number_of_days) AS day_users_partition;""" + params = {"project_id": project_id, + "startTimestamp": startTimestamp,"endTimestamp": endTimestamp, **__get_constraint_values(args)} + # print(cur.mogrify(pg_query, params)) + # print("---------------------") + cur.execute(cur.mogrify(pg_query, params)) + row_users = cur.fetchone() + + return helper.dict_to_camel_case(row_users) From f23b273a7073dd618da32080ce24ce214e988ad4 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Fri, 24 Sep 2021 18:51:14 +0200 Subject: [PATCH 15/19] feat(api): insights users slipping --- api/chalicelib/blueprints/subs/bp_insights.py | 10 +++ api/chalicelib/core/insights.py | 68 ++++++++++++++++++- 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/api/chalicelib/blueprints/subs/bp_insights.py b/api/chalicelib/blueprints/subs/bp_insights.py index acf4184b2..f0dc226bc 100644 --- a/api/chalicelib/blueprints/subs/bp_insights.py +++ b/api/chalicelib/blueprints/subs/bp_insights.py @@ -105,6 +105,16 @@ def get_users_power(projectId, context): return {"data": insights.users_power(project_id=projectId, **{**data, **args})} +@app.route('/{projectId}/insights/users_slipping', methods=['GET', 'POST']) +def get_users_slipping(projectId, context): + data = app.current_request.json_body + if data is None: + data = {} + params = app.current_request.query_params + args = dashboard.dashboard_args(params) + + return {"data": insights.users_slipping(project_id=projectId, **{**data, **args})} + # # # @app.route('/{projectId}/dashboard/{widget}/search', methods=['GET']) diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index a591cbd62..a94a7d429 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -567,7 +567,7 @@ def users_active(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTime @dev.timed def users_power(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), - filters=[],**args): + filters=[], **args): pg_sub_query = __get_constraints(project_id=project_id, time_constraint=True, chart=False, data=args) pg_sub_query.append("user_id IS NOT NULL") @@ -581,10 +581,74 @@ def users_power(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimes GROUP BY number_of_days ORDER BY number_of_days) AS day_users_partition;""" params = {"project_id": project_id, - "startTimestamp": startTimestamp,"endTimestamp": endTimestamp, **__get_constraint_values(args)} + "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, **__get_constraint_values(args)} # print(cur.mogrify(pg_query, params)) # print("---------------------") cur.execute(cur.mogrify(pg_query, params)) row_users = cur.fetchone() return helper.dict_to_camel_case(row_users) + + +@dev.timed +def users_slipping(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), + filters=[], **args): + pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", + time_constraint=True) + pg_sub_query.append("user_id IS NOT NULL") + pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") + pg_sub_query.append("feature.timestamp < %(endTimestamp)s") + event_type = "PAGES" + event_value = "/" + extra_values = {} + default = True + for f in filters: + if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): + event_type = f["value"] + elif f["type"] == "EVENT_VALUE": + event_value = f["value"] + default = False + elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]: + pg_sub_query.append(f"sessions.user_id = %(user_id)s") + extra_values["user_id"] = f["value"] + event_table = JOURNEY_TYPES[event_type]["table"] + event_column = JOURNEY_TYPES[event_type]["column"] + pg_sub_query.append(f"feature.{event_column} = %(value)s") + + with pg_client.PostgresClient() as cur: + if default: + # get most used value + pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count + FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query[:-1])} + AND length({event_column}) > 2 + GROUP BY value + ORDER BY count DESC + LIMIT 1;""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + cur.execute(cur.mogrify(pg_query, params)) + row = cur.fetchone() + if row is not None: + event_value = row["value"] + extra_values["value"] = event_value + + pg_query = f"""SELECT user_id, last_time,interactions_count, MIN(start_ts) AS first_seen, MAX(start_ts) AS last_seen + FROM (SELECT user_id, MAX(timestamp) AS last_time, COUNT(DISTINCT session_id) AS interactions_count + FROM {event_table} AS feature INNER JOIN sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query)} + GROUP BY user_id) AS user_last_usage + INNER JOIN sessions USING (user_id) + WHERE EXTRACT(EPOCH FROM now()) * 1000 - last_time > 7 * 24 * 60 * 60 * 1000 + GROUP BY user_id, last_time,interactions_count;""" + + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + # print(cur.mogrify(pg_query, params)) + cur.execute(cur.mogrify(pg_query, params)) + rows = cur.fetchall() + return { + "startTimestamp": startTimestamp, + "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}], + "chart": helper.list_to_camel_case(rows) + } From 8e4c6187d5c63df3d6be897365c73c819f7c53c2 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Fri, 24 Sep 2021 19:18:53 +0200 Subject: [PATCH 16/19] feat(api): insights users slipping added interactions count --- api/chalicelib/core/insights.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index a94a7d429..421072a26 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -633,7 +633,7 @@ def users_slipping(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTi event_value = row["value"] extra_values["value"] = event_value - pg_query = f"""SELECT user_id, last_time,interactions_count, MIN(start_ts) AS first_seen, MAX(start_ts) AS last_seen + pg_query = f"""SELECT user_id, last_time, interactions_count, MIN(start_ts) AS first_seen, MAX(start_ts) AS last_seen FROM (SELECT user_id, MAX(timestamp) AS last_time, COUNT(DISTINCT session_id) AS interactions_count FROM {event_table} AS feature INNER JOIN sessions USING (session_id) WHERE {" AND ".join(pg_sub_query)} From 0babeb6b60639dc6ac952f416c7e702041033318 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Fri, 24 Sep 2021 19:39:58 +0200 Subject: [PATCH 17/19] feat(api): insights feature intensity --- api/chalicelib/blueprints/subs/bp_insights.py | 12 +++++++ api/chalicelib/core/insights.py | 35 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/api/chalicelib/blueprints/subs/bp_insights.py b/api/chalicelib/blueprints/subs/bp_insights.py index f0dc226bc..fc40885b7 100644 --- a/api/chalicelib/blueprints/subs/bp_insights.py +++ b/api/chalicelib/blueprints/subs/bp_insights.py @@ -84,6 +84,17 @@ def get_feature_popularity_frequency(projectId, context): return {"data": insights.feature_popularity_frequency(project_id=projectId, **{**data, **args})} +@app.route('/{projectId}/insights/feature_intensity', methods=['GET', 'POST']) +def get_feature_intensity(projectId, context): + data = app.current_request.json_body + if data is None: + data = {} + params = app.current_request.query_params + args = dashboard.dashboard_args(params) + + return {"data": insights.feature_intensity(project_id=projectId, **{**data, **args})} + + @app.route('/{projectId}/insights/users_active', methods=['GET', 'POST']) def get_users_active(projectId, context): data = app.current_request.json_body @@ -105,6 +116,7 @@ def get_users_power(projectId, context): return {"data": insights.users_power(project_id=projectId, **{**data, **args})} + @app.route('/{projectId}/insights/users_slipping', methods=['GET', 'POST']) def get_users_slipping(projectId, context): data = app.current_request.json_body diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index 421072a26..7c06e4c8c 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -526,6 +526,41 @@ def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_da return popularity +@dev.timed +def feature_intensity(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), + filters=[], + **args): + pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", + time_constraint=True) + pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") + pg_sub_query.append("feature.timestamp < %(endTimestamp)s") + event_table = JOURNEY_TYPES["CLICK"]["table"] + event_column = JOURNEY_TYPES["CLICK"]["column"] + for f in filters: + if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): + event_table = JOURNEY_TYPES[f["value"]]["table"] + event_column = JOURNEY_TYPES[f["value"]]["column"] + elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]: + pg_sub_query.append(f"sessions.user_id = %(user_id)s") + pg_sub_query.append(f"length({event_column})>2") + with pg_client.PostgresClient() as cur: + pg_query = f"""SELECT {event_column} AS value, AVG(DISTINCT session_id) AS avg + FROM {event_table} AS feature INNER JOIN sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query)} + GROUP BY value + ORDER BY avg DESC + LIMIT 7;""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + + # print(cur.mogrify(pg_query, params)) + # print("---------------------") + cur.execute(cur.mogrify(pg_query, params)) + rows = cur.fetchall() + + return rows + + @dev.timed def users_active(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], From 7e6592de3f5e0bc485727bda5eff2b6c7456db63 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Fri, 24 Sep 2021 20:06:06 +0200 Subject: [PATCH 18/19] feat(api): insights feature adoption --- api/chalicelib/blueprints/subs/bp_insights.py | 11 +++ api/chalicelib/core/insights.py | 70 +++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/api/chalicelib/blueprints/subs/bp_insights.py b/api/chalicelib/blueprints/subs/bp_insights.py index fc40885b7..30894028b 100644 --- a/api/chalicelib/blueprints/subs/bp_insights.py +++ b/api/chalicelib/blueprints/subs/bp_insights.py @@ -95,6 +95,17 @@ def get_feature_intensity(projectId, context): return {"data": insights.feature_intensity(project_id=projectId, **{**data, **args})} +@app.route('/{projectId}/insights/feature_adoption', methods=['GET', 'POST']) +def get_feature_adoption(projectId, context): + data = app.current_request.json_body + if data is None: + data = {} + params = app.current_request.query_params + args = dashboard.dashboard_args(params) + + return {"data": insights.feature_adoption(project_id=projectId, **{**data, **args})} + + @app.route('/{projectId}/insights/users_active', methods=['GET', 'POST']) def get_users_active(projectId, context): data = app.current_request.json_body diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index 7c06e4c8c..0df8e63b0 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -526,6 +526,76 @@ def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_da return popularity +@dev.timed +def feature_adoption(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), + filters=[], + **args): + startTimestamp = TimeUTC.trunc_week(startTimestamp) + endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK + pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", + time_constraint=True) + event_type = "CLICK" + event_value = '/' + extra_values = {} + default = True + for f in filters: + if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): + event_type = f["value"] + elif f["type"] == "EVENT_VALUE": + event_value = f["value"] + default = False + elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]: + pg_sub_query.append(f"sessions.user_id = %(user_id)s") + event_table = JOURNEY_TYPES[event_type]["table"] + event_column = JOURNEY_TYPES[event_type]["column"] + with pg_client.PostgresClient() as cur: + pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count + FROM sessions + WHERE {" AND ".join(pg_sub_query)} + AND user_id IS NOT NULL;""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + # print(cur.mogrify(pg_query, params)) + # print("---------------------") + cur.execute(cur.mogrify(pg_query, params)) + all_user_count = cur.fetchone()["count"] + if all_user_count == 0: + return {"adoption": 0, "target": 0, "filters": [{"type": "EVENT_TYPE", "value": event_type}, + {"type": "EVENT_VALUE", "value": event_value}], } + pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") + pg_sub_query.append("feature.timestamp < %(endTimestamp)s") + pg_sub_query.append(f"length({event_column})>2") + if default: + # get most used value + pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count + FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query[:-1])} + AND length({event_column}) > 2 + GROUP BY value + ORDER BY count DESC + LIMIT 1;""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + cur.execute(cur.mogrify(pg_query, params)) + row = cur.fetchone() + if row is not None: + event_value = row["value"] + extra_values["value"] = event_value + pg_sub_query.append(f"feature.{event_column} = %(value)s") + pg_query = f"""SELECT COUNT(DISTINCT user_id) AS count + FROM {event_table} AS feature INNER JOIN sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query)} + AND user_id IS NOT NULL;""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + # print(cur.mogrify(pg_query, params)) + # print("---------------------") + cur.execute(cur.mogrify(pg_query, params)) + adoption = cur.fetchone()["count"] / all_user_count + return {"target": all_user_count, "adoption": adoption, + "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]} + + @dev.timed def feature_intensity(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], From b64b052fe9c1fc39a0746496b5405fb83a5700b6 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Fri, 24 Sep 2021 20:26:05 +0200 Subject: [PATCH 19/19] feat(api): insights feature adoption top user --- api/chalicelib/blueprints/subs/bp_insights.py | 10 +++ api/chalicelib/core/insights.py | 61 ++++++++++++++++++- 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/api/chalicelib/blueprints/subs/bp_insights.py b/api/chalicelib/blueprints/subs/bp_insights.py index 30894028b..1e016c238 100644 --- a/api/chalicelib/blueprints/subs/bp_insights.py +++ b/api/chalicelib/blueprints/subs/bp_insights.py @@ -105,6 +105,16 @@ def get_feature_adoption(projectId, context): return {"data": insights.feature_adoption(project_id=projectId, **{**data, **args})} +@app.route('/{projectId}/insights/feature_adoption_top_users', methods=['GET', 'POST']) +def get_feature_adoption(projectId, context): + data = app.current_request.json_body + if data is None: + data = {} + params = app.current_request.query_params + args = dashboard.dashboard_args(params) + + return {"data": insights.feature_adoption_top_users(project_id=projectId, **{**data, **args})} + @app.route('/{projectId}/insights/users_active', methods=['GET', 'POST']) def get_users_active(projectId, context): diff --git a/api/chalicelib/core/insights.py b/api/chalicelib/core/insights.py index 0df8e63b0..e639ba55a 100644 --- a/api/chalicelib/core/insights.py +++ b/api/chalicelib/core/insights.py @@ -530,8 +530,6 @@ def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_da def feature_adoption(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], **args): - startTimestamp = TimeUTC.trunc_week(startTimestamp) - endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", time_constraint=True) event_type = "CLICK" @@ -596,6 +594,65 @@ def feature_adoption(project_id, startTimestamp=TimeUTC.now(delta_days=-70), end "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]} +@dev.timed +def feature_adoption_top_users(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), + filters=[], + **args): + pg_sub_query = __get_constraints(project_id=project_id, data=args, duration=True, main_table="sessions", + time_constraint=True) + pg_sub_query.append("user_id IS NOT NULL") + event_type = "CLICK" + event_value = '/' + extra_values = {} + default = True + for f in filters: + if f["type"] == "EVENT_TYPE" and JOURNEY_TYPES.get(f["value"]): + event_type = f["value"] + elif f["type"] == "EVENT_VALUE": + event_value = f["value"] + default = False + elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]: + pg_sub_query.append(f"sessions.user_id = %(user_id)s") + event_table = JOURNEY_TYPES[event_type]["table"] + event_column = JOURNEY_TYPES[event_type]["column"] + with pg_client.PostgresClient() as cur: + pg_sub_query.append("feature.timestamp >= %(startTimestamp)s") + pg_sub_query.append("feature.timestamp < %(endTimestamp)s") + pg_sub_query.append(f"length({event_column})>2") + if default: + # get most used value + pg_query = f"""SELECT {event_column} AS value, COUNT(*) AS count + FROM {event_table} AS feature INNER JOIN public.sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query[:-1])} + AND length({event_column}) > 2 + GROUP BY value + ORDER BY count DESC + LIMIT 1;""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + cur.execute(cur.mogrify(pg_query, params)) + row = cur.fetchone() + if row is not None: + event_value = row["value"] + extra_values["value"] = event_value + pg_sub_query.append(f"feature.{event_column} = %(value)s") + pg_query = f"""SELECT user_id, COUNT(DISTINCT session_id) AS count + FROM {event_table} AS feature + INNER JOIN sessions USING (session_id) + WHERE {" AND ".join(pg_sub_query)} + GROUP BY 1 + ORDER BY 2 DESC + LIMIT 10;""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values} + # print(cur.mogrify(pg_query, params)) + # print("---------------------") + cur.execute(cur.mogrify(pg_query, params)) + rows = cur.fetchall() + return {"users": helper.list_to_camel_case(rows), + "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]} + + @dev.timed def feature_intensity(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[],