feat(api): optimized weekly report

feat(db): changed indexes to optimize weekly report
This commit is contained in:
Taha Yassine Kraiem 2021-09-27 16:32:03 +02:00
parent e22f277f90
commit 6c62897942
3 changed files with 30 additions and 10 deletions

View file

@ -1,4 +1,5 @@
from chalicelib.utils import pg_client, helper
from chalicelib.utils.TimeUTC import TimeUTC
from chalicelib.utils.helper import environ
from chalicelib.utils.helper import get_issue_title
@ -30,7 +31,11 @@ def edit_config(user_id, weekly_report):
def cron():
with pg_client.PostgresClient() as cur:
cur.execute("""\
params = {"3_days_ago": TimeUTC.midnight(delta_days=-3),
"1_week_ago": TimeUTC.midnight(delta_days=-7),
"2_week_ago": TimeUTC.midnight(delta_days=-14),
"5_week_ago": TimeUTC.midnight(delta_days=-35)}
cur.execute(cur.mogrify("""\
SELECT project_id,
name AS project_name,
users.emails AS emails,
@ -44,7 +49,7 @@ def cron():
SELECT sessions.project_id
FROM public.sessions
WHERE sessions.project_id = projects.project_id
AND start_ts >= (EXTRACT(EPOCH FROM now() - INTERVAL '3 days') * 1000)::BIGINT
AND start_ts >= %(3_days_ago)s
LIMIT 1) AS recently_active USING (project_id)
INNER JOIN LATERAL (
SELECT COALESCE(ARRAY_AGG(email), '{}') AS emails
@ -54,14 +59,14 @@ def cron():
AND users.weekly_report
) AS users ON (TRUE)
LEFT JOIN LATERAL (
SELECT COUNT(issues.*) AS count
SELECT COUNT(1) AS count
FROM events_common.issues
INNER JOIN public.sessions USING (session_id)
WHERE sessions.project_id = projects.project_id
AND issues.timestamp >= (EXTRACT(EPOCH FROM DATE_TRUNC('day', now()) - INTERVAL '1 week') * 1000)::BIGINT
) AS week_0_issues ON (TRUE)
LEFT JOIN LATERAL (
SELECT COUNT(issues.*) AS count
SELECT COUNT(1) AS count
FROM events_common.issues
INNER JOIN public.sessions USING (session_id)
WHERE sessions.project_id = projects.project_id
@ -69,16 +74,17 @@ def cron():
AND issues.timestamp >= (EXTRACT(EPOCH FROM DATE_TRUNC('day', now()) - INTERVAL '2 week') * 1000)::BIGINT
) AS week_1_issues ON (TRUE)
LEFT JOIN LATERAL (
SELECT COUNT(issues.*) AS count
SELECT COUNT(1) AS count
FROM events_common.issues
INNER JOIN public.sessions USING (session_id)
WHERE sessions.project_id = projects.project_id
AND issues.timestamp <= (EXTRACT(EPOCH FROM DATE_TRUNC('day', now()) - INTERVAL '1 week') * 1000)::BIGINT
AND issues.timestamp >= (EXTRACT(EPOCH FROM DATE_TRUNC('day', now()) - INTERVAL '5 week') * 1000)::BIGINT
) AS month_1_issues ON (TRUE)
WHERE projects.deleted_at ISNULL;""")
WHERE projects.deleted_at ISNULL;"""), params)
projects_data = cur.fetchall()
for p in projects_data:
params["project_id"] = p["project_id"]
print(f"checking {p['project_name']} : {p['project_id']}")
if len(p["emails"]) == 0 \
or p["this_week_issues_count"] + p["past_week_issues_count"] + p["past_month_issues_count"] == 0:
@ -104,7 +110,7 @@ def cron():
DATE_TRUNC('day', now()) - INTERVAL '1 day',
'1 day'::INTERVAL
) AS timestamp_i
ORDER BY timestamp_i;""", {"project_id": p["project_id"]}))
ORDER BY timestamp_i;""", params))
days_partition = cur.fetchall()
max_days_partition = max(x['issues_count'] for x in days_partition)
for d in days_partition:
@ -120,7 +126,7 @@ def cron():
AND timestamp >= (EXTRACT(EPOCH FROM DATE_TRUNC('day', now()) - INTERVAL '7 days') * 1000)::BIGINT
GROUP BY type
ORDER BY count DESC, type
LIMIT 4;""", {"project_id": p["project_id"]}))
LIMIT 4;""", params))
issues_by_type = cur.fetchall()
max_issues_by_type = sum(i["count"] for i in issues_by_type)
for i in issues_by_type:
@ -149,7 +155,7 @@ def cron():
'1 day'::INTERVAL
) AS timestamp_i
GROUP BY timestamp_i
ORDER BY timestamp_i;""", {"project_id": p["project_id"]}))
ORDER BY timestamp_i;""", params))
issues_breakdown_by_day = cur.fetchall()
for i in issues_breakdown_by_day:
i["sum"] = sum(x["count"] for x in i["partition"])
@ -195,7 +201,7 @@ def cron():
WHERE mi.project_id = %(project_id)s AND sessions.project_id = %(project_id)s AND sessions.duration IS NOT NULL
AND sessions.start_ts >= (EXTRACT(EPOCH FROM DATE_TRUNC('day', now()) - INTERVAL '1 week') * 1000)::BIGINT
GROUP BY type
ORDER BY issue_count DESC;""", {"project_id": p["project_id"]}))
ORDER BY issue_count DESC;""", params))
issues_breakdown_list = cur.fetchall()
if len(issues_breakdown_list) > 4:
others = {"type": "Others",

View file

@ -0,0 +1,11 @@
BEGIN;
CREATE INDEX sessions_user_id_useridNN_idx ON sessions (user_id) WHERE user_id IS NOT NULL;
CREATE INDEX sessions_uid_projectid_startts_sessionid_uidNN_durGTZ_idx ON sessions (user_id, project_id, start_ts, session_id) WHERE user_id IS NOT NULL AND duration > 0;
CREATE INDEX pages_base_path_base_pathLNGT2_idx ON events.pages (base_path) WHERE length(base_path) > 2;
CREATE INDEX users_tenant_id_deleted_at_N_idx ON users (tenant_id) WHERE deleted_at ISNULL;
CREATE INDEX issues_issue_id_timestamp_idx ON events_common.issues(issue_id,timestamp);
CREATE INDEX issues_timestamp_idx ON events_common.issues (timestamp);
CREATE INDEX issues_project_id_issue_id_idx ON public.issues (project_id, issue_id);
COMMIT;

View file

@ -4,4 +4,7 @@ CREATE INDEX sessions_user_id_useridNN_idx ON sessions (user_id) WHERE user_id I
CREATE INDEX sessions_uid_projectid_startts_sessionid_uidNN_durGTZ_idx ON sessions (user_id, project_id, start_ts, session_id) WHERE user_id IS NOT NULL AND duration > 0;
CREATE INDEX pages_base_path_base_pathLNGT2_idx ON events.pages (base_path) WHERE length(base_path) > 2;
CREATE INDEX issues_issue_id_timestamp_idx ON events_common.issues(issue_id,timestamp);
CREATE INDEX issues_timestamp_idx ON events_common.issues (timestamp);
CREATE INDEX issues_project_id_issue_id_idx ON public.issues (project_id, issue_id);
COMMIT;