feat(chalice): changed resources_by_party to use requests only instead of fetch&script resources
feat(chalice): fixed clickhouse client helper timeout
This commit is contained in:
parent
8c7c25e7cd
commit
261595f075
4 changed files with 44 additions and 33 deletions
|
|
@ -2135,44 +2135,44 @@ def get_resources_by_party(project_id, startTimestamp=TimeUTC.now(delta_days=-1)
|
|||
pg_sub_query_subset = __get_constraints(project_id=project_id, time_constraint=True,
|
||||
chart=False, data=args)
|
||||
pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=False, project=False,
|
||||
chart=True, data=args, main_table="resources", time_column="timestamp",
|
||||
chart=True, data=args, main_table="requests", time_column="timestamp",
|
||||
duration=False)
|
||||
pg_sub_query_subset.append("resources.timestamp >= %(startTimestamp)s")
|
||||
pg_sub_query_subset.append("resources.timestamp < %(endTimestamp)s")
|
||||
pg_sub_query_subset.append("resources.success = FALSE")
|
||||
pg_sub_query_subset.append("requests.timestamp >= %(startTimestamp)s")
|
||||
pg_sub_query_subset.append("requests.timestamp < %(endTimestamp)s")
|
||||
# pg_sub_query_subset.append("resources.type IN ('fetch', 'script')")
|
||||
pg_sub_query_subset.append("requests.success = FALSE")
|
||||
|
||||
with pg_client.PostgresClient() as cur:
|
||||
pg_query = f"""WITH resources AS (
|
||||
SELECT resources.url_host, timestamp
|
||||
FROM events.resources
|
||||
pg_query = f"""WITH requests AS (
|
||||
SELECT requests.host, timestamp
|
||||
FROM events_common.requests
|
||||
INNER JOIN public.sessions USING (session_id)
|
||||
WHERE {" AND ".join(pg_sub_query_subset)}
|
||||
)
|
||||
SELECT generated_timestamp AS timestamp,
|
||||
SUM(CASE WHEN first.url_host = sub_resources.url_host THEN 1 ELSE 0 END) AS first_party,
|
||||
SUM(CASE WHEN first.url_host != sub_resources.url_host THEN 1 ELSE 0 END) AS third_party
|
||||
SUM(CASE WHEN first.host = sub_requests.host THEN 1 ELSE 0 END) AS first_party,
|
||||
SUM(CASE WHEN first.host != sub_requests.host THEN 1 ELSE 0 END) AS third_party
|
||||
FROM generate_series(%(startTimestamp)s, %(endTimestamp)s, %(step_size)s) AS generated_timestamp
|
||||
LEFT JOIN (
|
||||
SELECT resources.url_host,
|
||||
COUNT(resources.session_id) AS count
|
||||
FROM events.resources
|
||||
SELECT requests.host,
|
||||
COUNT(requests.session_id) AS count
|
||||
FROM events_common.requests
|
||||
INNER JOIN public.sessions USING (session_id)
|
||||
WHERE sessions.project_id = '1'
|
||||
AND resources.type IN ('fetch', 'script')
|
||||
AND sessions.start_ts > (EXTRACT(EPOCH FROM now() - INTERVAL '31 days') * 1000)::BIGINT
|
||||
AND sessions.start_ts < (EXTRACT(EPOCH FROM now()) * 1000)::BIGINT
|
||||
AND resources.timestamp > (EXTRACT(EPOCH FROM now() - INTERVAL '31 days') * 1000)::BIGINT
|
||||
AND resources.timestamp < (EXTRACT(EPOCH FROM now()) * 1000)::BIGINT
|
||||
AND requests.timestamp > (EXTRACT(EPOCH FROM now() - INTERVAL '31 days') * 1000)::BIGINT
|
||||
AND requests.timestamp < (EXTRACT(EPOCH FROM now()) * 1000)::BIGINT
|
||||
AND sessions.duration>0
|
||||
GROUP BY resources.url_host
|
||||
GROUP BY requests.host
|
||||
ORDER BY count DESC
|
||||
LIMIT 1
|
||||
) AS first ON (TRUE)
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT resources.url_host
|
||||
FROM resources
|
||||
SELECT requests.host
|
||||
FROM requests
|
||||
WHERE {" AND ".join(pg_sub_query_chart)}
|
||||
) AS sub_resources ON (TRUE)
|
||||
) AS sub_requests ON (TRUE)
|
||||
GROUP BY generated_timestamp
|
||||
ORDER BY generated_timestamp;"""
|
||||
cur.execute(cur.mogrify(pg_query, {"step_size": step_size,
|
||||
|
|
|
|||
|
|
@ -2030,6 +2030,7 @@ def get_resources_by_party(project_id, startTimestamp=TimeUTC.now(delta_days=-1)
|
|||
step_size = __get_step_size(startTimestamp, endTimestamp, density)
|
||||
ch_sub_query = __get_basic_constraints(table_name="resources", round_start=True, data=args)
|
||||
ch_sub_query.append("resources.success = 0")
|
||||
ch_sub_query.append("resources.type IN ('fetch','script')")
|
||||
sch_sub_query = ["rs.project_id =toUInt32(%(project_id)s)", "rs.type IN ('fetch','script')"]
|
||||
meta_condition = __get_meta_constraint(args)
|
||||
ch_sub_query += meta_condition
|
||||
|
|
@ -2037,8 +2038,8 @@ def get_resources_by_party(project_id, startTimestamp=TimeUTC.now(delta_days=-1)
|
|||
|
||||
with ch_client.ClickHouseClient() as ch:
|
||||
ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(sub_resources.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
|
||||
SUM(if(first.url_host = sub_resources.url_host, 1, 0)) AS first_party,
|
||||
SUM(if(first.url_host = sub_resources.url_host, 0, 1)) AS third_party
|
||||
SUM(first.url_host = sub_resources.url_host) AS first_party,
|
||||
SUM(first.url_host != sub_resources.url_host) AS third_party
|
||||
FROM
|
||||
(
|
||||
SELECT resources.datetime, resources.url_host
|
||||
|
|
|
|||
|
|
@ -2041,31 +2041,31 @@ def get_resources_count_by_type(project_id, startTimestamp=TimeUTC.now(delta_day
|
|||
|
||||
def get_resources_by_party(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
|
||||
endTimestamp=TimeUTC.now(), density=7, **args):
|
||||
raise Exception("not supported widget")
|
||||
step_size = __get_step_size(startTimestamp, endTimestamp, density)
|
||||
ch_sub_query = __get_basic_constraints(table_name="resources", round_start=True, data=args)
|
||||
ch_sub_query.append("resources.success = 0")
|
||||
sch_sub_query = ["rs.project_id =toUInt16(%(project_id)s)", "rs.type IN ('fetch','script')"]
|
||||
ch_sub_query = __get_basic_constraints(table_name="requests", round_start=True, data=args)
|
||||
ch_sub_query.append("requests.event_type='REQUEST'")
|
||||
ch_sub_query.append("requests.success = 0")
|
||||
sch_sub_query = ["rs.project_id =toUInt16(%(project_id)s)", "rs.event_type='REQUEST'"]
|
||||
meta_condition = __get_meta_constraint(args)
|
||||
ch_sub_query += meta_condition
|
||||
# sch_sub_query += meta_condition
|
||||
|
||||
with ch_client.ClickHouseClient() as ch:
|
||||
ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(sub_resources.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
|
||||
SUM(if(first.url_host = sub_resources.url_host, 1, 0)) AS first_party,
|
||||
SUM(if(first.url_host = sub_resources.url_host, 0, 1)) AS third_party
|
||||
ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(sub_requests.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
|
||||
SUM(first.url_host = sub_requests.url_host) AS first_party,
|
||||
SUM(first.url_host != sub_requests.url_host) AS third_party
|
||||
FROM
|
||||
(
|
||||
SELECT resources.datetime, resources.url_host
|
||||
FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""}
|
||||
SELECT requests.datetime, requests.url_host
|
||||
FROM {sessions_helper.get_main_events_table(startTimestamp)} AS requests
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
) AS sub_resources
|
||||
) AS sub_requests
|
||||
CROSS JOIN
|
||||
(
|
||||
SELECT
|
||||
rs.url_host,
|
||||
COUNT(rs.session_id) AS count
|
||||
FROM resources AS rs
|
||||
FROM {sessions_helper.get_main_events_table(startTimestamp)} AS rs
|
||||
WHERE {" AND ".join(sch_sub_query)}
|
||||
GROUP BY rs.url_host
|
||||
ORDER BY count DESC
|
||||
|
|
@ -2073,6 +2073,11 @@ def get_resources_by_party(project_id, startTimestamp=TimeUTC.now(delta_days=-1)
|
|||
) AS first
|
||||
GROUP BY timestamp
|
||||
ORDER BY timestamp;"""
|
||||
print(ch.format(query=ch_query,
|
||||
params={"step_size": step_size,
|
||||
"project_id": project_id,
|
||||
"startTimestamp": startTimestamp,
|
||||
"endTimestamp": endTimestamp, **__get_constraint_values(args)}))
|
||||
rows = ch.execute(query=ch_query,
|
||||
params={"step_size": step_size,
|
||||
"project_id": project_id,
|
||||
|
|
|
|||
|
|
@ -1,8 +1,13 @@
|
|||
import logging
|
||||
|
||||
import clickhouse_driver
|
||||
from decouple import config
|
||||
|
||||
logging.basicConfig(level=config("LOGLEVEL", default=logging.INFO))
|
||||
logging.getLogger('apscheduler').setLevel(config("LOGLEVEL", default=logging.INFO))
|
||||
|
||||
settings = None
|
||||
if config('pg_timeout', cast=int, default=-1) <= 0:
|
||||
if config('pg_timeout', cast=int, default=-1) > 0:
|
||||
settings = {"max_execution_time": config('pg_timeout', cast=int)}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue