feat(chalice): use url_hostpath instead of url for metrics

This commit is contained in:
Taha Yassine Kraiem 2022-11-21 18:38:50 +01:00
parent ac578d927e
commit 81795681d0
3 changed files with 35 additions and 35 deletions

View file

@ -419,7 +419,7 @@ def get_slowest_images(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=True,
chart=True, data=args)
pg_sub_query_chart.append("resources.type = 'img'")
pg_sub_query_chart.append("resources.url = top_img.url")
pg_sub_query_chart.append("resources.url_hostpath = top_img.url_hostpath")
pg_sub_query_subset = __get_constraints(project_id=project_id, time_constraint=True,
chart=False, data=args)
@ -431,13 +431,13 @@ def get_slowest_images(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
with pg_client.PostgresClient() as cur:
pg_query = f"""SELECT *
FROM (SELECT resources.url,
FROM (SELECT resources.url_hostpath,
COALESCE(AVG(resources.duration), 0) AS avg_duration,
COUNT(resources.session_id) AS sessions_count
FROM events.resources
INNER JOIN sessions USING (session_id)
WHERE {" AND ".join(pg_sub_query_subset)}
GROUP BY resources.url
GROUP BY resources.url_hostpath
ORDER BY avg_duration DESC
LIMIT 10) AS top_img
LEFT JOIN LATERAL (
@ -485,13 +485,13 @@ def get_performance(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTi
if resources and len(resources) > 0:
for r in resources:
if r["type"] == "IMG":
img_constraints.append(f"resources.url = %(val_{len(img_constraints)})s")
img_constraints.append(f"resources.url_hostpath = %(val_{len(img_constraints)})s")
img_constraints_vals["val_" + str(len(img_constraints) - 1)] = r['value']
elif r["type"] == "LOCATION":
location_constraints.append(f"pages.path = %(val_{len(location_constraints)})s")
location_constraints_vals["val_" + str(len(location_constraints) - 1)] = r['value']
else:
request_constraints.append(f"resources.url = %(val_{len(request_constraints)})s")
request_constraints.append(f"resources.url_hostpath = %(val_{len(request_constraints)})s")
request_constraints_vals["val_" + str(len(request_constraints) - 1)] = r['value']
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp}
@ -627,12 +627,12 @@ def search(text, resource_type, project_id, performance=False, pages_only=False,
pg_sub_query.append("url_hostpath ILIKE %(value)s")
with pg_client.PostgresClient() as cur:
pg_query = f"""SELECT key, value
FROM ( SELECT DISTINCT ON (url) ROW_NUMBER() OVER (PARTITION BY type ORDER BY url) AS r,
url AS value,
FROM ( SELECT DISTINCT ON (url_hostpath) ROW_NUMBER() OVER (PARTITION BY type ORDER BY url_hostpath) AS r,
url_hostpath AS value,
type AS key
FROM events.resources INNER JOIN public.sessions USING (session_id)
WHERE {" AND ".join(pg_sub_query)}
ORDER BY url, type ASC) AS ranked_values
ORDER BY url_hostpath, type ASC) AS ranked_values
WHERE ranked_values.r<=5;"""
cur.execute(cur.mogrify(pg_query, {"project_id": project_id, "value": helper.string_to_sql_like(text)}))
rows = cur.fetchall()
@ -893,7 +893,7 @@ def get_resources_loading_time(project_id, startTimestamp=TimeUTC.now(delta_days
if type is not None:
pg_sub_query_subset.append(f"resources.type = '{__get_resource_db_type_from_type(type)}'")
if url is not None:
pg_sub_query_subset.append(f"resources.url = %(value)s")
pg_sub_query_subset.append(f"resources.url_hostpath = %(value)s")
with pg_client.PostgresClient() as cur:
pg_query = f"""WITH resources AS (SELECT resources.duration, timestamp
@ -1009,7 +1009,7 @@ def get_slowest_resources(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
ORDER BY avg DESC
LIMIT 10) AS main_list
INNER JOIN LATERAL (
SELECT url, type
SELECT url_hostpath AS url, type
FROM events.resources
INNER JOIN public.sessions USING (session_id)
WHERE {" AND ".join(pg_sub_query)}

View file

@ -452,18 +452,18 @@ def get_slowest_images(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
ch_sub_query.append("resources.type = 'img'")
ch_sub_query_chart = __get_basic_constraints(table_name="resources", round_start=True, data=args)
ch_sub_query_chart.append("resources.type = 'img'")
ch_sub_query_chart.append("resources.url IN %(url)s")
ch_sub_query_chart.append("resources.url_hostpath IN %(url)s")
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
ch_sub_query_chart += meta_condition
with ch_client.ClickHouseClient() as ch:
ch_query = f"""SELECT resources.url,
ch_query = f"""SELECT resources.url_hostpath AS url,
COALESCE(avgOrNull(resources.duration),0) AS avg,
COUNT(1) AS count
FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""}
WHERE {" AND ".join(ch_sub_query)} AND resources.duration>0
GROUP BY resources.url ORDER BY avg DESC LIMIT 10;"""
GROUP BY resources.url_hostpath ORDER BY avg DESC LIMIT 10;"""
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
rows = ch.execute(query=ch_query, params=params)
@ -474,13 +474,13 @@ def get_slowest_images(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
urls = [row["url"] for row in rows]
charts = {}
ch_query = f"""SELECT url,
ch_query = f"""SELECT url_hostpath AS url,
toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp,
COALESCE(avgOrNull(resources.duration),0) AS avg
FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""}
WHERE {" AND ".join(ch_sub_query_chart)} AND resources.duration>0
GROUP BY url, timestamp
ORDER BY url, timestamp;"""
GROUP BY url_hostpath, timestamp
ORDER BY url_hostpath, timestamp;"""
params["url"] = urls
u_rows = ch.execute(query=ch_query, params=params)
for url in urls:
@ -526,13 +526,13 @@ def get_performance(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTi
if resources and len(resources) > 0:
for r in resources:
if r["type"] == "IMG":
img_constraints.append(f"resources.url = %(val_{len(img_constraints)})s")
img_constraints.append(f"resources.url_hostpath = %(val_{len(img_constraints)})s")
img_constraints_vals["val_" + str(len(img_constraints) - 1)] = r['value']
elif r["type"] == "LOCATION":
location_constraints.append(f"pages.url_path = %(val_{len(location_constraints)})s")
location_constraints_vals["val_" + str(len(location_constraints) - 1)] = r['value']
else:
request_constraints.append(f"resources.url = %(val_{len(request_constraints)})s")
request_constraints.append(f"resources.url_hostpath = %(val_{len(request_constraints)})s")
request_constraints_vals["val_" + str(len(request_constraints) - 1)] = r['value']
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp}
@ -638,7 +638,7 @@ def search(text, resource_type, project_id, performance=False, pages_only=False,
if resource_type == "ALL" and not pages_only and not events_only:
ch_sub_query.append("positionUTF8(url_hostpath,%(value)s)!=0")
with ch_client.ClickHouseClient() as ch:
ch_query = f"""SELECT arrayJoin(arraySlice(arrayReverseSort(arrayDistinct(groupArray(url))), 1, 5)) AS value,
ch_query = f"""SELECT arrayJoin(arraySlice(arrayReverseSort(arrayDistinct(groupArray(url_hostpath))), 1, 5)) AS value,
type AS key
FROM resources
WHERE {" AND ".join(ch_sub_query)}
@ -884,7 +884,7 @@ def get_resources_loading_time(project_id, startTimestamp=TimeUTC.now(delta_days
if type is not None:
ch_sub_query_chart.append(f"resources.type = '{__get_resource_db_type_from_type(type)}'")
if url is not None:
ch_sub_query_chart.append(f"resources.url = %(value)s")
ch_sub_query_chart.append(f"resources.url_hostpath = %(value)s")
meta_condition = __get_meta_constraint(args)
ch_sub_query_chart += meta_condition
ch_sub_query_chart.append("resources.duration>0")
@ -966,7 +966,7 @@ def get_slowest_resources(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
ch_sub_query_chart.append("isNotNull(resources.duration)")
ch_sub_query_chart.append("resources.duration>0")
with ch_client.ClickHouseClient() as ch:
ch_query = f"""SELECT any(url) AS url, any(type) AS type,
ch_query = f"""SELECT any(url_hostpath) AS url, any(type) AS type,
splitByChar('/', resources.url_hostpath)[-1] AS name,
COALESCE(avgOrNull(NULLIF(resources.duration,0)),0) AS avg
FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""}
@ -2179,7 +2179,7 @@ def get_performance_avg_image_load_time(ch, project_id, startTimestamp=TimeUTC.n
if resources and len(resources) > 0:
for r in resources:
if r["type"] == "IMG":
img_constraints.append(f"resources.url = %(val_{len(img_constraints)})s")
img_constraints.append(f"resources.url_hostpath = %(val_{len(img_constraints)})s")
img_constraints_vals["val_" + str(len(img_constraints) - 1)] = r['value']
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
@ -2254,7 +2254,7 @@ def get_performance_avg_request_load_time(ch, project_id, startTimestamp=TimeUTC
if resources and len(resources) > 0:
for r in resources:
if r["type"] != "IMG" and r["type"] == "LOCATION":
request_constraints.append(f"resources.url = %(val_{len(request_constraints)})s")
request_constraints.append(f"resources.url_hostpath = %(val_{len(request_constraints)})s")
request_constraints_vals["val_" + str(len(request_constraints) - 1)] = r['value']
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp}

View file

@ -462,18 +462,18 @@ def get_slowest_images(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
ch_sub_query_chart = __get_basic_constraints(table_name="resources", round_start=True, data=args)
# ch_sub_query_chart.append("events.event_type='RESOURCE'")
ch_sub_query_chart.append("resources.type = 'img'")
ch_sub_query_chart.append("resources.url IN %(url)s")
ch_sub_query_chart.append("resources.url_hostpath IN %(url)s")
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
ch_sub_query_chart += meta_condition
with ch_client.ClickHouseClient() as ch:
ch_query = f"""SELECT resources.url,
ch_query = f"""SELECT resources.url_hostpath AS url,
COALESCE(avgOrNull(resources.duration),0) AS avg,
COUNT(1) AS count
FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources
WHERE {" AND ".join(ch_sub_query)} AND resources.duration>0
GROUP BY resources.url ORDER BY avg DESC LIMIT 10;"""
GROUP BY resources.url_hostpath ORDER BY avg DESC LIMIT 10;"""
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
rows = ch.execute(query=ch_query, params=params)
@ -484,13 +484,13 @@ def get_slowest_images(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
urls = [row["url"] for row in rows]
charts = {}
ch_query = f"""SELECT url,
ch_query = f"""SELECT url_hostpath AS url,
toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp,
COALESCE(avgOrNull(resources.duration),0) AS avg
FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources
WHERE {" AND ".join(ch_sub_query_chart)} AND resources.duration>0
GROUP BY url, timestamp
ORDER BY url, timestamp;"""
GROUP BY url_hostpath, timestamp
ORDER BY url_hostpath, timestamp;"""
params["url"] = urls
# print(ch.format(query=ch_query, params=params))
u_rows = ch.execute(query=ch_query, params=params)
@ -538,13 +538,13 @@ def get_performance(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTi
if resources and len(resources) > 0:
for r in resources:
if r["type"] == "IMG":
img_constraints.append(f"resources.url = %(val_{len(img_constraints)})s")
img_constraints.append(f"resources.url_hostpath = %(val_{len(img_constraints)})s")
img_constraints_vals["val_" + str(len(img_constraints) - 1)] = r['value']
elif r["type"] == "LOCATION":
location_constraints.append(f"pages.url_path = %(val_{len(location_constraints)})s")
location_constraints_vals["val_" + str(len(location_constraints) - 1)] = r['value']
else:
request_constraints.append(f"resources.url = %(val_{len(request_constraints)})s")
request_constraints.append(f"resources.url_hostpath = %(val_{len(request_constraints)})s")
request_constraints_vals["val_" + str(len(request_constraints) - 1)] = r['value']
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp}
@ -891,7 +891,7 @@ def get_resources_loading_time(project_id, startTimestamp=TimeUTC.now(delta_days
if type is not None:
ch_sub_query_chart.append(f"resources.type = '{__get_resource_db_type_from_type(type)}'")
if url is not None:
ch_sub_query_chart.append(f"resources.url = %(value)s")
ch_sub_query_chart.append(f"resources.url_hostpath = %(value)s")
meta_condition = __get_meta_constraint(args)
ch_sub_query_chart += meta_condition
ch_sub_query_chart.append("resources.duration>0")
@ -974,7 +974,7 @@ def get_slowest_resources(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
ch_sub_query_chart.append("isNotNull(resources.duration)")
ch_sub_query_chart.append("resources.duration>0")
with ch_client.ClickHouseClient() as ch:
ch_query = f"""SELECT any(url) AS url, any(type) AS type, name,
ch_query = f"""SELECT any(url_hostpath) AS url, any(type) AS type, name,
COALESCE(avgOrNull(NULLIF(resources.duration,0)),0) AS avg
FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources
WHERE {" AND ".join(ch_sub_query)}
@ -2185,7 +2185,7 @@ def get_performance_avg_image_load_time(ch, project_id, startTimestamp=TimeUTC.n
if resources and len(resources) > 0:
for r in resources:
if r["type"] == "IMG":
img_constraints.append(f"resources.url = %(val_{len(img_constraints)})s")
img_constraints.append(f"resources.url_hostpath = %(val_{len(img_constraints)})s")
img_constraints_vals["val_" + str(len(img_constraints) - 1)] = r['value']
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
@ -2260,7 +2260,7 @@ def get_performance_avg_request_load_time(ch, project_id, startTimestamp=TimeUTC
if resources and len(resources) > 0:
for r in resources:
if r["type"] != "IMG" and r["type"] == "LOCATION":
request_constraints.append(f"resources.url = %(val_{len(request_constraints)})s")
request_constraints.append(f"resources.url_hostpath = %(val_{len(request_constraints)})s")
request_constraints_vals["val_" + str(len(request_constraints) - 1)] = r['value']
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp}