From 32c7e00c747f77cf9627d18fb547f29139f2c936 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Fri, 26 Aug 2022 18:24:54 +0100 Subject: [PATCH] feat(alerts): optimized predefined alerts --- .../chalicelib/core/alerts_processor_exp.py | 168 +++++++++++------- 1 file changed, 105 insertions(+), 63 deletions(-) diff --git a/ee/api/chalicelib/core/alerts_processor_exp.py b/ee/api/chalicelib/core/alerts_processor_exp.py index afa453c2e..b4b1fb406 100644 --- a/ee/api/chalicelib/core/alerts_processor_exp.py +++ b/ee/api/chalicelib/core/alerts_processor_exp.py @@ -4,80 +4,117 @@ import logging import schemas from chalicelib.core import alerts_listener, alerts_processor from chalicelib.core import sessions, alerts -from chalicelib.utils import pg_client, ch_client +from chalicelib.utils import pg_client, ch_client, exp_ch_helper from chalicelib.utils.TimeUTC import TimeUTC LeftToDb = { schemas.AlertColumn.performance__dom_content_loaded__average: { - "table": "events.pages INNER JOIN public.sessions USING(session_id)", - "formula": "COALESCE(AVG(NULLIF(dom_content_loaded_time ,0)),0)"}, + "table": lambda timestamp: f"{exp_ch_helper.get_main_events_table(timestamp)} AS pages", + "formula": "COALESCE(AVG(NULLIF(dom_content_loaded_event_time ,0)),0)", + "eventType": "LOCATION" + }, schemas.AlertColumn.performance__first_meaningful_paint__average: { - "table": "events.pages INNER JOIN public.sessions USING(session_id)", - "formula": "COALESCE(AVG(NULLIF(first_contentful_paint_time,0)),0)"}, + "table": lambda timestamp: f"{exp_ch_helper.get_main_events_table(timestamp)} AS pages", + "formula": "COALESCE(AVG(NULLIF(first_contentful_paint_time,0)),0)", + "eventType": "LOCATION" + }, schemas.AlertColumn.performance__page_load_time__average: { - "table": "events.pages INNER JOIN public.sessions USING(session_id)", "formula": "AVG(NULLIF(load_time ,0))"}, + "table": lambda timestamp: f"{exp_ch_helper.get_main_events_table(timestamp)} AS pages", + "formula": "AVG(NULLIF(load_event_time ,0))", + "eventType": "LOCATION" + }, schemas.AlertColumn.performance__dom_build_time__average: { - "table": "events.pages INNER JOIN public.sessions USING(session_id)", - "formula": "AVG(NULLIF(dom_building_time,0))"}, + "table": lambda timestamp: f"{exp_ch_helper.get_main_events_table(timestamp)} AS pages", + "formula": "AVG(NULLIF(dom_building_time,0))", + "eventType": "LOCATION" + }, schemas.AlertColumn.performance__speed_index__average: { - "table": "events.pages INNER JOIN public.sessions USING(session_id)", "formula": "AVG(NULLIF(speed_index,0))"}, + "table": lambda timestamp: f"{exp_ch_helper.get_main_events_table(timestamp)} AS pages", + "formula": "AVG(NULLIF(speed_index,0))", + "eventType": "LOCATION" + }, schemas.AlertColumn.performance__page_response_time__average: { - "table": "events.pages INNER JOIN public.sessions USING(session_id)", - "formula": "AVG(NULLIF(response_time,0))"}, + "table": lambda timestamp: f"{exp_ch_helper.get_main_events_table(timestamp)} AS pages", + "formula": "AVG(NULLIF(response_time,0))", + "eventType": "LOCATION" + }, schemas.AlertColumn.performance__ttfb__average: { - "table": "events.pages INNER JOIN public.sessions USING(session_id)", - "formula": "AVG(NULLIF(first_paint_time,0))"}, + "table": lambda timestamp: f"{exp_ch_helper.get_main_events_table(timestamp)} AS pages", + "formula": "AVG(NULLIF(first_contentful_paint_time,0))", + "eventType": "LOCATION" + }, schemas.AlertColumn.performance__time_to_render__average: { - "table": "events.pages INNER JOIN public.sessions USING(session_id)", - "formula": "AVG(NULLIF(visually_complete,0))"}, + "table": lambda timestamp: f"{exp_ch_helper.get_main_events_table(timestamp)} AS pages", + "formula": "AVG(NULLIF(visually_complete,0))", + "eventType": "LOCATION" + }, schemas.AlertColumn.performance__image_load_time__average: { - "table": "events.resources INNER JOIN public.sessions USING(session_id)", - "formula": "AVG(NULLIF(resources.duration,0))", "condition": "type='img'"}, + "table": lambda timestamp: f"{exp_ch_helper.get_main_resources_table(timestamp)} AS resources", + "formula": "AVG(NULLIF(resources.duration,0))", + "condition": "type='img'" + }, schemas.AlertColumn.performance__request_load_time__average: { - "table": "events.resources INNER JOIN public.sessions USING(session_id)", - "formula": "AVG(NULLIF(resources.duration,0))", "condition": "type='fetch'"}, + "table": lambda timestamp: f"{exp_ch_helper.get_main_resources_table(timestamp)} AS resources", + "formula": "AVG(NULLIF(resources.duration,0))", + "condition": "type='fetch'" + }, schemas.AlertColumn.resources__load_time__average: { - "table": "events.resources INNER JOIN public.sessions USING(session_id)", - "formula": "AVG(NULLIF(resources.duration,0))"}, + "table": lambda timestamp: f"{exp_ch_helper.get_main_resources_table(timestamp)} AS resources", + "formula": "AVG(NULLIF(resources.duration,0))" + }, schemas.AlertColumn.resources__missing__count: { - "table": "events.resources INNER JOIN public.sessions USING(session_id)", - "formula": "COUNT(DISTINCT url_hostpath)", "condition": "success= FALSE"}, + "table": lambda timestamp: f"{exp_ch_helper.get_main_resources_table(timestamp)} AS resources", + "formula": "COUNT(DISTINCT url_hostpath)", + "condition": "success= FALSE AND type='img'" + }, schemas.AlertColumn.errors__4xx_5xx__count: { - "table": "events.resources INNER JOIN public.sessions USING(session_id)", "formula": "COUNT(session_id)", - "condition": "status/100!=2"}, - schemas.AlertColumn.errors__4xx__count: {"table": "events.resources INNER JOIN public.sessions USING(session_id)", - "formula": "COUNT(session_id)", "condition": "status/100=4"}, - schemas.AlertColumn.errors__5xx__count: {"table": "events.resources INNER JOIN public.sessions USING(session_id)", - "formula": "COUNT(session_id)", "condition": "status/100=5"}, + "table": lambda timestamp: f"{exp_ch_helper.get_main_events_table(timestamp)} AS requests", + "eventType": "REQUEST", + "formula": "COUNT(1)", + "condition": "intDiv(requests.status, 100)!=2" + }, + schemas.AlertColumn.errors__4xx__count: { + "table": lambda timestamp: f"{exp_ch_helper.get_main_events_table(timestamp)} AS requests", + "eventType": "REQUEST", + "formula": "COUNT(1)", + "condition": "intDiv(requests.status, 100)==4" + }, + schemas.AlertColumn.errors__5xx__count: { + "table": lambda timestamp: f"{exp_ch_helper.get_main_events_table(timestamp)} AS requests", + "eventType": "REQUEST", + "formula": "COUNT(1)", + "condition": "intDiv(requests.status, 100)==5" + }, schemas.AlertColumn.errors__javascript__impacted_sessions__count: { - "table": "events.resources INNER JOIN public.sessions USING(session_id)", - "formula": "COUNT(DISTINCT session_id)", "condition": "success= FALSE AND type='script'"}, + "table": lambda timestamp: f"{exp_ch_helper.get_main_events_table(timestamp)} AS errors", + "eventType": "ERROR", + "formula": "COUNT(DISTINCT session_id)", + "condition": "source='js_exception'" + }, schemas.AlertColumn.performance__crashes__count: { - "table": "(SELECT *, start_ts AS timestamp FROM public.sessions WHERE errors_count > 0) AS sessions", - "formula": "COUNT(DISTINCT session_id)", "condition": "errors_count > 0"}, + "table": lambda timestamp: f"{exp_ch_helper.get_main_sessions_table(timestamp)} AS sessions", + "formula": "COUNT(DISTINCT session_id)", + "condition": "duration>0 AND errors_count>0" + }, schemas.AlertColumn.errors__javascript__count: { - "table": "events.errors INNER JOIN public.errors AS m_errors USING (error_id)", - "formula": "COUNT(DISTINCT session_id)", "condition": "source='js_exception'", "joinSessions": False}, + "table": lambda timestamp: f"{exp_ch_helper.get_main_events_table(timestamp)} AS errors", + "eventType": "ERROR", + "formula": "COUNT(DISTINCT session_id)", + "condition": "source='js_exception'" + }, schemas.AlertColumn.errors__backend__count: { - "table": "events.errors INNER JOIN public.errors AS m_errors USING (error_id)", - "formula": "COUNT(DISTINCT session_id)", "condition": "source!='js_exception'", "joinSessions": False}, -} - -# This is the frequency of execution for each threshold -TimeInterval = { - 15: 3, - 30: 5, - 60: 10, - 120: 20, - 240: 30, - 1440: 60, + "table": lambda timestamp: f"{exp_ch_helper.get_main_events_table(timestamp)} AS errors", + "eventType": "ERROR", + "formula": "COUNT(DISTINCT session_id)", + "condition": "source!='js_exception'" + }, } def Build(a): - params = {"project_id": a["projectId"], "now": TimeUTC.now()} + now = TimeUTC.now() + params = {"project_id": a["projectId"], "now": now} full_args = {} - j_s = True if a["seriesId"] is not None: a["filter"]["sort"] = "session_id" a["filter"]["order"] = schemas.SortOrderType.desc @@ -90,11 +127,11 @@ def Build(a): {query_part}""" else: colDef = LeftToDb[a["query"]["left"]] + params["event_type"] = LeftToDb[a["query"]["left"]].get("eventType") subQ = f"""SELECT {colDef["formula"]} AS value - FROM {colDef["table"]} - WHERE project_id = %(project_id)s + FROM {colDef["table"](now)} + WHERE project_id = %(project_id)s {"AND event_type=%(event_type)s" if params["event_type"] else ""} {"AND " + colDef["condition"] if colDef.get("condition") is not None else ""}""" - j_s = colDef.get("joinSessions", True) q = f"""SELECT coalesce(value,0) AS value, coalesce(value,0) {a["query"]["operator"]} {a["query"]["right"]} AS valid""" @@ -102,8 +139,9 @@ def Build(a): if a["seriesId"] is not None: q += f""" FROM ({subQ}) AS stat""" else: - q += f""" FROM ({subQ} AND timestamp>=%(startDate)s AND timestamp<=%(now)s - {"AND sessions.start_ts >= %(startDate)s AND sessions.start_ts <= %(now)s" if j_s else ""}) AS stat""" + q += f""" FROM ({subQ} + AND datetime>=toDateTime(%(startDate)s/1000) + AND datetime<=toDateTime(%(now)s/1000) ) AS stat""" params = {**params, **full_args, "startDate": TimeUTC.now() - a["options"]["currentPeriod"] * 60 * 1000} else: if a["options"]["change"] == schemas.AlertDetectionChangeType.change: @@ -115,12 +153,10 @@ def Build(a): "startDate": TimeUTC.now() - a["options"]["currentPeriod"] * 60 * 1000, "timestamp_sub2": TimeUTC.now() - 2 * a["options"]["currentPeriod"] * 60 * 1000} else: - sub1 = f"""{subQ} AND timestamp>=%(startDate)s - {"AND sessions.start_ts >= %(startDate)s" if j_s else ""}""" + sub1 = f"""{subQ} AND timestamp>=%(startDate)s""" params["startDate"] = TimeUTC.now() - a["options"]["currentPeriod"] * 60 * 1000 sub2 = f"""{subQ} AND timestamp<%(startDate)s - AND timestamp>=%(timestamp_sub2)s - {"AND sessions.start_ts < %(startDate)s AND sessions.start_ts >= %(timestamp_sub2)s" if j_s else ""}""" + AND timestamp>=%(timestamp_sub2)s""" params["timestamp_sub2"] = TimeUTC.now() - 2 * a["options"]["currentPeriod"] * 60 * 1000 sub1 = f"SELECT (( {sub1} )-( {sub2} )) AS value" q += f" FROM ( {sub1} ) AS stat" @@ -140,8 +176,7 @@ def Build(a): {"AND sessions.start_ts >= %(startDate)s" if j_s else ""}""" params["startDate"] = TimeUTC.now() - a["options"]["currentPeriod"] * 60 * 1000 sub2 = f"""{subQ} AND timestamp<%(startDate)s - AND timestamp>=%(timestamp_sub2)s - {"AND sessions.start_ts < %(startDate)s AND sessions.start_ts >= %(timestamp_sub2)s" if j_s else ""}""" + AND timestamp>=%(timestamp_sub2)s""" params["timestamp_sub2"] = TimeUTC.now() \ - (a["options"]["currentPeriod"] + a["options"]["currentPeriod"]) * 60 * 1000 sub1 = f"SELECT (({sub1})/NULLIF(({sub2}),0)-1)*100 AS value" @@ -155,20 +190,27 @@ def process(): all_alerts = alerts_listener.get_all_alerts() with pg_client.PostgresClient() as cur, ch_client.ClickHouseClient() as curc: for alert in all_alerts: + if alert["query"]["left"] == "CUSTOM": + continue + if alert["query"]["left"] == schemas.AlertColumn.performance__dom_content_loaded__average: + alert["query"]["left"] = schemas.AlertColumn.errors__backend__count if True or alerts_processor.can_check(alert): logging.info(f"Querying alertId:{alert['alertId']} name: {alert['name']}") query, params = Build(alert) - query = cur.mogrify(query, params) + query = curc.format(query, params) logging.debug(alert) logging.debug(query) try: print("------------------Alerts") + print(params) print(alert) print(query) print("------------------") + # continue + result = curc.execute(query) + if len(result) > 0: + result = result[0] continue - cur.execute(query) - result = cur.fetchone() if result["valid"]: logging.info("Valid alert, notifying users") notifications.append({