From 961c685310bf2c15462a4573c37bf5f2130bb348 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Thu, 13 Mar 2025 13:39:31 +0100 Subject: [PATCH] refactor(chalice): refactored error-details code refactor(chalice): moved error-details to use new product-analytics DB structure --- api/chalicelib/core/errors/__init__.py | 4 +- api/chalicelib/core/errors/errors_ch.py | 25 +---- api/chalicelib/core/errors/errors_details.py | 25 ++--- .../core/errors/{errors.py => errors_pg.py} | 27 +---- .../core/errors/modules/__init__.py | 1 + api/chalicelib/core/errors/modules/helper.py | 37 +++++++ ee/api/.gitignore | 2 +- .../core/errors/errors_details_exp.py | 99 +++++++------------ ee/api/clean-dev.sh | 2 +- 9 files changed, 101 insertions(+), 121 deletions(-) rename api/chalicelib/core/errors/{errors.py => errors_pg.py} (91%) create mode 100644 api/chalicelib/core/errors/modules/helper.py diff --git a/api/chalicelib/core/errors/__init__.py b/api/chalicelib/core/errors/__init__.py index e59155f1f..7363d005d 100644 --- a/api/chalicelib/core/errors/__init__.py +++ b/api/chalicelib/core/errors/__init__.py @@ -4,10 +4,10 @@ from decouple import config logger = logging.getLogger(__name__) -from . import errors as errors_legacy +from . import errors_pg as errors_legacy if config("EXP_ERRORS_SEARCH", cast=bool, default=False): logger.info(">>> Using experimental error search") from . import errors_ch as errors else: - from . import errors + from . import errors_pg as errors diff --git a/api/chalicelib/core/errors/errors_ch.py b/api/chalicelib/core/errors/errors_ch.py index ba728769f..5e984f0ed 100644 --- a/api/chalicelib/core/errors/errors_ch.py +++ b/api/chalicelib/core/errors/errors_ch.py @@ -5,6 +5,7 @@ from chalicelib.utils import ch_client, exp_ch_helper from chalicelib.utils import helper, metrics_helper from chalicelib.utils.TimeUTC import TimeUTC from . 
import errors as errors_legacy +from chalicelib.core.errors.modules import errors_helper def _multiple_values(values, value_key="value"): @@ -61,25 +62,6 @@ def get_batch(error_ids): return errors_legacy.get_batch(error_ids=error_ids) -def __get_basic_constraints(platform=None, time_constraint=True, startTime_arg_name="startDate", - endTime_arg_name="endDate", type_condition=True, project_key="project_id", table_name=None): - ch_sub_query = [f"{project_key} =toUInt16(%(project_id)s)"] - if table_name is not None: - table_name = table_name + "." - else: - table_name = "" - if type_condition: - ch_sub_query.append(f"{table_name}`$event_name`='ERROR'") - if time_constraint: - ch_sub_query += [f"{table_name}datetime >= toDateTime(%({startTime_arg_name})s/1000)", - f"{table_name}datetime < toDateTime(%({endTime_arg_name})s/1000)"] - if platform == schemas.PlatformType.MOBILE: - ch_sub_query.append("user_device_type = 'mobile'") - elif platform == schemas.PlatformType.DESKTOP: - ch_sub_query.append("user_device_type = 'desktop'") - return ch_sub_query - - def __get_basic_constraints_events(platform=None, time_constraint=True, startTime_arg_name="startDate", endTime_arg_name="endDate", type_condition=True, project_key="project_id", table_name=None): @@ -116,7 +98,7 @@ def search(data: schemas.SearchErrorsSchema, project: schemas.ProjectContext, us for f in data.filters: if f.type == schemas.FilterType.PLATFORM and len(f.value) > 0: platform = f.value[0] - ch_sessions_sub_query = __get_basic_constraints(platform, type_condition=False) + ch_sessions_sub_query = errors_helper.__get_basic_constraints_ch(platform, type_condition=False) # ignore platform for errors table ch_sub_query = __get_basic_constraints_events(None, type_condition=True) ch_sub_query.append("JSONExtractString(toString(`$properties`), 'source') = 'js_exception'") @@ -148,7 +130,8 @@ def search(data: schemas.SearchErrorsSchema, project: schemas.ProjectContext, us if len(data.events) > errors_condition_count: 
subquery_part_args, subquery_part = sessions.search_query_parts_ch(data=data, error_status=data.status, errors_only=True, - project_id=project.project_id, user_id=user_id, + project_id=project.project_id, + user_id=user_id, issue=None, favorite_only=False) subquery_part = f"INNER JOIN {subquery_part} USING(session_id)" diff --git a/api/chalicelib/core/errors/errors_details.py b/api/chalicelib/core/errors/errors_details.py index 55ed87b24..d2d3a16cb 100644 --- a/api/chalicelib/core/errors/errors_details.py +++ b/api/chalicelib/core/errors/errors_details.py @@ -1,4 +1,4 @@ -from chalicelib.core.errors import errors_legacy as errors +from chalicelib.core.errors.modules import errors_helper from chalicelib.utils import errors_helper from chalicelib.utils import pg_client, helper from chalicelib.utils.TimeUTC import TimeUTC @@ -40,26 +40,29 @@ def __process_tags(row): def get_details(project_id, error_id, user_id, **data): - pg_sub_query24 = errors.__get_basic_constraints(time_constraint=False, chart=True, step_size_name="step_size24") + pg_sub_query24 = errors_helper.__get_basic_constraints(time_constraint=False, chart=True, + step_size_name="step_size24") pg_sub_query24.append("error_id = %(error_id)s") - pg_sub_query30_session = errors.__get_basic_constraints(time_constraint=True, chart=False, - startTime_arg_name="startDate30", - endTime_arg_name="endDate30", - project_key="sessions.project_id") + pg_sub_query30_session = errors_helper.__get_basic_constraints(time_constraint=True, chart=False, + startTime_arg_name="startDate30", + endTime_arg_name="endDate30", + project_key="sessions.project_id") pg_sub_query30_session.append("sessions.start_ts >= %(startDate30)s") pg_sub_query30_session.append("sessions.start_ts <= %(endDate30)s") pg_sub_query30_session.append("error_id = %(error_id)s") - pg_sub_query30_err = errors.__get_basic_constraints(time_constraint=True, chart=False, - startTime_arg_name="startDate30", - endTime_arg_name="endDate30", 
project_key="errors.project_id") + pg_sub_query30_err = errors_helper.__get_basic_constraints(time_constraint=True, chart=False, + startTime_arg_name="startDate30", + endTime_arg_name="endDate30", + project_key="errors.project_id") pg_sub_query30_err.append("sessions.project_id = %(project_id)s") pg_sub_query30_err.append("sessions.start_ts >= %(startDate30)s") pg_sub_query30_err.append("sessions.start_ts <= %(endDate30)s") pg_sub_query30_err.append("error_id = %(error_id)s") pg_sub_query30_err.append("source ='js_exception'") - pg_sub_query30 = errors.__get_basic_constraints(time_constraint=False, chart=True, step_size_name="step_size30") + pg_sub_query30 = errors_helper.__get_basic_constraints(time_constraint=False, chart=True, + step_size_name="step_size30") pg_sub_query30.append("error_id = %(error_id)s") - pg_basic_query = errors.__get_basic_constraints(time_constraint=False) + pg_basic_query = errors_helper.__get_basic_constraints(time_constraint=False) pg_basic_query.append("error_id = %(error_id)s") with pg_client.PostgresClient() as cur: data["startDate24"] = TimeUTC.now(-1) diff --git a/api/chalicelib/core/errors/errors.py b/api/chalicelib/core/errors/errors_pg.py similarity index 91% rename from api/chalicelib/core/errors/errors.py rename to api/chalicelib/core/errors/errors_pg.py index 8679471ea..6799e9968 100644 --- a/api/chalicelib/core/errors/errors.py +++ b/api/chalicelib/core/errors/errors_pg.py @@ -7,6 +7,7 @@ from chalicelib.core.sourcemaps import sourcemaps from chalicelib.utils import pg_client, helper from chalicelib.utils.TimeUTC import TimeUTC from chalicelib.utils.metrics_helper import get_step_size +from chalicelib.core.errors.modules import errors_helper def get(error_id, family=False) -> dict | List[dict]: @@ -51,27 +52,6 @@ def get_batch(error_ids): return helper.list_to_camel_case(errors) -def __get_basic_constraints(platform: Optional[schemas.PlatformType] = None, time_constraint: bool = True, - startTime_arg_name: str = "startDate", 
endTime_arg_name: str = "endDate", - chart: bool = False, step_size_name: str = "step_size", - project_key: Optional[str] = "project_id"): - if project_key is None: - ch_sub_query = [] - else: - ch_sub_query = [f"{project_key} =%(project_id)s"] - if time_constraint: - ch_sub_query += [f"timestamp >= %({startTime_arg_name})s", - f"timestamp < %({endTime_arg_name})s"] - if chart: - ch_sub_query += [f"timestamp >= generated_timestamp", - f"timestamp < generated_timestamp + %({step_size_name})s"] - if platform == schemas.PlatformType.MOBILE: - ch_sub_query.append("user_device_type = 'mobile'") - elif platform == schemas.PlatformType.DESKTOP: - ch_sub_query.append("user_device_type = 'desktop'") - return ch_sub_query - - def __get_sort_key(key): return { schemas.ErrorSort.OCCURRENCE: "max_datetime", @@ -90,12 +70,13 @@ def search(data: schemas.SearchErrorsSchema, project: schemas.ProjectContext, us for f in data.filters: if f.type == schemas.FilterType.PLATFORM and len(f.value) > 0: platform = f.value[0] - pg_sub_query = __get_basic_constraints(platform, project_key="sessions.project_id") + pg_sub_query = errors_helper.__get_basic_constraints(platform, project_key="sessions.project_id") pg_sub_query += ["sessions.start_ts>=%(startDate)s", "sessions.start_ts<%(endDate)s", "source ='js_exception'", "pe.project_id=%(project_id)s"] # To ignore Script error pg_sub_query.append("pe.message!='Script error.'") - pg_sub_query_chart = __get_basic_constraints(platform, time_constraint=False, chart=True, project_key=None) + pg_sub_query_chart = errors_helper.__get_basic_constraints(platform, time_constraint=False, chart=True, + project_key=None) if platform: pg_sub_query_chart += ["start_ts>=%(startDate)s", "start_ts<%(endDate)s", "project_id=%(project_id)s"] pg_sub_query_chart.append("errors.error_id =details.error_id") diff --git a/api/chalicelib/core/errors/modules/__init__.py b/api/chalicelib/core/errors/modules/__init__.py index 6039c07ee..1a6014fd2 100644 --- 
a/api/chalicelib/core/errors/modules/__init__.py
+++ b/api/chalicelib/core/errors/modules/__init__.py
@@ -3,6 +3,7 @@ import logging
 from decouple import config
 
 logger = logging.getLogger(__name__)
+from . import helper as errors_helper
 
 if config("EXP_ERRORS_SEARCH", cast=bool, default=False):
     import chalicelib.core.sessions.sessions_ch as sessions
diff --git a/api/chalicelib/core/errors/modules/helper.py b/api/chalicelib/core/errors/modules/helper.py
new file mode 100644
index 000000000..4515c6126
--- /dev/null
+++ b/api/chalicelib/core/errors/modules/helper.py
@@ -0,0 +1,41 @@
+from typing import Optional
+
+import schemas
+
+def __get_basic_constraints(platform: Optional[schemas.PlatformType] = None, time_constraint: bool = True,
+                            startTime_arg_name: str = "startDate", endTime_arg_name: str = "endDate",
+                            chart: bool = False, step_size_name: str = "step_size",
+                            project_key: Optional[str] = "project_id"):
+    if project_key is None:
+        ch_sub_query = []
+    else:
+        ch_sub_query = [f"{project_key} =%(project_id)s"]
+    if time_constraint:
+        ch_sub_query += [f"timestamp >= %({startTime_arg_name})s",
+                         f"timestamp < %({endTime_arg_name})s"]
+    if chart:
+        ch_sub_query += [f"timestamp >= generated_timestamp",
+                         f"timestamp < generated_timestamp + %({step_size_name})s"]
+    if platform == schemas.PlatformType.MOBILE:
+        ch_sub_query.append("user_device_type = 'mobile'")
+    elif platform == schemas.PlatformType.DESKTOP:
+        ch_sub_query.append("user_device_type = 'desktop'")
+    return ch_sub_query
+
+def __get_basic_constraints_ch(platform=None, time_constraint=True, startTime_arg_name="startDate",
+                               endTime_arg_name="endDate", type_condition=True, project_key="project_id", table_name=None):
+    ch_sub_query = [f"{project_key} =toUInt16(%(project_id)s)"]
+    if table_name is not None:
+        table_name = table_name + "."
+ else: + table_name = "" + if type_condition: + ch_sub_query.append(f"{table_name}`$event_name`='ERROR'") + if time_constraint: + ch_sub_query += [f"{table_name}datetime >= toDateTime(%({startTime_arg_name})s/1000)", + f"{table_name}datetime < toDateTime(%({endTime_arg_name})s/1000)"] + if platform == schemas.PlatformType.MOBILE: + ch_sub_query.append("user_device_type = 'mobile'") + elif platform == schemas.PlatformType.DESKTOP: + ch_sub_query.append("user_device_type = 'desktop'") + return ch_sub_query diff --git a/ee/api/.gitignore b/ee/api/.gitignore index 60d59ac35..0bd67932c 100644 --- a/ee/api/.gitignore +++ b/ee/api/.gitignore @@ -287,7 +287,7 @@ Pipfile.lock /chalicelib/core/alerts/alerts_listener.py /chalicelib/core/alerts/modules/helpers.py /chalicelib/core/errors/modules/* -/chalicelib/core/errors/errors.py +/chalicelib/core/errors/errors_pg.py /chalicelib/core/errors/errors_ch.py /chalicelib/core/errors/errors_details.py /chalicelib/utils/contextual_validators.py diff --git a/ee/api/chalicelib/core/errors/errors_details_exp.py b/ee/api/chalicelib/core/errors/errors_details_exp.py index 11ae7bd5d..7f7d5feb2 100644 --- a/ee/api/chalicelib/core/errors/errors_details_exp.py +++ b/ee/api/chalicelib/core/errors/errors_details_exp.py @@ -1,31 +1,8 @@ -from decouple import config - -import schemas -from . 
import errors -from chalicelib.core import metrics, metadata -from chalicelib.core import sessions +from chalicelib.core.errors.modules import errors_helper from chalicelib.utils import ch_client, exp_ch_helper -from chalicelib.utils import pg_client, helper +from chalicelib.utils import helper from chalicelib.utils.TimeUTC import TimeUTC - - -def __flatten_sort_key_count_version(data, merge_nested=False): - if data is None: - return [] - return sorted( - [ - { - "name": f"{o[0][0][0]}@{v[0]}", - "count": v[1] - } for o in data for v in o[2] - ], - key=lambda o: o["count"], reverse=True) if merge_nested else \ - [ - { - "name": o[0][0][0], - "count": o[1][0][0], - } for o in data - ] +from chalicelib.utils.metrics_helper import get_step_size def __transform_map_to_tag(data, key1, key2, requested_key): @@ -89,14 +66,8 @@ def get_details(project_id, error_id, user_id, **data): MAIN_ERR_SESS_TABLE = exp_ch_helper.get_main_js_errors_sessions_table(0) MAIN_EVENTS_TABLE = exp_ch_helper.get_main_events_table(0) - ch_sub_query24 = errors.__get_basic_constraints(startTime_arg_name="startDate24", endTime_arg_name="endDate24") - ch_sub_query24.append("error_id = %(error_id)s") - - ch_sub_query30 = errors.__get_basic_constraints(startTime_arg_name="startDate30", endTime_arg_name="endDate30", - project_key="errors.project_id") - ch_sub_query30.append("error_id = %(error_id)s") - ch_basic_query = errors.__get_basic_constraints(time_constraint=False) - ch_basic_query.append("error_id = %(error_id)s") + ch_basic_query = errors_helper.__get_basic_constraints_ch(time_constraint=False) + ch_basic_query.append("toString(`$properties`.error_id) = %(error_id)s") with ch_client.ClickHouseClient() as ch: data["startDate24"] = TimeUTC.now(-1) @@ -105,9 +76,9 @@ def get_details(project_id, error_id, user_id, **data): data["endDate30"] = TimeUTC.now() density24 = int(data.get("density24", 24)) - step_size24 = errors.get_step_size(data["startDate24"], data["endDate24"], density24) + 
step_size24 = get_step_size(data["startDate24"], data["endDate24"], density24) density30 = int(data.get("density30", 30)) - step_size30 = errors.get_step_size(data["startDate30"], data["endDate30"], density30) + step_size30 = get_step_size(data["startDate30"], data["endDate30"], density30) params = { "startDate24": data['startDate24'], "endDate24": data['endDate24'], @@ -120,21 +91,21 @@ def get_details(project_id, error_id, user_id, **data): "error_id": error_id} main_ch_query = f"""\ - WITH pre_processed AS (SELECT error_id, - name, - message, + WITH pre_processed AS (SELECT toString(`$properties`.error_id) AS error_id, + toString(`$properties`.name) AS name, + toString(`$properties`.message) AS message, session_id, - datetime, - user_id, - user_browser, - user_browser_version, - user_os, - user_os_version, - user_device_type, - user_device, - user_country, - error_tags_keys, - error_tags_values + created_at AS datetime, + `$user_id` AS user_id, + `$browser` AS user_browser, + `$browser_version` AS user_browser_version, + `$os` AS user_os, + 'UNDEFINED' AS user_os_version, + NULL AS user_device_type, + `$device` AS user_device, + `$country` AS user_country, + [] AS error_tags_keys, + [] AS error_tags_values FROM {MAIN_ERR_SESS_TABLE} AS errors WHERE {" AND ".join(ch_basic_query)} ) @@ -203,20 +174,28 @@ def get_details(project_id, error_id, user_id, **data): ORDER BY count DESC) AS count_per_country_details ) AS mapped_country_details ON TRUE INNER JOIN (SELECT groupArray(map('timestamp', timestamp, 'count', count)) AS chart24 - FROM (SELECT toUnixTimestamp(toStartOfInterval(datetime, INTERVAL 3756 second)) * - 1000 AS timestamp, + FROM (SELECT gs.generate_series AS timestamp, COUNT(DISTINCT session_id) AS count - FROM {MAIN_EVENTS_TABLE} AS errors - WHERE {" AND ".join(ch_sub_query24)} + FROM generate_series(%(startDate24)s, %(endDate24)s, %(step_size24)s) AS gs + LEFT JOIN {MAIN_EVENTS_TABLE} AS errors ON(TRUE) + WHERE project_id = toUInt16(%(project_id)s) + 
AND `$event_name` = 'ERROR' + AND events.created_at >= toDateTime(timestamp / 1000) + AND events.created_at < toDateTime((timestamp + %(step_size24)s) / 1000) + AND toString(`$properties`.error_id) = %(error_id)s GROUP BY timestamp ORDER BY timestamp) AS chart_details ) AS chart_details24 ON TRUE INNER JOIN (SELECT groupArray(map('timestamp', timestamp, 'count', count)) AS chart30 - FROM (SELECT toUnixTimestamp(toStartOfInterval(datetime, INTERVAL 3724 second)) * - 1000 AS timestamp, + FROM (SELECT gs.generate_series AS timestamp, COUNT(DISTINCT session_id) AS count - FROM {MAIN_EVENTS_TABLE} AS errors - WHERE {" AND ".join(ch_sub_query30)} + FROM generate_series(%(startDate30)s, %(endDate30)s, %(step_size30)s) AS gs + LEFT JOIN {MAIN_EVENTS_TABLE} AS errors ON(TRUE) + WHERE project_id = toUInt16(%(project_id)s) + AND `$event_name` = 'ERROR' + AND events.created_at >= toDateTime(timestamp / 1000) + AND events.created_at < toDateTime((timestamp + %(step_size30)s) / 1000) + AND toString(`$properties`.error_id) = %(error_id)s GROUP BY timestamp ORDER BY timestamp) AS chart_details ) AS chart_details30 ON TRUE;""" @@ -254,8 +233,4 @@ def get_details(project_id, error_id, user_id, **data): row["last_hydrated_session"] = None row["favorite"] = False row["viewed"] = False - row["chart24"] = metrics.__complete_missing_steps(start_time=data["startDate24"], end_time=data["endDate24"], - density=density24, rows=row["chart24"], neutral={"count": 0}) - row["chart30"] = metrics.__complete_missing_steps(start_time=data["startDate30"], end_time=data["endDate30"], - density=density30, rows=row["chart30"], neutral={"count": 0}) return {"data": helper.dict_to_camel_case(row)} diff --git a/ee/api/clean-dev.sh b/ee/api/clean-dev.sh index 37e145634..b24d6a3e9 100755 --- a/ee/api/clean-dev.sh +++ b/ee/api/clean-dev.sh @@ -107,7 +107,7 @@ rm -rf ./chalicelib/core/alerts/alerts_processor_ch.py rm -rf ./chalicelib/core/alerts/alerts_listener.py rm -rf 
./chalicelib/core/alerts/modules/helpers.py rm -rf ./chalicelib/core/errors/modules -rm -rf ./chalicelib/core/errors/errors.py +rm -rf ./chalicelib/core/errors/errors_pg.py rm -rf ./chalicelib/core/errors/errors_ch.py rm -rf ./chalicelib/core/errors/errors_details.py rm -rf ./chalicelib/utils/contextual_validators.py