feat(chalice): get error details experimental

This commit is contained in:
Taha Yassine Kraiem 2022-10-14 19:42:49 +02:00
parent 419f834b9a
commit 742c1ad7fe
6 changed files with 345 additions and 28 deletions

View file

@ -139,7 +139,7 @@ def get_details(project_id, error_id, user_id, **data):
FROM (SELECT error_id,
name,
message,
COUNT(DISTINCT user_uuid) AS users,
COUNT(DISTINCT user_id) AS users,
COUNT(DISTINCT session_id) AS sessions
FROM public.errors
INNER JOIN events.errors AS s_errors USING (error_id)

View file

@ -145,7 +145,7 @@ def get_details(project_id, error_id, user_id, **data):
FROM (SELECT error_id,
name,
message,
COUNT(DISTINCT user_uuid) AS users,
COUNT(DISTINCT user_id) AS users,
COUNT(DISTINCT session_id) AS sessions
FROM public.errors
INNER JOIN events.errors AS s_errors USING (error_id)

View file

@ -115,6 +115,18 @@ def __flatten_sort_key_count_version(data, merge_nested=False):
]
def __transform_map_to_tag(data, key1, key2, requested_key):
    """Convert ROLLUP'ed count rows into ``{name, count}`` tag partitions.

    Each entry of *data* is a map produced by ``GROUP BY ROLLUP(key1, key2)``:
    - both keys absent/None -> the grand-total row (requested_key == 0, named "all")
    - only key1 present     -> a key1-level subtotal (requested_key == 1)
    - both keys present     -> a key1/key2 leaf row  (requested_key == 2)
    Only rows matching the requested rollup level are kept; counts are cast to int.
    """
    transformed = []
    for row in data:
        primary = row.get(key1)
        secondary = row.get(key2)
        count = row.get("count")
        if requested_key == 0 and primary is None and secondary is None:
            transformed.append({"name": "all", "count": int(count)})
        elif requested_key == 1 and primary is not None and secondary is None:
            transformed.append({"name": primary, "count": int(count)})
        elif requested_key == 2 and primary is not None and secondary is not None:
            transformed.append({"name": secondary, "count": int(count)})
    return transformed
def __flatten_sort_key_count(data):
if data is None:
return []
@ -149,8 +161,52 @@ def __process_tags(row):
{"name": "country", "partitions": __flatten_sort_key_count(data=row.pop("country_partition"))}
]
# TODO: solve memory issue
def get_details(project_id, error_id, user_id, **data):
def __process_tags_map(row):
    """Pop the four ``*_partition`` map-arrays out of *row* and expand them
    into the UI's tag list: one entry per tag name, each with its
    ``{name, count}`` partitions extracted at the proper rollup level."""
    browsers = row.pop("browsers_partition")
    oses = row.pop("os_partition")
    devices = row.pop("device_partition")
    countries = row.pop("country_partition")
    # (tag name, source partition, rollup key1, rollup key2, requested level)
    tag_specs = [
        ("browser", browsers, "browser", "browser_version", 1),
        ("browser.ver", browsers, "browser", "browser_version", 2),
        ("OS", oses, "os", "os_version", 1),
        ("OS.ver", oses, "os", "os_version", 2),
        ("device.family", devices, "device_type", "device", 1),
        ("device", devices, "device_type", "device", 2),
        ("country", countries, "country", "", 1),
    ]
    return [{"name": tag_name,
             "partitions": __transform_map_to_tag(data=source, key1=k1, key2=k2,
                                                  requested_key=level)}
            for tag_name, source, k1, k2, level in tag_specs]
def get_details_deprecated(project_id, error_id, user_id, **data):
if not config("EXP_ERRORS_GET", cast=bool, default=False):
return errors_legacy.get_details(project_id, error_id, user_id, **data)
@ -176,15 +232,15 @@ def get_details(project_id, error_id, user_id, **data):
ch_basic_query_session = ch_basic_query[:]
ch_basic_query_session.append("sessions.project_id = toUInt16(%(project_id)s)")
with ch_client.ClickHouseClient() as ch:
data["startDate24"] = TimeUTC.now(-1)
data["endDate24"] = TimeUTC.now()
data["startDate30"] = TimeUTC.now(-30)
data["endDate30"] = TimeUTC.now()
# # TODO: remove time limits
# data["startDate24"] = 1650470729000 - 24 * 60 * 60 * 1000
# data["endDate24"] = 1650470729000
# data["startDate30"] = 1650470729000 - 30 * 60 * 60 * 1000
# data["endDate30"] = 1650470729000
# data["startDate24"] = TimeUTC.now(-1)
# data["endDate24"] = TimeUTC.now()
# data["startDate30"] = TimeUTC.now(-30)
# data["endDate30"] = TimeUTC.now()
# TODO: remove time limits
data["startDate24"] = 1650470729000 - 24 * 60 * 60 * 1000
data["endDate24"] = 1650470729000
data["startDate30"] = 1650470729000 - 30 * 60 * 60 * 1000
data["endDate30"] = 1650470729000
density24 = int(data.get("density24", 24))
step_size24 = __get_step_size(data["startDate24"], data["endDate24"], density24)
density30 = int(data.get("density30", 30))
@ -282,7 +338,7 @@ def get_details(project_id, error_id, user_id, **data):
INNER JOIN (SELECT user_device_type,
groupArray([user_device, toString(count_per_device)]) AS versions_partition
FROM (SELECT user_device_type,
COALESCE(user_device,'unknown') AS user_device,
coalesce(user_device,'unknown') AS user_device,
COUNT(session_id) AS count_per_device
FROM {MAIN_EVENTS_TABLE} AS errors INNER JOIN {MAIN_SESSIONS_TABLE} AS sessions USING (session_id)
WHERE {" AND ".join(pg_sub_query30_err)}
@ -316,9 +372,9 @@ def get_details(project_id, error_id, user_id, **data):
ORDER BY timestamp) AS chart_details) AS chart_details30
ON details.error_id = chart_details30.error_id;"""
print("--------------------")
print(ch.format(main_ch_query, params))
print("--------------------")
# print("--------------------")
# print(ch.format(main_ch_query, params))
# print("--------------------")
row = ch.execute(query=main_ch_query, params=params)
if len(row) == 0:
return {"errors": ["error not found"]}
@ -327,7 +383,7 @@ def get_details(project_id, error_id, user_id, **data):
with pg_client.PostgresClient() as cur:
query = cur.mogrify(
f"""SELECT error_id, status, session_id, start_ts,
parent_error_id,session_id, user_anonymous_id,
parent_error_id, user_anonymous_id,
user_id, user_uuid, user_browser, user_browser_version,
user_os, user_os_version, user_device, payload,
FALSE AS favorite,
@ -359,11 +415,203 @@ def get_details(project_id, error_id, user_id, **data):
row["favorite"] = False
row["viewed"] = False
row["chart24"] = __rearrange_chart_details(start_at=data["startDate24"], end_at=data["endDate24"],
density=density24,
chart=row["chart24"])
density=density24, chart=row["chart24"])
row["chart30"] = __rearrange_chart_details(start_at=data["startDate30"], end_at=data["endDate30"],
density=density30,
chart=row["chart30"])
density=density30, chart=row["chart30"])
return {"data": helper.dict_to_camel_case(row)}
def get_details(project_id, error_id, user_id, **data):
# Experimental ClickHouse implementation of the error-details endpoint.
# Falls back to the legacy implementation unless the EXP_ERRORS_GET flag is
# set. Returns {"data": {...}} with aggregate stats, tag partitions, 24h/30d
# charts and the last session that hit this error, or
# {"errors": ["error not found"]} when error_id yields no rows.
if not config("EXP_ERRORS_GET", cast=bool, default=False):
return errors_legacy.get_details(project_id, error_id, user_id, **data)
# Resolve the ClickHouse tables to query (sessions, the js-errors/sessions
# materialized view, and the raw events table).
MAIN_SESSIONS_TABLE = exp_ch_helper.get_main_sessions_table(0)
MAIN_ERR_SESS_TABLE = exp_ch_helper.get_main_js_errors_sessions_table(0)
MAIN_EVENTS_TABLE = exp_ch_helper.get_main_events_table(0)
# NOTE(review): MAIN_EVENTS_TABLE_24 is computed but never used below.
MAIN_EVENTS_TABLE_24 = exp_ch_helper.get_main_events_table(TimeUTC.now())
# WHERE-clause fragments: 24h window, 30d window, and an un-windowed base set.
ch_sub_query24 = __get_basic_constraints(startTime_arg_name="startDate24", endTime_arg_name="endDate24")
ch_sub_query24.append("error_id = %(error_id)s")
# pg_sub_query30_err = __get_basic_constraints(time_constraint=True, startTime_arg_name="startDate30",
# endTime_arg_name="endDate30", project_key="errors.project_id",
# table_name="errors")
# pg_sub_query30_err.append("sessions.project_id = toUInt16(%(project_id)s)")
# pg_sub_query30_err.append("sessions.datetime >= toDateTime(%(startDate30)s/1000)")
# pg_sub_query30_err.append("sessions.datetime <= toDateTime(%(endDate30)s/1000)")
# pg_sub_query30_err.append("error_id = %(error_id)s")
# pg_sub_query30_err.append("source ='js_exception'")
ch_sub_query30 = __get_basic_constraints(startTime_arg_name="startDate30", endTime_arg_name="endDate30",
project_key="errors.project_id")
ch_sub_query30.append("error_id = %(error_id)s")
ch_basic_query = __get_basic_constraints(time_constraint=False)
ch_basic_query.append("error_id = %(error_id)s")
# ch_basic_query_session = ch_basic_query[:]
# ch_basic_query_session.append("sessions.project_id = toUInt16(%(project_id)s)")
with ch_client.ClickHouseClient() as ch:
# data["startDate24"] = TimeUTC.now(-1)
# data["endDate24"] = TimeUTC.now()
# data["startDate30"] = TimeUTC.now(-30)
# data["endDate30"] = TimeUTC.now()
# TODO: remove time limits
# NOTE(review): the windows are pinned to a fixed debug timestamp
# (1650470729000 ~ 2022-04-20); any startDate/endDate supplied by the
# caller is ignored until this is removed.
data["startDate24"] = 1650470729000 - 24 * 60 * 60 * 1000
data["endDate24"] = 1650470729000
data["startDate30"] = 1650470729000 - 30 * 60 * 60 * 1000
data["endDate30"] = 1650470729000
# Chart densities and derived bucket sizes (seconds per chart point).
density24 = int(data.get("density24", 24))
step_size24 = __get_step_size(data["startDate24"], data["endDate24"], density24)
density30 = int(data.get("density30", 30))
step_size30 = __get_step_size(data["startDate30"], data["endDate30"], density30)
params = {
"startDate24": data['startDate24'],
"endDate24": data['endDate24'],
"startDate30": data['startDate30'],
"endDate30": data['endDate30'],
"project_id": project_id,
"userId": user_id,
"step_size24": step_size24,
"step_size30": step_size30,
"error_id": error_id}
# One query: base rows from the MV (pre_processed CTE), then cross-joined
# aggregates — totals, first/last occurrence, last session id, per-tag
# ROLLUP partitions, and the two time-bucketed charts.
# NOTE(review): step_size24/step_size30 are passed in params but the chart
# subqueries hard-code INTERVAL 3756/3724 second — presumably leftover
# debug values matching the pinned window; confirm and switch to
# %(step_size24)s / %(step_size30)s.
main_ch_query = f"""\
WITH pre_processed AS (SELECT error_id,
name,
message,
session_id,
datetime,
user_id,
user_browser,
user_browser_version,
user_os,
user_os_version,
user_device_type,
user_device,
user_country
FROM {MAIN_ERR_SESS_TABLE} AS errors
WHERE {" AND ".join(ch_basic_query)}
)
SELECT %(error_id)s AS error_id, name, message,users,
first_occurrence,last_occurrence,last_session_id,
sessions,browsers_partition,os_partition,device_partition,
country_partition,chart24,chart30
FROM (SELECT error_id,
name,
message,
COUNT(DISTINCT user_id) AS users,
COUNT(DISTINCT session_id) AS sessions
FROM pre_processed
WHERE datetime >= toDateTime(%(startDate30)s / 1000)
AND datetime <= toDateTime(%(endDate30)s / 1000)
GROUP BY error_id, name, message) AS details
INNER JOIN (SELECT toUnixTimestamp(max(datetime)) * 1000 AS last_occurrence,
toUnixTimestamp(min(datetime)) * 1000 AS first_occurrence
FROM pre_processed) AS time_details ON TRUE
INNER JOIN (SELECT session_id AS last_session_id
FROM pre_processed
ORDER BY datetime DESC
LIMIT 1) AS last_session_details ON TRUE
INNER JOIN (SELECT groupArray(details) AS browsers_partition
FROM (SELECT COUNT(1) AS count,
coalesce(nullIf(user_browser,''),toNullable('unknown')) AS browser,
coalesce(nullIf(user_browser_version,''),toNullable('unknown')) AS browser_version,
map('browser', browser,
'browser_version', browser_version,
'count', toString(count)) AS details
FROM pre_processed
GROUP BY ROLLUP(browser, browser_version)
ORDER BY browser nulls first, browser_version nulls first, count DESC) AS mapped_browser_details
) AS browser_details ON TRUE
INNER JOIN (SELECT groupArray(details) AS os_partition
FROM (SELECT COUNT(1) AS count,
coalesce(nullIf(user_os,''),toNullable('unknown')) AS os,
coalesce(nullIf(user_os_version,''),toNullable('unknown')) AS os_version,
map('os', os,
'os_version', os_version,
'count', toString(count)) AS details
FROM pre_processed
GROUP BY ROLLUP(os, os_version)
ORDER BY os nulls first, os_version nulls first, count DESC) AS mapped_os_details
) AS os_details ON TRUE
INNER JOIN (SELECT groupArray(details) AS device_partition
FROM (SELECT COUNT(1) AS count,
coalesce(nullIf(user_device,''),toNullable('unknown')) AS user_device,
map('device_type', toString(user_device_type),
'device', user_device,
'count', toString(count)) AS details
FROM pre_processed
GROUP BY ROLLUP(user_device_type, user_device)
ORDER BY user_device_type nulls first, user_device nulls first, count DESC
) AS count_per_device_details
) AS mapped_device_details ON TRUE
INNER JOIN (SELECT groupArray(details) AS country_partition
FROM (SELECT COUNT(1) AS count,
map('country', toString(user_country),
'count', toString(count)) AS details
FROM pre_processed
GROUP BY user_country
ORDER BY count DESC) AS count_per_country_details
) AS mapped_country_details ON TRUE
INNER JOIN (SELECT groupArray(map('timestamp', timestamp, 'count', count)) AS chart24
FROM (SELECT toUnixTimestamp(toStartOfInterval(datetime, INTERVAL 3756 second)) *
1000 AS timestamp,
COUNT(DISTINCT session_id) AS count
FROM {MAIN_EVENTS_TABLE} AS errors
WHERE {" AND ".join(ch_sub_query24)}
GROUP BY timestamp
ORDER BY timestamp) AS chart_details
) AS chart_details24 ON TRUE
INNER JOIN (SELECT groupArray(map('timestamp', timestamp, 'count', count)) AS chart30
FROM (SELECT toUnixTimestamp(toStartOfInterval(datetime, INTERVAL 3724 second)) *
1000 AS timestamp,
COUNT(DISTINCT session_id) AS count
FROM {MAIN_EVENTS_TABLE} AS errors
WHERE {" AND ".join(ch_sub_query30)}
GROUP BY timestamp
ORDER BY timestamp) AS chart_details
) AS chart_details30 ON TRUE;"""
# print("--------------------")
# print(ch.format(main_ch_query, params))
# print("--------------------")
row = ch.execute(query=main_ch_query, params=params)
if len(row) == 0:
return {"errors": ["error not found"]}
row = row[0]
# Expand the four *_partition map-arrays into the UI tag list (pops them
# off the row in the process).
row["tags"] = __process_tags_map(row)
# Hydrate the most recent session that triggered this error.
query = f"""SELECT session_id, toUnixTimestamp(datetime) * 1000 AS start_ts,
user_anonymous_id,user_id, user_uuid, user_browser, user_browser_version,
user_os, user_os_version, user_device, FALSE AS favorite, True AS viewed
FROM {MAIN_SESSIONS_TABLE} AS sessions
WHERE project_id = toUInt16(%(project_id)s)
AND session_id = %(session_id)s
ORDER BY datetime DESC
LIMIT 1;"""
params = {"project_id": project_id, "session_id": row["last_session_id"], "userId": user_id}
# print("--------------------")
# print(ch.format(query, params))
# print("--------------------")
status = ch.execute(query=query, params=params)
# NOTE(review): ch.execute presumably returns a (possibly empty) list, so
# "is not None" may never take the else branch for a missing session —
# confirm whether an empty-list check is intended here.
if status is not None:
status = status[0]
# row["stack"] = format_first_stack_frame(status).pop("stack")
# row["status"] = status.pop("status")
# row["parent_error_id"] = status.pop("parent_error_id")
row["favorite"] = status.pop("favorite")
row["viewed"] = status.pop("viewed")
row["last_hydrated_session"] = status
else:
# row["stack"] = []
row["last_hydrated_session"] = None
# row["status"] = "untracked"
# row["parent_error_id"] = None
row["favorite"] = False
row["viewed"] = False
# Fill gaps in both chart series with zero-count buckets.
# NOTE(review): relies on reaching a double-underscore helper through the
# metrics module — works at module level, but fragile as an API.
row["chart24"] = metrics.__complete_missing_steps(start_time=data["startDate24"], end_time=data["endDate24"],
density=density24, rows=row["chart24"], neutral={"count": 0})
row["chart30"] = metrics.__complete_missing_steps(start_time=data["startDate30"], end_time=data["endDate30"],
density=density30, rows=row["chart30"], neutral={"count": 0})
return {"data": helper.dict_to_camel_case(row)}
@ -444,7 +692,7 @@ def get_details_chart(project_id, error_id, user_id, **data):
INNER JOIN (SELECT user_device_type,
groupArray([user_device, toString(count_per_device)]) AS versions_partition
FROM (SELECT user_device_type,
COALESCE(user_device,'unknown') AS user_device,
coalesce(user_device,'unknown') AS user_device,
COUNT(session_id) AS count_per_device
FROM errors
WHERE {" AND ".join(ch_sub_query)}
@ -904,11 +1152,11 @@ def get_sessions(start_date, end_date, project_id, user_id, error_id):
s.pages_count,
s.errors_count,
s.issue_types,
COALESCE((SELECT TRUE
coalesce((SELECT TRUE
FROM public.user_favorite_sessions AS fs
WHERE s.session_id = fs.session_id
AND fs.user_id = %(userId)s LIMIT 1), FALSE) AS favorite,
COALESCE((SELECT TRUE
coalesce((SELECT TRUE
FROM public.user_viewed_sessions AS fs
WHERE s.session_id = fs.session_id
AND fs.user_id = %(userId)s LIMIT 1), FALSE) AS viewed

View file

@ -40,3 +40,9 @@ def get_user_viewed_sessions_table(timestamp=0):
def get_user_viewed_errors_table(timestamp=0):
    """Name of the table recording which errors each user has viewed.

    *timestamp* is accepted for signature parity with the other table
    helpers but is not used in the lookup.
    """
    return "experimental.user_viewed_errors"
def get_main_js_errors_sessions_table(timestamp=0):
    """Return the ClickHouse table serving JS-error/session lookups.

    *timestamp* is currently ignored: the time-window fallback to
    "experimental.events" (EXP_7D_MV gating on the last 7 days) is
    disabled, so the materialized view is always used.
    """
    return "experimental.js_errors_sessions_mv"

View file

@ -16,7 +16,7 @@ CREATE TABLE IF NOT EXISTS experimental.issues
project_id UInt16,
issue_id String,
type Enum8('click_rage'=1,'dead_click'=2,'excessive_scrolling'=3,'bad_request'=4,'missing_resource'=5,'memory'=6,'cpu'=7,'slow_resource'=8,'slow_page_load'=9,'crash'=10,'ml_cpu'=11,'ml_memory'=12,'ml_dead_click'=13,'ml_click_rage'=14,'ml_mouse_thrashing'=15,'ml_excessive_scrolling'=16,'ml_slow_resources'=17,'custom'=18,'js_exception'=19),
context_string text NOT NULL,
context_string String,
context_keys Array(String),
context_values Array(Nullable(String)),
_timestamp DateTime DEFAULT now()
@ -25,4 +25,35 @@ CREATE TABLE IF NOT EXISTS experimental.issues
ORDER BY (project_id, issue_id, type)
TTL _timestamp + INTERVAL 3 MONTH;
-- Materialized view joining JS-exception error events with their session
-- metadata so error-detail pages can be served from a single table.
-- ReplacingMergeTree(_timestamp) deduplicates rows sharing the ORDER BY key
-- (keeping the newest _timestamp); rows expire after 35 days; POPULATE
-- backfills from existing data at creation time.
CREATE MATERIALIZED VIEW IF NOT EXISTS experimental.js_errors_sessions_mv
ENGINE = ReplacingMergeTree(_timestamp)
PARTITION BY toYYYYMM(datetime)
ORDER BY (project_id, datetime, event_type, error_id, session_id)
TTL _timestamp + INTERVAL 35 DAY
POPULATE
AS
SELECT session_id,
       project_id,
       events.datetime AS datetime,
       event_type,
       assumeNotNull(error_id) AS error_id,
       source,
       name,
       message,
       error_tags_keys,
       error_tags_values,
       message_id,
       -- user_id added: the details query aggregates COUNT(DISTINCT user_id)
       -- against this view, and the full-schema definition of the same view
       -- already includes it — the two definitions must stay in sync.
       user_id,
       user_browser,
       user_browser_version,
       user_os,
       user_os_version,
       user_device_type,
       user_device,
       user_country,
       _timestamp
FROM experimental.events
         INNER JOIN experimental.sessions USING (session_id)
WHERE event_type = 'ERROR'
  AND source = 'js_exception';
-- TODO: find a way to update materialized views; or drop and re-create them

View file

@ -201,7 +201,7 @@ CREATE TABLE IF NOT EXISTS experimental.issues
project_id UInt16,
issue_id String,
type Enum8('click_rage'=1,'dead_click'=2,'excessive_scrolling'=3,'bad_request'=4,'missing_resource'=5,'memory'=6,'cpu'=7,'slow_resource'=8,'slow_page_load'=9,'crash'=10,'ml_cpu'=11,'ml_memory'=12,'ml_dead_click'=13,'ml_click_rage'=14,'ml_mouse_thrashing'=15,'ml_excessive_scrolling'=16,'ml_slow_resources'=17,'custom'=18,'js_exception'=19),
context_string text NOT NULL,
context_string String,
context_keys Array(String),
context_values Array(Nullable(String)),
_timestamp DateTime DEFAULT now()
@ -360,4 +360,36 @@ SELECT session_id,
FROM experimental.sessions
WHERE datetime >= now() - INTERVAL 7 DAY
AND isNotNull(duration)
AND duration > 0;
AND duration > 0;
-- Materialized view joining JS-exception error events with their session
-- metadata so error-detail pages can be served from a single table.
-- ReplacingMergeTree(_timestamp) deduplicates rows sharing the ORDER BY key
-- (keeping the newest _timestamp); rows expire after 35 days; POPULATE
-- backfills from existing data at creation time.
CREATE MATERIALIZED VIEW IF NOT EXISTS experimental.js_errors_sessions_mv
ENGINE = ReplacingMergeTree(_timestamp)
PARTITION BY toYYYYMM(datetime)
ORDER BY (project_id, datetime, event_type, error_id, session_id)
TTL _timestamp + INTERVAL 35 DAY
POPULATE
AS
SELECT session_id,
project_id,
events.datetime AS datetime,
event_type,
-- error_id is non-null for every source row (filtered to ERROR events below)
assumeNotNull(error_id) AS error_id,
source,
name,
message,
error_tags_keys,
error_tags_values,
message_id,
user_id,
user_browser,
user_browser_version,
user_os,
user_os_version,
user_device_type,
user_device,
user_country,
_timestamp
FROM experimental.events
INNER JOIN experimental.sessions USING (session_id)
WHERE event_type = 'ERROR'
AND source = 'js_exception';