feat(chalice): CH errors search

feat(DB): CH new structure
Taha Yassine Kraiem 2022-08-05 17:02:31 +02:00
parent 751298f650
commit 6bc18dc330
3 changed files with 148 additions and 148 deletions


@@ -536,15 +536,19 @@ def search(data: schemas.SearchErrorsSchema, project_id, user_id):
subquery_part = ""
params = {}
if len(data.events) > 0:
errors_filters = []
for e in data.events:
errors_condition_count = 0
for i, e in enumerate(data.events):
if e.type == schemas.EventType.error:
errors_filters.append(e)
if len(errors_filters) == len(data.events):
# TODO: search errors by name and message
print("----------Error conditions only")
print(errors_filters)
else:
errors_condition_count += 1
is_any = _isAny_opreator(e.operator)
op = __get_sql_operator(e.operator)
e_k = f"e_value{i}"
params = {**params, **_multiple_values(e.value, value_key=e_k)}
if not is_any and e.value not in [None, "*", ""]:
ch_sub_query.append(
_multiple_conditions(f"(message {op} %({e_k})s OR name {op} %({e_k})s)",
e.value, value_key=e_k))
if len(data.events) > errors_condition_count:
print("----------Sessions conditions")
subquery_part_args, subquery_part = sessions.search_query_parts_ch(data=data, error_status=data.status,
errors_only=True,
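
Illustrative aside, not part of this commit: the new branch above adds one parameterized (message/name) condition per error event and merges the event values into params. A minimal sketch of that expansion, using simplified stand-ins for the project's _multiple_values/_multiple_conditions helpers (the real implementations may differ):

# Sketch only; these helpers are simplified stand-ins, not the project's code.
def _multiple_values(values, value_key):
    # one named parameter per value: e_value0_0, e_value0_1, ...
    return {f"{value_key}_{i}": v for i, v in enumerate(values)}

def _multiple_conditions(condition, values, value_key):
    # repeat the condition once per value and OR the copies together
    parts = [condition.replace(f"%({value_key})s", f"%({value_key}_{i})s")
             for i in range(len(values))]
    return "(" + " OR ".join(parts) + ")"

values = ["TypeError", "undefined is not a function"]
print(_multiple_values(values, value_key="e_value0"))
print(_multiple_conditions("(message ILIKE %(e_value0)s OR name ILIKE %(e_value0)s)",
                           values, value_key="e_value0"))
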
@@ -640,22 +644,14 @@ def search(data: schemas.SearchErrorsSchema, project_id, user_id):
if len(f.value) > 1 and f.value[1] is not None and int(f.value[1]) > 0:
ch_sessions_sub_query.append("s.duration <= %(maxDuration)s")
params["maxDuration"] = f.value[1]
# TODO: support referrer search
# elif filter_type == schemas.FilterType.referrer:
# # extra_from += f"INNER JOIN {events.event_type.LOCATION.table} AS p USING(session_id)"
# if is_any:
# referrer_constraint = 'isNotNull(r.base_referrer)'
# else:
# referrer_constraint = _multiple_conditions(f"r.base_referrer {op} %({f_k})s", f.value,
# is_not=is_not, value_key=f_k)
# referrer_constraint = f"""(SELECT DISTINCT session_id
# FROM {MAIN_EVENTS_TABLE} AS r
# WHERE {" AND ".join([f"r.{b}" for b in __events_where_basic])}
# AND event_type='{__get_event_type(schemas.EventType.location)}'
# AND {referrer_constraint})"""
# # events_conditions_where.append(f"""main.session_id IN {referrer_constraint}""")
# # ch_sessions_sub_query.append(f"""s.session_id IN {referrer_constraint}""")
# extra_from += f"\nINNER JOIN {referrer_constraint} AS referred ON(referred.session_id=s.session_id)"
elif filter_type == schemas.FilterType.referrer:
# extra_from += f"INNER JOIN {events.event_type.LOCATION.table} AS p USING(session_id)"
if is_any:
referrer_constraint = 'isNotNull(s.base_referrer)'
else:
referrer_constraint = _multiple_conditions(f"s.base_referrer {op} %({f_k})s", f.value,
is_not=is_not, value_key=f_k)
elif filter_type == schemas.FilterType.metadata:
# get metadata list only if you need it
if meta_keys is None:
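
Illustrative aside, not part of this commit: the rewritten referrer branch above filters on the denormalized s.base_referrer column of the sessions table instead of joining a location-events subquery, keeping the is_any and is_not branches. A rough sketch with a hypothetical stand-in for _multiple_conditions:

# Sketch only; referrer_condition is a hypothetical stand-in, not the project's helper.
def referrer_condition(values, op, value_key, is_any=False, is_not=False):
    if is_any:
        # "any referrer" only requires the column to be populated
        return "isNotNull(s.base_referrer)"
    parts = [f"s.base_referrer {op} %({value_key}_{i})s" for i in range(len(values))]
    joined = "(" + " OR ".join(parts) + ")"
    return f"NOT {joined}" if is_not else joined

print(referrer_condition(["https://duckduckgo.com/"], op="=", value_key="f_value3"))
print(referrer_condition([], op="=", value_key="f_value3", is_any=True))
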


@@ -227,8 +227,8 @@ def _isUndefined_operator(op: schemas.SearchEventOperator):
# This function executes the query and returns the result
def search_sessions(data: schemas.SessionsSearchPayloadSchema, project_id, user_id, errors_only=False,
error_status=schemas.ErrorStatus.all, count_only=False, issue=None):
def search_sessions_pg(data: schemas.SessionsSearchPayloadSchema, project_id, user_id, errors_only=False,
error_status=schemas.ErrorStatus.all, count_only=False, issue=None):
full_args, query_part = search_query_parts(data=data, error_status=error_status, errors_only=errors_only,
favorite_only=data.bookmarked, issue=issue, project_id=project_id,
user_id=user_id)
@@ -335,6 +335,7 @@ def search_sessions(data: schemas.SessionsSearchPayloadSchema, project_id, user_
}
# This function executes the query and returns the result
def search_sessions(data: schemas.SessionsSearchPayloadSchema, project_id, user_id, errors_only=False,
error_status=schemas.ErrorStatus.all, count_only=False, issue=None):
print("------ search2_ch")
@@ -1225,6 +1226,7 @@ def __get_event_type(event_type: Union[schemas.EventType, schemas.PerformanceEve
raise Exception(f"unsupported event_type:{event_type}")
return defs.get(event_type)
# This function generates the query and returns it along with the dict of query arguments
def search_query_parts_ch(data, error_status, errors_only, favorite_only, issue, project_id, user_id, extra_event=None):
ss_constraints = []


@@ -1,25 +1,25 @@
ALTER TABLE sessions
DROP COLUMN pages_count;
-- ALTER TABLE sessions
-- DROP COLUMN pages_count;
CREATE TABLE projects_metadata
(
project_id UInt16,
metadata_1 Nullable(String),
metadata_2 Nullable(String),
metadata_3 Nullable(String),
metadata_4 Nullable(String),
metadata_5 Nullable(String),
metadata_6 Nullable(String),
metadata_7 Nullable(String),
metadata_8 Nullable(String),
metadata_9 Nullable(String),
metadata_10 Nullable(String),
_timestamp DateTime DEFAULT now()
) ENGINE = ReplacingMergeTree(_timestamp)
PARTITION BY toYYYYMM(_timestamp)
ORDER BY (project_id)
SETTINGS index_granularity = 512;
-- CREATE TABLE projects_metadata
-- (
-- project_id UInt16,
-- metadata_1 Nullable(String),
-- metadata_2 Nullable(String),
-- metadata_3 Nullable(String),
-- metadata_4 Nullable(String),
-- metadata_5 Nullable(String),
-- metadata_6 Nullable(String),
-- metadata_7 Nullable(String),
-- metadata_8 Nullable(String),
-- metadata_9 Nullable(String),
-- metadata_10 Nullable(String),
-- _timestamp DateTime DEFAULT now()
-- ) ENGINE = ReplacingMergeTree(_timestamp)
-- PARTITION BY toYYYYMM(_timestamp)
-- ORDER BY (project_id)
-- SETTINGS index_granularity = 512;
CREATE TABLE IF NOT EXISTS events
(
@@ -132,6 +132,8 @@ CREATE TABLE IF NOT EXISTS sessions
metadata_9 Nullable(String),
metadata_10 Nullable(String),
issue_types Array(LowCardinality(String)),
referrer Nullable(String),
base_referrer Nullable(String),
_timestamp DateTime DEFAULT now()
) ENGINE = ReplacingMergeTree(_timestamp)
PARTITION BY toYYYYMMDD(datetime)
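
Illustrative aside, not part of this migration: with referrer and base_referrer added to the sessions table, a referrer filter can be answered from sessions alone. A sketch of such a lookup, assuming the clickhouse_driver package and a local ClickHouse instance (neither is implied by the commit):

# Sketch only; connection details and project_id are placeholder assumptions.
from clickhouse_driver import Client

client = Client(host="localhost")
query = """SELECT base_referrer, count() AS session_count
           FROM sessions
           WHERE project_id = %(project_id)s
             AND isNotNull(base_referrer)
           GROUP BY base_referrer
           ORDER BY session_count DESC
           LIMIT 10"""
print(client.execute(query, {"project_id": 1}))
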
@@ -142,7 +144,7 @@ CREATE TABLE IF NOT EXISTS sessions
CREATE TABLE IF NOT EXISTS user_viewed_sessions
(
project_id UInt16,
user_id UInt64,
user_id UInt32,
session_id UInt64,
_timestamp DateTime DEFAULT now()
) ENGINE = ReplacingMergeTree(_timestamp)
@@ -153,7 +155,7 @@ CREATE TABLE IF NOT EXISTS user_viewed_sessions
CREATE TABLE IF NOT EXISTS user_viewed_errors
(
project_id UInt16,
user_id UInt64,
user_id UInt32,
error_id String,
_timestamp DateTime DEFAULT now()
) ENGINE = ReplacingMergeTree(_timestamp)
@@ -172,23 +174,22 @@ CREATE TABLE IF NOT EXISTS autocomplete
ORDER BY (project_id, type, value)
TTL _timestamp + INTERVAL 1 MONTH;
-- TODO: remove this table
CREATE TABLE IF NOT EXISTS errors
(
error_id String,
project_id UInt16,
source Enum8('js_exception'=1,'bugsnag'=2,'cloudwatch'=3,'datadog'=4,'newrelic'=5,'rollbar'=6,'sentry'=7,'stackdriver'=8,'sumologic'=9, 'elasticsearch'=10),
name Nullable(String),
message String,
payload String,
stacktrace Nullable(String), --to save the stacktrace and not query S3 another time
stacktrace_parsed_at Nullable(DateTime),
_timestamp DateTime DEFAULT now()
) ENGINE = ReplacingMergeTree(_timestamp)
PARTITION BY toYYYYMMDD(_timestamp)
ORDER BY (project_id, source, error_id)
TTL _timestamp + INTERVAL 1 MONTH
SETTINGS index_granularity = 512;
-- CREATE TABLE IF NOT EXISTS errors
-- (
-- error_id String,
-- project_id UInt16,
-- source Enum8('js_exception'=1,'bugsnag'=2,'cloudwatch'=3,'datadog'=4,'newrelic'=5,'rollbar'=6,'sentry'=7,'stackdriver'=8,'sumologic'=9, 'elasticsearch'=10),
-- name Nullable(String),
-- message String,
-- payload String,
-- stacktrace Nullable(String), --to save the stacktrace and not query S3 another time
-- stacktrace_parsed_at Nullable(DateTime),
-- _timestamp DateTime DEFAULT now()
-- ) ENGINE = ReplacingMergeTree(_timestamp)
-- PARTITION BY toYYYYMMDD(_timestamp)
-- ORDER BY (project_id, source, error_id)
-- TTL _timestamp + INTERVAL 1 MONTH
-- SETTINGS index_granularity = 512;
CREATE MATERIALIZED VIEW sessions_l7d_mv
ENGINE = ReplacingMergeTree(_timestamp)
@@ -215,86 +216,87 @@ SELECT *
FROM massive_split.events_s
WHERE datetime >= now() - INTERVAL 7 DAY;
CREATE MATERIALIZED VIEW sessions_info_l1m_mv
ENGINE = ReplacingMergeTree(_timestamp)
PARTITION BY toYYYYMM(datetime)
ORDER BY (project_id, datetime, session_id)
TTL datetime + INTERVAL 1 MONTH
SETTINGS index_granularity = 512
POPULATE
AS
SELECT project_id,
session_id,
datetime,
now() AS _timestamp,
toJSONString(map('project_id', toString(project_id),
'session_id', toString(session_id),
'user_uuid', toString(user_uuid),
'user_id', user_id,
'user_os', user_os,
'user_browser', user_browser,
'user_device', user_device,
--'user_device_type', user_device_type,
--'user_country', user_country,
'start_ts', toString(datetime),
'duration', toString(duration),
'events_count', toString(events_count),
'pages_count', toString(pages_count),
'errors_count', toString(errors_count),
-- 'user_anonymous_id', user_anonymous_id,
-- 'platform', platform,
-- 'issue_score', issue_score,
-- issue_types,
-- favorite,
-- viewed,
'metadata', CAST((arrayFilter(x->isNotNull(x),
arrayMap(
x->if(isNotNull(x[1]) AND isNotNull(x[2]), toString(x[1]),
NULL),
[
[projects_meta.metadata_1,sessions.metadata_1],
[projects_meta.metadata_2,sessions.metadata_2],
[projects_meta.metadata_3,sessions.metadata_3],
[projects_meta.metadata_4,sessions.metadata_4],
[projects_meta.metadata_5,sessions.metadata_5],
[projects_meta.metadata_6,sessions.metadata_6],
[projects_meta.metadata_7,sessions.metadata_7],
[projects_meta.metadata_8,sessions.metadata_8],
[projects_meta.metadata_9,sessions.metadata_9],
[projects_meta.metadata_10,sessions.metadata_10]
])),
arrayFilter(x->isNotNull(x),
arrayMap(
x->if(isNotNull(x[1]) AND isNotNull(x[2]), toString(x[2]),
NULL),
[
[projects_meta.metadata_1,sessions.metadata_1],
[projects_meta.metadata_2,sessions.metadata_2],
[projects_meta.metadata_3,sessions.metadata_3],
[projects_meta.metadata_4,sessions.metadata_4],
[projects_meta.metadata_5,sessions.metadata_5],
[projects_meta.metadata_6,sessions.metadata_6],
[projects_meta.metadata_7,sessions.metadata_7],
[projects_meta.metadata_8,sessions.metadata_8],
[projects_meta.metadata_9,sessions.metadata_9],
[projects_meta.metadata_10,sessions.metadata_10]
]))), 'Map(String,String)')
)) AS info
FROM massive_split.sessions
INNER JOIN projects_metadata USING (project_id)
WHERE datetime >= now() - INTERVAL 1 MONTH
AND isNotNull(duration)
AND duration > 0;
CREATE MATERIALIZED VIEW sessions_info_l7d_mv
ENGINE = ReplacingMergeTree(_timestamp)
PARTITION BY toYYYYMMDD(datetime)
ORDER BY (project_id, datetime, session_id)
TTL datetime + INTERVAL 7 DAY
SETTINGS index_granularity = 512
POPULATE
AS
SELECT *
FROM sessions_info_l1m_mv
WHERE datetime >= now() - INTERVAL 7 DAY;
--
--
-- CREATE MATERIALIZED VIEW sessions_info_l1m_mv
-- ENGINE = ReplacingMergeTree(_timestamp)
-- PARTITION BY toYYYYMM(datetime)
-- ORDER BY (project_id, datetime, session_id)
-- TTL datetime + INTERVAL 1 MONTH
-- SETTINGS index_granularity = 512
-- POPULATE
-- AS
-- SELECT project_id,
-- session_id,
-- datetime,
-- now() AS _timestamp,
-- toJSONString(map('project_id', toString(project_id),
-- 'session_id', toString(session_id),
-- 'user_uuid', toString(user_uuid),
-- 'user_id', user_id,
-- 'user_os', user_os,
-- 'user_browser', user_browser,
-- 'user_device', user_device,
-- --'user_device_type', user_device_type,
-- --'user_country', user_country,
-- 'start_ts', toString(datetime),
-- 'duration', toString(duration),
-- 'events_count', toString(events_count),
-- 'pages_count', toString(pages_count),
-- 'errors_count', toString(errors_count),
-- -- 'user_anonymous_id', user_anonymous_id,
-- -- 'platform', platform,
-- -- 'issue_score', issue_score,
-- -- issue_types,
-- -- favorite,
-- -- viewed,
-- 'metadata', CAST((arrayFilter(x->isNotNull(x),
-- arrayMap(
-- x->if(isNotNull(x[1]) AND isNotNull(x[2]), toString(x[1]),
-- NULL),
-- [
-- [projects_meta.metadata_1,sessions.metadata_1],
-- [projects_meta.metadata_2,sessions.metadata_2],
-- [projects_meta.metadata_3,sessions.metadata_3],
-- [projects_meta.metadata_4,sessions.metadata_4],
-- [projects_meta.metadata_5,sessions.metadata_5],
-- [projects_meta.metadata_6,sessions.metadata_6],
-- [projects_meta.metadata_7,sessions.metadata_7],
-- [projects_meta.metadata_8,sessions.metadata_8],
-- [projects_meta.metadata_9,sessions.metadata_9],
-- [projects_meta.metadata_10,sessions.metadata_10]
-- ])),
-- arrayFilter(x->isNotNull(x),
-- arrayMap(
-- x->if(isNotNull(x[1]) AND isNotNull(x[2]), toString(x[2]),
-- NULL),
-- [
-- [projects_meta.metadata_1,sessions.metadata_1],
-- [projects_meta.metadata_2,sessions.metadata_2],
-- [projects_meta.metadata_3,sessions.metadata_3],
-- [projects_meta.metadata_4,sessions.metadata_4],
-- [projects_meta.metadata_5,sessions.metadata_5],
-- [projects_meta.metadata_6,sessions.metadata_6],
-- [projects_meta.metadata_7,sessions.metadata_7],
-- [projects_meta.metadata_8,sessions.metadata_8],
-- [projects_meta.metadata_9,sessions.metadata_9],
-- [projects_meta.metadata_10,sessions.metadata_10]
-- ]))), 'Map(String,String)')
-- )) AS info
-- FROM massive_split.sessions
-- INNER JOIN projects_metadata USING (project_id)
-- WHERE datetime >= now() - INTERVAL 1 MONTH
-- AND isNotNull(duration)
-- AND duration > 0;
--
-- CREATE MATERIALIZED VIEW sessions_info_l7d_mv
-- ENGINE = ReplacingMergeTree(_timestamp)
-- PARTITION BY toYYYYMMDD(datetime)
-- ORDER BY (project_id, datetime, session_id)
-- TTL datetime + INTERVAL 7 DAY
-- SETTINGS index_granularity = 512
-- POPULATE
-- AS
-- SELECT *
-- FROM sessions_info_l1m_mv
-- WHERE datetime >= now() - INTERVAL 7 DAY;