From 1e0b84496e631c7d1e4125bea8e3ff38fbf30e97 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 31 Aug 2022 16:55:20 +0100 Subject: [PATCH 01/10] feat(DB): CH added missing column --- ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/1.8.0.sql | 2 ++ ee/scripts/helm/db/init_dbs/clickhouse/create/init_schema.sql | 2 ++ 2 files changed, 4 insertions(+) diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/1.8.0.sql b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/1.8.0.sql index c1687164c..912b1b7e6 100644 --- a/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/1.8.0.sql +++ b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/1.8.0.sql @@ -148,6 +148,7 @@ CREATE TABLE IF NOT EXISTS experimental.sessions issue_types Array(LowCardinality(String)), referrer Nullable(String), base_referrer Nullable(String) MATERIALIZED lower(concat(domain(referrer), path(referrer))), + issue_score Nullable(UInt32), _timestamp DateTime DEFAULT now() ) ENGINE = ReplacingMergeTree(_timestamp) PARTITION BY toYYYYMMDD(datetime) @@ -328,6 +329,7 @@ SELECT session_id, issue_types, referrer, base_referrer, + issue_score, _timestamp FROM experimental.sessions WHERE datetime >= now() - INTERVAL 7 DAY diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/create/init_schema.sql b/ee/scripts/helm/db/init_dbs/clickhouse/create/init_schema.sql index c1687164c..912b1b7e6 100644 --- a/ee/scripts/helm/db/init_dbs/clickhouse/create/init_schema.sql +++ b/ee/scripts/helm/db/init_dbs/clickhouse/create/init_schema.sql @@ -148,6 +148,7 @@ CREATE TABLE IF NOT EXISTS experimental.sessions issue_types Array(LowCardinality(String)), referrer Nullable(String), base_referrer Nullable(String) MATERIALIZED lower(concat(domain(referrer), path(referrer))), + issue_score Nullable(UInt32), _timestamp DateTime DEFAULT now() ) ENGINE = ReplacingMergeTree(_timestamp) PARTITION BY toYYYYMMDD(datetime) @@ -328,6 +329,7 @@ SELECT session_id, issue_types, referrer, base_referrer, + issue_score, _timestamp FROM experimental.sessions WHERE datetime >= now() - INTERVAL 7 DAY From b3fec22b6b9de9f479e2a73d3d11429db282974d Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 31 Aug 2022 17:17:32 +0100 Subject: [PATCH 02/10] feat(DB): CH fixed sessions search with empty payload values --- ee/api/chalicelib/core/sessions_exp.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ee/api/chalicelib/core/sessions_exp.py b/ee/api/chalicelib/core/sessions_exp.py index 6d0cc35fd..6f8fa46e3 100644 --- a/ee/api/chalicelib/core/sessions_exp.py +++ b/ee/api/chalicelib/core/sessions_exp.py @@ -271,7 +271,8 @@ def search_sessions(data: schemas.SessionsSearchPayloadSchema, project_id, user_ {query_part} ) AS raw ORDER BY sort_key {data.order} - LIMIT %(sessions_limit)s OFFSET %(sessions_limit_s)s) AS sorted_sessions;""", full_args) + LIMIT %(sessions_limit)s OFFSET %(sessions_limit_s)s) AS sorted_sessions;""", + full_args) # print("--------------------") # print(main_query) # print("--------------------") @@ -1821,7 +1822,8 @@ def search_query_parts_ch(data, error_status, errors_only, favorite_only, issue, sequence_conditions[-1] += " AND " + c["condition"] del _value_conditions - events_conditions_where.append(f"({' OR '.join([c for c in type_conditions])})") + if len(events_conditions) > 0: + events_conditions_where.append(f"({' OR '.join([c for c in type_conditions])})") del type_conditions if len(value_conditions) > 0: events_conditions_where.append(f"({' OR '.join([c for c in value_conditions])})") @@ -1867,8 +1869,8 @@ def 
search_query_parts_ch(data, error_status, errors_only, favorite_only, issue, if c.get('condition'): has_values = True sequence_conditions[-1] += " AND " + c["condition"] - - events_conditions_where.append(f"({' OR '.join([c for c in type_conditions])})") + if len(events_conditions) > 0: + events_conditions_where.append(f"({' OR '.join([c for c in type_conditions])})") if len(events_conditions_not) > 0: has_values = True From c84f2a12224d23f3b4a734420d44c7649849ea43 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 31 Aug 2022 18:19:18 +0100 Subject: [PATCH 03/10] feat(DB): CH fixed sessions viewed status --- ee/api/chalicelib/core/sessions_exp.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ee/api/chalicelib/core/sessions_exp.py b/ee/api/chalicelib/core/sessions_exp.py index 6f8fa46e3..2c8a2161e 100644 --- a/ee/api/chalicelib/core/sessions_exp.py +++ b/ee/api/chalicelib/core/sessions_exp.py @@ -23,7 +23,7 @@ s.pages_count AS pages_count, s.errors_count AS errors_count, s.user_anonymous_id AS user_anonymous_id, s.platform AS platform, -0 AS issue_score, +coalesce(issue_score,0) AS issue_score, s.issue_types AS issue_types """ @@ -44,7 +44,8 @@ SESSION_PROJECTION_COLS_CH_MAP = """\ 'errors_count', toString(s.errors_count), 'user_anonymous_id', toString(s.user_anonymous_id), 'platform', toString(s.platform), -'issue_score', '0' +'issue_score', toString(coalesce(issue_score,0)), +'viewed', toString(viewed_sessions.session_id > 0) """ @@ -269,6 +270,11 @@ def search_sessions(data: schemas.SessionsSearchPayloadSchema, project_id, user_ {sort} AS sort_key, map({SESSION_PROJECTION_COLS_CH_MAP}) AS details {query_part} + LEFT JOIN (SELECT session_id + FROM experimental.user_viewed_sessions + WHERE user_id = %(userId)s AND project_id=%(project_id)s + AND _timestamp >= toDateTime(%(startDate)s / 1000)) AS viewed_sessions + ON (viewed_sessions.session_id = s.session_id) ) AS raw ORDER BY sort_key {data.order} LIMIT %(sessions_limit)s OFFSET %(sessions_limit_s)s) AS sorted_sessions;""", From da8dbe54d69c3f0b3a55740d64505a355c8aac30 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 31 Aug 2022 18:39:22 +0100 Subject: [PATCH 04/10] feat(DB): CH fixed fetch session search --- ee/api/chalicelib/core/sessions_exp.py | 643 +------------------------ 1 file changed, 5 insertions(+), 638 deletions(-) diff --git a/ee/api/chalicelib/core/sessions_exp.py b/ee/api/chalicelib/core/sessions_exp.py index 2c8a2161e..f37b6f2e2 100644 --- a/ee/api/chalicelib/core/sessions_exp.py +++ b/ee/api/chalicelib/core/sessions_exp.py @@ -286,7 +286,6 @@ def search_sessions(data: schemas.SessionsSearchPayloadSchema, project_id, user_ sessions = cur.execute(main_query) except Exception as err: print("--------- SESSIONS-CH SEARCH QUERY EXCEPTION -----------") - print(main_query) print("--------- PAYLOAD -----------") print(data.json()) print("--------------------") @@ -401,7 +400,7 @@ def search2_series(data: schemas.SessionsSearchPayloadSchema, project_id: int, d extra_where = f"WHERE ({' OR '.join(extra_where)})" elif metric_of == schemas.TableMetricOfType.visited_url: main_col = "url_path" - extra_col = "s.rul_path" + extra_col = "s.url_path" main_query = cur.format(f"""{pre_query} SELECT COUNT(DISTINCT {main_col}) OVER () AS main_count, {main_col} AS name, @@ -443,638 +442,6 @@ def __is_valid_event(is_any: bool, event: schemas._SessionSearchEventSchema): event.filters is None or len(event.filters) == 0)) -# this function generates the query and return the generated-query with 
the dict of query arguments -def search_query_parts(data, error_status, errors_only, favorite_only, issue, project_id, user_id, extra_event=None): - ss_constraints = [] - full_args = {"project_id": project_id, "startDate": data.startDate, "endDate": data.endDate, - "projectId": project_id, "userId": user_id} - extra_constraints = [ - "s.project_id = %(project_id)s", - "s.duration IS NOT NULL" - ] - extra_from = "" - events_query_part = "" - if len(data.filters) > 0: - meta_keys = None - for i, f in enumerate(data.filters): - if not isinstance(f.value, list): - f.value = [f.value] - filter_type = f.type - f.value = helper.values_for_operator(value=f.value, op=f.operator) - f_k = f"f_value{i}" - full_args = {**full_args, **_multiple_values(f.value, value_key=f_k)} - op = __get_sql_operator(f.operator) \ - if filter_type not in [schemas.FilterType.events_count] else f.operator - is_any = _isAny_opreator(f.operator) - is_undefined = _isUndefined_operator(f.operator) - if not is_any and not is_undefined and len(f.value) == 0: - continue - is_not = False - if __is_negation_operator(f.operator): - is_not = True - if filter_type == schemas.FilterType.user_browser: - if is_any: - extra_constraints.append('s.user_browser IS NOT NULL') - ss_constraints.append('ms.user_browser IS NOT NULL') - else: - extra_constraints.append( - _multiple_conditions(f's.user_browser {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) - ss_constraints.append( - _multiple_conditions(f'ms.user_browser {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) - - elif filter_type in [schemas.FilterType.user_os, schemas.FilterType.user_os_ios]: - if is_any: - extra_constraints.append('s.user_os IS NOT NULL') - ss_constraints.append('ms.user_os IS NOT NULL') - else: - extra_constraints.append( - _multiple_conditions(f's.user_os {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) - ss_constraints.append( - _multiple_conditions(f'ms.user_os {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) - - elif filter_type in [schemas.FilterType.user_device, schemas.FilterType.user_device_ios]: - if is_any: - extra_constraints.append('s.user_device IS NOT NULL') - ss_constraints.append('ms.user_device IS NOT NULL') - else: - extra_constraints.append( - _multiple_conditions(f's.user_device {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) - ss_constraints.append( - _multiple_conditions(f'ms.user_device {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) - - elif filter_type in [schemas.FilterType.user_country, schemas.FilterType.user_country_ios]: - if is_any: - extra_constraints.append('s.user_country IS NOT NULL') - ss_constraints.append('ms.user_country IS NOT NULL') - else: - extra_constraints.append( - _multiple_conditions(f's.user_country {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) - ss_constraints.append( - _multiple_conditions(f'ms.user_country {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) - - elif filter_type in [schemas.FilterType.utm_source]: - if is_any: - extra_constraints.append('s.utm_source IS NOT NULL') - ss_constraints.append('ms.utm_source IS NOT NULL') - elif is_undefined: - extra_constraints.append('s.utm_source IS NULL') - ss_constraints.append('ms.utm_source IS NULL') - else: - extra_constraints.append( - _multiple_conditions(f's.utm_source {op} %({f_k})s::text', f.value, is_not=is_not, - value_key=f_k)) - ss_constraints.append( - _multiple_conditions(f'ms.utm_source {op} %({f_k})s::text', f.value, is_not=is_not, - value_key=f_k)) - elif filter_type in 
[schemas.FilterType.utm_medium]: - if is_any: - extra_constraints.append('s.utm_medium IS NOT NULL') - ss_constraints.append('ms.utm_medium IS NOT NULL') - elif is_undefined: - extra_constraints.append('s.utm_medium IS NULL') - ss_constraints.append('ms.utm_medium IS NULL') - else: - extra_constraints.append( - _multiple_conditions(f's.utm_medium {op} %({f_k})s::text', f.value, is_not=is_not, - value_key=f_k)) - ss_constraints.append( - _multiple_conditions(f'ms.utm_medium {op} %({f_k})s::text', f.value, is_not=is_not, - value_key=f_k)) - elif filter_type in [schemas.FilterType.utm_campaign]: - if is_any: - extra_constraints.append('s.utm_campaign IS NOT NULL') - ss_constraints.append('ms.utm_campaign IS NOT NULL') - elif is_undefined: - extra_constraints.append('s.utm_campaign IS NULL') - ss_constraints.append('ms.utm_campaign IS NULL') - else: - extra_constraints.append( - _multiple_conditions(f's.utm_campaign {op} %({f_k})s::text', f.value, is_not=is_not, - value_key=f_k)) - ss_constraints.append( - _multiple_conditions(f'ms.utm_campaign {op} %({f_k})s::text', f.value, is_not=is_not, - value_key=f_k)) - - elif filter_type == schemas.FilterType.duration: - if len(f.value) > 0 and f.value[0] is not None: - extra_constraints.append("s.duration >= %(minDuration)s") - ss_constraints.append("ms.duration >= %(minDuration)s") - full_args["minDuration"] = f.value[0] - if len(f.value) > 1 and f.value[1] is not None and int(f.value[1]) > 0: - extra_constraints.append("s.duration <= %(maxDuration)s") - ss_constraints.append("ms.duration <= %(maxDuration)s") - full_args["maxDuration"] = f.value[1] - elif filter_type == schemas.FilterType.referrer: - extra_from += f"INNER JOIN {events.event_type.LOCATION.table} AS p USING(session_id)" - if is_any: - extra_constraints.append('p.base_referrer IS NOT NULL') - else: - extra_constraints.append( - _multiple_conditions(f"p.base_referrer {op} %({f_k})s", f.value, is_not=is_not, value_key=f_k)) - elif filter_type == events.event_type.METADATA.ui_type: - # get metadata list only if you need it - if meta_keys is None: - meta_keys = metadata.get(project_id=project_id) - meta_keys = {m["key"]: m["index"] for m in meta_keys} - if f.source in meta_keys.keys(): - if is_any: - extra_constraints.append(f"s.{metadata.index_to_colname(meta_keys[f.source])} IS NOT NULL") - ss_constraints.append(f"ms.{metadata.index_to_colname(meta_keys[f.source])} IS NOT NULL") - elif is_undefined: - extra_constraints.append(f"s.{metadata.index_to_colname(meta_keys[f.source])} IS NULL") - ss_constraints.append(f"ms.{metadata.index_to_colname(meta_keys[f.source])} IS NULL") - else: - extra_constraints.append( - _multiple_conditions( - f"s.{metadata.index_to_colname(meta_keys[f.source])} {op} %({f_k})s::text", - f.value, is_not=is_not, value_key=f_k)) - ss_constraints.append( - _multiple_conditions( - f"ms.{metadata.index_to_colname(meta_keys[f.source])} {op} %({f_k})s::text", - f.value, is_not=is_not, value_key=f_k)) - elif filter_type in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: - if is_any: - extra_constraints.append('s.user_id IS NOT NULL') - ss_constraints.append('ms.user_id IS NOT NULL') - elif is_undefined: - extra_constraints.append('s.user_id IS NULL') - ss_constraints.append('ms.user_id IS NULL') - else: - extra_constraints.append( - _multiple_conditions(f"s.user_id {op} %({f_k})s::text", f.value, is_not=is_not, value_key=f_k)) - ss_constraints.append( - _multiple_conditions(f"ms.user_id {op} %({f_k})s::text", f.value, is_not=is_not, value_key=f_k)) - elif 
filter_type in [schemas.FilterType.user_anonymous_id, - schemas.FilterType.user_anonymous_id_ios]: - if is_any: - extra_constraints.append('s.user_anonymous_id IS NOT NULL') - ss_constraints.append('ms.user_anonymous_id IS NOT NULL') - elif is_undefined: - extra_constraints.append('s.user_anonymous_id IS NULL') - ss_constraints.append('ms.user_anonymous_id IS NULL') - else: - extra_constraints.append( - _multiple_conditions(f"s.user_anonymous_id {op} %({f_k})s::text", f.value, is_not=is_not, - value_key=f_k)) - ss_constraints.append( - _multiple_conditions(f"ms.user_anonymous_id {op} %({f_k})s::text", f.value, is_not=is_not, - value_key=f_k)) - elif filter_type in [schemas.FilterType.rev_id, schemas.FilterType.rev_id_ios]: - if is_any: - extra_constraints.append('s.rev_id IS NOT NULL') - ss_constraints.append('ms.rev_id IS NOT NULL') - elif is_undefined: - extra_constraints.append('s.rev_id IS NULL') - ss_constraints.append('ms.rev_id IS NULL') - else: - extra_constraints.append( - _multiple_conditions(f"s.rev_id {op} %({f_k})s::text", f.value, is_not=is_not, value_key=f_k)) - ss_constraints.append( - _multiple_conditions(f"ms.rev_id {op} %({f_k})s::text", f.value, is_not=is_not, value_key=f_k)) - elif filter_type == schemas.FilterType.platform: - # op = __get_sql_operator(f.operator) - extra_constraints.append( - _multiple_conditions(f"s.user_device_type {op} %({f_k})s", f.value, is_not=is_not, - value_key=f_k)) - ss_constraints.append( - _multiple_conditions(f"ms.user_device_type {op} %({f_k})s", f.value, is_not=is_not, - value_key=f_k)) - elif filter_type == schemas.FilterType.issue: - if is_any: - extra_constraints.append("array_length(s.issue_types, 1) > 0") - ss_constraints.append("array_length(ms.issue_types, 1) > 0") - else: - extra_constraints.append( - _multiple_conditions(f"%({f_k})s {op} ANY (s.issue_types)", f.value, is_not=is_not, - value_key=f_k)) - ss_constraints.append( - _multiple_conditions(f"%({f_k})s {op} ANY (ms.issue_types)", f.value, is_not=is_not, - value_key=f_k)) - elif filter_type == schemas.FilterType.events_count: - extra_constraints.append( - _multiple_conditions(f"s.events_count {op} %({f_k})s", f.value, is_not=is_not, - value_key=f_k)) - ss_constraints.append( - _multiple_conditions(f"ms.events_count {op} %({f_k})s", f.value, is_not=is_not, - value_key=f_k)) - # --------------------------------------------------------------------------- - if len(data.events) > 0: - valid_events_count = 0 - for event in data.events: - is_any = _isAny_opreator(event.operator) - if not isinstance(event.value, list): - event.value = [event.value] - if __is_valid_event(is_any=is_any, event=event): - valid_events_count += 1 - events_query_from = [] - event_index = 0 - or_events = data.events_order == schemas.SearchEventOrder._or - # events_joiner = " FULL JOIN " if or_events else " INNER JOIN LATERAL " - events_joiner = " UNION " if or_events else " INNER JOIN LATERAL " - for i, event in enumerate(data.events): - event_type = event.type - is_any = _isAny_opreator(event.operator) - if not isinstance(event.value, list): - event.value = [event.value] - if not __is_valid_event(is_any=is_any, event=event): - continue - op = __get_sql_operator(event.operator) - is_not = False - if __is_negation_operator(event.operator): - is_not = True - op = __reverse_sql_operator(op) - if event_index == 0 or or_events: - event_from = "%s INNER JOIN public.sessions AS ms USING (session_id)" - event_where = ["ms.project_id = %(projectId)s", "main.timestamp >= %(startDate)s", - "main.timestamp <= 
%(endDate)s", "ms.start_ts >= %(startDate)s", - "ms.start_ts <= %(endDate)s", "ms.duration IS NOT NULL"] - if favorite_only and not errors_only: - event_from += "INNER JOIN public.user_favorite_sessions AS fs USING(session_id)" - event_where.append("fs.user_id = %(userId)s") - else: - event_from = "%s" - event_where = ["main.timestamp >= %(startDate)s", "main.timestamp <= %(endDate)s", - "main.session_id=event_0.session_id"] - if data.events_order == schemas.SearchEventOrder._then: - event_where.append(f"event_{event_index - 1}.timestamp <= main.timestamp") - e_k = f"e_value{i}" - s_k = e_k + "_source" - if event.type != schemas.PerformanceEventType.time_between_events: - event.value = helper.values_for_operator(value=event.value, op=event.operator) - full_args = {**full_args, - **_multiple_values(event.value, value_key=e_k), - **_multiple_values(event.source, value_key=s_k)} - - if event_type == events.event_type.CLICK.ui_type: - event_from = event_from % f"{events.event_type.CLICK.table} AS main " - if not is_any: - event_where.append( - _multiple_conditions(f"main.{events.event_type.CLICK.column} {op} %({e_k})s", event.value, - value_key=e_k)) - - elif event_type == events.event_type.INPUT.ui_type: - event_from = event_from % f"{events.event_type.INPUT.table} AS main " - if not is_any: - event_where.append( - _multiple_conditions(f"main.{events.event_type.INPUT.column} {op} %({e_k})s", event.value, - value_key=e_k)) - if event.source is not None and len(event.source) > 0: - event_where.append(_multiple_conditions(f"main.value ILIKE %(custom{i})s", event.source, - value_key=f"custom{i}")) - full_args = {**full_args, **_multiple_values(event.source, value_key=f"custom{i}")} - - elif event_type == events.event_type.LOCATION.ui_type: - event_from = event_from % f"{events.event_type.LOCATION.table} AS main " - if not is_any: - event_where.append( - _multiple_conditions(f"main.url_path {op} %({e_k})s", - event.value, value_key=e_k)) - elif event_type == events.event_type.CUSTOM.ui_type: - event_from = event_from % f"{events.event_type.CUSTOM.table} AS main " - if not is_any: - event_where.append( - _multiple_conditions(f"main.{events.event_type.CUSTOM.column} {op} %({e_k})s", event.value, - value_key=e_k)) - elif event_type == events.event_type.REQUEST.ui_type: - event_from = event_from % f"{events.event_type.REQUEST.table} AS main " - if not is_any: - event_where.append( - _multiple_conditions(f"main.{events.event_type.REQUEST.column} {op} %({e_k})s", event.value, - value_key=e_k)) - elif event_type == events.event_type.GRAPHQL.ui_type: - event_from = event_from % f"{events.event_type.GRAPHQL.table} AS main " - if not is_any: - event_where.append( - _multiple_conditions(f"main.{events.event_type.GRAPHQL.column} {op} %({e_k})s", event.value, - value_key=e_k)) - elif event_type == events.event_type.STATEACTION.ui_type: - event_from = event_from % f"{events.event_type.STATEACTION.table} AS main " - if not is_any: - event_where.append( - _multiple_conditions(f"main.{events.event_type.STATEACTION.column} {op} %({e_k})s", - event.value, value_key=e_k)) - elif event_type == events.event_type.ERROR.ui_type: - event_from = event_from % f"{events.event_type.ERROR.table} AS main INNER JOIN public.errors AS main1 USING(error_id)" - event.source = tuple(event.source) - if not is_any and event.value not in [None, "*", ""]: - event_where.append( - _multiple_conditions(f"(main1.message {op} %({e_k})s OR main1.name {op} %({e_k})s)", - event.value, value_key=e_k)) - if len(event.source) > 0 and 
event.source[0] not in [None, "*", ""]: - event_where.append(_multiple_conditions(f"main1.source = %({s_k})s", event.source, value_key=s_k)) - - - # ----- IOS - elif event_type == events.event_type.CLICK_IOS.ui_type: - event_from = event_from % f"{events.event_type.CLICK_IOS.table} AS main " - if not is_any: - event_where.append( - _multiple_conditions(f"main.{events.event_type.CLICK_IOS.column} {op} %({e_k})s", - event.value, value_key=e_k)) - - elif event_type == events.event_type.INPUT_IOS.ui_type: - event_from = event_from % f"{events.event_type.INPUT_IOS.table} AS main " - if not is_any: - event_where.append( - _multiple_conditions(f"main.{events.event_type.INPUT_IOS.column} {op} %({e_k})s", - event.value, value_key=e_k)) - if event.source is not None and len(event.source) > 0: - event_where.append(_multiple_conditions(f"main.value ILIKE %(custom{i})s", event.source, - value_key="custom{i}")) - full_args = {**full_args, **_multiple_values(event.source, f"custom{i}")} - elif event_type == events.event_type.VIEW_IOS.ui_type: - event_from = event_from % f"{events.event_type.VIEW_IOS.table} AS main " - if not is_any: - event_where.append( - _multiple_conditions(f"main.{events.event_type.VIEW_IOS.column} {op} %({e_k})s", - event.value, value_key=e_k)) - elif event_type == events.event_type.CUSTOM_IOS.ui_type: - event_from = event_from % f"{events.event_type.CUSTOM_IOS.table} AS main " - if not is_any: - event_where.append( - _multiple_conditions(f"main.{events.event_type.CUSTOM_IOS.column} {op} %({e_k})s", - event.value, value_key=e_k)) - elif event_type == events.event_type.REQUEST_IOS.ui_type: - event_from = event_from % f"{events.event_type.REQUEST_IOS.table} AS main " - if not is_any: - event_where.append( - _multiple_conditions(f"main.{events.event_type.REQUEST_IOS.column} {op} %({e_k})s", - event.value, value_key=e_k)) - elif event_type == events.event_type.ERROR_IOS.ui_type: - event_from = event_from % f"{events.event_type.ERROR_IOS.table} AS main INNER JOIN public.crashes_ios AS main1 USING(crash_id)" - if not is_any and event.value not in [None, "*", ""]: - event_where.append( - _multiple_conditions(f"(main1.reason {op} %({e_k})s OR main1.name {op} %({e_k})s)", - event.value, value_key=e_k)) - elif event_type == schemas.PerformanceEventType.fetch_failed: - event_from = event_from % f"{events.event_type.REQUEST.table} AS main " - if not is_any: - event_where.append( - _multiple_conditions(f"main.{events.event_type.REQUEST.column} {op} %({e_k})s", - event.value, value_key=e_k)) - col = performance_event.get_col(event_type) - colname = col["column"] - event_where.append(f"main.{colname} = FALSE") - # elif event_type == schemas.PerformanceEventType.fetch_duration: - # event_from = event_from % f"{events.event_type.REQUEST.table} AS main " - # if not is_any: - # event_where.append( - # _multiple_conditions(f"main.{events.event_type.REQUEST.column} {op} %({e_k})s", - # event.value, value_key=e_k)) - # col = performance_event.get_col(event_type) - # colname = col["column"] - # tname = "main" - # e_k += "_custom" - # full_args = {**full_args, **_multiple_values(event.source, value_key=e_k)} - # event_where.append(f"{tname}.{colname} IS NOT NULL AND {tname}.{colname}>0 AND " + - # _multiple_conditions(f"{tname}.{colname} {event.sourceOperator} %({e_k})s", - # event.source, value_key=e_k)) - elif event_type in [schemas.PerformanceEventType.location_dom_complete, - schemas.PerformanceEventType.location_largest_contentful_paint_time, - schemas.PerformanceEventType.location_ttfb, - 
schemas.PerformanceEventType.location_avg_cpu_load, - schemas.PerformanceEventType.location_avg_memory_usage - ]: - event_from = event_from % f"{events.event_type.LOCATION.table} AS main " - col = performance_event.get_col(event_type) - colname = col["column"] - tname = "main" - if col.get("extraJoin") is not None: - tname = "ej" - event_from += f" INNER JOIN {col['extraJoin']} AS {tname} USING(session_id)" - event_where += [f"{tname}.timestamp >= main.timestamp", f"{tname}.timestamp >= %(startDate)s", - f"{tname}.timestamp <= %(endDate)s"] - if not is_any: - event_where.append( - _multiple_conditions(f"main.{events.event_type.LOCATION.column} {op} %({e_k})s", - event.value, value_key=e_k)) - e_k += "_custom" - full_args = {**full_args, **_multiple_values(event.source, value_key=e_k)} - - event_where.append(f"{tname}.{colname} IS NOT NULL AND {tname}.{colname}>0 AND " + - _multiple_conditions(f"{tname}.{colname} {event.sourceOperator} %({e_k})s", - event.source, value_key=e_k)) - elif event_type == schemas.PerformanceEventType.time_between_events: - event_from = event_from % f"{getattr(events.event_type, event.value[0].type).table} AS main INNER JOIN {getattr(events.event_type, event.value[1].type).table} AS main2 USING(session_id) " - if not isinstance(event.value[0].value, list): - event.value[0].value = [event.value[0].value] - if not isinstance(event.value[1].value, list): - event.value[1].value = [event.value[1].value] - event.value[0].value = helper.values_for_operator(value=event.value[0].value, - op=event.value[0].operator) - event.value[1].value = helper.values_for_operator(value=event.value[1].value, - op=event.value[0].operator) - e_k1 = e_k + "_e1" - e_k2 = e_k + "_e2" - full_args = {**full_args, - **_multiple_values(event.value[0].value, value_key=e_k1), - **_multiple_values(event.value[1].value, value_key=e_k2)} - s_op = __get_sql_operator(event.value[0].operator) - event_where += ["main2.timestamp >= %(startDate)s", "main2.timestamp <= %(endDate)s"] - if event_index > 0 and not or_events: - event_where.append("main2.session_id=event_0.session_id") - is_any = _isAny_opreator(event.value[0].operator) - if not is_any: - event_where.append( - _multiple_conditions( - f"main.{getattr(events.event_type, event.value[0].type).column} {s_op} %({e_k1})s", - event.value[0].value, value_key=e_k1)) - s_op = __get_sql_operator(event.value[1].operator) - is_any = _isAny_opreator(event.value[1].operator) - if not is_any: - event_where.append( - _multiple_conditions( - f"main2.{getattr(events.event_type, event.value[1].type).column} {s_op} %({e_k2})s", - event.value[1].value, value_key=e_k2)) - - e_k += "_custom" - full_args = {**full_args, **_multiple_values(event.source, value_key=e_k)} - event_where.append( - _multiple_conditions(f"main2.timestamp - main.timestamp {event.sourceOperator} %({e_k})s", - event.source, value_key=e_k)) - - elif event_type == schemas.EventType.request_details: - event_from = event_from % f"{events.event_type.REQUEST.table} AS main " - apply = False - for j, f in enumerate(event.filters): - is_any = _isAny_opreator(f.operator) - if is_any or len(f.value) == 0: - continue - f.value = helper.values_for_operator(value=f.value, op=f.operator) - op = __get_sql_operator(f.operator) - e_k_f = e_k + f"_fetch{j}" - full_args = {**full_args, **_multiple_values(f.value, value_key=e_k_f)} - if f.type == schemas.FetchFilterType._url: - event_where.append( - _multiple_conditions(f"main.{events.event_type.REQUEST.column} {op} %({e_k_f})s::text", - f.value, value_key=e_k_f)) - 
apply = True - elif f.type == schemas.FetchFilterType._status_code: - event_where.append( - _multiple_conditions(f"main.status_code {f.operator} %({e_k_f})s::integer", f.value, - value_key=e_k_f)) - apply = True - elif f.type == schemas.FetchFilterType._method: - event_where.append( - _multiple_conditions(f"main.method {op} %({e_k_f})s", f.value, value_key=e_k_f)) - apply = True - elif f.type == schemas.FetchFilterType._duration: - event_where.append( - _multiple_conditions(f"main.duration {f.operator} %({e_k_f})s::integer", f.value, - value_key=e_k_f)) - apply = True - elif f.type == schemas.FetchFilterType._request_body: - event_where.append( - _multiple_conditions(f"main.request_body {op} %({e_k_f})s::text", f.value, value_key=e_k_f)) - apply = True - elif f.type == schemas.FetchFilterType._response_body: - event_where.append( - _multiple_conditions(f"main.response_body {op} %({e_k_f})s::text", f.value, - value_key=e_k_f)) - apply = True - else: - print(f"undefined FETCH filter: {f.type}") - if not apply: - continue - elif event_type == schemas.EventType.graphql: - event_from = event_from % f"{events.event_type.GRAPHQL.table} AS main " - for j, f in enumerate(event.filters): - is_any = _isAny_opreator(f.operator) - if is_any or len(f.value) == 0: - continue - f.value = helper.values_for_operator(value=f.value, op=f.operator) - op = __get_sql_operator(f.operator) - e_k_f = e_k + f"_graphql{j}" - full_args = {**full_args, **_multiple_values(f.value, value_key=e_k_f)} - if f.type == schemas.GraphqlFilterType._name: - event_where.append( - _multiple_conditions(f"main.{events.event_type.GRAPHQL.column} {op} %({e_k_f})s", f.value, - value_key=e_k_f)) - elif f.type == schemas.GraphqlFilterType._method: - event_where.append( - _multiple_conditions(f"main.method {op} %({e_k_f})s", f.value, value_key=e_k_f)) - elif f.type == schemas.GraphqlFilterType._request_body: - event_where.append( - _multiple_conditions(f"main.request_body {op} %({e_k_f})s", f.value, value_key=e_k_f)) - elif f.type == schemas.GraphqlFilterType._response_body: - event_where.append( - _multiple_conditions(f"main.response_body {op} %({e_k_f})s", f.value, value_key=e_k_f)) - else: - print(f"undefined GRAPHQL filter: {f.type}") - else: - continue - if event_index == 0 or or_events: - event_where += ss_constraints - if is_not: - if event_index == 0 or or_events: - events_query_from.append(f"""\ - (SELECT - session_id, - 0 AS timestamp - FROM sessions - WHERE EXISTS(SELECT session_id - FROM {event_from} - WHERE {" AND ".join(event_where)} - AND sessions.session_id=ms.session_id) IS FALSE - AND project_id = %(projectId)s - AND start_ts >= %(startDate)s - AND start_ts <= %(endDate)s - AND duration IS NOT NULL - ) {"" if or_events else (f"AS event_{event_index}" + ("ON(TRUE)" if event_index > 0 else ""))}\ - """) - else: - events_query_from.append(f"""\ - (SELECT - event_0.session_id, - event_{event_index - 1}.timestamp AS timestamp - WHERE EXISTS(SELECT session_id FROM {event_from} WHERE {" AND ".join(event_where)}) IS FALSE - ) AS event_{event_index} {"ON(TRUE)" if event_index > 0 else ""}\ - """) - else: - events_query_from.append(f"""\ - (SELECT main.session_id, {"MIN" if event_index < (valid_events_count - 1) else "MAX"}(main.timestamp) AS timestamp - FROM {event_from} - WHERE {" AND ".join(event_where)} - GROUP BY 1 - ) {"" if or_events else (f"AS event_{event_index} " + ("ON(TRUE)" if event_index > 0 else ""))}\ - """) - event_index += 1 - if event_index > 0: - if or_events: - events_query_part = f"""SELECT - session_id, - 
MIN(timestamp) AS first_event_ts, - MAX(timestamp) AS last_event_ts - FROM ({events_joiner.join(events_query_from)}) AS u - GROUP BY 1""" - else: - events_query_part = f"""SELECT - event_0.session_id, - MIN(event_0.timestamp) AS first_event_ts, - MAX(event_{event_index - 1}.timestamp) AS last_event_ts - FROM {events_joiner.join(events_query_from)} - GROUP BY 1""" - else: - data.events = [] - # --------------------------------------------------------------------------- - if data.startDate is not None: - extra_constraints.append("s.start_ts >= %(startDate)s") - if data.endDate is not None: - extra_constraints.append("s.start_ts <= %(endDate)s") - # if data.platform is not None: - # if data.platform == schemas.PlatformType.mobile: - # extra_constraints.append(b"s.user_os in ('Android','BlackBerry OS','iOS','Tizen','Windows Phone')") - # elif data.platform == schemas.PlatformType.desktop: - # extra_constraints.append( - # b"s.user_os in ('Chrome OS','Fedora','Firefox OS','Linux','Mac OS X','Ubuntu','Windows')") - - if errors_only: - extra_from += f" INNER JOIN {events.event_type.ERROR.table} AS er USING (session_id) INNER JOIN public.errors AS ser USING (error_id)" - extra_constraints.append("ser.source = 'js_exception'") - extra_constraints.append("ser.project_id = %(project_id)s") - if error_status != schemas.ErrorStatus.all: - extra_constraints.append("ser.status = %(error_status)s") - full_args["error_status"] = error_status - if favorite_only: - extra_from += " INNER JOIN public.user_favorite_errors AS ufe USING (error_id)" - extra_constraints.append("ufe.user_id = %(userId)s") - # extra_constraints = [extra.decode('UTF-8') + "\n" for extra in extra_constraints] - if favorite_only and not errors_only and user_id is not None: - extra_from += """INNER JOIN (SELECT user_id, session_id - FROM public.user_favorite_sessions - WHERE user_id = %(userId)s) AS favorite_sessions - USING (session_id)""" - elif not favorite_only and not errors_only and user_id is not None: - extra_from += """LEFT JOIN (SELECT user_id, session_id - FROM public.user_favorite_sessions - WHERE user_id = %(userId)s) AS favorite_sessions - USING (session_id)""" - extra_join = "" - if issue is not None: - extra_join = """ - INNER JOIN LATERAL(SELECT TRUE FROM events_common.issues INNER JOIN public.issues AS p_issues USING (issue_id) - WHERE issues.session_id=f.session_id - AND p_issues.type=%(issue_type)s - AND p_issues.context_string=%(issue_contextString)s - AND timestamp >= f.first_event_ts - AND timestamp <= f.last_event_ts) AS issues ON(TRUE) - """ - full_args["issue_contextString"] = issue["contextString"] - full_args["issue_type"] = issue["type"] - if extra_event: - extra_join += f"""INNER JOIN {extra_event} AS ev USING(session_id)""" - extra_constraints.append("ev.timestamp>=%(startDate)s") - extra_constraints.append("ev.timestamp<=%(endDate)s") - query_part = f"""\ - FROM {f"({events_query_part}) AS f" if len(events_query_part) > 0 else "public.sessions AS s"} - {extra_join} - {"INNER JOIN public.sessions AS s USING(session_id)" if len(events_query_part) > 0 else ""} - {extra_from} - WHERE - {" AND ".join(extra_constraints)}""" - return full_args, query_part - - def __get_event_type(event_type: Union[schemas.EventType, schemas.PerformanceEventType]): defs = { schemas.EventType.click: "CLICK", @@ -1473,7 +840,7 @@ def search_query_parts_ch(data, error_status, errors_only, favorite_only, issue, events_conditions[-1]["condition"] = event_where[-1] elif event_type == events.event_type.REQUEST.ui_type: event_from = 
event_from % f"{MAIN_EVENTS_TABLE} AS main " - _column = events.event_type.REQUEST.column + _column = 'url_path' event_where.append(f"main.event_type='{__get_event_type(event_type)}'") events_conditions.append({"type": event_where[-1]}) if not is_any: @@ -1533,7 +900,7 @@ def search_query_parts_ch(data, error_status, errors_only, favorite_only, issue, elif event_type == schemas.PerformanceEventType.fetch_failed: event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main " - _column = events.event_type.REQUEST.column + _column = 'url_path' event_where.append(f"main.event_type='{__get_event_type(event_type)}'") events_conditions.append({"type": event_where[-1]}) events_conditions[-1]["condition"] = [] @@ -1557,7 +924,7 @@ def search_query_parts_ch(data, error_status, errors_only, favorite_only, issue, # event_from = event_from % f"{events.event_type.REQUEST.table} AS main " # if not is_any: # event_where.append( - # _multiple_conditions(f"main.{events.event_type.REQUEST.column} {op} %({e_k})s", + # _multiple_conditions(f"main.url_path {op} %({e_k})s", # event.value, value_key=e_k)) # col = performance_event.get_col(event_type) # colname = col["column"] @@ -1682,7 +1049,7 @@ def search_query_parts_ch(data, error_status, errors_only, favorite_only, issue, full_args = {**full_args, **_multiple_values(f.value, value_key=e_k_f)} if f.type == schemas.FetchFilterType._url: event_where.append( - _multiple_conditions(f"main.{events.event_type.REQUEST.column} {op} %({e_k_f})s", f.value, + _multiple_conditions(f"main.url_path {op} %({e_k_f})s", f.value, value_key=e_k_f)) events_conditions[-1]["condition"].append(event_where[-1]) apply = True From 760b6e0304fed4da82434e35dc2b177514d96ad4 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 31 Aug 2022 19:11:47 +0100 Subject: [PATCH 05/10] feat(chalice): metrics support wrong metric_id --- api/chalicelib/core/dashboards.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/api/chalicelib/core/dashboards.py b/api/chalicelib/core/dashboards.py index bdd0518e0..9d1dc4c81 100644 --- a/api/chalicelib/core/dashboards.py +++ b/api/chalicelib/core/dashboards.py @@ -304,7 +304,9 @@ def make_chart_metrics(project_id, user_id, metric_id, data: schemas.CustomMetri include_dashboard=False) if raw_metric is None: return None - metric = schemas.CustomMetricAndTemplate = schemas.CustomMetricAndTemplate.parse_obj(raw_metric) + metric: schemas.CustomMetricAndTemplate = schemas.CustomMetricAndTemplate.parse_obj(raw_metric) + if metric.is_template and metric.predefined_key is None: + return None if metric.is_template: return get_predefined_metric(key=metric.predefined_key, project_id=project_id, data=data.dict()) else: From e0151008c17366d8cfb13ec5bd7e17f31b2aa8ca Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 31 Aug 2022 19:24:53 +0100 Subject: [PATCH 06/10] feat(chalice): metrics logs --- ee/api/chalicelib/core/metrics_exp.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ee/api/chalicelib/core/metrics_exp.py b/ee/api/chalicelib/core/metrics_exp.py index 958a335b9..61e020361 100644 --- a/ee/api/chalicelib/core/metrics_exp.py +++ b/ee/api/chalicelib/core/metrics_exp.py @@ -819,6 +819,11 @@ def get_missing_resources_trend(project_id, startTimestamp=TimeUTC.now(delta_day e["startTimestamp"] = startTimestamp e["endTimestamp"] = endTimestamp params["value"] = e["url"] + print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") + print(params) + print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") + print(ch_query) + 
print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") r = ch.execute(query=ch_query, params=params) e["endedAt"] = r[-1]["max_datatime"] From 678027efa3d8fe3b0a13b60523f1624119ba590a Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 31 Aug 2022 19:30:32 +0100 Subject: [PATCH 07/10] feat(chalice): metrics fixed missing step_size --- ee/api/chalicelib/core/metrics_exp.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ee/api/chalicelib/core/metrics_exp.py b/ee/api/chalicelib/core/metrics_exp.py index 61e020361..2084cab2e 100644 --- a/ee/api/chalicelib/core/metrics_exp.py +++ b/ee/api/chalicelib/core/metrics_exp.py @@ -798,7 +798,7 @@ def get_missing_resources_trend(project_id, startTimestamp=TimeUTC.now(delta_day GROUP BY url_path ORDER BY doc_count DESC LIMIT 10;""" - params = {"project_id": project_id, "startTimestamp": startTimestamp, + params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, **__get_constraint_values(args)} # print(ch.format(query=ch_query, params=params)) rows = ch.execute(query=ch_query, params=params) @@ -819,11 +819,6 @@ def get_missing_resources_trend(project_id, startTimestamp=TimeUTC.now(delta_day e["startTimestamp"] = startTimestamp e["endTimestamp"] = endTimestamp params["value"] = e["url"] - print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") - print(params) - print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") - print(ch_query) - print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") r = ch.execute(query=ch_query, params=params) e["endedAt"] = r[-1]["max_datatime"] From 37f372d1b8cd5f093103e7263d97b0bf91f6ab58 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 31 Aug 2022 19:36:11 +0100 Subject: [PATCH 08/10] feat(chalice): metrics fixed wrong table name --- ee/api/chalicelib/core/metrics_exp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/api/chalicelib/core/metrics_exp.py b/ee/api/chalicelib/core/metrics_exp.py index 2084cab2e..04e180e93 100644 --- a/ee/api/chalicelib/core/metrics_exp.py +++ b/ee/api/chalicelib/core/metrics_exp.py @@ -806,7 +806,7 @@ def get_missing_resources_trend(project_id, startTimestamp=TimeUTC.now(delta_day rows = [{"url": i["key"], "sessions": i["doc_count"]} for i in rows] if len(rows) == 0: return [] - ch_sub_query.append("events.url_path = %(value)s") + ch_sub_query.append("resources.url_path = %(value)s") ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp, COUNT(1) AS doc_count, toUnixTimestamp(MAX(resources.datetime))*1000 AS max_datatime From aa9949f315ac99cd6caa47b373ca5474d6c28b71 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 31 Aug 2022 19:47:28 +0100 Subject: [PATCH 09/10] feat(chalice): CH sessions search fixed ambiguous column --- ee/api/chalicelib/core/sessions_exp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/api/chalicelib/core/sessions_exp.py b/ee/api/chalicelib/core/sessions_exp.py index f37b6f2e2..58ce154b0 100644 --- a/ee/api/chalicelib/core/sessions_exp.py +++ b/ee/api/chalicelib/core/sessions_exp.py @@ -267,7 +267,7 @@ def search_sessions(data: schemas.SessionsSearchPayloadSchema, project_id, user_ main_query = cur.format(f"""SELECT any(total) AS count, groupArray(%(sessions_limit)s)(details) AS sessions FROM (SELECT total, details FROM (SELECT COUNT() OVER () AS total, - {sort} AS sort_key, + s.{sort} AS sort_key, map({SESSION_PROJECTION_COLS_CH_MAP}) AS 
details {query_part} LEFT JOIN (SELECT session_id From 39cf5b35866bc64241724bc4d045c1e8233132a7 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 31 Aug 2022 19:53:48 +0100 Subject: [PATCH 10/10] feat(chalice): CH funnels mock --- ee/api/chalicelib/core/__init__.py | 4 + ee/api/chalicelib/core/significance_exp.py | 638 +++++++++++++++++++++ 2 files changed, 642 insertions(+) create mode 100644 ee/api/chalicelib/core/significance_exp.py diff --git a/ee/api/chalicelib/core/__init__.py b/ee/api/chalicelib/core/__init__.py index 369d5f476..602a54998 100644 --- a/ee/api/chalicelib/core/__init__.py +++ b/ee/api/chalicelib/core/__init__.py @@ -5,6 +5,7 @@ logging.basicConfig(level=config("LOGLEVEL", default=logging.INFO)) if config("EXP_SESSIONS_SEARCH", cast=bool, default=False): print(">>> Using experimental sessions search") + from . import sessions as sessions_legacy from . import sessions_exp as sessions else: from . import sessions as sessions @@ -32,3 +33,6 @@ if config("EXP_ALERTS", cast=bool, default=False): from . import alerts_processor_exp as alerts_processor else: from . import alerts_processor as alerts_processor + + +from . import significance_exp as significance diff --git a/ee/api/chalicelib/core/significance_exp.py b/ee/api/chalicelib/core/significance_exp.py new file mode 100644 index 000000000..1f845ec06 --- /dev/null +++ b/ee/api/chalicelib/core/significance_exp.py @@ -0,0 +1,638 @@ +__author__ = "AZNAUROV David" +__maintainer__ = "KRAIEM Taha Yassine" + +import schemas +from chalicelib.core import events, metadata +from chalicelib.core import sessions_legacy as sessions +from chalicelib.utils import dev + +""" +todo: remove LIMIT from the query +""" + +from typing import List +import math +import warnings +from collections import defaultdict + +from psycopg2.extras import RealDictRow +from chalicelib.utils import pg_client, helper + +SIGNIFICANCE_THRSH = 0.4 + +T_VALUES = {1: 12.706, 2: 4.303, 3: 3.182, 4: 2.776, 5: 2.571, 6: 2.447, 7: 2.365, 8: 2.306, 9: 2.262, 10: 2.228, + 11: 2.201, 12: 2.179, 13: 2.160, 14: 2.145, 15: 2.13, 16: 2.120, 17: 2.110, 18: 2.101, 19: 2.093, 20: 2.086, + 21: 2.080, 22: 2.074, 23: 2.069, 25: 2.064, 26: 2.060, 27: 2.056, 28: 2.052, 29: 2.045, 30: 2.042} + + +def get_stages_and_events(filter_d, project_id) -> List[RealDictRow]: + """ + Add minimal timestamp + :param filter_d: dict contains events&filters&... 
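    :param project_id: id of the project; used for the metadata lookup and the SQL constraints below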
    :return: raw rows, one per session/stage combination, with any issues that occurred between the first and last stage
    """
    stages: [dict] = filter_d.get("events", [])
    filters: [dict] = filter_d.get("filters", [])
    filter_issues = filter_d.get("issueTypes")
    if filter_issues is None or len(filter_issues) == 0:
        filter_issues = []
    stage_constraints = ["main.timestamp <= %(endTimestamp)s"]
    first_stage_extra_constraints = ["s.project_id=%(project_id)s", "s.start_ts >= %(startTimestamp)s",
                                     "s.start_ts <= %(endTimestamp)s"]
    filter_extra_from = []
    n_stages_query = []
    values = {}
    if len(filters) > 0:
        meta_keys = None
        for i, f in enumerate(filters):
            if not isinstance(f["value"], list):
                # f is a dict here, so assign through the key (f.value would raise AttributeError)
                f["value"] = [f["value"]]
            if len(f["value"]) == 0 or f["value"] is None:
                continue
            f["value"] = helper.values_for_operator(value=f["value"], op=f["operator"])
            # filter_args = _multiple_values(f["value"])
            op = sessions.__get_sql_operator(f["operator"])

            filter_type = f["type"]
            # values[f_k] = sessions.__get_sql_value_multiple(f["value"])
            f_k = f"f_value{i}"
            values = {**values,
                      **sessions._multiple_values(helper.values_for_operator(value=f["value"], op=f["operator"]),
                                                  value_key=f_k)}
            if filter_type == schemas.FilterType.user_browser:
                # op = sessions.__get_sql_operator_multiple(f["operator"])
                first_stage_extra_constraints.append(
                    sessions._multiple_conditions(f's.user_browser {op} %({f_k})s', f["value"], value_key=f_k))

            elif filter_type in [schemas.FilterType.user_os, schemas.FilterType.user_os_ios]:
                # op = sessions.__get_sql_operator_multiple(f["operator"])
                first_stage_extra_constraints.append(
                    sessions._multiple_conditions(f's.user_os {op} %({f_k})s', f["value"], value_key=f_k))

            elif filter_type in [schemas.FilterType.user_device, schemas.FilterType.user_device_ios]:
                # op = sessions.__get_sql_operator_multiple(f["operator"])
                first_stage_extra_constraints.append(
                    sessions._multiple_conditions(f's.user_device {op} %({f_k})s', f["value"], value_key=f_k))

            elif filter_type in [schemas.FilterType.user_country, schemas.FilterType.user_country_ios]:
                # op = sessions.__get_sql_operator_multiple(f["operator"])
                first_stage_extra_constraints.append(
                    sessions._multiple_conditions(f's.user_country {op} %({f_k})s', f["value"], value_key=f_k))
            elif filter_type == schemas.FilterType.duration:
                if len(f["value"]) > 0 and f["value"][0] is not None:
                    first_stage_extra_constraints.append(f's.duration >= %(minDuration)s')
                    values["minDuration"] = f["value"][0]
                if len(f["value"]) > 1 and f["value"][1] is not None and int(f["value"][1]) > 0:
                    first_stage_extra_constraints.append('s.duration <= %(maxDuration)s')
                    values["maxDuration"] = f["value"][1]
            elif filter_type == schemas.FilterType.referrer:
                # events_query_part = events_query_part + f"INNER JOIN events.pages AS p USING(session_id)"
                filter_extra_from = [f"INNER JOIN {events.event_type.LOCATION.table} AS p USING(session_id)"]
                # op = sessions.__get_sql_operator_multiple(f["operator"])
                first_stage_extra_constraints.append(
                    sessions._multiple_conditions(f"p.base_referrer {op} %({f_k})s", f["value"], value_key=f_k))
            elif filter_type == events.event_type.METADATA.ui_type:
                if meta_keys is None:
                    meta_keys = metadata.get(project_id=project_id)
                    meta_keys = {m["key"]: m["index"] for m in meta_keys}
                # op = sessions.__get_sql_operator(f["operator"])
                if f.get("key") in meta_keys.keys():
                    first_stage_extra_constraints.append(
                        sessions._multiple_conditions(
                            f's.{metadata.index_to_colname(meta_keys[f["key"]])} {op} %({f_k})s', f["value"],
                            value_key=f_k))
                    # values[f_k] = helper.string_to_sql_like_with_op(f["value"][0], op)
            elif filter_type in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]:
                # op = sessions.__get_sql_operator(f["operator"])
                first_stage_extra_constraints.append(
                    sessions._multiple_conditions(f's.user_id {op} %({f_k})s', f["value"], value_key=f_k))
                # values[f_k] = helper.string_to_sql_like_with_op(f["value"][0], op)
            elif filter_type in [schemas.FilterType.user_anonymous_id,
                                 schemas.FilterType.user_anonymous_id_ios]:
                # op = sessions.__get_sql_operator(f["operator"])
                first_stage_extra_constraints.append(
                    sessions._multiple_conditions(f's.user_anonymous_id {op} %({f_k})s', f["value"], value_key=f_k))
                # values[f_k] = helper.string_to_sql_like_with_op(f["value"][0], op)
            elif filter_type in [schemas.FilterType.rev_id, schemas.FilterType.rev_id_ios]:
                # op = sessions.__get_sql_operator(f["operator"])
                first_stage_extra_constraints.append(
                    sessions._multiple_conditions(f's.rev_id {op} %({f_k})s', f["value"], value_key=f_k))
                # values[f_k] = helper.string_to_sql_like_with_op(f["value"][0], op)
    i = -1
    for s in stages:

        if s.get("operator") is None:
            s["operator"] = "is"

        if not isinstance(s["value"], list):
            s["value"] = [s["value"]]
        is_any = sessions._isAny_opreator(s["operator"])
        if not is_any and isinstance(s["value"], list) and len(s["value"]) == 0:
            continue
        i += 1
        if i == 0:
            extra_from = filter_extra_from + ["INNER JOIN public.sessions AS s USING (session_id)"]
        else:
            extra_from = []
        op = sessions.__get_sql_operator(s["operator"])
        event_type = s["type"].upper()
        if event_type == events.event_type.CLICK.ui_type:
            next_table = events.event_type.CLICK.table
            next_col_name = events.event_type.CLICK.column
        elif event_type == events.event_type.INPUT.ui_type:
            next_table = events.event_type.INPUT.table
            next_col_name = events.event_type.INPUT.column
        elif event_type == events.event_type.LOCATION.ui_type:
            next_table = events.event_type.LOCATION.table
            next_col_name = events.event_type.LOCATION.column
        elif event_type == events.event_type.CUSTOM.ui_type:
            next_table = events.event_type.CUSTOM.table
            next_col_name = events.event_type.CUSTOM.column
        # IOS --------------
        elif event_type == events.event_type.CLICK_IOS.ui_type:
            next_table = events.event_type.CLICK_IOS.table
            next_col_name = events.event_type.CLICK_IOS.column
        elif event_type == events.event_type.INPUT_IOS.ui_type:
            next_table = events.event_type.INPUT_IOS.table
            next_col_name = events.event_type.INPUT_IOS.column
        elif event_type == events.event_type.VIEW_IOS.ui_type:
            next_table = events.event_type.VIEW_IOS.table
            next_col_name = events.event_type.VIEW_IOS.column
        elif event_type == events.event_type.CUSTOM_IOS.ui_type:
            next_table = events.event_type.CUSTOM_IOS.table
            next_col_name = events.event_type.CUSTOM_IOS.column
        else:
            print("=================UNDEFINED")
            continue

        values = {**values, **sessions._multiple_values(helper.values_for_operator(value=s["value"], op=s["operator"]),
                                                        value_key=f"value{i + 1}")}
        if sessions.__is_negation_operator(op) and i > 0:
            op = sessions.__reverse_sql_operator(op)
            main_condition = "left_not.session_id ISNULL"
            extra_from.append(f"""LEFT JOIN LATERAL (SELECT session_id
                                                     FROM {next_table} AS s_main
                                                     WHERE s_main.{next_col_name} {op} %(value{i + 1})s
                                                       AND s_main.timestamp >= T{i}.stage{i}_timestamp
                                                       AND s_main.session_id = T1.session_id) AS left_not ON (TRUE)""")
        else:
            if is_any:
                main_condition = "TRUE"
            else:
                main_condition = sessions._multiple_conditions(f"main.{next_col_name} {op} %(value{i + 1})s",
                                                               values=s["value"], value_key=f"value{i + 1}")
        n_stages_query.append(f"""
        (SELECT main.session_id,
            {"MIN(main.timestamp)" if i + 1 < len(stages) else "MAX(main.timestamp)"} AS stage{i + 1}_timestamp,
            '{event_type}' AS type,
            '{s["operator"]}' AS operator
        FROM {next_table} AS main {" ".join(extra_from)}
        WHERE main.timestamp >= {f"T{i}.stage{i}_timestamp" if i > 0 else "%(startTimestamp)s"}
              {f"AND main.session_id=T1.session_id" if i > 0 else ""}
          AND {main_condition}
          {(" AND " + " AND ".join(stage_constraints)) if len(stage_constraints) > 0 else ""}
          {(" AND " + " AND ".join(first_stage_extra_constraints)) if len(first_stage_extra_constraints) > 0 and i == 0 else ""}
        GROUP BY main.session_id)
        AS T{i + 1} {"USING (session_id)" if i > 0 else ""}
        """)
    if len(n_stages_query) == 0:
        return []
    n_stages_query = " LEFT JOIN LATERAL ".join(n_stages_query)
    n_stages_query += ") AS stages_t"

    n_stages_query = f"""
    SELECT stages_and_issues_t.*, sessions.session_id, sessions.user_uuid FROM (
    SELECT * FROM (
        SELECT * FROM
        {n_stages_query}
        LEFT JOIN LATERAL
        (
            SELECT * FROM
            (SELECT ISE.session_id,
                    ISS.type as issue_type,
                    ISE.timestamp AS issue_timestamp,
                    ISS.context_string as issue_context,
                    ISS.issue_id as issue_id
             FROM events_common.issues AS ISE INNER JOIN issues AS ISS USING (issue_id)
             WHERE ISE.timestamp >= stages_t.stage1_timestamp
               AND ISE.timestamp <= stages_t.stage{i + 1}_timestamp
               AND ISS.project_id=%(project_id)s
               {"AND ISS.type IN %(issueTypes)s" if len(filter_issues) > 0 else ""}) AS base_t
        ) AS issues_t
        USING (session_id)) AS stages_and_issues_t
        INNER JOIN sessions USING(session_id);
    """

    # LIMIT 10000
    params = {"project_id": project_id, "startTimestamp": filter_d["startDate"], "endTimestamp": filter_d["endDate"],
              "issueTypes": tuple(filter_issues), **values}
    with pg_client.PostgresClient() as cur:
        # print("---------------------------------------------------")
        # print(cur.mogrify(n_stages_query, params))
        # print("---------------------------------------------------")
        cur.execute(cur.mogrify(n_stages_query, params))
        rows = cur.fetchall()
    return rows


def pearson_corr(x: list, y: list):
    n = len(x)
    if n != len(y):
        raise ValueError(f'x and y must have the same length. Got {len(x)} and {len(y)} instead')

    if n < 2:
        warnings.warn(f'x and y must have length at least 2. Got {n} instead')
        return None, None, False

    # If an input is constant, the correlation coefficient is not defined.
    if all(t == x[0] for t in x) or all(t == y[0] for t in y):
        warnings.warn("An input array is constant; the correlation coefficient is not defined.")
        return None, None, False

    if n == 2:
        # callers unpack (r, confidence, significance), so return all three values
        r = math.copysign(1, x[1] - x[0]) * math.copysign(1, y[1] - y[0])
        return max(r, 0.0), 1.0, r > SIGNIFICANCE_THRSH

    xmean = sum(x) / len(x)
    ymean = sum(y) / len(y)

    xm = [el - xmean for el in x]
    ym = [el - ymean for el in y]

    normxm = math.sqrt((sum([xm[i] * xm[i] for i in range(len(xm))])))
    normym = math.sqrt((sum([ym[i] * ym[i] for i in range(len(ym))])))

    threshold = 1e-8
    if normxm < threshold * abs(xmean) or normym < threshold * abs(ymean):
        # If all the values in x (likewise y) are very close to the mean,
        # the loss of precision that occurs in the subtraction xm = x - xmean
        # might result in large errors in r.
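        # note: unlike the constant-input guard above, this path only warns;
        # r is still computed from the raw residuals below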
        warnings.warn("An input array is constant; the correlation coefficient is not defined.")

    r = sum(
        i[0] * i[1] for i in zip([xm[i] / normxm for i in range(len(xm))], [ym[i] / normym for i in range(len(ym))]))

    # Presumably, if abs(r) > 1, then it is only a small artifact of floating-point arithmetic.
    # However, if r < 0, we don't care, as our problem is to find only positive correlations.
    r = max(min(r, 1.0), 0.0)

    # approximated confidence
    if n < 31:
        t_c = T_VALUES[n]
    elif n < 50:
        t_c = 2.02
    else:
        t_c = 2
    if r >= 0.999:
        confidence = 1
    else:
        confidence = r * math.sqrt(n - 2) / math.sqrt(1 - r ** 2)

    if confidence > SIGNIFICANCE_THRSH:
        return r, confidence, True
    else:
        return r, confidence, False


def get_transitions_and_issues_of_each_type(rows: List[RealDictRow], all_issues_with_context, first_stage, last_stage):
    """
    Returns two lists with binary values 0/1:

    transitions ::: 1 if the session transitioned from the first stage to the last,
                    else 0
    errors      ::: a dictionary where the keys are all unique issues (currently context-wise)
                    and the values are lists:
                    1 if the issue happened between the first stage and the last,
                    else 0

    For the smaller task of calculating the total drop due to issues,
    we need to disregard the issue type when creating the `errors`-like array.
    The `all_errors` array can be obtained by a logical OR applied to all errors by issue;
    the `transitions` array stays the same.
    """
    transitions = []
    n_sess_affected = 0
    errors = {}
    for issue in all_issues_with_context:
        split = issue.split('__^__')
        errors[issue] = {
            "errors": [],
            "issue_type": split[0],
            "context": split[1]}

    for row in rows:
        t = 0
        first_ts = row[f'stage{first_stage}_timestamp']
        last_ts = row[f'stage{last_stage}_timestamp']
        if first_ts is None:
            continue
        elif first_ts is not None and last_ts is not None:
            t = 1
        transitions.append(t)

        ic_present = False
        for issue_type_with_context in errors:
            ic = 0
            issue_type = errors[issue_type_with_context]["issue_type"]
            context = errors[issue_type_with_context]["context"]
            if row['issue_type'] is not None:
                if last_ts is None or (first_ts < row['issue_timestamp'] < last_ts):
                    context_in_row = row['issue_context'] if row['issue_context'] is not None else ''
                    if issue_type == row['issue_type'] and context == context_in_row:
                        ic = 1
                        ic_present = True
            errors[issue_type_with_context]["errors"].append(ic)

        if ic_present and t:
            n_sess_affected += 1

    # def tuple_or(t: tuple):
    #     x = 0
    #     for el in t:
    #         x |= el
    #     return x
    def tuple_or(t: tuple):
        for el in t:
            if el > 0:
                return 1
        return 0

    errors = {key: errors[key]["errors"] for key in errors}
    all_errors = [tuple_or(t) for t in zip(*errors.values())]

    return transitions, errors, all_errors, n_sess_affected


def get_affected_users_for_all_issues(rows, first_stage, last_stage):
    """
    Collects, per issue (type + context), the distinct users and sessions affected
    within the sub-funnel window.

    :param rows: session/stage/issue rows returned by get_stages_and_events
    :param first_stage: 1-based index of the first stage of the sub-funnel
    :param last_stage: 1-based index of the last stage of the sub-funnel
    :return: (all_issues_with_context, n_issues_dict, n_affected_users_dict,
              n_affected_sessions_dict, contexts)
    """
    affected_users = defaultdict(lambda: set())
    affected_sessions = defaultdict(lambda: set())
    contexts = defaultdict(lambda: None)
    n_affected_users_dict = defaultdict(lambda: None)
    n_affected_sessions_dict = defaultdict(lambda: None)
    all_issues_with_context = set()
    n_issues_dict = defaultdict(lambda: 0)
    issues_by_session = defaultdict(lambda: 0)

    for row in rows:

        # check that the session has reached the first stage of subfunnel:
        if row[f'stage{first_stage}_timestamp'] is None:
            continue

        iss = row['issue_type']
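        # issue columns come from a LEFT JOIN, so they are None when the session had no issue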
+def get_affected_users_for_all_issues(rows, first_stage, last_stage):
+    """
+    Collects, per issue (type + context), the affected users and sessions
+    between the given stages.
+
+    :param rows: rows returned by the multi-stage query
+    :param first_stage: 1-based number of the first stage of the sub-funnel
+    :param last_stage: 1-based number of the last stage of the sub-funnel
+    :return: (all_issues_with_context, n_issues_dict, n_affected_users_dict,
+              n_affected_sessions_dict, contexts)
+    """
+    affected_users = defaultdict(set)
+    affected_sessions = defaultdict(set)
+    contexts = defaultdict(lambda: None)
+    n_affected_users_dict = defaultdict(lambda: None)
+    n_affected_sessions_dict = defaultdict(lambda: None)
+    all_issues_with_context = set()
+    n_issues_dict = defaultdict(lambda: 0)
+    issues_by_session = defaultdict(lambda: 0)
+
+    for row in rows:
+        # check that the session has reached the first stage of the sub-funnel:
+        if row[f'stage{first_stage}_timestamp'] is None:
+            continue
+
+        iss = row['issue_type']
+        iss_ts = row['issue_timestamp']
+
+        # check that the issue exists and belongs to the sub-funnel:
+        if iss is not None and (row[f'stage{last_stage}_timestamp'] is None or
+                                (row[f'stage{first_stage}_timestamp'] < iss_ts < row[f'stage{last_stage}_timestamp'])):
+            context_string = row['issue_context'] if row['issue_context'] is not None else ''
+            issue_with_context = iss + '__^__' + context_string
+            contexts[issue_with_context] = {"context": context_string, "id": row["issue_id"]}
+            all_issues_with_context.add(issue_with_context)
+            n_issues_dict[issue_with_context] += 1
+            if row['user_uuid'] is not None:
+                affected_users[issue_with_context].add(row['user_uuid'])
+
+            affected_sessions[issue_with_context].add(row['session_id'])
+            issues_by_session[row['session_id']] += 1
+
+    if len(affected_users) > 0:
+        n_affected_users_dict.update({
+            iss: len(affected_users[iss]) for iss in affected_users
+        })
+    if len(affected_sessions) > 0:
+        n_affected_sessions_dict.update({
+            iss: len(affected_sessions[iss]) for iss in affected_sessions
+        })
+    return all_issues_with_context, n_issues_dict, n_affected_users_dict, n_affected_sessions_dict, contexts
+
+
+def count_sessions(rows, n_stages):
+    session_counts = {i: set() for i in range(1, n_stages + 1)}
+    for row in rows:
+        for i in range(1, n_stages + 1):
+            if row[f"stage{i}_timestamp"] is not None:
+                session_counts[i].add(row["session_id"])
+    session_counts = {i: len(session_counts[i]) for i in session_counts}
+    return session_counts
+
+
+def count_users(rows, n_stages):
+    users_in_stages = defaultdict(set)
+    for row in rows:
+        for i in range(1, n_stages + 1):
+            if row[f"stage{i}_timestamp"] is not None:
+                users_in_stages[i].add(row["user_uuid"])
+
+    users_count = {i: len(users_in_stages[i]) for i in range(1, n_stages + 1)}
+    return users_count
+
+
+def get_stages(stages, rows):
+    n_stages = len(stages)
+    session_counts = count_sessions(rows, n_stages)
+    users_counts = count_users(rows, n_stages)
+
+    stages_list = []
+    for i, stage in enumerate(stages):
+        # drop (in %) is relative to the preceding stage; undefined for the first one
+        drop = None
+        if i != 0:
+            if session_counts[i] == 0:
+                drop = 0
+            else:
+                drop = int(100 * (session_counts[i] - session_counts[i + 1]) / session_counts[i])
+
+        stages_list.append(
+            {"value": stage["value"],
+             "type": stage["type"],
+             "operator": stage["operator"],
+             "sessionsCount": session_counts[i + 1],
+             "drop_pct": drop,
+             "usersCount": users_counts[i + 1],
+             "dropDueToIssues": 0
+             }
+        )
+    return stages_list
+
+
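+# Worked example (hypothetical counts) for the drop computation in get_stages:
+# with session_counts = {1: 200, 2: 150, 3: 30}, stage 2 gets
+#   drop = int(100 * (200 - 150) / 200) = 25
+# and stage 3 gets
+#   drop = int(100 * (150 - 30) / 150) = 80,
+# i.e. the percentage of sessions lost relative to the preceding stage.
+
+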
Using n_stages instead") + last_stage = n_stages + + n_critical_issues = 0 + issues_dict = dict({"significant": [], + "insignificant": []}) + session_counts = count_sessions(rows, n_stages) + drop = session_counts[first_stage] - session_counts[last_stage] + + all_issues_with_context, n_issues_dict, affected_users_dict, affected_sessions, contexts = get_affected_users_for_all_issues( + rows, first_stage, last_stage) + transitions, errors, all_errors, n_sess_affected = get_transitions_and_issues_of_each_type(rows, + all_issues_with_context, + first_stage, last_stage) + + print("len(transitions) =", len(transitions)) + + if any(all_errors): + total_drop_corr, conf, is_sign = pearson_corr(transitions, all_errors) + if total_drop_corr is not None and drop is not None: + total_drop_due_to_issues = int(total_drop_corr * n_sess_affected) + else: + total_drop_due_to_issues = 0 + else: + total_drop_due_to_issues = 0 + + if drop_only: + return total_drop_due_to_issues + for issue in all_issues_with_context: + + if not any(errors[issue]): + continue + r, confidence, is_sign = pearson_corr(transitions, errors[issue]) + + if r is not None and drop is not None and is_sign: + lost_conversions = int(r * affected_sessions[issue]) + else: + lost_conversions = None + if r is None: + r = 0 + split = issue.split('__^__') + issues_dict['significant' if is_sign else 'insignificant'].append({ + "type": split[0], + "title": helper.get_issue_title(split[0]), + "affected_sessions": affected_sessions[issue], + "unaffected_sessions": session_counts[1] - affected_sessions[issue], + "lost_conversions": lost_conversions, + "affected_users": affected_users_dict[issue], + "conversion_impact": round(r * 100), + "context_string": contexts[issue]["context"], + "issue_id": contexts[issue]["id"] + }) + + if is_sign: + n_critical_issues += n_issues_dict[issue] + + return n_critical_issues, issues_dict, total_drop_due_to_issues + + +def get_top_insights(filter_d, project_id): + output = [] + stages = filter_d.get("events", []) + # TODO: handle 1 stage alone + if len(stages) == 0: + print("no stages found") + return output, 0 + elif len(stages) == 1: + # TODO: count sessions, and users for single stage + output = [{ + "type": stages[0]["type"], + "value": stages[0]["value"], + "dropPercentage": None, + "operator": stages[0]["operator"], + "sessionsCount": 0, + "dropPct": 0, + "usersCount": 0, + "dropDueToIssues": 0 + + }] + counts = sessions.search_sessions(data=schemas.SessionsSearchCountSchema.parse_obj(filter_d), + project_id=project_id, + user_id=None, count_only=True) + output[0]["sessionsCount"] = counts["countSessions"] + output[0]["usersCount"] = counts["countUsers"] + return output, 0 + # The result of the multi-stage query + rows = get_stages_and_events(filter_d=filter_d, project_id=project_id) + if len(rows) == 0: + return get_stages(stages, []), 0 + # Obtain the first part of the output + stages_list = get_stages(stages, rows) + # Obtain the second part of the output + total_drop_due_to_issues = get_issues(stages, rows, first_stage=filter_d.get("firstStage"), + last_stage=filter_d.get("lastStage"), drop_only=True) + return stages_list, total_drop_due_to_issues + + +def get_issues_list(filter_d, project_id, first_stage=None, last_stage=None): + output = dict({"total_drop_due_to_issues": 0, "critical_issues_count": 0, "significant": [], "insignificant": []}) + stages = filter_d.get("events", []) + # The result of the multi-stage query + rows = get_stages_and_events(filter_d=filter_d, project_id=project_id) + # 
+def get_issues_list(filter_d, project_id, first_stage=None, last_stage=None):
+    output = {"total_drop_due_to_issues": 0, "critical_issues_count": 0, "significant": [], "insignificant": []}
+    stages = filter_d.get("events", [])
+    # the result of the multi-stage query
+    rows = get_stages_and_events(filter_d=filter_d, project_id=project_id)
+    if len(rows) == 0:
+        return output
+    # second part of the output
+    n_critical_issues, issues_dict, total_drop_due_to_issues = get_issues(stages, rows, first_stage=first_stage,
+                                                                          last_stage=last_stage)
+    output['total_drop_due_to_issues'] = total_drop_due_to_issues
+    # output['critical_issues_count'] = n_critical_issues
+    output = {**output, **issues_dict}
+    return output
+
+
+def get_overview(filter_d, project_id, first_stage=None, last_stage=None):
+    output = {}
+    stages = filter_d["events"]
+    # TODO: handle a single stage alone
+    if len(stages) == 0:
+        return {"stages": [],
+                "criticalIssuesCount": 0}
+    elif len(stages) == 1:
+        # TODO: count sessions and users for a single stage
+        output["stages"] = [{
+            "type": stages[0]["type"],
+            "value": stages[0]["value"],
+            "sessionsCount": None,
+            "dropPercentage": None,
+            "usersCount": None
+        }]
+        return output
+    # the result of the multi-stage query
+    rows = get_stages_and_events(filter_d=filter_d, project_id=project_id)
+    if len(rows) == 0:
+        # no rows: fall back to the same shape as the single-stage case
+        output["stages"] = [{
+            "type": stages[0]["type"],
+            "value": stages[0]["value"],
+            "sessionsCount": None,
+            "dropPercentage": None,
+            "usersCount": None
+        }]
+        output['criticalIssuesCount'] = 0
+        return output
+    # first part of the output
+    stages_list = get_stages(stages, rows)
+    # second part of the output
+    n_critical_issues, issues_dict, total_drop_due_to_issues = get_issues(stages, rows, first_stage=first_stage,
+                                                                          last_stage=last_stage)
+
+    output['stages'] = stages_list
+    output['criticalIssuesCount'] = n_critical_issues
+    return output
+
+
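+# Expected shape of get_overview's result (all values hypothetical):
+#   {"stages": [{"value": ..., "type": ..., "operator": ...,
+#                "sessionsCount": 200, "drop_pct": None, "usersCount": 180,
+#                "dropDueToIssues": 0}, ...],
+#    "criticalIssuesCount": 3}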